diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index c00dcde0486..6b038ab1ee6 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -229,11 +229,31 @@ jobs: "include": [ {"test":"ic-good-opt-off", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"off"} + "pg_settings":{ + "optimizer":"off", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"ic-good-opt-on", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"on"} + "pg_settings":{ + "optimizer":"on", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"pax-ic-good-opt-off", "make_configs":[ @@ -242,7 +262,16 @@ jobs: ], "pg_settings":{ "optimizer":"off", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1", + "force_parallel_mode":"1" } }, {"test":"pax-ic-good-opt-on", @@ -252,14 +281,30 @@ jobs: ], "pg_settings":{ "optimizer":"on", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + 
"cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" } }, {"test":"pax-ic-isolation2-opt-off", "make_configs":["contrib/pax_storage/:isolation2_test"], "pg_settings":{ "optimizer":"off", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" }, "enable_core_check":false }, @@ -267,7 +312,15 @@ jobs: "make_configs":["contrib/pax_storage/:isolation2_test"], "pg_settings":{ "optimizer":"on", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" }, "enable_core_check":false }, @@ -1434,10 +1487,16 @@ jobs: # 2. Follow the same pattern as optimizer # 3. 
Update matrix entries to include the new setting - # Set PostgreSQL options if defined + # Set PostgreSQL options from pg_settings matrix PG_OPTS="" - if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" + PG_SETTINGS_JSON='${{ toJson(matrix.pg_settings) }}' + if [[ "$PG_SETTINGS_JSON" != "null" && "$PG_SETTINGS_JSON" != "" ]]; then + while IFS= read -r line; do + # Extract key and value from JSON lines like: "key": "value" + if [[ "$line" =~ \"([^\"]+)\"[[:space:]]*:[[:space:]]*\"([^\"]+)\" ]]; then + PG_OPTS="$PG_OPTS -c ${BASH_REMATCH[1]}=${BASH_REMATCH[2]}" + fi + done <<< "$PG_SETTINGS_JSON" fi # Create extension if required @@ -1464,10 +1523,6 @@ jobs: esac fi - if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" - fi - # Read configs into array IFS=' ' read -r -a configs <<< "${{ join(matrix.make_configs, ' ') }}" diff --git a/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml b/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml index 041eabc252b..7b8aaa6b639 100644 --- a/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml +++ b/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml @@ -204,11 +204,31 @@ jobs: "include": [ {"test":"ic-deb-good-opt-off", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"off"} + "pg_settings":{ + "optimizer":"off", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"ic-deb-good-opt-on", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"on"} + 
"pg_settings":{ + "optimizer":"on", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"pax-ic-deb-good-opt-off", "make_configs":[ @@ -217,7 +237,16 @@ jobs: ], "pg_settings":{ "optimizer":"off", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1", + "force_parallel_mode":"1" } }, {"test":"pax-ic-deb-good-opt-on", @@ -227,7 +256,15 @@ jobs: ], "pg_settings":{ "optimizer":"on", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" } }, {"test":"ic-deb-contrib", @@ -1401,14 +1438,16 @@ jobs: esac fi - # Set PostgreSQL options if defined + # Set PostgreSQL options from pg_settings matrix PG_OPTS="" - if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" - fi - - if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" + PG_SETTINGS_JSON='${{ toJson(matrix.pg_settings) }}' + if [[ 
"$PG_SETTINGS_JSON" != "null" && "$PG_SETTINGS_JSON" != "" ]]; then + while IFS= read -r line; do + # Extract key and value from JSON lines like: "key": "value" + if [[ "$line" =~ \"([^\"]+)\"[[:space:]]*:[[:space:]]*\"([^\"]+)\" ]]; then + PG_OPTS="$PG_OPTS -c ${BASH_REMATCH[1]}=${BASH_REMATCH[2]}" + fi + done <<< "$PG_SETTINGS_JSON" fi # Read configs into array diff --git a/.github/workflows/build-deb-cloudberry.yml b/.github/workflows/build-deb-cloudberry.yml index f8eadee3c8f..eab70fb6687 100644 --- a/.github/workflows/build-deb-cloudberry.yml +++ b/.github/workflows/build-deb-cloudberry.yml @@ -197,11 +197,31 @@ jobs: "include": [ {"test":"ic-deb-good-opt-off", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"off"} + "pg_settings":{ + "optimizer":"off", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"ic-deb-good-opt-on", "make_configs":["src/test/regress:installcheck-good"], - "pg_settings":{"optimizer":"on"} + "pg_settings":{ + "optimizer":"on", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" + } }, {"test":"pax-ic-deb-good-opt-off", "make_configs":[ @@ -210,7 +230,16 @@ jobs: ], "pg_settings":{ "optimizer":"off", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + 
"cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1", + "force_parallel_mode":"1" } }, {"test":"pax-ic-deb-good-opt-on", @@ -220,7 +249,15 @@ jobs: ], "pg_settings":{ "optimizer":"on", - "default_table_access_method":"pax" + "default_table_access_method":"pax", + "cbdb_inner_join_selectivity_damping_factor":"1", + "cbdb_streaming_damping_factor":"1", + "cbdb_eager_subplan":"off", + "cbdb_dedup_semi_damping_factor":"1", + "cbdb_enable_multi_window_agg":"off", + "cbdb_enable_setop_pre_dedup":"off", + "cbdb_enable_dynamic_shared_scan":"off", + "cbdb_2phase_agg_cardinality_cap":"1" } }, {"test":"ic-deb-contrib", @@ -1394,14 +1431,16 @@ jobs: esac fi - # Set PostgreSQL options if defined + # Set PostgreSQL options from pg_settings matrix PG_OPTS="" - if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" - fi - - if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then - PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" + PG_SETTINGS_JSON='${{ toJson(matrix.pg_settings) }}' + if [[ "$PG_SETTINGS_JSON" != "null" && "$PG_SETTINGS_JSON" != "" ]]; then + while IFS= read -r line; do + # Extract key and value from JSON lines like: "key": "value" + if [[ "$line" =~ \"([^\"]+)\"[[:space:]]*:[[:space:]]*\"([^\"]+)\" ]]; then + PG_OPTS="$PG_OPTS -c ${BASH_REMATCH[1]}=${BASH_REMATCH[2]}" + fi + done <<< "$PG_SETTINGS_JSON" fi # Read configs into array diff --git a/contrib/pax_storage/expected/cbdb_parallel.out b/contrib/pax_storage/expected/cbdb_parallel.out index ec6ceba7e3c..e52bf56f0b9 100644 --- a/contrib/pax_storage/expected/cbdb_parallel.out +++ b/contrib/pax_storage/expected/cbdb_parallel.out @@ -613,6 +613,15 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt2 on select * 
from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 6 | 6 | 6 | 7 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 10 | 10 | 10 | 11 + 6 | 7 | 5 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 10 | 11 | 9 | 10 | 10 | 11 + 1 | 2 | 1 | 1 | 1 | 2 + 2 | 3 | 1 | 2 | 2 | 3 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 4 | 4 | 5 @@ -623,15 +632,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 1 | 2 | 1 | 1 | 1 | 2 - 2 | 3 | 1 | 2 | 2 | 3 - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 6 | 6 | 6 | 7 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 10 | 10 | 10 | 11 - 6 | 7 | 5 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 10 | 11 | 9 | 10 | 10 | 11 (19 rows) -- parallel hash join @@ -674,6 +674,15 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt2 on select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 5 | 6 | 6 | 7 + 6 | 7 | 6 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 9 | 10 | 10 | 11 + 10 | 11 | 10 | 10 | 10 | 11 + 1 | 2 | 1 | 1 | 1 | 2 + 2 | 3 | 1 | 2 | 2 | 3 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 2 | 3 | 3 | 4 3 | 4 | 3 | 3 | 3 | 4 @@ -684,15 +693,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 8 | 9 | 7 | 8 | 8 | 9 8 | 9 | 8 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 1 | 2 | 1 | 1 | 1 | 2 - 2 | 3 | 1 | 2 | 2 | 3 - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 5 | 6 | 6 | 7 - 6 | 7 | 6 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 9 | 10 | 10 | 11 - 10 | 11 | 10 | 10 | 10 | 11 (19 rows) -- @@ -728,6 +728,13 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; ----+----+----+----+----+---- 1 | 2 | 1 | 1 | 1 | 2 2 | 3 | 1 | 2 | 2 | 3 + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 6 | 6 | 6 | 7 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 10 | 10 | 
10 | 11 + 6 | 7 | 5 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 10 | 11 | 9 | 10 | 10 | 11 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 4 | 4 | 5 @@ -738,13 +745,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 6 | 6 | 6 | 7 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 10 | 10 | 10 | 11 - 6 | 7 | 5 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 10 | 11 | 9 | 10 | 10 | 11 (19 rows) -- parallel join without parallel hash @@ -814,12 +814,12 @@ set local enable_parallel = off; select * from rt4 join t2 using(b); b | a | a ----+----+---- + 2 | 1 | 1 3 | 2 | 2 4 | 3 | 3 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 - 2 | 1 | 1 6 | 5 | 5 7 | 6 | 6 10 | 9 | 9 @@ -858,12 +858,12 @@ select * from rt4 join t2 using(b); 7 | 6 | 6 10 | 9 | 9 11 | 10 | 10 + 2 | 1 | 1 3 | 2 | 2 4 | 3 | 3 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 - 2 | 1 | 1 (10 rows) create table t3(a int, b int) with(parallel_workers=2); @@ -947,11 +947,11 @@ explain(locus, costs off) select * from t_replica_workers_2 join t_random_worker select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 1 | 2 | 2 2 | 3 | 3 - 4 | 5 | 5 3 | 4 | 4 5 | 6 | 6 + 1 | 2 | 2 + 4 | 5 | 5 (5 rows) -- non parallel results @@ -959,11 +959,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 3 | 4 | 4 - 5 | 6 | 6 - 4 | 5 | 5 1 | 2 | 2 + 4 | 5 | 5 2 | 3 | 3 + 3 | 4 | 4 + 5 | 6 | 6 (5 rows) abort; @@ -1005,10 +1005,10 @@ select * from t_replica_workers_2 right join t_random_workers_2 using(a); a | b | b ---+---+--- 1 | 2 | 2 - 2 | 3 | 3 - 3 | 4 | 4 4 | 5 | 5 + 3 | 4 | 4 5 | 6 | 6 + 2 | 3 | 3 (5 rows) -- non parallel results @@ -1016,11 +1016,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 right join t_random_workers_2 using(a); a | b | b ---+---+--- - 1 | 2 | 2 - 2 | 3 | 3 3 | 4 | 4 - 4 | 5 | 5 5 | 6 | 6 + 2 | 3 | 3 
+ 1 | 2 | 2 + 4 | 5 | 5 (5 rows) abort; @@ -1061,11 +1061,11 @@ explain(locus, costs off) select * from t_replica_workers_2 join t_random_worker select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- - 1 | 2 | 2 3 | 4 | 4 - 2 | 3 | 3 4 | 5 | 5 + 1 | 2 | 2 5 | 6 | 6 + 2 | 3 | 3 (5 rows) -- non parallel results @@ -1073,10 +1073,10 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- - 1 | 2 | 2 - 3 | 4 | 4 2 | 3 | 3 + 3 | 4 | 4 4 | 5 | 5 + 1 | 2 | 2 5 | 6 | 6 (5 rows) @@ -1103,17 +1103,17 @@ explain(costs off) select * from t1 right join t2 on t1.b = t2.a; QUERY PLAN ------------------------------------------------------------------ Gather Motion 9:1 (slice1; segments: 9) - -> Parallel Hash Right Join - Hash Cond: (t1.b = t2.a) - -> Redistribute Motion 9:9 (slice2; segments: 9) - Hash Key: t1.b + -> Parallel Hash Left Join + Hash Cond: (t2.a = t1.b) + -> Redistribute Motion 6:9 (slice2; segments: 6) + Hash Key: t2.a Hash Module: 3 - -> Parallel Seq Scan on t1 + -> Parallel Seq Scan on t2 -> Parallel Hash - -> Redistribute Motion 6:9 (slice3; segments: 6) - Hash Key: t2.a + -> Redistribute Motion 9:9 (slice3; segments: 9) + Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on t2 + -> Parallel Seq Scan on t1 Optimizer: Postgres query optimizer (13 rows) @@ -1136,34 +1136,34 @@ analyze t2; set local enable_parallel = on; -- parallel hash join with shared table, SinglQE as outer partial path. 
explain(locus, costs off) select * from (select count(*) as a from t2) t2 left join t1 on t1.a = t2.a; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 6:1 (slice1; segments: 6) Locus: Entry - -> Parallel Hash Right Join - Locus: HashedWorkers + -> Parallel Hash Left Join + Locus: Hashed Parallel Workers: 2 - Hash Cond: (t1.a = (count(*))) - -> Parallel Seq Scan on t1 - Locus: HashedWorkers - Parallel Workers: 2 - -> Parallel Hash + Hash Cond: ((count(*)) = t1.a) + -> Redistribute Motion 1:6 (slice2; segments: 1) Locus: Hashed - -> Redistribute Motion 1:6 (slice2; segments: 1) - Locus: Hashed - Parallel Workers: 2 - Hash Key: (count(*)) - Hash Module: 3 - -> Finalize Aggregate + Parallel Workers: 2 + Hash Key: (count(*)) + Hash Module: 3 + -> Finalize Aggregate + Locus: SingleQE + -> Gather Motion 6:1 (slice3; segments: 6) Locus: SingleQE - -> Gather Motion 6:1 (slice3; segments: 6) - Locus: SingleQE - -> Partial Aggregate + -> Partial Aggregate + Locus: HashedWorkers + Parallel Workers: 2 + -> Parallel Seq Scan on t2 Locus: HashedWorkers Parallel Workers: 2 - -> Parallel Seq Scan on t2 - Locus: HashedWorkers - Parallel Workers: 2 + -> Parallel Hash + Locus: Hashed + -> Parallel Seq Scan on t1 + Locus: HashedWorkers + Parallel Workers: 2 Optimizer: Postgres query optimizer (27 rows) @@ -1427,7 +1427,7 @@ explain (locus, costs off) select * from rt1 union all select * from t1; -> Result Locus: Strewn Parallel Workers: 2 - One-Time Filter: (gp_execution_segment() = 0) + One-Time Filter: (gp_execution_segment() = 2) -> Parallel Seq Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 2 @@ -1451,7 +1451,7 @@ explain (locus, costs off) select * from rt1 union all select * from t2; -> Result Locus: Strewn Parallel Workers: 2 - One-Time Filter: (gp_execution_segment() = 0) + One-Time Filter: (gp_execution_segment() = 2) -> Parallel Seq 
Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 2 @@ -1930,49 +1930,62 @@ set local enable_hashagg to false; set local enable_parallel = off; explain (costs off, locus) select c, sum(a), avg(b), count(*) from pagg_tab group by 1 having avg(d) < 15 order by 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------ Sort Locus: Entry Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) - -> Append + -> Merge Append Locus: Entry - -> Finalize GroupAggregate - Locus: Entry - Group Key: pagg_tab.c - Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Gather Motion 3:1 (slice1; segments: 3) - Locus: Entry - Merge Key: pagg_tab.c - -> Partial GroupAggregate + Sort Key: pagg_tab.c + -> Gather Motion 3:1 (slice1; segments: 3) + Locus: SingleQE + Merge Key: pagg_tab.c + -> Finalize GroupAggregate + Locus: Hashed + Group Key: pagg_tab.c + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Sort Locus: Hashed - Group Key: pagg_tab.c - -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) Locus: Hashed - Sort Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab + Hash Key: pagg_tab.c + -> Partial GroupAggregate Locus: Hashed - -> Finalize GroupAggregate - Locus: Entry - Group Key: pagg_tab_1.c - Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Gather Motion 3:1 (slice2; segments: 3) - Locus: Entry - Merge Key: pagg_tab_1.c - -> Partial GroupAggregate + Group Key: pagg_tab.c + -> Sort + Locus: Hashed + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + Locus: Hashed + -> Gather Motion 3:1 (slice3; segments: 3) + Locus: SingleQE + Merge Key: pagg_tab_1.c + -> Finalize GroupAggregate + Locus: Hashed + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Sort Locus: Hashed - Group Key: pagg_tab_1.c - -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice4; segments: 
3) Locus: Hashed - Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + Hash Key: pagg_tab_1.c + -> Partial GroupAggregate Locus: Hashed + Group Key: pagg_tab_1.c + -> Sort + Locus: Hashed + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + Locus: Hashed -> Finalize GroupAggregate - Locus: Entry + Locus: SingleQE Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) - -> Gather Motion 3:1 (slice3; segments: 3) - Locus: Entry + -> Gather Motion 3:1 (slice5; segments: 3) + Locus: SingleQE Merge Key: pagg_tab_2.c -> Partial GroupAggregate Locus: Hashed @@ -1983,7 +1996,7 @@ select c, sum(a), avg(b), count(*) from pagg_tab group by 1 having avg(d) < 15 o -> Seq Scan on pagg_tab_p3 pagg_tab_2 Locus: Hashed Optimizer: Postgres query optimizer -(51 rows) +(64 rows) abort; -- diff --git a/contrib/pax_storage/src/test/regress/expected/partition_aggregate.out b/contrib/pax_storage/src/test/regress/expected/partition_aggregate.out index 65381d04800..ec6d83bd11c 100644 --- a/contrib/pax_storage/src/test/regress/expected/partition_aggregate.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_aggregate.out @@ -5,6 +5,9 @@ -- Note: to ensure plan stability, it's a good idea to make the partitions of -- any one partitioned table in this test all have different numbers of rows. -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- Disable ORCA since it does support partition-wise aggregates set optimizer to off; -- Enable partitionwise aggregate, which by default is disabled. @@ -19,37 +22,46 @@ SET enable_incremental_sort TO off; -- Tests for list partitioned tables. -- CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003', '0004'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0005', '0006', '0007', '0008'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0009', '0010', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; ANALYZE pagg_tab; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Append - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: pagg_tab.c Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: pagg_tab.c - -> Partial HashAggregate - Group Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab - -> Finalize HashAggregate + -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> Partial HashAggregate + Group Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Finalize GroupAggregate Group Key: pagg_tab_1.c Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: pagg_tab_1.c - -> 
Partial HashAggregate - Group Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: pagg_tab_1.c + -> Partial HashAggregate + Group Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Finalize GroupAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) @@ -61,7 +73,7 @@ SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVI Group Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(30 rows) +(36 rows) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count | min | max @@ -77,8 +89,8 @@ SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVI -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) -> Sort @@ -117,8 +129,8 @@ SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVI -- Check with multiple columns in GROUP BY EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Append -> HashAggregate @@ -136,8 +148,8 @@ SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; -- Check with multiple columns in GROUP BY, order in GROUP BY is reversed EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; - 
QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Append -> HashAggregate @@ -155,8 +167,8 @@ SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; -- Check with multiple columns in GROUP BY, order in target-list is reversed EXPLAIN (COSTS OFF) SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Append -> HashAggregate @@ -174,13 +186,14 @@ SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; -- Test when input relation for grouping is dummy EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; - QUERY PLAN --------------------------------- + QUERY PLAN +------------------------------------- HashAggregate Group Key: c -> Result One-Time Filter: false -(4 rows) + Optimizer: Postgres query optimizer +(5 rows) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; c | sum @@ -189,13 +202,14 @@ SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; - QUERY PLAN --------------------------------- + QUERY PLAN +------------------------------------- GroupAggregate Group Key: c -> Result One-Time Filter: false -(4 rows) + Optimizer: Postgres query optimizer +(5 rows) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; c | sum @@ -207,35 +221,44 @@ SET enable_hashagg TO false; -- When GROUP BY clause matches full aggregation is performed for each partition. 
EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------ Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) - -> Append - -> Finalize GroupAggregate - Group Key: pagg_tab.c - Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: pagg_tab.c - -> Partial GroupAggregate - Group Key: pagg_tab.c - -> Sort - Sort Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab - -> Finalize GroupAggregate - Group Key: pagg_tab_1.c - Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Gather Motion 3:1 (slice2; segments: 3) - Merge Key: pagg_tab_1.c - -> Partial GroupAggregate - Group Key: pagg_tab_1.c - -> Sort - Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Merge Append + Sort Key: pagg_tab.c + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pagg_tab.c + -> Finalize GroupAggregate + Group Key: pagg_tab.c + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> Partial GroupAggregate + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Gather Motion 3:1 (slice3; segments: 3) + Merge Key: pagg_tab_1.c + -> Finalize GroupAggregate + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: pagg_tab_1.c + -> Partial GroupAggregate + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Finalize GroupAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) - -> Gather Motion 3:1 (slice3; segments: 3) + -> Gather Motion 3:1 (slice5; segments: 3) Merge 
Key: pagg_tab_2.c -> Partial GroupAggregate Group Key: pagg_tab_2.c @@ -243,7 +266,7 @@ SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 O Sort Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(34 rows) +(43 rows) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count @@ -306,39 +329,48 @@ SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 O -- Test partitionwise grouping without any aggregates EXPLAIN (COSTS OFF) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; - QUERY PLAN -------------------------------------------------------- - Merge Append - Sort Key: pagg_tab.c - -> GroupAggregate - Group Key: pagg_tab.c - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: pagg_tab.c + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Append -> GroupAggregate Group Key: pagg_tab.c -> Sort Sort Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab - -> GroupAggregate - Group Key: pagg_tab_1.c - -> Gather Motion 3:1 (slice2; segments: 3) - Merge Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> GroupAggregate + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab -> GroupAggregate Group Key: pagg_tab_1.c -> Sort Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 - -> GroupAggregate - Group Key: pagg_tab_2.c - -> Gather Motion 3:1 (slice3; segments: 3) - Merge Key: pagg_tab_2.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: pagg_tab_1.c + -> GroupAggregate + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> GroupAggregate Group Key: pagg_tab_2.c -> Sort Sort Key: pagg_tab_2.c - -> Seq Scan on 
pagg_tab_p3 pagg_tab_2 + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: pagg_tab_2.c + -> GroupAggregate + Group Key: pagg_tab_2.c + -> Sort + Sort Key: pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(30 rows) +(39 rows) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; c @@ -400,8 +432,8 @@ RESET enable_hashagg; -- ROLLUP, partitionwise aggregation does not apply EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.c, (sum(pagg_tab.a)) -> Sort @@ -462,8 +494,8 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; -- partitionwise aggregation plan is not generated. EXPLAIN (COSTS OFF) SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.a, (sum(pagg_tab.b ORDER BY pagg_tab.a)) -> Sort @@ -481,13 +513,23 @@ SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; -- JOIN query CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; ANALYZE pagg_tab1; @@ -872,8 +914,8 @@ SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y G -- But right now we are unable to do partitionwise join in this case. 
EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab1.x, pagg_tab2.y -> Sort @@ -917,8 +959,8 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOI -- But right now we are unable to do partitionwise join in this case. EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab1.x, pagg_tab2.y -> Sort @@ -970,7 +1012,8 @@ SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a Sort Key: pagg_tab1.y -> Result One-Time Filter: false -(6 rows) + Optimizer: Postgres query optimizer +(7 rows) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; x | y | count @@ -979,16 +1022,21 @@ SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a -- Partition by multiple columns CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (12, 12); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (12, 12) TO (22, 22); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (22, 22) TO (30, 30); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; ANALYZE pagg_tab_m; -- Partial aggregation as GROUP BY clause does not match with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) -> Sort @@ -1023,8 +1071,8 @@ SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 -- Full aggregation as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) -> Sort @@ -1059,8 +1107,8 @@ SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING su -- Full aggregation as PARTITION KEY is part of GROUP BY clause EXPLAIN (COSTS OFF) SELECT 
a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b)) -> Sort @@ -1094,16 +1142,27 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV -- Test with multi-level partitioning scheme CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (12); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (12) TO (20) PARTITION BY LIST (c); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001', '0002'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0003'); +NOTICE: table has parent, setting distribution columns to match parent table -- This level of partitioning has different column positions than the parent CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ALTER TABLE pagg_tab_ml_p3 SET DISTRIBUTED BY (a); CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ALTER TABLE pagg_tab_ml_p3_s1 SET DISTRIBUTED BY (a); CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (7) TO (10); +NOTICE: table has parent, setting distribution columns to match parent table ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (7); ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; @@ -1511,9 +1570,14 @@ SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 O -- costing such plans. SET parallel_setup_cost TO 10; CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (12); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (12) TO (22); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (22) TO (30); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; ANALYZE pagg_tab_para; -- When GROUP BY clause matches; full aggregation is performed for each partition. diff --git a/contrib/pax_storage/src/test/regress/expected/partition_join.out b/contrib/pax_storage/src/test/regress/expected/partition_join.out index ec936d30e1c..05677ad013b 100644 --- a/contrib/pax_storage/src/test/regress/expected/partition_join.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_join.out @@ -2,6 +2,9 @@ -- PARTITION_JOIN -- Test partitionwise join between partitioned tables -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- Disable ORCA since it does support partition-wise joins set optimizer to off; -- Enable partitionwise join, which by default is disabled. @@ -14,18 +17,28 @@ set enable_mergejoin to true; -- partitioned by a single column -- CREATE TABLE prt1 (a int, b int, c varchar) PARTITION BY RANGE(a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES FROM (500) TO (600); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0; CREATE INDEX iprt1_p1_a on prt1_p1(a); CREATE INDEX iprt1_p2_a on prt1_p2(a); CREATE INDEX iprt1_p3_a on prt1_p3(a); ANALYZE prt1; CREATE TABLE prt2 (a int, b int, c varchar) PARTITION BY RANGE(b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt2_p1 PARTITION OF prt2 FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_p2 PARTITION OF prt2 FOR VALUES FROM (250) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_p3 PARTITION OF prt2 FOR VALUES FROM (500) TO (600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 3 = 0; CREATE INDEX iprt2_p1_b on prt2_p1(b); CREATE INDEX iprt2_p2_b on prt2_p2(b); @@ -426,11 +439,11 @@ EXPLAIN (COSTS OFF) SELECT * FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a; -ERROR: could not devise a query plan for the given query +ERROR: could not devise a query plan for the given query (pathnode.c:280) SELECT * FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a; -ERROR: could not devise a query plan for the given query +ERROR: could not devise a query plan for the given query (pathnode.c:280) EXPLAIN (COSTS OFF) SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss @@ -578,18 +591,28 @@ RESET enable_hashjoin; -- partitioned by expression -- CREATE TABLE prt1_e (a int, b int, c int) PARTITION BY RANGE(((a + b)/2)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt1_e_p1 PARTITION OF prt1_e FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_e_p2 PARTITION OF prt1_e FOR VALUES FROM (250) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_e_p3 PARTITION OF prt1_e FOR VALUES FROM (500) TO (600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt1_e SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i; CREATE INDEX iprt1_e_p1_ab2 on prt1_e_p1(((a+b)/2)); CREATE INDEX iprt1_e_p2_ab2 on prt1_e_p2(((a+b)/2)); CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2)); ANALYZE prt1_e; CREATE TABLE prt2_e (a int, b int, c int) PARTITION BY RANGE(((b + a)/2)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt2_e_p1 PARTITION OF prt2_e FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_e_p2 PARTITION OF prt2_e FOR VALUES FROM (250) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_e_p3 PARTITION OF prt2_e FOR VALUES FROM (500) TO (600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_e SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i; ANALYZE prt2_e; EXPLAIN (COSTS OFF) @@ -1493,15 +1516,25 @@ RESET enable_nestloop; -- partitioned by multiple columns -- CREATE TABLE prt1_m (a int, b int, c int) PARTITION BY RANGE(a, ((a + b)/2)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt1_m_p1 PARTITION OF prt1_m FOR VALUES FROM (0, 0) TO (250, 250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_m_p3 PARTITION OF prt1_m FOR VALUES FROM (500, 500) TO (600, 600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt1_m SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i; ANALYZE prt1_m; CREATE TABLE prt2_m (a int, b int, c int) PARTITION BY RANGE(((b + a)/2), b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_m_p2 PARTITION OF prt2_m FOR VALUES FROM (250, 250) TO (500, 500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_m_p3 PARTITION OF prt2_m FOR VALUES FROM (500, 500) TO (600, 600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_m SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i; ANALYZE prt2_m; EXPLAIN (COSTS OFF) @@ -1568,24 +1601,39 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 -- tests for list partitioned tables. 
-- CREATE TABLE plt1 (a int, b int, c text) PARTITION BY LIST(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE plt1_p1 PARTITION OF plt1 FOR VALUES IN ('0000', '0003', '0004', '0010'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_p2 PARTITION OF plt1 FOR VALUES IN ('0001', '0005', '0002', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_p3 PARTITION OF plt1 FOR VALUES IN ('0006', '0007', '0008', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE plt1; CREATE TABLE plt2 (a int, b int, c text) PARTITION BY LIST(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt2_p1 PARTITION OF plt2 FOR VALUES IN ('0000', '0003', '0004', '0010'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_p2 PARTITION OF plt2 FOR VALUES IN ('0001', '0005', '0002', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_p3 PARTITION OF plt2 FOR VALUES IN ('0006', '0007', '0008', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; ANALYZE plt2; -- -- list partitioned by expression -- CREATE TABLE plt1_e (a int, b int, c text) PARTITION BY LIST(ltrim(c, 'A')); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt1_e_p1 PARTITION OF plt1_e FOR VALUES IN ('0000', '0003', '0004', '0010'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_e_p3 PARTITION OF plt1_e FOR VALUES IN ('0006', '0007', '0008', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE plt1_e; -- test partition matching with N-way join @@ -1658,19 +1706,21 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl -- joins where one of the relations is proven empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a = 1 AND t1.a = 2; - QUERY PLAN --------------------------- + QUERY PLAN +------------------------------------- Result One-Time Filter: false -(2 rows) + Optimizer: Postgres query optimizer +(3 rows) EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 LEFT JOIN prt2 t2 ON t1.a = t2.b; - QUERY PLAN --------------------------- + QUERY PLAN +------------------------------------- Result One-Time Filter: false -(2 rows) + Optimizer: Postgres query optimizer +(3 rows) EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b, prt1 t3 WHERE t2.b = t3.a; @@ -1734,24 +1784,39 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 -- tests for hash partitioned tables. -- CREATE TABLE pht1 (a int, b int, c text) PARTITION BY HASH(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pht1_p1 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht1_p2 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht1_p3 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pht1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE pht1; CREATE TABLE pht2 (a int, b int, c text) PARTITION BY HASH(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pht2_p1 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht2_p2 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht2_p3 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pht2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; ANALYZE pht2; -- -- hash partitioned by expression -- CREATE TABLE pht1_e (a int, b int, c text) PARTITION BY HASH(ltrim(c, 'A')); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE pht1_e_p1 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 0); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE pht1_e_p3 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 2); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO pht1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 299, 2) i; ANALYZE pht1_e; -- test partition matching with N-way join @@ -1872,10 +1937,10 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c --------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.c - -> Sort - Sort Key: t1.c - -> Finalize HashAggregate - Group Key: t1.c, t2.c + -> Finalize GroupAggregate + Group Key: t1.c, t2.c + -> Sort + Sort Key: t1.c -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: t1.c, t1.c -> Partial HashAggregate @@ -1909,23 +1974,41 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c -- multiple levels of partitioning -- CREATE TABLE prt1_l (a int, b int, c varchar) PARTITION BY RANGE(a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt1_l_p1 PARTITION OF prt1_l FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p2 PARTITION OF prt1_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p2_p1 PARTITION OF prt1_l_p2 FOR VALUES IN ('0000', '0001'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p2_p2 PARTITION OF prt1_l_p2 FOR VALUES IN ('0002', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p3 PARTITION OF prt1_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (b); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p3_p1 PARTITION OF prt1_l_p3 FOR VALUES FROM (0) TO (13); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_l_p3_p2 PARTITION OF prt1_l_p3 FOR VALUES FROM (13) TO (25); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt1_l SELECT i, i % 25, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE prt1_l; CREATE TABLE prt2_l (a int, b int, c varchar) PARTITION BY RANGE(b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt2_l_p1 PARTITION OF prt2_l FOR VALUES FROM (0) TO (250); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p2 PARTITION OF prt2_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p2_p1 PARTITION OF prt2_l_p2 FOR VALUES IN ('0000', '0001'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p2_p2 PARTITION OF prt2_l_p2 FOR VALUES IN ('0002', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p3 PARTITION OF prt2_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (a); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p3_p1 PARTITION OF prt2_l_p3 FOR VALUES FROM (0) TO (13); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_l_p3_p2 PARTITION OF prt2_l_p3 FOR VALUES FROM (13) TO (25); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_l SELECT i % 25, i, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 3) i; ANALYZE prt2_l; -- inner join, qual covering only top-level partitions @@ -2181,11 +2264,11 @@ EXPLAIN (COSTS OFF) SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; -ERROR: could not devise a query plan for the given query +ERROR: could not devise a query plan for the given query (pathnode.c:280) SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; -ERROR: 
could not devise a query plan for the given query +ERROR: could not devise a query plan for the given query (pathnode.c:280) -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; @@ -2251,25 +2334,43 @@ WHERE EXISTS ( -- negative testcases -- CREATE TABLE prt1_n (a int, b int, c varchar) PARTITION BY RANGE(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt1_n_p1 PARTITION OF prt1_n FOR VALUES FROM ('0000') TO ('0250'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 499, 2) i; ANALYZE prt1_n; CREATE TABLE prt2_n (a int, b int, c text) PARTITION BY LIST(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt2_n_p1 PARTITION OF prt2_n FOR VALUES IN ('0000', '0003', '0004', '0010', '0006', '0007'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_n_p2 PARTITION OF prt2_n FOR VALUES IN ('0001', '0005', '0002', '0009', '0008', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE prt2_n; CREATE TABLE prt3_n (a int, b int, c text) PARTITION BY LIST(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt3_n_p1 PARTITION OF prt3_n FOR VALUES IN ('0000', '0004', '0006', '0007'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt3_n_p3 PARTITION OF prt3_n FOR VALUES IN ('0003', '0005', '0009', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE prt3_n; CREATE TABLE prt4_n (a int, b int, c text) PARTITION BY RANGE(a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt4_n_p1 PARTITION OF prt4_n FOR VALUES FROM (0) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt4_n_p3 PARTITION OF prt4_n FOR VALUES FROM (500) TO (600); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt4_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 599, 2) i; ANALYZE prt4_n; -- partitionwise join can not be applied if the partition ranges differ @@ -2512,14 +2613,24 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 FULL JOIN prt1 t2 ON (t1.c = t2.c); -- create temp table prtx1 (a integer, b integer, c integer) partition by range (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create temp table prtx1_1 partition of prtx1 for values from (1) to (11); +NOTICE: table has parent, setting distribution columns to match parent table create temp table prtx1_2 partition of prtx1 for values from (11) to (21); +NOTICE: table has parent, setting distribution columns to match parent table create temp table prtx1_3 partition of prtx1 for values from (21) to (31); +NOTICE: table has parent, setting distribution columns to match parent table create temp table prtx2 (a integer, b integer, c integer) partition by range (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create temp table prtx2_1 partition of prtx2 for values from (1) to (11); +NOTICE: table has parent, setting distribution columns to match parent table create temp table prtx2_2 partition of prtx2 for values from (11) to (21); +NOTICE: table has parent, setting distribution columns to match parent table create temp table prtx2_3 partition of prtx2 for values from (21) to (31); +NOTICE: table has parent, setting distribution columns to match parent table insert into prtx1 select 1 + i%30, i, i from generate_series(1,1000) i; insert into prtx2 select 1 + i%30, i, i @@ -2603,16 +2714,26 @@ where not exists (select 1 from prtx2 -- -- Tests for range-partitioned tables CREATE TABLE prt1_adv (a int, b int, c varchar) PARTITION BY RANGE (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt1_adv_p1 PARTITION OF prt1_adv FOR VALUES FROM (100) TO (200); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_adv_p2 PARTITION OF prt1_adv FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_adv_p3 PARTITION OF prt1_adv FOR VALUES FROM (300) TO (400); +NOTICE: table has parent, setting distribution columns to match parent table CREATE INDEX prt1_adv_a_idx ON prt1_adv (a); INSERT INTO prt1_adv SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(100, 399) i; ANALYZE prt1_adv; CREATE TABLE prt2_adv (a int, b int, c varchar) PARTITION BY RANGE (b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt2_adv_p1 PARTITION OF prt2_adv FOR VALUES FROM (100) TO (150); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_adv_p2 PARTITION OF prt2_adv FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_adv_p3 PARTITION OF prt2_adv FOR VALUES FROM (350) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE INDEX prt2_adv_b_idx ON prt2_adv (b); INSERT INTO prt2_adv_p1 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(100, 149) i; INSERT INTO prt2_adv_p2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(200, 299) i; @@ -2891,6 +3012,7 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 175 phv, * FROM prt1_adv WHERE prt1_a -- Test cases where one side has an extra partition CREATE TABLE prt2_adv_extra PARTITION OF prt2_adv FOR VALUES FROM (500) TO (MAXVALUE); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_adv SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(500, 599) i; ANALYZE prt2_adv; -- inner join @@ -3275,7 +3397,9 @@ DROP TABLE prt2_adv_extra; ALTER TABLE prt2_adv DETACH PARTITION prt2_adv_p3; -- Split prt2_adv_p3 into two partitions so that prt1_adv_p3 matches both CREATE TABLE prt2_adv_p3_1 PARTITION OF prt2_adv FOR VALUES FROM (350) TO (375); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_adv_p3_2 PARTITION OF prt2_adv FOR VALUES FROM (375) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO prt2_adv SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(350, 499) i; ANALYZE prt2_adv; -- inner join @@ -3558,8 +3682,12 @@ ANALYZE prt1_adv; DROP TABLE prt2_adv_p3; ANALYZE prt2_adv; CREATE TABLE prt3_adv (a int, b int, c varchar) PARTITION 
BY RANGE (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt3_adv_p1 PARTITION OF prt3_adv FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt3_adv_p2 PARTITION OF prt3_adv FOR VALUES FROM (300) TO (400); +NOTICE: table has parent, setting distribution columns to match parent table CREATE INDEX prt3_adv_a_idx ON prt3_adv (a); INSERT INTO prt3_adv SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(200, 399) i; ANALYZE prt3_adv; @@ -3624,15 +3752,24 @@ DROP TABLE prt2_adv; DROP TABLE prt3_adv; -- Test interaction of partitioned join with partition pruning CREATE TABLE prt1_adv (a int, b int, c varchar) PARTITION BY RANGE (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE prt1_adv_p1 PARTITION OF prt1_adv FOR VALUES FROM (100) TO (200); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_adv_p2 PARTITION OF prt1_adv FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt1_adv_p3 PARTITION OF prt1_adv FOR VALUES FROM (300) TO (400); +NOTICE: table has parent, setting distribution columns to match parent table CREATE INDEX prt1_adv_a_idx ON prt1_adv (a); INSERT INTO prt1_adv SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(100, 399) i; ANALYZE prt1_adv; CREATE TABLE prt2_adv (a int, b int, c varchar) PARTITION BY RANGE (b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE prt2_adv_p1 PARTITION OF prt2_adv FOR VALUES FROM (100) TO (200); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE prt2_adv_p2 PARTITION OF prt2_adv FOR VALUES FROM (200) TO (400); +NOTICE: table has parent, setting distribution columns to match parent table CREATE INDEX prt2_adv_b_idx ON prt2_adv (b); INSERT INTO prt2_adv SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(100, 399) i; ANALYZE prt2_adv; @@ -3679,8 +3816,10 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = DROP TABLE prt1_adv_p3; CREATE TABLE prt1_adv_default PARTITION OF prt1_adv DEFAULT; +NOTICE: table has parent, setting distribution columns to match parent table ANALYZE prt1_adv; CREATE TABLE prt2_adv_default PARTITION OF prt2_adv DEFAULT; +NOTICE: table has parent, setting distribution columns to match parent table ANALYZE prt2_adv; EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 
ON (t1.a = t2.b) WHERE t1.a >= 100 AND t1.a < 300 AND t1.b = 0 ORDER BY t1.a, t2.b; @@ -3727,15 +3866,25 @@ DROP TABLE prt1_adv; DROP TABLE prt2_adv; -- Tests for list-partitioned tables CREATE TABLE plt1_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE plt1_adv_p1 PARTITION OF plt1_adv FOR VALUES IN ('0001', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p2 PARTITION OF plt1_adv FOR VALUES IN ('0004', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p3 PARTITION OF plt1_adv FOR VALUES IN ('0008', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (1, 3, 4, 6, 8, 9); ANALYZE plt1_adv; CREATE TABLE plt2_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt2_adv_p1 PARTITION OF plt2_adv FOR VALUES IN ('0002', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p2 PARTITION OF plt2_adv FOR VALUES IN ('0004', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p3 PARTITION OF plt2_adv FOR VALUES IN ('0007', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (2, 3, 4, 6, 7, 9); ANALYZE plt2_adv; -- inner join @@ -3965,6 +4114,7 @@ SELECT t1.a, t1.c, t2.a, t2.c FROM plt1_adv t1 FULL JOIN plt2_adv t2 ON (t1.a = -- Test cases where one side has an extra partition CREATE TABLE plt2_adv_extra PARTITION OF plt2_adv FOR VALUES IN ('0000'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv_extra VALUES (0, 0, '0000'); ANALYZE plt2_adv; -- inner join @@ -4240,7 +4390,9 @@ DROP TABLE plt2_adv_extra; ALTER TABLE plt2_adv DETACH PARTITION plt2_adv_p2; -- Split plt2_adv_p2 into two partitions so that plt1_adv_p2 matches both CREATE TABLE plt2_adv_p2_1 PARTITION OF plt2_adv FOR VALUES IN ('0004'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p2_2 PARTITION OF plt2_adv FOR VALUES IN ('0006'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (4, 6); ANALYZE plt2_adv; -- inner join @@ -4398,12 +4550,14 @@ ALTER TABLE plt2_adv ATTACH PARTITION plt2_adv_p2 FOR VALUES IN ('0004', '0006') ALTER TABLE plt1_adv DETACH PARTITION plt1_adv_p1; -- Change plt1_adv_p1 to the NULL partition CREATE TABLE plt1_adv_p1_null PARTITION OF plt1_adv FOR VALUES IN (NULL, '0001', '0003'); +NOTICE: table has parent, setting distribution columns to match 
parent table INSERT INTO plt1_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (1, 3); INSERT INTO plt1_adv VALUES (-1, -1, NULL); ANALYZE plt1_adv; ALTER TABLE plt2_adv DETACH PARTITION plt2_adv_p3; -- Change plt2_adv_p3 to the NULL partition CREATE TABLE plt2_adv_p3_null PARTITION OF plt2_adv FOR VALUES IN (NULL, '0007', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (7, 9); INSERT INTO plt2_adv VALUES (-1, -1, NULL); ANALYZE plt2_adv; @@ -4642,6 +4796,7 @@ ALTER TABLE plt1_adv ATTACH PARTITION plt1_adv_p1 FOR VALUES IN ('0001', '0003') -- Add to plt1_adv the extra NULL partition containing only NULL values as the -- key values CREATE TABLE plt1_adv_extra PARTITION OF plt1_adv FOR VALUES IN (NULL); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1_adv VALUES (-1, -1, NULL); ANALYZE plt1_adv; DROP TABLE plt2_adv_p3_null; @@ -4748,6 +4903,7 @@ SELECT t1.a, t1.c, t2.a, t2.c FROM plt1_adv t1 FULL JOIN plt2_adv t2 ON (t1.a = -- Add to plt2_adv the extra NULL partition containing only NULL values as the -- key values CREATE TABLE plt2_adv_extra PARTITION OF plt2_adv FOR VALUES IN (NULL); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv VALUES (-1, -1, NULL); ANALYZE plt2_adv; -- inner join @@ -5004,6 +5160,7 @@ ALTER TABLE plt2_adv DETACH PARTITION plt2_adv_p2; -- Change plt2_adv_p2 to contain '0005' in addition to '0004' and '0006' as -- the key values CREATE TABLE plt2_adv_p2_ext PARTITION OF plt2_adv FOR VALUES IN ('0004', '0005', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (4, 5, 6); ANALYZE plt2_adv; -- Partitioned join can't be applied 
because the default partition of plt1_adv @@ -5067,8 +5224,12 @@ DROP TABLE plt2_adv_p2_ext; ALTER TABLE plt2_adv ATTACH PARTITION plt2_adv_p2 FOR VALUES IN ('0004', '0006'); ANALYZE plt2_adv; CREATE TABLE plt3_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE plt3_adv_p1 PARTITION OF plt3_adv FOR VALUES IN ('0004', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt3_adv_p2 PARTITION OF plt3_adv FOR VALUES IN ('0007', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt3_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (4, 6, 7, 9); ANALYZE plt3_adv; -- 3-way join to test the default partition of a join relation @@ -5118,6 +5279,7 @@ SELECT t1.a, t1.c, t2.a, t2.c, t3.a, t3.c FROM plt1_adv t1 LEFT JOIN plt2_adv t2 DROP TABLE plt2_adv_p1; -- Add the NULL partition to plt2_adv CREATE TABLE plt2_adv_p1_null PARTITION OF plt2_adv FOR VALUES IN (NULL, '0001', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (1, 3); INSERT INTO plt2_adv VALUES (-1, -1, NULL); ANALYZE plt2_adv; @@ -5157,6 +5319,7 @@ SELECT t1.a, t1.c, t2.a, t2.c FROM plt1_adv t1 INNER JOIN plt2_adv t2 ON (t1.a = DROP TABLE plt2_adv_p1_null; -- Add the NULL partition that contains only NULL values as the key values CREATE TABLE plt2_adv_p1_null PARTITION OF plt2_adv FOR VALUES IN (NULL); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv VALUES (-1, -1, NULL); ANALYZE 
plt2_adv; EXPLAIN (COSTS OFF) @@ -5188,18 +5351,30 @@ DROP TABLE plt2_adv; DROP TABLE plt3_adv; -- Test interaction of partitioned join with partition pruning CREATE TABLE plt1_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE plt1_adv_p1 PARTITION OF plt1_adv FOR VALUES IN ('0001'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p2 PARTITION OF plt1_adv FOR VALUES IN ('0002'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p3 PARTITION OF plt1_adv FOR VALUES IN ('0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p4 PARTITION OF plt1_adv FOR VALUES IN (NULL, '0004', '0005'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (1, 2, 3, 4, 5); INSERT INTO plt1_adv VALUES (-1, -1, NULL); ANALYZE plt1_adv; CREATE TABLE plt2_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt2_adv_p1 PARTITION OF plt2_adv FOR VALUES IN ('0001', '0002'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p2 PARTITION OF plt2_adv FOR VALUES IN (NULL); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p3 PARTITION OF plt2_adv FOR VALUES IN ('0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p4 PARTITION OF plt2_adv FOR VALUES IN ('0004', '0005'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (1, 2, 3, 4, 5); INSERT INTO plt2_adv VALUES (-1, -1, NULL); ANALYZE plt2_adv; @@ -5261,8 +5436,10 @@ SELECT t1.a, t1.c, t2.a, t2.c FROM plt1_adv t1 LEFT JOIN plt2_adv t2 ON (t1.a = (1 row) CREATE TABLE plt1_adv_default PARTITION OF plt1_adv DEFAULT; +NOTICE: table has parent, setting distribution columns to match parent table ANALYZE plt1_adv; CREATE TABLE plt2_adv_default PARTITION OF plt2_adv DEFAULT; +NOTICE: table has parent, setting distribution columns to match parent table ANALYZE plt2_adv; EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.a, t2.c FROM plt1_adv t1 INNER JOIN plt2_adv t2 ON (t1.a = t2.a AND t1.c = t2.c) WHERE t1.c IN ('0003', '0004', '0005') AND t1.b < 10 ORDER BY t1.a; @@ -5325,18 +5502,30 @@ DROP TABLE plt1_adv; DROP TABLE plt2_adv; -- Test the process_outer_partition() code path CREATE TABLE plt1_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt1_adv_p1 PARTITION OF plt1_adv FOR VALUES IN ('0000', '0001', '0002'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt1_adv_p2 PARTITION OF plt1_adv FOR VALUES IN ('0003', '0004'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt1_adv SELECT i, i, to_char(i % 5, 'FM0000') FROM generate_series(0, 24) i; ANALYZE plt1_adv; CREATE TABLE plt2_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE plt2_adv_p1 PARTITION OF plt2_adv FOR VALUES IN ('0002'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt2_adv_p2 PARTITION OF plt2_adv FOR VALUES IN ('0003', '0004'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt2_adv SELECT i, i, to_char(i % 5, 'FM0000') FROM generate_series(0, 24) i WHERE i % 5 IN (2, 3, 4); ANALYZE plt2_adv; CREATE TABLE plt3_adv (a int, b int, c text) PARTITION BY LIST (c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE plt3_adv_p1 PARTITION OF plt3_adv FOR VALUES IN ('0001'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE plt3_adv_p2 PARTITION OF plt3_adv FOR VALUES IN ('0003', '0004'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO plt3_adv SELECT i, i, to_char(i % 5, 'FM0000') FROM generate_series(0, 24) i WHERE i % 5 IN (1, 3, 4); ANALYZE plt3_adv; -- This tests that when merging partitions from plt1_adv and plt2_adv in @@ -5456,26 +5645,46 @@ DROP TABLE plt2_adv; DROP TABLE plt3_adv; -- Tests for multi-level partitioned tables CREATE TABLE alpha (a double precision, b int, c text) PARTITION BY RANGE (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE alpha_neg PARTITION OF alpha FOR VALUES FROM ('-Infinity') TO (0) PARTITION BY RANGE (b); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_pos PARTITION OF alpha FOR VALUES FROM (0) TO (10.0) PARTITION BY LIST (c); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_neg_p1 PARTITION OF alpha_neg FOR VALUES FROM (100) TO (200); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_neg_p2 PARTITION OF alpha_neg FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_neg_p3 PARTITION OF alpha_neg FOR VALUES FROM (300) TO (400); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_pos_p1 PARTITION OF alpha_pos FOR VALUES IN ('0001', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE 
alpha_pos_p2 PARTITION OF alpha_pos FOR VALUES IN ('0004', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE alpha_pos_p3 PARTITION OF alpha_pos FOR VALUES IN ('0008', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO alpha_neg SELECT -1.0, i, to_char(i % 10, 'FM0000') FROM generate_series(100, 399) i WHERE i % 10 IN (1, 3, 4, 6, 8, 9); INSERT INTO alpha_pos SELECT 1.0, i, to_char(i % 10, 'FM0000') FROM generate_series(100, 399) i WHERE i % 10 IN (1, 3, 4, 6, 8, 9); ANALYZE alpha; CREATE TABLE beta (a double precision, b int, c text) PARTITION BY RANGE (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE beta_neg PARTITION OF beta FOR VALUES FROM (-10.0) TO (0) PARTITION BY RANGE (b); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_pos PARTITION OF beta FOR VALUES FROM (0) TO ('Infinity') PARTITION BY LIST (c); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_neg_p1 PARTITION OF beta_neg FOR VALUES FROM (100) TO (150); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_neg_p2 PARTITION OF beta_neg FOR VALUES FROM (200) TO (300); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_neg_p3 PARTITION OF beta_neg FOR VALUES FROM (350) TO (500); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_pos_p1 PARTITION OF beta_pos FOR VALUES IN ('0002', '0003'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_pos_p2 PARTITION OF beta_pos FOR VALUES IN 
('0004', '0006'); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE beta_pos_p3 PARTITION OF beta_pos FOR VALUES IN ('0007', '0009'); +NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO beta_neg SELECT -1.0, i, to_char(i % 10, 'FM0000') FROM generate_series(100, 149) i WHERE i % 10 IN (2, 3, 4, 6, 7, 9); INSERT INTO beta_neg SELECT -1.0, i, to_char(i % 10, 'FM0000') FROM generate_series(200, 299) i WHERE i % 10 IN (2, 3, 4, 6, 7, 9); INSERT INTO beta_neg SELECT -1.0, i, to_char(i % 10, 'FM0000') FROM generate_series(350, 499) i WHERE i % 10 IN (2, 3, 4, 6, 7, 9); diff --git a/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out b/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out index 8c4b96b6be8..144c17d13fe 100644 --- a/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out @@ -82,6 +82,8 @@ partition by range (y) ( start (0) end (4) every (1)) insert into csq_t1 select * from csq_t1_base; insert into csq_t2 select * from csq_t2_base; explain select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; +NOTICE: One or more columns in the following table(s) do not have statistics: csq_t1, csq_t2 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. 
QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Result (cost=0.00..1293.00 rows=1 width=8) @@ -105,6 +107,8 @@ explain select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 wh (18 rows) select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; -- expected (4,2) +NOTICE: One or more columns in the following table(s) do not have statistics: csq_t1, csq_t2 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. x | y ---+--- 4 | 2 @@ -130,7 +134,7 @@ explain select * from mrs_t1 where exists (select x from mrs_t1 where x < -1); Join Filter: true -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..431.00 rows=7 width=4) -> Materialize (cost=0.00..431.00 rows=1 width=1) - -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=3 width=1) + -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=1) -> Limit (cost=0.00..431.00 rows=1 width=1) -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1) -> Seq Scan on mrs_t1 (cost=0.00..431.00 rows=1 width=1) @@ -151,7 +155,7 @@ explain select * from mrs_t1 where exists (select x from mrs_t1 where x = 1); Join Filter: true -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..431.00 rows=7 width=4) -> Materialize (cost=0.00..431.00 rows=1 width=1) - -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=3 width=1) + -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=1) -> Limit (cost=0.00..431.00 rows=1 width=1) -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1) -> Seq Scan on mrs_t1 (cost=0.00..431.00 
rows=1 width=1) @@ -629,9 +633,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t); t | n | i | v -----+---+---+----- + abc | 1 | 2 | xyz xyz | 2 | 3 | def def | 3 | 1 | abc - abc | 1 | 2 | xyz (3 rows) -- @@ -660,9 +664,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.v); t | n | i | v -----+---+---+----- - xyz | 2 | 3 | def def | 3 | 1 | abc abc | 1 | 2 | xyz + xyz | 2 | 3 | def (3 rows) -- @@ -692,9 +696,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n); t | n | i | v -----+---+---+----- + abc | 1 | 2 | xyz xyz | 2 | 3 | def def | 3 | 1 | abc - abc | 1 | 2 | xyz (3 rows) -- @@ -720,9 +724,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1); t | n | i | v -----+---+---+----- + def | 3 | 1 | abc abc | 1 | 2 | xyz xyz | 2 | 3 | def - def | 3 | 1 | abc (3 rows) -- @@ -748,9 +752,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1); t | n | i | v -----+---+---+----- + def | 3 | 1 | abc abc | 1 | 2 | xyz xyz | 2 | 3 | def - def | 3 | 1 | abc (3 rows) -- @@ -778,9 +782,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1); t | n | i | v -----+---+---+----- - xyz | 2 | 3 | def def | 3 | 1 | abc abc | 1 | 2 | xyz + xyz | 2 | 3 | def (3 rows) -- subquery contains a HAVING clause @@ -807,9 +811,9 @@ explain select * from 
csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10); t | n | i | v -----+---+---+----- - xyz | 2 | 3 | def def | 3 | 1 | abc abc | 1 | 2 | xyz + xyz | 2 | 3 | def (3 rows) -- subquery contains quals of form 'function(outervar, innervar1) = innvervar2' @@ -887,9 +891,7 @@ select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where -- wrong results bug MPP-16477 -- drop table if exists subselect_t1; -NOTICE: table "subselect_t1" does not exist, skipping drop table if exists subselect_t2; -NOTICE: table "subselect_t2" does not exist, skipping create table subselect_t1(x int) distributed by (x); insert into subselect_t1 values(1),(2); create table subselect_t2(y int) distributed by (y); @@ -911,8 +913,8 @@ explain select * from subselect_t1 where x in (select y from subselect_t2); select * from subselect_t1 where x in (select y from subselect_t2); x --- - 1 2 + 1 (2 rows) -- start_ignore @@ -1254,6 +1256,8 @@ CASE ELSE 'Q2'::text END AS cc, 1 AS nn FROM t_mpp_20470 b; explain SELECT cc, sum(nn) over() FROM v1_mpp_20470; +NOTICE: One or more columns in the following table(s) do not have statistics: t_mpp_20470 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------------------------------------------- WindowAgg (cost=0.00..862.00 rows=2 width=16) @@ -1353,6 +1357,8 @@ create table bar(a int, b int) distributed by (a); with CT as (select a from foo except select a from bar) select * from foo where exists (select 1 from CT where CT.a = foo.a); +NOTICE: One or more columns in the following table(s) do not have statistics: foo +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). 
See log for columns missing statistics. a | b ---+--- (0 rows) @@ -1381,6 +1387,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur INSERT INTO foo_s VALUES (9,9); INSERT INTO foo_s VALUES (2,9); SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9); +NOTICE: One or more columns in the following table(s) do not have statistics: foo_s +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. c --- 9 @@ -1395,6 +1403,8 @@ ANALYZE baz_s; -- because it avoids picking SubPlans from an equivalence class, when it has -- other choices. SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = bar_s.d::int4; +NOTICE: One or more columns in the following table(s) do not have statistics: foo_s +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. c --- 9 @@ -1407,12 +1417,14 @@ SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE -- merge upstream commit 4d042999f9, to suppress the SubPlans from being -- printed twice. explain SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4); +NOTICE: One or more columns in the following table(s) do not have statistics: foo_s +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. 
QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1356273072.98 rows=1000 width=4) - -> Hash Join (cost=0.00..1356273072.97 rows=334 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1356273054.53 rows=1000 width=4) + -> Hash Join (cost=0.00..1356273054.51 rows=334 width=4) Hash Cond: ((((SubPlan 1)) = foo_s.b) AND (((SubPlan 2)) = foo_s.b)) - -> Seq Scan on bar_s (cost=0.00..1324053.98 rows=334 width=16) + -> Seq Scan on bar_s (cost=0.00..1324053.96 rows=334 width=16) SubPlan 1 -> Aggregate (cost=0.00..431.00 rows=1 width=4) -> Result (cost=0.00..431.00 rows=1 width=4) @@ -1431,6 +1443,8 @@ explain SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz (19 rows) SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4); +NOTICE: One or more columns in the following table(s) do not have statistics: foo_s +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. 
c --- 9 @@ -1533,22 +1547,22 @@ EXPLAIN SELECT * FROM tenk1 a, tenk1 b WHERE (a.unique1,b.unique2) IN (SELECT unique1,unique2 FROM tenk1 c); QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=287.67..637.67 rows=10000 width=488) - -> Hash Join (cost=287.67..504.33 rows=3333 width=488) + Gather Motion 3:1 (slice1; segments: 3) (cost=284.67..634.67 rows=10000 width=488) + -> Hash Join (cost=284.67..501.33 rows=3333 width=488) Hash Cond: (c.unique2 = b.unique2) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=139.00..309.83 rows=3333 width=248) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=137.00..307.83 rows=3333 width=248) Hash Key: c.unique2 - -> Hash Join (cost=139.00..243.17 rows=3333 width=248) + -> Hash Join (cost=137.00..241.17 rows=3333 width=248) Hash Cond: (c.unique1 = a.unique1) - -> HashAggregate (cost=57.00..90.33 rows=10000 width=8) + -> HashAggregate (cost=56.00..89.33 rows=10000 width=8) Group Key: c.unique1, c.unique2 - -> Seq Scan on tenk1 c (cost=0.00..40.33 rows=3333 width=8) - -> Hash (cost=40.33..40.33 rows=3333 width=244) - -> Seq Scan on tenk1 a (cost=0.00..40.33 rows=3333 width=244) - -> Hash (cost=107.00..107.00 rows=3333 width=244) - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..107.00 rows=3333 width=244) + -> Seq Scan on tenk1 c (cost=0.00..39.33 rows=3333 width=8) + -> Hash (cost=39.33..39.33 rows=3333 width=244) + -> Seq Scan on tenk1 a (cost=0.00..39.33 rows=3333 width=244) + -> Hash (cost=106.00..106.00 rows=3333 width=244) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..106.00 rows=3333 width=244) Hash Key: b.unique2 - -> Seq Scan on tenk1 b (cost=0.00..40.33 rows=3333 width=244) + -> Seq Scan on tenk1 b (cost=0.00..39.33 rows=3333 width=244) Optimizer: Postgres query optimizer (17 rows) @@ -1599,12 +1613,12 @@ EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS 
"Second Field" WHERE f2 = CAST(f3 AS integer)) ORDER BY 2,3; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Result (cost=0.00..1324033.90 rows=8 width=20) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324033.90 rows=8 width=12) + Result (cost=0.00..1324033.85 rows=8 width=20) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324033.85 rows=8 width=12) Merge Key: upper.f1, upper.f3 - -> Sort (cost=0.00..1324033.90 rows=3 width=12) + -> Sort (cost=0.00..1324033.85 rows=3 width=12) Sort Key: upper.f1, upper.f3 - -> Seq Scan on subselect_tbl upper (cost=0.00..1324033.90 rows=3 width=12) + -> Seq Scan on subselect_tbl upper (cost=0.00..1324033.85 rows=3 width=12) Filter: (SubPlan 1) SubPlan 1 -> Result (cost=0.00..431.00 rows=4 width=4) @@ -1645,18 +1659,18 @@ EXPLAIN select count(*) from where unique1 IN (select hundred from tenk1 b)) ss; QUERY PLAN -------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=0.00..826.06 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.06 rows=1 width=8) - -> Partial Aggregate (cost=0.00..826.06 rows=1 width=8) - -> Nested Loop (cost=0.00..826.06 rows=34 width=1) + Finalize Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.04 rows=1 width=8) + -> Partial Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Nested Loop (cost=0.00..826.04 rows=34 width=1) Join Filter: true - -> HashAggregate (cost=0.00..431.95 rows=34 width=4) + -> HashAggregate (cost=0.00..431.92 rows=34 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.94 rows=100 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.91 rows=100 width=4) Hash Key: b.hundred - -> Streaming HashAggregate (cost=0.00..431.94 rows=100 width=4) + -> 
Streaming HashAggregate (cost=0.00..431.91 rows=100 width=4) Group Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..431.51 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..431.48 rows=3334 width=4) -> Bitmap Heap Scan on tenk1 a (cost=0.00..394.11 rows=1 width=1) Recheck Cond: (unique1 = b.hundred) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..0.00 rows=0 width=0) @@ -1669,20 +1683,20 @@ EXPLAIN select count(distinct ss.ten) from where unique1 IN (select hundred from tenk1 b)) ss; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=0.00..826.07 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.07 rows=1 width=8) - -> Partial Aggregate (cost=0.00..826.07 rows=1 width=8) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..826.07 rows=34 width=4) + Finalize Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.04 rows=1 width=8) + -> Partial Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..826.04 rows=34 width=4) Hash Key: a.ten - -> Nested Loop (cost=0.00..826.07 rows=34 width=4) + -> Nested Loop (cost=0.00..826.04 rows=34 width=4) Join Filter: true - -> HashAggregate (cost=0.00..431.95 rows=34 width=4) + -> HashAggregate (cost=0.00..431.92 rows=34 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.94 rows=100 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.91 rows=100 width=4) Hash Key: b.hundred - -> Streaming HashAggregate (cost=0.00..431.94 rows=100 width=4) + -> Streaming HashAggregate (cost=0.00..431.91 rows=100 width=4) Group Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..431.51 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..431.48 rows=3334 width=4) -> Bitmap Heap Scan on tenk1 a (cost=0.00..394.12 rows=1 
width=4) Recheck Cond: (unique1 = b.hundred) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..0.00 rows=0 width=0) @@ -1695,18 +1709,18 @@ EXPLAIN select count(*) from where unique1 IN (select distinct hundred from tenk1 b)) ss; QUERY PLAN -------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=0.00..826.06 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.06 rows=1 width=8) - -> Partial Aggregate (cost=0.00..826.06 rows=1 width=8) - -> Nested Loop (cost=0.00..826.06 rows=34 width=1) + Finalize Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.04 rows=1 width=8) + -> Partial Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Nested Loop (cost=0.00..826.04 rows=34 width=1) Join Filter: true - -> HashAggregate (cost=0.00..431.95 rows=34 width=4) + -> HashAggregate (cost=0.00..431.92 rows=34 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.94 rows=100 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.91 rows=100 width=4) Hash Key: b.hundred - -> Streaming HashAggregate (cost=0.00..431.94 rows=100 width=4) + -> Streaming HashAggregate (cost=0.00..431.91 rows=100 width=4) Group Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..431.51 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..431.48 rows=3334 width=4) -> Bitmap Heap Scan on tenk1 a (cost=0.00..394.11 rows=1 width=1) Recheck Cond: (unique1 = b.hundred) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..0.00 rows=0 width=0) @@ -1719,20 +1733,20 @@ EXPLAIN select count(distinct ss.ten) from where unique1 IN (select distinct hundred from tenk1 b)) ss; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=0.00..826.07 rows=1 width=8) - -> Gather Motion 3:1 (slice1; 
segments: 3) (cost=0.00..826.07 rows=1 width=8) - -> Partial Aggregate (cost=0.00..826.07 rows=1 width=8) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..826.07 rows=34 width=4) + Finalize Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..826.04 rows=1 width=8) + -> Partial Aggregate (cost=0.00..826.04 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..826.04 rows=34 width=4) Hash Key: a.ten - -> Nested Loop (cost=0.00..826.07 rows=34 width=4) + -> Nested Loop (cost=0.00..826.04 rows=34 width=4) Join Filter: true - -> HashAggregate (cost=0.00..431.95 rows=34 width=4) + -> HashAggregate (cost=0.00..431.92 rows=34 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.94 rows=100 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.91 rows=100 width=4) Hash Key: b.hundred - -> Streaming HashAggregate (cost=0.00..431.94 rows=100 width=4) + -> Streaming HashAggregate (cost=0.00..431.91 rows=100 width=4) Group Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..431.51 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..431.48 rows=3334 width=4) -> Bitmap Heap Scan on tenk1 a (cost=0.00..394.12 rows=1 width=4) Recheck Cond: (unique1 = b.hundred) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..0.00 rows=0 width=0) @@ -1749,16 +1763,16 @@ EXPLAIN select count(distinct ss.ten) from EXPLAIN SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1; QUERY PLAN ---------------------------------------------------------------------------------------------- - Limit (cost=0.00..865.45 rows=1 width=1) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..865.45 rows=1 width=1) - -> Limit (cost=0.00..865.45 rows=1 width=1) - -> Hash Left Join (cost=0.00..865.42 rows=3334 width=8) + Limit (cost=0.00..865.39 rows=1 width=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..865.39 
rows=1 width=1) + -> Limit (cost=0.00..865.39 rows=1 width=1) + -> Hash Left Join (cost=0.00..865.36 rows=3334 width=8) Hash Cond: (tenk2.unique1 = tenk1.unique1) - -> Seq Scan on tenk2 (cost=0.00..431.51 rows=3334 width=4) - -> Hash (cost=431.96..431.96 rows=3334 width=12) - -> HashAggregate (cost=0.00..431.96 rows=3334 width=12) + -> Seq Scan on tenk2 (cost=0.00..431.48 rows=3334 width=4) + -> Hash (cost=431.93..431.93 rows=3334 width=12) + -> HashAggregate (cost=0.00..431.93 rows=3334 width=12) Group Key: tenk1.unique1 - -> Seq Scan on tenk1 (cost=0.00..431.51 rows=3334 width=4) + -> Seq Scan on tenk1 (cost=0.00..431.48 rows=3334 width=4) Optimizer: GPORCA (11 rows) @@ -1834,8 +1848,8 @@ SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1); QUERY PLAN ---------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20) - -> Nested Loop (cost=0.00..882688.08 rows=1 width=20) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.07 rows=1 width=20) + -> Nested Loop (cost=0.00..882688.07 rows=1 width=20) Join Filter: true -> Result (cost=0.00..0.00 rows=0 width=12) One-Time Filter: false @@ -1846,8 +1860,8 @@ EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1); QUERY PLAN ---------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20) - -> Nested Loop (cost=0.00..882688.08 rows=1 width=20) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.07 rows=1 width=20) + -> Nested Loop (cost=0.00..882688.07 rows=1 width=20) Join Filter: true -> Result (cost=0.00..0.00 rows=0 width=12) One-Time 
Filter: false @@ -1858,8 +1872,8 @@ EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1); QUERY PLAN ---------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20) - -> Nested Loop (cost=0.00..882688.08 rows=1 width=20) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.07 rows=1 width=20) + -> Nested Loop (cost=0.00..882688.07 rows=1 width=20) Join Filter: true -> Result (cost=0.00..0.00 rows=0 width=12) One-Time Filter: false @@ -1948,9 +1962,9 @@ select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t); select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t); a --- - 1 2 3 + 1 (3 rows) explain (costs off) @@ -1973,9 +1987,9 @@ select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab t); a --- - 1 2 3 + 1 (3 rows) explain (costs off) @@ -1998,9 +2012,9 @@ select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_t select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t); a --- - 1 2 3 + 1 (3 rows) -- Also test it with non-SRFs. In principle, since the function returns exactly @@ -2113,6 +2127,7 @@ select * from init_main_plan_parallel where exists (select * from pg_class); -- hashExpr are in its targetlist, test the motion node above it also updated -- its targetlist, otherwise, a wrong answer or a crash happens. 
DROP TABLE IF EXISTS TEST_IN; +NOTICE: table "test_in" does not exist, skipping CREATE TABLE TEST_IN( C01 FLOAT, C02 NUMERIC(10,0) @@ -2197,8 +2212,8 @@ select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2282,8 +2297,8 @@ select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2356,8 +2371,8 @@ select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i); i --- - 1 2 + 1 (2 rows) explain (costs off) @@ -2386,8 +2401,8 @@ select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2450,8 +2465,8 @@ select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_s select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2612,12 +2627,12 @@ select * from foo where (select b.i from baz b); i | j ----+---- + 5 | 5 6 | 6 - 7 | 7 + 9 | 9 10 | 10 - 5 | 5 + 7 | 7 8 | 8 - 9 | 9 (6 rows) -- When creating plan with subquery and CTE, it sets the useless flow for the plan. 
@@ -2687,9 +2702,9 @@ select * from run_dt, extra_flow_dist1 where dt < '2010-01-01'::date; dt | a | b ------------+----+---- - 10-01-1949 | 22 | 22 10-01-1949 | 20 | 20 10-01-1949 | 21 | 21 + 10-01-1949 | 22 | 22 (3 rows) create table extra_flow_rand(a int) distributed replicated; @@ -2746,8 +2761,8 @@ where dt < '2010-01-01'::date; dt | a | b ------------+----+---- 10-01-1949 | 22 | 22 - 10-01-1949 | 21 | 21 10-01-1949 | 20 | 20 + 10-01-1949 | 21 | 21 (3 rows) -- case 3 for subplan with outer entry locus (CTE and subquery) @@ -2806,9 +2821,9 @@ select * from run_dt, extra_flow_dist1 where dt < '2010-01-01'::date; dt | a | b ------------+----+---- - 10-01-1949 | 22 | 22 10-01-1949 | 20 | 20 10-01-1949 | 21 | 21 + 10-01-1949 | 22 | 22 (3 rows) -- case 4 subplan with outer segment general locus without param in subplan (CTE and subquery) @@ -2820,38 +2835,40 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < extra_flow_dist1.a; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) - -> Nested Loop Left Join - Output: extra_flow_rand.a, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on subselect_gp.extra_flow_rand - Output: extra_flow_rand.a, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) + -> Shared Scan (share slice:id 
1:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: extra_flow_rand.a, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on subselect_gp.extra_flow_rand + Output: extra_flow_rand.a, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(29 rows) +(31 rows) -- case 5 for subplan with outer entry locus without param in subplan (CTE and subquery) explain (verbose, costs off) with run_dt as ( @@ -2862,40 +2879,42 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < extra_flow_dist1.a; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) -> Broadcast Motion 1:3 (slice2) - Output: pg_class.relnatts, (max(1)) - -> Nested Loop Left Join - Output: pg_class.relnatts, (max(1)) - Inner 
Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.relnatts, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Gather Motion 3:1 (slice3; segments: 3) + Output: share0_ref1.x, share0_ref1.dt + -> Shared Scan (share slice:id 2:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: pg_class.relnatts, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.relnatts, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(31 rows) +(33 rows) -- case 6 without CTE, nested subquery should not add extral flow explain (verbose, costs off) select * from ( diff --git a/gpcontrib/diskquota/tests/regress/expected/test_fast_disk_check.out b/gpcontrib/diskquota/tests/regress/expected/test_fast_disk_check.out index b38b931b07b..118134cf520 100644 --- a/gpcontrib/diskquota/tests/regress/expected/test_fast_disk_check.out +++ b/gpcontrib/diskquota/tests/regress/expected/test_fast_disk_check.out @@ -9,12 +9,16 @@ SELECT diskquota.wait_for_worker_new_epoch(); t (1 row) +-- FIXME: cbdb_eager_subplan=on converts InitPlan to SubPlan in the view's +-- subqueries, creating multiple segworker groups with pg_database_size(). 
+SET cbdb_eager_subplan = off; SELECT (pg_database_size(oid)-dbsize)/dbsize < 0.1 FROM pg_database, diskquota.show_fast_database_size_view WHERE datname='contrib_regression'; ?column? ---------- t (1 row) +RESET cbdb_eager_subplan; RESET search_path; DROP TABLE s1.a; DROP SCHEMA s1; diff --git a/gpcontrib/diskquota/tests/regress/sql/test_fast_disk_check.sql b/gpcontrib/diskquota/tests/regress/sql/test_fast_disk_check.sql index c15e1bfed4f..de10842659a 100644 --- a/gpcontrib/diskquota/tests/regress/sql/test_fast_disk_check.sql +++ b/gpcontrib/diskquota/tests/regress/sql/test_fast_disk_check.sql @@ -5,7 +5,11 @@ SET search_path to s1; CREATE TABLE a(i int) DISTRIBUTED BY (i); INSERT INTO a SELECT generate_series(1,200000); SELECT diskquota.wait_for_worker_new_epoch(); +-- FIXME: cbdb_eager_subplan=on converts InitPlan to SubPlan in the view's +-- subqueries, creating multiple segworker groups with pg_database_size(). +SET cbdb_eager_subplan = off; SELECT (pg_database_size(oid)-dbsize)/dbsize < 0.1 FROM pg_database, diskquota.show_fast_database_size_view WHERE datname='contrib_regression'; +RESET cbdb_eager_subplan; RESET search_path; DROP TABLE s1.a; DROP SCHEMA s1; diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index be2e0ad496a..317da52dbad 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -71,6 +71,8 @@ #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "optimizer/restrictinfo.h" + typedef enum { INVALID_DQA = -1, @@ -180,7 +182,8 @@ static void add_first_stage_group_agg_path(PlannerInfo *root, cdb_agg_planning_context *ctx); static void add_first_stage_hash_agg_path(PlannerInfo *root, Path *path, - cdb_agg_planning_context *ctx); + cdb_agg_planning_context *ctx, + bool is_partial); static void add_second_stage_group_agg_path(PlannerInfo *root, Path *path, bool is_sorted, @@ -675,6 +678,7 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, { Path 
*cheapest_path = input_rel->cheapest_total_path; Path *cheapest_partial_path = partial_pathlist ? (Path *) linitial(partial_pathlist) : NULL; + Path *input_rel_cheapest_partial_path = input_rel->partial_pathlist ? (Path *) linitial(input_rel->partial_pathlist) : NULL; /* * Consider ways to do the first Aggregate stage. @@ -730,6 +734,33 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, add_first_stage_group_agg_partial_path(root, path, is_sorted, ctx); } } + + /* Enable Parallel GroupingSets. */ + if (ctx->groupingSets && + !ctx->is_distinct && + ctx->agg_costs->distinctAggrefs == NIL && + input_rel->partial_pathlist) + { + /* + * GroupingSets could not be partial aggregated. + * But in MPP, it still have a chance to be parallel if + * using input_rel's partial paths. + */ + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + if (cdbpathlocus_collocates_tlist(root, path->locus, ctx->group_tles)) + continue; + + /* Don't check locus as parallel might be winner. */ + is_sorted = pathkeys_contained_in(ctx->partial_needed_pathkeys, + path->pathkeys); + if (path == input_rel_cheapest_partial_path || is_sorted) + add_first_stage_group_agg_partial_path(root, path, is_sorted, ctx); + } + } } /* @@ -746,39 +777,75 @@ create_two_stage_paths(PlannerInfo *root, cdb_agg_planning_context *ctx, * created the straightforward one-stage plan. 
*/ if (!cdbpathlocus_collocates_tlist(root, cheapest_path->locus, ctx->group_tles)) - add_first_stage_hash_agg_path(root, cheapest_path, ctx); + add_first_stage_hash_agg_path(root, cheapest_path, ctx, false); + + if (ctx->is_distinct && + cheapest_partial_path && + !cdbpathlocus_collocates_tlist(root, cheapest_partial_path->locus, ctx->group_tles)) + add_first_stage_hash_agg_path(root, cheapest_partial_path, ctx, true); + else if (ctx->groupingSets && + input_rel_cheapest_partial_path && + (!cdbpathlocus_collocates_tlist(root, input_rel_cheapest_partial_path->locus, ctx->group_tles))) + add_first_stage_hash_agg_path(root, input_rel_cheapest_partial_path, ctx, true); } - if (partial_pathlist) + if (!ctx->groupingSets && + (ctx->hasAggs || ctx->groupClause != NIL) && + !ctx->is_distinct && + (list_length(ctx->agg_costs->distinctAggrefs) == 0) && + cheapest_partial_path) { - ListCell *lc; - - foreach (lc, partial_pathlist) - { - Path *path = (Path *)lfirst(lc); + /* + * For GroupBy, if there were partially aggregated paths, add it to + * first stage. Erase output_rel's partial paths, eager cbdb + * multiphase. + * + * However, skip forcing multiphase in two cases: + * + * 1. When the non-parallel cheapest path is already collocated by + * GROUP BY columns. In parallel mode, partial paths have Strewn + * locus (not collocated), but within each segment the data + * retains its hash distribution. Forcing 2-phase is wasteful + * when 1-phase aggregation can run locally per segment. + * + * 2. When the GROUP BY cardinality is high relative to input rows. + * In parallel mode with Strewn locus, each worker encounters + * most of the per-segment groups. When there are many groups + * (>100K) and they exceed 10% of per-worker input rows, partial + * aggregation barely reduces row count and the hash table is + * likely to exceed work_mem, causing streaming spills that + * produce far more output rows than the group count. 
A 1-phase + * plan (redistribute raw rows, then aggregate on collocated + * data) is more efficient in this scenario. The absolute floor + * of 100K groups avoids affecting small queries where either + * plan performs equally well. + */ + bool force_twophase = true; - if (cdbpathlocus_collocates_tlist(root, path->locus, ctx->group_tles)) - continue; + /* Case 1: data already collocated by GROUP BY columns */ + if (cdbpathlocus_collocates_tlist(root, cheapest_partial_path->locus, + ctx->group_tles) || + cdbpathlocus_collocates_tlist(root, cheapest_path->locus, + ctx->group_tles)) + force_twophase = false; - if (ctx->is_distinct && ctx->can_hash) - { - double dNumGroups = estimate_num_groups_on_segment(ctx->dNumGroupsTotal, - path->rows, - path->locus); + /* Case 2: high cardinality GROUP BY */ + if (force_twophase) + { + double dNumGroups; + + dNumGroups = estimate_num_groups_on_segment(ctx->dNumGroupsTotal, + cheapest_partial_path->rows, + cheapest_partial_path->locus); + if (dNumGroups > 100000 && + dNumGroups > cheapest_partial_path->rows * 0.1) + force_twophase = false; + } - path = (Path *) create_agg_path(root, - ctx->partial_rel, - path, - ctx->partial_grouping_target, - AGG_HASHED, - ctx->hasAggs ? 
AGGSPLIT_INITIAL_SERIAL : AGGSPLIT_SIMPLE, - parallel_query_use_streaming_hashagg, /* streaming */ - ctx->groupClause, - NIL, - ctx->agg_partial_costs, - dNumGroups); - } - add_partial_path(ctx->partial_rel, path); + if (force_twophase) + { + output_rel->partial_pathlist = NIL; + add_partial_path(ctx->partial_rel, cheapest_partial_path); } } @@ -1012,6 +1079,16 @@ add_first_stage_group_agg_path(PlannerInfo *root, { DQAType dqa_type; + double dNumGroups; + + dNumGroups = estimate_num_groups_on_segment(ctx->dNumGroupsTotal, + path->rows, path->locus); + if (path->parallel_workers > 1) + { + dNumGroups /= path->parallel_workers; + dNumGroups = clamp_row_est(dNumGroups); + } + /* * DISTINCT-qualified aggregates are accepted only in the special * case that the input happens to be collocated with the DISTINCT @@ -1096,8 +1173,7 @@ add_first_stage_group_agg_path(PlannerInfo *root, ctx->groupClause, NIL, ctx->agg_partial_costs, - estimate_num_groups_on_segment(ctx->dNumGroupsTotal, - path->rows, path->locus)), + dNumGroups), root); } else @@ -1122,6 +1198,7 @@ add_second_stage_group_agg_path(PlannerInfo *root, CdbPathLocus singleQE_locus; CdbPathLocus group_locus; bool need_redistribute; + double dNumGroups; /* The input should be distributed, otherwise no point in a two-stage Agg. 
*/ Assert(CdbPathLocus_IsPartitioned(initial_agg_path->locus)); @@ -1132,6 +1209,12 @@ add_second_stage_group_agg_path(PlannerInfo *root, &need_redistribute); Assert(need_redistribute); + if (CdbPathLocus_IsPartitioned(group_locus)) + dNumGroups = clamp_row_est(ctx->dNumGroupsTotal / + CdbPathLocus_NumSegmentsPlusParallelWorkers(group_locus)); + else + dNumGroups = ctx->dNumGroupsTotal; + /* * We consider two different loci for the final result: * @@ -1176,7 +1259,7 @@ add_second_stage_group_agg_path(PlannerInfo *root, ctx->final_groupClause, ctx->havingQual, ctx->agg_final_costs, - ctx->dNumGroupsTotal); + dNumGroups); path->pathkeys = strip_gsetid_from_pathkeys(ctx->gsetid_sortref, path->pathkeys); if (!is_partial) @@ -1229,7 +1312,8 @@ add_second_stage_group_agg_path(PlannerInfo *root, static void add_first_stage_hash_agg_path(PlannerInfo *root, Path *path, - cdb_agg_planning_context *ctx) + cdb_agg_planning_context *ctx, + bool is_partial) { Query *parse = root->parse; Path *first_stage_agg_path = NULL; @@ -1238,6 +1322,28 @@ add_first_stage_hash_agg_path(PlannerInfo *root, dNumGroups = estimate_num_groups_on_segment(ctx->dNumGroupsTotal, path->rows, path->locus); + /* + * When the estimated per-segment groups exceed half the per-segment input + * rows, the cardinality estimate is likely unreliable (e.g., from default + * statistics on UNION ALL subquery columns that lack pg_statistic data). + * + * In MPP systems, choosing a 1-phase plan (redistribute all raw rows, + * then aggregate) over a 2-phase plan (local partial aggregate, then + * redistribute fewer rows) is very costly when the estimate is wrong. + * The streaming hash aggregate can efficiently discover the true group + * count at runtime, so we optimistically reduce the estimate to give + * the 2-phase plan a fair chance in cost comparison. 
+ */ + if (gp_use_streaming_hashagg && + cbdb_2phase_agg_cardinality_cap < 1.0 && + dNumGroups > path->rows * cbdb_2phase_agg_cardinality_cap) + dNumGroups = clamp_row_est(path->rows * 0.1); + + if (path->parallel_workers > 1) + { + dNumGroups /= path->parallel_workers; + dNumGroups = clamp_row_est(dNumGroups); + } if (parse->groupingSets && ctx->new_rollups) { @@ -1253,12 +1359,14 @@ add_first_stage_hash_agg_path(PlannerInfo *root, CdbPathLocus_MakeStrewn(&(first_stage_agg_path->locus), CdbPathLocus_NumSegments(first_stage_agg_path->locus), path->parallel_workers); - add_path(ctx->partial_rel, first_stage_agg_path, root); + if (!is_partial) + add_path(ctx->partial_rel, first_stage_agg_path, root); + else + add_partial_path(ctx->partial_rel, first_stage_agg_path); } else { - add_path(ctx->partial_rel, - (Path *) create_agg_path(root, + first_stage_agg_path = (Path *) create_agg_path(root, ctx->partial_rel, path, ctx->partial_grouping_target, @@ -1268,8 +1376,11 @@ add_first_stage_hash_agg_path(PlannerInfo *root, ctx->groupClause, NIL, ctx->agg_partial_costs, - dNumGroups), - root); + dNumGroups); + if (!is_partial) + add_path(ctx->partial_rel, first_stage_agg_path, root); + else + add_partial_path(ctx->partial_rel, first_stage_agg_path); } } @@ -1299,10 +1410,9 @@ add_second_stage_hash_agg_path(PlannerInfo *root, /* * Calculate the number of groups in the second stage, per segment. */ - // consider parallel? 
if (CdbPathLocus_IsPartitioned(group_locus)) dNumGroups = clamp_row_est(ctx->dNumGroupsTotal / - CdbPathLocus_NumSegments(group_locus)); + CdbPathLocus_NumSegmentsPlusParallelWorkers(group_locus)); else dNumGroups = ctx->dNumGroupsTotal; @@ -2778,11 +2888,19 @@ static void add_first_stage_group_agg_partial_path(PlannerInfo *root, bool is_sorted, cdb_agg_planning_context *ctx) { + double dNumGroups; - if (ctx->agg_costs->distinctAggrefs || - ctx->groupingSets) + if (list_length(ctx->agg_costs->distinctAggrefs) != 0) return; + dNumGroups = estimate_num_groups_on_segment(ctx->dNumGroupsTotal, + path->rows, path->locus); + if (path->parallel_workers > 1) + { + dNumGroups /= path->parallel_workers; + dNumGroups = clamp_row_est(dNumGroups); + } + if (!is_sorted) { path = (Path *) create_sort_path(root, @@ -2792,18 +2910,124 @@ static void add_first_stage_group_agg_partial_path(PlannerInfo *root, -1.0); } - Assert(ctx->hasAggs || ctx->groupClause); - add_partial_path(ctx->partial_rel, - (Path *) create_agg_path(root, - ctx->partial_rel, - path, - ctx->partial_grouping_target, - ctx->groupClause ? AGG_SORTED : AGG_PLAIN, - ctx->hasAggs ? AGGSPLIT_INITIAL_SERIAL : AGGSPLIT_SIMPLE, - false, /* streaming */ - ctx->groupClause, - NIL, - ctx->agg_partial_costs, - estimate_num_groups_on_segment(ctx->dNumGroupsTotal, - path->rows, path->locus))); + Assert(ctx->hasAggs || ctx->groupClause || ctx->groupingSets); + if (ctx->groupingSets) + { + /* + * We have grouping sets, possibly with aggregation. Make + * a GroupingSetsPath. + * + * NOTE: We don't pass the HAVING quals here. HAVING quals can + * only be evaluated in the Finalize stage, after computing the + * final aggregate values. 
+ */ + Path *first_stage_agg_path; + + first_stage_agg_path = + (Path *) create_groupingsets_path(root, + ctx->partial_rel, + path, + AGGSPLIT_INITIAL_SERIAL, + NIL, + AGG_SORTED, + ctx->rollups, + ctx->agg_partial_costs); + add_partial_path(ctx->partial_rel, first_stage_agg_path); + } + else if (ctx->hasAggs || ctx->groupClause) + { + add_partial_path(ctx->partial_rel, + (Path *) create_agg_path(root, + ctx->partial_rel, + path, + ctx->partial_grouping_target, + ctx->groupClause ? AGG_SORTED : AGG_PLAIN, + ctx->hasAggs ? AGGSPLIT_INITIAL_SERIAL : AGGSPLIT_SIMPLE, + false, /* streaming */ + ctx->groupClause, + NIL, + ctx->agg_partial_costs, + dNumGroups)); + } +} + +/* + * cdb_create_pre_window_agg_path - Create a path with pre-filtered window aggregation + * + * This function creates a path that computes a window function (rank/dense_rank) + * and applies a filter (e.g., rank <= N) before the main window aggregation. + * This optimization reduces the number of rows processed by subsequent operations. + * + * Returns: A path with WindowAgg -> Result(filter) structure + */ +Path * +cdb_create_pre_window_agg_path(PlannerInfo *root, + bool is_sorted, + int presorted_keys, + RelOptInfo *rel, + Path *subpath, + PathTarget *target, + List *group_pathkeys, + PathTarget *window_target, + List *window_functions, + WindowClause *wc) +{ + bool use_incremental_sort = (presorted_keys != 0 && enable_incremental_sort); + PathTarget *orig_pathtarget; + + if(!is_sorted && group_pathkeys) + { + if (!use_incremental_sort) + subpath = (Path *) create_sort_path(root, + rel, + subpath, + group_pathkeys, + -1.0); + else + { + subpath = (Path *) create_incremental_sort_path(root, + rel, + subpath, + group_pathkeys, + presorted_keys, + -1.0); + } + } + + /* + * Save origin pathtarget before we create pre window filter. + * We need to keep Result node's pathtarget same as if there + * is no window filter. 
+ * + * -> Result + * Output: tenk1.ten, tenk1.four + * Filter: ((rank() OVER (?)) < 3) + * -> WindowAgg + * Output: rank() OVER (?), tenk1.ten, tenk1.four + * Partition By: tenk1.four + * Order By: tenk1.ten + * -> Sort + * Output: tenk1.ten, tenk1.four + * Sort Key: tenk1.four, tenk1.ten + * -> Index Scan using tenk1_unique2 on public.tenk1 + * Output: tenk1.ten, tenk1.four + * Index Cond: (tenk1.unique2 < 10) + */ + orig_pathtarget = copy_pathtarget(subpath->pathtarget); + + subpath = (Path *) + create_windowagg_path(root, rel, subpath, window_target, + window_functions, wc); + + Node *window_filter = copyObject(root->upper_window_filter); + + RestrictInfo *restrict_info = make_simple_restrictinfo(root, (Expr*) window_filter); + + subpath = (Path *) create_projection_path_with_quals(root, + rel, + subpath, + orig_pathtarget, + list_make1(restrict_info), + false); + return subpath; } diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index da702806be5..e069b5f59eb 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -819,7 +819,17 @@ cdbllize_decorate_subplans_with_motions(PlannerInfo *root, Plan *plan) if (IsA(subplan, Motion) && !sstate->is_initplan && /* CBDB_PARALLEL_FIXME: enable_material && */ !sstate->useHashTable) + { subplan = (Plan *) make_material(subplan); + subplan->startup_cost = ((Material*)subplan)->plan.lefttree->startup_cost; + subplan->total_cost = ((Material*)subplan)->plan.lefttree->total_cost; + subplan->plan_rows = ((Material*)subplan)->plan.lefttree->plan_rows; + subplan->plan_width = ((Material*)subplan)->plan.lefttree->plan_width; + subplan->parallel_aware = false; + subplan->parallel_safe = ((Material*)subplan)->plan.lefttree->parallel_safe; /* inherit from the materialized child, like the cost fields above (was a self-assignment no-op) */ + + } + } subplan = (Plan *) fix_outer_query_motions_mutator((Node *) subplan, &context); diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index 6fdf21d58ae..6c18495a992 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -47,6 +47,8 @@
#include "executor/executor.h" +bool apply_shareinput_dag_to_tree_from_subplan = false; + typedef struct { plan_tree_base_prefix base; /* Required prefix for @@ -640,7 +642,7 @@ create_shareinput_producer_rte(ApplyShareInputContext *ctxt, int share_id, * Memorize the shared plan of a shared input in an array, one per share_id. */ static void -shareinput_save_producer(ShareInputScan *plan, ApplyShareInputContext *ctxt) +shareinput_save_producer(ShareInputScan *plan, ApplyShareInputContext *ctxt, bool from_subplan) { int share_id = plan->share_id; int new_shared_input_count = (share_id + 1); @@ -651,11 +653,24 @@ shareinput_save_producer(ShareInputScan *plan, ApplyShareInputContext *ctxt) { ctxt->shared_plans = palloc0(sizeof(Plan *) * new_shared_input_count); ctxt->shared_input_count = new_shared_input_count; + ctxt->ctenames = palloc0(sizeof(char *) * new_shared_input_count); + ctxt->producer_from_subplan = palloc0(sizeof(bool) * new_shared_input_count); + ctxt->producer_parent_plans = palloc0(sizeof(ShareInputScan*) * new_shared_input_count); } else if (ctxt->shared_input_count < new_shared_input_count) { ctxt->shared_plans = repalloc(ctxt->shared_plans, new_shared_input_count * sizeof(Plan *)); memset(&ctxt->shared_plans[ctxt->shared_input_count], 0, (new_shared_input_count - ctxt->shared_input_count) * sizeof(Plan *)); + + ctxt->ctenames = repalloc(ctxt->ctenames, sizeof(char *) * new_shared_input_count); + memset(&ctxt->ctenames[ctxt->shared_input_count], 0, (new_shared_input_count - ctxt->shared_input_count) * sizeof(char *)); + + ctxt->producer_from_subplan = repalloc(ctxt->producer_from_subplan , sizeof(bool) * new_shared_input_count); + memset(&ctxt->producer_from_subplan[ctxt->shared_input_count], 0, (new_shared_input_count - ctxt->shared_input_count) * sizeof(bool)); + + ctxt->producer_parent_plans = repalloc(ctxt->producer_parent_plans , new_shared_input_count * sizeof(ShareInputScan*)); + memset(&ctxt->producer_parent_plans[ctxt->shared_input_count], 0, 
(new_shared_input_count - ctxt->shared_input_count) * sizeof(ShareInputScan*)); + ctxt->shared_input_count = new_shared_input_count; } @@ -663,6 +678,9 @@ shareinput_save_producer(ShareInputScan *plan, ApplyShareInputContext *ctxt) Assert(ctxt->shared_plans[share_id] == NULL); ctxt->shared_plans[share_id] = plan->scan.plan.lefttree; + ctxt->ctenames[share_id] = plan->ctename; + ctxt->producer_from_subplan[share_id] = from_subplan; + ctxt->producer_parent_plans[share_id] = plan; } /* @@ -675,6 +693,25 @@ shareinput_save_producer(ShareInputScan *plan, ApplyShareInputContext *ctxt) * Also, a share_id is assigned to each ShareInputScan node, as well as the * Material/Sort nodes below the producers. The producers and its consumers * are linked together by the same share_id. + * + * Producer Relocation from SubPlan to Main Plan + * --------------------------------------------- + * When processing subplans (InitPlans), a ShareInputScan may initially become + * a producer. However, if the same CTE is also referenced in the main plan, + * we prefer to have the producer in the main plan rather than in an InitPlan. + * This is controlled by the global flag 'apply_shareinput_dag_to_tree_from_subplan': + * + * - When processing subplans, this flag is set to true, and any ShareInputScan + * encountered first becomes a tentative producer (marked with producer_from_subplan). + * + * - When processing the main plan (flag is false), if we find a ShareInputScan + * that matches a producer previously set from a subplan, we relocate the + * producer role to the main plan. The previous producer (in subplan) becomes + * a consumer instead. + * + * This relocation ensures that the shared scan producer executes in the main + * plan context, which provides better execution coordination and avoids issues + * with InitPlan execution ordering. 
*/ static bool shareinput_mutator_dag_to_tree(Node *node, PlannerInfo *root, bool fPop) @@ -700,8 +737,55 @@ shareinput_mutator_dag_to_tree(Node *node, PlannerInfo *root, bool fPop) /* Is there a producer for this sub-tree already? */ for (share_id = 0; share_id < ctxt->shared_input_count; share_id++) { - if (ctxt->shared_plans[share_id] == subplan) + if (ctxt->shared_plans[share_id] == subplan || + (ctxt->ctenames[share_id] != NULL && siscan->ctename != NULL && strcmp(ctxt->ctenames[share_id], siscan->ctename) == 0)) /* NULL-safe: either CTE name may be unset, and strcmp() on NULL is undefined behavior */ { + if (ctxt->producer_from_subplan[share_id] && !apply_shareinput_dag_to_tree_from_subplan) + { + /* + * Producer relocation: The existing producer was set from a subplan + * (InitPlan), but we're now processing the main plan and found another + * reference to the same CTE. Relocate the producer role to this + * ShareInputScan in the main plan. + * + * Steps: + * 1. Convert the previous producer (in subplan) to a consumer by + * removing its subtree. + * 2. Make this ShareInputScan the new producer by keeping its subtree + * and updating the context. + * + * This ensures the shared scan materializes data in the main plan + * execution context rather than in an InitPlan. + */ + ShareInputScan *pre_producer = ctxt->producer_parent_plans[share_id]; + pre_producer->scan.plan.lefttree = NULL; + + /* This ShareInputScan becomes the new producer. */ + siscan->share_id = share_id; + ctxt->shared_plans[share_id] = siscan->scan.plan.lefttree; + ctxt->producer_from_subplan[share_id] = false; + ctxt->producer_parent_plans[share_id] = siscan; + + attno = 1; + foreach(lc, subplan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resname == NULL) + { + char default_name[100]; + char *resname; + + snprintf(default_name, sizeof(default_name), "col_%d", attno); + + resname = strVal(get_tle_name(tle, ctxt->curr_rtable, default_name)); + tle->resname = pstrdup(resname); + } + attno++; + } + return true; + } + /* * Yes. This is a consumer. Remove the subtree, and assign the * same share_id as the producer.
@@ -719,7 +803,7 @@ shareinput_mutator_dag_to_tree(Node *node, PlannerInfo *root, bool fPop) */ siscan->share_id = share_id; - shareinput_save_producer(siscan, ctxt); + shareinput_save_producer(siscan, ctxt, apply_shareinput_dag_to_tree_from_subplan); /* * Also make sure that all the entries in the subplan's target list @@ -783,7 +867,7 @@ collect_shareinput_producers_walker(Node *node, PlannerInfo *root, bool fPop) Assert(siscan->share_id >= 0); if (subplan) - shareinput_save_producer(siscan, ctxt); + shareinput_save_producer(siscan, ctxt, false); } return true; } diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index e9d7dac9895..08adec3460f 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -3098,6 +3098,8 @@ cdbpath_motion_for_parallel_join(PlannerInfo *root, switch (jointype) { case JOIN_INNER: + case JOIN_UNIQUE_INNER: + case JOIN_UNIQUE_OUTER: break; case JOIN_SEMI: if (!enable_parallel_semi_join) @@ -3108,8 +3110,6 @@ cdbpath_motion_for_parallel_join(PlannerInfo *root, case JOIN_LASJ_NOTIN: outer.ok_to_replicate = false; break; - case JOIN_UNIQUE_OUTER: - case JOIN_UNIQUE_INNER: case JOIN_RIGHT: case JOIN_FULL: outer.ok_to_replicate = false; @@ -3372,13 +3372,11 @@ cdbpath_motion_for_parallel_join(PlannerInfo *root, else if (CdbPathLocus_IsSegmentGeneral(outer.locus)) { /* - * In principle, we couldn't get here as: - * 1.If both's parallel_workers is 0, they should be handled in cdbpath_motion_for_join(). - * 2.If inner path's parallel_workers > 0, it must be from a partial_pathlist. - * SegmentGeneral neighter could be from base rel's partial_pathlist nor could be from - * partial_pathlist of a join locus. + * As we have enabled parallel unique outer and inner joins, this case is now possible. + * Ex: join between rpt tables. */ - Assert(false); + + /* TODO: enable more possible cases.
*/ goto fail; } else if (CdbPathLocus_IsSingleQE(outer.locus)) diff --git a/src/backend/cdb/cdbsubselect.c b/src/backend/cdb/cdbsubselect.c index 43a46eace4b..38c969e70be 100644 --- a/src/backend/cdb/cdbsubselect.c +++ b/src/backend/cdb/cdbsubselect.c @@ -586,7 +586,23 @@ convert_EXPR_to_join(PlannerInfo *root, OpExpr *opexp) { Assert(root); Assert(list_length(opexp->args) == 2); - Node *rarg = list_nth(opexp->args, 1); + + Node *rarg = NULL; + Node *n_tmp = (Node*) opexp; + OpExpr *op_tmp = NULL; + + while (IsA(n_tmp, OpExpr)) + { + op_tmp = (OpExpr *) n_tmp; + Assert(list_length(op_tmp->args) == 2); + + rarg = list_nth(op_tmp->args, 1); + if (IsA(rarg, SubLink)) + { + break; + } + n_tmp = list_nth(op_tmp->args, 1); + } Assert(IsA(rarg, SubLink)); SubLink *sublink = (SubLink *) rarg; @@ -656,7 +672,7 @@ convert_EXPR_to_join(PlannerInfo *root, OpExpr *opexp) exprCollation((Node *) subselectAggTLE->expr), 0); - list_nth_replace(opexp->args, 1, aggVar); + list_nth_replace(op_tmp->args, 1, aggVar); return join_expr; } diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 5dbf2e3297a..0a45c8778f7 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -569,3 +569,12 @@ TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const econtext->ecxt_outertuple = slot1; return !ExecQualAndReset(hashtable->cur_eq_func, econtext); } + +/* + * Destroy the hashtable, free all memory allocated for the hashtable. 
+ */ +void +DestroyTupleHashTable(TupleHashTable hashtable) +{ + tuplehash_destroy(hashtable->hashtab); +} \ No newline at end of file diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 7513d11102f..c12502d7ae8 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1888,7 +1888,10 @@ hash_agg_set_limits(AggState *aggstate, double hashentrysize, double input_group * of the buffers needed for all the tapes that need to be open at once. * Then, subtract that from the memory available for holding hash tables. */ - npartitions = hash_choose_num_partitions(aggstate, + if (aggstate && aggstate->streaming) + npartitions = 0; + else + npartitions = hash_choose_num_partitions(aggstate, input_groups, hashentrysize, used_bits, @@ -2828,7 +2831,29 @@ agg_refill_hash_table(AggState *aggstate) /* free memory and reset hash tables */ ReScanExprContext(aggstate->hashcontext); for (int setno = 0; setno < aggstate->num_hashes; setno++) - ResetTupleHashTable(aggstate->perhash[setno].hashtable); + { + TupleHashTable hashtable = aggstate->perhash[setno].hashtable; + /* + * Check the memory limitation. + * + * If hashtable memory exceeds the memory limitation, shrink the hashtable to half + * of its current size to free memory which will be used later. + */ + if (hashtable->hashtab->size * sizeof(TupleHashEntryData) >= aggstate->hash_mem_limit) + { + /* + * Hashtable creation uses fill factor to determine the new hashtable size, so we pass + * 1/4 of original hashtable size as input size. Through the size computation, the new + * size is 1/2 of original hashtable size.
+ */ + uint64 size = hashtable->hashtab->size / 4; + DestroyTupleHashTable(hashtable); + aggstate->perhash[setno].hashtable->hashtab = tuplehash_create(aggstate->hash_metacxt, + size, hashtable); + continue; + } + ResetTupleHashTable(hashtable); + } aggstate->hash_ngroups_current = 0; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index c084f7e7c78..fd2521fb04e 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -83,6 +83,7 @@ static HashJoinTuple ExecParallelHashTupleAlloc(HashJoinTable hashtable, dsa_pointer *shared); static void MultiExecPrivateHash(HashState *node); static void MultiExecParallelHash(HashState *node); +static void MergeParallelRuntimeFilters(HashState *node, HashJoinTable hashtable); static inline HashJoinTuple ExecParallelHashFirstTuple(HashJoinTable table, int bucketno); static inline HashJoinTuple ExecParallelHashNextTuple(HashJoinTable table, @@ -346,6 +347,9 @@ MultiExecParallelHash(HashState *node) if (TupIsNull(slot)) break; + if (gp_enable_runtime_filter_pushdown && node->filters) + AddTupleValuesIntoRF(node, slot); + econtext->ecxt_outertuple = slot; if (ExecHashGetHashValue(node, hashtable, econtext, hashkeys, false, hashtable->keepNulls, @@ -438,6 +442,17 @@ MultiExecParallelHash(HashState *node) hashtable->totalTuples = pstate->total_tuples; ExecParallelHashEnsureBatchAccessors(hashtable); + if (gp_enable_runtime_filter_pushdown && node->filters) + { + /* + * In parallel hash join, each worker has only a partial bloom filter + * (containing ~1/N of inner tuples). We must merge all partial filters + * into a complete one before pushing down to the probe-side SeqScan. 
+ */ + MergeParallelRuntimeFilters(node, hashtable); + PushdownRuntimeFilter(node); + } + /* * The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't @@ -4313,6 +4328,177 @@ get_hash_mem(void) return (int) mem_limit; } +/* + * Shared merge buffer layout for parallel runtime filter merge. + * + * For each AttrFilter, we store: empty flag, min, max, and the bloom filter + * bitset. All workers OR their partial bitsets into this shared buffer, + * then read back the complete merged result. + */ +typedef struct SharedRFSlot +{ + bool empty; + Datum min; + Datum max; + Size bitset_bytes; + /* unsigned char bitset[] follows, at offset MAXALIGN(sizeof(SharedRFSlot)) */ +} SharedRFSlot; + +#define SHARED_RF_SLOT_HEADER MAXALIGN(sizeof(SharedRFSlot)) +#define SHARED_RF_SLOT_SIZE(bs) (SHARED_RF_SLOT_HEADER + MAXALIGN(bs)) + +static SharedRFSlot * +GetSharedRFSlot(char *base, int idx, Size bitset_bytes) +{ + return (SharedRFSlot *)(base + idx * SHARED_RF_SLOT_SIZE(bitset_bytes)); +} + +static unsigned char * +GetSharedRFBitset(SharedRFSlot *slot) +{ + return (unsigned char *)slot + SHARED_RF_SLOT_HEADER; +} + +/* + * MergeParallelRuntimeFilters - merge partial bloom filters from all parallel + * workers into a complete filter via shared memory. + * + * Each parallel hash join worker builds a partial bloom filter containing only + * the inner tuples it processed (~1/N of total). Before pushing down to the + * probe-side SeqScan, we must merge all partial filters. + * + * Protocol: + * 1. First worker to acquire lock allocates shared buffer via DSA, initializes + * it (empty=true, min=LONG_MAX, max=LONG_MIN, bitset=0). + * 2. Each worker locks, OR's its partial bitset into shared buffer, updates + * min/max, unlocks, increments atomic counter. + * 3. All workers wait for counter == nparticipants. + * 4. Each worker copies merged data back to its private AttrFilter. 
+ */ +static void +MergeParallelRuntimeFilters(HashState *node, HashJoinTable hashtable) +{ + ParallelHashJoinState *pstate = hashtable->parallel_state; + ListCell *lc; + int nfilters = list_length(node->filters); + int i; + Size bitset_bytes; + Size total_size; + char *shared_base; + + if (nfilters == 0) + return; + + /* All filters have the same bitset size (same plan_rows, work_mem, seed) */ + bitset_bytes = bloom_bitset_bytes( + ((AttrFilter *) linitial(node->filters))->blm_filter); + + total_size = nfilters * SHARED_RF_SLOT_SIZE(bitset_bytes); + + /* + * Phase 1: Each worker merges its partial data into shared buffer. + * Use the existing LWLock in ParallelHashJoinState for serialization. + */ + LWLockAcquire(&pstate->lock, LW_EXCLUSIVE); + + /* First worker allocates and initializes the shared merge buffer */ + if (!DsaPointerIsValid(pstate->rf_merge_buf)) + { + dsa_pointer dp; + SharedRFSlot *slot; + + dp = dsa_allocate0(hashtable->area, total_size); + shared_base = dsa_get_address(hashtable->area, dp); + + for (i = 0; i < nfilters; i++) + { + slot = GetSharedRFSlot(shared_base, i, bitset_bytes); + slot->empty = true; + slot->min = LONG_MAX; + slot->max = LONG_MIN; + slot->bitset_bytes = bitset_bytes; + /* bitset is already zeroed by dsa_allocate0 */ + } + + pstate->rf_merge_buf = dp; + } + + shared_base = dsa_get_address(hashtable->area, pstate->rf_merge_buf); + + /* OR this worker's partial bloom filter into the shared buffer */ + i = 0; + foreach(lc, node->filters) + { + AttrFilter *af = lfirst(lc); + SharedRFSlot *slot = GetSharedRFSlot(shared_base, i, bitset_bytes); + unsigned char *shared_bits = GetSharedRFBitset(slot); + unsigned char *my_bits = bloom_get_bitset(af->blm_filter); + Size nbytes = bloom_bitset_bytes(af->blm_filter); + Size j; + + Assert(nbytes == bitset_bytes); + + if (!af->empty) + { + slot->empty = false; + + /* Merge min/max */ + if ((int64_t)af->min < (int64_t)slot->min) + slot->min = af->min; + if ((int64_t)af->max > 
(int64_t)slot->max) + slot->max = af->max; + } + + /* Bitwise OR the bloom filter bitset */ + for (j = 0; j < nbytes; j++) + shared_bits[j] |= my_bits[j]; + + i++; + } + + LWLockRelease(&pstate->lock); + + /* Signal that this worker is done merging */ + pg_atomic_add_fetch_u32(&pstate->rf_merge_count, 1); + + /* + * Wait for all workers to finish merging (cancel-safe). + * + * Use build_barrier's participant count instead of pstate->nparticipants, + * because under Parallel Append only a subset of workers may enter this + * hash join. nparticipants is the total worker count, but build_barrier + * tracks only workers that actually attached to this hash join instance. + */ + { + int nworkers = BarrierParticipants(&pstate->build_barrier); + + while (pg_atomic_read_u32(&pstate->rf_merge_count) < (uint32) nworkers) + CHECK_FOR_INTERRUPTS(); + } + + /* + * Phase 2: Copy merged data back to this worker's private AttrFilter. + * The shared buffer is read-only at this point (all writers are done). + */ + shared_base = dsa_get_address(hashtable->area, pstate->rf_merge_buf); + + i = 0; + foreach(lc, node->filters) + { + AttrFilter *af = lfirst(lc); + SharedRFSlot *slot = GetSharedRFSlot(shared_base, i, bitset_bytes); + unsigned char *shared_bits = GetSharedRFBitset(slot); + unsigned char *my_bits = bloom_get_bitset(af->blm_filter); + + af->empty = slot->empty; + af->min = slot->min; + af->max = slot->max; + memcpy(my_bits, shared_bits, bitset_bytes); + + i++; + } +} + /* * Convert AttrFilter to ScanKeyData and send these runtime filters to the * target node(seqscan). 
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index a28e6a14cdb..11ac903c4f2 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -182,7 +182,8 @@ static List *FindTargetNodes(HashJoinState *hjstate, static AttrFilter *CreateAttrFilter(PlanState *target, AttrNumber lattno, AttrNumber rattno, - double plan_rows); + double plan_rows, + uint64 seed); extern bool Test_print_prefetch_joinqual; @@ -1026,8 +1027,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) } if (Gp_role == GP_ROLE_EXECUTE - && gp_enable_runtime_filter_pushdown - && !estate->useMppParallelMode) + && gp_enable_runtime_filter_pushdown) CreateRuntimeFilter(hjstate); return hjstate; @@ -2134,6 +2134,8 @@ ExecHashJoinInitializeDSM(HashJoinState *state, ParallelContext *pcxt) BarrierInit(&pstate->outer_motion_barrier, pcxt->nworkers); pstate->phs_lasj_has_null = false; + pstate->rf_merge_buf = InvalidDsaPointer; + pg_atomic_init_u32(&pstate->rf_merge_count, 0); /* Set up the space we'll use for shared temporary files. */ SharedFileSetInit(&pstate->fileset, pcxt->seg); @@ -2224,6 +2226,7 @@ CreateRuntimeFilter(HashJoinState* hjstate) AttrFilter *attr_filter; ListCell *lc; List *targets; + uint64 seed; /* * A build-side Bloom filter tells us if a row is definitely not in the build @@ -2244,6 +2247,16 @@ CreateRuntimeFilter(HashJoinState* hjstate) hstate = castNode(HashState, innerPlanState(hjstate)); hstate->filters = NIL; + /* + * For parallel-aware hash joins, all workers must use the same bloom + * filter seed so their partial filters can be merged via bitwise OR. + * Use plan_node_id as a deterministic seed in that case. 
+ */ + if (hstate->ps.plan->parallel_aware) + seed = (uint64) hstate->ps.plan->plan_node_id; + else + seed = random(); + /* * check and initialize the runtime filter for all hash conds in * hj->hashclauses @@ -2274,7 +2287,7 @@ CreateRuntimeFilter(HashJoinState* hjstate) Assert(IsA(target, SeqScanState) || IsA(target, DynamicSeqScanState)); attr_filter = CreateAttrFilter(target, lattno, rattno, - hstate->ps.plan->plan_rows); + hstate->ps.plan->plan_rows, seed); if (attr_filter->blm_filter) hstate->filters = lappend(hstate->filters, attr_filter); else @@ -2490,7 +2503,7 @@ FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno) static AttrFilter* CreateAttrFilter(PlanState *target, AttrNumber lattno, AttrNumber rattno, - double plan_rows) + double plan_rows, uint64 seed) { AttrFilter *attr_filter = palloc0(sizeof(AttrFilter)); attr_filter->empty = true; @@ -2499,7 +2512,7 @@ CreateAttrFilter(PlanState *target, AttrNumber lattno, AttrNumber rattno, attr_filter->lattno = lattno; attr_filter->rattno = rattno; - attr_filter->blm_filter = bloom_create_aggresive(plan_rows, work_mem, random()); + attr_filter->blm_filter = bloom_create_aggresive(plan_rows, work_mem, seed); StaticAssertDecl(sizeof(LONG_MAX) == sizeof(Datum), "sizeof(LONG_MAX) should be equal to sizeof(Datum)"); StaticAssertDecl(sizeof(LONG_MIN) == sizeof(Datum), "sizeof(LONG_MIN) should be equal to sizeof(Datum)"); diff --git a/src/backend/executor/nodeMotion.c b/src/backend/executor/nodeMotion.c index 1229d460a55..41f2188347e 100644 --- a/src/backend/executor/nodeMotion.c +++ b/src/backend/executor/nodeMotion.c @@ -734,8 +734,23 @@ ExecInitMotion(Motion *node, EState *estate, int eflags) } else { - /* sanity checks */ - if (list_length(recvSlice->segments) != 1) + /* + * A Gather normally requires exactly 1 receiver. + * When the receiving slice uses MPP parallel mode + * (parallel_workers > 1 on a single segment), the + * segments list is expanded by the parallel factor. 
+ * This is valid: the sender will broadcast tuples + * to all parallel workers so each one gets the + * complete result set (see doSendTuple). + */ + if (recvSlice->useMppParallelMode) + { + if (list_length(recvSlice->segments) != recvSlice->parallel_workers) + elog(ERROR, "unexpected gang size: %d for parallel_workers %d", + list_length(recvSlice->segments), + recvSlice->parallel_workers); + } + else if (list_length(recvSlice->segments) != 1) elog(ERROR, "unexpected gang size: %d", list_length(recvSlice->segments)); } } @@ -1198,12 +1213,17 @@ doSendTuple(Motion *motion, MotionState *node, TupleTableSlot *outerTupleSlot) motion->motionType == MOTIONTYPE_GATHER_SINGLE) { /* - * Actually, since we can only send to a single output segment - * here, we are guaranteed that we only have a single targetRoute - * setup that we could possibly send to. So we can cheat and just - * fix the targetRoute to 0 (the 1st route). + * Normally a Gather sends to a single receiver (route 0). + * + * When the receiving slice has parallel workers (e.g., a + * SubPlan's Gather Motion embedded in a parallel slice), + * we broadcast to all workers so each one receives the + * complete set of tuples for correct aggregate finalization. */ - targetRoute = 0; + if (parallel_workers >= 2) + targetRoute = BROADCAST_SEGIDX; + else + targetRoute = 0; } else if (motion->motionType == MOTIONTYPE_BROADCAST) diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 7ee5a2e94cf..3ba5513e0f2 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -226,8 +226,7 @@ ExecInitSeqScanForPartition(SeqScan *node, EState *estate, /* * check scan slot with bloom filters in seqscan node or not. 
*/ - if (gp_enable_runtime_filter_pushdown - && !estate->useMppParallelMode) + if (gp_enable_runtime_filter_pushdown) { scanstate->filter_in_seqscan = true; } diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c index 6bbd65ca80f..5a0638e5e56 100644 --- a/src/backend/lib/bloomfilter.c +++ b/src/backend/lib/bloomfilter.c @@ -312,6 +312,30 @@ bloom_total_bits(bloom_filter *filter) return filter->m; } +Size +bloom_bitset_bytes(bloom_filter *filter) +{ + return filter->m / BITS_PER_BYTE; +} + +unsigned char * +bloom_get_bitset(bloom_filter *filter) +{ + return filter->bitset; +} + +uint64 +bloom_get_seed(bloom_filter *filter) +{ + return filter->seed; +} + +int +bloom_get_k(bloom_filter *filter) +{ + return filter->k_hash_funcs; +} + /* * Create Bloom filter in caller's memory context. * diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3a9fd32741d..3461d9246cc 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1263,6 +1263,7 @@ _copyShareInputScan(const ShareInputScan *from) COPY_SCALAR_FIELD(nconsumers); COPY_SCALAR_FIELD(discard_output); COPY_SCALAR_FIELD(ref_set); + COPY_STRING_FIELD(ctename); /* ctename is a C string: duplicate it so the copy does not alias the source node's buffer */ return newnode; } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c02fcd4ea73..1ad4031bd86 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -63,6 +63,10 @@ #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" +#include "optimizer/optimizer.h" + +#include "access/attmap.h" + // TODO: these planner gucs need to be refactored into PlannerConfig.
bool gp_enable_sort_limit = false; @@ -172,6 +176,18 @@ static void bring_to_singleQE(PlannerInfo *root, RelOptInfo *rel); static bool is_query_contain_limit_groupby(Query *parse); static void handle_gen_seggen_volatile_path(PlannerInfo *root, RelOptInfo *rel); +static List * +collect_cte_quals(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti, Query *subquery); + +static void subquery_push_qual_cte(Query *subquery, Relids relids, Node *qual); + +static void +remove_cte_unused_subquery_outputs(CtePlanInfo * cteplaninfo); + +static void +set_subquery_window_filter (PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti, Query *subquery); /* * make_one_rel @@ -995,6 +1011,7 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, case RTE_CTE: +#if 0 /* * CTE tuplestores aren't shared among parallel workers, so we * force all CTE scans to happen in the leader. Also, populating @@ -1003,7 +1020,16 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * executed only once. */ return; - +#endif + /* + * CBDB_PARALLEL: + * For shared CTE, we gater partial path to single worker: + * producer. Unlinke UPSTREAM, CBDB might add path(parallel_worker=0) with + * subpaths(parallel_workers > 1) into pathlist with help of Motion. + * So that we could be parallel. + * For no-shared CTE, there is no such problem. + */ + break; case RTE_VOID: /* @@ -2536,6 +2562,10 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, */ subquery = push_down_restrict(root, rel, rte, rti, subquery); + /* set_subquery_window_filter */ + if (cbdb_enable_multi_window_agg) + set_subquery_window_filter(root, rel, rte, rti, subquery); + /* * The upper query might not use all the subquery's output columns; if * not, we can simplify. 
@@ -2856,6 +2886,81 @@ set_tablefunction_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rt } } +static void +recurse_push_qual_cte(Node *setOp, Query *topquery, + Relids relids, Node *qual) +{ + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = subrte->subquery; + + Assert(subquery != NULL); + subquery_push_qual_cte(subquery, relids, qual); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + recurse_push_qual_cte(op->larg, topquery, relids, qual); + recurse_push_qual_cte(op->rarg, topquery, relids, qual); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } +} + +/* + * subquery_push_qual - push down a qual that we have determined is safe + */ +static void +subquery_push_qual_cte(Query *subquery, Relids relids, Node *qual) +{ + if (subquery->setOperations != NULL) + { + /* Recurse to push it separately to each component query */ + recurse_push_qual_cte(subquery->setOperations, subquery, + relids, qual); + } + else + { + /* + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. + * + * This step also ensures that when we are pushing into a setop tree, + * each component query gets its own copy of the qual. + */ + qual = ReplaceVarsFromTargetList_CTE(qual, relids, 0, + subquery->targetList, + REPLACEVARS_REPORT_ERROR, 0, + &subquery->hasSubLinks); + + /* + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). 
+ */ + if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) + { + subquery->havingQual = (Node *) make_and_qual(subquery->havingQual, qual); + } + else + subquery->jointree->quals = make_and_qual(subquery->jointree->quals, qual); + + /* + * We need not change the subquery's hasAggs or hasSubLinks flags, + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. + */ + } +} + /* * set_values_pathlist * Build the (single) access path for a VALUES RTE @@ -3014,8 +3119,27 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) break; default: /* if plan sharing is enabled and contains volatile functions in the CTE query, also generate a shared scan plan */ - is_shared = root->config->gp_cte_sharing && (cte->cterefcount > 1 || contain_volatile_function); + /* + * we must use shared scan if there is volatile function, even gp_cte_sharing is false. + * SELECT count(*) FROM ( + * WITH q1(x) AS (SELECT random() FROM generate_series(1, 5)) + * SELECT * FROM q1 + * UNION + * SELECT * FROM q1 + * ) ss; + * + */ + { + if (contain_volatile_function) + is_shared = true; + else if (cte->cterefcount <= 1) /* XXX: could it be 0 ? 
*/ + is_shared = false; + else + is_shared = root->config->gp_cte_sharing; + if (Gp_role == GP_ROLE_EXECUTE) + is_shared = false; + } } /* @@ -3025,6 +3149,17 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) if (is_shared && contain_outer_selfref(cte->ctequery)) is_shared = false; + if (cte->cterecursive || + cteroot->hasRecursion || + root->hasRecursion || + root->glob->under_recursive_cte || + (cteroot->parent_root && cteroot->parent_root->glob->under_recursive_cte)) + { + cteroot->glob->under_recursive_cte = true; + is_shared = false; + } + + if (!is_shared) { PlannerConfig *config = CopyPlannerConfig(root->config); @@ -3033,7 +3168,6 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * Having multiple SharedScans can lead to deadlocks. For now, * disallow sharing of ctes at lower levels. */ - config->gp_cte_sharing = false; config->honor_order_by = false; @@ -3063,6 +3197,8 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) subroot = subquery_planner(cteroot->glob, subquery, root, cte->cterecursive, tuple_fraction, config); + + sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); } else { @@ -3095,22 +3231,299 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) config->honor_order_by = false; + /* + * collect all quals + * make them with OR clause + * push down to subquery + * keep rel's baserestrictioninfo as original + * reset rels->{root, paths, keys and etc.} + * if one have no qual, should be XXX OR (TRUE) ? 
+ */ + + cteplaninfo->subquery = copyObject(subquery); + subroot = subquery_planner(cteroot->glob, subquery, cteroot, cte->cterecursive, tuple_fraction, config); /* Select best Path and turn it into a Plan */ sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + cteplaninfo->subroot = subroot; + cteplaninfo->push_quals_possible = true; + cteplaninfo->save_columns_possible = true; + if (!contain_volatile_function && + (cte->ctematerialized == CTEMaterializeDefault) && /* Don't change if user has the decision. */ + cbdb_enable_dynamic_shared_scan && + (sub_final_rel->cheapest_total_path->rows >= 10 * cte->cterefcount * sub_final_rel->cheapest_total_path->total_cost)) + { + root->config->gp_cte_sharing = false; + cteplaninfo->push_quals_possible = false; + cteplaninfo->subroot = NULL; + cte->ctematerialized = CTEMaterializeNever; + is_shared = false; + } + subroot->is_shared_scan = is_shared; + + if (is_shared) + { + if (!IS_DUMMY_REL(sub_final_rel) && (sub_final_rel->partial_pathlist != NIL)) + { + Path * partial_path = (Path*) linitial(sub_final_rel->partial_pathlist); + + if (partial_path->parallel_workers <= 1) + add_path(sub_final_rel, partial_path, subroot); + else + { + if (!IsA(partial_path, Motion)) + { + CdbPathLocus locus = cdbpathlocus_from_subquery(root, sub_final_rel, partial_path); + locus.parallel_workers = 0; + /* + * We force to add a Motion to gather partial paths becuase Shared Scan producer could be only + * one process to write tuples. + * But the locus might be not fit enough for join, ex: HashedWrokers with parallel_workers would be + * buggy. And a Redistribute Motion will make HashedWorkers to be Hashed. 
+ */ + if (CdbPathLocus_IsHashedWorkers(locus)) + locus.locustype = CdbLocusType_Hashed; + + partial_path = cdbpath_create_motion_path(subroot, + partial_path, + partial_path->pathkeys, + false, + locus); + + add_path(sub_final_rel, partial_path, subroot); + } + } + } + } + + set_cheapest(sub_final_rel); + /* * we cannot use different plans for different instances of this CTE * reference, so keep only the cheapest */ sub_final_rel->pathlist = list_make1(sub_final_rel->cheapest_total_path); - cteplaninfo->subroot = subroot; } else subroot = cteplaninfo->subroot; + + // collect, tlist, joininfo, baserestrioninfo + Bitmapset *attrs_used = NULL; + pull_varattnos((Node *)rel->reltarget->exprs, rel->relid, &attrs_used); + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used); + } + cteplaninfo->attrs_used = bms_union(cteplaninfo->attrs_used, attrs_used); + + if (cteplaninfo->save_columns_possible) + { + /* If there is whole row, we can't save any columns. 
*/ + if (bms_is_member(-FirstLowInvalidHeapAttributeNumber, cteplaninfo->attrs_used)) + cteplaninfo->save_columns_possible = false; + else + cteplaninfo->save_columns_possible = (bms_num_members(cteplaninfo->attrs_used) != list_length(cteplaninfo->subquery->targetList)); + } + + if (cteplaninfo->push_quals_possible || cteplaninfo->save_columns_possible) + { + /* Also replace Vars with subquery's targetlist */ + List *quals = collect_cte_quals(root, rel, rte, rel->relid, subquery); + cteplaninfo->rels = lappend(cteplaninfo->rels, rel); + cteplaninfo->relids = bms_add_member(cteplaninfo->relids, rel->relid); + + if (quals != NIL) + cteplaninfo->list_quals = lappend(cteplaninfo->list_quals, make_andclause(quals)); + else + { + /* if quals is NIL, it means a cte ref need all the data from cte */ + cteplaninfo->push_quals_possible = false; + cteplaninfo->list_quals = NIL; + } + + + if (list_length(cteplaninfo->rels) == cte->cterefcount && + (cteplaninfo->push_quals_possible || cteplaninfo->save_columns_possible)) + { + /* Do a second plan for shared cte. */ + + PlannerConfig *config = CopyPlannerConfig(root->config); + + /* + * Having multiple SharedScans can lead to deadlocks. For now, + * disallow sharing of ctes at lower levels. 
+ */ + config->gp_cte_sharing = false; + + config->honor_order_by = false; + + if (cteplaninfo->push_quals_possible) + { + Expr *new_quals = convert_expr_to_cnf_complete(make_orclause(cteplaninfo->list_quals)); + + List *quals = make_ands_implicit(new_quals); + + ListCell *lc; + // hack to make varno = 1 in bms + cteplaninfo->relids = bms_add_member(cteplaninfo->relids, 1); + foreach(lc, quals) + { + subquery_push_qual_cte(cteplaninfo->subquery, cteplaninfo->relids, (Node *)lfirst(lc)); + } + } + + // remove unused columns + if (cteplaninfo->save_columns_possible) + remove_cte_unused_subquery_outputs(cteplaninfo); + + subroot = subquery_planner(cteroot->glob, cteplaninfo->subquery, cteroot, cte->cterecursive, + tuple_fraction, config); + + /* Select best Path and turn it into a Plan */ + sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + + /* + * we cannot use different plans for different instances of this CTE + * reference, so keep only the cheapest one. + */ + + if (!IS_DUMMY_REL(sub_final_rel) && (sub_final_rel->partial_pathlist != NIL)) + { + Path * partial_path = (Path*) linitial(sub_final_rel->partial_pathlist); + + if (partial_path->parallel_workers <= 1) + add_path(sub_final_rel, partial_path, subroot); + else + { + if (!IsA(partial_path, Motion)) + { + CdbPathLocus locus = cdbpathlocus_from_subquery(root, sub_final_rel, partial_path); + locus.parallel_workers = 0; + /* + * See comments above. 
+ */ + if (CdbPathLocus_IsHashedWorkers(locus)) + locus.locustype = CdbLocusType_Hashed; + + partial_path = cdbpath_create_motion_path(subroot, + partial_path, + partial_path->pathkeys, + false, + locus); + + add_path(sub_final_rel, partial_path, subroot); + } + } + } + + set_cheapest(sub_final_rel); + + sub_final_rel->pathlist = list_make1(sub_final_rel->cheapest_total_path); + + cteplaninfo->subroot = subroot; + + Path *best_path = sub_final_rel->cheapest_total_path; + CdbPathLocus locus; + double sub_total_rows = 0; + + if (!IS_DUMMY_REL(sub_final_rel)) + { + double numsegments; + + if (CdbPathLocus_IsPartitioned(sub_final_rel->cheapest_total_path->locus)) + numsegments = CdbPathLocus_NumSegments(sub_final_rel->cheapest_total_path->locus); + else + numsegments = 1; + sub_total_rows = sub_final_rel->cheapest_total_path->rows * numsegments; + + } + + if (sub_total_rows == 0) + sub_total_rows = 1; + + foreach (lc, cteplaninfo->rels) + { + RelOptInfo *cte_rel = (RelOptInfo*) lfirst(lc); + Relids required_outer = cte_rel->lateral_relids; + Path *cte_path = NULL; + + if (IS_DUMMY_REL(sub_final_rel)) + { + set_dummy_rel_pathlist(root, cte_rel); + continue; + } + + /* + * Set size estimates per consumer, respecting RTE type. + * CTE consumers that appear as RTE_SUBQUERY (e.g. inside + * a subquery wrapper) need set_subquery_size_estimates. + */ + RangeTblEntry *cte_rte = planner_rt_fetch(cte_rel->relid, root); + if (cte_rte->rtekind == RTE_CTE) + set_cte_size_estimates(root, cte_rel, sub_total_rows); + else if (cte_rte->rtekind == RTE_SUBQUERY) + set_subquery_size_estimates(root, cte_rel); + else + Assert(false); + + /* + * Compute locus and pathkeys per consumer rel so that + * distribution key Vars reference each consumer's own + * relid. Sharing a single producer-based locus caused + * the planner to treat different CTE references as + * co-located, skipping necessary Redistribute Motions + * (e.g. TPC-DS Q75 self-join on a subset of GROUP BY + * keys). 
+ */ + locus = cdbpathlocus_from_subquery(root, cte_rel, best_path); + pathkeys = convert_subquery_pathkeys(root, cte_rel, best_path->pathkeys, + make_tlist_from_pathtarget(best_path->pathtarget)); + + cte_rel->subroot = subroot; + + /* truncate preivous path */ + cte_rel->pathlist = NIL; + cte_path = create_ctescan_path(root, + cte_rel, + NULL /* is_shared */, + locus, + pathkeys, + required_outer); + + /* Correct the hazrads here using best_path_ */ + cte_path->barrierHazard = best_path->barrierHazard; + cte_path->motionHazard = best_path->motionHazard; + + /* Generate appropriate path */ + add_path(cte_rel, cte_path, root); + + /* + * For shared scan, we must gather parallel to write tuples in producer. + * We also do that in partial_pathlist for possible parallel. + */ + if (rel->consider_parallel) + { + cte_path = create_ctescan_path(root, + rel, + NULL /* is_shared */, + locus, + pathkeys, + required_outer); + + cte_path->barrierHazard = best_path->barrierHazard; + cte_path->motionHazard = best_path->motionHazard; + add_partial_path(rel, cte_path); + } + } + return; + } + } } rel->subroot = subroot; @@ -3159,21 +3572,78 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) Path *subpath = (Path *) lfirst(lc); List *pathkeys; CdbPathLocus locus; + Path *cte_path = NULL; locus = cdbpathlocus_from_subquery(root, rel, subpath); /* Convert subquery pathkeys to outer representation */ pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys, make_tlist_from_pathtarget(subpath->pathtarget)); + + cte_path = create_ctescan_path(root, + rel, + is_shared ? NULL : subpath, + locus, + pathkeys, + required_outer); + if (is_shared) + { + /* if shared, there could be only one path of sub_final_rel. 
*/ + cte_path->barrierHazard = subpath->barrierHazard; + cte_path->motionHazard = subpath->motionHazard; + } + + /* Generate appropriate path */ + add_path(rel, cte_path, root); + } + + /* Also add partial paths for cte, for possibile paralle join and etc. */ + if (sub_final_rel->partial_pathlist != NIL) + { + List *pathkeys; + CdbPathLocus locus; + Path * subpath; /* Generate appropriate path */ - add_path(rel, create_ctescan_path(root, + if (!is_shared) + { + subpath = (Path*) linitial(sub_final_rel->partial_pathlist); + locus = cdbpathlocus_from_subquery(root, rel, subpath); + + /* Convert subquery pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + add_partial_path(rel, create_ctescan_path(root, + rel, + subpath, + locus, + pathkeys, + required_outer)); + } + else if (rel->consider_parallel) + { + /* + * For shared scan, we must gather parallel to write tuples in producer. + * We also do that in partial_pathlist for possible parallel. + */ + Assert(sub_final_rel->cheapest_total_path); + subpath = sub_final_rel->cheapest_total_path; + locus = cdbpathlocus_from_subquery(root, rel, subpath); + /* Convert subquery pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + Path *cte_path = create_ctescan_path(root, rel, - is_shared ? 
NULL : subpath, + NULL /* is_shared */, locus, pathkeys, - required_outer), - root); + required_outer); + + cte_path->barrierHazard = subpath->barrierHazard; + cte_path->motionHazard = subpath->motionHazard; + add_partial_path(rel, cte_path); + } } } @@ -4004,6 +4474,153 @@ push_down_restrict(PlannerInfo *root, RelOptInfo *rel, return subquery; } +static void +set_subquery_window_filter (PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti, Query *subquery) +{ + Node* window_filter = NULL; + WindowFunc* winfunc_candidate = NULL; + int winref = 0; + ListCell *lc = NULL; + TargetEntry *tle = NULL; + int *window_attr_refs; + + if(!subquery->hasWindowFuncs || + rel->baserestrictinfo == NIL) + return; + + int size = list_length(subquery->targetList); + window_attr_refs = (int*) palloc0(size*sizeof(int)); + + foreach(lc, subquery->targetList) + { + tle = (TargetEntry *)lfirst(lc); + if (IsA(tle->expr, WindowFunc)) + { + WindowFunc* winfunc = (WindowFunc*) (tle->expr); + if (winfunc->winfnoid == F_RANK_ || + winfunc->winfnoid == F_DENSE_RANK_) + window_attr_refs[tle->resno - 1] = winfunc->winref; + } + } + + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *clause = (Node *) rinfo->clause; + + if (rinfo->pseudoconstant) + continue; + + if (!IsA(clause, OpExpr)) + continue; + + OpExpr *op = (OpExpr*) clause; + if (op->opno != 420 /* <= */ && + op->opno != 418 /* < */) + continue; + + if (list_length(op->args) != 2) + continue; + + Node * leftop = (Node *) linitial(op->args); + Node * rightop = (Node *) lsecond(op->args); + if (!IsA(leftop, Var) || + !IsA(rightop, Const)) + continue; + + Var *var = (Var*) leftop; + + if (var->varno != rti) + continue; + + if (window_attr_refs[var->varattno - 1] == 0) + continue; + + /* fail if there were already one. */ + if (window_filter != NULL) + { + pfree(window_attr_refs); + return; + } + + /* Now we found a candidate. 
*/ + window_filter = clause; + winref = window_attr_refs[var->varattno - 1]; + TargetEntry *tle = (TargetEntry *) list_nth(subquery->targetList, var->varattno -1); + Assert(IsA(tle->expr, WindowFunc)); + winfunc_candidate = (WindowFunc*)copyObject(tle->expr); + + } + + if (window_filter) + { + root->lower_window_filter = copyObject(window_filter); + /* record window expr, as var will be replaced later. */ + list_nth_replace(((OpExpr *) root->lower_window_filter)->args, 0, winfunc_candidate); + root->lower_window_filter_winref = winref; + } + pfree(window_attr_refs); + + return; +} + +/* + * collect_cte_quals - Collect pushdown-safe quals from CTE references + * + * Returns a list of qual expressions that can be safely pushed down + * into the CTE subquery. + */ +static List * +collect_cte_quals(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti, Query *subquery) +{ + List *quals = NIL; + + pushdown_safety_info safetyInfo; + + /* Nothing to do here if it doesn't have qual at all */ + if (rel->baserestrictinfo == NIL) + return NIL; + + memset(&safetyInfo, 0, sizeof(safetyInfo)); + safetyInfo.unsafeColumns = (bool *) + palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); + + safetyInfo.unsafeLeaky = rte->security_barrier; + + if (subquery_is_pushdown_safe(subquery, subquery, &safetyInfo)) + { + /* OK to consider pushing down individual quals */ + ListCell *l; + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *qual= (Node *) rinfo->clause; + + if (!rinfo->pseudoconstant && + qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo)) + { + // TODO: replace varno to the subquery itself. + // Is it possible that baseresctictinfo has quals with sublink or whole row? 
+ + qual = ReplaceVarnoFromSubquery(qual, rti, 0, rte, + subquery->targetList, + REPLACEVARS_REPORT_ERROR, 0, + &subquery->hasSubLinks); + /* valid quals */ + quals = lappend(quals, qual); + } + } + } + pfree(safetyInfo.unsafeColumns); + quals = list_copy_deep(quals); + + return quals; +} + + /* * subquery_is_pushdown_safe - is a subquery safe for pushing down quals? * @@ -4483,8 +5100,7 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) subquery->havingQual = make_and_qual(subquery->havingQual, qual); else - subquery->jointree->quals = - make_and_qual(subquery->jointree->quals, qual); + subquery->jointree->quals = make_and_qual(subquery->jointree->quals, qual); /* * We need not change the subquery's hasAggs or hasSubLinks flags, @@ -4528,6 +5144,71 @@ recurse_push_qual(Node *setOp, Query *topquery, * SIMPLIFYING SUBQUERY TARGETLISTS *****************************************************************************/ +static void +remove_cte_unused_subquery_outputs(CtePlanInfo * cteplaninfo) +{ + ListCell *lc; + AttrMap *attrMap; + Query *subquery = cteplaninfo->subquery; + Bitmapset *attrs_used = cteplaninfo->attrs_used; + int new_resno = 1; + + attrMap = make_attrmap(list_length(subquery->targetList)); + + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *texpr = (Node *) tle->expr; + + + if (!bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber, attrs_used)) + { + attrMap->attnums[tle->resno - 1] = 0; // no upper need this + } + else + { + attrMap->attnums[tle->resno - 1] = new_resno; + new_resno++; + //new_tlist = lappend(new_tlist, (TargetEntry *) copyObject(tle)); + continue; + } + + /* + * If it has a sortgroupref number, it's used in some sort/group + * clause so we'd better not remove it. 
Also, don't remove any + * resjunk columns, since their reason for being has nothing to do + * with anybody reading the subquery's output. (It's likely that + * resjunk columns in a sub-SELECT would always have ressortgroupref + * set, but even if they don't, it seems imprudent to remove them.) + */ + if (tle->ressortgroupref || tle->resjunk) + continue; + + if (subquery->setOperations) + continue; + + if (subquery->distinctClause && !subquery->hasDistinctOn) + continue; + + if (subquery->hasTargetSRFs && + expression_returns_set(texpr)) + continue; + + if (contain_volatile_functions(texpr)) + continue; + + /* + * OK, we don't need it. Replace the expression with a NULL constant. + * Preserve the exposed type of the expression, in case something + * looks at the rowtype of the subquery's result. + */ + tle->expr = (Expr *) makeNullConst(exprType(texpr), + exprTypmod(texpr), + exprCollation(texpr)); + } + cteplaninfo->attr_map = attrMap; +} + /* * remove_unused_subquery_outputs * Remove subquery targetlist items we don't need @@ -5272,3 +5953,5 @@ debug_print_rel(PlannerInfo *root, RelOptInfo *rel) } #endif /* OPTIMIZER_DEBUG */ + + diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index f4a6aad7ed8..2e41a9659a7 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -31,6 +31,7 @@ #include "utils/selfuncs.h" #include "cdb/cdbvars.h" /* cdb GUCs */ +#include "utils/guc.h" /* * Data structure for accumulating info about possible range-query @@ -941,6 +942,8 @@ clause_selectivity_ext(PlannerInfo *root, opclause->inputcollid, jointype, sjinfo); + if (sjinfo->jointype == JOIN_INNER && s1 < 0.00001) + s1 = 1.0 - pow(1.0 - s1, cbdb_inner_join_selectivity_damping_factor); } else { diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index bff31340128..55eeb8f6586 100644 --- a/src/backend/optimizer/path/joinpath.c +++ 
b/src/backend/optimizer/path/joinpath.c @@ -2317,7 +2317,9 @@ hash_inner_and_outer(PlannerInfo *root, * extended rows. Also, the resulting path must not be parameterized. */ if (joinrel->consider_parallel && - save_jointype != JOIN_UNIQUE_OUTER && + /* save_jointype != JOIN_UNIQUE_OUTER && */ + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { @@ -2328,25 +2330,53 @@ hash_inner_and_outer(PlannerInfo *root, cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist); + if (save_jointype == JOIN_UNIQUE_OUTER) + { + cheapest_partial_outer = (Path *) create_unique_path(root, outerrel, + cheapest_partial_outer, extra->sjinfo); + if (!cheapest_partial_outer) + return; + } + /* * Can we use a partial inner plan too, so that we can build a * shared hash table in parallel? We can't handle * JOIN_UNIQUE_INNER because we can't guarantee uniqueness. */ if (innerrel->partial_pathlist != NIL && - save_jointype != JOIN_UNIQUE_INNER && + /* save_jointype != JOIN_UNIQUE_INNER && */ enable_parallel_hash) { cheapest_partial_inner = (Path *) linitial(innerrel->partial_pathlist); - try_partial_hashjoin_path(root, joinrel, - cheapest_partial_outer, - cheapest_partial_inner, - hashclauses, - jointype, - save_jointype, - extra, - true /* parallel_hash */ ); + + if (save_jointype != JOIN_UNIQUE_INNER) + { + try_partial_hashjoin_path(root, joinrel, + cheapest_partial_outer, + cheapest_partial_inner, + hashclauses, + jointype, + save_jointype, + extra, + true /* parallel_hash */ ); + } + else + { + cheapest_partial_inner= (Path *) create_unique_path(root, innerrel, + cheapest_partial_inner, extra->sjinfo); + if (cheapest_partial_inner) + { + try_partial_hashjoin_path(root, joinrel, + cheapest_partial_outer, + cheapest_partial_inner, + hashclauses, + JOIN_INNER, /* convert to inner join. 
*/ + save_jointype, + extra, + true /* parallel_hash */ ); + } + } } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 20c1587b062..8cda1c8c6cd 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4780,7 +4780,8 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, cteplaninfo->shared_plan = prepare_plan_for_sharing(cteroot, subplan); } /* Wrap the common Plan tree in a ShareInputScan node */ - subplan = share_prepared_plan(cteroot, cteplaninfo->shared_plan); + subplan = share_prepared_plan(cteroot, cteplaninfo->shared_plan, rte->ctename); + ((ShareInputScan*) subplan)->cteplaninfo = cteplaninfo; } scan_plan = (Plan *) make_subqueryscan(tlist, @@ -5377,6 +5378,23 @@ create_nestloop_plan(PlannerInfo *root, if (partition_selectors_created) join_plan->join.prefetch_inner = true; + + int outer_sisc_role = contain_ShareInputScan_detail(root, (Node*) outer_plan); + int inner_sisc_role = contain_ShareInputScan_detail(root, (Node*) inner_plan); + + if (join_plan->join.prefetch_inner) + { + /* We have to execute outer first if outer has producer and inner has a consumer. */ + if ((outer_sisc_role & (SISC_PRODUCER)) && + ((inner_sisc_role & SISC_PRODUCER) == 0) && + (inner_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_inner = false; + } + else if (inner_sisc_role & (SISC_PRODUCER)) + { + join_plan->join.prefetch_inner = true; + } + /* * A motion deadlock can also happen when outer and joinqual both contain * motions. It is not easy to check for joinqual here, so we set the @@ -5389,6 +5407,13 @@ create_nestloop_plan(PlannerInfo *root, best_path->outerjoinpath->motionHazard && join_plan->join.joinqual != NIL) join_plan->join.prefetch_joinqual = true; + + int joinqual_sisc_role = contain_ShareInputScan_detail(root, (Node *) join_plan->join.joinqual); + + /* If we have producer in outer plan, we have to execute it first. 
*/ + if ((outer_sisc_role & (SISC_PRODUCER)) && + (joinqual_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_joinqual = false; /* * Similar for non join qual. @@ -5398,6 +5423,12 @@ create_nestloop_plan(PlannerInfo *root, join_plan->join.plan.qual != NIL) join_plan->join.prefetch_qual = true; + int qual_sisc_role = contain_ShareInputScan_detail(root, (Node *) join_plan->join.plan.qual); + + if ((outer_sisc_role & (SISC_PRODUCER)) && + (qual_sisc_role& (SISC_CONSUMER))) + join_plan->join.prefetch_qual = false; + return join_plan; } @@ -5754,6 +5785,22 @@ create_mergejoin_plan(PlannerInfo *root, if (partition_selectors_created) join_plan->join.prefetch_inner = true; + int outer_sisc_role = contain_ShareInputScan_detail(root, (Node*) outer_plan); + int inner_sisc_role = contain_ShareInputScan_detail(root, (Node*) inner_plan); + + if (join_plan->join.prefetch_inner) + { + /* We have to execute outer first if outer has producer and inner has a consumer. */ + if ((outer_sisc_role & (SISC_PRODUCER)) && + ((inner_sisc_role & SISC_PRODUCER) == 0) && + (inner_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_inner = false; + } + else if (inner_sisc_role & (SISC_PRODUCER)) + { + join_plan->join.prefetch_inner = true; + } + /* * A motion deadlock can also happen when outer and joinqual both contain * motions. It is not easy to check for joinqual here, so we set the @@ -5775,6 +5822,13 @@ create_mergejoin_plan(PlannerInfo *root, join_plan->join.joinqual != NIL) join_plan->join.prefetch_joinqual = true; + int joinqual_sisc_role = contain_ShareInputScan_detail(root, (Node*)join_plan->join.joinqual); + + /* If we have producer in outer plan, we have to execute it first. */ + if ((outer_sisc_role & (SISC_PRODUCER)) && + (joinqual_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_joinqual = false; + /* * Similar for non join qual. 
*/ @@ -5783,6 +5837,12 @@ create_mergejoin_plan(PlannerInfo *root, join_plan->join.plan.qual != NIL) join_plan->join.prefetch_qual = true; + int qual_sisc_role = contain_ShareInputScan_detail(root, (Node*) join_plan->join.plan.qual); + + if ((outer_sisc_role & (SISC_PRODUCER)) && + (qual_sisc_role& (SISC_CONSUMER))) + join_plan->join.prefetch_qual = false; + /* Costs of sort and material steps are included in path cost already */ copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -6042,6 +6102,22 @@ create_hashjoin_plan(PlannerInfo *root, if (partition_selectors_created) join_plan->join.prefetch_inner = true; + int outer_sisc_role = contain_ShareInputScan_detail(root, (Node*) outer_plan); + int inner_sisc_role = contain_ShareInputScan_detail(root, (Node*)inner_plan); + + if (join_plan->join.prefetch_inner) + { + /* We have to execute outer first if outer has producer and inner has a consumer. */ + if ((outer_sisc_role & (SISC_PRODUCER)) && + ((inner_sisc_role & SISC_PRODUCER) == 0) && + (inner_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_inner = false; + } + else if (inner_sisc_role & (SISC_PRODUCER)) + { + join_plan->join.prefetch_inner = true; + } + /* * A motion deadlock can also happen when outer and joinqual both contain * motions. It is not easy to check for joinqual here, so we set the @@ -6055,6 +6131,13 @@ create_hashjoin_plan(PlannerInfo *root, join_plan->join.joinqual != NIL) join_plan->join.prefetch_joinqual = true; + int joinqual_sisc_role = contain_ShareInputScan_detail(root, (Node*) join_plan->join.joinqual); + + /* If we have producer in outer plan, we have to execute it first. */ + if ((outer_sisc_role & (SISC_PRODUCER)) && + (joinqual_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_joinqual = false; + /* * Similar for non join qual. 
*/ @@ -6063,6 +6146,12 @@ create_hashjoin_plan(PlannerInfo *root, join_plan->join.plan.qual != NIL) join_plan->join.prefetch_qual = true; + int qual_sisc_role = contain_ShareInputScan_detail(root, (Node*) join_plan->join.plan.qual); + + if ((outer_sisc_role & (SISC_PRODUCER)) && + (qual_sisc_role & (SISC_CONSUMER))) + join_plan->join.prefetch_qual = false; + copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); return join_plan; diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index b8cd960a528..6e7b92b8182 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1947,8 +1947,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (!bms_is_subset(relids, qualscope)) { PostponedQual *pq = (PostponedQual *) palloc(sizeof(PostponedQual)); - +#if 0 Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */ +#endif Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */ pq->qual = clause; pq->relids = relids; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7129ab3fab3..fb344fa3ba1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -87,6 +87,8 @@ #include "storage/lmgr.h" #include "utils/guc.h" +#include "cdb/cdbmutate.h" + #ifdef USE_ORCA extern void InitGPOPT(); #endif @@ -733,13 +735,22 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, * * apply_shareinput will fix shared_id, and change the DAG to a tree. */ + int subplan_id = 0; forboth(lp, glob->subplans, lr, glob->subroots) { Plan *subplan = (Plan *) lfirst(lp); PlannerInfo *subroot = (PlannerInfo *) lfirst(lr); + subplan_id++; + + apply_shareinput_dag_to_tree_from_subplan = true; + + /* We must make producer in InitPlan if it was. 
*/ + if (bms_is_member(subplan_id, root->init_plan_ids)) + apply_shareinput_dag_to_tree_from_subplan = false; lfirst(lp) = apply_shareinput_dag_to_tree(subroot, subplan); } + apply_shareinput_dag_to_tree_from_subplan = false; top_plan = apply_shareinput_dag_to_tree(root, top_plan); /* final cleanup of the plan */ @@ -930,6 +941,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->eq_classes = NIL; root->non_eq_clauses = NIL; root->init_plans = NIL; + root->init_plan_ids = NULL; root->list_cteplaninfo = NIL; if (parse->cteList != NIL) @@ -967,6 +979,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->non_recursive_path = NULL; root->partColsUpdated = false; root->is_correlated_subplan = false; + root->lower_window_filter = NULL; + root->lower_window_filter_winref = 0; + root->upper_window_filter = parent_root ? copyObject(parent_root->lower_window_filter) : NULL; + root->upper_window_filter_winref = parent_root ? parent_root->lower_window_filter_winref : 0; /* * Save a copy of the raw parse tree for AQUMV join exact-match. @@ -4596,7 +4612,10 @@ consider_groupingsets_paths(PlannerInfo *root, dNumGroups = clamp_row_est(dNumGroupsTotal / CdbPathLocus_NumSegments(path->locus)); if (path->locus.parallel_workers > 1) + { dNumGroups /= path->locus.parallel_workers; + dNumGroups = clamp_row_est(dNumGroups); + } } else dNumGroups = dNumGroupsTotal; @@ -5021,29 +5040,6 @@ create_one_window_path(PlannerInfo *root, path->pathkeys, &presorted_keys); - /* - * Unless the PARTITION BY in the window happens to match the - * current distribution, we need a motion. Each partition - * needs to be handled in the same segment. - * - * If there is no PARTITION BY, then all rows form a single - * partition, so we need to gather all the tuples to a single - * node. But we'll do that after the Sort, so that the Sort - * is parallelized. - * - * This is the same logic that is used for sorted Aggregates. 
- */ - - path = cdb_prepare_path_for_sorted_agg(root, - is_sorted, - presorted_keys, - window_rel, - path, - path->pathtarget, - window_pathkeys, - -1.0, - wc->partitionClause, - NIL); if (lnext(activeWindows, l)) { /* @@ -5071,6 +5067,44 @@ create_one_window_path(PlannerInfo *root, window_target = output_target; } + if (cbdb_enable_multi_window_agg && + root->upper_window_filter && + root->upper_window_filter_winref == wc->winref) + path = cdb_create_pre_window_agg_path(root, + is_sorted, + presorted_keys, + window_rel, + path, + path->pathtarget, + window_pathkeys, + window_target, + wflists->windowFuncs[wc->winref], + wc); + + /* + * Unless the PARTITION BY in the window happens to match the + * current distribution, we need a motion. Each partition + * needs to be handled in the same segment. + * + * If there is no PARTITION BY, then all rows form a single + * partition, so we need to gather all the tuples to a single + * node. But we'll do that after the Sort, so that the Sort + * is parallelized. + * + * This is the same logic that is used for sorted Aggregates. + */ + + path = cdb_prepare_path_for_sorted_agg(root, + is_sorted, + presorted_keys, + window_rel, + path, + path->pathtarget, + window_pathkeys, + -1.0, + wc->partitionClause, + NIL); + path = (Path *) create_windowagg_path(root, window_rel, path, window_target, wflists->windowFuncs[wc->winref], @@ -7501,7 +7535,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, /* Now decide what to stick atop it */ if (parse->groupingSets) { - /* do nothing, not support parallel now */ + /* do nothing, could not support parallel directly */ } else if (parse->hasAggs || parse->groupClause) { @@ -8120,17 +8154,34 @@ create_partial_grouping_paths(PlannerInfo *root, /* Estimate number of partial groups. 
*/ if (cheapest_total_path != NULL) + { dNumPartialGroups = - get_number_of_groups(root, - cheapest_total_path->rows, - gd, - extra->targetList); + get_number_of_groups(root, cheapest_total_path->rows, + gd, extra->targetList); + + /* + * When estimated groups exceed cbdb_2phase_agg_cardinality_cap times + * the input rows, the estimate is likely unreliable (e.g., from default + * statistics on UNION ALL subquery columns). Cap at 10% of input rows + * to give 2-phase aggregation a fair chance in cost comparison. + */ + if (gp_use_streaming_hashagg && + cbdb_2phase_agg_cardinality_cap < 1.0 && + dNumPartialGroups > cheapest_total_path->rows * cbdb_2phase_agg_cardinality_cap) + dNumPartialGroups = clamp_row_est(cheapest_total_path->rows * 0.1); + } if (cheapest_partial_path != NULL) + { dNumPartialPartialGroups = - get_number_of_groups(root, - cheapest_partial_path->rows, - gd, - extra->targetList); + get_number_of_groups(root, cheapest_partial_path->rows, + gd, extra->targetList); + + if (gp_use_streaming_hashagg && + cbdb_2phase_agg_cardinality_cap < 1.0 && + dNumPartialPartialGroups > cheapest_partial_path->rows * cbdb_2phase_agg_cardinality_cap) + dNumPartialPartialGroups = + clamp_row_est(cheapest_partial_path->rows * 0.1); + } if (can_sort && cheapest_total_path != NULL) { @@ -8392,7 +8443,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_INITIAL_SERIAL, - false, + gp_use_streaming_hashagg, /* streaming */ parse->groupClause, NIL, agg_partial_costs, @@ -8425,7 +8476,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_INITIAL_SERIAL, - false, + gp_use_streaming_hashagg, /* streaming */ parse->groupClause, NIL, agg_partial_costs, @@ -9208,16 +9259,6 @@ create_partial_window_path(PlannerInfo *root, path->pathkeys, &presorted_keys); - path = cdb_prepare_path_for_sorted_agg(root, - is_sorted, - presorted_keys, - window_rel, - path, - path->pathtarget, - window_pathkeys, -
-1.0, - wc->partitionClause, - NIL); if (lnext(activeWindows, l)) { ListCell *lc2; @@ -9236,6 +9277,31 @@ create_partial_window_path(PlannerInfo *root, window_target = output_target; } + if (cbdb_enable_multi_window_agg && + root->upper_window_filter && + root->upper_window_filter_winref == wc->winref) + path = cdb_create_pre_window_agg_path(root, + is_sorted, + presorted_keys, + window_rel, + path, + path->pathtarget, + window_pathkeys, + window_target, + wflists->windowFuncs[wc->winref], + wc); + + path = cdb_prepare_path_for_sorted_agg(root, + is_sorted, + presorted_keys, + window_rel, + path, + path->pathtarget, + window_pathkeys, + -1.0, + wc->partitionClause, + NIL); + path = (Path *) create_windowagg_path(root, window_rel, path, window_target, wflists->windowFuncs[wc->winref], diff --git a/src/backend/optimizer/plan/planshare.c b/src/backend/optimizer/plan/planshare.c index f3b716364ca..816cdb23e04 100644 --- a/src/backend/optimizer/plan/planshare.c +++ b/src/backend/optimizer/plan/planshare.c @@ -24,7 +24,7 @@ #include "optimizer/planshare.h" static ShareInputScan * -make_shareinputscan(PlannerInfo *root, Plan *inputplan) +make_shareinputscan(PlannerInfo *root, Plan *inputplan, char *ctename) { ShareInputScan *sisc; Path sipath; @@ -55,6 +55,7 @@ make_shareinputscan(PlannerInfo *root, Plan *inputplan) sisc->scan.plan.locustype = inputplan->locustype; sisc->scan.plan.parallel = 0; /* No parallel ShareInputScan */ + sisc->ctename = ctename; return sisc; } @@ -86,7 +87,7 @@ prepare_plan_for_sharing(PlannerInfo *root, Plan *common) * prepare_plan_for_sharing(). 
*/ Plan * -share_prepared_plan(PlannerInfo *root, Plan *common) +share_prepared_plan(PlannerInfo *root, Plan *common, char* ctename) { - return (Plan *) make_shareinputscan(root, common); + return (Plan *) make_shareinputscan(root, common, ctename); } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 5ed6f9dacab..d5d9c93a3a6 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -214,6 +214,21 @@ static Plan *cdb_insert_result_node(PlannerInfo *root, static bool cdb_extract_plan_dependencies_walker(Node *node, cdb_extract_plan_dependencies_context *context); +static CtePlanInfo * +get_cte_plan_info(Plan *plan); + +typedef struct CteAttrMapContext +{ + Relids relids; + AttrNumber *newattno; /* The mapping table to remap the varattno */ +} CteAttrMapContext; + +static bool +change_varattnos_of_ShareInputScan_walker(Node *node, const CteAttrMapContext *attrMapCxt); + +static void +change_varattnos_of_ShareInputScan(Node *node, CtePlanInfo *cteplaninfo); + #ifdef USE_ASSERT_CHECKING #include "cdb/cdbplan.h" @@ -1570,6 +1585,49 @@ set_indexonlyscan_references(PlannerInfo *root, return (Plan *) plan; } +CtePlanInfo * +get_cte_plan_info(Plan *plan) +{ + Assert(IsA(plan, ShareInputScan)); + return ((ShareInputScan*) plan)->cteplaninfo; +} + +/* + * Remaps the varattno of a varattno in a Var node using an attribute map. 
+ */ +static bool +change_varattnos_of_ShareInputScan_walker(Node *node, const CteAttrMapContext *attrMapCxt) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == 0 && + var->varattno > 0 && + bms_is_member(var->varno, attrMapCxt->relids)) + { + Assert(attrMapCxt->newattno[var->varattno - 1]); + var->varattno = var->varattnosyn = attrMapCxt->newattno[var->varattno - 1]; + } + return false; + } + return expression_tree_walker(node, change_varattnos_of_ShareInputScan_walker, + (void *) attrMapCxt); +} + +static void +change_varattnos_of_ShareInputScan(Node *node, CtePlanInfo *cteplaninfo) +{ + CteAttrMapContext attrMapCxt; + + attrMapCxt.newattno = cteplaninfo->attr_map->attnums; + attrMapCxt.relids = cteplaninfo->relids; + + (void) change_varattnos_of_ShareInputScan_walker(node, &attrMapCxt); +} + /* * set_subqueryscan_references * Do set_plan_references processing on a SubqueryScan @@ -1584,6 +1642,9 @@ set_subqueryscan_references(PlannerInfo *root, { RelOptInfo *rel; Plan *result; + bool is_producer = false; + CtePlanInfo *cteplaninfo = NULL; + bool omit_subqueryscan = true; /* Need to look up the subquery's RelOptInfo, since we need its subroot */ rel = find_base_rel(root, plan->scan.scanrelid); @@ -1591,7 +1652,22 @@ set_subqueryscan_references(PlannerInfo *root, /* Recursively process the subplan */ plan->subplan = set_plan_references(rel->subroot, plan->subplan); - if (trivial_subqueryscan(plan)) + if (IsA(plan->subplan, ShareInputScan)) + { + if (plan->subplan->lefttree != NULL) + is_producer = true; + + cteplaninfo = get_cte_plan_info(plan->subplan); + + if (cteplaninfo && cteplaninfo->attr_map != NULL) + omit_subqueryscan = false; /* can not omit as we will adjust columns.*/ + } + + /* + * Producer needs to insert Result node, so don't omit here. + * Consumer needs to adjust targetlist too. 
+ */ + if (omit_subqueryscan && trivial_subqueryscan(plan)) { /* * We can omit the SubqueryScan node and just pull up the subplan. @@ -1609,14 +1685,184 @@ set_subqueryscan_references(PlannerInfo *root, */ plan->scan.scanrelid += rtoffset; - //Assert(plan->scan.scanrelid <= list_length(glob->finalrtable) && "Scan node's relid is outside the finalrtable!"); + if (IsA(plan->subplan, ShareInputScan) && + cteplaninfo && + (cteplaninfo->attr_map != NULL)) + { + /* + * Subquery [attno: 5] + * -> ShareInputScan arrno[1, 2, 3, 4, 5] + * ->lefttree attrno [1, 2, 3, 4, 5] + * + * + * Subquery [attno: 1] + * -> ShareInputScan arrno[1] + * -> Result [attno: 5] + * ->lefttree attrno [1, 2, 3, 4, 5] + */ + List *new_tlist = NIL; + ListCell *lc; + foreach(lc, plan->subplan->targetlist) + { + TargetEntry *tle = (TargetEntry*) lfirst(lc); + AttrNumber new_resno = cteplaninfo->attr_map->attnums[tle->resno - 1]; + if (new_resno != 0) + { + // we need this column + TargetEntry *newtle = flatCopyTargetEntry(tle); + newtle->resno = new_resno; + newtle->ressortgroupref = 0 ; + Var *new_var = (Var *)copyObject(tle->expr); + newtle->expr = (Expr *) new_var; + new_tlist = lappend(new_tlist, newtle); + } + } + + if (is_producer) + { + /* insert result node */ + Plan *resultplan; + resultplan = (Plan *) make_result(new_tlist, NULL, plan->subplan->lefttree); + resultplan->flow = plan->subplan->lefttree->flow; + + plan->subplan->lefttree = resultplan; + /* we must update the shared plan for correct tlist used later. */ + root->glob->share.shared_plans[((ShareInputScan*)plan->subplan)->share_id] = resultplan; + } + + plan->subplan->targetlist = copyObject(new_tlist); + foreach(lc, plan->subplan->targetlist) + { + TargetEntry *tle = (TargetEntry*) lfirst(lc); + Var *var = (Var *)tle->expr; + var->varattno = tle->resno; + } + + foreach (lc, plan->scan.plan.targetlist) + { + TargetEntry *tle = (TargetEntry *)lfirst(lc); + Var *var = (Var *)tle->expr; + + /* Don't touch other refs. 
*/ + if (!bms_is_member(var->varno, cteplaninfo->relids)) + continue; + /* + * We must eliminate unused tlist entries by replacing them with NULL constants, like UPSTREAM. + * But don't correct varattno here, as the Var could be nested inside an expression; + * that is done in change_varattnos_of_ShareInputScan(). + */ + if (var->varattno == 0) + { + /* whole row, don't change. */ + } + else if (cteplaninfo->attr_map->attnums[var->varattno - 1] == 0) + { + tle->expr = (Expr *)makeNullConst(exprType((Node *)var), + exprTypmod((Node *)var), + exprCollation((Node *)var)); + } + + /* + * resno, attno: (1, 1), (2, 2), (3, 3), (4, 4), (5, 5) + * used attno: 2, 4 + * resno, attno: (1, null), (2, 1), (3, null), (4, 2), (5, null) + * + * SELECT * from gp_toolkit.gp_partitions where schemaname = 'public' + * and tablename = 'partrl' and partitionlevel = 1 order by partitionrank; + */ + } + change_varattnos_of_ShareInputScan((Node *)plan->scan.plan.targetlist, cteplaninfo); + change_varattnos_of_ShareInputScan((Node *)plan->scan.plan.qual, cteplaninfo); + } plan->scan.plan.targetlist = fix_scan_list(root, plan->scan.plan.targetlist, rtoffset, NUM_EXEC_TLIST((Plan *) plan)); plan->scan.plan.qual = fix_scan_list(root, plan->scan.plan.qual, rtoffset, NUM_EXEC_QUAL((Plan *) plan)); + + + if (IsA(plan->subplan, ShareInputScan) && + cteplaninfo && + (cteplaninfo->attr_map != NULL)) + { + /* after fix_scan_list, the varno could be changed to subquery, we need to adjust the columns for explain */ + RangeTblEntry *rte = rt_fetch(plan->scan.scanrelid, root->glob->finalrtable); + + /* + * It's possible that attr_map has more elements than colnames when subquery + * has junk lists which are useless for upper query.
+ * + * qp_with_clause: + * + * denseregions as + * ( + * select FOO.*,count(distinct language) as "lang_count", + * sum(surfacearea) as "REGION_SURFACE_AREA" + * from( + * select + * sum(population) as "REGION_POP", + * sum(gnp) as "REGION_GNP", + * region + * from + * country + * group by region + * ) FOO,countrylanguage,country + * where + * country.code = countrylanguage.countrycode + * and FOO.region = country.region + * and FOO."REGION_POP" != 0 + * group by + * FOO.region,foo."REGION_POP",foo."REGION_GNP" + * order by sum(surfacearea)/foo."REGION_POP" desc) + * + * the order by clause is not used by the upper query. + */ + Assert(cteplaninfo->attr_map->maplen >= list_length(rte->eref->colnames)); + + ListCell *lc; + List *new_colnames1 = NIL; + List *new_colnames2 = NIL; + int i = 0; + foreach(lc, rte->eref->colnames) + { + Alias *alias = (Alias*) lfirst(lc); + if (cteplaninfo->attr_map->attnums[i] != 0) + new_colnames1 = lappend(new_colnames1, copyObject(alias)); + else + new_colnames2 = lappend(new_colnames2, copyObject(alias)); + i++; + } + rte->eref->colnames = list_concat(new_colnames1, new_colnames2); + } + + if (cteplaninfo && cteplaninfo->attr_map != NULL) + { + if (is_producer) + { + /* If we are the producer, correct the width of the Result and ShareInputScan nodes */ + Assert(IsA(plan->subplan->lefttree, Result)); + Plan *dest = plan->subplan->lefttree; + Plan *src = plan->subplan->lefttree->lefttree; + dest->startup_cost = src->startup_cost; + dest->total_cost = src->total_cost; + dest->plan_rows = src->plan_rows; + dest->parallel_aware = false; + dest->parallel_safe = src->parallel_safe; + /* We have done projection here, use the width of subquery */ + dest->plan_width = plan->scan.plan.plan_width; + + /* as well as the ShareInputScan node */ + plan->subplan->plan_width = plan->scan.plan.plan_width; + } + else + { + /* consumer: only the ShareInputScan width needs correcting */ + /* use the width of the subquery scan */ + plan->subplan->plan_width = plan->scan.plan.plan_width; + }
+ } result = (Plan *) plan; } diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index c4da4c1b2da..d83456c0618 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -52,6 +52,8 @@ #include "cdb/cdbutil.h" #include "cdb/cdbpath.h" +#include "utils/guc.h" + typedef struct convert_testexpr_context { PlannerInfo *root; @@ -82,7 +84,9 @@ static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, List *testexpr_paramids, - bool unknownEqFalse); + bool unknownEqFalse, + bool outer_has_rte_function); +static bool outer_query_has_rte_function(PlannerInfo *root); static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); static Node *convert_testexpr_mutator(Node *node, @@ -124,6 +128,18 @@ static bool contain_outer_selfref_walker(Node *node, Index *depth); static bool splan_is_initplan(List *plan_params, SubLinkType subLinkType); +typedef struct +{ + plan_tree_base_prefix base; /* Required prefix for + * plan_tree_walker/mutator */ + Bitmapset *seen_subplans; + bool result; + int sisc_role; +} contain_ShareInputScan_walk_context; + +static bool +contain_ShareInputScan_walk(Node *node, contain_ShareInputScan_walk_context *ctx); + /* * Get the datatype/typmod/collation of the first column of the plan's output. * @@ -375,20 +391,10 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, if (Gp_role == GP_ROLE_DISPATCH) { config->is_under_subplan = true; - - /* - * Disable CTE sharing in subplan. - * - * fixup_subplans() copys duplicate subplan (subplan with same - * plan_id), but doesn't copy the subroot. - * If enable cte sharing here, it leads to mismatch of the length - * of subplans and subroots. And apply_shareinput_xslice() cannot - * make it correct when shared scan is in subplan, then an assert - * (or panic) error will happen in init_tuplestore_state(). 
- * - * See github issue: https://github.com/greenplum-db/gpdb/issues/12701 - */ - config->gp_cte_sharing = false; + config->gp_cte_sharing = config->gp_cte_sharing ? !(subLinkType == ROWCOMPARE_SUBLINK || + subLinkType == ARRAY_SUBLINK || + subLinkType == MULTIEXPR_SUBLINK || + subLinkType == EXISTS_SUBLINK) : config->gp_cte_sharing; } /* * Strictly speaking, the order of rows in a subquery doesn't matter. @@ -432,8 +438,17 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, { Path *cheapest_partial_path; cheapest_partial_path = linitial(final_rel->partial_pathlist); - add_path(final_rel, cheapest_partial_path, root); - set_cheapest(final_rel); + /* + * Do not be parallel if there is only one row of a SeqScan. + * Else, it will allocate many processes which are unnecessary inside + * InitPlan nodes such as case: TPCDS query 114, 158. + */ + if (cheapest_partial_path->pathtype != T_SeqScan || + cheapest_partial_path->rows > 1) + { + add_path(final_rel, cheapest_partial_path, root); + set_cheapest(final_rel); + } } best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); @@ -465,10 +480,22 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, set_allow_append_initplan_for_function_scan(); Assert(get_allow_append_initplan_for_function_scan() == true); + /* if we are a shared scan */ + subroot->is_shared_scan = contain_ShareInputScan(subroot, (Node*) plan); + + /* + * Detect whether the outer query has an RTE_FUNCTION. If so, + * build_subplan will avoid the eager SubPlan conversion because + * the resulting SubPlan would live in the same multi-segment + * slice as the FunctionScan. See outer_query_has_rte_function(). + */ + bool has_rte_function = outer_query_has_rte_function(root); + /* And convert to SubPlan or InitPlan format. 
*/ result = build_subplan(root, plan, subroot, plan_params, subLinkType, subLinkId, - testexpr, NIL, isTopQual); + testexpr, NIL, isTopQual, + has_rte_function); /* * If it's a correlated EXISTS with an unimportant targetlist, we might be @@ -527,7 +554,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, ANY_SUBLINK, 0, newtestexpr, paramIds, - true)); + true, + has_rte_function)); /* Check we got what we expected */ Assert(hashplan->parParam == NIL); Assert(hashplan->useHashTable); @@ -544,6 +572,53 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, return result; } +/* + * Return true if the outer query's range table contains any RTE_FUNCTION. + * + * A FunctionScan corresponding to an RTE_FUNCTION typically executes on + * every segment (especially SETOF / VOLATILE functions). When that is + * the case, any SubPlan that cbdb_eager_subplan would create for a + * SubLink in the same query level ends up embedded in the same + * multi-segment slice as the FunctionScan. The SubPlan's Entry-locus + * Gather Motion then fails at execution time either with + * "unexpected gang size: N" + * or, when a PL-language SRF tries SPI from inside a QE, + * "query plan with multiple segworker groups is not supported". + * + * Two example shapes that both hit this: + * + * (1) SubLink as an argument to a function in FROM: + * SELECT ... FROM t, generate_series(0, (SELECT max(x) FROM y)) g ... + * + * (2) SubLink as a sibling of a FunctionScan in the target list: + * SELECT n - (SELECT count(*) FROM t) + * FROM srf($$...$$) AS n; + * + * In both shapes the surrounding slice is multi-segment, so we fall + * back to keeping the SubLink as an InitPlan: it is executed on the QD + * and its scalar result is dispatched to QEs via execParams. This + * over-approximates -- some RTE_FUNCTIONs would run on the QD only and + * could safely be eager -- but the cost of missing the optimization is + * low, while the executor errors it prevents are hard failures. 
+ */ +static bool +outer_query_has_rte_function(PlannerInfo *root) +{ + ListCell *lc; + + if (root->parse == NULL) + return false; + + foreach(lc, root->parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc); + + if (rte->rtekind == RTE_FUNCTION) + return true; + } + return false; +} + /* * Build a SubPlan node given the raw inputs --- subroutine for make_subplan * @@ -555,13 +630,94 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, List *testexpr_paramids, - bool unknownEqFalse) + bool unknownEqFalse, + bool outer_has_rte_function) { Node *result; SubPlan *splan; ListCell *lc; Bitmapset *plan_param_set; + bool eager_subplan = false; + + if (subroot->is_shared_scan) + eager_subplan = true; + else if (plan->locustype == CdbLocusType_Entry && plan->initPlan != NIL) + { + /* + * Don't eager subplan if we are already on QD, + * else a broadcast under SubPlan will cause inactive Motion. 
+ * + * select a from test_index_with_orderby_limit order by a limit ( + * select min(a) from test_index_with_orderby_limit); + * + * Limit + * Locus: Entry + * -> Gather Motion 32:1 (slice1; segments: 32) + * Locus: Entry + * Merge Key: test_index_with_orderby_limit.a + * -> Limit + * Locus: Hashed + * -> Index Only Scan using index_ab on test_index_with_orderby_limit + * Locus: Hashed + * SubPlan 2 + * -> Materialize + * Locus: Replicated + * -> Broadcast Motion 1:32 (slice2) + * Locus: Replicated + * InitPlan 1 (returns $0) (slice3) + * -> Limit + * Locus: Entry + * -> Gather Motion 32:1 (slice4; segments: 32) + * Locus: Entry + * Merge Key: test_index_with_orderby_limit_1.a + * -> Index Only Scan using index_ab on test_index_with_orderby_limit test_index_with_orderby_limit_1 + * Locus: Hashed + * Index Cond: (a IS NOT NULL) + * -> Result + * Locus: Entry + * + * When SubPlan executes before the main plan, its nested InitPlan slice completes + * and attempts to broadcast data to parent operators. + * However, the main plan hasn't yet initialized the SubPlan execution context + * to receive this data, causing the Motion error. + */ + eager_subplan = false; + } + else if (cbdb_eager_subplan && !is_single_simple_query(subroot)) + eager_subplan = true; + + /* + * If the outer query contains any RTE_FUNCTION, its FunctionScan + * node usually runs on every segment and its slice is therefore + * multi-segment. Any SubPlan created for a SubLink at this query + * level is embedded in that multi-segment slice, and an Entry-locus + * Gather Motion inside the SubPlan fails at execution time (see + * outer_query_has_rte_function()). Fall back to InitPlan in that + * case: it is computed once on the QD and its scalar result is + * dispatched to the QEs via execParams. + */ + if (outer_has_rte_function) + eager_subplan = false; + + /* + * Don't use a SubPlan if there is a modify operation, ctid might be wrong.
+ * with updated AS (update table_for_initplan set k = 33 where i = 3 returning k) + * select table_for_initplan.*, (select sum(k) from updated) from table_for_initplan; + */ + if (contain_ModifyTable_plan(root, plan)) + eager_subplan = false; + + /* + * InitPlan can't have ShareInputScan, neither producer or consumer in same slice, else it will hang. + * However, we don't know the slice info here, so make it to subplan. + * + * WITH q1(x,y) AS (SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred) + * SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); + */ + if (contain_ShareInputScan(root, (Node *)plan)) + eager_subplan = true; + /* * Initialize the SubPlan node. Note plan_id, plan_name, and cost fields * are set further down. @@ -632,7 +788,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, splan->is_initplan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK) + else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK && !eager_subplan) { TargetEntry *te = linitial(plan->targetlist); Param *prm; @@ -791,7 +947,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, splan->plan_id = list_length(root->glob->subplans); if (splan->is_initplan) + { root->init_plans = lappend(root->init_plans, splan); + root->init_plan_ids = bms_add_member(root->init_plan_ids, splan->plan_id); + } /* * A parameterless subplan (not initplan) should be prepared to handle @@ -2765,6 +2924,21 @@ finalize_plan(PlannerInfo *root, Plan *plan, } } + if (IsA(plan, ShareInputScan) && plan->lefttree != NULL) + { + foreach(l, plan->lefttree->initPlan) + { + SubPlan *initsubplan = (SubPlan *) lfirst(l); + ListCell *l2; + + foreach(l2, initsubplan->setParam) + { + initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); + } + } + } + + /* Any setParams are validly referenceable in this node and children */ if (initSetParam) valid_params = bms_union(valid_params, initSetParam); @@ -3550,6 
+3724,7 @@ SS_make_initplan_from_plan(PlannerInfo *root, node->setParam = list_make1_int(prm->paramid); root->init_plans = lappend(root->init_plans, node); + root->init_plan_ids = bms_add_member(root->init_plan_ids, node->plan_id); /* * The node can't have any inputs (since it's an initplan), so the @@ -3577,3 +3752,163 @@ splan_is_initplan(List *plan_params, SubLinkType subLinkType) return true; return false; } + +bool contain_ShareInputScan(PlannerInfo *root, Node *node) +{ + contain_ShareInputScan_walk_context ctx; + planner_init_plan_tree_base(&ctx.base, root); + ctx.result = false; + ctx.seen_subplans = NULL; + ctx.sisc_role = SISC_NONE; + + (void) contain_ShareInputScan_walk(node, &ctx); + + return ctx.result; +} + +static bool +contain_ShareInputScan_walk(Node *node, contain_ShareInputScan_walk_context *ctx) +{ + PlannerInfo *root = (PlannerInfo *) ctx->base.node; + + if (node == NULL) + return false; + + if (IsA(node, SubPlan)) + { + SubPlan *spexpr = (SubPlan *) node; + int plan_id = spexpr->plan_id; + + if (!bms_is_member(plan_id, ctx->seen_subplans)) + { + ctx->seen_subplans = bms_add_member(ctx->seen_subplans, plan_id); + + if (spexpr->is_initplan) + return false; + + Plan *plan = list_nth(root->glob->subplans, plan_id - 1); + return plan_tree_walker((Node *) plan, contain_ShareInputScan_walk, ctx, true); + } + } + + if (IsA(node, ShareInputScan)) + { + ctx->result = true; + + if (((Plan*) node)->lefttree != NULL) + ctx->sisc_role |= SISC_PRODUCER; + else + ctx->sisc_role |= SISC_CONSUMER; + + return false; + } + + return plan_tree_walker((Node *) node, contain_ShareInputScan_walk, ctx, true); +} + +/* + * Similar to contain_ShareInputScan() + * with details about producer and consumer info. 
+ */ +int contain_ShareInputScan_detail(PlannerInfo *root, Node *node) +{ + contain_ShareInputScan_walk_context ctx; + planner_init_plan_tree_base(&ctx.base, root); + ctx.result = false; + ctx.seen_subplans = NULL; + ctx.sisc_role = SISC_NONE; + + (void) contain_ShareInputScan_walk(node, &ctx); + return ctx.sisc_role; +} + +/* + * Used to judge if a query is simple enough to be an InitPlan. + * If not, convert it to SubPlan for more parallelism. + * Simple means a SELECT on a single ordinary relation (not CTE or Partitioned) + * with no aggregates, GROUP BY, sublinks or window functions. + * + * For UPDATE/DELETE/INSERT, we return true to leave them unchanged. + */ +bool +is_single_simple_query(PlannerInfo *root) +{ + Query* parse = root->parse; + + /* Don't touch writable operations. */ + if (parse->commandType != CMD_SELECT) + return true; + + if (parse->hasAggs || + parse->groupClause != NIL || + parse->cteList != NIL || + parse->hasSubLinks || + parse->hasWindowFuncs) + return false; + + if (list_length(parse->jointree->fromlist) != 1) + return false; + + Node *jtnode = (Node *) linitial(parse->jointree->fromlist); + if (!IsA(jtnode, RangeTblRef)) + return false; + + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = planner_rt_fetch(varno, root); + + /* + * Don't disturb Result, Values or Function RTEs. + * select * from listp where a = (select 1); + */ + if (rte->rtekind == RTE_RESULT || + rte->rtekind == RTE_VALUES || + rte->rtekind == RTE_FUNCTION) + return true; + + if (rte->rtekind != RTE_RELATION ) + return false; + + char relkind = get_rel_relkind(rte->relid); + if (relkind != RELKIND_RELATION) + return false; + + /* OK, it's simple enough.
*/ + return true; +} + +typedef struct ModifyTableFinderContext +{ + plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */ + bool found; +} ModifyTableFinderContext; + +/* + * Walker that sets ctx->found when a ModifyTable node is reached in the plan tree + */ +static bool +ModifyTableFinderWalker (Plan *node, void *context) +{ + Assert(context); + ModifyTableFinderContext *ctx = (ModifyTableFinderContext *) context; + + if (node == NULL) + return false; + + if (IsA(node, ModifyTable)) + { + ctx->found = true; + return true; /* found a ModifyTable; stop walking */ + } + + /* Continue walking */ + return plan_tree_walker((Node*)node, ModifyTableFinderWalker, ctx, true); +} + +bool contain_ModifyTable_plan(PlannerInfo *root, Plan* node) +{ + ModifyTableFinderContext ctx; + ctx.base.node = (Node*)root; + ctx.found = false; + ModifyTableFinderWalker(node, &ctx); + return ctx.found; +} diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index eca7ac93a2d..f7c2aca29f3 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -45,6 +45,10 @@ #include "cdb/cdbsubselect.h" #include "optimizer/transform.h" +#include "parser/parse_clause.h" +#include "parser/parse_oper.h" + +#include "utils/guc.h" typedef struct pullup_replace_vars_context { @@ -139,6 +143,10 @@ static void fix_append_rel_relids(List *append_rel_list, int varno, Relids subrelids); static Node *find_jointree_node_for_rel(Node *jtnode, int relid); +static void make_setop_distinct(Query *subquery); + +static void +make_setop_distinct_recurse(Node *setOp, Query *setOpQuery, bool distinct); /* * replace_empty_jointree @@ -744,13 +752,33 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, { OpExpr *opexp = (OpExpr *) node; JoinExpr *j; + Node *rarg; + Node *n_tmp = node; + + + bool sublink_found = false; + while (IsA(n_tmp, OpExpr)) + { + OpExpr *op_tmp = (OpExpr *) n_tmp; + + if
(list_length(op_tmp->args) != 2) + break; - if (list_length(opexp->args) == 2) + rarg = list_nth(op_tmp->args, 1); + if (IsA(rarg, SubLink)) + { + sublink_found = true; + break; + } + n_tmp = list_nth(op_tmp->args, 1); + } + + if (sublink_found && list_length(opexp->args) == 2) { /** * Check if second arg is sublink */ - Node *rarg = list_nth(opexp->args, 1); + // Node *rarg = list_nth(opexp->args, 1); if (IsA(rarg, SubLink)) { @@ -936,6 +964,10 @@ pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, is_simple_union_all(rte->subquery)) return pull_up_simple_union_all(root, jtnode, rte); + if (rte->rtekind == RTE_SUBQUERY && + cbdb_enable_setop_pre_dedup) + make_setop_distinct(rte->subquery); + /* * Or perhaps it's a simple VALUES RTE? * @@ -1107,6 +1139,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->hasRecursion = false; subroot->wt_param_id = -1; subroot->non_recursive_path = NULL; + subroot->init_plan_ids = NULL; /* No CTEs to worry about */ Assert(subquery->cteList == NIL); @@ -3892,3 +3925,89 @@ init_list_cteplaninfo(int numCtes) return list_cteplaninfo; } + +static void +make_setop_distinct(Query *subquery) +{ + SetOperationStmt *topop; + + + /* Let's just make sure it's a valid subselect ... */ + if (!IsA(subquery, Query) || + subquery->commandType != CMD_SELECT) + elog(ERROR, "subquery is bogus"); + + /* Is it a set-operation query at all? 
*/ + topop = castNode(SetOperationStmt, subquery->setOperations); + if (!topop) + return; + + /* Recursively check the tree of set operations */ + make_setop_distinct_recurse((Node *) topop, subquery, !topop->all); +} + +static void +make_setop_distinct_recurse(Node *setOp, Query *setOpQuery, bool distinct) +{ + if (IsA(setOp, RangeTblRef)) + { + if (!distinct) + return; + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, setOpQuery->rtable); + Query *subquery = rte->subquery; + ListCell *lc; + List *distinct_clause = NIL; + + Assert(subquery != NULL); + /* + * Don't disturb if subquery is already distinct. + * DISTINCT, DISTINCT ON + * GROUP BY(no grouping sets) + */ + if (subquery->hasDistinctOn || + subquery->groupingSets != NIL || + subquery->distinctClause) + return; + + // add distinct on subquery->targetList + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore junk */ + + SortGroupClause *grpcl = makeNode(SortGroupClause); + Oid restype = exprType((Node *) tle->expr); + Oid sortop; + Oid eqop; + bool hashable; + /* determine the eqop and optional sortop */ + get_sort_group_operators(restype, + false, true, false, + &sortop, &eqop, NULL, + &hashable); + grpcl->tleSortGroupRef = assignSortGroupRef(tle, subquery->targetList); + grpcl->eqop = eqop; + grpcl->sortop = sortop; + grpcl->nulls_first = false; /* OK with or without sortop */ + grpcl->hashable = hashable; + distinct_clause = lappend(distinct_clause, grpcl); + } + subquery->distinctClause = distinct_clause; + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + make_setop_distinct_recurse(op->larg, setOpQuery, !op->all); + make_setop_distinct_recurse(op->rarg, setOpQuery, !op->all); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + return; /* keep compiler quiet */ + } +} diff --git 
a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 42c3e4dc046..616c3a1fbe3 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -674,3 +674,557 @@ process_duplicate_ors(List *orlist) else return make_andclause(pull_ands(winners)); } + +static Expr * +convert_or_to_cnf_complete(Expr *expr); + +static Expr * +convert_and_to_cnf_complete(Expr *expr); +static Expr * +distribute_or_over_ands_complete(List *non_ands, List *and_clauses); +static List * +flatten_or_args_complete(List *args); +static List * +flatten_and_args_complete(List *args); +static Expr * +combine_cnf_clauses_complete(List *clauses); +static List * +remove_duplicates_in_list(List *clauses); +static List * +remove_duplicate_and_subsumed_clauses(List *clauses); +static bool +or_clause_subsumes(Expr *or_clause1, Expr *or_clause2); +static Expr * +deduplicate_cnf_result(Expr *expr); + +/* + * CNF Conversion for CTE Predicate Pushdown + * + * MOTIVATION: + * When a CTE is referenced multiple times with different filter predicates, + * we want to push down the combined predicates to reduce materialization. + * For example: + * + * WITH cte AS (SELECT ... FROM large_table) + * SELECT * FROM cte WHERE store_id = 10 + * UNION ALL + * SELECT * FROM cte WHERE store_id = 20 + * + * We collect predicates from all consumers and combine them: + * (store_id = 10) OR (store_id = 20) + * + * For more complex cases with AND predicates: + * WHERE (store_id = 10 AND year = 2001) + * WHERE (store_id = 20 AND year = 2001) + * + * Combined: (store_id = 10 AND year = 2001) OR (store_id = 20 AND year = 2001) + * + * WHY CNF CONVERSION: + * CNF (Conjunctive Normal Form) is required because: + * 1. The planner expects filter predicates in AND-of-ORs form + * 2. CNF enables individual clauses to be pushed down independently + * 3. 
After CNF conversion, (year = 2001) can be extracted as a separate + * conjunct and pushed down even if other parts cannot be + * + * ALGORITHM: + * We use the distributive law to convert OR-of-ANDs to AND-of-ORs: + * + * (A AND B) OR (A AND C) + * = (A OR A) AND (A OR C) AND (B OR A) AND (B OR C) [distribute] + * = A AND (A OR C) AND (B OR A) AND (B OR C) [simplify A OR A = A] + * = A AND (A OR C) AND (A OR B) AND (B OR C) [reorder] + * + * With subsumption detection, we can further simplify: + * - (A OR C) subsumes any clause containing all its terms plus more + * - So A AND (A OR C) AND (A OR B) AND (B OR C) simplifies to A AND (B OR C) + * + * Why: in CNF context, (A) subsumes (A OR B) because: + * - If A is true, both A and (A OR B) are true + * - (A) is more restrictive, so (A OR B) is redundant + * + * EXAMPLE WALKTHROUGH: + * Input: (s='s' AND year=2001) OR (s='s' AND year=2002) + * + * Step 1: Identify AND clauses to distribute + * - First AND: (s='s' AND year=2001) + * - Remaining: (s='s' AND year=2002) + * + * Step 2: Distribute first AND over remaining + * - (s='s' OR (s='s' AND year=2002)) → recurse + * - (year=2001 OR (s='s' AND year=2002)) → recurse + * + * Step 3: Recursively convert each: + * - (s='s' OR (s='s' AND year=2002)) + * = (s='s' OR s='s') AND (s='s' OR year=2002) + * = s='s' AND (s='s' OR year=2002) + * + * - (year=2001 OR (s='s' AND year=2002)) + * = (year=2001 OR s='s') AND (year=2001 OR year=2002) + * + * Step 4: Combine with AND: + * = s='s' AND (s='s' OR year=2002) AND (year=2001 OR s='s') AND (year=2001 OR year=2002) + * + * Step 5: Deduplicate and remove subsumed clauses: + * - s='s' subsumes (s='s' OR year=2002) and (year=2001 OR s='s') + * - Final: s='s' AND (year=2001 OR year=2002) + * + * DEDUPLICATION STRATEGY: + * 1. Exact duplicate removal: (A OR B) appears twice → keep one + * 2.
Subsumption removal: (A OR B) AND (A OR B OR C) → keep only (A OR B) + * because (A OR B) being true implies (A OR B OR C) is true + * + * COMPLEXITY NOTE: + * CNF conversion can cause exponential blowup in the worst case. + * For n AND-clauses each with m terms: O(m^n) output clauses. + * The deduplication helps mitigate this for practical queries. + */ +Expr * +convert_expr_to_cnf_complete(Expr *expr) +{ + if (expr == NULL) + return NULL; + + /* Base case: non-Boolean expressions */ + if (!is_orclause(expr) && !is_andclause(expr)) + return expr; + + if (is_orclause(expr)) + { + return convert_or_to_cnf_complete(expr); + } + else if (is_andclause(expr)) + { + return convert_and_to_cnf_complete(expr); + } + + return expr; +} + +/* + * convert_or_to_cnf_complete + * Complete OR to CNF conversion with deduplication + */ +static Expr * +convert_or_to_cnf_complete(Expr *expr) +{ + List *or_args = NIL; + ListCell *lc; + + /* Step 1: Recursively convert all arguments */ + foreach (lc, ((BoolExpr *)expr)->args) + { + Expr *arg = convert_expr_to_cnf_complete((Expr *)lfirst(lc)); + or_args = lappend(or_args, arg); + } + + /* Step 2: Flatten nested ORs */ + or_args = flatten_or_args_complete(or_args); + + /* Step 3: Remove duplicate arguments within this OR */ + or_args = remove_duplicates_in_list(or_args); + + /* Step 4: Check for AND clauses that need distribution */ + List *and_clauses = NIL; + List *non_and_clauses = NIL; + bool has_and = false; + + foreach (lc, or_args) + { + Expr *arg = (Expr *)lfirst(lc); + if (is_andclause(arg)) + { + and_clauses = lappend(and_clauses, arg); + has_and = true; + } + else + { + non_and_clauses = lappend(non_and_clauses, arg); + } + } + + /* Step 5: If no AND clauses, return simplified OR */ + if (!has_and) + { + if (list_length(or_args) == 0) + return (Expr *)makeBoolConst(false, false); + else if (list_length(or_args) == 1) + return (Expr *)linitial(or_args); + else + return make_orclause(or_args); + } + + /* Step 6: Apply 
distributive law */ + Expr *result = distribute_or_over_ands_complete(non_and_clauses, and_clauses); + + /* Step 7: Final deduplication of the resulting CNF */ + return deduplicate_cnf_result(result); +} + +/* + * convert_and_to_cnf_complete + * Complete AND to CNF conversion with deduplication + */ +static Expr * +convert_and_to_cnf_complete(Expr *expr) +{ + List *and_args = NIL; + ListCell *lc; + + /* Step 1: Recursively convert all arguments */ + foreach (lc, ((BoolExpr *)expr)->args) + { + Expr *arg = convert_expr_to_cnf_complete((Expr *)lfirst(lc)); + and_args = lappend(and_args, arg); + } + + /* Step 2: Flatten nested ANDs */ + and_args = flatten_and_args_complete(and_args); + + /* Step 3: Remove duplicates */ + and_args = remove_duplicates_in_list(and_args); + + /* Step 4: Return simplified AND */ + if (list_length(and_args) == 0) + return (Expr *)makeBoolConst(true, false); + else if (list_length(and_args) == 1) + return (Expr *)linitial(and_args); + else + return make_andclause(and_args); +} + +/* + * distribute_or_over_ands_complete + * Enhanced distribution that handles multiple AND clauses properly + */ +static Expr * +distribute_or_over_ands_complete(List *non_ands, List *and_clauses) +{ + /* Use the first AND clause for initial distribution */ + Expr *first_and = (Expr *)linitial(and_clauses); + List *first_and_args = ((BoolExpr *)first_and)->args; + + /* Remove duplicates from first AND arguments */ + first_and_args = remove_duplicates_in_list(first_and_args); + + /* Remaining AND clauses */ + List *remaining_ands = list_delete_first(list_copy(and_clauses)); + + /* Base arguments for distribution: non-ANDs + remaining ANDs */ + List *base_args = list_concat(remove_duplicates_in_list(non_ands), + remaining_ands); + + /* Apply distribution */ + List *distributed_clauses = NIL; + ListCell *lc; + + foreach (lc, first_and_args) + { + Expr *subclause = (Expr *)lfirst(lc); + + /* Create new OR: (base_args OR subclause) */ + List *new_or_args = 
list_copy(base_args); + new_or_args = lappend(new_or_args, subclause); + + /* Remove duplicates in the new OR arguments */ + new_or_args = remove_duplicates_in_list(new_or_args); + + /* Convert recursively to CNF */ + Expr *new_or = make_orclause(new_or_args); + Expr *cnf_or = convert_expr_to_cnf_complete(new_or); + + distributed_clauses = lappend(distributed_clauses, cnf_or); + } + + /* Combine all distributed clauses */ + return combine_cnf_clauses_complete(distributed_clauses); +} + +/* + * flatten_or_args_complete + * Flatten nested OR clauses with deduplication + */ +static List * +flatten_or_args_complete(List *args) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, args) + { + Expr *arg = (Expr *)lfirst(lc); + + if (is_orclause(arg)) + { + List *sub_args = flatten_or_args_complete(((BoolExpr *)arg)->args); + result = list_concat(result, sub_args); + } + else + { + result = lappend(result, arg); + } + } + + /* Remove duplicates after flattening */ + return remove_duplicates_in_list(result); +} + +/* + * flatten_and_args_complete + * Flatten nested AND clauses with deduplication + */ +static List * +flatten_and_args_complete(List *args) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, args) + { + Expr *arg = (Expr *)lfirst(lc); + + if (is_andclause(arg)) + { + List *sub_args = flatten_and_args_complete(((BoolExpr *)arg)->args); + result = list_concat(result, sub_args); + } + else + { + result = lappend(result, arg); + } + } + + /* Remove duplicates after flattening */ + return remove_duplicates_in_list(result); +} + +/* + * combine_cnf_clauses_complete + * Combine CNF clauses with advanced deduplication + */ +static Expr * +combine_cnf_clauses_complete(List *clauses) +{ + if (list_length(clauses) == 0) + return (Expr *)makeBoolConst(true, false); + + if (list_length(clauses) == 1) + return (Expr *)linitial(clauses); + + /* Extract all subclauses, handling nested ANDs */ + List *all_clauses = NIL; + ListCell *lc; + + foreach (lc, clauses) + { + 
Expr *clause = (Expr *)lfirst(lc); + + if (is_andclause(clause)) + { + all_clauses = list_concat(all_clauses, + list_copy(((BoolExpr *)clause)->args)); + } + else + { + all_clauses = lappend(all_clauses, clause); + } + } + + /* Remove duplicates and subsumed clauses */ + all_clauses = remove_duplicate_and_subsumed_clauses(all_clauses); + + if (list_length(all_clauses) == 0) + return (Expr *)makeBoolConst(true, false); + else if (list_length(all_clauses) == 1) + return (Expr *)linitial(all_clauses); + else + return make_andclause(all_clauses); +} + +/* + * remove_duplicates_in_list + * Remove duplicate expressions from a list + */ +static List * +remove_duplicates_in_list(List *clauses) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, clauses) + { + Expr *clause = (Expr *)lfirst(lc); + bool found = false; + ListCell *lc2; + + foreach (lc2, result) + { + if (equal(clause, (Expr *)lfirst(lc2))) + { + found = true; + break; + } + } + + if (!found) + result = lappend(result, clause); + } + + return result; +} + +/* + * remove_duplicate_and_subsumed_clauses + * Remove duplicates and logically redundant clauses from a CNF conjunction. + * + * In a CNF conjunction (AND of clauses), a clause is redundant if it is + * logically implied by another clause already in the list. We detect three + * cases of redundancy: + * + * 1. Exact duplicates: (A OR B) AND (A OR B) → keep one + * + * 2. OR-vs-OR subsumption: (A OR B) AND (A OR B OR C) → keep (A OR B) + * A shorter OR-clause with all its terms present in a longer one + * makes the longer one always-true when the shorter one is true. + * + * 3. Literal-vs-OR subsumption: A AND (A OR B) → keep A + * A bare literal makes any OR-clause containing it redundant, + * because if the literal is true, the OR-clause is trivially true. 
+ */ +static List * +remove_duplicate_and_subsumed_clauses(List *clauses) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, clauses) + { + Expr *clause = (Expr *)lfirst(lc); + bool keep = true; + List *to_remove = NIL; + + /* Check against all existing clauses */ + ListCell *lc_exist; + foreach (lc_exist, result) + { + Expr *existing = (Expr *)lfirst(lc_exist); + + /* Exact duplicate */ + if (equal(clause, existing)) + { + keep = false; + break; + } + + /* Check for OR clause subsumption */ + if (is_orclause(clause) && is_orclause(existing)) + { + if (or_clause_subsumes(existing, clause)) + { + /* Existing subsumes current, skip current */ + keep = false; + break; + } + else if (or_clause_subsumes(clause, existing)) + { + /* + * Current subsumes existing, mark for removal. + * Continue checking other clauses since the current + * clause may subsume multiple existing clauses. + */ + to_remove = lappend(to_remove, existing); + } + } + else if (!is_orclause(clause) && is_orclause(existing)) + { + /* A AND (A OR B) can be simplified to A */ + if (list_member(((BoolExpr *)existing)->args, clause)) + { + to_remove = lappend(to_remove, existing); + } + } + else if (is_orclause(clause) && !is_orclause(existing)) + { + if (list_member(((BoolExpr *)clause)->args, existing)) + { + keep = false; + break; + } + } + } + + /* Remove all clauses that current clause subsumes */ + if (to_remove != NIL) + { + ListCell *lc_rm; + foreach (lc_rm, to_remove) + { + result = list_delete_ptr(result, lfirst(lc_rm)); + } + list_free(to_remove); + } + + if (keep) + result = lappend(result, clause); + } + + return result; +} + +/* + * or_clause_subsumes + * Check if or_clause1 subsumes or_clause2 (every arg of clause1 is in clause2) + * e.g. (A OR B) subsumes (B OR C OR A), so (B OR C OR A) can be removed + */ +static bool +or_clause_subsumes(Expr *or_clause1, Expr *or_clause2) +{ + if (!is_orclause(or_clause1) || !is_orclause(or_clause2)) + return false; + + List *args1 = ((BoolExpr *)or_clause1)->args; + List *args2 =
((BoolExpr *)or_clause2)->args; + + /* If all elements of clause1 are in clause2, clause1 subsumes clause2 */ + ListCell *lc1; + foreach (lc1, args1) + { + Expr *arg1 = (Expr *)lfirst(lc1); + bool found = false; + ListCell *lc2; + + foreach (lc2, args2) + { + if (equal(arg1, (Expr *)lfirst(lc2))) + { + found = true; + break; + } + } + + if (!found) + return false; + } + + return true; +} + +/* + * deduplicate_cnf_result + * Final deduplication pass for the CNF result + */ +static Expr * +deduplicate_cnf_result(Expr *expr) +{ + if (!is_andclause(expr)) + return expr; + + List *and_args = ((BoolExpr *)expr)->args; + List *unique_clauses = remove_duplicate_and_subsumed_clauses(and_args); + + if (list_length(unique_clauses) == 0) + return (Expr *)makeBoolConst(true, false); + else if (list_length(unique_clauses) == 1) + return (Expr *)linitial(unique_clauses); + else + return make_andclause(unique_clauses); +} diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index ffaaea30b9c..00e0075ed16 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -801,6 +801,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, int firstFlag; GpSetOpType optype = PSETOP_NONE; /* CDB */ + List *partial_pathlist = NIL; + int parallel_workers = 0; + /* * Tell children to fetch all tuples. */ @@ -822,6 +825,33 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, &dRightGroups); rpath = rrel->cheapest_total_path; + if (lrel->consider_parallel && + (lrel->partial_pathlist != NIL) && + (rrel->partial_pathlist != NIL) && + rrel->consider_parallel) + { + ListCell *lc; + + /* TODO: adjust nGroups ? 
*/ + if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups) + { + partial_pathlist = list_make2(linitial(lrel->partial_pathlist), + linitial(rrel->partial_pathlist)); + } + else + { + partial_pathlist = list_make2(linitial(rrel->partial_pathlist), + linitial(lrel->partial_pathlist)); + } + /* Find the highest number of workers requested for any subpath. */ + foreach(lc, partial_pathlist) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + } + /* Undo effects of forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; @@ -855,6 +885,13 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, { optype = choose_setop_type(pathlist,tlist_list); adjust_setop_arguments(root, pathlist, tlist_list, optype); + + if (partial_pathlist != NIL) + { + /* We don't need optype in parallel, create_append_path will handle that. */ + adjust_setop_arguments(root, partial_pathlist, tlist_list, + choose_setop_type(pathlist,tlist_list)); + } } else if ( Gp_role == GP_ROLE_UTILITY || Gp_role == GP_ROLE_EXECUTE ) /* MPP-2928 */ @@ -881,6 +918,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, bms_union(lrel->relids, rrel->relids)); result_rel->reltarget = create_pathtarget(root, tlist); + if (partial_pathlist != NIL) + result_rel->consider_parallel = true; + /* * Append the child results together. */ @@ -955,6 +995,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, result_rel->rows = path->rows; add_path(result_rel, path, root); + + if (partial_pathlist != NIL) + { + /* Same as above, do partial paths. */ + path = (Path *) create_append_path(root, result_rel, NIL, partial_pathlist, + NIL, NULL, parallel_workers, false, -1); + + use_hash = choose_hashed_setop(root, groupList, path, + dNumGroups, dNumOutputRows, + (op->op == SETOP_INTERSECT) ? 
"INTERSECT" : "EXCEPT"); + + if (groupList && !use_hash) + path = (Path *) create_sort_path(root, + result_rel, + path, + make_pathkeys_for_sortclauses(root, + groupList, + tlist), + -1.0); + + path = (Path *) create_setop_path(root, + result_rel, + path, + cmd, + use_hash ? SETOP_HASHED : SETOP_SORTED, + groupList, + list_length(op->colTypes) + 1, + use_hash ? firstFlag : -1, + dNumGroups, + dNumOutputRows); + + add_partial_path(result_rel, path); + } + return result_rel; } diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 2e7ff349e77..b54f0a5b8fd 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -495,6 +495,11 @@ adjust_appendrel_attrs_mutator(Node *node, PlannerInfo *newsubroot = makeNode(PlannerInfo); PlannerInfo *oldsubroot = planner_subplan_get_root(root, sp); + /* + * TODO: correct ShareInputScan copy fields. + * We use lefttree to find the same cte ref + */ + memcpy(newsubroot, oldsubroot, sizeof(PlannerInfo)); newsubroot->append_rel_list = (List *) copyObject(oldsubroot->append_rel_list); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index a039fdd87d2..68c29b20344 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -849,7 +849,7 @@ max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) List *save_safe_param_ids; if (!subplan->parallel_safe && - max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + max_parallel_hazard_test(PROPARALLEL_SAFE, context)) return true; save_safe_param_ids = context->safe_param_ids; context->safe_param_ids = list_concat_copy(context->safe_param_ids, diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 24b795dde17..b56f60c425d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2344,8 +2344,11 @@ create_unique_path(PlannerInfo 
*root, RelOptInfo *rel, Path *subpath, bool add_motion = false; double numsegments; + /* CBDB_PARALLEL: we might try different paths to be unique. */ +#if 0 /* Caller made a mistake if subpath isn't cheapest_total ... */ Assert(subpath == rel->cheapest_total_path); +#endif Assert(subpath->parent == rel); /* ... or if SpecialJoinInfo is the wrong one */ Assert(sjinfo->jointype == JOIN_SEMI); @@ -3405,9 +3408,13 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, required_outer); pathnode->parallel_aware = false; pathnode->parallel_safe = rel->consider_parallel; +#if 0 pathnode->parallel_workers = 0; +#endif pathnode->pathkeys = pathkeys; pathnode->locus = locus; + /* CBDB_PARALLEL: use locus.parallel */ + pathnode->parallel_workers = locus.parallel_workers; pathnode->sameslice_relids = NULL; @@ -4048,11 +4055,14 @@ create_nestloop_path(PlannerInfo *root, if (orig_jointype == JOIN_DEDUP_SEMI || orig_jointype == JOIN_DEDUP_SEMI_REVERSE) { - return (Path *) create_unique_rowid_path(root, + UniquePath *upath = create_unique_rowid_path(root, joinrel, (Path *) pathnode, pathnode->innerjoinpath->parent->relids, rowidexpr_id); + if (upath) + upath->path.total_cost *= cbdb_dedup_semi_damping_factor; + return (Path *) upath; } /* @@ -4268,11 +4278,14 @@ create_mergejoin_path(PlannerInfo *root, if (orig_jointype == JOIN_DEDUP_SEMI || orig_jointype == JOIN_DEDUP_SEMI_REVERSE) { - return (Path *) create_unique_rowid_path(root, + UniquePath* upath = create_unique_rowid_path(root, joinrel, (Path *) pathnode, pathnode->jpath.innerjoinpath->parent->relids, rowidexpr_id); + if (upath) + upath->path.total_cost *= cbdb_dedup_semi_damping_factor; + return (Path *) upath; } /* @@ -4514,11 +4527,14 @@ create_hashjoin_path(PlannerInfo *root, if (orig_jointype == JOIN_DEDUP_SEMI || orig_jointype == JOIN_DEDUP_SEMI_REVERSE) { - return (Path *) create_unique_rowid_path(root, + UniquePath *upath = create_unique_rowid_path(root, joinrel, (Path *) pathnode, 
pathnode->jpath.innerjoinpath->parent->relids, rowidexpr_id); + if (upath) + upath->path.total_cost *= cbdb_dedup_semi_damping_factor; + return (Path *) upath; } /* @@ -5090,6 +5106,17 @@ create_agg_path(PlannerInfo *root, subpath->startup_cost, subpath->total_cost, subpath->rows, subpath->pathtarget->width); + /* Correct the effect of streaming */ + if (streaming && + pathnode->path.rows >= cbdb_streaming_damping_rows_threshold&& + (subpath->pathtype != T_SeqScan) && + (pathnode->path.rows >= subpath->rows * cbdb_streaming_damping_factor)) + { + pathnode->path.rows *= cbdb_streaming_damping_factor; + pathnode->path.startup_cost *= cbdb_streaming_damping_factor; + pathnode->path.total_cost *= cbdb_streaming_damping_factor; + } + /* add tlist eval cost for each output row */ pathnode->path.startup_cost += target->cost.startup; pathnode->path.total_cost += target->cost.startup + diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index ecd7a3ff069..ad22109043d 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -1148,6 +1148,7 @@ replace_rte_variables(Node *node, int target_varno, int sublevels_up, context.callback_arg = callback_arg; context.target_varno = target_varno; context.sublevels_up = sublevels_up; + context.target_varno_bms = NULL; /* * We try to initialize inserted_sublink to true if there is no need to @@ -1192,7 +1193,8 @@ replace_rte_variables_mutator(Node *node, { Var *var = (Var *) node; - if (var->varno == context->target_varno && + if ((var->varno == context->target_varno || + bms_is_member(var->varno, context->target_varno_bms)) && var->varlevelsup == context->sublevels_up) { /* Found a matching variable, make the substitution */ @@ -1583,3 +1585,138 @@ ReplaceVarsFromTargetList(Node *node, (void *) &context, outer_hasSubLinks); } + +static Node * +replace_rte_variables_1(Node *node, Bitmapset *target_varno_bms, int sublevels_up, + replace_rte_variables_callback callback, + 
void *callback_arg, + bool *outer_hasSubLinks) +{ + Node *result; + replace_rte_variables_context context; + + context.callback = callback; + context.callback_arg = callback_arg; + context.target_varno = 0; + context.target_varno_bms = target_varno_bms; + context.sublevels_up = sublevels_up; + + /* + * We try to initialize inserted_sublink to true if there is no need to + * detect new sublinks because the query already has some. + */ + if (node && IsA(node, Query)) + context.inserted_sublink = ((Query *) node)->hasSubLinks; + else if (outer_hasSubLinks) + context.inserted_sublink = *outer_hasSubLinks; + else + context.inserted_sublink = false; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + result = query_or_expression_tree_mutator(node, + replace_rte_variables_mutator, + (void *) &context, + 0); + + if (context.inserted_sublink) + { + if (result && IsA(result, Query)) + ((Query *) result)->hasSubLinks = true; + else if (outer_hasSubLinks) + *outer_hasSubLinks = true; + else + elog(ERROR, "replace_rte_variables inserted a SubLink, but has noplace to record it"); + } + + return result; +} + + +Node * +ReplaceVarsFromTargetList_CTE(Node *node, + Bitmapset *target_varno_bms, int sublevels_up, + List *targetlist, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno, + bool *outer_hasSubLinks) +{ + ReplaceVarsFromTargetList_context context; + + context.target_rte = NULL; + context.targetlist = targetlist; + context.nomatch_option = nomatch_option; + context.nomatch_varno = nomatch_varno; + + return replace_rte_variables_1(node, target_varno_bms, sublevels_up, + ReplaceVarsFromTargetList_callback, + (void *) &context, + outer_hasSubLinks); +} + +static Node * +ReplaceVarnoFromSubquery_callback(Var *var, + replace_rte_variables_context *context) +{ + ReplaceVarsFromTargetList_context *rcon = (ReplaceVarsFromTargetList_context *) context->callback_arg; + + if 
(var->varattno == InvalidAttrNumber) + { + /* Must expand whole-tuple reference into RowExpr */ + RowExpr *rowexpr; + List *colnames; + List *fields; + + /* + * If generating an expansion for a var of a named rowtype (ie, this + * is a plain relation RTE), then we must include dummy items for + * dropped columns. If the var is RECORD (ie, this is a JOIN), then + * omit dropped columns. Either way, attach column names to the + * RowExpr for use of ruleutils.c. + */ + expandRTE(rcon->target_rte, + var->varno, var->varlevelsup, var->location, + (var->vartype != RECORDOID), + &colnames, &fields); + /* Adjust the generated per-field Vars... */ + fields = (List *) replace_rte_variables_mutator((Node *) fields, + context); + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = var->vartype; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->colnames = colnames; + rowexpr->location = var->location; + + return (Node *) rowexpr; + } + + Var *newnode = (Var *)copyObject(var); + /* Make var to the subquery itself. 
*/ + ((Var *) newnode)->varno = 1; + return (Node *) newnode; +} + +Node * +ReplaceVarnoFromSubquery(Node *node, + int target_varno, int sublevels_up, + RangeTblEntry *target_rte, + List *targetlist, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno, + bool *outer_hasSubLinks) +{ + ReplaceVarsFromTargetList_context context; + + context.target_rte = target_rte; + context.targetlist = targetlist; + context.nomatch_option = nomatch_option; + context.nomatch_varno = nomatch_varno; + + return replace_rte_variables(node, target_varno, sublevels_up, + ReplaceVarnoFromSubquery_callback, + (void *)&context, + outer_hasSubLinks); +} \ No newline at end of file diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 354302afd54..b568ce97e05 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10064,6 +10064,10 @@ get_explain_guc_options(int *num, bool verbose, bool settings) explain = true; } + /* Temp fix EXPLAIN */ + if ((conf->flags & GUC_NO_EXPLAIN)) + continue; + if (!explain) continue; diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 7a4433cfa98..38aa6af0433 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -484,6 +484,16 @@ bool gp_allow_date_field_width_5digits = false; /* Avoid do a real REFRESH materialized view if possibile. 
*/ bool gp_enable_refresh_fast_path = true; +double cbdb_streaming_damping_factor; +int cbdb_streaming_damping_rows_threshold; +double cbdb_inner_join_selectivity_damping_factor; +bool cbdb_enable_multi_window_agg = true; +bool cbdb_eager_subplan = true; +double cbdb_dedup_semi_damping_factor; +bool cbdb_enable_setop_pre_dedup; +bool cbdb_enable_dynamic_shared_scan; +double cbdb_2phase_agg_cardinality_cap; + static const struct config_enum_entry gp_log_format_options[] = { {"text", 0}, {"csv", 1}, @@ -1883,8 +1893,7 @@ struct config_bool ConfigureNamesBool_gp[] = { {"gp_eager_two_phase_agg", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Eager two stage agg."), - NULL, - GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + NULL }, &gp_eager_two_phase_agg, false, NULL, NULL @@ -3387,6 +3396,50 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"cbdb_enable_multi_window_agg", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enable multi phase aggregations for window functions."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_enable_multi_window_agg, + true, + NULL, NULL, NULL + }, + + { + {"cbdb_eager_subplan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Eager SubPlan instead of InitPlan."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_eager_subplan, + true, + NULL, NULL, NULL + }, + + { + {"cbdb_enable_setop_pre_dedup", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enable do pre-deduplicate on subqueries of SetOp when there is no ALL."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_enable_setop_pre_dedup, + true, + NULL, NULL, NULL + }, + + { + {"cbdb_enable_dynamic_shared_scan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Choose Shared Scan dynamically according to costs even CTE has multiple references."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_enable_dynamic_shared_scan, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL 
@@ -4720,6 +4773,17 @@ struct config_int ConfigureNamesInt_gp[] = 5, 1, MAX_BACKENDS, check_max_running_tasks, NULL, NULL }, + + { + {"cbdb_streaming_damping_rows_threshold", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Set the threshold of using streaming damping"), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_streaming_damping_rows_threshold, + 1000, 0, INT_MAX, + NULL, NULL, NULL + }, /* End-of-list marker */ { @@ -4859,6 +4923,52 @@ struct config_real ConfigureNamesReal_gp[] = NULL, NULL, NULL }, + { + {"cbdb_streaming_damping_factor", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("streaming hash aggregate costs damping factor."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_streaming_damping_factor, + 0.95, 0.0, 1.0, + NULL, NULL, NULL + }, + + { + {"cbdb_inner_join_selectivity_damping_factor", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Damping of selectivities in inner join clauses."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_inner_join_selectivity_damping_factor, + 1.4, 1.0, DBL_MAX, + NULL, NULL, NULL + }, + + { + {"cbdb_dedup_semi_damping_factor", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Damping of dedup semi and dedup semi reverse join costs."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_dedup_semi_damping_factor, + 1.04, 1.0, DBL_MAX, + NULL, NULL, NULL + }, + + { + {"cbdb_2phase_agg_cardinality_cap", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Cardinality ratio threshold for capping partial group estimates in 2-phase aggregation."), + gettext_noop("When estimated groups exceed this fraction of input rows, " + "cap partial group count at 10% to favor 2-phase aggregation. 
" + "Set to 1.0 to disable the cap."), + GUC_NOT_IN_SAMPLE | GUC_NO_EXPLAIN + }, + &cbdb_2phase_agg_cardinality_cap, + 0.5, 0.0, 1.0, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL diff --git a/src/include/cdb/cdbgroupingpaths.h b/src/include/cdb/cdbgroupingpaths.h index 7d4067e140d..bf8319be17a 100644 --- a/src/include/cdb/cdbgroupingpaths.h +++ b/src/include/cdb/cdbgroupingpaths.h @@ -59,4 +59,17 @@ extern List *get_common_group_tles(PathTarget *target, extern CdbPathLocus choose_grouping_locus(PlannerInfo *root, Path *path, List *group_tles, bool *need_redistribute_p); + +extern Path * +cdb_create_pre_window_agg_path(PlannerInfo *root, + bool is_sorted, + int presorted_keys, + RelOptInfo *rel, + Path *subpath, + PathTarget *target, + List *group_pathkeys, + PathTarget *window_target, + List *window_functions, + WindowClause *wc); + #endif /* CDBGROUPINGPATHS_H */ diff --git a/src/include/cdb/cdbmutate.h b/src/include/cdb/cdbmutate.h index f7e29e6e027..a54de438d64 100644 --- a/src/include/cdb/cdbmutate.h +++ b/src/include/cdb/cdbmutate.h @@ -57,4 +57,6 @@ extern Plan *cdbpathtoplan_create_sri_plan(RangeTblEntry *rte, PlannerInfo *subr extern bool contains_outer_params(Node *node, void *context); +extern bool apply_shareinput_dag_to_tree_from_subplan; + #endif /* CDBMUTATE_H */ diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 6b8dbbc9916..d1d0305cb81 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -170,6 +170,7 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, ExprState *eqcomp, FmgrInfo *hashfunctions); extern void ResetTupleHashTable(TupleHashTable hashtable); +extern void DestroyTupleHashTable(TupleHashTable hashtable); /* * prototypes from functions in execJunk.c diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index b240d0ae555..bf803054223 100644 --- 
a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -292,6 +292,10 @@ typedef struct ParallelHashJoinState pg_atomic_uint32 distributor; /* counter for load balancing */ SharedFileSet fileset; /* space for shared temporary files */ + + /* Runtime filter merge support for parallel hash join */ + dsa_pointer rf_merge_buf; /* DSA pointer to shared merge buffer */ + pg_atomic_uint32 rf_merge_count; /* workers that finished merging */ } ParallelHashJoinState; /* The phases for building batches, used by build_barrier. */ diff --git a/src/include/lib/bloomfilter.h b/src/include/lib/bloomfilter.h index cb71e16864f..ee51672756c 100644 --- a/src/include/lib/bloomfilter.h +++ b/src/include/lib/bloomfilter.h @@ -27,5 +27,9 @@ extern double bloom_false_positive_rate(bloom_filter *filter); extern uint64 bloom_total_bits(bloom_filter *filter); extern bloom_filter *bloom_create_aggresive(int64 total_elems, int work_mem, uint64 seed); +extern Size bloom_bitset_bytes(bloom_filter *filter); +extern unsigned char *bloom_get_bitset(bloom_filter *filter); +extern uint64 bloom_get_seed(bloom_filter *filter); +extern int bloom_get_k(bloom_filter *filter); #endif /* BLOOMFILTER_H */ diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 3cd21590ac9..bed6af86f28 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -27,6 +27,7 @@ #include "cdb/cdbpathlocus.h" #include "foreign/foreign.h" +#include "access/attmap.h" /* * Relids @@ -137,6 +138,9 @@ typedef struct ApplyShareInputContext ApplyShareInputContextPerShare *shared_inputs; /* one for each share */ Bitmapset *qdShares; /* share_ids that are referenced from QD slices */ + char **ctenames; + bool *producer_from_subplan; + ShareInputScan **producer_parent_plans; } ApplyShareInputContext; /*---------- @@ -210,6 +214,8 @@ typedef struct PlannerGlobal int numSlices; struct PlanSlice *slices; + bool under_recursive_cte; + } PlannerGlobal; /* macro for fetching the 
Plan associated with a SubPlan node */ @@ -507,6 +513,13 @@ struct PlannerInfo bool is_from_orca; /* true if this PlannerInfo was created from Orca*/ Query *aqumv_raw_parse; /* Raw parse tree for AQUMV join exact-match */ + bool is_shared_scan; /* true for shared scan */ + Node *lower_window_filter; /* simple window function on subquery. */ + int lower_window_filter_winref; /* winref of subquery. */ + Node *upper_window_filter; /* simple window function from parent query.*/ + int upper_window_filter_winref; /* winref of subquery. */ + + Bitmapset *init_plan_ids; /* init SubPlans plan_ids for query */ }; /* @@ -526,6 +539,21 @@ typedef struct CtePlanInfo * The subroot corresponding to the subplan. */ PlannerInfo *subroot; + + /* the relations referred to shared cte. */ + List *rels; + + List *list_quals; + + Relids relids; + + bool push_quals_possible; + bool save_columns_possible; + + Bitmapset *attrs_used; + AttrMap *attr_map; + + Query *subquery; } CtePlanInfo; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index dd214cb9996..4e54b3c1fe6 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -24,6 +24,8 @@ #include "nodes/primnodes.h" #include "parsenodes.h" +typedef struct CtePlanInfo CtePlanInfo; + typedef struct DirectDispatchInfo { /* @@ -1248,6 +1250,11 @@ typedef struct ShareInputScan /* Could be set reference? */ bool ref_set; + + /* Underlying cte name. 
*/ + char* ctename; + + CtePlanInfo* cteplaninfo; } ShareInputScan; /* ---------------- diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index f8400206288..2bbeae81471 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -140,6 +140,7 @@ extern void extract_query_dependencies(Node *query, extern Node *negate_clause(Node *node); extern Expr *canonicalize_qual(Expr *qual, bool is_check); +extern Expr *convert_expr_to_cnf_complete(Expr *expr); /* in util/clauses.c: */ diff --git a/src/include/optimizer/planshare.h b/src/include/optimizer/planshare.h index 20ec6df0cc1..872c182808c 100644 --- a/src/include/optimizer/planshare.h +++ b/src/include/optimizer/planshare.h @@ -19,7 +19,7 @@ #include "nodes/plannodes.h" extern Plan *prepare_plan_for_sharing(PlannerInfo *root, Plan *common); -extern Plan *share_prepared_plan(PlannerInfo *root, Plan *common); +extern Plan *share_prepared_plan(PlannerInfo *root, Plan *common, char* ctename); #endif /* _PLANSHARE_H_ */ diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index d8445cac983..43d4b7a23a8 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -57,5 +57,16 @@ extern bool QueryHasDistributedRelation(Query *q, bool recursive); extern bool contain_outer_selfref(Node *node); extern bool testexpr_is_hashable(Node *testexpr, List *param_ids); +extern bool is_single_simple_query(PlannerInfo *root); + +extern bool contain_ShareInputScan(PlannerInfo *root, Node *node); + +#define SISC_NONE (0) +#define SISC_PRODUCER (1<<0) +#define SISC_CONSUMER (1<<1) + +extern int contain_ShareInputScan_detail(PlannerInfo *root, Node *node); + +extern bool contain_ModifyTable_plan(PlannerInfo *root, Plan* node); #endif /* SUBSELECT_H */ diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 9fa4e9b5bde..7b288328d93 100644 --- a/src/include/rewrite/rewriteManip.h +++ 
b/src/include/rewrite/rewriteManip.h @@ -29,6 +29,7 @@ struct replace_rte_variables_context replace_rte_variables_callback callback; /* callback function */ void *callback_arg; /* context data for callback function */ int target_varno; /* RTE index to search for */ + Bitmapset *target_varno_bms; /* RTE indexes to search for */ int sublevels_up; /* (current) nesting depth */ bool inserted_sublink; /* have we inserted a SubLink? */ }; @@ -86,4 +87,19 @@ extern Node *ReplaceVarsFromTargetList(Node *node, int nomatch_varno, bool *outer_hasSubLinks); +extern Node *ReplaceVarsFromTargetList_CTE(Node *node, + Bitmapset *target_varno_bms, int sublevels_up, + List *targetlist, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno, + bool *outer_hasSubLinks); +extern Node * +ReplaceVarnoFromSubquery(Node *node, + int target_varno, int sublevels_up, + RangeTblEntry *target_rte, + List *targetlist, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno, + bool *outer_hasSubLinks); + #endif /* REWRITEMANIP_H */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 652e0b451f3..b928d0739cc 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -247,6 +247,14 @@ typedef enum #define GUC_GPDB_NEED_SYNC 0x00400000 /* guc value is synced between master and primary */ #define GUC_GPDB_NO_SYNC 0x00800000 /* guc value is not synced between master and primary */ +/* + * FIXME: we have bad logic in get_explain_guc_options. + * Even GUCs that have no GUC_EXPLAIN flag are still shown by explain(verbose). + * It's a bug. However, there would be many more plan diffs if we fixed it now. + * So introduce a temporary flag as a workaround for newly added GUCs that should not be shown in explain. + */ +#define GUC_NO_EXPLAIN 0x01000000 /* guc value is not shown in explain */ + /* GUC lists for gp_guc_list_show(). 
(List of struct config_generic) */ extern List *gp_guc_list_for_explain; extern List *gp_guc_list_for_no_plan; @@ -869,4 +877,14 @@ extern bool gpvars_check_rg_query_fixed_mem(int *newval, void **extra, GucSource extern int guc_name_compare(const char *namea, const char *nameb); extern void DispatchSyncPGVariable(struct config_generic * gconfig); +extern double cbdb_streaming_damping_factor; +extern int cbdb_streaming_damping_rows_threshold; +extern double cbdb_inner_join_selectivity_damping_factor; +extern bool cbdb_enable_multi_window_agg; +extern bool cbdb_eager_subplan; +extern double cbdb_dedup_semi_damping_factor; +extern bool cbdb_enable_setop_pre_dedup; +extern bool cbdb_enable_dynamic_shared_scan; +extern double cbdb_2phase_agg_cardinality_cap; + #endif /* GUC_H */ diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 85ecb3548e6..7766c1d483f 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -36,6 +36,15 @@ "block_size", "bonjour", "bonjour_name", + "cbdb_2phase_agg_cardinality_cap", + "cbdb_dedup_semi_damping_factor", + "cbdb_eager_subplan", + "cbdb_enable_dynamic_shared_scan", + "cbdb_enable_multi_window_agg", + "cbdb_enable_setop_pre_dedup", + "cbdb_inner_join_selectivity_damping_factor", + "cbdb_streaming_damping_factor", + "cbdb_streaming_damping_rows_threshold", "check_function_bodies", "checkpoint_completion_target", "checkpoint_flush_after", diff --git a/src/test/isolation2/.gitignore b/src/test/isolation2/.gitignore index 3e18c982138..16a0565e226 100644 --- a/src/test/isolation2/.gitignore +++ b/src/test/isolation2/.gitignore @@ -30,6 +30,8 @@ sql/resgroup/resgroup_views.sql /gpdiff.pl /gpstringsubs.pl /data +/sql/ao_upgrade.sql +/expected/ao_upgrade.out # Generated subdirectories /results/ diff --git a/src/test/isolation2/expected/.gitignore b/src/test/isolation2/expected/.gitignore index 610e9b171dd..fb3f8414379 100644 --- a/src/test/isolation2/expected/.gitignore 
+++ b/src/test/isolation2/expected/.gitignore @@ -7,6 +7,7 @@ /pt_io_in_progress_deadlock.out /distributed_snapshot.out /local_directory_table_mixed.out +/hot_standby/query_conflict.out # ignores including sub-directories autovacuum-analyze.out diff --git a/src/test/isolation2/expected/ao_upgrade.out b/src/test/isolation2/expected/ao_upgrade.out new file mode 100644 index 00000000000..1874ef3b94a --- /dev/null +++ b/src/test/isolation2/expected/ao_upgrade.out @@ -0,0 +1,217 @@ +-- set_ao_formatversion forces an AO[CO] format to a specific version (the last +-- argument is set to true for a column-oriented table, and false otherwise). +CREATE OR REPLACE FUNCTION set_ao_formatversion(aosegrel oid, version smallint, isaocs bool) RETURNS bool AS '/home/gpadmin/work/gp95/src/test/isolation2/isolation2_regress.so', 'setAOFormatVersion' LANGUAGE C RETURNS NULL ON NULL INPUT; +CREATE + +DROP TABLE IF EXISTS ao_upgrade_test; +DROP +DROP TABLE IF EXISTS aocs_upgrade_test; +DROP +DROP TABLE IF EXISTS aocs_rle_upgrade_test; +DROP + +CREATE TABLE ao_upgrade_test (rowid int, n numeric) USING ao_row; +CREATE +CREATE TABLE aocs_upgrade_test (rowid int, n numeric) USING ao_column; +CREATE +CREATE TABLE aocs_rle_upgrade_test (rowid int, n numeric) USING ao_column WITH (compresstype=RLE_TYPE); +CREATE + +-- We want to load GPDB4 numerics into the table; to do that, add a direct cast +-- from bytea to numeric so we can hardcode what the GPDB4 data looked like. +CREATE CAST (bytea AS numeric) WITHOUT FUNCTION; +CREATE + +INSERT INTO ao_upgrade_test VALUES (1, '\x000003000c007a0d'::bytea::numeric), -- 12.345 (2, '\x00000000'::bytea::numeric), -- 0 (3, '\x000003400c007a0d'::bytea::numeric), -- -12.345 (4, '\x010000000100'::bytea::numeric), -- 10000 (5, '\xfeff0500e803'::bytea::numeric), -- 0.00001 (6, '\xfeff0900e803'::bytea::numeric), -- 0.000010000 (7, '\x190000000100'::bytea::numeric), -- 1e100 (8, '\x010000002400400b'::bytea::numeric), -- 9! 
(362880) (9, '\x000000c0'::bytea::numeric); -- NaN INSERT INTO aocs_upgrade_test VALUES (1, '\x000003000c007a0d'::bytea::numeric), -- 12.345 (2, '\x00000000'::bytea::numeric), -- 0 (3, '\x000003400c007a0d'::bytea::numeric), -- -12.345 (4, '\x010000000100'::bytea::numeric), -- 10000 (5, '\xfeff0500e803'::bytea::numeric), -- 0.00001 (6, '\xfeff0900e803'::bytea::numeric), -- 0.000010000 (7, '\x190000000100'::bytea::numeric), -- 1e100 (8, '\x010000002400400b'::bytea::numeric), -- 9! (362880) (9, '\x000000c0'::bytea::numeric); -- NaN +-- For the RLE test case, insert a bunch of identical numerics so they will be +-- run-length compressed. +INSERT INTO aocs_rle_upgrade_test (SELECT a, '\x010000002400400b'::bytea::numeric FROM generate_series(1, 10) a); +INSERT 10 + +-- Downgrade to GPDB4 (AO version 2). +--start_ignore +*U: SELECT set_ao_formatversion( (SELECT segrelid FROM pg_appendonly WHERE relid = 'ao_upgrade_test'::regclass), 2::smallint, false); + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) +*U: SELECT set_ao_formatversion( (SELECT segrelid FROM pg_appendonly WHERE relid = 'aocs_upgrade_test'::regclass), 2::smallint, true); + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) +*U: SELECT set_ao_formatversion( (SELECT segrelid FROM pg_appendonly WHERE relid = 'aocs_rle_upgrade_test'::regclass), 2::smallint, true); + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) + + set_ao_formatversion +---------------------- + t +(1 row) +--end_ignore + +-- Scan 
test. The numerics should be fixed again. +SELECT * FROM ao_upgrade_test; + rowid | n +-------+------------------------------------------------------------------------------------------------------- + 2 | 0 + 5 | 0.00001 + 6 | 0.000010000 + 9 | NaN + 1 | 12.345 + 3 | -12.345 + 4 | 10000 + 7 | 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 8 | 362880 +(9 rows) +SELECT * FROM aocs_upgrade_test; + rowid | n +-------+------------------------------------------------------------------------------------------------------- + 2 | 0 + 5 | 0.00001 + 6 | 0.000010000 + 9 | NaN + 4 | 10000 + 7 | 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 8 | 362880 + 1 | 12.345 + 3 | -12.345 +(9 rows) +SELECT * FROM aocs_rle_upgrade_test; + rowid | n +-------+-------- + 1 | 362880 + 2 | 362880 + 3 | 362880 + 4 | 362880 + 5 | 362880 + 6 | 362880 + 7 | 362880 + 8 | 362880 + 9 | 362880 + 10 | 362880 +(10 rows) + +-- Fetch test. To force fetches, we'll add bitmap indexes and disable sequential +-- scan. +CREATE INDEX ao_bitmap_index ON ao_upgrade_test USING bitmap(n); +CREATE +CREATE INDEX aocs_bitmap_index ON aocs_upgrade_test USING bitmap(n); +CREATE +CREATE INDEX aocs_rle_bitmap_index ON aocs_rle_upgrade_test USING bitmap(n); +CREATE + +SET enable_seqscan TO off; +SET + +-- Ensure we're using a bitmap scan for our tests. Upgrade note to developers: +-- the only thing that this test needs to verify is that a fetch-based scan is +-- in use. Other diffs are fine. 
+EXPLAIN SELECT n FROM ao_upgrade_test WHERE n = factorial(9); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=1000.36..1100.37 rows=1 width=9) + -> Bitmap Heap Scan on ao_upgrade_test (cost=1000.36..1100.37 rows=1 width=9) + Recheck Cond: n = 362880::numeric + -> Bitmap Index Scan on ao_bitmap_index (cost=0.00..1000.36 rows=1 width=0) + Index Cond: n = 362880::numeric + Settings: enable_seqscan=off + Optimizer status: Postgres query optimizer +(7 rows) +EXPLAIN SELECT n FROM aocs_upgrade_test WHERE n = factorial(9); + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=1000.36..1100.37 rows=1 width=9) + -> Bitmap Heap Scan on aocs_upgrade_test (cost=1000.36..1100.37 rows=1 width=9) + Recheck Cond: n = 362880::numeric + -> Bitmap Index Scan on aocs_bitmap_index (cost=0.00..1000.36 rows=1 width=0) + Index Cond: n = 362880::numeric + Settings: enable_seqscan=off + Optimizer status: Postgres query optimizer +(7 rows) +EXPLAIN SELECT n FROM aocs_rle_upgrade_test WHERE n = factorial(9); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=1000.36..1100.37 rows=1 width=9) + -> Bitmap Heap Scan on aocs_rle_upgrade_test (cost=1000.36..1100.37 rows=1 width=9) + Recheck Cond: n = 362880::numeric + -> Bitmap Index Scan on aocs_rle_bitmap_index (cost=0.00..1000.36 rows=1 width=0) + Index Cond: n = 362880::numeric + Settings: enable_seqscan=off + Optimizer status: Postgres query optimizer +(7 rows) + +SELECT n FROM ao_upgrade_test WHERE n = factorial(9); + n +-------- + 362880 +(1 row) +SELECT n FROM aocs_upgrade_test WHERE n = factorial(9); + n +-------- + 362880 +(1 row) +SELECT n FROM aocs_rle_upgrade_test WHERE n = 
factorial(9); + n +-------- + 362880 + 362880 + 362880 + 362880 + 362880 + 362880 + 362880 + 362880 + 362880 + 362880 +(10 rows) + +RESET enable_seqscan; +RESET + +DROP CAST (bytea AS numeric); +DROP +DROP FUNCTION set_ao_formatversion(oid, smallint, bool); +DROP diff --git a/src/test/isolation2/expected/gpdispatch.out b/src/test/isolation2/expected/gpdispatch.out index 67f569f65f4..8d41f912b26 100644 --- a/src/test/isolation2/expected/gpdispatch.out +++ b/src/test/isolation2/expected/gpdispatch.out @@ -51,6 +51,8 @@ SET SET 1: set max_parallel_workers_per_gather = 0; SET +1: set cbdb_enable_dynamic_shared_scan = off; +SET 1: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'suspend', 2); gp_inject_fault_infinite -------------------------- diff --git a/src/test/isolation2/expected/gpdispatch_1.out b/src/test/isolation2/expected/gpdispatch_1.out index edd307d6d14..393620b39e6 100644 --- a/src/test/isolation2/expected/gpdispatch_1.out +++ b/src/test/isolation2/expected/gpdispatch_1.out @@ -51,6 +51,8 @@ SET SET 1: set max_parallel_workers_per_gather = 0; SET +1: set cbdb_enable_dynamic_shared_scan = off; +SET 1: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'suspend', 2); gp_inject_fault_infinite -------------------------- diff --git a/src/test/isolation2/expected/hot_standby/query_conflict.out b/src/test/isolation2/expected/hot_standby/query_conflict.out new file mode 100644 index 00000000000..e0fba6222f7 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/query_conflict.out @@ -0,0 +1,470 @@ +-- Tests for query conflict detection and cancellation on the hot standby. + +---------------------------------------------------------------- +-- Various query conflcit cases for hot standy. +-- +-- All cases are written in this pattern: +-- 1. Start a standby transaction that will be conflicted and cancelled; +-- 2. Start a primary transaction that will conflict it; +-- 3. Commit the primary transaction. 
Since we are using remote_apply, it will +-- wait until the WAL is applied on the standby, which would happen only +-- after the standby query is cancelled; +-- 4. Run something on the standby transaction and see the conflict error, which +-- in some cases it's ERROR, in others it's FATAL. +-- 5. Quit, establish a new connection, and re-run +-- 6. Check the system view gp_stat_database_conflicts to see that the conflict +-- has been recorded. Note that we print the max count among all segments +-- to avoid flakiness. +-- See https://www.postgresql.org/docs/12/hot-standby.html#HOT-STANDBY-CONFLICT for more details. +---------------------------------------------------------------- + +-- We assume we start the test with clean records +-1S: select max(confl_tablespace), max(confl_lock), max(confl_snapshot), max(confl_bufferpin), max(confl_deadlock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max | max | max | max | max +-----+-----+-----+-----+----- + 0 | 0 | 0 | 0 | 0 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with explicit lock +--------------------------------------------------------------------- +create table hs_qc_lock(a int); +CREATE +insert into hs_qc_lock select * from generate_series(1,5); +INSERT 5 +-1S: begin; +BEGIN +-1S: select * from hs_qc_lock; + a +--- + 2 + 3 + 4 + 1 + 5 +(5 rows) +1: begin; +BEGIN +1: lock table hs_qc_lock in access exclusive mode; +LOCK +1: end; +END +-1S: select * from hs_qc_lock; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was holding a relation lock for too long. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... 
+-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 1 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with implicit lock +--------------------------------------------------------------------- +-1S: begin; +BEGIN +-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +1: alter table hs_qc_lock set access method ao_row; +ALTER +-1S: select * from hs_qc_lock; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was holding a relation lock for too long. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... +-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 2 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with drop database +--------------------------------------------------------------------- +1: create database hs_qc_dropdb; +CREATE +-1Sq: ... +-1S:@db_name hs_qc_dropdb: select 1; + ?column? +---------- + 1 +(1 row) +1: drop database hs_qc_dropdb; +DROP +-1S: select 1; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was connected to a database that must be dropped. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... +-- Stats aren't counted for database conflicts. 
See: pgstat_recv_recoveryconflict + +--------------------------------------------------------------------- +-- Conflict with VACUUM (snapshot) +--------------------------------------------------------------------- +1: create table hs_qc_vac1(a int); +CREATE +1: insert into hs_qc_vac1 select * from generate_series(1,10); +INSERT 10 +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select count(*) from hs_qc_vac1; + count +------- + 10 +(1 row) +1: delete from hs_qc_vac1; +DELETE 10 +1: vacuum hs_qc_vac1; +VACUUM +-1S: select count(*) from hs_qc_vac1; +DETAIL: User query might have needed to see row versions that must be removed. +ERROR: terminating connection due to conflict with recovery +HINT: In a moment you should be able to reconnect to the database and repeat your command. +-1Sq: ... +-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 1 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with VACUUM (buffer pin) +-- VACUUM of page that the standby is still holding buffer pin on, the difference with +-- the previous case is that here the deleted row is already invisible to the standby. 
+--------------------------------------------------------------------- +1: create table hs_qc_vac2(a int); +CREATE +1: insert into hs_qc_vac2 values(2); +INSERT 1 +1: delete from hs_qc_vac2; +DELETE 1 +-- run select once on the standby, so the next select will fetch data from buffer +-1S: select * from hs_qc_vac2; + a +--- +(0 rows) +-- suspend the standby at where it just unlocks the buffer but still holds the pin +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'suspend','','','hs_qc_vac2',1,1,0,dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-- we'll also make sure the startup process has sent out the signal before we let the standby backend release the pin +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'skip',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S&: select * from hs_qc_vac2; +1: vacuum hs_qc_vac2; +VACUUM +-- as mentioned before, make sure startup process has sent the signal, and then let the standby proceed +1: select gp_wait_until_triggered_fault('recovery_conflict_bufferpin_signal_sent', 1,dbid) from gp_segment_configuration where content=0 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-- should see the conflict +-1S<: <... completed> +ERROR: canceling statement due to conflict with recovery (seg0 slice1 127.0.1.1:7005 pid=17044) +DETAIL: User was holding shared buffer pin for too long. +-1Sq: ... 
+-- XXX: sometimes it shows the number is 2 instead of 1. It still validates the test but it would be nice to know why. +-1S: select max(confl_bufferpin) > 0 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + ?column? +---------- + t +(1 row) + +--------------------------------------------------------------------- +-- Conflict with drop (temp) tablespace +-- Note: regular user tablespaces won't cause conflict on the standby since the standby cannot create any objects under them. +--------------------------------------------------------------------- +-- create tablespace +!\retcode mkdir -p /home/gpadmin/work/gp95/src/test/isolation2/testtablespace/hs_tablespace_directory; +(exited with code 0) +create tablespace hs_ts location '/home/gpadmin/work/gp95/src/test/isolation2/testtablespace/hs_tablespace_directory'; +CREATE + +-- some prepartion on the primary +create table hs_ts_foo (i int, j int) distributed by(i); +CREATE +insert into hs_ts_foo select i, i from generate_series(1,800000)i; +INSERT 800000 +analyze hs_ts_foo; +ANALYZE + +-- make sure the standby won't run too fast and delete the temp files +select gp_inject_fault('after_open_temp_file', 'suspend',dbid) from gp_segment_configuration where content=1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- on the standby, run some query that requires workfile, this example is taken +-- from regress/temp_tablespaces test +-1S: set temp_tablespaces = hs_ts; +SET +-1S: set default_tablespace = hs_ts; +SET +-1S: set statement_mem='2MB'; +SET +-1S&: with a1 as (select * from hs_ts_foo), a2 as (select * from hs_ts_foo) select a1.i xx from a1 inner join a2 on a2.i = a1.i union all select count(a1.i) from a1 inner join a2 on a2.i = a1.i order by xx limit 5; + +-- drop tablespace, should see conflict on the hot standby +drop tablespace hs_ts; +DROP +select gp_inject_fault('after_open_temp_file', 'reset',dbid) from gp_segment_configuration where content=1 and role='m'; + 
gp_inject_fault +----------------- + Success: +(1 row) +-1S<: <... completed> +ERROR: canceling statement due to conflict with recovery (seg1 slice3 127.0.1.1:7006 pid=990) +DETAIL: User was or might have been using tablespace that must be dropped. +-1Sq: ... + +-- conflict has been recorded. The query has multiple slices +-1S: select max(confl_tablespace) >= 1 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + ?column? +---------- + t +(1 row) + +-- cleanup +!\retcode rm -rf /home/gpadmin/work/gp95/src/test/isolation2/testtablespace/hs_tablespace_directory; +GP_IGNORE:-- start_ignore +GP_IGNORE: +GP_IGNORE:-- end_ignore +(exited with code 0) +-- Do one checkpoint. Otherwise if server restarts w/o doing checkpoint (some subsequent +-- tests might do that), the server would complain it cannot find the directory for hs_ts. +checkpoint; +CHECKPOINT + +---------------------------------------------------------------- +-- Additional case to show that distributed transaction is not taken into +-- account w/o the help of restore-point-based distributed snapshot creation. +---------------------------------------------------------------- + +1: create table hs_qc_ds1(a int); +CREATE +1: insert into hs_qc_ds1 select * from generate_series(1,10); +INSERT 10 +-- standby starts a repeatable read transaction, runs a local query that +-- creates a distributed snapshot w/o creating QE. +-1S: select count(*) from hs_qc_ds1; + count +------- + 10 +(1 row) +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select relname from pg_class where relname = 'hs_qc_ds1'; + relname +----------- + hs_qc_ds1 +(1 row) +-- primary runs VACUUM +1: delete from hs_qc_ds1; +DELETE 10 +1: vacuum hs_qc_ds1; +VACUUM +-- The standby query in theory should be cancelled, because it started before +-- the VACUUM. 
But in reality, it doesn't, and sees 0 rows, because the QE for the +-- SELECT below will create more recent local snapshot that does not conflict with +-- the VACUUM, and sees the result of DELETE+VACUUM. +-- Note: with the help of restore point, we would be able to create local snapshot +-- precisely corresponding to each distributed snapshot, and do conflict detection accordingly. +-1S: select count(*) from hs_qc_ds1; + count +------- + 0 +(1 row) +-1S: end; +END + +---------------------------------------------------------------- +-- Test GUC hot_standby_feedback +---------------------------------------------------------------- +!\retcode gpconfig -c hot_standby_feedback -v on; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +1: create table hs_qc_guc1(a int); +CREATE +1: insert into hs_qc_guc1 select * from generate_series(1,10); +INSERT 10 + +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select * from hs_qc_guc1; + a +---- + 1 + 10 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) + +-- VACUUM won't cleanup this table since the standby still sees it +1: delete from hs_qc_guc1; +DELETE 10 +1: vacuum hs_qc_guc1; +VACUUM + +-- hot standby can still see those rows +-1S: select * from hs_qc_guc1; + a +---- + 1 + 10 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) + +-- after the conflicting read transaction ends, the next VACUUM will successfully vacuum the table +-1S: end; +END +1: vacuum hs_qc_guc1; +VACUUM +-1S: select * from hs_qc_guc1; + a +--- +(0 rows) +-1Sq: ... + +!\retcode gpconfig -r hot_standby_feedback; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +---------------------------------------------------------------- +-- Test GUC vacuum_defer_cleanup_age +---------------------------------------------------------------- +-- Use a GUC value that's not 0, so VACUUM does not clean up +-- recent dead rows that the hot standby might be still seeing. 
+!\retcode gpconfig -c vacuum_defer_cleanup_age -v 1; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +1: create table hs_qc_guc2(a int); +CREATE +1: insert into hs_qc_guc2 select * from generate_series(1,10); +INSERT 10 + +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- VACUUM won't cleanup this table since the DELETE is still within vacuum_defer_cleanup_age +1: delete from hs_qc_guc2; +DELETE 10 +1: vacuum hs_qc_guc2; +VACUUM + +-- showing all rows are deleted but not vacuumed +1: select count(*) from hs_qc_guc2; + count +------- + 0 +(1 row) +1: set gp_select_invisible to on; +SET +1: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- hot standby can still query the table +-1S: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- only if the age is reached, hot standby will see the same conflict as before +1: create temp table tt1(a int); +CREATE +1: vacuum hs_qc_guc2; +VACUUM +-1S: select count(*) from hs_qc_guc2; +ERROR: terminating connection due to conflict with recovery (seg0 slice1 127.0.1.1:7005 pid=18713) +DETAIL: User query might have needed to see row versions that must be removed. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +-1Sq: ... +-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 2 +(1 row) + +!\retcode gpconfig -r vacuum_defer_cleanup_age; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + diff --git a/src/test/isolation2/sql/ao_upgrade.sql b/src/test/isolation2/sql/ao_upgrade.sql new file mode 100644 index 00000000000..6803122f9d8 --- /dev/null +++ b/src/test/isolation2/sql/ao_upgrade.sql @@ -0,0 +1,86 @@ +-- set_ao_formatversion forces an AO[CO] format to a specific version (the last +-- argument is set to true for a column-oriented table, and false otherwise). 
+CREATE OR REPLACE FUNCTION set_ao_formatversion(aosegrel oid, version smallint, isaocs bool) + RETURNS bool + AS '/home/gpadmin/work/gp95/src/test/isolation2/isolation2_regress.so', 'setAOFormatVersion' + LANGUAGE C + RETURNS NULL ON NULL INPUT; + +DROP TABLE IF EXISTS ao_upgrade_test; +DROP TABLE IF EXISTS aocs_upgrade_test; +DROP TABLE IF EXISTS aocs_rle_upgrade_test; + +CREATE TABLE ao_upgrade_test (rowid int, n numeric) USING ao_row; +CREATE TABLE aocs_upgrade_test (rowid int, n numeric) USING ao_column; +CREATE TABLE aocs_rle_upgrade_test (rowid int, n numeric) USING ao_column WITH (compresstype=RLE_TYPE); + +-- We want to load GPDB4 numerics into the table; to do that, add a direct cast +-- from bytea to numeric so we can hardcode what the GPDB4 data looked like. +CREATE CAST (bytea AS numeric) WITHOUT FUNCTION; + +INSERT INTO ao_upgrade_test VALUES + (1, '\x000003000c007a0d'::bytea::numeric), -- 12.345 + (2, '\x00000000'::bytea::numeric), -- 0 + (3, '\x000003400c007a0d'::bytea::numeric), -- -12.345 + (4, '\x010000000100'::bytea::numeric), -- 10000 + (5, '\xfeff0500e803'::bytea::numeric), -- 0.00001 + (6, '\xfeff0900e803'::bytea::numeric), -- 0.000010000 + (7, '\x190000000100'::bytea::numeric), -- 1e100 + (8, '\x010000002400400b'::bytea::numeric), -- 9! (362880) + (9, '\x000000c0'::bytea::numeric); -- NaN +INSERT INTO aocs_upgrade_test VALUES + (1, '\x000003000c007a0d'::bytea::numeric), -- 12.345 + (2, '\x00000000'::bytea::numeric), -- 0 + (3, '\x000003400c007a0d'::bytea::numeric), -- -12.345 + (4, '\x010000000100'::bytea::numeric), -- 10000 + (5, '\xfeff0500e803'::bytea::numeric), -- 0.00001 + (6, '\xfeff0900e803'::bytea::numeric), -- 0.000010000 + (7, '\x190000000100'::bytea::numeric), -- 1e100 + (8, '\x010000002400400b'::bytea::numeric), -- 9! (362880) + (9, '\x000000c0'::bytea::numeric); -- NaN + +-- For the RLE test case, insert a bunch of identical numerics so they will be +-- run-length compressed. 
+INSERT INTO aocs_rle_upgrade_test (SELECT a, '\x010000002400400b'::bytea::numeric FROM generate_series(1, 10) a); + +-- Downgrade to GPDB4 (AO version 2). +--start_ignore +*U: SELECT set_ao_formatversion( + (SELECT segrelid FROM pg_appendonly WHERE relid = 'ao_upgrade_test'::regclass), + 2::smallint, false); +*U: SELECT set_ao_formatversion( + (SELECT segrelid FROM pg_appendonly WHERE relid = 'aocs_upgrade_test'::regclass), + 2::smallint, true); +*U: SELECT set_ao_formatversion( + (SELECT segrelid FROM pg_appendonly WHERE relid = 'aocs_rle_upgrade_test'::regclass), + 2::smallint, true); +--end_ignore + +-- Scan test. The numerics should be fixed again. +SELECT * FROM ao_upgrade_test; +SELECT * FROM aocs_upgrade_test; +SELECT * FROM aocs_rle_upgrade_test; + +-- Fetch test. To force fetches, we'll add bitmap indexes and disable sequential +-- scan. +CREATE INDEX ao_bitmap_index ON ao_upgrade_test USING bitmap(n); +CREATE INDEX aocs_bitmap_index ON aocs_upgrade_test USING bitmap(n); +CREATE INDEX aocs_rle_bitmap_index ON aocs_rle_upgrade_test USING bitmap(n); + +SET enable_seqscan TO off; + +-- Ensure we're using a bitmap scan for our tests. Upgrade note to developers: +-- the only thing that this test needs to verify is that a fetch-based scan is +-- in use. Other diffs are fine. 
+EXPLAIN SELECT n FROM ao_upgrade_test WHERE n = factorial(9); +EXPLAIN SELECT n FROM aocs_upgrade_test WHERE n = factorial(9); +EXPLAIN SELECT n FROM aocs_rle_upgrade_test WHERE n = factorial(9); + +SELECT n FROM ao_upgrade_test WHERE n = factorial(9); +SELECT n FROM aocs_upgrade_test WHERE n = factorial(9); +SELECT n FROM aocs_rle_upgrade_test WHERE n = factorial(9); + +RESET enable_seqscan; + +DROP CAST (bytea AS numeric); +DROP FUNCTION set_ao_formatversion(oid, smallint, bool); diff --git a/src/test/isolation2/sql/gpdispatch.sql b/src/test/isolation2/sql/gpdispatch.sql index 6455ec5eef8..fefeacba3a4 100644 --- a/src/test/isolation2/sql/gpdispatch.sql +++ b/src/test/isolation2/sql/gpdispatch.sql @@ -27,6 +27,7 @@ insert into test_waitevent select generate_series(1,1000); 1: set optimizer = off; 1: set gp_cte_sharing to on; 1: set max_parallel_workers_per_gather = 0; +1: set cbdb_enable_dynamic_shared_scan = off; 1: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'suspend', 2); 1&: WITH a1 as (select * from test_waitevent), a2 as (select * from test_waitevent) SELECT sum(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i UNION ALL SELECT count(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i; -- start_ignore diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index 60f8d491efb..2e3d650aba9 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -220,7 +220,7 @@ installcheck-parallel: all installcheck-cbdb-parallel: all tablespace-setup twophase_pqexecparams ( \ export PGOPTIONS='-c optimizer=off -c enable_parallel=true -c min_parallel_table_scan_size=0 -c min_parallel_index_scan_size=0 -c force_parallel_mode=1'; \ - $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule --schedule=$(srcdir)/greenplum_schedule --max-connections=5 $(EXTRA_TESTS) --exclude-tests explain \ + $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule 
--schedule=$(srcdir)/greenplum_schedule --max-connections=5 $(EXTRA_TESTS) --exclude-tests explain --exclude-tests direct_dispatch --exclude-tests bfv_dd --exclude-tests query_finish --exclude-tests qp_with_clause --exclude-tests workfile/sisc_mat_sort --exclude-tests external_table --exclude-tests temp_tablespaces \ ) installcheck-orca-parallel: all tablespace-setup twophase_pqexecparams diff --git a/src/test/regress/excluded_tests.conf b/src/test/regress/excluded_tests.conf index e29197946d4..b37b044d954 100644 --- a/src/test/regress/excluded_tests.conf +++ b/src/test/regress/excluded_tests.conf @@ -12,3 +12,8 @@ bfv_dd bfv_dd_multicolumn planhints rowhints +query_finish +qp_with_clause +workfile/sisc_mat_sort +external_table +temp_tablespaces diff --git a/src/test/regress/expected/agg_pushdown.out b/src/test/regress/expected/agg_pushdown.out index 9ed0c09f782..d65bab0a392 100644 --- a/src/test/regress/expected/agg_pushdown.out +++ b/src/test/regress/expected/agg_pushdown.out @@ -115,73 +115,74 @@ SET enable_seqscan TO off; EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i GROUP BY p.i; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: p.i, (avg(c1.v)) - -> Finalize HashAggregate + -> Finalize GroupAggregate Output: p.i, avg(c1.v) Group Key: p.i - -> Redistribute Motion 3:3 (slice2; segments: 3) + -> Sort Output: p.i, (PARTIAL avg(c1.v)) - Hash Key: p.i - -> Partial GroupAggregate - Output: p.i, PARTIAL avg(c1.v) - Group Key: p.i - -> Merge Join - Output: p.i, c1.v - Merge Cond: (p.i = c1.parent) - -> Sort - Output: 
p.i - Sort Key: p.i - -> Broadcast Motion 3:3 (slice3; segments: 3) + Sort Key: p.i + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: p.i, (PARTIAL avg(c1.v)) + Hash Key: p.i + -> Partial GroupAggregate + Output: p.i, PARTIAL avg(c1.v) + Group Key: p.i + -> Merge Join + Output: p.i, c1.v + Merge Cond: (p.i = c1.parent) + -> Sort Output: p.i - -> Seq Scan on public.agg_pushdown_parent p + Sort Key: p.i + -> Broadcast Motion 3:3 (slice3; segments: 3) Output: p.i - -> Sort - Output: c1.v, c1.parent - Sort Key: c1.parent - -> Seq Scan on public.agg_pushdown_child1 c1 + -> Seq Scan on public.agg_pushdown_parent p + Output: p.i + -> Sort Output: c1.v, c1.parent - Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_parallel = 'off', enable_seqscan = 'off', gp_enable_agg_pushdown = 'off', optimizer = 'off' + Sort Key: c1.parent + -> Seq Scan on public.agg_pushdown_child1 c1 + Output: c1.v, c1.parent + Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'off', gp_enable_agg_pushdown = 'off', optimizer = 'off' Optimizer: Postgres query optimizer -(28 rows) +(31 rows) SET gp_enable_agg_pushdown TO on; EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i GROUP BY p.i; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: p.i, (avg(c1.v)) - -> Finalize HashAggregate - Output: p.i, avg(c1.v) - Group Key: p.i - -> Redistribute Motion 3:3 (slice2; segments: 3) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: p.i, avg(c1.v) + Group Key: p.i + -> Gather Motion 
3:1 (slice1; segments: 3) + Output: p.i, (PARTIAL avg(c1.v)) + Merge Key: p.i + -> Merge Join Output: p.i, (PARTIAL avg(c1.v)) - Hash Key: p.i - -> Merge Join - Output: p.i, (PARTIAL avg(c1.v)) - Merge Cond: (p.i = c1.parent) - -> Sort + Merge Cond: (p.i = c1.parent) + -> Sort + Output: p.i + Sort Key: p.i + -> Broadcast Motion 3:3 (slice2; segments: 3) Output: p.i - Sort Key: p.i - -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on public.agg_pushdown_parent p Output: p.i - -> Seq Scan on public.agg_pushdown_parent p - Output: p.i - -> Sort - Output: c1.parent, (PARTIAL avg(c1.v)) - Sort Key: c1.parent - -> Partial HashAggregate - Output: c1.parent, PARTIAL avg(c1.v) - Group Key: c1.parent - -> Seq Scan on public.agg_pushdown_child1 c1 - Output: c1.j, c1.parent, c1.v - Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_parallel = 'off', enable_seqscan = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' + -> Sort + Output: c1.parent, (PARTIAL avg(c1.v)) + Sort Key: c1.parent + -> Partial HashAggregate + Output: c1.parent, PARTIAL avg(c1.v) + Group Key: c1.parent + -> Seq Scan on public.agg_pushdown_child1 c1 + Output: c1.j, c1.parent, c1.v + Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' Optimizer: Postgres query optimizer -(28 rows) +(26 rows) SET enable_seqscan TO on; -- Join "c1" to "p.x" column, i.e. one that is not in the GROUP BY clause. 
The @@ -267,49 +268,54 @@ EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Finalize GroupAggregate - Output: p.i, avg((c1.v + c2.v)) - Group Key: p.i - -> Gather Motion 3:1 (slice1; segments: 3) - Output: p.i, (PARTIAL avg((c1.v + c2.v))) - Merge Key: p.i - -> Partial GroupAggregate - Output: p.i, PARTIAL avg((c1.v + c2.v)) - Group Key: p.i - -> Sort - Output: p.i, c1.v, c2.v - Sort Key: p.i - -> Nested Loop - Output: p.i, c1.v, c2.v - Join Filter: (c1.parent = p.i) - -> Broadcast Motion 3:3 (slice2; segments: 3) - Output: p.i - -> Seq Scan on public.agg_pushdown_parent p - Output: p.i - -> Materialize - Output: c1.v, c1.parent, c2.v, c2.parent + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: p.i, (avg((c1.v + c2.v))) + -> Finalize GroupAggregate + Output: p.i, avg((c1.v + c2.v)) + Group Key: p.i + -> Sort + Output: p.i, (PARTIAL avg((c1.v + c2.v))) + Sort Key: p.i + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: p.i, (PARTIAL avg((c1.v + c2.v))) + Hash Key: p.i + -> Partial GroupAggregate + Output: p.i, PARTIAL avg((c1.v + c2.v)) + Group Key: p.i + -> Sort + Output: p.i, c1.v, c2.v + Sort Key: p.i -> Nested Loop - Output: c1.v, c1.parent, c2.v, c2.parent - Join Filter: ((c1.parent = c2.parent) AND (c1.j = c2.k)) - -> Seq Scan on public.agg_pushdown_child1 c1 - Output: c1.j, c1.parent, c1.v + Output: p.i, c1.v, c2.v + Join Filter: (c1.parent = p.i) + -> Broadcast Motion 3:3 (slice3; segments: 3) + Output: p.i + 
-> Seq Scan on public.agg_pushdown_parent p + Output: p.i -> Materialize - Output: c2.v, c2.parent, c2.k - -> Seq Scan on public.agg_pushdown_child2 c2 - Output: c2.v, c2.parent, c2.k - Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_parallel = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'off', optimizer = 'off' + Output: c1.v, c1.parent, c2.v, c2.parent + -> Nested Loop + Output: c1.v, c1.parent, c2.v, c2.parent + Join Filter: ((c1.parent = c2.parent) AND (c1.j = c2.k)) + -> Seq Scan on public.agg_pushdown_child1 c1 + Output: c1.j, c1.parent, c1.v + -> Materialize + Output: c2.v, c2.parent, c2.k + -> Seq Scan on public.agg_pushdown_child2 c2 + Output: c2.v, c2.parent, c2.k + Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'off', optimizer = 'off' Optimizer: Postgres query optimizer -(32 rows) +(37 rows) SET gp_enable_agg_pushdown TO on; EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: p.i, (avg((c1.v + c2.v))) -> Finalize GroupAggregate @@ -342,7 +348,7 @@ c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; Output: c2.v, c2.parent, c2.k -> Seq Scan on public.agg_pushdown_child2 c2 Output: c2.v, c2.parent, c2.k - Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_parallel = 'off', enable_seqscan = 'on', 
gp_enable_agg_pushdown = 'on', optimizer = 'off' + Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' Optimizer: Postgres query optimizer (34 rows) @@ -353,8 +359,8 @@ EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: p.i, (avg((c1.v + c2.v))) -> Finalize GroupAggregate @@ -387,7 +393,7 @@ c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; Output: c2.v, c2.parent, c2.k -> Seq Scan on public.agg_pushdown_child2 c2 Output: c2.v, c2.parent, c2.k - Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_parallel = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' + Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' Optimizer: Postgres query optimizer (34 rows) @@ -399,42 +405,48 @@ EXPLAIN (VERBOSE on, COSTS off) SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: p.i, (avg((c1.v + c2.v))) - -> HashAggregate + -> Finalize GroupAggregate Output: p.i, avg((c1.v + c2.v)) Group Key: p.i - -> Redistribute Motion 3:3 (slice2; segments: 3) - Output: p.i, c1.v, c2.v - Hash Key: p.i - -> Merge Join - Output: p.i, c1.v, c2.v - Merge Cond: (c1.parent = p.i) - -> Merge Join - Output: c1.v, c1.parent, c2.v, c2.parent - Merge Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k)) - -> Sort - Output: c1.v, c1.parent, c1.j - Sort Key: c1.parent, c1.j - -> Seq Scan on public.agg_pushdown_child1 c1 - Output: c1.v, c1.parent, c1.j - -> Sort - Output: c2.v, c2.parent, c2.k - Sort Key: c2.parent, c2.k - -> Seq Scan on public.agg_pushdown_child2 c2 - Output: c2.v, c2.parent, c2.k - -> Sort - Output: p.i - Sort Key: p.i - -> Broadcast Motion 3:3 (slice3; segments: 3) - Output: p.i - -> Seq Scan on public.agg_pushdown_parent p + -> Sort + Output: p.i, (PARTIAL avg((c1.v + c2.v))) + Sort Key: p.i + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: p.i, (PARTIAL avg((c1.v + c2.v))) + Hash Key: p.i + -> Partial GroupAggregate + Output: p.i, PARTIAL avg((c1.v + c2.v)) + Group Key: p.i + -> Merge Join + Output: p.i, c1.v, c2.v + Merge Cond: (c1.parent = p.i) + -> Merge Join + Output: c1.v, c1.parent, c2.v, c2.parent + Merge Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k)) + -> Sort + Output: c1.v, c1.parent, c1.j + Sort Key: c1.parent, c1.j + -> Seq Scan on public.agg_pushdown_child1 c1 + Output: c1.v, c1.parent, c1.j + -> Sort + Output: c2.v, c2.parent, c2.k + Sort Key: c2.parent, c2.k + -> Seq Scan on public.agg_pushdown_child2 c2 + Output: c2.v, c2.parent, c2.k + -> Sort Output: p.i - Settings: enable_hashjoin = 'off', enable_mergejoin = 'on', enable_nestloop = 'off', enable_parallel = 'off', enable_seqscan = 'off', gp_enable_agg_pushdown = 
'on', optimizer = 'off' + Sort Key: p.i + -> Broadcast Motion 3:3 (slice3; segments: 3) + Output: p.i + -> Seq Scan on public.agg_pushdown_parent p + Output: p.i + Settings: enable_hashjoin = 'off', enable_mergejoin = 'on', enable_nestloop = 'off', enable_seqscan = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' Optimizer: Postgres query optimizer -(33 rows) +(39 rows) SET enable_seqscan TO on; -- Clear tables @@ -801,6 +813,8 @@ CREATE TABLE vendor_pd (v_id int, v_name VARCHAR(20)) WITH (APPENDONLY=true, ORI NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'v_id' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE customer_pd (c_id int, c_v_id int, c_n_id int, c_type int, c_consumption int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE nation_pd (n_id int, n_name VARCHAR(20), n_type int, n_population int) WITH (APPENDONLY=true, ORIENTATION=column); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'n_id' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 51e113ef8b6..2b98f85a50f 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -3310,23 +3310,25 @@ having sum(tgb1.v3 * tgb2.v3) > 100 and sum(tgb1.v3) > 200; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=145986.91..145999.46 rows=4 width=12) - -> Finalize HashAggregate (cost=145986.91..145999.41 rows=1 width=12) + Gather Motion 3:1 (slice1; segments: 3) (cost=146024.23..146051.79 rows=4 width=12) + -> Finalize GroupAggregate (cost=146024.23..146051.73 rows=1 width=12) Group Key: tgb2.v2 Filter: ((sum((tgb1.v3 * tgb2.v3)) > 100) AND (sum((tgb1.v3 * tgb2.v3)) > 101) AND (sum((tgb1.v3 * tgb2.v3)) < 1020) AND (sum((tgb1.v3 * tgb2.v3)) <> 103) AND (sum((tgb1.v3 * tgb2.v3)) >= 104) AND (avg((tgb1.v3 * tgb2.v3)) <> '11'::numeric) AND (avg((tgb1.v3 * tgb2.v3)) <> '12'::numeric) AND (avg((tgb2.v1 * tgb1.v1)) <> '13'::numeric) AND (sum(tgb1.v3) > 200)) AggRefs(TargetList): [(0, 0)] - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=145944.41..145974.41 rows=1000 width=84) - Hash Key: tgb2.v2 - -> Streaming Partial HashAggregate (cost=145944.41..145954.41 rows=1000 width=84) - Group Key: tgb2.v2 - AggRefs(TargetList): [(0, 0), (1, 1), (2, 2), (3, 3)] - -> Hash Join (cost=618.25..105488.34 rows=2022803 width=20) - Hash Cond: (tgb1.v1 = tgb2.v1) - -> Seq Scan on tgb1 (cost=0.00..293.67 rows=25967 width=8) - -> Hash (cost=293.67..293.67 rows=25967 width=12) - -> Seq Scan on tgb2 (cost=0.00..293.67 rows=25967 width=12) + -> Sort 
(cost=146024.23..146026.73 rows=1000 width=84) + Sort Key: tgb2.v2 + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=145944.41..145974.41 rows=1000 width=84) + Hash Key: tgb2.v2 + -> Streaming Partial HashAggregate (cost=145944.41..145954.41 rows=1000 width=84) + Group Key: tgb2.v2 + AggRefs(TargetList): [(0, 0), (1, 1), (2, 2), (3, 3)] + -> Hash Join (cost=618.25..105488.34 rows=2022803 width=20) + Hash Cond: (tgb1.v1 = tgb2.v1) + -> Seq Scan on tgb1 (cost=0.00..293.67 rows=25967 width=8) + -> Hash (cost=293.67..293.67 rows=25967 width=12) + -> Seq Scan on tgb2 (cost=0.00..293.67 rows=25967 width=12) Optimizer: Postgres query optimizer -(16 rows) +(18 rows) select sum(tgb1.v3 * tgb2.v3) @@ -3365,23 +3367,25 @@ having sum(tgb1.v3 * tgb2.v3) > 100 and sum(tgb1.v3) > 200; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=145986.91..145999.46 rows=4 width=12) - -> Finalize HashAggregate (cost=145986.91..145999.41 rows=1 width=12) + Gather Motion 3:1 (slice1; segments: 3) (cost=146024.23..146051.79 rows=4 width=12) + -> Finalize GroupAggregate (cost=146024.23..146051.73 rows=1 width=12) Group Key: tgb2.v2 Filter: ((sum((tgb1.v3 * tgb2.v3)) > 100) AND (sum((tgb1.v3 * tgb2.v3)) > 101) AND (sum((tgb1.v3 * tgb2.v3)) < 1020) AND (sum((tgb1.v3 * tgb2.v3)) <> 103) AND (sum((tgb1.v3 * tgb2.v3)) >= 104) AND (avg((tgb1.v3 * tgb2.v3)) <> '11'::numeric) AND (avg((tgb1.v3 * tgb2.v3)) <> '12'::numeric) AND (avg((tgb2.v1 * tgb1.v1)) <> '13'::numeric) AND (sum(tgb1.v3) > 200)) AggRefs(TargetList): [(0, 0)] - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=145944.41..145974.41 rows=1000 width=84) 
- Hash Key: tgb2.v2 - -> Streaming Partial HashAggregate (cost=145944.41..145954.41 rows=1000 width=84) - Group Key: tgb2.v2 - AggRefs(TargetList): [(0, 0), (1, 1), (2, 2), (3, 3)] - -> Hash Join (cost=618.25..105488.34 rows=2022803 width=20) - Hash Cond: (tgb1.v1 = tgb2.v1) - -> Seq Scan on tgb1 (cost=0.00..293.67 rows=25967 width=8) - -> Hash (cost=293.67..293.67 rows=25967 width=12) - -> Seq Scan on tgb2 (cost=0.00..293.67 rows=25967 width=12) + -> Sort (cost=146024.23..146026.73 rows=1000 width=84) + Sort Key: tgb2.v2 + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=145944.41..145974.41 rows=1000 width=84) + Hash Key: tgb2.v2 + -> Streaming Partial HashAggregate (cost=145944.41..145954.41 rows=1000 width=84) + Group Key: tgb2.v2 + AggRefs(TargetList): [(0, 0), (1, 1), (2, 2), (3, 3)] + -> Hash Join (cost=618.25..105488.34 rows=2022803 width=20) + Hash Cond: (tgb1.v1 = tgb2.v1) + -> Seq Scan on tgb1 (cost=0.00..293.67 rows=25967 width=8) + -> Hash (cost=293.67..293.67 rows=25967 width=12) + -> Seq Scan on tgb2 (cost=0.00..293.67 rows=25967 width=12) Optimizer: Postgres query optimizer -(16 rows) +(18 rows) select sum(tgb1.v3 * tgb2.v3) diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index 608b4db85a4..33ea9af51b1 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -12,10 +12,12 @@ set enable_answer_query_using_materialized_views = on; -- drop views if there is no data populated begin; create incremental materialized view aqumv_mvt1_need_refresh as select * from aqumv_t1 where c1 = 2 with no data; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
set local enable_answer_query_using_materialized_views = on; explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_t1 @@ -28,8 +30,8 @@ explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; refresh materialized view aqumv_mvt1_need_refresh; analyze aqumv_mvt1_need_refresh; explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_mvt1_need_refresh @@ -41,6 +43,8 @@ explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; abort; begin; create incremental materialized view aqumv_mvt1_0 as select * from aqumv_t1 where c1 = 2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_0; set local enable_answer_query_using_materialized_views = off; select * from aqumv_t1 where c1 = 2; @@ -102,8 +106,8 @@ select c3, c2 from aqumv_t1 where c1 = 2; -- tlist matched. 
explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_mvt1_0 @@ -113,8 +117,8 @@ explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; (6 rows) explain(verbose, costs off) select c1, c2, c3 from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_mvt1_0 @@ -125,8 +129,8 @@ explain(verbose, costs off) select c1, c2, c3 from aqumv_t1 where c1 = 2; -- tlist partially matched. explain(verbose, costs off) select c2 from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c2 -> Seq Scan on aqumv.aqumv_mvt1_0 @@ -137,8 +141,8 @@ explain(verbose, costs off) select c2 from aqumv_t1 where c1 = 2; -- tlist disorder. 
explain(verbose, costs off) select c3, c2 from aqumv_t1 where c1 = 2; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c3, c2 -> Seq Scan on aqumv.aqumv_mvt1_0 @@ -150,6 +154,8 @@ explain(verbose, costs off) select c3, c2 from aqumv_t1 where c1 = 2; abort; begin; create incremental materialized view aqumv_mvt1_1 as select c2 as mc2, c3 as mc3, c1 as mc1, c2 as mc2_1 from aqumv_t1 where c1 = 3; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_1; set local enable_answer_query_using_materialized_views = off; select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; @@ -183,8 +189,8 @@ select c1, c1 from aqumv_t1 where c1 = 3; -- tlist alias. explain(verbose, costs off) select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: mc1, mc2 -> Seq Scan on aqumv.aqumv_mvt1_1 @@ -195,8 +201,8 @@ explain(verbose, costs off) select c1 as col1, c2 as col2 from aqumv_t1 where c1 -- duplicated projection. 
explain(verbose, costs off) select c1, c1 from aqumv_t1 where c1 = 3; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: mc1, mc1 -> Seq Scan on aqumv.aqumv_mvt1_1 @@ -210,6 +216,8 @@ begin; create incremental materialized view aqumv_mvt1_nonvar_expr as select c2, 1 as mc_const_1, sqrt(100) as mc_sqrt_100 from aqumv_t1 where c1 = 4; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c2' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_nonvar_expr; set local enable_answer_query_using_materialized_views = off; select c2, 200 from aqumv_t1 where c1 = 4; @@ -243,8 +251,8 @@ select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; -- Const are copied to output. 
explain(verbose, costs off) select c2, 200 from aqumv_t1 where c1 = 4; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c2, 200 -> Seq Scan on aqumv.aqumv_mvt1_nonvar_expr @@ -254,8 +262,8 @@ explain(verbose, costs off) select c2, 200 from aqumv_t1 where c1 = 4; (6 rows) explain(verbose, costs off) select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c2, 1, '10'::double precision -> Seq Scan on aqumv.aqumv_mvt1_nonvar_expr @@ -274,6 +282,8 @@ $$ LANGUAGE plpgsql IMMUTABLE; create incremental materialized view aqumv_mvt1_func_has_var as select c2, aqumv_func(c1, c3) as mc_func_res from aqumv_t1 where c1 = 5; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c2' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_func_has_var; set local enable_answer_query_using_materialized_views = off; select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; @@ -293,8 +303,8 @@ select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; -- Functions has Vars are replaced. 
explain(verbose, costs off) select c2, aqumv_func(c1, c3), aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c2, mc_func_res, mc_func_res -> Seq Scan on aqumv.aqumv_mvt1_func_has_var @@ -306,12 +316,14 @@ explain(verbose, costs off) select c2, aqumv_func(c1, c3), aqumv_func(c1, c3) fr abort; begin; create incremental materialized view aqumv_mvt1_2 as select c2 as mc2, c1 as mc1 from aqumv_t1 where c1 > 1 and c1 < 5; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_2; set local enable_answer_query_using_materialized_views = on; -- shoud be unable to use mv, projection doesn't exit in mv's tlist explain(verbose, costs off) select c3 from aqumv_t1 where c1 < 5 and c1 > 1; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c3 -> Seq Scan on aqumv.aqumv_t1 @@ -323,8 +335,8 @@ explain(verbose, costs off) select c3 from aqumv_t1 where c1 < 5 and c1 > 1; -- no post quals. 
explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: mc1, mc2 -> Seq Scan on aqumv.aqumv_mvt1_2 @@ -351,8 +363,8 @@ select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; (2 rows) explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: mc1, mc2 -> Seq Scan on aqumv.aqumv_mvt1_2 @@ -364,8 +376,8 @@ explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 -- should be unable to use mv, post quals has column doesn't exit in mv's tlist. explain(verbose, costs off) select * from aqumv_t1 where c1 < 5 and c1 > 1 and c3 > 1; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_t1 @@ -378,12 +390,14 @@ explain(verbose, costs off) select * from aqumv_t1 where c1 < 5 and c1 > 1 and c abort; begin; create incremental materialized view aqumv_mvt1_3 as select c2 as mc2, c1 as mc1, c3+1 as mc3 from aqumv_t1 where c1 > 5 and c1 < 10; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt1_3; set local enable_answer_query_using_materialized_views = on; -- should be unable to use mv, column c3 doesn't exist in mv's tlist. explain(verbose, costs off) select * from aqumv_t1 where c1 > 5 and c1 < 10; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c1, c2, c3 -> Seq Scan on aqumv.aqumv_t1 @@ -395,8 +409,8 @@ explain(verbose, costs off) select * from aqumv_t1 where c1 > 5 and c1 < 10; -- expr c3+1 is in mv's tlist explain(verbose, costs off) select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: mc1, mc3 -> Seq Scan on aqumv.aqumv_mvt1_3 @@ -407,8 +421,8 @@ explain(verbose, costs off) select c1 as col1, c3+1 as col2 from aqumv_t1 where -- expr c1+1 could be derived from mv's tlist explain(verbose, costs off) select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: ((mc1 + 1)), mc2, mc3 -> Seq Scan on aqumv.aqumv_mvt1_3 @@ -420,27 +434,27 @@ explain(verbose, costs off) select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; col1 | col2 ------+------ - 6 | 9 - 9 | 12 - 6 | 9 - 9 | 12 7 | 10 8 | 11 7 | 10 8 | 11 + 6 | 9 + 9 | 12 + 6 | 9 + 9 | 12 (8 rows) select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; col1 | c2 | col2 ------+----+------ - 7 | 7 | 9 - 10 | 10 | 12 
- 7 | 7 | 9 - 10 | 10 | 12 8 | 8 | 10 9 | 9 | 11 8 | 8 | 10 9 | 9 | 11 + 7 | 7 | 9 + 10 | 10 | 12 + 7 | 7 | 9 + 10 | 10 | 12 (8 rows) set local enable_answer_query_using_materialized_views = off; @@ -475,6 +489,8 @@ begin; create incremental materialized view aqumv_mvt1_4 as select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 from aqumv_t1 where c1 > 10 and c1 < 15; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_4; set local enable_answer_query_using_materialized_views = on; -- complex exprs @@ -506,14 +522,14 @@ set local enable_answer_query_using_materialized_views = off; select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; c1 | ?column? | ?column? ----+-------------------+---------- + 12 | 4.60555127546399 | 14 + 12 | 4.60555127546399 | 14 11 | 4.464101615137754 | 13 13 | 4.741657386773941 | 15 14 | 4.872983346207417 | 16 11 | 4.464101615137754 | 13 13 | 4.741657386773941 | 15 14 | 4.872983346207417 | 16 - 12 | 4.60555127546399 | 14 - 12 | 4.60555127546399 | 14 (8 rows) abort; @@ -522,6 +538,8 @@ begin; create incremental materialized view aqumv_mvt1_post_quals as select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 from aqumv_t1 where c1 > 20 and c1 < 30; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt1_post_quals; set local enable_answer_query_using_materialized_views = on; explain(verbose, costs off) @@ -570,6 +588,12 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; c1 | ?column? | ?column? ----+--------------------+---------- + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 22 | 5.795831523312719 | 24 24 | 6 | 26 27 | 6.291502622129181 | 29 @@ -582,12 +606,6 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 26 | 6.196152422706632 | 28 23 | 5.898979485566356 | 25 26 | 6.196152422706632 | 28 - 21 | 5.69041575982343 | 23 - 25 | 6.0990195135927845 | 27 - 28 | 6.385164807134504 | 30 - 21 | 5.69041575982343 | 23 - 25 | 6.0990195135927845 | 27 - 28 | 6.385164807134504 | 30 (18 rows) select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 @@ -608,16 +626,16 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 24 | 6 | 26 27 | 6.291502622129181 | 29 29 | 6.477225575051661 | 31 - 23 | 5.898979485566356 | 25 - 26 | 6.196152422706632 | 28 - 23 | 5.898979485566356 | 25 - 26 | 6.196152422706632 | 28 21 | 5.69041575982343 | 23 25 | 6.0990195135927845 | 27 28 | 6.385164807134504 | 30 21 | 5.69041575982343 | 23 25 | 6.0990195135927845 | 27 28 | 6.385164807134504 | 30 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 (18 rows) set local enable_answer_query_using_materialized_views = off; @@ -633,16 +651,16 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 24 | 6 | 26 27 | 6.291502622129181 | 29 29 | 6.477225575051661 | 31 - 23 | 5.898979485566356 | 25 - 26 | 6.196152422706632 | 28 - 23 | 5.898979485566356 | 25 - 26 | 6.196152422706632 | 28 21 | 5.69041575982343 | 23 25 | 6.0990195135927845 | 27 28 | 
6.385164807134504 | 30 21 | 5.69041575982343 | 23 25 | 6.0990195135927845 | 27 28 | 6.385164807134504 | 30 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 (18 rows) select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 @@ -655,12 +673,6 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; c1 | ?column? | ?column? ----+--------------------+---------- - 21 | 5.69041575982343 | 23 - 25 | 6.0990195135927845 | 27 - 28 | 6.385164807134504 | 30 - 21 | 5.69041575982343 | 23 - 25 | 6.0990195135927845 | 27 - 28 | 6.385164807134504 | 30 22 | 5.795831523312719 | 24 24 | 6 | 26 27 | 6.291502622129181 | 29 @@ -669,6 +681,12 @@ select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 24 | 6 | 26 27 | 6.291502622129181 | 29 29 | 6.477225575051661 | 31 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 23 | 5.898979485566356 | 25 26 | 6.196152422706632 | 28 23 | 5.898979485566356 | 25 @@ -682,6 +700,8 @@ set local enable_answer_query_using_materialized_views = on; create incremental materialized view aqumv_mvt1_candidate_0 as select c1 as mc1, c2 as mc2, abs(c2) as mc3 from aqumv_t1 where c1 > 30; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt1_candidate_0; -- choose aqumv_mvt1_candidate_0 explain(verbose, costs off) @@ -700,6 +720,8 @@ select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 create incremental materialized view aqumv_mvt1_candidate_1 as select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 from aqumv_t1 where c1 > 30 and c1 < 40; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_candidate_1; -- choose aqumv_mvt1_candidate_1 as it has lower cost(less rows). explain(verbose, costs off) @@ -718,6 +740,8 @@ select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 create incremental materialized view aqumv_mvt1_candidate_2 as select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 from aqumv_t1 where c1 > 30 and c1 < 500; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt1_candidate_2; -- still choose aqumv_mvt1_candidate_1 as it has lowest cost(less rows). explain(verbose, costs off) @@ -746,6 +770,8 @@ analyze aqumv_t2; create incremental materialized view aqumv_mvt2_0 as select c1 as mc1, c2 as mc2, c3 as mc3 from aqumv_t2 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt2_0; -- test aggregation functions supported in IVM. set local enable_answer_query_using_materialized_views = off; @@ -1109,6 +1135,8 @@ analyze aqumv_t2; create incremental materialized view aqumv_mvt2_1 as select c3 as mc3, c1 as mc1 from aqumv_t2 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt2_1; set local enable_answer_query_using_materialized_views = off; explain(costs off, verbose) @@ -1172,6 +1200,8 @@ analyze aqumv_t3; create incremental materialized view aqumv_mvt3_0 as select c1 as mc1, c2 as mc2, c3 as mc3 from aqumv_t3 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt3_0; -- Group By set local enable_answer_query_using_materialized_views = off; @@ -1199,12 +1229,12 @@ select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; 99 | 101 | 1 97 | 99 | 1 92 | 94 | 1 + 100 | 102 | 1 + 96 | 98 | 1 98 | 100 | 1 95 | 97 | 1 91 | 93 | 1 91 | 95 | 0 - 100 | 102 | 1 - 96 | 98 | 1 (11 rows) set local enable_answer_query_using_materialized_views = on; @@ -1226,6 +1256,8 @@ select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; c1 | c3 | count -----+-----+------- + 100 | 102 | 1 + 96 | 98 | 1 94 | 96 | 1 93 | 95 | 1 99 | 101 | 1 @@ -1235,8 +1267,6 @@ select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; 95 | 97 | 1 91 | 93 | 1 91 | 95 | 0 - 100 | 102 | 1 - 96 | 98 | 1 (11 rows) -- GROUPING SETS @@ -1271,14 +1301,14 @@ select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1) 98 | | 1 95 | | 1 91 | | 1 - | 101 | 1 | 93 | 1 + | 101 | 1 | 99 | 1 | 94 | 1 | 100 | 1 | 102 | 1 - | 96 | 1 | 97 | 1 + | 96 | 1 | 98 | 1 | 95 | 1 (20 rows) @@ -1308,8 +1338,8 @@ select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1) 99 | | 1 94 | | 1 100 | | 1 - 96 | | 1 97 | | 1 + 96 | | 1 98 | | 1 95 | | 1 91 | | 1 @@ -1559,10 +1589,14 @@ analyze aqumv_t4; create incremental materialized view aqumv_mvt4_0 as select c1 as mc1, c2 as mc2, c3 as mc3 from aqumv_t4 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt4_0; create incremental materialized view aqumv_mvt4_1 as select c2 as mc2, c1 as mc1 from aqumv_t4 where c1 > 95; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt4_1; -- HAVING clause pushed down to where quals. set local enable_answer_query_using_materialized_views = off; @@ -1588,10 +1622,10 @@ select c1, c3 from aqumv_t4 where c1 > 90 group by (c1, c3) having c3 > 97 ; select c1, c3 from aqumv_t4 where c1 > 90 group by (c1, c3) having c3 > 97 ; c1 | c3 -----+----- - 96 | 98 - 100 | 102 97 | 99 99 | 101 + 96 | 98 + 100 | 102 98 | 100 (5 rows) @@ -1615,11 +1649,11 @@ select c1, c3 from aqumv_t4 where c1 > 90 group by (c1, c3) having c3 > 97 ; select c1, c3 from aqumv_t4 where c1 > 90 group by (c1, c3) having c3 > 97 ; c1 | c3 -----+----- + 99 | 101 + 97 | 99 100 | 102 96 | 98 98 | 100 - 99 | 101 - 97 | 99 (5 rows) -- quals kept in HAVING clause. 
@@ -1646,10 +1680,10 @@ select c1, c3, avg(c2) from aqumv_t4 where c1 > 90 group by (c1, c3) having avg( -----+-----+---------------------- 99 | 101 | 100.0000000000000000 97 | 99 | 98.0000000000000000 - 98 | 100 | 99.0000000000000000 - 95 | 97 | 96.0000000000000000 100 | 102 | 101.0000000000000000 96 | 98 | 97.0000000000000000 + 98 | 100 | 99.0000000000000000 + 95 | 97 | 96.0000000000000000 (6 rows) set local enable_answer_query_using_materialized_views = on; @@ -1672,10 +1706,10 @@ select c1, c3, avg(c2) from aqumv_t4 where c1 > 90 group by (c1, c3) having avg( select c1, c3, avg(c2) from aqumv_t4 where c1 > 90 group by (c1, c3) having avg(c2) > 95; c1 | c3 | avg -----+-----+---------------------- - 98 | 100 | 99.0000000000000000 - 95 | 97 | 96.0000000000000000 99 | 101 | 100.0000000000000000 97 | 99 | 98.0000000000000000 + 98 | 100 | 99.0000000000000000 + 95 | 97 | 96.0000000000000000 100 | 102 | 101.0000000000000000 96 | 98 | 97.0000000000000000 (6 rows) @@ -1799,9 +1833,9 @@ select c1, avg(c2) from aqumv_t4 where c1 > 95 group by c1 having avg(c3) > 96; -----+---------------------- 99 | 100.0000000000000000 97 | 98.0000000000000000 - 98 | 99.0000000000000000 100 | 101.0000000000000000 96 | 97.0000000000000000 + 98 | 99.0000000000000000 (5 rows) -- Can use AQUMV. @@ -1856,10 +1890,10 @@ select c1, avg(c2) from aqumv_t4 where c1 > 90 group by c1 having avg(c3) > 96; -----+---------------------- 99 | 100.0000000000000000 97 | 98.0000000000000000 - 98 | 99.0000000000000000 - 95 | 96.0000000000000000 100 | 101.0000000000000000 96 | 97.0000000000000000 + 98 | 99.0000000000000000 + 95 | 96.0000000000000000 (6 rows) abort; @@ -2162,6 +2196,8 @@ analyze aqumv_t6; create incremental materialized view aqumv_mvt6_0 as select c1 as mc1, c2 as mc2 from aqumv_t6 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt6_0; -- DISTINCT \pset null NULL @@ -2282,6 +2318,8 @@ sum(c2) as sum_c2, sum(distinct c2) as sum_distinct_c2 from aqumv_t6 where c1 > create incremental materialized view aqumv_mvt6_1 as select c3 as mc3, c4 as mc4, c1 as mc1, c2 as mc2 from aqumv_t6 where c1 > 97; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aqumv_mvt6_1; set local enable_answer_query_using_materialized_views = off; explain(costs off, verbose) @@ -2457,6 +2495,8 @@ analyze aqumv_t7; create incremental materialized view aqumv_mvt7_0 as select c3 as cm3, c1 as mc1, c2 as mc2 from aqumv_t7 where c1 > 90; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mc1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aqumv_mvt7_0; -- LIMIT set local enable_answer_query_using_materialized_views = off; @@ -2745,11 +2785,11 @@ select * from aqumv_ext_r; 4 7 8 + 1 5 6 9 10 - 1 (10 rows) set local enable_answer_query_using_materialized_views = on; @@ -2810,12 +2850,12 @@ select * from aqumv_ext_mv; 6 9 10 + 1 2 3 4 7 8 - 1 (10 rows) INSERT INTO aqumv_ext_w SELECT * FROM generate_series(10, 15); @@ -2830,11 +2870,6 @@ refresh materialized view aqumv_ext_mv; select * from aqumv_ext_mv; id ---- - 2 - 3 - 4 - 7 - 8 1 12 15 @@ -2846,6 +2881,11 @@ select * from aqumv_ext_mv; 11 13 14 + 2 + 3 + 4 + 7 + 8 (16 rows) abort; @@ -3173,16 +3213,32 @@ abort; create table par(a int, b int, c int) partition by range(b) subpartition by range(c) subpartition template (start (1) end (3) every (1)) (start(1) end(3) every(1)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); create materialized view mv_par as select count(*) from par; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par1 as select count(*) from par_1_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par1_1 as select count(*) from par_1_prt_1_2_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par1_2 as select count(*) from par_1_prt_1_2_prt_2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par2 as select count(*) from par_1_prt_2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par2_1 as select count(*) from par_1_prt_2_2_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create materialized view mv_par_prune as select count(*) from par where b = 1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
set enable_answer_query_using_materialized_views = on; explain(costs off, verbose) select count(*) from par; @@ -3480,10 +3536,10 @@ select b, sum(a), c, count(b) from t0 group by c, b; select b, sum(a), c, count(b) from t0 group by c, b; b | sum | c | count ---+------+---+------- + 3 | 1024 | 4 | 512 6 | 2560 | 7 | 512 4 | 1536 | 5 | 512 5 | 2048 | 6 | 512 - 3 | 1024 | 4 | 512 2 | 512 | 3 | 512 (5 rows) @@ -3559,11 +3615,11 @@ select b + c + 1, sum(a) + count(b) from t0 group by c, b; select b + c + 1, sum(a) + count(b) from t0 group by c, b; ?column? | ?column? ----------+---------- - 8 | 1536 - 14 | 3072 12 | 2560 6 | 1024 10 | 2048 + 8 | 1536 + 14 | 3072 (5 rows) -- no qual, should not match @@ -3574,26 +3630,29 @@ select c, count(b) from t0 group by c ; ----------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c, (count(b)) - -> Finalize HashAggregate + -> Finalize GroupAggregate Output: c, count(b) Group Key: t0.c - -> Redistribute Motion 3:3 (slice2; segments: 3) + -> Sort Output: c, (PARTIAL count(b)) - Hash Key: c - -> Streaming Partial HashAggregate - Output: c, PARTIAL count(b) - Group Key: t0.c - -> Seq Scan on aqumv.t0 - Output: a, b, c, d + Sort Key: t0.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: c, (PARTIAL count(b)) + Hash Key: c + -> Streaming Partial HashAggregate + Output: c, PARTIAL count(b) + Group Key: t0.c + -> Seq Scan on aqumv.t0 + Output: a, b, c, d Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' Optimizer: Postgres query optimizer -(15 rows) +(18 rows) select c, count(b) from t0 group by c ; c | count ---+------- - 4 | 512 3 | 512 + 4 | 512 7 | 512 5 | 512 6 | 512 @@ -3606,26 +3665,29 @@ select c, count(b) from t0 group by c ; ---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: c, (count(b)) - -> Finalize HashAggregate + -> Finalize 
GroupAggregate Output: c, count(b) Group Key: t0.c - -> Redistribute Motion 3:3 (slice2; segments: 3) + -> Sort Output: c, (PARTIAL count(b)) - Hash Key: c - -> Streaming Partial HashAggregate - Output: c, PARTIAL count(b) - Group Key: t0.c - -> Seq Scan on aqumv.t0 - Output: a, b, c, d + Sort Key: t0.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: c, (PARTIAL count(b)) + Hash Key: c + -> Streaming Partial HashAggregate + Output: c, PARTIAL count(b) + Group Key: t0.c + -> Seq Scan on aqumv.t0 + Output: a, b, c, d Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' Optimizer: Postgres query optimizer -(15 rows) +(18 rows) select c, count(b) from t0 group by c ; c | count ---+------- - 4 | 512 3 | 512 + 4 | 512 7 | 512 5 | 512 6 | 512 @@ -3709,8 +3771,8 @@ select count(b), b, c from t0 where a > 3 group by b, c; select count(b), b, c from t0 where a > 3 group by b, c; count | b | c -------+---+--- - 512 | 5 | 6 512 | 6 | 7 + 512 | 5 | 6 (2 rows) set local enable_answer_query_using_materialized_views = on; @@ -3850,5 +3912,17 @@ reset optimizer; reset enable_answer_query_using_materialized_views; -- start_ignore drop schema aqumv cascade; +NOTICE: drop cascades to 11 other objects +DETAIL: drop cascades to table aqumv_t1 +drop cascades to table t1 +drop cascades to materialized view normal_mv_t1 +drop cascades to table par +drop cascades to materialized view mv_par +drop cascades to materialized view mv_par1 +drop cascades to materialized view mv_par1_1 +drop cascades to materialized view mv_par1_2 +drop cascades to materialized view mv_par2 +drop cascades to materialized view mv_par2_1 +drop cascades to materialized view mv_par_prune -- end_ignore reset search_path; diff --git a/src/test/regress/expected/bfv_aggregate.out b/src/test/regress/expected/bfv_aggregate.out index 472e2ca98b3..0ece7443127 100644 --- a/src/test/regress/expected/bfv_aggregate.out +++ b/src/test/regress/expected/bfv_aggregate.out @@ -102,7 +102,7 @@ 
select 1, to_char(col1, 'YYYY'), median(col2) from d group by 1, 2; -- -- SETUP create table toy(id,val) as select i,i from generate_series(1,5) i; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create aggregate mysum1(int4) (sfunc = int4_sum, combinefunc=int8pl, stype=bigint); create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint); @@ -1590,14 +1590,14 @@ create temp table mpp14125 as select repeat('a', a) a, a % 10 b from generate_se NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
explain select string_agg(a, '') from mpp14125 group by b; - QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=6.50..6.83 rows=10 width=36) - -> HashAggregate (cost=6.50..6.62 rows=4 width=36) + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=612.67..630.17 rows=1000 width=36) + -> HashAggregate (cost=612.67..616.83 rows=333 width=36) Group Key: b - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..6.00 rows=34 width=55) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..530.00 rows=16533 width=36) Hash Key: b - -> Seq Scan on mpp14125 (cost=0.00..4.00 rows=34 width=55) + -> Seq Scan on mpp14125 (cost=0.00..199.33 rows=16533 width=36) Optimizer: Postgres query optimizer (7 rows) @@ -1625,29 +1625,29 @@ insert into int2vectortab values select distinct t from int2vectortab group by distkey, t; t ------- - 1 - 1 2 - 1 2 3 22 + 1 2 3 + 1 2 + 1 (4 rows) select t from int2vectortab union select t from int2vectortab; t ------- + 22 1 - 1 2 1 2 3 - 22 + 1 2 (4 rows) select count(*) over (partition by t) from int2vectortab; count ------- - 1 1 1 2 2 + 1 (5 rows) select count(distinct t) from int2vectortab; @@ -1696,7 +1696,7 @@ SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y G Hash Key: b.y -> Seq Scan on pagg_tab2 b Optimizer: Postgres query optimizer -(16 rows) +(18 rows) SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; x | sum @@ -1741,26 +1741,26 @@ SELECT a.x, b.y, count(*) FROM pagg_tab1 a FULL JOIN pagg_tab2 b ON a.x = b.y GR SELECT a.x, b.y, count(*) FROM pagg_tab1 a FULL JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, b.y; x | y | count ----+----+------- - 26 | | 10 - 28 | | 10 4 | | 10 + 8 | | 10 | 15 | 10 + 28 | | 10 + 26 | | 10 6 | 
6 | 100 - 8 | | 10 - 2 | | 10 - 22 | | 10 - 10 | | 10 - | 9 | 10 - | 21 | 10 - 0 | 0 | 100 - 14 | | 10 + 16 | | 10 12 | 12 | 100 + 24 | 24 | 100 + 20 | | 10 | 3 | 10 - 16 | | 10 - | 27 | 10 18 | 18 | 100 - 20 | | 10 - 24 | 24 | 100 + | 27 | 10 + 14 | | 10 + | 9 | 10 + 0 | 0 | 100 + | 21 | 10 + 10 | | 10 + 22 | | 10 + 2 | | 10 (20 rows) -- @@ -1813,7 +1813,7 @@ select 1, median(col1) from group_by_const group by 1; -- Test GROUP BY with a RelabelType create table tx (c1 text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into tx values('hello'); EXPLAIN (COSTS OFF, VERBOSE ON) @@ -1827,8 +1827,8 @@ SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; Group Key: (tx.c1)::character varying -> Seq Scan on bfv_aggregate.tx Output: (c1)::character varying, c1 - Optimizer: Postgres query optimizer Settings: optimizer = 'off' + Optimizer: Postgres query optimizer (9 rows) SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; @@ -1951,40 +1951,43 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sur insert into ex1 select i,i,i from generate_series(1, 10) i; insert into ex2 select i,i,i from generate_series(1, 10) i; explain (verbose on, costs off) select ex2.b/2, sum(ex1.a) from ex1, (select a, coalesce(b, 1) b from ex2) ex2 where ex1.a = ex2.a group by ex2.b/2; - QUERY PLAN ----------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: ((COALESCE(ex2.b, 1) / 2)), (sum(ex1.a)) - -> Finalize HashAggregate + -> Finalize GroupAggregate Output: ((COALESCE(ex2.b, 1) / 2)), sum(ex1.a) Group Key: ((COALESCE(ex2.b, 1) / 2)) - -> Redistribute Motion 3:3 (slice2; segments: 3) + -> Sort Output: ((COALESCE(ex2.b, 1) / 2)), (PARTIAL sum(ex1.a)) - Hash Key: ((COALESCE(ex2.b, 1) / 2)) - -> Streaming Partial HashAggregate - Output: ((COALESCE(ex2.b, 1) / 2)), PARTIAL sum(ex1.a) - Group Key: (COALESCE(ex2.b, 1) / 2) - -> Hash Join - Output: (COALESCE(ex2.b, 1) / 2), ex1.a - Hash Cond: (ex1.a = ex2.a) - -> Seq Scan on bfv_aggregate.ex1 - Output: ex1.a, ex1.b, ex1.c - -> Hash - Output: ex2.b, ex2.a - -> Seq Scan on bfv_aggregate.ex2 + Sort Key: ((COALESCE(ex2.b, 1) / 2)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: ((COALESCE(ex2.b, 1) / 2)), (PARTIAL sum(ex1.a)) + Hash Key: ((COALESCE(ex2.b, 1) / 2)) + -> Streaming Partial HashAggregate + Output: ((COALESCE(ex2.b, 1) / 2)), PARTIAL sum(ex1.a) + Group Key: (COALESCE(ex2.b, 1) / 2) + -> Hash Join + Output: (COALESCE(ex2.b, 1) / 2), ex1.a + Hash Cond: (ex1.a = ex2.a) + -> Seq Scan on bfv_aggregate.ex1 + Output: ex1.a, ex1.b, ex1.c + -> Hash Output: ex2.b, ex2.a + -> Seq Scan on bfv_aggregate.ex2 + Output: ex2.b, ex2.a Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(22 rows) +(25 rows) select ex2.b/2, sum(ex1.a) from ex1, (select a, coalesce(b, 1) b from ex2) ex2 where ex1.a = ex2.a group by ex2.b/2; ?column? 
| sum ----------+----- - 4 | 17 2 | 9 3 | 13 - 1 | 5 + 4 | 17 0 | 1 + 1 | 5 5 | 10 (6 rows) @@ -2011,12 +2014,12 @@ explain (verbose on, costs off) SELECT b/2, sum(b) * (b/2) FROM ex1 GROUP BY b/ SELECT b/2, sum(b) * (b/2) FROM ex1 GROUP BY b/2; ?column? | ?column? ----------+---------- - 5 | 50 4 | 68 2 | 18 3 | 39 1 | 5 0 | 0 + 5 | 50 (6 rows) -- Test if Motion is placed between the "group by clauses" @@ -2172,7 +2175,7 @@ select * from t3 group by a, b, c; (3 rows) commit; -ERROR: duplicate key value violates unique constraint "t3_pkey" (seg1 127.0.0.1:7003 pid=86457) +ERROR: duplicate key value violates unique constraint "t3_pkey" (seg1 127.0.1.1:9203 pid=1582955) DETAIL: Key (a, b)=(1, 1) already exists. drop table t1, t2, t3, t4, t5, t6; -- CLEANUP diff --git a/src/test/regress/expected/cbdb_parallel.out b/src/test/regress/expected/cbdb_parallel.out index af975de50f4..c0c58ff8234 100644 --- a/src/test/regress/expected/cbdb_parallel.out +++ b/src/test/regress/expected/cbdb_parallel.out @@ -344,13 +344,29 @@ abort; begin; set local enable_parallel = on; create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%'; relname | reloptions ----------------+---------------------- @@ -458,8 +474,14 @@ explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 wh abort; create table ao1(x int, y int) with(appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table ao2(x int, y int) with(appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table aocs1(x int, y int) with(appendonly=true, orientation=column); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. begin; -- encourage use of parallel plans set local min_parallel_table_scan_size = 0; @@ -688,7 +710,11 @@ select count(c2) from t1; abort; -- test segfilecount according to data volume. 
create table ao_segfilecount(x int, y int) with(appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table aocs_segfilecount(x int, y int) with(appendonly=true, orientation=column); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. begin; set local gp_appendonly_insert_files = 5; set local gp_appendonly_insert_files_tuples_range = 10; @@ -770,10 +796,14 @@ drop table aocs_segfilecount; -- test gp_appendonly_insert_files doesn't take effect begin; create table t (x int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t select i from generate_series(1, 1000) i; set local gp_appendonly_insert_files=4; set local gp_appendonly_insert_files_tuples_range = 10; create table ao1 using ao_row as select * from t; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze ao1; select segfilecount from pg_appendonly where relid='ao1'::regclass; segfilecount @@ -782,6 +812,8 @@ select segfilecount from pg_appendonly where relid='ao1'::regclass; (1 row) create table ao2 with(appendonly=true) as select * from t; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze ao2; select segfilecount from pg_appendonly where relid='ao2'::regclass; segfilecount @@ -790,6 +822,8 @@ select segfilecount from pg_appendonly where relid='ao2'::regclass; (1 row) create table aocs1 using ao_column as select * from t; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aocs1; select segfilecount from pg_appendonly where relid='aocs1'::regclass; segfilecount @@ -798,6 +832,8 @@ select segfilecount from pg_appendonly where relid='aocs1'::regclass; (1 row) create table aocs2 with(appendonly=true, orientation=column) as select * from t; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze aocs2; select segfilecount from pg_appendonly where relid='aocs2'::regclass; segfilecount @@ -810,6 +846,8 @@ abort; begin; set local max_parallel_workers_per_gather = 2; create table t1(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt1(a int, b int) with(parallel_workers=2) distributed replicated; create table rt2(a int, b int) distributed replicated; create table rt3(a int, b int) distributed replicated; @@ -1032,8 +1070,6 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt2 on select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- - 1 | 2 | 1 | 1 | 1 | 2 - 2 | 3 | 1 | 2 | 2 | 3 5 | 6 | 5 | 5 | 5 | 6 6 | 7 | 6 | 6 | 6 | 7 9 | 10 | 9 | 9 | 9 | 10 @@ -1041,6 +1077,8 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 6 | 7 | 5 | 6 | 6 | 7 7 | 8 | 6 | 7 | 7 | 8 10 | 11 | 9 | 10 | 10 | 11 + 1 | 2 | 1 | 1 | 1 | 2 + 2 | 3 | 1 | 2 | 2 | 3 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 4 | 4 | 5 @@ -1093,6 +1131,13 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt2 on select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 5 | 6 | 6 | 7 + 6 | 7 | 6 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 9 | 10 | 10 | 11 + 10 | 11 | 10 | 10 | 10 | 11 1 | 2 | 1 | 1 | 1 | 2 2 | 3 | 1 | 2 | 2 | 3 2 | 3 | 2 | 2 | 2 | 3 @@ -1105,13 +1150,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 8 | 9 | 7 | 8 | 8 | 9 8 | 9 | 8 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 5 
| 6 | 6 | 7 - 6 | 7 | 6 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 9 | 10 | 10 | 11 - 10 | 11 | 10 | 10 | 10 | 11 (19 rows) -- @@ -1147,6 +1185,13 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; ----+----+----+----+----+---- 1 | 2 | 1 | 1 | 1 | 2 2 | 3 | 1 | 2 | 2 | 3 + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 6 | 6 | 6 | 7 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 10 | 10 | 10 | 11 + 6 | 7 | 5 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 10 | 11 | 9 | 10 | 10 | 11 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 4 | 4 | 5 @@ -1157,13 +1202,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 6 | 6 | 6 | 7 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 10 | 10 | 10 | 11 - 6 | 7 | 5 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 10 | 11 | 9 | 10 | 10 | 11 (19 rows) -- parallel join without parallel hash @@ -1200,12 +1238,14 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt3 on select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- - 1 | 2 | 1 | 1 | 1 | 2 5 | 6 | 5 | 5 | 5 | 6 6 | 7 | 6 | 6 | 6 | 7 9 | 10 | 9 | 9 | 9 | 10 10 | 11 | 10 | 10 | 10 | 11 - 2 | 3 | 1 | 2 | 2 | 3 + 6 | 7 | 5 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 10 | 11 | 9 | 10 | 10 | 11 + 1 | 2 | 1 | 1 | 1 | 2 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 4 | 4 | 5 @@ -1216,12 +1256,12 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 6 | 7 | 5 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 10 | 11 | 9 | 10 | 10 | 11 + 2 | 3 | 1 | 2 | 2 | 3 (19 rows) create table t2(a int, b int) with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt4(a int, b int) with(parallel_workers=2) distributed replicated; insert into t2 select i, i+1 from generate_series(1, 10) i; insert into rt4 select i, i+1 from generate_series(1, 10000) i; @@ -1231,7 +1271,6 @@ set local enable_parallel = off; select * from rt4 join t2 using(b); b | a | a ----+----+---- - 2 | 1 | 1 6 | 5 | 5 7 | 6 | 6 10 | 9 | 9 @@ -1241,6 +1280,7 @@ select * from rt4 join t2 using(b); 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 + 2 | 1 | 1 (10 rows) set local enable_parallel = on; @@ -1276,14 +1316,16 @@ select * from rt4 join t2 using(b); 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 - 2 | 1 | 1 6 | 5 | 5 7 | 6 | 6 10 | 9 | 9 11 | 10 | 10 + 2 | 1 | 1 (10 rows) create table t3(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into t3 select i, i+1 from generate_series(1, 9000) i; analyze t3; set local enable_parallel = off; @@ -1362,8 +1404,8 @@ explain(locus, costs off) select * from t_replica_workers_2 join t_random_worker select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 1 | 2 | 2 3 | 4 | 4 + 1 | 2 | 2 2 | 3 | 3 4 | 5 | 5 5 | 6 | 6 @@ -1374,11 +1416,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 1 | 2 | 2 - 3 | 4 | 4 - 2 | 3 | 3 4 | 5 | 5 5 | 6 | 6 + 3 | 4 | 4 + 1 | 2 | 2 + 2 | 3 | 3 (5 rows) abort; @@ -1419,11 +1461,11 @@ explain(locus, costs off) select * from t_replica_workers_2 right join t_random_ select * from t_replica_workers_2 right join t_random_workers_2 using(a); a | b | b ---+---+--- - 2 | 3 | 3 - 5 | 6 | 6 - 1 | 2 | 2 3 | 4 | 4 + 1 | 2 | 2 4 | 5 | 5 + 2 | 3 | 3 + 5 | 6 | 6 (5 rows) -- non parallel results @@ -1431,11 +1473,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 right join t_random_workers_2 using(a); a | b | b ---+---+--- - 5 | 6 | 6 1 | 2 | 2 - 3 | 4 | 4 4 | 5 | 5 2 | 3 | 3 + 5 | 6 | 6 + 3 | 4 | 4 (5 rows) abort; @@ -1477,9 +1519,9 @@ select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- 1 | 2 | 2 - 2 | 3 | 3 3 | 4 | 4 4 | 5 | 5 + 2 | 3 | 3 5 | 6 | 6 (5 rows) @@ -1488,11 +1530,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- + 1 | 2 | 2 3 | 4 | 4 4 | 5 | 5 - 5 | 6 | 6 - 1 | 2 | 2 2 | 3 | 3 + 5 | 6 | 6 (5 rows) abort; @@ -1503,6 +1545,8 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt1(a int, b int) with(parallel_workers=2) distributed replicated; insert into t1 select i, i from generate_series(1, 100000) i; insert into rt1 select i, i+1 from generate_series(1, 10000) i; @@ -1510,28 +1554,28 @@ analyze t1; analyze rt1; set local enable_parallel = on; explain(locus, costs off) select * from (select count(*) as a from t1) t1 left join rt1 on rt1.a = t1.a; - QUERY PLAN ------------------------------------------------------------- - Parallel Hash Right Join + QUERY PLAN +------------------------------------------------------ + Parallel Hash Left Join Locus: Entry - Hash Cond: (rt1.a = (count(*))) - -> Gather Motion 2:1 (slice1; segments: 2) - Locus: Entry - -> Parallel Seq Scan on rt1 - Locus: SegmentGeneralWorkers - Parallel Workers: 2 - -> Parallel Hash + Hash Cond: ((count(*)) = rt1.a) + -> Finalize Aggregate Locus: Entry - -> Finalize Aggregate + -> Gather Motion 6:1 (slice1; segments: 6) Locus: Entry - -> Gather Motion 6:1 (slice2; segments: 6) - Locus: Entry - -> Partial Aggregate + -> Partial Aggregate + Locus: HashedWorkers + Parallel Workers: 2 + -> Parallel Seq Scan on t1 Locus: HashedWorkers Parallel Workers: 2 - -> Parallel Seq Scan on t1 - Locus: HashedWorkers - Parallel Workers: 2 + -> Parallel Hash + Locus: Entry + -> Gather Motion 2:1 (slice2; segments: 2) + Locus: Entry + -> Parallel Seq Scan on rt1 + Locus: SegmentGeneralWorkers + Parallel Workers: 2 Optimizer: Postgres query optimizer (21 rows) @@ -1558,7 +1602,11 @@ begin; set local enable_parallel = on; set local max_parallel_workers_per_gather = 4; create table t1(a int, b int) with(parallel_workers=4); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with(parallel_workers=4); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt1(a int, b int) with(parallel_workers=4) distributed replicated; insert into t1 select i, i from generate_series(1, 10000000) i; insert into t2 select i, i from generate_series(1, 10000000) i; @@ -1649,7 +1697,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=3); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(b int, a int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into t1 select i, i+1 from generate_series(1, 10) i; insert into t2 select i, i+1 from generate_series(1, 5) i; analyze t1; @@ -1661,17 +1713,17 @@ explain(costs off) select * from t1 right join t2 on t1.b = t2.a; QUERY PLAN ------------------------------------------------------------------ Gather Motion 9:1 (slice1; segments: 9) - -> Parallel Hash Right Join - Hash Cond: (t1.b = t2.a) - -> Redistribute Motion 9:9 (slice2; segments: 9) - Hash Key: t1.b + -> Parallel Hash Left Join + Hash Cond: (t2.a = t1.b) + -> Redistribute Motion 6:9 (slice2; segments: 6) + Hash Key: t2.a Hash Module: 3 - -> Parallel Seq Scan on t1 + -> Parallel Seq Scan on t2 -> Parallel Hash - -> Redistribute Motion 6:9 (slice3; segments: 6) - Hash Key: t2.a + -> Redistribute Motion 9:9 (slice3; segments: 9) + Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on t2 + -> Parallel Seq Scan on t1 Optimizer: Postgres query optimizer (13 rows) @@ -1681,7 +1733,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i%10, i from generate_series(1, 5) i; insert into t1 values (100000); insert into t2 select i%10, i from generate_series(1, 100000) i; @@ -1690,34 +1746,34 @@ analyze t2; set local enable_parallel = on; -- parallel hash join with shared table, SinglQE as outer partial path. 
explain(locus, costs off) select * from (select count(*) as a from t2) t2 left join t1 on t1.a = t2.a; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 6:1 (slice1; segments: 6) Locus: Entry - -> Parallel Hash Right Join - Locus: HashedWorkers + -> Parallel Hash Left Join + Locus: Hashed Parallel Workers: 2 - Hash Cond: (t1.a = (count(*))) - -> Parallel Seq Scan on t1 - Locus: HashedWorkers - Parallel Workers: 2 - -> Parallel Hash + Hash Cond: ((count(*)) = t1.a) + -> Redistribute Motion 1:6 (slice2; segments: 1) Locus: Hashed - -> Redistribute Motion 1:6 (slice2; segments: 1) - Locus: Hashed - Parallel Workers: 2 - Hash Key: (count(*)) - Hash Module: 3 - -> Finalize Aggregate + Parallel Workers: 2 + Hash Key: (count(*)) + Hash Module: 3 + -> Finalize Aggregate + Locus: SingleQE + -> Gather Motion 6:1 (slice3; segments: 6) Locus: SingleQE - -> Gather Motion 6:1 (slice3; segments: 6) - Locus: SingleQE - -> Partial Aggregate + -> Partial Aggregate + Locus: HashedWorkers + Parallel Workers: 2 + -> Parallel Seq Scan on t2 Locus: HashedWorkers Parallel Workers: 2 - -> Parallel Seq Scan on t2 - Locus: HashedWorkers - Parallel Workers: 2 + -> Parallel Hash + Locus: Hashed + -> Parallel Seq Scan on t1 + Locus: HashedWorkers + Parallel Workers: 2 Optimizer: Postgres query optimizer (27 rows) @@ -1913,12 +1969,18 @@ begin; create table rt1(a int, b int) distributed replicated; create table rt2(a int, b int) with (parallel_workers = 0) distributed replicated; create table t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table t2(a int, b int) with (parallel_workers = 0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10000) i; insert into t2 select i, i+1 from generate_series(1, 10000) i; insert into rt1 select i, i+1 from generate_series(1, 10000) i; insert into rt2 select i, i+1 from generate_series(1, 10000) i; CREATE TABLE sq1 AS SELECT a, b FROM t1 WHERE gp_segment_id = 0; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. set local optimizer=off; set local enable_parallel=on; set local min_parallel_table_scan_size to 0; @@ -1975,7 +2037,7 @@ explain (locus, costs off) select * from rt1 union all select * from t1; -> Result Locus: Strewn Parallel Workers: 3 - One-Time Filter: (gp_execution_segment() = 1) + One-Time Filter: (gp_execution_segment() = 2) -> Parallel Seq Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 3 @@ -1999,7 +2061,7 @@ explain (locus, costs off) select * from rt1 union all select * from t2; -> Result Locus: Strewn Parallel Workers: 3 - One-Time Filter: (gp_execution_segment() = 1) + One-Time Filter: (gp_execution_segment() = 2) -> Parallel Seq Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 3 @@ -2076,6 +2138,8 @@ abort; -- begin; create table t1(c1 int, c2 int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 100000) i; analyze t1; set local optimizer = off; @@ -2143,6 +2207,8 @@ abort; -- begin; create table t1(c1 int, c2 int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 100000) i; analyze t1; set local optimizer = off; @@ -2362,6 +2428,8 @@ set local optimizer = off; set local enable_parallel = on; -- ao table create table ao (a INT, b INT) using ao_row; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into ao select i as a, i as b from generate_series(1, 100) AS i; alter table ao set (parallel_workers = 2); explain(costs off) select count(*) from ao; @@ -2383,6 +2451,8 @@ select count(*) from ao; alter table ao reset (parallel_workers); -- aocs table create table aocs (a INT, b INT) using ao_column; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into aocs select i as a, i as b from generate_series(1, 100) AS i; alter table aocs set (parallel_workers = 2); explain(costs off) select count(*) from aocs; @@ -2456,6 +2526,8 @@ select * from abort; begin; create table pagg_tab (a int, b int, c text, d int) partition by list(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table pagg_tab_p1 partition of pagg_tab for values in ('0000', '0001', '0002', '0003', '0004'); NOTICE: table has parent, setting distribution columns to match parent table create table pagg_tab_p2 partition of pagg_tab for values in ('0005', '0006', '0007', '0008'); @@ -2472,49 +2544,62 @@ set local enable_hashagg to false; set local enable_parallel = off; explain (costs off, locus) select c, sum(a), avg(b), count(*) from pagg_tab group by 1 having avg(d) < 15 order by 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------ Sort Locus: Entry Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) - -> Append + -> Merge Append Locus: Entry - -> Finalize GroupAggregate - Locus: Entry - Group Key: pagg_tab.c - Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Gather Motion 3:1 (slice1; segments: 3) - Locus: Entry - Merge Key: pagg_tab.c - -> Partial GroupAggregate + Sort Key: pagg_tab.c + -> Gather Motion 3:1 (slice1; segments: 3) + Locus: SingleQE + Merge Key: pagg_tab.c + -> Finalize GroupAggregate + Locus: Hashed + Group Key: pagg_tab.c + Filter: (avg(pagg_tab.d) < '15'::numeric) + -> Sort Locus: Hashed - Group Key: pagg_tab.c - -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) Locus: Hashed - Sort Key: pagg_tab.c - 
-> Seq Scan on pagg_tab_p1 pagg_tab + Hash Key: pagg_tab.c + -> Partial GroupAggregate Locus: Hashed - -> Finalize GroupAggregate - Locus: Entry - Group Key: pagg_tab_1.c - Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Gather Motion 3:1 (slice2; segments: 3) - Locus: Entry - Merge Key: pagg_tab_1.c - -> Partial GroupAggregate + Group Key: pagg_tab.c + -> Sort + Locus: Hashed + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + Locus: Hashed + -> Gather Motion 3:1 (slice3; segments: 3) + Locus: SingleQE + Merge Key: pagg_tab_1.c + -> Finalize GroupAggregate + Locus: Hashed + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Sort Locus: Hashed - Group Key: pagg_tab_1.c - -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice4; segments: 3) Locus: Hashed - Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + Hash Key: pagg_tab_1.c + -> Partial GroupAggregate Locus: Hashed + Group Key: pagg_tab_1.c + -> Sort + Locus: Hashed + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 + Locus: Hashed -> Finalize GroupAggregate - Locus: Entry + Locus: SingleQE Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) - -> Gather Motion 3:1 (slice3; segments: 3) - Locus: Entry + -> Gather Motion 3:1 (slice5; segments: 3) + Locus: SingleQE Merge Key: pagg_tab_2.c -> Partial GroupAggregate Locus: Hashed @@ -2525,7 +2610,7 @@ select c, sum(a), avg(b), count(*) from pagg_tab group by 1 having avg(d) < 15 o -> Seq Scan on pagg_tab_p3 pagg_tab_2 Locus: Hashed Optimizer: Postgres query optimizer -(51 rows) +(64 rows) abort; -- @@ -2536,7 +2621,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=3); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(b int, a int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10) i; insert into t2 select i, i+1 from generate_series(1, 5) i; analyze t1; @@ -2927,6 +3016,8 @@ abort; -- prepare, execute locus is null begin; create table t1(c1 int, c2 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze t1; prepare t1_count(integer) as select count(*) from t1; explain(locus, costs off) execute t1_count(1); @@ -2975,7 +3066,7 @@ create table t2_anti(a int, b int) with(parallel_workers=2) distributed by (b); insert into t2_anti values(generate_series(5, 10)); explain(costs off, verbose) select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_anti.a where t2_anti.a is null; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Output: t1_anti.a, t1_anti.b @@ -3032,10 +3123,10 @@ select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_ant select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_anti.a where t2_anti.a is null; a | b ---+--- + 1 | 2 | 3 | 4 | - 1 | (4 rows) set local enable_parallel_hash = off; @@ -3071,8 +3162,8 @@ select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_ant ---+--- 3 | 4 | - 2 | 1 | + 2 | (4 rows) abort; @@ -3101,18 
+3192,21 @@ insert into t_distinct_0 select * from t_distinct_0; analyze t_distinct_0; explain(costs off) select distinct a from t_distinct_0; - QUERY PLAN ------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> HashAggregate + Merge Key: a + -> GroupAggregate Group Key: a - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: a - -> Streaming HashAggregate - Group Key: a - -> Seq Scan on t_distinct_0 + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Streaming HashAggregate + Group Key: a + -> Seq Scan on t_distinct_0 Optimizer: Postgres query optimizer -(9 rows) +(12 rows) set enable_parallel = on; -- first stage HashAgg, second stage GroupAgg @@ -3135,7 +3229,7 @@ select distinct a from t_distinct_0; Optimizer: Postgres query optimizer (13 rows) -set parallel_query_use_streaming_hashagg = off; +set gp_use_streaming_hashagg = off; explain(costs off) select distinct a from t_distinct_0; QUERY PLAN @@ -3197,7 +3291,7 @@ select distinct a from t_distinct_0; Optimizer: Postgres query optimizer (10 rows) -set parallel_query_use_streaming_hashagg = on; +set gp_use_streaming_hashagg = on; explain(costs off) select distinct a from t_distinct_0; QUERY PLAN @@ -3235,6 +3329,8 @@ select distinct a, b from t_distinct_0; drop table if exists t_distinct_1; NOTICE: table "t_distinct_1" does not exist, skipping create table t_distinct_1(a int, b int) using ao_column; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into t_distinct_1 select * from t_distinct_0; analyze t_distinct_1; set enable_parallel = off; @@ -3486,24 +3582,26 @@ WHERE e.salary > ( SELECT AVG(salary) FROM employees WHERE department_id = e.department_id); - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------- - Gather Motion 6:1 (slice1; segments: 6) (cost=116.58..230.86 rows=3767 width=218) - -> Parallel Hash Join (cost=116.58..186.92 rows=628 width=218) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Gather Motion 6:1 (slice1; segments: 6) (cost=118.92..233.20 rows=3767 width=218) + -> Parallel Hash Join (cost=118.92..189.25 rows=628 width=218) Hash Cond: (e.department_id = "Expr_SUBQUERY".csq_c0) Join Filter: (e.salary > "Expr_SUBQUERY".csq_c1) -> Parallel Seq Scan on employees e (cost=0.00..52.83 rows=1883 width=254) - -> Parallel Hash (cost=110.33..110.33 rows=500 width=36) - -> Broadcast Workers Motion 6:6 (slice2; segments: 6) (cost=99.92..110.33 rows=500 width=36) - -> Subquery Scan on "Expr_SUBQUERY" (cost=99.92..103.67 rows=167 width=36) - -> HashAggregate (cost=99.92..102.00 rows=167 width=36) + -> Parallel Hash (cost=112.67..112.67 rows=500 width=36) + -> Broadcast Workers Motion 6:6 (slice2; segments: 6) (cost=102.25..112.67 rows=500 width=36) + -> Subquery Scan on "Expr_SUBQUERY" (cost=102.25..106.00 rows=167 width=36) + -> Finalize HashAggregate (cost=102.25..104.33 rows=167 width=36) Group Key: employees.department_id - -> Redistribute Motion 6:6 (slice3; segments: 6) (cost=0.00..90.50 rows=1883 width=36) + -> Redistribute Motion 6:6 (slice3; segments: 6) (cost=62.25..94.75 rows=1000 width=36) Hash Key: employees.department_id Hash Module: 3 - -> Parallel Seq Scan on employees (cost=0.00..52.83 rows=1883 width=36) + -> Streaming Partial HashAggregate (cost=62.25..74.75 rows=1000 width=36) + Group Key: 
employees.department_id + -> Parallel Seq Scan on employees (cost=0.00..52.83 rows=1883 width=36) Optimizer: Postgres query optimizer -(15 rows) +(17 rows) SELECT e.name FROM employees e @@ -3521,14 +3619,17 @@ WHERE e.salary > ( -- Test https://github.com/apache/cloudberry/issues/1376 -- create table t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2 (like t1); +NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table set gp_cte_sharing = on; explain(locus, costs off) with x as (select a, count(*) as b from t1 group by a union all select a, count(*) as b from t2 group by a) select count(*) from x a join x b on a.a = b.b; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------ Finalize Aggregate Locus: Entry -> Gather Motion 3:1 (slice1; segments: 3) @@ -3551,20 +3652,24 @@ explain(locus, costs off) with x as Locus: Hashed -> Shared Scan (share slice:id 1:0) Locus: Hashed - -> Append + -> Redistribute Motion 6:3 (slice3; segments: 6) Locus: Hashed - -> HashAggregate - Locus: Hashed - Group Key: t1.a - -> Seq Scan on t1 + Hash Key: t2.a + -> Parallel Append + Locus: HashedWorkers + Parallel Workers: 2 + -> HashAggregate Locus: Hashed - -> HashAggregate - Locus: Hashed - Group Key: t2.a - -> Seq Scan on t2 + Group Key: t2.a + -> Seq Scan on t2 + Locus: Hashed + -> HashAggregate Locus: Hashed + Group Key: t1.a + -> Seq Scan on t1 + Locus: Hashed Optimizer: Postgres query optimizer -(35 rows) +(39 rows) reset gp_cte_sharing; reset enable_parallel; @@ -3589,29 +3694,25 @@ set local min_parallel_table_scan_size = 0; -- 
12_P_12_10: Parallel Hash Full Join: HashedWorkers FULL JOIN HashedWorkers -> HashedOJ(parallel) explain(costs off, locus) select count(*) from pj_t1 full join pj_t2 using (id); - QUERY PLAN ----------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Finalize Aggregate Locus: Entry - -> Gather Motion 6:1 (slice1; segments: 6) + -> Gather Motion 3:1 (slice1; segments: 3) Locus: Entry -> Partial Aggregate Locus: HashedOJ - Parallel Workers: 2 - -> Parallel Hash Full Join + -> Hash Full Join Locus: HashedOJ - Parallel Workers: 2 Hash Cond: (pj_t1.id = pj_t2.id) - -> Parallel Seq Scan on pj_t1 - Locus: HashedWorkers - Parallel Workers: 2 - -> Parallel Hash + -> Seq Scan on pj_t1 Locus: Hashed - -> Parallel Seq Scan on pj_t2 - Locus: HashedWorkers - Parallel Workers: 2 + -> Hash + Locus: Hashed + -> Seq Scan on pj_t2 + Locus: Hashed Optimizer: Postgres query optimizer -(20 rows) +(16 rows) -- correctness: parallel result matches non-parallel set local enable_parallel = off; @@ -3641,16 +3742,16 @@ select count(*) from pj_t1 right join pj_t2 using (id); -> Partial Aggregate Locus: HashedWorkers Parallel Workers: 2 - -> Parallel Hash Right Join + -> Parallel Hash Left Join Locus: HashedWorkers Parallel Workers: 2 - Hash Cond: (pj_t1.id = pj_t2.id) - -> Parallel Seq Scan on pj_t1 + Hash Cond: (pj_t2.id = pj_t1.id) + -> Parallel Seq Scan on pj_t2 Locus: HashedWorkers Parallel Workers: 2 -> Parallel Hash Locus: Hashed - -> Parallel Seq Scan on pj_t2 + -> Parallel Seq Scan on pj_t1 Locus: HashedWorkers Parallel Workers: 2 Optimizer: Postgres query optimizer @@ -3739,14 +3840,7 @@ select count(*) from (pj_t1 full join pj_t2 using (id)) fj full join pj_t3 using abort; -- start_ignore -drop schema test_parallel cascade; -NOTICE: drop cascades to 6 other objects -DETAIL: drop cascades to table t_distinct_0 -drop cascades to table t_distinct_1 -drop cascades to table departments -drop cascades to table employees 
-drop cascades to table t1 -drop cascades to table t2 +--drop schema test_parallel cascade; -- end_ignore reset gp_appendonly_insert_files; reset force_parallel_mode; diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out index d702146b243..38352efe2e2 100644 --- a/src/test/regress/expected/create_view.out +++ b/src/test/regress/expected/create_view.out @@ -1934,8 +1934,8 @@ select pg_get_viewdef('tt25v', true); -- also check cases seen only in EXPLAIN explain (verbose, costs off) select * from tt24v; - QUERY PLAN --------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------ Hash Join Output: (share0_ref1.r).column2, ((ROW("*VALUES*".column1, "*VALUES*".column2))).column2 Hash Cond: (((ROW("*VALUES*".column1, "*VALUES*".column2))).column1 = (share0_ref1.r).column1) @@ -1949,8 +1949,9 @@ select * from tt24v; Output: share0_ref1.r -> Values Scan on "*VALUES*_1" Output: ROW("*VALUES*_1".column1, "*VALUES*_1".column2) + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(14 rows) +(15 rows) explain (verbose, costs off) select (r).column2 from (select r from (values(1,2),(3,4)) r limit 1) ss; diff --git a/src/test/regress/expected/cte_prune.out b/src/test/regress/expected/cte_prune.out index 84f6f0b7d10..6913304321d 100644 --- a/src/test/regress/expected/cte_prune.out +++ b/src/test/regress/expected/cte_prune.out @@ -4,30 +4,34 @@ set search_path = cte_prune; SET optimizer_trace_fallback = on; -- end_ignore create table t1(v1 int, v2 int, v3 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'v1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into t1 values(generate_series(1, 10), generate_series(11, 20), generate_series(21, 30)); analyze t1; create table t2(v1 int, v2 int, v3 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'v1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t2 values(generate_series(0, 100), generate_series(100, 200), generate_series(200, 300)); analyze t2; -- should pruned both seq scan and shared scan -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.16 rows=3 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v1 - -> Hash Right Join (cost=1.06..2.11 rows=1 width=4) + -> Hash Right Join Output: c11.v1 Hash Cond: (t1.v1 = c11.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Hash Output: c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.04 rows=1 width=4) + -> Subquery Scan on c11 Output: c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.04 rows=1 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 Filter: (t1_1.v1 < 5) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) @@ -40,85 +44,85 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 
left join c1 4 (4 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.16 rows=3 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v2 - -> Hash Right Join (cost=1.06..2.11 rows=1 width=4) + -> Hash Right Join Output: c11.v2 Hash Cond: (t1.v1 = c11.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.04..1.04 rows=1 width=8) + -> Hash Output: c11.v2, c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.04 rows=1 width=8) + -> Subquery Scan on c11 Output: c11.v2, c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.04 rows=1 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 Filter: (t1_1.v1 < 5) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; v2 ---- + 11 12 13 14 - 11 (4 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.16 rows=3 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN 
+------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v3 - -> Hash Right Join (cost=1.06..2.11 rows=1 width=4) + -> Hash Right Join Output: c11.v3 Hash Cond: (t1.v1 = c11.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.04..1.04 rows=1 width=8) + -> Hash Output: c11.v3, c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.04 rows=1 width=8) + -> Subquery Scan on c11 Output: c11.v3, c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.04 rows=1 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 Filter: (t1_1.v1 < 5) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; v3 ---- + 21 22 23 24 - 21 (4 rows) -- * also should be pruned -explain verbose with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.16 rows=3 width=4) +explain(costs off, verbose) with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v1 - -> Hash Right Join (cost=1.06..2.11 rows=1 width=4) + -> Hash Right Join Output: c11.v1 Hash Cond: (t1.v1 = c11.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Hash Output: c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.04 rows=1 width=4) + -> Subquery Scan on 
c11 Output: c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.04 rows=1 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 Filter: (t1_1.v1 < 5) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) @@ -132,39 +136,39 @@ with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 o (4 rows) -- no push filter -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; - QUERY PLAN ----------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.08..2.35 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v3 - -> Hash Right Join (cost=1.08..2.22 rows=3 width=4) + -> Hash Right Join Output: c11.v3 Hash Cond: (c22.v2 = c11.v1) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c22.v2 Hash Key: c22.v2 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=8) + -> Hash Output: c11.v3, c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.03 rows=3 width=8) + -> Subquery Scan on c11 Output: c11.v3, c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (20 rows) with c1 as (select v1, 
v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; v3 ---- + 21 26 25 30 29 - 21 27 23 28 @@ -172,28 +176,28 @@ with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 22 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; - QUERY PLAN ----------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.08..2.35 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v2 - -> Hash Right Join (cost=1.08..2.22 rows=3 width=4) + -> Hash Right Join Output: c11.v2 Hash Cond: (c22.v2 = c11.v1) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c22.v2 Hash Key: c22.v2 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=8) + -> Hash Output: c11.v2, c11.v1 - -> Subquery Scan on c11 (cost=0.00..1.03 rows=3 width=8) + -> Subquery Scan on c11 Output: c11.v2, c11.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (20 rows) @@ -205,39 +209,39 @@ with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 18 14 12 + 11 16 15 20 19 - 11 (10 rows) -- distribution col can be pruned which is better than do redistribute in CTE 
consumer -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.14..2.42 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v2 - -> Hash Left Join (cost=1.14..2.29 rows=3 width=4) + -> Hash Left Join Output: c11.v2 Hash Cond: (c11.v2 = c22.v2) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c11.v2 Hash Key: c11.v2 - -> Subquery Scan on c11 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c11 Output: c11.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.10..1.10 rows=3 width=4) + -> Hash Output: c22.v2 - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: c22.v2 Hash Key: c22.v2 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v2 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (23 rows) @@ -247,40 +251,40 @@ with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 18 16 19 + 12 + 15 + 20 13 14 17 11 - 12 - 15 - 20 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; - 
QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.14..2.42 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v3 - -> Hash Left Join (cost=1.14..2.29 rows=3 width=4) + -> Hash Left Join Output: c11.v3 Hash Cond: (c11.v3 = c22.v3) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c11.v3 Hash Key: c11.v3 - -> Subquery Scan on c11 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c11 Output: c11.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.10..1.10 rows=3 width=4) + -> Hash Output: c22.v3 - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: c22.v3 Hash Key: c22.v3 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v3 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (23 rows) @@ -301,26 +305,26 @@ with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 -- groupby/order by/window function/grouping set should be contains in CTE output -- group by -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; - QUERY PLAN 
------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.17..2.34 rows=10 width=12) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(t1.v1)), t1.v1 - -> HashAggregate (cost=2.17..2.20 rows=3 width=12) + -> HashAggregate Output: sum(t1.v1), t1.v1 Group Key: t1.v1 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=4) + -> Hash Left Join Output: t1.v1 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (18 rows) @@ -332,45 +336,42 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left jo 8 7 3 + 1 10 9 6 5 - 1 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.24..2.40 rows=10 width=12) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; + QUERY PLAN +------------------------------------------------------------------------- + Gather 
Motion 3:1 (slice1; segments: 3) Output: (sum(t1.v1)), t1.v2 - -> HashAggregate (cost=2.24..2.27 rows=3 width=12) + -> HashAggregate Output: sum(t1.v1), t1.v2 Group Key: t1.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1.08..2.22 rows=3 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v2, t1.v1 Hash Key: t1.v2 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v2, t1.v1 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (21 rows) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; sum ----- - 6 - 9 - 8 10 2 5 @@ -378,31 +379,34 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left jo 3 4 7 + 6 + 9 + 8 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.24..2.40 rows=10 width=12) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(t1.v3)), t1.v2 - -> HashAggregate 
(cost=2.24..2.27 rows=3 width=12) + -> HashAggregate Output: sum(t1.v3), t1.v2 Group Key: t1.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1.08..2.22 rows=3 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v2, t1.v3 Hash Key: t1.v2 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v2, t1.v3 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (21 rows) @@ -422,27 +426,27 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left jo (10 rows) -- order by -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; - QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, c22.v1 Merge Key: c22.v1 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v1, c22.v1 Sort Key: c22.v1 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v1, c22.v1 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> 
Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (19 rows) @@ -461,27 +465,27 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 10 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; - QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, c22.v3 Merge Key: c22.v3 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v1, c22.v3 Sort Key: c22.v3 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v1, c22.v3 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=8) + -> Hash Output: c22.v3, c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=8) + -> Subquery Scan on c22 Output: c22.v3, c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' 
Optimizer: Postgres query optimizer (19 rows) @@ -501,30 +505,30 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 (10 rows) -- window function -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - WindowAgg (cost=2.18..2.47 rows=10 width=12) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; + QUERY PLAN +------------------------------------------------------------------------- + WindowAgg Output: sum(t1.v1) OVER (?), t1.v2 Order By: t1.v2 - -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.v2, t1.v1 Merge Key: t1.v2 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v2, t1.v1 Sort Key: t1.v2 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v2, t1.v1 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (22 rows) @@ -543,30 +547,30 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) 55 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on 
c11.v1=c22.v1; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - WindowAgg (cost=2.18..2.47 rows=10 width=12) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; + QUERY PLAN +------------------------------------------------------------------------- + WindowAgg Output: sum(t1.v2) OVER (?), t1.v3 Order By: t1.v3 - -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.v3, t1.v2 Merge Key: t1.v3 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v3, t1.v2 Sort Key: t1.v3 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v3, t1.v2 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (22 rows) @@ -586,32 +590,32 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) (10 rows) -- grouping set -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); - QUERY PLAN ---------------------------------------------------------------------------------------------------- - GroupAggregate (cost=2.18..2.61 rows=21 width=16) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as 
c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); + QUERY PLAN +------------------------------------------------------------------------- + GroupAggregate Output: sum(t1.v2), t1.v1, t1.v2 Group Key: t1.v1, t1.v2 Group Key: t1.v1 Group Key: () - -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, t1.v2 Merge Key: t1.v1, t1.v2 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v1, t1.v2 Sort Key: t1.v1, t1.v2 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v1, t1.v2 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (24 rows) @@ -641,32 +645,32 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left jo 155 (21 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); - QUERY PLAN ---------------------------------------------------------------------------------------------------- - GroupAggregate (cost=2.18..2.61 rows=21 width=16) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); + QUERY PLAN +------------------------------------------------------------------------- + GroupAggregate Output: sum(t1.v2), t1.v2, t1.v3 Group Key: t1.v2, t1.v3 Group Key: t1.v2 Group Key: () 
- -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.18..2.32 rows=10 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) Output: t1.v2, t1.v3 Merge Key: t1.v2, t1.v3 - -> Sort (cost=2.18..2.19 rows=3 width=8) + -> Sort Output: t1.v2, t1.v3 Sort Key: t1.v2, t1.v3 - -> Hash Left Join (cost=1.08..2.15 rows=3 width=8) + -> Hash Left Join Output: t1.v2, t1.v3 Hash Cond: (t1.v1 = c22.v1) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=0.00..1.03 rows=3 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1, t1_1.v2, t1_1.v3 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (24 rows) @@ -686,38 +690,38 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) (10 rows) -- CTE producer should have right output -explain verbose with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) +explain(costs off, verbose) with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=366.90..742.37 rows=779 width=4) + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1 - -> Hash Left Join (cost=366.90..731.98 rows=260 width=4) + -> Hash Left Join Output: t1.v1 Hash Cond: (t1.v1 = c22.v1) - -> Hash Join (cost=1.08..362.58 rows=260 width=16) + -> Hash Join Output: t1.v1, t2.v1, t2.v2, t2.v3 Hash Cond: 
(t2.v1 = t1.v1) - -> Seq Scan on cte_prune.t2 (cost=0.00..293.67 rows=25967 width=12) + -> Seq Scan on cte_prune.t2 Output: t2.v1, t2.v2, t2.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: t1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 - -> Hash (cost=362.58..362.58 rows=260 width=4) + -> Hash Output: c22.v1 - -> Subquery Scan on c22 (cost=1.08..362.58 rows=260 width=4) + -> Subquery Scan on c22 Output: c22.v1 - -> Hash Join (cost=1.08..362.58 rows=260 width=16) + -> Hash Join Output: t1_1.v1, t2_1.v1, t2_1.v2, t2_1.v3 Hash Cond: (t2_1.v1 = t1_1.v1) - -> Seq Scan on cte_prune.t2 t2_1 (cost=0.00..293.67 rows=25967 width=12) + -> Seq Scan on cte_prune.t2 t2_1 Output: t2_1.v1, t2_1.v2, t2_1.v3 - -> Hash (cost=1.03..1.03 rows=3 width=4) + -> Hash Output: t1_1.v1 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=4) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v1 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (29 rows) @@ -737,44 +741,44 @@ select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; 1 (10 rows) -explain verbose with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) +explain(costs off, verbose) with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=2.39..2.67 rows=10 width=8) + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: c11.v1 - -> Hash Left Join (cost=2.39..2.54 rows=3 width=8) + -> Hash Left Join Output: c11.v1 Hash Cond: (c11.v1 = c22.v1) - -> Redistribute Motion 3:3 (slice2; segments: 3) 
(cost=1.12..1.23 rows=3 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c11.v1 Hash Key: c11.v1 - -> Subquery Scan on c11 (cost=1.12..1.16 rows=3 width=8) + -> Subquery Scan on c11 Output: c11.v1 - -> HashAggregate (cost=1.12..1.16 rows=3 width=20) + -> HashAggregate Output: sum(t1.v1), sum(t1.v2), t1.v3 Group Key: t1.v3 - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.10 rows=3 width=12) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: t1.v3, t1.v1, t1.v2 Hash Key: t1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v3, t1.v1, t1.v2 - -> Hash (cost=1.23..1.23 rows=3 width=8) + -> Hash Output: c22.v1 - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=1.12..1.23 rows=3 width=8) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: c22.v1 Hash Key: c22.v1 - -> Subquery Scan on c22 (cost=1.12..1.16 rows=3 width=8) + -> Subquery Scan on c22 Output: c22.v1 - -> HashAggregate (cost=1.12..1.16 rows=3 width=20) + -> HashAggregate Output: sum(t1_1.v1), sum(t1_1.v2), t1_1.v3 Group Key: t1_1.v3 - -> Redistribute Motion 3:3 (slice5; segments: 3) (cost=0.00..1.10 rows=3 width=12) + -> Redistribute Motion 3:3 (slice5; segments: 3) Output: t1_1.v3, t1_1.v1, t1_1.v2 Hash Key: t1_1.v3 - -> Seq Scan on cte_prune.t1 t1_1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 t1_1 Output: t1_1.v3, t1_1.v1, t1_1.v2 - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (35 rows) @@ -909,7 +913,7 @@ create table tpcds_web_sales primary key (ws_item_sk, ws_order_number) ); -- sql 23 -explain verbose with frequent_ss_items as +explain (costs off, verbose) with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from tpcds_store_sales ,tpcds_date_dim @@ -923,258 +927,258 @@ select t1.v1 from t1 where t1.v1 in (select item_sk from 
frequent_ss_items where and t1.v1 in (select item_sk from frequent_ss_items where item_sk > 0); QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=452.38..452.44 rows=3 width=4) + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1 - -> HashAggregate (cost=452.38..452.39 rows=1 width=4) + -> HashAggregate Output: t1.v1 Group Key: (RowIdExpr) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=451.80..452.38 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v1, (RowIdExpr) Hash Key: (RowIdExpr) - -> Hash Join (cost=451.80..452.36 rows=1 width=4) + -> Hash Join Output: t1.v1, (RowIdExpr) Hash Cond: (frequent_ss_items.item_sk = t1.v1) - -> Subquery Scan on frequent_ss_items (cost=223.67..224.14 rows=6 width=4) + -> Subquery Scan on frequent_ss_items Output: frequent_ss_items.item_sk - -> GroupAggregate (cost=223.67..224.14 rows=6 width=48) + -> GroupAggregate Output: (substr((tpcds_item.i_item_desc)::text, 1, 30)), tpcds_item.i_item_sk, tpcds_date_dim.d_date, count(*) Group Key: (substr((tpcds_item.i_item_desc)::text, 1, 30)), 
tpcds_item.i_item_sk, tpcds_date_dim.d_date Filter: (count(*) > 4) - -> Sort (cost=223.67..223.72 rows=18 width=40) + -> Sort Output: (substr((tpcds_item.i_item_desc)::text, 1, 30)), tpcds_item.i_item_sk, tpcds_date_dim.d_date Sort Key: (substr((tpcds_item.i_item_desc)::text, 1, 30)), tpcds_item.i_item_sk, tpcds_date_dim.d_date - -> Hash Join (cost=181.20..223.30 rows=18 width=40) + -> Hash Join Output: substr((tpcds_item.i_item_desc)::text, 1, 30), tpcds_item.i_item_sk, tpcds_date_dim.d_date Hash Cond: (tpcds_item.i_item_sk = tpcds_store_sales.ss_item_sk) - -> Seq Scan on cte_prune.tpcds_item (cost=0.00..39.33 rows=533 width=422) + -> Seq Scan on cte_prune.tpcds_item Output: tpcds_item.i_item_sk, tpcds_item.i_item_id, tpcds_item.i_rec_start_date, tpcds_item.i_rec_end_date, tpcds_item.i_item_desc, tpcds_item.i_current_price, tpcds_item.i_wholesale_cost, tpcds_item.i_brand_id, tpcds_item.i_brand, tpcds_item.i_class_id, tpcds_item.i_class, tpcds_item.i_category_id, tpcds_item.i_category, tpcds_item.i_manufact_id, tpcds_item.i_manufact, tpcds_item.i_size, tpcds_item.i_formulation, tpcds_item.i_color, tpcds_item.i_units, tpcds_item.i_container, tpcds_item.i_manager_id, tpcds_item.i_product_name - -> Hash (cost=180.98..180.98 rows=18 width=8) + -> Hash Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=89.63..180.98 rows=18 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date Hash Key: tpcds_store_sales.ss_item_sk - -> Hash Join (cost=89.63..180.62 rows=18 width=8) + -> Hash Join Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date Inner Unique: true Hash Cond: (tpcds_store_sales.ss_sold_date_sk = tpcds_date_dim.d_date_sk) - -> Seq Scan on cte_prune.tpcds_store_sales (cost=0.00..79.00 rows=4500 width=8) + -> Seq Scan on cte_prune.tpcds_store_sales Output: tpcds_store_sales.ss_sold_date_sk, tpcds_store_sales.ss_sold_time_sk, 
tpcds_store_sales.ss_item_sk, tpcds_store_sales.ss_customer_sk, tpcds_store_sales.ss_cdemo_sk, tpcds_store_sales.ss_hdemo_sk, tpcds_store_sales.ss_addr_sk, tpcds_store_sales.ss_store_sk, tpcds_store_sales.ss_promo_sk, tpcds_store_sales.ss_ticket_number, tpcds_store_sales.ss_quantity, tpcds_store_sales.ss_wholesale_cost, tpcds_store_sales.ss_list_price, tpcds_store_sales.ss_sales_price, tpcds_store_sales.ss_ext_discount_amt, tpcds_store_sales.ss_ext_sales_price, tpcds_store_sales.ss_ext_wholesale_cost, tpcds_store_sales.ss_ext_list_price, tpcds_store_sales.ss_ext_tax, tpcds_store_sales.ss_coupon_amt, tpcds_store_sales.ss_net_paid, tpcds_store_sales.ss_net_paid_inc_tax, tpcds_store_sales.ss_net_profit - -> Hash (cost=89.08..89.08 rows=44 width=8) + -> Hash Output: tpcds_date_dim.d_date, tpcds_date_dim.d_date_sk - -> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..89.08 rows=44 width=8) + -> Broadcast Motion 3:3 (slice4; segments: 3) Output: tpcds_date_dim.d_date, tpcds_date_dim.d_date_sk - -> Seq Scan on cte_prune.tpcds_date_dim (cost=0.00..88.50 rows=15 width=8) + -> Seq Scan on cte_prune.tpcds_date_dim Output: tpcds_date_dim.d_date, tpcds_date_dim.d_date_sk Filter: (tpcds_date_dim.d_year = ANY ('{1999,2000,2001,2002}'::integer[])) - -> Hash (cost=228.11..228.11 rows=1 width=8) + -> Hash Output: t1.v1, frequent_ss_items_1.item_sk, (RowIdExpr) - -> Hash Semi Join (cost=227.06..228.11 rows=1 width=8) + -> Hash Semi Join Output: t1.v1, frequent_ss_items_1.item_sk, RowIdExpr Hash Cond: (t1.v1 = frequent_ss_items_1.item_sk) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=227.05..227.05 rows=1 width=4) + -> Hash Output: frequent_ss_items_1.item_sk - -> Subquery Scan on frequent_ss_items_1 (cost=226.96..227.04 rows=1 width=4) + -> Subquery Scan on frequent_ss_items_1 Output: frequent_ss_items_1.item_sk - -> GroupAggregate (cost=226.96..227.04 rows=1 width=48) + -> GroupAggregate 
Output: (substr((tpcds_item_1.i_item_desc)::text, 1, 30)), tpcds_item_1.i_item_sk, tpcds_date_dim_1.d_date, count(*) Group Key: (substr((tpcds_item_1.i_item_desc)::text, 1, 30)), tpcds_item_1.i_item_sk, tpcds_date_dim_1.d_date Filter: (count(*) > 4) - -> Sort (cost=226.96..226.97 rows=3 width=40) + -> Sort Output: (substr((tpcds_item_1.i_item_desc)::text, 1, 30)), tpcds_item_1.i_item_sk, tpcds_date_dim_1.d_date Sort Key: (substr((tpcds_item_1.i_item_desc)::text, 1, 30)), tpcds_item_1.i_item_sk, tpcds_date_dim_1.d_date - -> Hash Join (cost=132.52..226.94 rows=3 width=40) + -> Hash Join Output: substr((tpcds_item_1.i_item_desc)::text, 1, 30), tpcds_item_1.i_item_sk, tpcds_date_dim_1.d_date Inner Unique: true Hash Cond: (tpcds_store_sales_1.ss_item_sk = tpcds_item_1.i_item_sk) - -> Redistribute Motion 3:3 (slice5; segments: 3) (cost=89.63..183.99 rows=6 width=8) + -> Redistribute Motion 3:3 (slice5; segments: 3) Output: tpcds_store_sales_1.ss_item_sk, tpcds_date_dim_1.d_date Hash Key: tpcds_store_sales_1.ss_item_sk - -> Hash Join (cost=89.63..183.87 rows=6 width=8) + -> Hash Join Output: tpcds_store_sales_1.ss_item_sk, tpcds_date_dim_1.d_date Inner Unique: true Hash Cond: (tpcds_store_sales_1.ss_sold_date_sk = tpcds_date_dim_1.d_date_sk) - -> Seq Scan on cte_prune.tpcds_store_sales tpcds_store_sales_1 (cost=0.00..90.25 rows=1500 width=8) + -> Seq Scan on cte_prune.tpcds_store_sales tpcds_store_sales_1 Output: tpcds_store_sales_1.ss_sold_date_sk, tpcds_store_sales_1.ss_sold_time_sk, tpcds_store_sales_1.ss_item_sk, tpcds_store_sales_1.ss_customer_sk, tpcds_store_sales_1.ss_cdemo_sk, tpcds_store_sales_1.ss_hdemo_sk, tpcds_store_sales_1.ss_addr_sk, tpcds_store_sales_1.ss_store_sk, tpcds_store_sales_1.ss_promo_sk, tpcds_store_sales_1.ss_ticket_number, tpcds_store_sales_1.ss_quantity, tpcds_store_sales_1.ss_wholesale_cost, tpcds_store_sales_1.ss_list_price, tpcds_store_sales_1.ss_sales_price, tpcds_store_sales_1.ss_ext_discount_amt, tpcds_store_sales_1.ss_ext_sales_price, 
tpcds_store_sales_1.ss_ext_wholesale_cost, tpcds_store_sales_1.ss_ext_list_price, tpcds_store_sales_1.ss_ext_tax, tpcds_store_sales_1.ss_coupon_amt, tpcds_store_sales_1.ss_net_paid, tpcds_store_sales_1.ss_net_paid_inc_tax, tpcds_store_sales_1.ss_net_profit Filter: (tpcds_store_sales_1.ss_item_sk > 0) - -> Hash (cost=89.08..89.08 rows=44 width=8) + -> Hash Output: tpcds_date_dim_1.d_date, tpcds_date_dim_1.d_date_sk - -> Broadcast Motion 3:3 (slice6; segments: 3) (cost=0.00..89.08 rows=44 width=8) + -> Broadcast Motion 3:3 (slice6; segments: 3) Output: tpcds_date_dim_1.d_date, tpcds_date_dim_1.d_date_sk - -> Seq Scan on cte_prune.tpcds_date_dim tpcds_date_dim_1 (cost=0.00..88.50 rows=15 width=8) + -> Seq Scan on cte_prune.tpcds_date_dim tpcds_date_dim_1 Output: tpcds_date_dim_1.d_date, tpcds_date_dim_1.d_date_sk Filter: (tpcds_date_dim_1.d_year = ANY ('{1999,2000,2001,2002}'::integer[])) - -> Hash (cost=40.67..40.67 rows=178 width=422) + -> Hash Output: tpcds_item_1.i_item_desc, tpcds_item_1.i_item_sk - -> Seq Scan on cte_prune.tpcds_item tpcds_item_1 (cost=0.00..40.67 rows=178 width=422) + -> Seq Scan on cte_prune.tpcds_item tpcds_item_1 Output: tpcds_item_1.i_item_desc, tpcds_item_1.i_item_sk Filter: (tpcds_item_1.i_item_sk > 0) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (89 rows) -- sql 95 -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select * from t1 where t1.v1 in (select ws_order_number from ws_wh where true) and t1.v1 in (select ws_order_number from ws_wh where ws_order_number > 0); - QUERY PLAN 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=2952.11..2952.33 rows=14 width=12) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, t1.v2, t1.v3 - -> HashAggregate (cost=2952.11..2952.15 rows=5 width=12) + -> HashAggregate Output: t1.v1, t1.v2, t1.v3 Group Key: (RowIdExpr) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=508.11..2952.10 rows=5 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) Hash Key: (RowIdExpr) - -> Hash Join (cost=508.11..2952.01 rows=5 width=12) + -> Hash Join Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) Hash Cond: (ws_wh.ws_order_number = t1.v1) - -> Subquery Scan on ws_wh (cost=179.92..2181.72 rows=35328 width=4) + -> Subquery Scan on ws_wh Output: ws_wh.ws_order_number - -> Hash Join (cost=179.92..2181.72 rows=35328 width=12) + -> Hash Join Output: ws1.ws_order_number, ws1.ws_warehouse_sk, ws2.ws_warehouse_sk Hash Cond: (ws1.ws_order_number = ws2.ws_order_number) Join Filter: (ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..137.00 rows=3433 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: ws1.ws_order_number, ws1.ws_warehouse_sk Hash Key: ws1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1 (cost=0.00..68.33 rows=3433 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws1 Output: ws1.ws_order_number, ws1.ws_warehouse_sk - -> Hash (cost=137.00..137.00 rows=3433 width=8) + -> Hash Output: ws2.ws_warehouse_sk, ws2.ws_order_number - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..137.00 rows=3433 width=8) + -> Redistribute Motion 3:3 (slice4; segments: 3) 
Output: ws2.ws_warehouse_sk, ws2.ws_order_number Hash Key: ws2.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws2 (cost=0.00..68.33 rows=3433 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws2 Output: ws2.ws_warehouse_sk, ws2.ws_order_number - -> Hash (cost=328.14..328.14 rows=5 width=16) + -> Hash Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number, (RowIdExpr) - -> Redistribute Motion 3:3 (slice5; segments: 3) (cost=327.96..328.14 rows=5 width=16) + -> Redistribute Motion 3:3 (slice5; segments: 3) Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number, (RowIdExpr) Hash Key: t1.v1 - -> Result (cost=327.96..328.05 rows=5 width=16) + -> Result Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number, RowIdExpr - -> HashAggregate (cost=327.96..328.00 rows=5 width=16) + -> HashAggregate Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number Group Key: (RowIdExpr) - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=115.19..327.95 rows=5 width=16) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number, (RowIdExpr) Hash Key: (RowIdExpr) - -> Hash Join (cost=115.19..327.86 rows=5 width=16) + -> Hash Join Output: t1.v1, t1.v2, t1.v3, ws_wh_1.ws_order_number, (RowIdExpr) Hash Cond: (ws_wh_1.ws_order_number = t1.v1) - -> Subquery Scan on ws_wh_1 (cost=114.11..277.62 rows=3925 width=4) + -> Subquery Scan on ws_wh_1 Output: ws_wh_1.ws_order_number - -> Hash Join (cost=114.11..277.62 rows=3925 width=12) + -> Hash Join Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk, ws2_1.ws_warehouse_sk Hash Cond: (ws1_1.ws_order_number = ws2_1.ws_order_number) Join Filter: (ws1_1.ws_warehouse_sk <> ws2_1.ws_warehouse_sk) - -> Redistribute Motion 3:3 (slice7; segments: 3) (cost=0.00..99.81 rows=1144 width=8) + -> Redistribute Motion 3:3 (slice7; segments: 3) Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk Hash Key: ws1_1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1_1 (cost=0.00..76.92 rows=1144 width=8) 
+ -> Seq Scan on cte_prune.tpcds_web_sales ws1_1 Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk Filter: (ws1_1.ws_order_number > 0) - -> Hash (cost=99.81..99.81 rows=1144 width=8) + -> Hash Output: ws2_1.ws_warehouse_sk, ws2_1.ws_order_number - -> Redistribute Motion 3:3 (slice8; segments: 3) (cost=0.00..99.81 rows=1144 width=8) + -> Redistribute Motion 3:3 (slice8; segments: 3) Output: ws2_1.ws_warehouse_sk, ws2_1.ws_order_number Hash Key: ws2_1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws2_1 (cost=0.00..76.92 rows=1144 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws2_1 Output: ws2_1.ws_warehouse_sk, ws2_1.ws_order_number Filter: (ws2_1.ws_order_number > 0) - -> Hash (cost=1.03..1.03 rows=3 width=12) + -> Hash Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3, RowIdExpr - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (71 rows) -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select * from t1 where t1.v1 in (select wh1 from ws_wh where true) and t1.v1 in (select wh1 from ws_wh where ws_order_number > 0); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=2952.68..2952.91 rows=14 width=12) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, t1.v2, t1.v3 - -> HashAggregate 
(cost=2952.68..2952.73 rows=5 width=12) + -> HashAggregate Output: t1.v1, t1.v2, t1.v3 Group Key: (RowIdExpr) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=508.53..2952.67 rows=5 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) Hash Key: (RowIdExpr) - -> Hash Join (cost=508.53..2952.58 rows=5 width=12) + -> Hash Join Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) Hash Cond: (ws_wh.wh1 = t1.v1) - -> Subquery Scan on ws_wh (cost=179.92..2181.72 rows=35328 width=4) + -> Subquery Scan on ws_wh Output: ws_wh.wh1 - -> Hash Join (cost=179.92..2181.72 rows=35328 width=12) + -> Hash Join Output: ws1.ws_order_number, ws1.ws_warehouse_sk, ws2.ws_warehouse_sk Hash Cond: (ws1.ws_order_number = ws2.ws_order_number) Join Filter: (ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..137.00 rows=3433 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: ws1.ws_order_number, ws1.ws_warehouse_sk Hash Key: ws1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1 (cost=0.00..68.33 rows=3433 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws1 Output: ws1.ws_order_number, ws1.ws_warehouse_sk - -> Hash (cost=137.00..137.00 rows=3433 width=8) + -> Hash Output: ws2.ws_warehouse_sk, ws2.ws_order_number - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..137.00 rows=3433 width=8) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: ws2.ws_warehouse_sk, ws2.ws_order_number Hash Key: ws2.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws2 (cost=0.00..68.33 rows=3433 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws2 Output: ws2.ws_warehouse_sk, ws2.ws_order_number - -> Hash (cost=328.44..328.44 rows=14 width=16) + -> Hash Output: t1.v1, t1.v2, t1.v3, ws_wh_1.wh1, (RowIdExpr) - -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=328.17..328.44 rows=14 width=16) + -> Broadcast Motion 3:3 (slice5; segments: 3) Output: t1.v1, 
t1.v2, t1.v3, ws_wh_1.wh1, (RowIdExpr) - -> Result (cost=328.17..328.26 rows=5 width=16) + -> Result Output: t1.v1, t1.v2, t1.v3, ws_wh_1.wh1, RowIdExpr - -> HashAggregate (cost=328.17..328.22 rows=5 width=16) + -> HashAggregate Output: t1.v1, t1.v2, t1.v3, ws_wh_1.wh1 Group Key: (RowIdExpr) - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=115.40..328.16 rows=5 width=16) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: t1.v1, t1.v2, t1.v3, ws_wh_1.wh1, (RowIdExpr) Hash Key: (RowIdExpr) - -> Hash Join (cost=115.40..328.07 rows=5 width=16) + -> Hash Join Output: t1.v1, t1.v2, t1.v3, ws_wh_1.wh1, (RowIdExpr) Hash Cond: (ws_wh_1.wh1 = t1.v1) - -> Subquery Scan on ws_wh_1 (cost=114.11..277.62 rows=3925 width=4) + -> Subquery Scan on ws_wh_1 Output: ws_wh_1.wh1 - -> Hash Join (cost=114.11..277.62 rows=3925 width=12) + -> Hash Join Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk, ws2_1.ws_warehouse_sk Hash Cond: (ws1_1.ws_order_number = ws2_1.ws_order_number) Join Filter: (ws1_1.ws_warehouse_sk <> ws2_1.ws_warehouse_sk) - -> Redistribute Motion 3:3 (slice7; segments: 3) (cost=0.00..99.81 rows=1144 width=8) + -> Redistribute Motion 3:3 (slice7; segments: 3) Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk Hash Key: ws1_1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1_1 (cost=0.00..76.92 rows=1144 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws1_1 Output: ws1_1.ws_order_number, ws1_1.ws_warehouse_sk Filter: (ws1_1.ws_order_number > 0) - -> Hash (cost=99.81..99.81 rows=1144 width=8) + -> Hash Output: ws2_1.ws_warehouse_sk, ws2_1.ws_order_number - -> Redistribute Motion 3:3 (slice8; segments: 3) (cost=0.00..99.81 rows=1144 width=8) + -> Redistribute Motion 3:3 (slice8; segments: 3) Output: ws2_1.ws_warehouse_sk, ws2_1.ws_order_number Hash Key: ws2_1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws2_1 (cost=0.00..76.92 rows=1144 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws2_1 Output: 
ws2_1.ws_warehouse_sk, ws2_1.ws_order_number Filter: (ws2_1.ws_order_number > 0) - -> Hash (cost=1.17..1.17 rows=10 width=12) + -> Hash Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) - -> Broadcast Motion 3:3 (slice9; segments: 3) (cost=0.00..1.17 rows=10 width=12) + -> Broadcast Motion 3:3 (slice9; segments: 3) Output: t1.v1, t1.v2, t1.v3, (RowIdExpr) - -> Seq Scan on cte_prune.t1 (cost=0.00..1.03 rows=3 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3, RowIdExpr - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (72 rows) @@ -1197,7 +1201,7 @@ analyze t3; analyze t4; -- Additional filtering conditions are added to the consumer. -- This is caused by `PexprInferPredicates` in the ORCA preprocessor. -explain verbose WITH t(a,b,d) AS +explain(costs off, verbose) WITH t(a,b,d) AS ( SELECT t3.a,t3.b,t4.d FROM t3,t4 WHERE t3.a = t4.d ) @@ -1209,86 +1213,87 @@ t WHERE cup.e < 10 GROUP BY cup.c,cup.d, cup.e ,t.d, t.b ORDER BY 1,2,3,4 LIMIT 10; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit (cost=20000000031.26..20000000031.40 rows=10 width=56) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------ + Limit Output: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)), t.d, t.b - -> Gather Motion 3:1 (slice1; segments: 3) (cost=20000000031.26..20000000031.68 rows=30 width=56) + -> Gather Motion 3:1 (slice1; segments: 3) Output: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)), t.d, t.b Merge Key: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)) - -> Limit (cost=20000000031.26..20000000031.28 rows=10 width=56) + -> Limit Output: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)), t.d, t.b - -> Sort (cost=20000000031.26..20000000031.54 rows=111 width=56) + -> Sort 
Output: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)), t.d, t.b Sort Key: cup.c, cup.d, cup.e, (sum(t.d) OVER (?)) - -> WindowAgg (cost=20000000026.91..20000000028.86 rows=111 width=56) + -> WindowAgg Output: cup.c, cup.d, cup.e, sum(t.d) OVER (?), t.d, t.b Partition By: t.b - -> Sort (cost=20000000026.91..20000000027.19 rows=111 width=48) + -> Sort Output: cup.c, cup.d, cup.e, t.d, t.b Sort Key: t.b - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20000000018.14..20000000023.14 rows=111 width=48) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: cup.c, cup.d, cup.e, t.d, t.b Hash Key: t.b - -> GroupAggregate (cost=20000000018.14..20000000020.92 rows=111 width=48) + -> GroupAggregate Output: cup.c, cup.d, cup.e, t.d, t.b Group Key: cup.c, cup.d, cup.e, t.d, t.b - -> Sort (cost=20000000018.14..20000000018.42 rows=111 width=48) + -> Sort Output: cup.c, cup.d, cup.e, t.d, t.b Sort Key: cup.c, cup.d, cup.e, t.d, t.b - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=20000000008.36..20000000014.36 rows=111 width=48) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: cup.c, cup.d, cup.e, t.d, t.b Hash Key: cup.c, cup.d, cup.e, t.d, t.b - -> Nested Loop (cost=20000000008.36..20000000012.14 rows=111 width=48) + -> Nested Loop Output: cup.c, cup.d, cup.e, t.d, t.b - -> Broadcast Motion 3:3 (slice4; segments: 3) (cost=1.08..2.35 rows=10 width=8) + -> Broadcast Motion 3:3 (slice4; segments: 3) Output: t.d, t.b - -> Subquery Scan on t (cost=1.08..2.22 rows=3 width=8) + -> Subquery Scan on t Output: t.d, t.b - -> Hash Join (cost=1.08..2.22 rows=3 width=12) + -> Hash Join Output: t3.a, t3.b, t4.d Hash Cond: (t4.d = t3.a) - -> Redistribute Motion 3:3 (slice5; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice5; segments: 3) Output: t4.d Hash Key: t4.d - -> Seq Scan on cte_prune.t4 (cost=0.00..1.03 rows=3 width=4) + -> Seq Scan on cte_prune.t4 Output: t4.d - -> Hash (cost=1.03..1.03 rows=3 width=8) + -> Hash Output: t3.a, 
t3.b - -> Seq Scan on cte_prune.t3 (cost=0.00..1.03 rows=3 width=8) + -> Seq Scan on cte_prune.t3 Output: t3.a, t3.b - -> Materialize (cost=10000000007.29..10000000008.43 rows=11 width=40) + -> Materialize Output: cup.c, cup.d, cup.e - -> Subquery Scan on cup (cost=10000000007.29..10000000008.37 rows=11 width=40) + -> Subquery Scan on cup Output: cup.c, cup.d, cup.e Filter: (cup.e < '10'::numeric) - -> WindowAgg (cost=10000000007.29..10000000007.95 rows=33 width=48) + -> WindowAgg Output: t4_1.c, t4_1.d, avg(t3_1.b) OVER (?), t3_1.b, t3_1.a Partition By: t3_1.a Order By: t3_1.b - -> Sort (cost=10000000007.29..10000000007.37 rows=33 width=16) + -> Sort Output: t3_1.b, t3_1.a, t4_1.c, t4_1.d Sort Key: t3_1.a, t3_1.b DESC - -> Nested Loop (cost=10000000001.08..10000000006.44 rows=33 width=16) + -> Nested Loop Output: t3_1.b, t3_1.a, t4_1.c, t4_1.d - -> Hash Join (cost=1.08..2.22 rows=3 width=12) + -> Hash Join Output: t3_1.a, t3_1.b, t4_2.d Hash Cond: (t4_2.d = t3_1.a) - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=0.00..1.10 rows=3 width=4) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: t4_2.d Hash Key: t4_2.d - -> Seq Scan on cte_prune.t4 t4_2 (cost=0.00..1.03 rows=3 width=4) + -> Seq Scan on cte_prune.t4 t4_2 Output: t4_2.d - -> Hash (cost=1.03..1.03 rows=3 width=8) + -> Hash Output: t3_1.a, t3_1.b - -> Seq Scan on cte_prune.t3 t3_1 (cost=0.00..1.03 rows=3 width=8) + -> Seq Scan on cte_prune.t3 t3_1 Output: t3_1.a, t3_1.b - -> Materialize (cost=0.00..1.22 rows=10 width=8) + -> Materialize Output: t4_1.c, t4_1.d - -> Broadcast Motion 3:3 (slice7; segments: 3) (cost=0.00..1.17 rows=10 width=8) + -> Broadcast Motion 3:3 (slice7; segments: 3) Output: t4_1.c, t4_1.d - -> Seq Scan on cte_prune.t4 t4_1 (cost=0.00..1.03 rows=3 width=8) + -> Seq Scan on cte_prune.t4 t4_1 Output: t4_1.c, t4_1.d + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (80 rows) @@ -1346,28 +1351,28 @@ SELECT ELSE 'DDDD'::name END) AS string4 FROM 
generate_series(0, 99) AS i; -explain verbose select four, x +explain(costs off, verbose) select four, x from (select four, ten, 'foo'::text as x from cte_prune_tenk1) as t group by grouping sets (four, x) having x = 'foo'; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=130.74..134.95 rows=3 width=36) + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: cte_prune_tenk1.four, ('foo'::text) - -> HashAggregate (cost=130.74..134.91 rows=1 width=36) + -> HashAggregate Output: cte_prune_tenk1.four, ('foo'::text) Group Key: cte_prune_tenk1.four, ('foo'::text), (GROUPINGSET_ID()) Filter: (('foo'::text) = 'foo'::text) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=83.58..123.29 rows=993 width=36) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: cte_prune_tenk1.four, ('foo'::text), (GROUPINGSET_ID()) Hash Key: cte_prune_tenk1.four, ('foo'::text), (GROUPINGSET_ID()) - -> Partial HashAggregate (cost=83.58..103.43 rows=993 width=36) + -> Partial HashAggregate Output: cte_prune_tenk1.four, ('foo'::text), GROUPINGSET_ID() Hash Key: cte_prune_tenk1.four Hash Key: 'foo'::text - -> Seq Scan on cte_prune.cte_prune_tenk1 (cost=0.00..73.67 rows=3967 width=36) + -> Seq Scan on cte_prune.cte_prune_tenk1 Output: cte_prune_tenk1.four, 'foo'::text - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (17 rows) @@ -1426,7 +1431,7 @@ ALTER TABLE ONLY country ALTER TABLE ONLY countrylanguage ADD CONSTRAINT countrylanguage_pkey PRIMARY KEY (countrycode, "language"); -- CTE1(inlined) in CTE2(no-inlined) case -explain verbose with country as +explain(costs off, verbose) with country as (select country.code,country.name COUNTRY, city.name CAPITAL, language, isofficial, percentage FROM 
country,city,countrylanguage WHERE country.code = countrylanguage.countrycode @@ -1443,86 +1448,86 @@ select * from where country.percentage = countrylanguage.percentage order by countrylanguage.COUNTRY,country.language LIMIT 40; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit (cost=1051.23..1051.62 rows=28 width=202) + Limit Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage - -> Gather Motion 3:1 (slice1; segments: 3) (cost=1051.23..1051.62 rows=28 width=202) + -> Gather Motion 3:1 (slice1; segments: 3) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Merge Key: country.country, countrylanguage_2.language - -> Limit (cost=1051.23..1051.25 rows=9 width=202) + -> Limit Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage - -> Sort (cost=1051.23..1051.25 rows=9 width=202) + -> Sort Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Sort Key: country.country, countrylanguage_2.language - -> Hash Join (cost=846.99..1051.08 rows=9 width=202) + -> Hash Join Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, 
countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage Hash Cond: (country.percentage = countrylanguage_2.percentage) - -> Hash Join (cost=436.98..637.90 rows=245 width=85) + -> Hash Join Output: country.code, country.country, country.language, country.isofficial, country.percentage Hash Cond: (countrylanguage.countrycode = country.code) - -> Seq Scan on cte_prune.countrylanguage (cost=0.00..165.33 rows=13133 width=16) + -> Seq Scan on cte_prune.countrylanguage Output: countrylanguage.countrycode, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage - -> Hash (cost=436.74..436.74 rows=19 width=85) + -> Hash Output: country.code, country.country, country.language, country.isofficial, country.percentage - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=227.29..436.74 rows=19 width=85) + -> Broadcast Motion 3:3 (slice2; segments: 3) Output: country.code, country.country, country.language, country.isofficial, country.percentage - -> Subquery Scan on country (cost=227.29..436.49 rows=6 width=85) + -> Subquery Scan on country Output: country.code, country.country, country.language, country.isofficial, country.percentage - -> Hash Join (cost=227.29..436.49 rows=6 width=117) + -> Hash Join Output: country_1.code, country_1.name, city.name, countrylanguage_1.language, countrylanguage_1.isofficial, countrylanguage_1.percentage Hash Cond: (countrylanguage_1.countrycode = country_1.code) - -> Seq Scan on cte_prune.countrylanguage countrylanguage_1 (cost=0.00..198.17 rows=4378 width=53) + -> Seq Scan on cte_prune.countrylanguage countrylanguage_1 Output: countrylanguage_1.countrycode, countrylanguage_1.language, countrylanguage_1.isofficial, countrylanguage_1.percentage Filter: (countrylanguage_1.percentage > '50'::double precision) - -> Hash (cost=227.11..227.11 rows=15 width=80) + -> Hash Output: country_1.code, country_1.name, city.name - -> Broadcast 
Motion 3:3 (slice3; segments: 3) (cost=77.45..227.11 rows=15 width=80) + -> Broadcast Motion 3:3 (slice3; segments: 3) Output: country_1.code, country_1.name, city.name - -> Hash Join (cost=77.45..226.91 rows=5 width=80) + -> Hash Join Output: country_1.code, country_1.name, city.name Hash Cond: (city.id = country_1.capital) - -> Seq Scan on cte_prune.city (cost=0.00..126.33 rows=9233 width=36) + -> Seq Scan on cte_prune.city Output: city.id, city.name, city.countrycode, city.district, city.population - -> Hash (cost=77.40..77.40 rows=3 width=52) + -> Hash Output: country_1.code, country_1.name, country_1.capital - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..77.40 rows=3 width=52) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: country_1.code, country_1.name, country_1.capital Hash Key: country_1.capital - -> Seq Scan on cte_prune.country country_1 (cost=0.00..77.33 rows=3 width=52) + -> Seq Scan on cte_prune.country country_1 Output: country_1.code, country_1.name, country_1.capital Filter: (country_1.continent = 'Europe'::text) - -> Hash (cost=409.66..409.66 rows=28 width=117) + -> Hash Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage - -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=227.29..409.66 rows=28 width=117) + -> Broadcast Motion 3:3 (slice5; segments: 3) Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage - -> Hash Join (cost=227.29..409.20 rows=9 width=117) + -> Hash Join Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage Hash Cond: (countrylanguage_2.countrycode = country_2.code) - -> Seq Scan on cte_prune.countrylanguage countrylanguage_2 (cost=0.00..165.33 rows=6567 width=53) + -> Seq Scan on cte_prune.countrylanguage countrylanguage_2 Output: 
countrylanguage_2.countrycode, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage Filter: countrylanguage_2.isofficial - -> Hash (cost=227.11..227.11 rows=15 width=80) + -> Hash Output: country_2.code, country_2.name, city_1.name - -> Broadcast Motion 3:3 (slice6; segments: 3) (cost=77.45..227.11 rows=15 width=80) + -> Broadcast Motion 3:3 (slice6; segments: 3) Output: country_2.code, country_2.name, city_1.name - -> Hash Join (cost=77.45..226.91 rows=5 width=80) + -> Hash Join Output: country_2.code, country_2.name, city_1.name Hash Cond: (city_1.id = country_2.capital) - -> Seq Scan on cte_prune.city city_1 (cost=0.00..126.33 rows=9233 width=36) + -> Seq Scan on cte_prune.city city_1 Output: city_1.id, city_1.name, city_1.countrycode, city_1.district, city_1.population - -> Hash (cost=77.40..77.40 rows=3 width=52) + -> Hash Output: country_2.code, country_2.name, country_2.capital - -> Redistribute Motion 3:3 (slice7; segments: 3) (cost=0.00..77.40 rows=3 width=52) + -> Redistribute Motion 3:3 (slice7; segments: 3) Output: country_2.code, country_2.name, country_2.capital Hash Key: country_2.capital - -> Seq Scan on cte_prune.country country_2 (cost=0.00..77.33 rows=3 width=52) + -> Seq Scan on cte_prune.country country_2 Output: country_2.code, country_2.name, country_2.capital Filter: (country_2.continent = 'Europe'::text) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (76 rows) -- CTE in the main query and subqueries within the main query -explain verbose with bad_headofstates as +explain(costs off, verbose) with bad_headofstates as ( select country.code,country.name,country.headofstate,countrylanguage.language from @@ -1541,112 +1546,112 @@ where FOO.code = bad_headofstates.code and FOO.capital = city.id) OUTER_FOO group by region ) OUTERMOST_FOO,bad_headofstates,country where country.code = bad_headofstates.code and country.region = 
OUTERMOST_FOO.region order by OUTERMOST_FOO.region,bad_headofstates.headofstate LIMIT 40; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit (cost=3473.37..3473.94 rows=40 width=96) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit Output: (avg(country_2.population)), country_2.region, country_1.headofstate - -> Gather Motion 3:1 (slice1; segments: 3) (cost=3473.37..3475.07 rows=120 width=96) + -> Gather Motion 3:1 (slice1; segments: 3) Output: (avg(country_2.population)), country_2.region, country_1.headofstate Merge Key: country_2.region, country_1.headofstate - -> Limit (cost=3473.37..3473.47 rows=40 width=96) + -> Limit Output: (avg(country_2.population)), country_2.region, country_1.headofstate - -> Sort (cost=3473.37..3482.49 rows=3648 width=96) + -> Sort Output: (avg(country_2.population)), country_2.region, country_1.headofstate Sort Key: country_2.region, country_1.headofstate - -> Hash Join (cost=2915.55..3358.06 rows=3648 width=96) + -> Hash Join Output: (avg(country_2.population)), country_2.region, country_1.headofstate Inner Unique: true Hash Cond: (country.region = country_2.region) - -> Hash Join (cost=222.07..618.52 rows=3648 width=64) + -> Hash Join Output: country_1.headofstate, country.region Inner Unique: true Hash Cond: (country_1.code = country.code) - -> Hash Join (cost=110.07..460.46 rows=3648 width=112) + -> Hash Join Output: country_1.code, country_1.name, country_1.headofstate, countrylanguage.language Inner Unique: true Hash Cond: (countrylanguage.countrycode = country_1.code) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..296.67 rows=6567 width=48) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: 
countrylanguage.language, countrylanguage.countrycode Hash Key: countrylanguage.countrycode - -> Seq Scan on cte_prune.countrylanguage (cost=0.00..165.33 rows=6567 width=48) + -> Seq Scan on cte_prune.countrylanguage Output: countrylanguage.language, countrylanguage.countrycode Filter: countrylanguage.isofficial - -> Hash (cost=86.00..86.00 rows=1926 width=80) + -> Hash Output: country_1.code, country_1.name, country_1.headofstate - -> Seq Scan on cte_prune.country country_1 (cost=0.00..86.00 rows=1926 width=80) + -> Seq Scan on cte_prune.country country_1 Output: country_1.code, country_1.name, country_1.headofstate Filter: ((country_1.gnp < country_1.gnpold) OR (country_1.gnp < '3000'::numeric)) - -> Hash (cost=68.67..68.67 rows=3467 width=48) + -> Hash Output: country.code, country.region - -> Seq Scan on cte_prune.country (cost=0.00..68.67 rows=3467 width=48) + -> Seq Scan on cte_prune.country Output: country.code, country.region - -> Hash (cost=2680.97..2680.97 rows=1000 width=64) + -> Hash Output: (avg(country_2.population)), country_2.region - -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=2660.14..2680.97 rows=1000 width=64) + -> Broadcast Motion 3:3 (slice3; segments: 3) Output: (avg(country_2.population)), country_2.region - -> Finalize HashAggregate (cost=2660.14..2664.31 rows=333 width=64) + -> Finalize HashAggregate Output: avg(country_2.population), country_2.region Group Key: country_2.region - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=2625.14..2655.14 rows=1000 width=64) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: country_2.region, (PARTIAL avg(country_2.population)) Hash Key: country_2.region - -> Streaming Partial HashAggregate (cost=2625.14..2635.14 rows=1000 width=64) + -> Streaming Partial HashAggregate Output: country_2.region, PARTIAL avg(country_2.population) Group Key: country_2.region - -> Hash Join (cost=1662.17..2425.51 rows=39927 width=36) + -> Hash Join Output: country_2.region, country_2.population 
Hash Cond: (country_3.code = country_2.code) - -> Hash Join (cost=110.07..460.46 rows=3648 width=112) + -> Hash Join Output: country_3.code, country_3.name, country_3.headofstate, countrylanguage_1.language Inner Unique: true Hash Cond: (countrylanguage_1.countrycode = country_3.code) - -> Redistribute Motion 3:3 (slice5; segments: 3) (cost=0.00..296.67 rows=6567 width=48) + -> Redistribute Motion 3:3 (slice5; segments: 3) Output: countrylanguage_1.language, countrylanguage_1.countrycode Hash Key: countrylanguage_1.countrycode - -> Seq Scan on cte_prune.countrylanguage countrylanguage_1 (cost=0.00..165.33 rows=6567 width=48) + -> Seq Scan on cte_prune.countrylanguage countrylanguage_1 Output: countrylanguage_1.language, countrylanguage_1.countrycode Filter: countrylanguage_1.isofficial - -> Hash (cost=86.00..86.00 rows=1926 width=80) + -> Hash Output: country_3.code, country_3.name, country_3.headofstate - -> Seq Scan on cte_prune.country country_3 (cost=0.00..86.00 rows=1926 width=80) + -> Seq Scan on cte_prune.country country_3 Output: country_3.code, country_3.name, country_3.headofstate Filter: ((country_3.gnp < country_3.gnpold) OR (country_3.gnp < '3000'::numeric)) - -> Hash (cost=1506.49..1506.49 rows=3648 width=68) + -> Hash Output: country_2.population, country_2.region, country_2.code, country_4.code - -> Hash Join (cost=1063.99..1506.49 rows=3648 width=68) + -> Hash Join Output: country_2.population, country_2.region, country_2.code, country_4.code Inner Unique: true Hash Cond: (country_2.capital = city.id) - -> Hash Join (cost=222.07..618.52 rows=3648 width=72) + -> Hash Join Output: country_2.population, country_2.region, country_2.code, country_2.capital, country_4.code Inner Unique: true Hash Cond: (country_4.code = country_2.code) - -> Hash Join (cost=110.07..460.46 rows=3648 width=112) + -> Hash Join Output: country_4.code, country_4.name, country_4.headofstate, countrylanguage_2.language Inner Unique: true Hash Cond: (countrylanguage_2.countrycode 
= country_4.code) - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=0.00..296.67 rows=6567 width=48) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: countrylanguage_2.language, countrylanguage_2.countrycode Hash Key: countrylanguage_2.countrycode - -> Seq Scan on cte_prune.countrylanguage countrylanguage_2 (cost=0.00..165.33 rows=6567 width=48) + -> Seq Scan on cte_prune.countrylanguage countrylanguage_2 Output: countrylanguage_2.language, countrylanguage_2.countrycode Filter: countrylanguage_2.isofficial - -> Hash (cost=86.00..86.00 rows=1926 width=80) + -> Hash Output: country_4.code, country_4.name, country_4.headofstate - -> Seq Scan on cte_prune.country country_4 (cost=0.00..86.00 rows=1926 width=80) + -> Seq Scan on cte_prune.country country_4 Output: country_4.code, country_4.name, country_4.headofstate Filter: ((country_4.gnp < country_4.gnpold) OR (country_4.gnp < '3000'::numeric)) - -> Hash (cost=68.67..68.67 rows=3467 width=56) + -> Hash Output: country_2.population, country_2.region, country_2.code, country_2.capital - -> Seq Scan on cte_prune.country country_2 (cost=0.00..68.67 rows=3467 width=56) + -> Seq Scan on cte_prune.country country_2 Output: country_2.population, country_2.region, country_2.code, country_2.capital - -> Hash (cost=495.67..495.67 rows=27700 width=4) + -> Hash Output: city.id - -> Broadcast Motion 3:3 (slice7; segments: 3) (cost=0.00..495.67 rows=27700 width=4) + -> Broadcast Motion 3:3 (slice7; segments: 3) Output: city.id - -> Seq Scan on cte_prune.city (cost=0.00..126.33 rows=9233 width=4) + -> Seq Scan on cte_prune.city Output: city.id - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (105 rows) @@ -1663,58 +1668,58 @@ CREATE TABLE t6 AS SELECT i as a, i+1 as b from generate_series(1,10)i; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. 
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- inlined CTEs should have not unused columns(ex. t5.*, t6.* in output) -explain verbose WITH w AS (SELECT a, b from t6 where b < 5) +explain(costs off, verbose) WITH w AS (SELECT a, b from t6 where b < 5) SELECT * FROM t6, (WITH v AS (SELECT c, d FROM t5, w WHERE c = w.a AND c < 2) SELECT v1.c, v1.d FROM v v1, v v2 WHERE v1.c = v2.c AND v1.d > 1 ) x WHERE t6.a = x.c ORDER BY 1; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=824331921.87..983595523.10 rows=11242136557 width=16) + QUERY PLAN +------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t6.a, t6.b, t5_1.c, t5_1.d Merge Key: t6.a - -> Sort (cost=824331921.87..833700369.00 rows=3747378852 width=16) + -> Sort Output: t6.a, t6.b, t5_1.c, t5_1.d Sort Key: t6.a - -> Hash Join (cost=5001.56..196417010.27 rows=3747378852 width=16) + -> Hash Join Output: t6.a, t6.b, t5_1.c, t5_1.d Hash Cond: (t5_1.c = t6.a) - -> Hash Join (cost=4321.81..2257711.94 rows=43523564 width=8) + -> Hash Join Output: t5_1.c, t5_1.d Hash Cond: (t5.c = t5_1.c) - -> Hash Join (cost=512.33..4788.69 rows=158519 width=8) + -> Hash Join Output: t5.c, t5.d Hash Cond: (t6_1.a = t5.c) - -> Seq Scan on cte_prune.t6 t6_1 (cost=0.00..464.50 rows=5523 width=8) + -> Seq Scan on cte_prune.t6 t6_1 Output: t6_1.a, t6_1.b Filter: ((t6_1.b < 5) AND (t6_1.a < 2)) - -> Hash (cost=392.75..392.75 rows=9567 width=8) + -> Hash Output: t5.c, t5.d - -> Seq Scan on cte_prune.t5 (cost=0.00..392.75 rows=9567 width=8) + -> Seq Scan on cte_prune.t5 Output: t5.c, t5.d Filter: (t5.c < 2) - -> Hash (cost=2665.47..2665.47 rows=91521 width=8) + -> Hash Output: t5_1.c, t5_1.d - -> Hash Join 
(cost=533.54..2665.47 rows=91521 width=8) + -> Hash Join Output: t5_1.c, t5_1.d Hash Cond: (t5_1.c = w.a) - -> Seq Scan on cte_prune.t5 t5_1 (cost=0.00..464.50 rows=5523 width=8) + -> Seq Scan on cte_prune.t5 t5_1 Output: t5_1.c, t5_1.d Filter: ((t5_1.c < 2) AND (t5_1.d > 1)) - -> Hash (cost=464.50..464.50 rows=5523 width=4) + -> Hash Output: w.a - -> Subquery Scan on w (cost=0.00..464.50 rows=5523 width=4) + -> Subquery Scan on w Output: w.a - -> Seq Scan on cte_prune.t6 t6_2 (cost=0.00..464.50 rows=5523 width=8) + -> Seq Scan on cte_prune.t6 t6_2 Output: t6_2.a, t6_2.b Filter: ((t6_2.b < 5) AND (t6_2.a < 2)) - -> Hash (cost=321.00..321.00 rows=28700 width=8) + -> Hash Output: t6.a, t6.b - -> Seq Scan on cte_prune.t6 (cost=0.00..321.00 rows=28700 width=8) + -> Seq Scan on cte_prune.t6 Output: t6.a, t6.b - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (44 rows) @@ -1745,14 +1750,14 @@ INSERT INTO t7 VALUES (8, 9, NULL); explain (verbose, costs off) with x as (select * from (select f1 from t7) ss) select * from x where f1 = 1; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) Output: t7.f1 -> Seq Scan on cte_prune.t7 Output: t7.f1 Filter: (t7.f1 = 1) - Settings: enable_parallel = 'off', optimizer = 'off' + Settings: optimizer = 'off' Optimizer: Postgres query optimizer (7 rows) @@ -1769,7 +1774,8 @@ drop table t5; drop table t6; drop table t7; drop schema cte_prune cascade; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table t3 drop cascades to table t4 +drop cascades to table cte_prune_tenk1 -- end_ignore diff --git a/src/test/regress/expected/cte_prune_optimizer.out b/src/test/regress/expected/cte_prune_optimizer.out index 1b0833673e8..9f58fd8b8f8 100644 --- a/src/test/regress/expected/cte_prune_optimizer.out +++ 
b/src/test/regress/expected/cte_prune_optimizer.out @@ -1,6 +1,6 @@ -- start_ignore -drop table if exists t1; -drop table if exists t2; +create schema cte_prune; +set search_path = cte_prune; SET optimizer_trace_fallback = on; -- end_ignore create table t1(v1 int, v2 int, v3 int); @@ -14,34 +14,34 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur insert into t2 values(generate_series(0, 100), generate_series(100, 200), generate_series(200, 300)); analyze t2; -- should pruned both seq scan and shared scan -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=5 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1 - -> Sequence (cost=0.00..1293.00 rows=2 width=4) + -> Sequence Output: share0_ref3.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=2 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 Filter: (t1.v1 < 5) - -> Hash Left Join (cost=0.00..862.00 rows=2 width=4) + -> Hash Left Join Output: share0_ref3.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Result (cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref3.v1 Filter: (share0_ref3.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Hash Output: share0_ref2.v1 - -> 
Result (cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref2.v1 Filter: (share0_ref2.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (26 rows) @@ -54,115 +54,115 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 4 (4 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=5 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v2 - -> Sequence (cost=0.00..1293.00 rows=2 width=4) + -> Sequence Output: share0_ref3.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=2 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2 Filter: (t1.v1 < 5) - -> Hash Left Join (cost=0.00..862.00 rows=2 width=4) + -> Hash Left Join Output: share0_ref3.v2 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Result (cost=0.00..431.00 rows=2 width=8) + -> Result Output: share0_ref3.v1, share0_ref3.v2 Filter: (share0_ref3.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Hash Output: share0_ref2.v1 - -> Result 
(cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref2.v1 Filter: (share0_ref2.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1, share0_ref2.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (26 rows) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; v2 ---- + 11 12 13 14 - 11 (4 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=5 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v3 - -> Sequence (cost=0.00..1293.00 rows=2 width=4) + -> Sequence Output: share0_ref3.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=2 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v3 Filter: (t1.v1 < 5) - -> Hash Left Join (cost=0.00..862.00 rows=2 width=4) + -> Hash Left Join Output: share0_ref3.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Result (cost=0.00..431.00 rows=2 width=8) + -> Result Output: share0_ref3.v1, share0_ref3.v3 Filter: (share0_ref3.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=2 width=4) + 
-> Hash Output: share0_ref2.v1 - -> Result (cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref2.v1 Filter: (share0_ref2.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1, share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (26 rows) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; v3 ---- + 21 22 23 24 - 21 (4 rows) -- * also should be pruned -explain verbose with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=5 width=4) +explain(costs off, verbose) with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1 - -> Sequence (cost=0.00..1293.00 rows=2 width=4) + -> Sequence Output: share0_ref3.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=2 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 Filter: (t1.v1 < 5) - -> Hash Left Join (cost=0.00..862.00 rows=2 width=4) + -> Hash Left Join Output: share0_ref3.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Result (cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref3.v1 Filter: (share0_ref3.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Hash 
Output: share0_ref2.v1 - -> Result (cost=0.00..431.00 rows=2 width=4) + -> Result Output: share0_ref2.v1 Filter: (share0_ref2.v1 < 5) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=2 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (26 rows) @@ -176,161 +176,161 @@ with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 o (4 rows) -- no push filter -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=11 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v3 - -> Sequence (cost=0.00..1293.00 rows=4 width=4) + -> Sequence Output: share0_ref3.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=4) + -> Hash Left Join Output: share0_ref3.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v2) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v2, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + 
-> Redistribute Motion 3:3 (slice2; segments: 3) Output: share0_ref2.v2 Hash Key: share0_ref2.v2 - -> Result (cost=0.00..431.00 rows=4 width=4) + -> Result Output: share0_ref2.v2 - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 2:0) Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (24 rows) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; v3 ---- - 26 - 25 - 30 - 29 - 21 - 27 + 22 23 - 28 24 - 22 + 27 + 28 + 21 + 25 + 26 + 29 + 30 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=11 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v2 - -> Sequence (cost=0.00..1293.00 rows=4 width=4) + -> Sequence Output: share0_ref3.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=4) + -> Hash Left Join Output: share0_ref3.v2 Hash Cond: (share0_ref3.v1 = share0_ref2.v2) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: 
share0_ref2.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: share0_ref2.v2 Hash Key: share0_ref2.v2 - -> Result (cost=0.00..431.00 rows=4 width=4) + -> Result Output: share0_ref2.v2 - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 2:0) Output: share0_ref2.v1, share0_ref2.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (24 rows) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; v2 ---- - 17 - 13 - 18 - 14 - 12 - 16 15 - 20 + 16 19 + 20 11 + 12 + 13 + 14 + 17 + 18 (10 rows) -- distribution col can be pruned which is better than do redistribute in CTE consumer -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v2 - -> Sequence (cost=0.00..1293.00 rows=4 width=4) + -> Sequence Output: share0_ref3.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v2 Hash Key: t1.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=4) + -> Hash Left 
Join Output: share0_ref3.v2 Hash Cond: (share0_ref3.v2 = share0_ref2.v2) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (22 rows) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; v2 ---- - 18 - 16 - 19 + 11 13 14 17 - 11 - 12 + 16 + 19 + 18 15 20 + 12 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v3 - -> Sequence (cost=0.00..1293.00 rows=4 width=4) + -> Sequence Output: share0_ref3.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v3 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t1.v3 Hash Key: t1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=4) + -> Hash Left Join Output: share0_ref3.v3 Hash Cond: (share0_ref3.v3 = share0_ref2.v3) - -> Shared Scan (share slice:id 1:0) 
(cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (22 rows) @@ -341,8 +341,8 @@ with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 24 27 29 - 28 21 + 28 25 23 26 @@ -351,135 +351,135 @@ with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 -- groupby/order by/window function/grouping set should be contains in CTE output -- group by -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share0_ref3.v1)) - -> Result (cost=0.00..1293.00 rows=4 width=8) + -> Result Output: (sum(share0_ref3.v1)) - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: (sum(share0_ref3.v1)), share0_ref3.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 - -> GroupAggregate (cost=0.00..862.00 rows=4 width=8) + -> GroupAggregate Output: sum(share0_ref3.v1), share0_ref3.v1 Group Key: 
share0_ref3.v1 - -> Sort (cost=0.00..862.00 rows=4 width=4) + -> Sort Output: share0_ref3.v1 Sort Key: share0_ref3.v1 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=4) + -> Hash Left Join Output: share0_ref3.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (27 rows) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; sum ----- - 4 + 5 + 6 + 9 + 10 2 - 8 - 7 3 - 10 - 9 - 6 - 5 + 4 + 7 + 8 1 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share0_ref3.v1)) - -> Result (cost=0.00..1293.00 rows=4 width=8) + -> Result Output: (sum(share0_ref3.v1)) - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: (sum(share0_ref3.v1)), share0_ref3.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2 - -> Seq Scan on cte_prune.t1 
(cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2 - -> GroupAggregate (cost=0.00..862.00 rows=4 width=8) + -> GroupAggregate Output: sum(share0_ref3.v1), share0_ref3.v2 Group Key: share0_ref3.v2 - -> Sort (cost=0.00..862.00 rows=4 width=8) + -> Sort Output: share0_ref3.v1, share0_ref3.v2 Sort Key: share0_ref3.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..862.00 rows=4 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: share0_ref3.v1, share0_ref3.v2 Hash Key: share0_ref3.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1, share0_ref3.v2 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 2:0) Output: share0_ref3.v1, share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2 - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 2:0) Output: share0_ref2.v1, share0_ref2.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (30 rows) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; sum ----- - 6 - 9 - 8 - 10 2 5 + 10 + 6 + 8 + 9 1 3 4 7 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; + QUERY PLAN 
+---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share0_ref3.v3)) - -> Result (cost=0.00..1293.00 rows=4 width=8) + -> Result Output: (sum(share0_ref3.v3)) - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: (sum(share0_ref3.v3)), share0_ref3.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> GroupAggregate (cost=0.00..862.00 rows=4 width=8) + -> GroupAggregate Output: sum(share0_ref3.v3), share0_ref3.v2 Group Key: share0_ref3.v2 - -> Sort (cost=0.00..862.00 rows=4 width=8) + -> Sort Output: share0_ref3.v2, share0_ref3.v3 Sort Key: share0_ref3.v2 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..862.00 rows=4 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: share0_ref3.v2, share0_ref3.v3 Hash Key: share0_ref3.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v2, share0_ref3.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=12) + -> Shared Scan (share slice:id 2:0) Output: share0_ref3.v1, share0_ref3.v2, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 2:0) Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (30 rows) @@ -487,11 +487,11 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left jo sum ----- 26 - 29 28 - 30 + 29 22 25 + 30 21 
23 24 @@ -499,33 +499,33 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left jo (10 rows) -- order by -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; - QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Result (cost=0.00..1293.00 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; + QUERY PLAN +------------------------------------------------------------------------ + Result Output: share0_ref3.v1 - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=4) + -> Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1, share0_ref2.v1 Merge Key: share0_ref2.v1 - -> Sort (cost=0.00..1293.00 rows=4 width=8) + -> Sort Output: share0_ref3.v1, share0_ref2.v1 Sort Key: share0_ref2.v1 - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: share0_ref3.v1, share0_ref2.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1, share0_ref2.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (25 rows) @@ -544,33 
+544,33 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 10 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; - QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Result (cost=0.00..1293.00 rows=10 width=4) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; + QUERY PLAN +------------------------------------------------------------------------------ + Result Output: share0_ref3.v1 - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=4) + -> Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1, share0_ref2.v3 Merge Key: share0_ref2.v3 - -> Sort (cost=0.00..1293.00 rows=4 width=8) + -> Sort Output: share0_ref3.v1, share0_ref2.v3 Sort Key: share0_ref2.v3 - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: share0_ref3.v1, share0_ref2.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1, share0_ref2.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=8) + -> Hash Output: share0_ref2.v1, share0_ref2.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1, share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer 
= 'on' Optimizer: GPORCA (25 rows) @@ -590,38 +590,38 @@ with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 (10 rows) -- window function -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share0_ref3.v1) OVER (?)) - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: (sum(share0_ref3.v1) OVER (?)) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2 - -> Redistribute Motion 1:3 (slice2) (cost=0.00..862.00 rows=4 width=8) + -> Redistribute Motion 1:3 (slice2) Output: (sum(share0_ref3.v1) OVER (?)) - -> WindowAgg (cost=0.00..862.00 rows=10 width=8) + -> WindowAgg Output: sum(share0_ref3.v1) OVER (?) 
Order By: share0_ref3.v2 - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..862.00 rows=10 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) Output: share0_ref3.v1, share0_ref3.v2 Merge Key: share0_ref3.v2 - -> Sort (cost=0.00..862.00 rows=4 width=8) + -> Sort Output: share0_ref3.v1, share0_ref3.v2 Sort Key: share0_ref3.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1, share0_ref3.v2 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 3:0) Output: share0_ref3.v1, share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2 - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 3:0) Output: share0_ref2.v1, share0_ref2.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (30 rows) @@ -630,9 +630,9 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) ----- 1 3 - 6 10 15 + 6 21 28 36 @@ -640,202 +640,202 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) 55 (10 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=10 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share0_ref3.v2) OVER (?)) - -> Sequence 
(cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: (sum(share0_ref3.v2) OVER (?)) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Redistribute Motion 1:3 (slice2) (cost=0.00..862.00 rows=4 width=8) + -> Redistribute Motion 1:3 (slice2) Output: (sum(share0_ref3.v2) OVER (?)) - -> WindowAgg (cost=0.00..862.00 rows=10 width=8) + -> WindowAgg Output: sum(share0_ref3.v2) OVER (?) Order By: share0_ref3.v3 - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..862.00 rows=10 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) Output: share0_ref3.v2, share0_ref3.v3 Merge Key: share0_ref3.v3 - -> Sort (cost=0.00..862.00 rows=4 width=8) + -> Sort Output: share0_ref3.v2, share0_ref3.v3 Sort Key: share0_ref3.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v2, share0_ref3.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=4 width=12) + -> Shared Scan (share slice:id 3:0) Output: share0_ref3.v1, share0_ref3.v2, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 3:0) Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (30 rows) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; sum ----- - 11 23 36 50 65 - 81 - 98 116 + 81 135 + 11 + 98 155 (10 rows) -- grouping set -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 
left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2586.00 rows=21 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share1_ref2.v2)) - -> Sequence (cost=0.00..2586.00 rows=7 width=8) + -> Sequence Output: (sum(share1_ref2.v2)) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2 - -> Sequence (cost=0.00..2155.00 rows=7 width=8) + -> Sequence Output: (sum(share1_ref2.v2)) - -> Shared Scan (share slice:id 1:1) (cost=0.00..862.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:1) Output: share1_ref1.v1, share1_ref1.v2 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1, share0_ref3.v2 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v2 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1, share0_ref2.v2 - -> Append (cost=0.00..1293.00 rows=7 width=8) - -> GroupAggregate (cost=0.00..431.00 rows=4 width=8) + -> Append + -> GroupAggregate Output: sum(share1_ref2.v2) Group Key: share1_ref2.v1, 
share1_ref2.v2 - -> Sort (cost=0.00..431.00 rows=4 width=8) + -> Sort Output: share1_ref2.v1, share1_ref2.v2 Sort Key: share1_ref2.v1, share1_ref2.v2 - -> Shared Scan (share slice:id 1:1) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:1) Output: share1_ref2.v1, share1_ref2.v2 - -> GroupAggregate (cost=0.00..431.00 rows=4 width=8) + -> GroupAggregate Output: sum(share1_ref3.v2) Group Key: share1_ref3.v1 - -> Sort (cost=0.00..431.00 rows=4 width=8) + -> Sort Output: share1_ref3.v1, share1_ref3.v2 Sort Key: share1_ref3.v1 - -> Shared Scan (share slice:id 1:1) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:1) Output: share1_ref3.v1, share1_ref3.v2 - -> Result (cost=0.00..431.00 rows=1 width=8) + -> Result Output: (sum(share1_ref4.v2)) - -> Redistribute Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 1:3 (slice2) Output: (sum(share1_ref4.v2)) - -> Finalize Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Finalize Aggregate Output: sum(share1_ref4.v2) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) Output: (PARTIAL sum(share1_ref4.v2)) - -> Partial Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Partial Aggregate Output: PARTIAL sum(share1_ref4.v2) - -> Shared Scan (share slice:id 3:1) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 3:1) Output: share1_ref4.v1, share1_ref4.v2 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (52 rows) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); sum ----- - 11 - 11 - 12 12 13 - 13 - 14 14 - 15 - 15 - 16 - 16 - 17 17 18 + 12 + 13 + 14 + 17 18 - 19 + 155 + 11 + 11 + 15 + 16 19 20 + 15 + 16 + 19 20 - 155 (21 rows) -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as 
c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2586.00 rows=21 width=8) +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: (sum(share1_ref2.v2)) - -> Sequence (cost=0.00..2586.00 rows=7 width=8) + -> Sequence Output: (sum(share1_ref2.v2)) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1, share0_ref1.v2, share0_ref1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Sequence (cost=0.00..2155.00 rows=7 width=8) + -> Sequence Output: (sum(share1_ref2.v2)) - -> Shared Scan (share slice:id 1:1) (cost=0.00..862.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:1) Output: share1_ref1.v2, share1_ref1.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v2, share0_ref3.v3 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=12) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1, share0_ref3.v2, share0_ref3.v3 - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1, share0_ref2.v2, share0_ref2.v3 - -> Append (cost=0.00..1293.00 rows=7 width=8) - -> GroupAggregate (cost=0.00..431.00 rows=4 width=8) + -> Append + -> GroupAggregate Output: 
sum(share1_ref2.v2) Group Key: share1_ref2.v2, share1_ref2.v3 - -> Sort (cost=0.00..431.00 rows=4 width=8) + -> Sort Output: share1_ref2.v2, share1_ref2.v3 Sort Key: share1_ref2.v2, share1_ref2.v3 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: share1_ref2.v2, share1_ref2.v3 Hash Key: share1_ref2.v2, share1_ref2.v3 - -> Shared Scan (share slice:id 2:1) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 2:1) Output: share1_ref2.v2, share1_ref2.v3 - -> GroupAggregate (cost=0.00..431.00 rows=4 width=8) + -> GroupAggregate Output: sum(share1_ref3.v2) Group Key: share1_ref3.v2 - -> Sort (cost=0.00..431.00 rows=4 width=4) + -> Sort Output: share1_ref3.v2 Sort Key: share1_ref3.v2 - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: share1_ref3.v2 Hash Key: share1_ref3.v2 - -> Result (cost=0.00..431.00 rows=4 width=4) + -> Result Output: share1_ref3.v2 - -> Shared Scan (share slice:id 3:1) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 3:1) Output: share1_ref3.v2, share1_ref3.v3 - -> Result (cost=0.00..431.00 rows=1 width=8) + -> Result Output: (sum(share1_ref4.v2)) - -> Redistribute Motion 1:3 (slice4) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 1:3 (slice4) Output: (sum(share1_ref4.v2)) - -> Finalize Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Finalize Aggregate Output: sum(share1_ref4.v2) - -> Gather Motion 3:1 (slice5; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice5; segments: 3) Output: (PARTIAL sum(share1_ref4.v2)) - -> Partial Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Partial Aggregate Output: PARTIAL sum(share1_ref4.v2) - -> Shared Scan (share slice:id 5:1) (cost=0.00..431.00 rows=4 width=4) + -> Shared Scan (share slice:id 5:1) Output: share1_ref4.v2, share1_ref4.v3 - Settings: 
enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (60 rows) @@ -843,47 +843,47 @@ with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) sum ----- 11 - 23 36 + 81 + 116 + 23 50 65 - 81 98 - 116 135 155 (10 rows) -- CTE producer should have right output -explain verbose with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) +explain(costs off, verbose) with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN ----------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1724.00 rows=2 width=4) + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1 - -> Sequence (cost=0.00..1724.00 rows=1 width=4) + -> Sequence Output: share0_ref3.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..862.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Hash Join (cost=0.00..862.00 rows=1 width=4) + -> Hash Join Output: t1.v1 - Hash Cond: (t1.v1 = t2.v1) - -> Seq Scan on cte_prune.t2 (cost=0.00..431.00 rows=4 width=4) + Hash Cond: (t2.v1 = t1.v1) + -> Seq Scan on cte_prune.t2 Output: t2.v1 - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: t1.v1 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 - -> Hash Left Join (cost=0.00..862.00 rows=1 width=4) + -> Hash Left Join Output: share0_ref3.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref2.v1 - -> Shared Scan (share 
slice:id 1:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (26 rows) @@ -895,45 +895,45 @@ select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; 6 9 10 + 1 2 3 4 7 8 - 1 (10 rows) -explain verbose with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) +explain(costs off, verbose) with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=11 width=8) + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.v1 - -> Sequence (cost=0.00..1293.00 rows=4 width=8) + -> Sequence Output: share0_ref3.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.v1 - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: (sum(t1.v1)) Hash Key: (sum(t1.v1)) - -> HashAggregate (cost=0.00..431.00 rows=4 width=8) + -> HashAggregate Output: sum(t1.v1) Group Key: t1.v3 - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=4 width=12) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: t1.v1, t1.v2, t1.v3 Hash Key: t1.v3 - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash Left Join (cost=0.00..862.00 rows=4 width=8) + -> Hash Left Join Output: share0_ref3.v1 Hash Cond: (share0_ref3.v1 = share0_ref2.v1) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 
width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.v1 - -> Hash (cost=431.00..431.00 rows=4 width=8) + -> Hash Output: share0_ref2.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=4 width=8) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.v1 - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (28 rows) @@ -1068,7 +1068,7 @@ create table tpcds_web_sales primary key (ws_item_sk, ws_order_number) ); -- sql 23 -explain verbose with frequent_ss_items as +explain (costs off, verbose) with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from tpcds_store_sales ,tpcds_date_dim @@ -1080,173 +1080,173 @@ explain verbose with frequent_ss_items as having count(*) >4) select t1.v1 from t1 where t1.v1 in (select item_sk from frequent_ss_items where true) and t1.v1 in (select item_sk from frequent_ss_items where item_sk > 0); - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2161.00 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1 - -> Sequence (cost=0.00..2161.00 rows=1 width=4) + -> Sequence Output: t1.v1 - -> Shared Scan (share slice:id 1:0) (cost=0.00..868.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.i_item_sk - -> Result (cost=0.00..868.00 rows=1 width=4) + -> Result Output: tpcds_item.i_item_sk Filter: ((count(*)) > 4) - -> HashAggregate (cost=0.00..868.00 rows=1 width=12) + -> HashAggregate Output: count(*), count(*), tpcds_item.i_item_sk, (substr((tpcds_item.i_item_desc)::text, 1, 30)), tpcds_date_dim.d_date Group Key: 
substr((tpcds_item.i_item_desc)::text, 1, 30), tpcds_item.i_item_sk, tpcds_date_dim.d_date - -> Hash Join (cost=0.00..868.00 rows=1 width=16) + -> Hash Join Output: substr((tpcds_item.i_item_desc)::text, 1, 30), tpcds_date_dim.d_date, tpcds_item.i_item_sk Hash Cond: (tpcds_item.i_item_sk = tpcds_store_sales.ss_item_sk) - -> Seq Scan on cte_prune.tpcds_item (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on cte_prune.tpcds_item Output: tpcds_item.i_item_sk, tpcds_item.i_item_desc - -> Hash (cost=437.00..437.00 rows=1 width=8) + -> Hash Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..437.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date Hash Key: tpcds_store_sales.ss_item_sk - -> Nested Loop (cost=0.00..437.00 rows=1 width=8) + -> Nested Loop Output: tpcds_store_sales.ss_item_sk, tpcds_date_dim.d_date Join Filter: true - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: tpcds_store_sales.ss_sold_date_sk, tpcds_store_sales.ss_item_sk Hash Key: tpcds_store_sales.ss_sold_date_sk - -> Seq Scan on cte_prune.tpcds_store_sales (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.tpcds_store_sales Output: tpcds_store_sales.ss_sold_date_sk, tpcds_store_sales.ss_item_sk - -> Index Scan using tpcds_date_dim_pkey on cte_prune.tpcds_date_dim (cost=0.00..6.00 rows=1 width=4) + -> Index Scan using tpcds_date_dim_pkey on cte_prune.tpcds_date_dim Output: tpcds_date_dim.d_date, tpcds_date_dim.d_year Index Cond: (tpcds_date_dim.d_date_sk = tpcds_store_sales.ss_sold_date_sk) Filter: (tpcds_date_dim.d_year = ANY ('{1999,2000,2001,2002}'::integer[])) - -> Hash Semi Join (cost=0.00..1293.00 rows=1 width=4) + -> Hash Semi Join Output: t1.v1 Hash Cond: (t1.v1 = share0_ref2.i_item_sk) - -> Hash Semi Join (cost=0.00..862.00 rows=1 width=4) 
+ -> Hash Semi Join Output: t1.v1 Hash Cond: (t1.v1 = share0_ref3.i_item_sk) - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t1 Output: t1.v1 - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref3.i_item_sk - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: share0_ref3.i_item_sk Filter: (share0_ref3.i_item_sk > 0) - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref3.i_item_sk - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref2.i_item_sk - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 1:0) Output: share0_ref2.i_item_sk - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (55 rows) -- sql 95 -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select * from t1 where t1.v1 in (select ws_order_number from ws_wh where true) and t1.v1 in (select ws_order_number from ws_wh where ws_order_number > 0); - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1730.00 rows=1 width=12) + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, t1.v2, t1.v3 - -> Sequence (cost=0.00..1730.00 rows=1 width=12) + -> Sequence Output: t1.v1, t1.v2, t1.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..437.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.ws_order_number - -> Nested Loop 
(cost=0.00..437.00 rows=1 width=4) + -> Nested Loop Output: ws1.ws_order_number Join Filter: true - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) Output: ws1.ws_warehouse_sk, ws1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1 (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws1 Output: ws1.ws_warehouse_sk, ws1.ws_order_number - -> Index Scan using tpcds_web_sales_pkey on cte_prune.tpcds_web_sales ws2 (cost=0.00..6.00 rows=1 width=1) + -> Index Scan using tpcds_web_sales_pkey on cte_prune.tpcds_web_sales ws2 Output: ws2.ws_warehouse_sk Index Cond: (ws2.ws_order_number = ws1.ws_order_number) Filter: (ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - -> Hash Semi Join (cost=0.00..1293.00 rows=1 width=12) + -> Hash Semi Join Output: t1.v1, t1.v2, t1.v3 Hash Cond: (t1.v1 = share0_ref2.ws_order_number) - -> Hash Semi Join (cost=0.00..862.00 rows=1 width=12) + -> Hash Semi Join Output: t1.v1, t1.v2, t1.v3 Hash Cond: (t1.v1 = share0_ref3.ws_order_number) - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref3.ws_order_number - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: share0_ref3.ws_order_number Hash Key: share0_ref3.ws_order_number - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: share0_ref3.ws_order_number Filter: (share0_ref3.ws_order_number > 0) - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 3:0) Output: share0_ref3.ws_order_number - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref2.ws_order_number - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Redistribute 
Motion 3:3 (slice4; segments: 3) Output: share0_ref2.ws_order_number Hash Key: share0_ref2.ws_order_number - -> Shared Scan (share slice:id 4:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 4:0) Output: share0_ref2.ws_order_number - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (44 rows) -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select * from t1 where t1.v1 in (select wh1 from ws_wh where true) and t1.v1 in (select wh1 from ws_wh where ws_order_number > 0); - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1730.00 rows=1 width=12) + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t1.v1, t1.v2, t1.v3 - -> Sequence (cost=0.00..1730.00 rows=1 width=12) + -> Sequence Output: t1.v1, t1.v2, t1.v3 - -> Shared Scan (share slice:id 1:0) (cost=0.00..437.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.ws_order_number, share0_ref1.ws_warehouse_sk - -> Nested Loop (cost=0.00..437.00 rows=1 width=8) + -> Nested Loop Output: ws1.ws_order_number, ws1.ws_warehouse_sk Join Filter: true - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) Output: ws1.ws_warehouse_sk, ws1.ws_order_number - -> Seq Scan on cte_prune.tpcds_web_sales ws1 (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.tpcds_web_sales ws1 Output: ws1.ws_warehouse_sk, ws1.ws_order_number - -> Index Scan using 
tpcds_web_sales_pkey on cte_prune.tpcds_web_sales ws2 (cost=0.00..6.00 rows=1 width=1) + -> Index Scan using tpcds_web_sales_pkey on cte_prune.tpcds_web_sales ws2 Output: ws2.ws_warehouse_sk Index Cond: (ws2.ws_order_number = ws1.ws_order_number) Filter: (ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - -> Hash Semi Join (cost=0.00..1293.00 rows=1 width=12) + -> Hash Semi Join Output: t1.v1, t1.v2, t1.v3 Hash Cond: (t1.v1 = share0_ref2.ws_warehouse_sk) - -> Hash Semi Join (cost=0.00..862.00 rows=1 width=12) + -> Hash Semi Join Output: t1.v1, t1.v2, t1.v3 Hash Cond: (t1.v1 = share0_ref3.ws_warehouse_sk) - -> Seq Scan on cte_prune.t1 (cost=0.00..431.00 rows=4 width=12) + -> Seq Scan on cte_prune.t1 Output: t1.v1, t1.v2, t1.v3 - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref3.ws_warehouse_sk - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: share0_ref3.ws_warehouse_sk Hash Key: share0_ref3.ws_warehouse_sk - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: share0_ref3.ws_warehouse_sk Filter: (share0_ref3.ws_order_number > 0) - -> Shared Scan (share slice:id 3:0) (cost=0.00..431.00 rows=1 width=8) + -> Shared Scan (share slice:id 3:0) Output: share0_ref3.ws_order_number, share0_ref3.ws_warehouse_sk - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share0_ref2.ws_warehouse_sk - -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: share0_ref2.ws_warehouse_sk Hash Key: share0_ref2.ws_warehouse_sk - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: share0_ref2.ws_warehouse_sk - -> Shared Scan (share slice:id 4:0) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 4:0) Output: share0_ref2.ws_order_number, share0_ref2.ws_warehouse_sk - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer 
= 'on' Optimizer: GPORCA (46 rows) @@ -1260,12 +1260,14 @@ drop table t2; -- end_ignore -- comm cases CREATE TABLE t3 AS SELECT i as a, i+1 as b from generate_series(1,10)i; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. CREATE TABLE t4 AS SELECT i as c, i+1 as d from generate_series(1,10)i; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. analyze t3; analyze t4; -- Additional filtering conditions are added to the consumer. -- This is caused by `PexprInferPredicates` in the ORCA preprocessor. -explain verbose WITH t(a,b,d) AS +explain(costs off, verbose) WITH t(a,b,d) AS ( SELECT t3.a,t3.b,t4.d FROM t3,t4 WHERE t3.a = t4.d ) @@ -1277,98 +1279,98 @@ t WHERE cup.e < 10 GROUP BY cup.c,cup.d, cup.e ,t.d, t.b ORDER BY 1,2,3,4 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1356697001.21 rows=10 width=24) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) Merge Key: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Sort (cost=0.00..1356697001.21 rows=4 width=24) + -> Sort Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) Sort Key: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Sequence (cost=0.00..1356697001.21 rows=4 width=24) + -> Sequence Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Shared Scan (share slice:id 1:0) (cost=0.00..862.00 rows=4 width=1) + 
-> Shared Scan (share slice:id 1:0) Output: share0_ref1.a, share0_ref1.b, share0_ref1.d - -> Hash Join (cost=0.00..862.00 rows=4 width=12) + -> Hash Join Output: t3.a, t3.b, t4.d Hash Cond: (t3.a = t4.d) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: t3.a, t3.b Hash Key: t3.a - -> Seq Scan on cte_prune.t3 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t3 Output: t3.a, t3.b - -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Hash Output: t4.d - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: t4.d Hash Key: t4.d - -> Seq Scan on cte_prune.t4 (cost=0.00..431.00 rows=4 width=4) + -> Seq Scan on cte_prune.t4 Output: t4.d - -> Redistribute Motion 1:3 (slice4) (cost=0.00..1356696139.21 rows=4 width=24) + -> Redistribute Motion 1:3 (slice4) Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Limit (cost=0.00..1356696139.21 rows=10 width=24) + -> Limit Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Gather Motion 3:1 (slice5; segments: 3) (cost=0.00..1356696139.21 rows=10 width=24) + -> Gather Motion 3:1 (slice5; segments: 3) Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) Merge Key: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Limit (cost=0.00..1356696139.21 rows=4 width=24) + -> Limit Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Result (cost=0.00..1356696139.21 rows=134 width=24) + -> Result Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> Sort (cost=0.00..1356696139.21 rows=134 width=24) + -> Sort Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)), share0_ref2.b Sort Key: t4_1.c, t4_1.d, 
(avg(share0_ref3.b) OVER (?)), (sum(share0_ref2.d) OVER (?)) - -> WindowAgg (cost=0.00..1356696139.08 rows=134 width=24) + -> WindowAgg Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), sum(share0_ref2.d) OVER (?), share0_ref2.b Partition By: share0_ref2.b - -> Sort (cost=0.00..1356696139.08 rows=134 width=24) + -> Sort Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.b, share0_ref2.d Sort Key: share0_ref2.b - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=0.00..1356696138.95 rows=134 width=24) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.b, share0_ref2.d Hash Key: share0_ref2.b - -> HashAggregate (cost=0.00..1356696138.94 rows=134 width=24) + -> HashAggregate Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.b, share0_ref2.d Group Key: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.d, share0_ref2.b - -> Redistribute Motion 3:3 (slice7; segments: 3) (cost=0.00..1356696138.85 rows=134 width=24) + -> Redistribute Motion 3:3 (slice7; segments: 3) Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.b, share0_ref2.d Hash Key: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.d, share0_ref2.b - -> Nested Loop (cost=0.00..1356696138.84 rows=134 width=24) + -> Nested Loop Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)), share0_ref2.b, share0_ref2.d Join Filter: true - -> Result (cost=0.00..1324036.56 rows=14 width=16) + -> Result Output: t4_1.c, t4_1.d, (avg(share0_ref3.b) OVER (?)) Filter: ((avg(share0_ref3.b) OVER (?)) < '10'::numeric) - -> WindowAgg (cost=0.00..1324036.56 rows=34 width=16) + -> WindowAgg Output: avg(share0_ref3.b) OVER (?), share0_ref3.a, share0_ref3.b, t4_1.c, t4_1.d Partition By: share0_ref3.a Order By: share0_ref3.b - -> Sort (cost=0.00..1324036.56 rows=34 width=16) + -> Sort Output: share0_ref3.a, share0_ref3.b, t4_1.c, t4_1.d Sort Key: share0_ref3.a, share0_ref3.b DESC - -> Redistribute 
Motion 3:3 (slice9; segments: 3) (cost=0.00..1324036.54 rows=34 width=16) + -> Redistribute Motion 3:3 (slice9; segments: 3) Output: share0_ref3.a, share0_ref3.b, t4_1.c, t4_1.d Hash Key: share0_ref3.a - -> Nested Loop (cost=0.00..1324036.54 rows=34 width=16) + -> Nested Loop Output: share0_ref3.a, share0_ref3.b, t4_1.c, t4_1.d Join Filter: true - -> Broadcast Motion 3:3 (slice10; segments: 3) (cost=0.00..431.00 rows=10 width=8) + -> Broadcast Motion 3:3 (slice10; segments: 3) Output: share0_ref3.a, share0_ref3.b - -> Result (cost=0.00..431.00 rows=4 width=8) + -> Result Output: share0_ref3.a, share0_ref3.b Filter: (share0_ref3.a = share0_ref3.d) - -> Shared Scan (share slice:id 10:0) (cost=0.00..431.00 rows=4 width=12) + -> Shared Scan (share slice:id 10:0) Output: share0_ref3.a, share0_ref3.b, share0_ref3.d - -> Seq Scan on cte_prune.t4 t4_1 (cost=0.00..431.00 rows=4 width=8) + -> Seq Scan on cte_prune.t4 t4_1 Output: t4_1.c, t4_1.d - -> Materialize (cost=0.00..431.00 rows=10 width=8) + -> Materialize Output: share0_ref2.b, share0_ref2.d - -> Broadcast Motion 3:3 (slice8; segments: 3) (cost=0.00..431.00 rows=10 width=8) + -> Broadcast Motion 3:3 (slice8; segments: 3) Output: share0_ref2.b, share0_ref2.d - -> Result (cost=0.00..431.00 rows=4 width=8) + -> Result Output: share0_ref2.b, share0_ref2.d Filter: (share0_ref2.a = share0_ref2.d) - -> Shared Scan (share slice:id 8:0) (cost=0.00..431.00 rows=4 width=12) + -> Shared Scan (share slice:id 8:0) Output: share0_ref2.a, share0_ref2.b, share0_ref2.d Settings: optimizer = 'on' Optimizer: GPORCA @@ -1430,42 +1432,42 @@ SELECT FROM generate_series(0, 99) AS i; INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation -explain verbose select four, x +explain(costs off, verbose) select four, x from (select four, ten, 'foo'::text as x from cte_prune_tenk1) as t group by grouping 
sets (four, x) having x = 'foo'; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=12) + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: (NULL::integer), share0_ref2.x - -> Sequence (cost=0.00..862.00 rows=1 width=12) + -> Sequence Output: (NULL::integer), share0_ref2.x - -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:0) Output: share0_ref1.four, share0_ref1.x - -> Seq Scan on cte_prune.cte_prune_tenk1 (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on cte_prune.cte_prune_tenk1 Output: cte_prune_tenk1.four, 'foo'::text - -> Append (cost=0.00..431.00 rows=1 width=12) - -> GroupAggregate (cost=0.00..431.00 rows=1 width=8) + -> Append + -> GroupAggregate Output: NULL::integer, share0_ref2.x Group Key: share0_ref2.x - -> Sort (cost=0.00..431.00 rows=1 width=8) + -> Sort Output: share0_ref2.x Sort Key: share0_ref2.x - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: share0_ref2.x Hash Key: share0_ref2.x - -> Result (cost=0.00..431.00 rows=1 width=8) + -> Result Output: share0_ref2.x Filter: (share0_ref2.x = 'foo'::text) - -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=1 width=8) + -> Shared Scan (share slice:id 2:0) Output: share0_ref2.four, share0_ref2.x - -> Result (cost=0.00..0.00 rows=0 width=12) + -> Result Output: (NULL::integer), (NULL::text) One-Time Filter: (gp_execution_segment() = 0) - -> Result (cost=0.00..0.00 rows=0 width=12) + -> Result Output: NULL::integer, NULL::text One-Time Filter: false - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (31 rows) @@ -1524,7 +1526,7 @@ ALTER TABLE ONLY 
country ALTER TABLE ONLY countrylanguage ADD CONSTRAINT countrylanguage_pkey PRIMARY KEY (countrycode, "language"); -- CTE1(inlined) in CTE2(no-inlined) case -explain verbose with country as +explain(costs off, verbose) with country as (select country.code,country.name COUNTRY, city.name CAPITAL, language, isofficial, percentage FROM country,city,countrylanguage WHERE country.code = countrylanguage.countrycode @@ -1541,84 +1543,84 @@ select * from where country.percentage = countrylanguage.percentage order by countrylanguage.COUNTRY,country.language LIMIT 40; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1736.01 rows=1 width=66) + Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Merge Key: share0_ref2.name, share0_ref3.language - -> Sort (cost=0.00..1736.01 rows=1 width=66) + -> Sort Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Sort Key: share0_ref2.name, share0_ref3.language - -> Sequence (cost=0.00..1736.00 rows=1 width=66) + -> Sequence Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Shared Scan (share slice:id 1:0) (cost=0.00..868.00 rows=1 width=1) + -> 
Shared Scan (share slice:id 1:0) Output: share0_ref1.code, share0_ref1.name, share0_ref1.name_1, share0_ref1.language, share0_ref1.isofficial, share0_ref1.percentage - -> Hash Join (cost=0.00..868.00 rows=1 width=37) + -> Hash Join Output: country.code, country.name, city.name, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage Hash Cond: (country.capital = city.id) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..437.00 rows=1 width=33) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: country.code, country.name, country.capital, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage Hash Key: country.capital - -> Nested Loop (cost=0.00..437.00 rows=1 width=33) + -> Nested Loop Output: country.code, country.name, country.capital, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage Join Filter: true - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=21) + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: countrylanguage.countrycode, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage Hash Key: countrylanguage.countrycode - -> Seq Scan on cte_prune.countrylanguage (cost=0.00..431.00 rows=1 width=21) + -> Seq Scan on cte_prune.countrylanguage Output: countrylanguage.countrycode, countrylanguage.language, countrylanguage.isofficial, countrylanguage.percentage - -> Index Scan using country_pkey on cte_prune.country (cost=0.00..6.00 rows=1 width=20) + -> Index Scan using country_pkey on cte_prune.country Output: country.code, country.name, country.continent, country.capital Index Cond: (country.code = countrylanguage.countrycode) Filter: (country.continent = 'Europe'::text) - -> Hash (cost=431.00..431.00 rows=1 width=12) + -> Hash Output: city.id, city.name - -> Seq Scan on cte_prune.city (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on cte_prune.city Output: city.id, city.name - -> 
Redistribute Motion 1:3 (slice4) (cost=0.00..868.00 rows=1 width=66) + -> Redistribute Motion 1:3 (slice4) Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Limit (cost=0.00..868.00 rows=1 width=66) + -> Limit Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Gather Motion 3:1 (slice5; segments: 3) (cost=0.00..868.00 rows=1 width=66) + -> Gather Motion 3:1 (slice5; segments: 3) Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Merge Key: share0_ref2.name, share0_ref3.language - -> Sort (cost=0.00..868.00 rows=1 width=66) + -> Sort Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Sort Key: share0_ref2.name, share0_ref3.language - -> Hash Join (cost=0.00..868.00 rows=1 width=66) + -> Hash Join Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage, share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Hash Cond: (share0_ref3.percentage = share0_ref2.percentage) - -> Redistribute Motion 3:3 (slice6; segments: 3) (cost=0.00..431.00 rows=1 width=37) + -> Redistribute Motion 3:3 (slice6; segments: 3) Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, 
share0_ref3.isofficial, share0_ref3.percentage Hash Key: share0_ref3.percentage - -> Result (cost=0.00..431.00 rows=1 width=37) + -> Result Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage Filter: (share0_ref3.isofficial AND (share0_ref3.percentage > '50'::double precision)) - -> Shared Scan (share slice:id 6:0) (cost=0.00..431.00 rows=1 width=37) + -> Shared Scan (share slice:id 6:0) Output: share0_ref3.code, share0_ref3.name, share0_ref3.name_1, share0_ref3.language, share0_ref3.isofficial, share0_ref3.percentage - -> Hash (cost=437.00..437.00 rows=1 width=29) + -> Hash Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Redistribute Motion 3:3 (slice7; segments: 3) (cost=0.00..437.00 rows=1 width=29) + -> Redistribute Motion 3:3 (slice7; segments: 3) Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Hash Key: share0_ref2.percentage - -> Result (cost=0.00..437.00 rows=1 width=29) + -> Result Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Filter: (share0_ref2.percentage > '50'::double precision) - -> Nested Loop (cost=0.00..437.00 rows=1 width=29) + -> Nested Loop Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage Join Filter: true - -> Broadcast Motion 3:3 (slice8; segments: 3) (cost=0.00..431.00 rows=1 width=29) + -> Broadcast Motion 3:3 (slice8; segments: 3) Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Result (cost=0.00..431.00 rows=1 width=29) + -> Result Output: share0_ref2.code, share0_ref2.name, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Shared Scan (share slice:id 8:0) (cost=0.00..431.00 rows=1 width=29) + -> Shared 
Scan (share slice:id 8:0) Output: share0_ref2.code, share0_ref2.name, share0_ref2.name_1, share0_ref2.language, share0_ref2.isofficial, share0_ref2.percentage - -> Index Scan using countrylanguage_pkey on cte_prune.countrylanguage countrylanguage_1 (cost=0.00..6.00 rows=1 width=1) + -> Index Scan using countrylanguage_pkey on cte_prune.countrylanguage countrylanguage_1 Index Cond: (countrylanguage_1.countrycode = share0_ref2.code) - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (74 rows) -- CTE in the main query and subqueries within the main query -explain verbose with bad_headofstates as +explain(costs off, verbose) with bad_headofstates as ( select country.code,country.name,country.headofstate,countrylanguage.language from @@ -1637,84 +1639,84 @@ where FOO.code = bad_headofstates.code and FOO.capital = city.id) OUTER_FOO group by region ) OUTERMOST_FOO,bad_headofstates,country where country.code = bad_headofstates.code and country.region = OUTERMOST_FOO.region order by OUTERMOST_FOO.region,bad_headofstates.headofstate LIMIT 40; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------- - Sequence (cost=0.00..1748.00 rows=1 width=24) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------- + Sequence Output: (avg(country_1.population)), country_1.region, share0_ref2.headofstate - -> Shared Scan (share slice:id 0:0) (cost=0.00..437.00 rows=1 width=1) + -> Shared Scan (share slice:id 0:0) Output: share0_ref1.code, share0_ref1.headofstate - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.00 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) Output: country.code, country.headofstate - -> Nested Loop (cost=0.00..437.00 rows=1 width=16) + -> Nested Loop Output: country.code, 
country.headofstate Join Filter: true - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: countrylanguage.countrycode Hash Key: countrylanguage.countrycode - -> Seq Scan on cte_prune.countrylanguage (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.countrylanguage Output: countrylanguage.countrycode Filter: countrylanguage.isofficial - -> Index Scan using country_pkey on cte_prune.country (cost=0.00..6.00 rows=1 width=16) + -> Index Scan using country_pkey on cte_prune.country Output: country.code, country.gnp, country.gnpold, country.headofstate Index Cond: (country.code = countrylanguage.countrycode) Filter: ((country.gnp < country.gnpold) OR (country.gnp < '3000'::numeric)) - -> Limit (cost=0.00..1311.00 rows=1 width=24) + -> Limit Output: (avg(country_1.population)), country_1.region, share0_ref2.headofstate - -> Sort (cost=0.00..1311.00 rows=1 width=24) + -> Sort Output: (avg(country_1.population)), country_1.region, share0_ref2.headofstate Sort Key: country_1.region, share0_ref2.headofstate - -> Hash Join (cost=0.00..1311.00 rows=1 width=24) + -> Hash Join Output: (avg(country_1.population)), country_1.region, share0_ref2.headofstate Hash Cond: (country_1.region = country_2.region) - -> HashAggregate (cost=0.00..874.00 rows=1 width=16) + -> HashAggregate Output: avg(country_1.population), country_1.region Group Key: country_1.region - -> Hash Join (cost=0.00..874.00 rows=1 width=12) + -> Hash Join Output: country_1.region, country_1.population Hash Cond: (share0_ref4.code = share0_ref3.code) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..443.00 rows=1 width=20) + -> Gather Motion 3:1 (slice3; segments: 3) Output: share0_ref4.code, country_1.region, country_1.population - -> Nested Loop (cost=0.00..443.00 rows=1 width=20) + -> Nested Loop Output: share0_ref4.code, country_1.region, country_1.population Join Filter: true - -> Redistribute Motion 
3:3 (slice4; segments: 3) (cost=0.00..437.00 rows=1 width=24) + -> Redistribute Motion 3:3 (slice4; segments: 3) Output: share0_ref4.code, country_1.region, country_1.population, country_1.capital Hash Key: country_1.capital - -> Nested Loop (cost=0.00..437.00 rows=1 width=24) + -> Nested Loop Output: share0_ref4.code, country_1.region, country_1.population, country_1.capital Join Filter: true - -> Redistribute Motion 1:3 (slice5) (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 1:3 (slice5) Output: share0_ref4.code Hash Key: share0_ref4.code - -> Result (cost=0.00..431.00 rows=1 width=8) + -> Result Output: share0_ref4.code - -> Shared Scan (share slice:id 5:0) (cost=0.00..431.00 rows=1 width=8) + -> Shared Scan (share slice:id 5:0) Output: share0_ref4.code, share0_ref4.headofstate - -> Index Scan using country_pkey on cte_prune.country country_1 (cost=0.00..6.00 rows=1 width=16) + -> Index Scan using country_pkey on cte_prune.country country_1 Output: country_1.region, country_1.population, country_1.capital Index Cond: (country_1.code = share0_ref4.code) - -> Index Scan using city_pkey on cte_prune.city (cost=0.00..6.00 rows=1 width=1) + -> Index Scan using city_pkey on cte_prune.city Index Cond: (city.id = country_1.capital) - -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Hash Output: share0_ref3.code, share0_ref3.headofstate - -> Shared Scan (share slice:id 0:0) (cost=0.00..431.00 rows=1 width=8) + -> Shared Scan (share slice:id 0:0) Output: share0_ref3.code, share0_ref3.headofstate - -> Hash (cost=437.00..437.00 rows=1 width=16) + -> Hash Output: share0_ref2.headofstate, country_2.region - -> Gather Motion 3:1 (slice6; segments: 3) (cost=0.00..437.00 rows=1 width=16) + -> Gather Motion 3:1 (slice6; segments: 3) Output: share0_ref2.headofstate, country_2.region - -> Nested Loop (cost=0.00..437.00 rows=1 width=16) + -> Nested Loop Output: share0_ref2.headofstate, country_2.region Join Filter: true - -> Redistribute Motion 1:3 (slice7) 
(cost=0.00..431.00 rows=1 width=16) + -> Redistribute Motion 1:3 (slice7) Output: share0_ref2.code, share0_ref2.headofstate Hash Key: share0_ref2.code - -> Shared Scan (share slice:id 7:0) (cost=0.00..431.00 rows=1 width=16) + -> Shared Scan (share slice:id 7:0) Output: share0_ref2.code, share0_ref2.headofstate - -> Index Scan using country_pkey on cte_prune.country country_2 (cost=0.00..6.00 rows=1 width=8) + -> Index Scan using country_pkey on cte_prune.country country_2 Output: country_2.region Index Cond: (country_2.code = share0_ref2.code) - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (77 rows) @@ -1725,75 +1727,73 @@ drop table countrylanguage; -- end_ignore -- inlined CTEs CREATE TABLE t5 AS SELECT i as c, i+1 as d from generate_series(1,10)i; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. CREATE TABLE t6 AS SELECT i as a, i+1 as b from generate_series(1,10)i; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. -- inlined CTEs should have not unused columns(ex. 
t5.*, t6.* in output) -explain verbose WITH w AS (SELECT a, b from t6 where b < 5) +explain(costs off, verbose) WITH w AS (SELECT a, b from t6 where b < 5) SELECT * FROM t6, (WITH v AS (SELECT c, d FROM t5, w WHERE c = w.a AND c < 2) SELECT v1.c, v1.d FROM v v1, v v2 WHERE v1.c = v2.c AND v1.d > 1 ) x WHERE t6.a = x.c ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2155.00 rows=1 width=16) + QUERY PLAN +---------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) Output: t6_1.a, t6_1.b, share1_ref3.c, share1_ref3.d Merge Key: t6_1.a - -> Sort (cost=0.00..2155.00 rows=1 width=16) + -> Sort Output: t6_1.a, t6_1.b, share1_ref3.c, share1_ref3.d Sort Key: t6_1.a - -> Hash Join (cost=0.00..2155.00 rows=1 width=16) + -> Hash Join Output: t6_1.a, t6_1.b, share1_ref3.c, share1_ref3.d Hash Cond: (share1_ref3.c = t6_1.a) - -> Sequence (cost=0.00..1724.00 rows=1 width=8) + -> Sequence Output: share1_ref3.c, share1_ref3.d - -> Shared Scan (share slice:id 1:1) (cost=0.00..862.00 rows=1 width=1) + -> Shared Scan (share slice:id 1:1) Output: share1_ref1.c, share1_ref1.d - -> Hash Join (cost=0.00..862.00 rows=1 width=8) + -> Hash Join Output: t5.c, t5.d Hash Cond: (t5.c = t6.a) - -> Seq Scan on cte_prune.t5 (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.t5 Output: t5.c, t5.d Filter: (t5.c < 2) - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: t6.a - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) Output: t6.a - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: t6.a Filter: (t6.a < 2) - -> Seq Scan on cte_prune.t6 (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on cte_prune.t6 Output: t6.a Filter: (t6.b < 5) - -> Hash Join 
(cost=0.00..862.00 rows=1 width=8) + -> Hash Join Output: share1_ref3.c, share1_ref3.d Hash Cond: (share1_ref3.c = share1_ref2.c) - -> Result (cost=0.00..431.00 rows=1 width=8) + -> Result Output: share1_ref3.c, share1_ref3.d Filter: ((share1_ref3.d > 1) AND (share1_ref3.c < 2)) - -> Shared Scan (share slice:id 1:1) (cost=0.00..431.00 rows=1 width=8) + -> Shared Scan (share slice:id 1:1) Output: share1_ref3.c, share1_ref3.d - -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Hash Output: share1_ref2.c - -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice3; segments: 3) Output: share1_ref2.c - -> Result (cost=0.00..431.00 rows=1 width=4) + -> Result Output: share1_ref2.c Filter: (share1_ref2.c < 2) - -> Shared Scan (share slice:id 3:1) (cost=0.00..431.00 rows=1 width=4) + -> Shared Scan (share slice:id 3:1) Output: share1_ref2.c, share1_ref2.d - -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Hash Output: t6_1.a, t6_1.b - -> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice4; segments: 3) Output: t6_1.a, t6_1.b - -> Seq Scan on cte_prune.t6 t6_1 (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on cte_prune.t6 t6_1 Output: t6_1.a, t6_1.b Filter: (t6_1.a < 2) - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (55 rows) @@ -1824,14 +1824,14 @@ INSERT INTO t7 VALUES (8, 9, NULL); explain (verbose, costs off) with x as (select * from (select f1 from t7) ss) select * from x where f1 = 1; - QUERY PLAN ------------------------------------------------------ + QUERY PLAN +------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) Output: f1 -> Seq Scan on cte_prune.t7 Output: f1 Filter: (t7.f1 = 1) - Settings: enable_parallel = 'off', optimizer = 'on' + Settings: optimizer = 'on' Optimizer: GPORCA (7 rows) @@ -1848,7 +1848,8 @@ drop table t5; drop table t6; drop table t7; drop 
schema cte_prune cascade; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table t3 drop cascades to table t4 +drop cascades to table cte_prune_tenk1 -- end_ignore diff --git a/src/test/regress/expected/dboptions.out b/src/test/regress/expected/dboptions.out index b035fc62661..f8b036b2672 100644 --- a/src/test/regress/expected/dboptions.out +++ b/src/test/regress/expected/dboptions.out @@ -1,6 +1,10 @@ -- -- Test create/alter database options -- +-- start_matchsubs +-- m/\"\/tmp\/.s.PGSQL.\d+\"/ +-- s/\"\/tmp\/.s.PGSQL.\d+\"/\"\/tmp\/.s.PGSQL.xxx\"/ +-- end_matchsubs -- Test CONNECTION LIMIT -- create a regular user as superusers are exempt from limits create user connlimit_test_user; @@ -54,7 +58,7 @@ order by gp_segment_id; alter database limitdb with connection limit 0; -- should fail, because the connection limit is 0 \! psql -h /tmp limitdb -Xc "select 'connected'" -U connlimit_test_user -psql: error: connection to server on socket "/tmp/.s.PGSQL.7000" failed: FATAL: too many connections for database "limitdb" +psql: error: connection to server on socket "/tmp/.s.PGSQL.xxx" failed: FATAL: too many connections for database "limitdb" -- Test ALLOW_CONNECTIONS create database limitdb2 allow_connections = true; select -1 as gp_segment_id, datconnlimit, datallowconn from pg_database where datname='limitdb2' @@ -84,7 +88,7 @@ order by gp_segment_id; -- should fail, as we have disallowed connections \! 
psql -h /tmp limitdb2 -Xc "select 'connected'" -U connlimit_test_user -psql: error: connection to server on socket "/tmp/.s.PGSQL.7000" failed: FATAL: database "limitdb2" is not currently accepting connections +psql: error: connection to server on socket "/tmp/.s.PGSQL.xxx" failed: FATAL: database "limitdb2" is not currently accepting connections -- Test IS_TEMPLATE create database templatedb is_template=true; select -1 as gp_segment_id, datistemplate from pg_database where datname = 'templatedb' diff --git a/src/test/regress/expected/direct_dispatch.out b/src/test/regress/expected/direct_dispatch.out index 488260e790a..1e8abdcf9b3 100644 --- a/src/test/regress/expected/direct_dispatch.out +++ b/src/test/regress/expected/direct_dispatch.out @@ -265,8 +265,8 @@ INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 key | value -----+------- | cow - 1 | 100 2 | 100 + 1 | 100 (3 rows) prepare test_update (int) as update direct_test set value = 'boo' where key = $1; @@ -278,9 +278,9 @@ select * from direct_test; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 key | value -----+------- - 1 | 100 - 2 | boo | cow + 2 | boo + 1 | 100 (3 rows) ------------------------ @@ -381,20 +381,20 @@ INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL cont EXPLAIN SELECT a.* FROM MPP_22019_a a INNER JOIN MPP_22019_b b ON a.i = b.i WHERE a.j NOT IN (SELECT j FROM MPP_22019_a a2 where a2.j = b.j) and a.i = 1; QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=10000000000.00..10000002519.51 rows=3707 width=8) - -> Nested Loop (cost=10000000000.00..10000002445.38 rows=1236 width=8) + Gather Motion 1:1 (slice1; segments: 1) (cost=10000000000.00..10000000845.29 rows=3707 width=8) + -> Nested Loop (cost=10000000000.00..10000000795.87 rows=1236 width=8) Join Filter: (SubPlan 1) - -> Seq Scan on mpp_22019_a a (cost=0.00..1176.25 rows=29 
width=8) + -> Seq Scan on mpp_22019_a a (cost=0.00..392.75 rows=29 width=8) Filter: (i = 1) - -> Materialize (cost=0.00..1176.68 rows=29 width=8) - -> Seq Scan on mpp_22019_b b (cost=0.00..1176.25 rows=29 width=8) + -> Materialize (cost=0.00..392.89 rows=29 width=8) + -> Seq Scan on mpp_22019_b b (cost=0.00..392.75 rows=29 width=8) Filter: (i = 1) SubPlan 1 - -> Result (cost=0.00..3974.50 rows=86100 width=4) + -> Result (cost=0.00..2760.50 rows=86100 width=4) Filter: (a2.j = b.j) - -> Materialize (cost=0.00..3113.50 rows=86100 width=4) - -> Broadcast Motion 3:1 (slice2; segments: 3) (cost=0.00..2683.00 rows=28700 width=4) - -> Seq Scan on mpp_22019_a a2 (cost=0.00..961.00 rows=28700 width=4) + -> Materialize (cost=0.00..1899.50 rows=86100 width=4) + -> Broadcast Motion 3:1 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=4) + -> Seq Scan on mpp_22019_a a2 (cost=0.00..321.00 rows=28700 width=4) Optimizer: Postgres query optimizer (15 rows) @@ -467,7 +467,7 @@ INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL cont -- One partition is randomly distributed, while others are distributed by key. alter table ddtesttab_1_prt_2 set distributed randomly; ERROR: can't set the distribution policy of "ddtesttab_1_prt_2" -HINT: Distribution policy can be set for an entire partitioned table, not for one of its leaf parts or an interior branch. +HINT: Distribution policy of a partition can only be the same as its parent's. 
insert into ddtesttab values (1, 1, 5); INFO: (slice 0) Dispatch command to SINGLE content INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content @@ -652,18 +652,20 @@ explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_ (4 rows) explain (costs off) select gp_segment_id, count(*) from bar_randDistr group by gp_segment_id; - QUERY PLAN ------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: gp_segment_id - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: gp_segment_id - -> Streaming Partial HashAggregate - Group Key: gp_segment_id - -> Seq Scan on bar_randdistr + -> Sort + Sort Key: gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: gp_segment_id + -> Streaming Partial HashAggregate + Group Key: gp_segment_id + -> Seq Scan on bar_randdistr Optimizer: Postgres query optimizer -(9 rows) +(11 rows) -- Case2: Conjunction scenario with filter condition on gp_segment_id and column explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_id=0 and col1 between 1 and 10; @@ -895,8 +897,8 @@ INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 gp_segment_id | gp_segment_id | id | id ---------------+---------------+----+---- 1 | 1 | 1 | 1 - 2 | 0 | 5 | 2 2 | 0 | 6 | 2 + 2 | 0 | 5 | 2 (3 rows) -- flaky tests @@ -1044,8 +1046,8 @@ select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_se INFO: (slice 1) Dispatch command to PARTIAL contents: 1 0 gp_segment_id | a | b ---------------+---+--- - 0 | 3 | 3 1 | 1 | 1 + 0 | 3 | 3 (2 rows) --test direct dispatch if distribution column is of varchar type @@ -1104,11 +1106,11 @@ select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 
gp_segment_id | col1_varchar | col2_int ---------------+--------------+---------- + 2 | a | 1 + 2 | 97 | 6 1 | b | 2 1 | e | 5 0 | d | 4 - 2 | a | 1 - 2 | 97 | 6 (5 rows) --test direct dispatch if distribution column is of varchar type and disjunction scenario @@ -1474,8 +1476,8 @@ begin; -- orca does not handle direct dispatch for DELETE or UPDATE now -- also orca does not handle DELETE/UPDATE for partitioned tables now. explain (costs off) delete from t_hash_partition where r_regionkey=1; - QUERY PLAN --------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Delete on t_hash_partition Delete on t_hash_partition_1_prt_region1 t_hash_partition_1 -> Seq Scan on t_hash_partition_1_prt_region1 t_hash_partition_1 @@ -1498,8 +1500,8 @@ begin; -- orca does not handle direct dispatch for DELETE or UPDATE now -- also orca does not handle DELETE/UPDATE for partitioned tables now. explain (costs off) update t_hash_partition set r_name = 'CHINA' where r_regionkey=1; - QUERY PLAN --------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Update on t_hash_partition Update on t_hash_partition_1_prt_region1 t_hash_partition_1 -> Seq Scan on t_hash_partition_1_prt_region1 t_hash_partition_1 diff --git a/src/test/regress/expected/gp_distinct_plans.out b/src/test/regress/expected/gp_distinct_plans.out index 15f5f039bc4..86ffe426815 100644 --- a/src/test/regress/expected/gp_distinct_plans.out +++ b/src/test/regress/expected/gp_distinct_plans.out @@ -23,43 +23,46 @@ explain select distinct a, b from distinct_test; select distinct a, b from distinct_test; a | b ----+--- - 1 | 0 0 | 0 - 6 | 3 - 9 | 4 + 1 | 0 10 | 5 5 | 2 - 3 | 1 + 6 | 3 + 9 | 4 + 7 | 3 4 | 2 + 3 | 1 2 | 1 - 7 | 3 8 | 4 (11 rows) -- Otherwise, redistribution is needed explain select distinct b from distinct_test; - QUERY PLAN 
------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=46.86..46.96 rows=6 width=4) - -> HashAggregate (cost=46.86..46.88 rows=2 width=4) + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=46.92..47.05 rows=6 width=4) + Merge Key: b + -> GroupAggregate (cost=46.92..46.97 rows=2 width=4) Group Key: b - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=46.67..46.85 rows=6 width=4) - Hash Key: b - -> Streaming HashAggregate (cost=46.67..46.73 rows=6 width=4) - Group Key: b - -> Seq Scan on distinct_test (cost=0.00..38.33 rows=3333 width=4) + -> Sort (cost=46.92..46.94 rows=6 width=4) + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=46.67..46.85 rows=6 width=4) + Hash Key: b + -> Streaming HashAggregate (cost=46.67..46.73 rows=6 width=4) + Group Key: b + -> Seq Scan on distinct_test (cost=0.00..38.33 rows=3333 width=4) Optimizer: Postgres query optimizer -(9 rows) +(12 rows) select distinct b from distinct_test; b --- - 5 + 0 + 1 2 - 4 3 - 1 - 0 + 4 + 5 (6 rows) -- The two-stage aggregation can be disabled with GUC @@ -137,19 +140,23 @@ select distinct a, b from distinct_test; -- Otherwise, redistribution is needed explain select distinct b from distinct_test; - QUERY PLAN ------------------------------------------------------------------------------------------ - GroupAggregate (cost=233.38..250.45 rows=6 width=4) - Group Key: b - -> Gather Motion 3:1 (slice1; segments: 3) (cost=233.38..250.35 rows=18 width=4) - Merge Key: b - -> GroupAggregate (cost=233.38..250.11 rows=6 width=4) - Group Key: b - -> Sort (cost=233.38..241.71 rows=3333 width=4) - Sort Key: b - -> Seq Scan on distinct_test (cost=0.00..38.33 rows=3333 width=4) + QUERY PLAN 
+-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=250.30..250.43 rows=6 width=4) + Merge Key: b + -> GroupAggregate (cost=250.30..250.35 rows=2 width=4) + Group Key: b + -> Sort (cost=250.30..250.32 rows=6 width=4) + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=233.38..250.23 rows=6 width=4) + Hash Key: b + -> GroupAggregate (cost=233.38..250.11 rows=6 width=4) + Group Key: b + -> Sort (cost=233.38..241.71 rows=3333 width=4) + Sort Key: b + -> Seq Scan on distinct_test (cost=0.00..38.33 rows=3333 width=4) Optimizer: Postgres query optimizer -(10 rows) +(14 rows) select distinct b from distinct_test; b @@ -225,17 +232,21 @@ select distinct a, b from distinct_test; -- Otherwise, redistribution is needed explain select distinct b from distinct_test; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------- - GroupAggregate (cost=0.16..197.23 rows=6 width=4) - Group Key: b - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.16..197.13 rows=18 width=4) - Merge Key: b - -> GroupAggregate (cost=0.16..196.89 rows=6 width=4) - Group Key: b - -> Index Only Scan using distinct_test_b_idx on distinct_test (cost=0.16..188.49 rows=3333 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=197.08..197.21 rows=6 width=4) + Merge Key: b + -> GroupAggregate (cost=197.08..197.13 rows=2 width=4) + Group Key: b + -> Sort (cost=197.08..197.10 rows=6 width=4) + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.16..197.01 rows=6 width=4) + Hash Key: b + -> GroupAggregate (cost=0.16..196.89 rows=6 width=4) + Group Key: b + -> Index Only Scan using distinct_test_b_idx on distinct_test (cost=0.16..188.49 rows=3333 width=4) 
Optimizer: Postgres query optimizer -(8 rows) +(12 rows) select distinct b from distinct_test; b diff --git a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out index 0b8222c9a25..ca27b296831 100644 --- a/src/test/regress/expected/gp_dqa.out +++ b/src/test/regress/expected/gp_dqa.out @@ -39,7 +39,7 @@ select count(distinct d) from dqa_t1; (1 row) explain (costs off) select count(distinct d) from dqa_t1; - QUERY PLAN + QUERY PLAN ------------------------------------------------ Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) @@ -51,22 +51,22 @@ explain (costs off) select count(distinct d) from dqa_t1; select count(distinct d) from dqa_t1 group by i; count ------- - 9 8 8 - 9 8 8 - 9 8 9 8 8 8 + 9 + 9 + 9 (12 rows) explain (costs off) select count(distinct d) from dqa_t1 group by i; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -82,22 +82,22 @@ explain (costs off) select count(distinct d) from dqa_t1 group by i; select count(distinct d), sum(distinct d) from dqa_t1 group by i; count | sum -------+----- - 9 | 73 + 9 | 82 + 8 | 92 + 8 | 101 + 8 | 116 + 9 | 91 9 | 64 + 9 | 73 8 | 100 - 8 | 71 8 | 56 + 8 | 71 8 | 108 8 | 86 - 9 | 82 - 8 | 116 - 8 | 92 - 8 | 101 - 9 | 91 (12 rows) explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group by i; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -193,29 +193,29 @@ explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 gro select count(distinct d), count(distinct dt) from dqa_t1 group by d; count | count -------+------- - 1 | 5 - 1 | 4 1 | 4 1 | 4 1 | 4 1 | 5 + 1 | 5 1 | 4 1 | 4 - 1 | 4 - 1 | 5 1 | 5 1 | 4 - 1 | 5 + 1 | 4 1 | 4 1 | 4 1 | 4 1 | 5 1 | 4 1 | 5 + 1 | 5 1 | 4 + 1 | 5 1 | 4 1 | 4 1 | 5 + 1 | 4 (23 rows) explain (costs off) select 
count(distinct d), count(distinct dt) from dqa_t1 group by d; @@ -266,24 +266,23 @@ select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d gr 2 1 1 - 1 + 2 + 2 1 1 2 1 - 2 - 2 + 1 1 2 1 1 + 2 1 1 2 2 1 - 2 - 1 1 1 1 @@ -298,19 +297,20 @@ select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d gr 1 1 1 + 2 1 + 2 1 + 2 1 1 2 1 - 2 - 2 1 1 1 1 - 2 + 1 1 1 1 @@ -348,7 +348,7 @@ select count(distinct c) from dqa_t1; (1 row) explain (costs off) select count(distinct c) from dqa_t1; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------ Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) @@ -372,9 +372,11 @@ select count(distinct c) from dqa_t1 group by dt; 3 3 3 + 2 3 3 3 + 2 3 3 3 @@ -382,7 +384,6 @@ select count(distinct c) from dqa_t1 group by dt; 3 3 3 - 2 3 3 3 @@ -399,11 +400,10 @@ select count(distinct c) from dqa_t1 group by dt; 3 3 3 - 2 (34 rows) explain (costs off) select count(distinct c) from dqa_t1 group by dt; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -423,31 +423,31 @@ select count(distinct c) from dqa_t1 group by d; ------- 5 4 - 4 - 4 - 5 5 4 4 4 - 5 4 4 - 5 + 4 4 4 5 + 5 4 4 4 5 + 5 4 + 5 4 5 + 4 (23 rows) explain (costs off) select count(distinct c) from dqa_t1 group by d; - QUERY PLAN + QUERY PLAN ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -461,20 +461,20 @@ explain (costs off) select count(distinct c) from dqa_t1 group by d; select count(distinct i), sum(distinct i) from dqa_t1 group by c; count | sum -------+----- - 6 | 30 6 | 36 6 | 30 6 | 36 + 6 | 30 + 6 | 30 6 | 36 6 | 30 6 | 30 6 | 36 6 | 36 - 6 | 30 (10 rows) explain (costs off) select count(distinct i), sum(distinct i) from dqa_t1 group by c; - QUERY PLAN + QUERY PLAN 
------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -515,17 +515,17 @@ select count(distinct c), count(distinct dt), i from dqa_t1 group by i; count | count | i -------+-------+---- 5 | 9 | 3 + 5 | 8 | 8 5 | 8 | 11 - 5 | 8 | 5 5 | 8 | 9 - 5 | 9 | 1 + 5 | 8 | 10 5 | 8 | 7 + 5 | 9 | 1 + 5 | 8 | 5 + 5 | 8 | 4 5 | 9 | 2 - 5 | 8 | 6 5 | 9 | 0 - 5 | 8 | 8 - 5 | 8 | 10 - 5 | 8 | 4 + 5 | 8 | 6 (12 rows) explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 group by i; @@ -552,29 +552,29 @@ explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 select count(distinct i), count(distinct c), d from dqa_t1 group by d; count | count | d -------+-------+---- - 5 | 5 | 3 - 4 | 4 | 21 - 4 | 4 | 19 4 | 4 | 11 - 4 | 4 | 13 - 5 | 5 | 5 - 4 | 4 | 15 - 4 | 4 | 9 + 4 | 4 | 8 + 4 | 4 | 19 + 5 | 5 | 4 + 5 | 5 | 0 + 4 | 4 | 21 + 4 | 4 | 14 + 5 | 5 | 3 4 | 4 | 17 - 5 | 5 | 1 - 5 | 5 | 7 + 4 | 4 | 22 4 | 4 | 20 - 5 | 5 | 2 - 4 | 4 | 14 - 4 | 4 | 12 + 4 | 4 | 9 + 4 | 4 | 10 + 5 | 5 | 7 + 4 | 4 | 13 + 5 | 5 | 1 + 5 | 5 | 5 4 | 4 | 18 - 5 | 5 | 6 - 4 | 4 | 22 - 5 | 5 | 0 + 5 | 5 | 2 4 | 4 | 16 - 4 | 4 | 8 - 4 | 4 | 10 - 5 | 5 | 4 + 4 | 4 | 15 + 5 | 5 | 6 + 4 | 4 | 12 (23 rows) explain (costs off) select count(distinct i), count(distinct c), d from dqa_t1 group by d; @@ -605,7 +605,7 @@ select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c; (1 row) explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------------------------- Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) @@ -628,66 +628,66 @@ explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where d select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c group by dqa_t2.dt; count ------- - 10 + 14 13 13 
14 + 13 + 13 14 13 14 + 14 + 14 + 14 + 14 + 14 + 14 10 10 13 - 13 - 10 14 13 - 13 - 14 + 10 10 13 - 13 - 13 + 14 + 14 13 13 14 - 14 10 - 14 - 14 - 14 13 14 13 + 10 14 14 13 - 10 - 13 14 13 13 - 10 13 - 14 13 10 + 10 13 - 14 - 14 10 - 14 + 13 13 13 10 10 13 + 13 + 10 14 + 13 14 (56 rows) explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c group by dqa_t2.dt; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -777,8 +777,8 @@ select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 gro (60 rows) explain (costs off) select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c; - QUERY PLAN ------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------ Sort Sort Key: i, c -> Finalize HashAggregate @@ -868,14 +868,14 @@ select to_char(corr(distinct d, i), '9.99999999999999') from dqa_t1 group by c; ------------------- .13670602618479 .13670602618479 - .32622410426034 + .05796784490862 -.11810476840833 - .07008652924496 - .07008652924496 - -.17582636927840 + .32622410426034 -.17582636927840 + .07008652924496 .42037777407962 - .05796784490862 + -.17582636927840 + .07008652924496 (10 rows) explain (costs off) select to_char(corr(distinct d, i), '9.99999999999999') from dqa_t1 group by c; @@ -902,8 +902,8 @@ select count(distinct c), corr(distinct d, i) from dqa_t1; (1 row) explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1; - QUERY PLAN ------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------ Finalize Aggregate -> Gather Motion 3:1 (slice1; 
segments: 3) -> Partial Aggregate @@ -924,8 +924,8 @@ select count(distinct d), corr(distinct d, i) from dqa_t1; (1 row) explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1; - QUERY PLAN ------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------ Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate @@ -946,8 +946,8 @@ select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; (1 row) explain (costs off) select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; - QUERY PLAN --------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate @@ -968,8 +968,8 @@ select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, (1 row) explain (costs off) select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate @@ -987,29 +987,29 @@ explain (costs off) select count(distinct c), count(distinct d), count(distinct select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; count | corr | d -------+------+---- + 4 | | 11 + 4 | | 8 + 4 | | 19 + 5 | | 4 5 | | 0 - 5 | | 1 - 5 | | 2 + 4 | | 21 + 4 | | 14 5 | | 3 - 5 | | 4 - 5 | | 5 - 5 | | 6 - 5 | | 7 - 4 | | 8 + 4 | | 17 + 4 | | 22 + 4 | | 20 4 
| | 9 4 | | 10 - 4 | | 11 - 4 | | 12 + 5 | | 7 4 | | 13 - 4 | | 14 - 4 | | 15 - 4 | | 16 - 4 | | 17 + 5 | | 1 + 5 | | 5 4 | | 18 - 4 | | 19 - 4 | | 20 - 4 | | 21 - 4 | | 22 + 5 | | 2 + 4 | | 16 + 4 | | 15 + 5 | | 6 + 4 | | 12 (23 rows) explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; @@ -1036,106 +1036,106 @@ explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; count | corr | d | i -------+------+----+---- - 1 | | 0 | 0 - 1 | | 0 | 8 - 1 | | 0 | 9 - 1 | | 0 | 10 - 1 | | 0 | 11 + 1 | | 16 | 2 1 | | 1 | 0 - 1 | | 1 | 1 - 1 | | 1 | 9 - 1 | | 1 | 10 - 1 | | 1 | 11 - 1 | | 2 | 0 - 1 | | 2 | 1 - 1 | | 2 | 2 - 1 | | 2 | 10 - 1 | | 2 | 11 - 1 | | 3 | 0 - 1 | | 3 | 1 - 1 | | 3 | 2 - 1 | | 3 | 3 - 1 | | 3 | 11 1 | | 4 | 0 - 1 | | 4 | 1 - 1 | | 4 | 2 + 1 | | 9 | 9 + 1 | | 6 | 6 + 1 | | 19 | 7 + 1 | | 22 | 8 1 | | 4 | 3 + 1 | | 7 | 3 + 1 | | 16 | 3 + 1 | | 18 | 3 + 1 | | 1 | 10 + 1 | | 13 | 11 + 1 | | 1 | 1 + 1 | | 9 | 7 + 1 | | 18 | 5 1 | | 4 | 4 - 1 | | 5 | 1 - 1 | | 5 | 2 - 1 | | 5 | 3 1 | | 5 | 4 - 1 | | 5 | 5 + 1 | | 0 | 10 + 1 | | 14 | 2 + 1 | | 12 | 0 1 | | 6 | 2 - 1 | | 6 | 3 - 1 | | 6 | 4 - 1 | | 6 | 5 - 1 | | 6 | 6 - 1 | | 7 | 3 - 1 | | 7 | 4 - 1 | | 7 | 5 - 1 | | 7 | 6 - 1 | | 7 | 7 - 1 | | 8 | 5 - 1 | | 8 | 6 - 1 | | 8 | 7 - 1 | | 8 | 8 - 1 | | 9 | 6 - 1 | | 9 | 7 - 1 | | 9 | 8 - 1 | | 9 | 9 - 1 | | 10 | 7 - 1 | | 10 | 8 - 1 | | 10 | 9 + 1 | | 5 | 2 + 1 | | 5 | 1 + 1 | | 22 | 10 1 | | 10 | 10 - 1 | | 11 | 8 - 1 | | 11 | 9 - 1 | | 11 | 10 - 1 | | 11 | 11 - 1 | | 12 | 0 + 1 | | 15 | 0 + 1 | | 18 | 6 + 1 | | 17 | 4 + 1 | | 17 | 5 + 1 | | 3 | 2 + 1 | | 20 | 5 + 1 | | 0 | 9 + 1 | | 1 | 11 + 1 | | 0 | 11 + 1 | | 1 | 9 + 1 | | 3 | 3 + 1 | | 16 | 4 + 1 | | 4 | 1 1 | | 12 | 9 - 1 | | 12 | 10 + 1 | | 8 | 7 1 | | 12 | 11 + 1 | | 7 | 7 + 1 | | 3 | 0 + 1 | | 5 | 3 + 1 | | 10 | 8 1 | | 13 | 0 - 1 | | 13 | 1 - 1 | | 13 | 10 - 1 | | 
13 | 11 + 1 | | 6 | 3 + 1 | | 22 | 9 + 1 | | 22 | 7 + 1 | | 4 | 2 + 1 | | 2 | 2 + 1 | | 2 | 10 + 1 | | 3 | 1 + 1 | | 19 | 5 + 1 | | 7 | 5 + 1 | | 7 | 6 + 1 | | 15 | 1 + 1 | | 7 | 4 1 | | 14 | 0 + 1 | | 6 | 5 + 1 | | 21 | 9 + 1 | | 11 | 8 + 1 | | 17 | 3 + 1 | | 21 | 8 + 1 | | 2 | 0 + 1 | | 8 | 8 + 1 | | 11 | 9 + 1 | | 21 | 6 + 1 | | 2 | 11 + 1 | | 20 | 7 1 | | 14 | 1 - 1 | | 14 | 2 - 1 | | 14 | 11 - 1 | | 15 | 0 - 1 | | 15 | 1 + 1 | | 3 | 11 + 1 | | 13 | 10 + 1 | | 18 | 4 1 | | 15 | 2 - 1 | | 15 | 3 + 1 | | 0 | 0 + 1 | | 6 | 4 + 1 | | 20 | 8 + 1 | | 12 | 10 1 | | 16 | 1 - 1 | | 16 | 2 - 1 | | 16 | 3 - 1 | | 16 | 4 + 1 | | 20 | 6 + 1 | | 2 | 1 + 1 | | 5 | 5 1 | | 17 | 2 - 1 | | 17 | 3 - 1 | | 17 | 4 - 1 | | 17 | 5 - 1 | | 18 | 3 - 1 | | 18 | 4 - 1 | | 18 | 5 - 1 | | 18 | 6 - 1 | | 19 | 4 - 1 | | 19 | 5 + 1 | | 11 | 10 + 1 | | 9 | 6 + 1 | | 10 | 9 + 1 | | 13 | 1 + 1 | | 0 | 8 + 1 | | 11 | 11 1 | | 19 | 6 - 1 | | 19 | 7 - 1 | | 20 | 5 - 1 | | 20 | 6 - 1 | | 20 | 7 - 1 | | 20 | 8 - 1 | | 21 | 6 + 1 | | 8 | 6 + 1 | | 9 | 8 + 1 | | 15 | 3 + 1 | | 19 | 4 + 1 | | 14 | 11 + 1 | | 10 | 7 1 | | 21 | 7 - 1 | | 21 | 8 - 1 | | 21 | 9 - 1 | | 22 | 7 - 1 | | 22 | 8 - 1 | | 22 | 9 - 1 | | 22 | 10 + 1 | | 8 | 5 (100 rows) explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; @@ -1169,8 +1169,8 @@ select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; 3 | 0.075093926148264 | 06-17-2009 3 | 0.075093926148264 | 07-08-2009 3 | 0.075093926148264 | 06-16-2009 - 2 | -1 | 07-13-2009 3 | 0.075093926148264 | 06-29-2009 + 2 | -1 | 07-13-2009 3 | 0.59603956067927 | 07-04-2009 3 | -0.70957090557056 | 06-23-2009 2 | -1 | 07-12-2009 @@ -1194,8 +1194,8 @@ select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; 3 | 0.075093926148264 | 07-01-2009 3 | -1 | 07-03-2009 3 | 0.075093926148264 | 06-15-2009 - 3 | 0.59603956067927 | 06-25-2009 3 | 0.075093926148264 | 06-30-2009 + 3 | 0.59603956067927 | 06-25-2009 (34 rows) 
explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; @@ -1222,18 +1222,18 @@ explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; count | corr | i -------+------+---- - 9 | | 0 - 9 | | 1 - 9 | | 2 9 | | 3 - 8 | | 4 - 8 | | 5 - 8 | | 6 - 8 | | 7 8 | | 8 + 8 | | 11 8 | | 9 8 | | 10 - 8 | | 11 + 8 | | 7 + 9 | | 1 + 8 | | 5 + 8 | | 4 + 9 | | 2 + 9 | | 0 + 8 | | 6 (12 rows) explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; @@ -1260,34 +1260,34 @@ explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; count | corr | d -------+------+---- + 1 | | 11 + 1 | | 8 + 1 | | 19 + 1 | | 4 1 | | 0 - 1 | | 1 - 1 | | 2 + 1 | | 21 + 1 | | 14 1 | | 3 - 1 | | 4 - 1 | | 5 - 1 | | 6 - 1 | | 7 - 1 | | 8 + 1 | | 17 + 1 | | 20 + 1 | | 22 1 | | 9 1 | | 10 - 1 | | 11 - 1 | | 12 + 1 | | 7 1 | | 13 - 1 | | 14 + 1 | | 1 + 1 | | 5 + 1 | | 18 + 1 | | 2 1 | | 15 1 | | 16 - 1 | | 17 - 1 | | 18 - 1 | | 19 - 1 | | 20 - 1 | | 21 - 1 | | 22 + 1 | | 6 + 1 | | 12 (23 rows) explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; - QUERY PLAN ------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------ Finalize HashAggregate Group Key: d -> Gather Motion 3:1 (slice1; segments: 3) @@ -1311,14 +1311,14 @@ select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c f -------+-------------------+--- 10 | .13670602618479 | 0 10 | .13670602618479 | 1 - 10 | .32622410426034 | 2 - 10 | -.11810476840833 | 3 10 | .07008652924496 | 4 - 10 | .07008652924496 | 5 + 10 | .05796784490862 | 9 + 10 | .42037777407962 | 8 10 | 
-.17582636927840 | 6 + 10 | -.11810476840833 | 3 + 10 | .32622410426034 | 2 10 | -.17582636927840 | 7 - 10 | .42037777407962 | 8 - 10 | .05796784490862 | 9 + 10 | .07008652924496 | 5 (10 rows) explain (costs off) select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c; @@ -1913,8 +1913,6 @@ set enable_groupagg=on; select count(distinct d) from dqa_t1 group by i; count ------- - 9 - 9 9 9 8 @@ -1925,22 +1923,27 @@ select count(distinct d) from dqa_t1 group by i; 8 8 8 + 9 + 9 (12 rows) explain (costs off) select count(distinct d) from dqa_t1 group by i; - QUERY PLAN ------------------------------------------------- - Finalize GroupAggregate - Group Key: i - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: i - -> Partial GroupAggregate - Group Key: i - -> Sort - Sort Key: i - -> Seq Scan on dqa_t1 + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize GroupAggregate + Group Key: i + -> Sort + Sort Key: i + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: i + -> Partial GroupAggregate + Group Key: i + -> Sort + Sort Key: i + -> Seq Scan on dqa_t1 Optimizer: Postgres query optimizer -(10 rows) +(13 rows) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; count | count | count @@ -1951,18 +1954,18 @@ select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; select count(distinct c), count(distinct dt), i from dqa_t1 group by i; count | count | i -------+-------+---- - 5 | 9 | 3 - 5 | 8 | 11 5 | 8 | 5 - 5 | 8 | 9 - 5 | 9 | 1 - 5 | 8 | 7 - 5 | 9 | 2 5 | 8 | 6 - 5 | 9 | 0 - 5 | 8 | 8 + 5 | 8 | 9 5 | 8 | 10 + 5 | 8 | 11 + 5 | 9 | 2 + 5 | 9 | 3 5 | 8 | 4 + 5 | 8 | 7 + 5 | 8 | 8 + 5 | 9 | 0 + 5 | 9 | 1 (12 rows) -- cleanup @@ -2016,11 +2019,11 @@ select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0 select sum(distinct a) filter (where a > 0), sum(distinct b) 
filter (where a > 0) from dqa_f1 group by b; sum | sum -----+----- - 136 | 0 - 136 | 1 136 | 2 136 | 3 136 | 4 + 136 | 0 + 136 | 1 (5 rows) select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; @@ -2059,125 +2062,125 @@ select count(distinct a) filter (where a > 3), count(distinct b) from dqa_f1; (1 row) explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=20.66..20.67 rows=1 width=16) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=20.62..20.65 rows=1 width=16) - -> Partial Aggregate (cost=20.62..20.63 rows=1 width=16) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=5051.77..5051.78 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=5047.75..5051.76 rows=3 width=16) + -> Partial Aggregate (cost=5047.75..5047.76 rows=1 width=16) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1007.75..5027.75 rows=2000 width=8) Hash Key: a, b, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) + -> Streaming HashAggregate (cost=1007.75..1027.75 rows=2000 width=8) Group Key: AggExprId, a, b - -> TupleSplit (cost=13.00..15.50 rows=667 width=8) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=8) Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=8) Optimizer: Postgres query optimizer (11 rows) explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b; - QUERY PLAN 
------------------------------------------------------------------------------------------------------------- - Finalize HashAggregate (cost=21.62..21.67 rows=5 width=20) + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Finalize HashAggregate (cost=22914.32..22924.32 rows=1000 width=20) Group Key: b - -> Gather Motion 3:1 (slice1; segments: 3) (cost=21.30..21.56 rows=9 width=20) - -> Partial HashAggregate (cost=21.30..21.39 rows=3 width=20) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=18881.92..22891.83 rows=3000 width=20) + -> Partial HashAggregate (cost=18881.92..18891.92 rows=1000 width=20) Group Key: b - -> HashAggregate (cost=21.06..21.17 rows=4 width=8) + -> HashAggregate (cost=18815.99..18845.29 rows=2930 width=8) Group Key: (AggExprId), a, b - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.95 rows=5 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1007.75..18662.28 rows=8783 width=8) Hash Key: b, a, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.65 rows=5 width=8) + -> Streaming HashAggregate (cost=1007.75..1095.58 rows=8783 width=8) Group Key: AggExprId, a, b - -> TupleSplit (cost=13.00..15.50 rows=667 width=8) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=8) Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) Group Key: b - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=8) Optimizer: Postgres query optimizer (16 rows) explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- - Finalize HashAggregate (cost=21.20..21.23 rows=3 width=20) + QUERY PLAN 
+---------------------------------------------------------------------------------------------------------------------- + Finalize HashAggregate (cost=36236.03..36246.03 rows=1000 width=20) Group Key: c - -> Gather Motion 3:1 (slice1; segments: 3) (cost=21.00..21.16 rows=6 width=20) - -> Partial HashAggregate (cost=21.00..21.05 rows=2 width=20) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=32203.53..36213.53 rows=3000 width=20) + -> Partial HashAggregate (cost=32203.53..32213.53 rows=1000 width=20) Group Key: c - -> HashAggregate (cost=20.86..20.92 rows=3 width=12) + -> HashAggregate (cost=32086.68..32138.62 rows=5193 width=12) Group Key: (AggExprId), a, b, c - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.77 rows=3 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1007.75..31780.49 rows=15310 width=12) Hash Key: c, a, b, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.59 rows=3 width=12) + -> Streaming HashAggregate (cost=1007.75..1160.85 rows=15310 width=12) Group Key: AggExprId, a, b, c - -> TupleSplit (cost=13.00..15.50 rows=667 width=12) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=12) Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) Group Key: c - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=12) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=12) Optimizer: Postgres query optimizer (16 rows) explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=96.41..96.42 rows=1 width=16) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=96.37..96.40 rows=1 width=16) - -> Partial Aggregate (cost=96.37..96.38 rows=1 width=16) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------------------------------ + Finalize Aggregate (cost=2845624349.00..2845624349.01 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=2845624344.97..2845624348.98 rows=3 width=16) + -> Partial Aggregate (cost=2845624344.97..2845624344.98 rows=1 width=16) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=2740096721.64..2740100741.64 rows=2000 width=8) Hash Key: dqa_f1.a, dqa_f1.b, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) + -> Streaming HashAggregate (cost=2740096721.64..2740096741.64 rows=2000 width=8) Group Key: AggExprId, dqa_f1.a, dqa_f1.b - -> TupleSplit (cost=13.00..15.50 rows=667 width=8) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=8) Split by Col: (dqa_f1.a) FILTER (WHERE (SubPlan 1)), (dqa_f1.b) FILTER (WHERE (dqa_f1.a > 0)) - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=8) SubPlan 1 - -> Result (cost=0.00..48.00 rows=334 width=4) + -> Result (cost=0.00..105328.83 rows=77900 width=4) Filter: (dqa_f2.x = dqa_f1.a) - -> Materialize (cost=0.00..38.00 rows=334 width=4) - -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..33.00 rows=334 width=4) - -> Seq Scan on dqa_f2 (cost=0.00..13.00 rows=334 width=4) + -> Materialize (cost=0.00..104549.83 rows=77900 width=4) + -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..104160.33 rows=77900 width=4) + -> Seq Scan on dqa_f2 (cost=0.00..293.67 rows=25967 width=4) Optimizer: Postgres query optimizer (17 rows) explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------- - Finalize HashAggregate (cost=181.11..181.14 rows=3 
width=20) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Finalize HashAggregate (cost=3821914999.94..3821915009.94 rows=1000 width=20) Group Key: dqa_f1.c - -> Gather Motion 3:1 (slice1; segments: 3) (cost=180.92..181.07 rows=6 width=20) - -> Partial HashAggregate (cost=180.92..180.97 rows=2 width=20) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=3821910967.44..3821914977.44 rows=3000 width=20) + -> Partial HashAggregate (cost=3821910967.44..3821910977.44 rows=1000 width=20) Group Key: dqa_f1.c - -> HashAggregate (cost=20.86..20.92 rows=3 width=12) + -> HashAggregate (cost=3547901279.20..3547901331.13 rows=5193 width=12) Group Key: (AggExprId), dqa_f1.a, dqa_f1.b, dqa_f1.c - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.77 rows=3 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=2740096721.64..2740127494.38 rows=15310 width=12) Hash Key: dqa_f1.c, dqa_f1.a, dqa_f1.b, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.59 rows=3 width=12) + -> Streaming HashAggregate (cost=2740096721.64..2740096874.74 rows=15310 width=12) Group Key: AggExprId, dqa_f1.a, dqa_f1.b, dqa_f1.c - -> TupleSplit (cost=13.00..15.50 rows=667 width=12) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=12) Split by Col: (dqa_f1.a) FILTER (WHERE (SubPlan 1)), (dqa_f1.b) FILTER (WHERE (dqa_f1.a > 0)) Group Key: dqa_f1.c - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=12) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=12) SubPlan 1 - -> Result (cost=0.00..48.00 rows=334 width=4) + -> Result (cost=0.00..105328.83 rows=77900 width=4) Filter: (dqa_f2.x = dqa_f1.a) - -> Materialize (cost=0.00..38.00 rows=334 width=4) - -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..33.00 rows=334 width=4) - -> Seq Scan on dqa_f2 (cost=0.00..13.00 rows=334 width=4) + -> Materialize (cost=0.00..104549.83 rows=77900 width=4) + 
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..104160.33 rows=77900 width=4) + -> Seq Scan on dqa_f2 (cost=0.00..293.67 rows=25967 width=4) Optimizer: Postgres query optimizer (22 rows) explain select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=20.67..20.68 rows=1 width=24) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=20.64..20.67 rows=1 width=24) - -> Partial Aggregate (cost=20.64..20.65 rows=1 width=24) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=5321.45..5321.46 rows=1 width=24) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=5317.42..5321.43 rows=3 width=24) + -> Partial Aggregate (cost=5317.42..5317.43 rows=1 width=24) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1267.42..5287.42 rows=2000 width=8) Hash Key: a, b, (AggExprId) - -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) + -> Streaming HashAggregate (cost=1267.42..1287.42 rows=2000 width=8) Group Key: AggExprId, a, b - -> TupleSplit (cost=13.00..15.50 rows=667 width=8) + -> TupleSplit (cost=293.67..358.58 rows=51933 width=8) Split by Col: (a) FILTER (WHERE (a > 3)), (b) FILTER (WHERE (a > 4)) - -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) + -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=8) Optimizer: Postgres query optimizer (11 rows) @@ -2186,8 +2189,8 @@ explain select count(distinct a) filter (where a > 3),count( distinct b) filter explain select count(distinct a), sum(b), sum(c) from dqa_f1; QUERY PLAN --------------------------------------------------------------------------------------- - Finalize Aggregate 
(cost=490.45..490.46 rows=1 width=24) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=488.42..490.43 rows=3 width=24) + Finalize Aggregate (cost=492.45..492.46 rows=1 width=24) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=488.42..492.43 rows=3 width=24) -> Partial Aggregate (cost=488.42..488.43 rows=1 width=24) -> Seq Scan on dqa_f1 (cost=0.00..293.67 rows=25967 width=12) Optimizer: Postgres query optimizer @@ -2203,8 +2206,8 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; - QUERY PLAN ------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize HashAggregate Output: count(a), count(d), c, a, b Group Key: dqa_unique.a, dqa_unique.b @@ -2228,8 +2231,8 @@ explain(verbose on, costs off) select count(distinct a), count(distinct d), c fr Group Key: dqa_unique.a, dqa_unique.b -> Seq Scan on public.dqa_unique Output: a, b, c, d + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (25 rows) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; @@ -2268,8 +2271,8 @@ select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3; - QUERY PLAN ----------------------------------------------------------------------------------------------- + QUERY PLAN 
+----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT (a)::text) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2287,8 +2290,8 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin Split by Col: (((dqa_f3.b)::text)), (dqa_f3.a) -> Seq Scan on public.dqa_f3 Output: b, a, (b)::text + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (19 rows) -- Case 2: Same as the above one, but convert the type of column 'a' to 'varchar' via binary-compatible types. @@ -2299,7 +2302,7 @@ select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a f (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT a) @@ -2318,8 +2321,8 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin Split by Col: (((dqa_f3.b)::text)), (dqa_f3.a) -> Seq Scan on public.dqa_f3 Output: b, a, (b)::text + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (19 rows) -- Case 3: When converting the type of column 'a' from 'varchar' to 'int' in DQA expression, TupleSplit should generate an additional @@ -2340,8 +2343,8 @@ select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3 (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3; - QUERY PLAN 
---------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT ((a)::integer)) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2359,8 +2362,8 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin Split by Col: (((dqa_f3.b)::text)), (((dqa_f3.a)::integer)) -> Seq Scan on public.dqa_f3 Output: b, a, (b)::text, (a)::integer + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (19 rows) -- Case 4: When converting the type of column 'a' from 'varchar' to 'int' to 'varchar', TupleSplit should generate an additional @@ -2372,7 +2375,7 @@ select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a fr (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------------------------ Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT (((a)::integer)::character varying)) @@ -2391,8 +2394,8 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin Split by Col: (((dqa_f3.b)::text)), ((((dqa_f3.a)::integer)::character varying)) -> Seq Scan on public.dqa_f3 Output: b, a, (b)::text, ((a)::integer)::character varying + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (19 rows) drop table dqa_f3; @@ -2423,7 +2426,7 @@ select count(distinct a) from t_issue_659; -> 
Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate -> Seq Scan on t_issue_659 - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: Postgres query optimizer (5 rows) select count(distinct a) from t_issue_659; @@ -2486,8 +2489,8 @@ explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; Hash Key: b -> Seq Scan on public.multiagg1 Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' (13 rows) select count(distinct b), sum(c) from multiagg1; @@ -2497,8 +2500,8 @@ select count(distinct b), sum(c) from multiagg1; (1 row) explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; - QUERY PLAN --------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT b), sum(c) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2510,8 +2513,8 @@ explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; Hash Key: b -> Seq Scan on public.multiagg2 Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' (13 rows) select count(distinct b), sum(c) from multiagg2; @@ -2543,10 +2546,10 @@ insert into num_table values(1,1,1,1),(2,2,2,2),(3,3,3,3); explain select count(distinct a), sum(b) from num_table; QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=14212.19..14212.20 rows=1 width=16) - -> Gather Motion 3:1 (slice1; 
segments: 3) (cost=14210.17..14212.18 rows=3 width=16) - -> Partial Aggregate (cost=14210.17..14210.18 rows=1 width=16) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..14140.33 rows=13967 width=12) + Finalize Aggregate (cost=28180.86..28180.87 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=28176.83..28180.84 rows=3 width=16) + -> Partial Aggregate (cost=28176.83..28176.84 rows=1 width=16) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..28107.00 rows=13967 width=12) Hash Key: a -> Seq Scan on num_table (cost=0.00..173.67 rows=13967 width=12) Optimizer: Postgres query optimizer @@ -2561,7 +2564,7 @@ select count(distinct a), sum(b) from num_table; explain select count(distinct a), sum(b) from num_table group by id; QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000001135.25..10000001944.92 rows=1000 width=20) + Gather Motion 3:1 (slice1; segments: 3) (cost=10000001135.25..10000002611.58 rows=1000 width=20) -> GroupAggregate (cost=10000001135.25..10000001278.25 rows=333 width=20) Group Key: id -> Sort (cost=1135.25..1170.17 rows=13967 width=16) @@ -2573,9 +2576,9 @@ explain select count(distinct a), sum(b) from num_table group by id; select count(distinct a), sum(b) from num_table group by id; count | sum -------+----- - 1 | 1 1 | 2 1 | 3 + 1 | 1 (3 rows) -- count(distinct a) is a simple aggregation @@ -2589,10 +2592,10 @@ select count(distinct a), sum(b) from num_table group by id; explain select count(distinct a), sum(c) from num_table; QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=14212.20..14212.21 rows=1 width=40) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=14210.17..14212.18 rows=3 width=40) - -> Partial Aggregate (cost=14210.17..14210.18 rows=1 width=40) - -> Redistribute Motion 3:3 (slice2; 
segments: 3) (cost=0.00..14140.33 rows=13967 width=40) + Finalize Aggregate (cost=28180.87..28180.88 rows=1 width=40) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=28176.84..28180.85 rows=3 width=40) + -> Partial Aggregate (cost=28176.84..28176.85 rows=1 width=40) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..28107.00 rows=13967 width=40) Hash Key: a -> Seq Scan on num_table (cost=0.00..173.67 rows=13967 width=40) Optimizer: Postgres query optimizer @@ -2607,7 +2610,7 @@ select count(distinct a), sum(c) from num_table; explain select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); QUERY PLAN ----------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000001135.25..10000004159.03 rows=4190 width=108) + Gather Motion 3:1 (slice1; segments: 3) (cost=10000001135.25..10000006952.37 rows=4190 width=108) -> GroupAggregate (cost=10000001135.25..10000001365.70 rows=1397 width=108) Group Key: id, c -> Sort (cost=1135.25..1170.17 rows=13967 width=48) @@ -2619,14 +2622,14 @@ explain select id, count(distinct a), avg(b), sum(c) from num_table group by gro select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); id | count | avg | sum ----+-------+------------------------+----- - 1 | 1 | 1.00000000000000000000 | 1 2 | 1 | 2.0000000000000000 | 2 3 | 1 | 3.0000000000000000 | 3 + 1 | 1 | 1.00000000000000000000 | 1 (3 rows) explain (verbose on, costs off) select count(distinct b) from num_table group by c; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (count(b)), c -> HashAggregate @@ -2643,8 +2646,8 @@ explain (verbose on, costs off) select 
count(distinct b) from num_table group by Group Key: num_table.c, num_table.b -> Seq Scan on public.num_table Output: id, a, b, c - Optimizer: Postgres-based planner - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer = 'off', optimizer_force_multistage_agg = 'on' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' + Optimizer: Postgres query optimizer (18 rows) select count(distinct b) from num_table group by c; @@ -2682,8 +2685,8 @@ analyze dqa_f3; * */ explain (verbose on, costs off)select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT a)), (count(b)), (sum(c)), e -> GroupAggregate @@ -2697,25 +2700,25 @@ explain (verbose on, costs off)select sum(Distinct a), count(b), sum(c) from dqa Hash Key: e -> Seq Scan on public.dqa_f3 Output: e, a, b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (15 rows) select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; sum | count | sum -----+-------+----- - 136 | 143 | 142 136 | 143 | 143 136 | 143 | 142 - 136 | 143 | 143 136 | 143 | 144 136 | 142 | 142 136 | 143 | 144 + 136 | 143 | 143 + 136 | 143 | 142 (7 rows) explain (verbose on, costs off) select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN 
+----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT e)), (count(b)), (sum(c)), a -> GroupAggregate @@ -2726,7 +2729,7 @@ explain (verbose on, costs off) select sum(Distinct e), count(b), sum(c) from dq Sort Key: dqa_f3.a -> Seq Scan on public.dqa_f3 Output: a, e, b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (12 rows) @@ -2735,20 +2738,20 @@ select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; -----+-------+----- 21 | 58 | 59 21 | 59 | 58 + 21 | 59 | 59 21 | 58 | 57 + 21 | 59 | 60 21 | 59 | 59 21 | 59 | 58 - 21 | 59 | 59 21 | 59 | 58 21 | 59 | 60 21 | 58 | 58 21 | 59 | 60 - 21 | 59 | 60 - 21 | 59 | 58 - 21 | 59 | 58 21 | 59 | 59 - 21 | 59 | 60 21 | 59 | 59 + 21 | 59 | 58 + 21 | 59 | 60 + 21 | 59 | 58 21 | 59 | 60 (17 rows) @@ -2763,8 +2766,8 @@ select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; * */ explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT c)), (count(a)), (sum(d)), b -> GroupAggregate @@ -2778,23 +2781,23 @@ explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dq Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a, d - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 
'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (15 rows) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; sum | count | sum -----+-------+------ - 3 | 200 | 1100 - 3 | 200 | 1300 - 3 | 200 | 900 3 | 200 | 500 3 | 200 | 700 + 3 | 200 | 900 + 3 | 200 | 1100 + 3 | 200 | 1300 (5 rows) explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT c)), (count(a)), (sum(d)), b Merge Key: b @@ -2809,7 +2812,7 @@ explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dq Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a, d - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) @@ -2824,8 +2827,8 @@ select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; (5 rows) explain (verbose on, costs off) select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- HashAggregate Output: (sum(DISTINCT c)), (count(a)), (sum(d)), b Group Key: (sum(DISTINCT dqa_f3.c)), (count(dqa_f3.a)), (sum(dqa_f3.d)) @@ -2845,23 +2848,23 @@ explain (verbose on, costs off) select distinct sum(Distinct 
c), count(a), sum(d Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a, d - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (21 rows) select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; sum | count | sum -----+-------+------ - 3 | 200 | 500 3 | 200 | 900 + 3 | 200 | 500 3 | 200 | 700 3 | 200 | 1100 3 | 200 | 1300 (5 rows) explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT c)), (count(a)), (sum(d)), b -> GroupAggregate @@ -2876,16 +2879,16 @@ explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dq Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a, d, e - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (16 rows) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; sum | count | sum -----+-------+------ - 3 | 200 | 1100 3 | 200 | 500 3 | 200 | 700 + 3 | 200 | 1100 (3 rows) -- flaky tests @@ -2899,8 +2902,8 @@ select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > -- Test gp_enable_agg_distinct_pruning is off on this branch set gp_enable_agg_distinct_pruning = off; explain (verbose on, costs off) select sum(Distinct c), 
count(a), sum(d) from dqa_f3 group by b; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: (sum(DISTINCT c)), (count(a)), (sum(d)), b -> GroupAggregate @@ -2914,7 +2917,7 @@ explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dq Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a, d - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_enable_agg_distinct_pruning = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_enable_agg_distinct_pruning = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (15 rows) @@ -2923,9 +2926,9 @@ select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; -----+-------+------ 3 | 200 | 500 3 | 200 | 700 + 3 | 200 | 900 3 | 200 | 1100 3 | 200 | 1300 - 3 | 200 | 900 (5 rows) reset gp_enable_agg_distinct_pruning; @@ -2941,8 +2944,8 @@ reset gp_enable_agg_distinct_pruning; * -> input */ explain (verbose on, costs off) select sum(Distinct b), count(c), sum(a) from dqa_f3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: sum(DISTINCT b), count(c), sum(a) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2954,7 +2957,7 @@ explain (verbose on, costs off) select sum(Distinct b), count(c), sum(a) from dq Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a - Settings: enable_groupagg = 
'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (13 rows) @@ -2965,8 +2968,8 @@ select sum(Distinct b), count(c), sum(a) from dqa_f3; (1 row) explain (verbose on, costs off) select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: sum(DISTINCT b), count(c), sum(a) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2978,7 +2981,7 @@ explain (verbose on, costs off) select distinct sum(Distinct b), count(c), sum(a Hash Key: b -> Seq Scan on public.dqa_f3 Output: b, c, a - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (13 rows) @@ -2989,8 +2992,8 @@ select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; (1 row) explain (verbose on, costs off) select sum(Distinct b), count(c) filter(where c > 1), sum(a) from dqa_f3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: sum(DISTINCT b), count(c) FILTER (WHERE (c > 1)), sum(a) -> Gather Motion 3:1 (slice1; segments: 3) @@ -3002,7 +3005,7 @@ explain (verbose on, costs off) select sum(Distinct b), count(c) filter(where c Hash Key: b -> Seq Scan on public.dqa_f3 
Output: b, c, a - Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'off' + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer (13 rows) @@ -3030,8 +3033,8 @@ select count(distinct a), count(distinct b) from dqa_f4 group by c; set optimizer_enable_multiple_distinct_aggs=on; explain (verbose on, costs off) select count(distinct a), count(distinct b) from dqa_f4 group by c; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize HashAggregate Output: count(a), count(b), c Group Key: dqa_f4.c @@ -3055,8 +3058,8 @@ explain (verbose on, costs off) select count(distinct a), count(distinct b) from Group Key: dqa_f4.c -> Seq Scan on public.dqa_f4 Output: a, b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer = 'off', optimizer_enable_multiple_distinct_aggs = 'on' (25 rows) select count(distinct a), count(distinct b) from dqa_f4 group by c; diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index 50da048fb61..00e6badd791 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -22,6 +22,10 @@ set optimizer_segments = 3; set optimizer_enable_master_only_queries = on; -- master only tables create schema orca; +-- start_ignore +GRANT ALL ON SCHEMA orca TO PUBLIC; +SET search_path to orca, public; +-- end_ignore create table orca.r(); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. 
Creating a NULL policy entry. set allow_system_table_mods=true; @@ -718,26 +722,26 @@ select count(*) from orca.r; select a, b from orca.r, orca.s group by a,b; a | b ----+--- - 13 | 4 - 9 | 3 + 1 | 0 + 2 | 0 + 3 | 1 + 4 | 1 5 | 1 - 16 | 5 + 6 | 2 + 7 | 2 + 8 | 2 + 9 | 3 + 10 | 3 + 11 | 3 12 | 4 + 13 | 4 + 14 | 4 + 15 | 5 + 16 | 5 17 | 5 - 4 | 1 18 | 6 - 10 | 3 - 8 | 2 - 6 | 2 - 15 | 5 19 | 6 - 14 | 4 - 3 | 1 - 2 | 0 - 11 | 3 - 7 | 2 20 | 6 - 1 | 0 (20 rows) select r.a+1 from orca.r; @@ -1907,23 +1911,23 @@ select pow(r.b,r.a) from orca.r; select b from orca.r group by b having count(*) > 2; b --- - 6 - 1 - 4 2 3 5 + 4 + 6 + 1 (6 rows) select b from orca.r group by b having count(*) <= avg(a) + (select count(*) from orca.s where s.c = r.b); b --- - 6 - 1 - 4 2 3 5 + 4 + 6 + 1 (6 rows) select sum(a) from orca.r group by b having count(*) > 2 order by b+1; @@ -2165,15 +2169,15 @@ analyze orca.bar2; select x2 from orca.foo where x1 in (select x2 from orca.bar1); x2 ---- - 4 - 5 6 7 - 8 - 3 - 9 10 11 + 3 + 4 + 5 + 8 + 9 (9 rows) select 1; @@ -2239,29 +2243,29 @@ select distinct 1, sum(x1) from orca.foo; select distinct x1, rank() over(order by x1) from (select x1 from orca.foo order by x1) x; --order none x1 | rank ----+------ - 1 | 1 + 10 | 10 + 8 | 8 2 | 2 - 3 | 3 - 4 | 4 5 | 5 - 6 | 6 + 1 | 1 + 4 | 4 7 | 7 - 8 | 8 + 3 | 3 9 | 9 - 10 | 10 + 6 | 6 (10 rows) select distinct x1, sum(x3) from orca.foo group by x1,x2; x1 | sum ----+----- - 1 | 3 2 | 4 + 7 | 9 + 8 | 10 3 | 5 4 | 6 - 5 | 7 + 1 | 3 6 | 8 - 7 | 9 - 8 | 10 + 5 | 7 9 | 11 10 | 12 (10 rows) @@ -2269,16 +2273,16 @@ select distinct x1, sum(x3) from orca.foo group by x1,x2; select distinct s from (select sum(x2) s from orca.foo group by x1) x; s ---- - 9 - 8 - 11 - 10 + 4 2 + 8 7 - 6 - 4 3 5 + 11 + 9 + 6 + 10 (10 rows) select * from orca.foo a where a.x1 = (select distinct sum(b.x1)+avg(b.x1) sa from orca.bar1 b group by b.x3 order by sa limit 1); @@ -2297,16 +2301,16 @@ select distinct a.x1 from orca.foo a where 
a.x1 <= (select distinct sum(b.x1)+av select * from orca.foo a where a.x1 = (select distinct b.x1 from orca.bar1 b where b.x1=a.x1 limit 1); x1 | x2 | x3 ----+----+---- - 1 | 2 | 3 2 | 3 | 4 - 8 | 9 | 10 - 9 | 10 | 11 - 10 | 11 | 12 3 | 4 | 5 4 | 5 | 6 + 7 | 8 | 9 + 8 | 9 | 10 + 1 | 2 | 3 5 | 6 | 7 6 | 7 | 8 - 7 | 8 | 9 + 9 | 10 | 11 + 10 | 11 | 12 (10 rows) -- with clause @@ -2314,15 +2318,15 @@ with cte1 as (select * from orca.foo) select a.x1+1 from (select * from cte1) a ?column? ---------- 2 - 3 - 8 + 11 + 10 7 - 5 - 4 6 - 10 + 5 + 3 9 - 11 + 8 + 4 (10 rows) select count(*)+1 from orca.bar1 b where b.x1 < any (with cte1 as (select * from orca.foo) select a.x1+1 from (select * from cte1) a group by a.x1); @@ -2395,28 +2399,28 @@ select count(*) from orca.foo x where x.x1 > (select count(*)+1 from orca.bar1 y -- result node with one time filter and filter explain select case when bar1.x2 = bar2.x2 then coalesce((select 1 from orca.foo where bar1.x2 = bar2.x2 and bar1.x2 = random() and foo.x2 = bar2.x2),0) else 1 end as col1, bar1.x1 from orca.bar1 inner join orca.bar2 on (bar1.x2 = bar2.x2) order by bar1.x1; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=75.54..75.59 rows=20 width=12) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=11.57..11.86 rows=20 width=8) Merge Key: bar1.x1 - -> Sort (cost=75.54..75.59 rows=7 width=12) + -> Sort (cost=11.57..11.59 rows=7 width=8) Sort Key: bar1.x1 - -> Hash Join (cost=3.85..75.11 rows=7 width=12) + -> Hash Join (cost=1.28..11.48 rows=7 width=8) Hash Cond: (bar2.x2 = bar1.x2) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.90 rows=10 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.30 rows=10 width=4) Hash Key: bar2.x2 - -> Seq Scan 
on bar2 (cost=0.00..3.30 rows=10 width=4) - -> Hash (cost=3.60..3.60 rows=7 width=8) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..3.60 rows=7 width=8) + -> Seq Scan on bar2 (cost=0.00..1.10 rows=10 width=4) + -> Hash (cost=1.20..1.20 rows=7 width=8) + -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..1.20 rows=7 width=8) Hash Key: bar1.x2 - -> Seq Scan on bar1 (cost=0.00..3.20 rows=7 width=8) + -> Seq Scan on bar1 (cost=0.00..1.07 rows=7 width=8) SubPlan 1 - -> Result (cost=0.00..3.35 rows=10 width=4) + -> Result (cost=0.00..1.32 rows=10 width=4) One-Time Filter: (bar1.x2 = bar2.x2) Filter: ((foo.x2 = bar2.x2) AND ((bar1.x2)::double precision = random())) - -> Materialize (cost=0.00..3.35 rows=10 width=4) - -> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..3.30 rows=4 width=4) - -> Seq Scan on foo (cost=0.00..3.10 rows=4 width=4) + -> Materialize (cost=0.00..1.22 rows=10 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.17 rows=10 width=4) + -> Seq Scan on foo (cost=0.00..1.03 rows=3 width=4) Optimizer: Postgres query optimizer (21 rows) @@ -2459,744 +2463,744 @@ analyze orca.s; select * from orca.r, orca.s where r.a=s.c; a | b | c | d ---+---+---+--- + 5 | 2 | 5 | 1 + 6 | 0 | 6 | 0 + 5 | 2 | 5 | 0 + 6 | 0 | 6 | 1 + 5 | 2 | 5 | 1 + 6 | 0 | 6 | 0 + 5 | 2 | 5 | 0 + 6 | 0 | 6 | 1 1 | 1 | 1 | 1 - 2 | 2 | 2 | 0 1 | 1 | 1 | 0 - 2 | 2 | 2 | 1 1 | 1 | 1 | 1 - 2 | 2 | 2 | 0 1 | 1 | 1 | 0 - 2 | 2 | 2 | 1 1 | 1 | 1 | 1 2 | 2 | 2 | 0 3 | 0 | 3 | 1 4 | 1 | 4 | 0 - 5 | 2 | 5 | 1 - 6 | 0 | 6 | 0 + 2 | 2 | 2 | 1 3 | 0 | 3 | 0 4 | 1 | 4 | 1 - 5 | 2 | 5 | 0 - 6 | 0 | 6 | 1 + 2 | 2 | 2 | 0 3 | 0 | 3 | 1 4 | 1 | 4 | 0 - 5 | 2 | 5 | 1 - 6 | 0 | 6 | 0 + 2 | 2 | 2 | 1 3 | 0 | 3 | 0 4 | 1 | 4 | 1 - 5 | 2 | 5 | 0 - 6 | 0 | 6 | 1 + 2 | 2 | 2 | 0 (26 rows) -- Materialize node select * from orca.r, orca.s where r.as.c; a | b | c | d ----+---+---+--- - 1 | 1 | 3 | 1 - 1 | 1 | 4 | 0 - 1 | 1 | 5 | 1 - 1 | 1 | 6 | 0 - 1 | 1 | 3 | 0 - 1 | 1 
| 4 | 1 - 1 | 1 | 5 | 0 - 1 | 1 | 6 | 1 - 1 | 1 | 3 | 1 - 1 | 1 | 4 | 0 - 1 | 1 | 5 | 1 - 1 | 1 | 6 | 0 - 1 | 1 | 3 | 0 - 1 | 1 | 4 | 1 - 1 | 1 | 5 | 0 - 1 | 1 | 6 | 1 - 2 | 2 | 3 | 1 - 2 | 2 | 4 | 0 - 2 | 2 | 5 | 1 - 2 | 2 | 6 | 0 - 2 | 2 | 3 | 0 - 2 | 2 | 4 | 1 - 2 | 2 | 5 | 0 - 2 | 2 | 6 | 1 - 2 | 2 | 3 | 1 - 2 | 2 | 4 | 0 - 2 | 2 | 5 | 1 - 2 | 2 | 6 | 0 - 2 | 2 | 3 | 0 - 2 | 2 | 4 | 1 - 2 | 2 | 5 | 0 - 2 | 2 | 6 | 1 - 13 | 1 | 3 | 1 - 13 | 1 | 4 | 0 - 13 | 1 | 5 | 1 - 13 | 1 | 6 | 0 - 13 | 1 | 3 | 0 - 13 | 1 | 4 | 1 - 13 | 1 | 5 | 0 - 13 | 1 | 6 | 1 - 13 | 1 | 3 | 1 - 13 | 1 | 4 | 0 - 13 | 1 | 5 | 1 - 13 | 1 | 6 | 0 - 13 | 1 | 3 | 0 - 13 | 1 | 4 | 1 - 13 | 1 | 5 | 0 - 13 | 1 | 6 | 1 - 14 | 2 | 3 | 1 - 14 | 2 | 4 | 0 - 14 | 2 | 5 | 1 - 14 | 2 | 6 | 0 - 14 | 2 | 3 | 0 - 14 | 2 | 4 | 1 - 14 | 2 | 5 | 0 - 14 | 2 | 6 | 1 - 14 | 2 | 3 | 1 - 14 | 2 | 4 | 0 - 14 | 2 | 5 | 1 - 14 | 2 | 6 | 0 - 14 | 2 | 3 | 0 - 14 | 2 | 4 | 1 - 14 | 2 | 5 | 0 - 14 | 2 | 6 | 1 - 15 | 0 | 3 | 1 - 15 | 0 | 4 | 0 - 15 | 0 | 5 | 1 - 15 | 0 | 6 | 0 - 15 | 0 | 3 | 0 - 15 | 0 | 4 | 1 - 15 | 0 | 5 | 0 - 15 | 0 | 6 | 1 - 15 | 0 | 3 | 1 - 15 | 0 | 4 | 0 - 15 | 0 | 5 | 1 - 15 | 0 | 6 | 0 - 15 | 0 | 3 | 0 - 15 | 0 | 4 | 1 - 15 | 0 | 5 | 0 - 15 | 0 | 6 | 1 - 16 | 1 | 3 | 1 - 16 | 1 | 4 | 0 - 16 | 1 | 5 | 1 - 16 | 1 | 6 | 0 - 16 | 1 | 3 | 0 - 16 | 1 | 4 | 1 - 16 | 1 | 5 | 0 - 16 | 1 | 6 | 1 - 16 | 1 | 3 | 1 - 16 | 1 | 4 | 0 - 16 | 1 | 5 | 1 - 16 | 1 | 6 | 0 - 16 | 1 | 3 | 0 - 16 | 1 | 4 | 1 - 16 | 1 | 5 | 0 - 16 | 1 | 6 | 1 - 17 | 2 | 3 | 1 - 17 | 2 | 4 | 0 - 17 | 2 | 5 | 1 - 17 | 2 | 6 | 0 - 17 | 2 | 3 | 0 - 17 | 2 | 4 | 1 - 17 | 2 | 5 | 0 - 17 | 2 | 6 | 1 - 17 | 2 | 3 | 1 - 17 | 2 | 4 | 0 - 17 | 2 | 5 | 1 - 17 | 2 | 6 | 0 - 17 | 2 | 3 | 0 - 17 | 2 | 4 | 1 - 17 | 2 | 5 | 0 - 17 | 2 | 6 | 1 - 3 | 0 | 3 | 1 - 3 | 0 | 4 | 0 - 3 | 0 | 5 | 1 - 3 | 0 | 6 | 0 - 3 | 0 | 3 | 0 - 3 | 0 | 4 | 1 - 3 | 0 | 5 | 0 - 3 | 0 | 6 | 1 - 3 | 0 | 3 | 1 - 3 | 0 | 4 | 0 - 3 | 0 | 5 | 1 - 3 | 0 | 6 | 0 - 3 | 0 | 3 | 0 - 3 | 0 
| 4 | 1 - 3 | 0 | 5 | 0 - 3 | 0 | 6 | 1 - 4 | 1 | 3 | 1 - 4 | 1 | 4 | 0 - 4 | 1 | 5 | 1 - 4 | 1 | 6 | 0 - 4 | 1 | 3 | 0 - 4 | 1 | 4 | 1 - 4 | 1 | 5 | 0 - 4 | 1 | 6 | 1 - 4 | 1 | 3 | 1 - 4 | 1 | 4 | 0 - 4 | 1 | 5 | 1 - 4 | 1 | 6 | 0 - 4 | 1 | 3 | 0 - 4 | 1 | 4 | 1 - 4 | 1 | 5 | 0 - 4 | 1 | 6 | 1 - 5 | 2 | 3 | 1 - 5 | 2 | 4 | 0 + 1 | 1 | 1 | 1 + 1 | 1 | 0 | 1 + 1 | 1 | 1 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 1 | 1 + 1 | 1 | 0 | 1 + 1 | 1 | 1 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 1 | 1 + 12 | 0 | 1 | 1 + 12 | 0 | 0 | 1 + 12 | 0 | 1 | 0 + 12 | 0 | 0 | 0 + 12 | 0 | 1 | 1 + 12 | 0 | 0 | 1 + 12 | 0 | 1 | 0 + 12 | 0 | 0 | 0 + 12 | 0 | 1 | 1 + 15 | 0 | 1 | 1 + 15 | 0 | 0 | 1 + 15 | 0 | 1 | 0 + 15 | 0 | 0 | 0 + 15 | 0 | 1 | 1 + 15 | 0 | 0 | 1 + 15 | 0 | 1 | 0 + 15 | 0 | 0 | 0 + 15 | 0 | 1 | 1 + 20 | 2 | 1 | 1 + 20 | 2 | 0 | 1 + 20 | 2 | 1 | 0 + 20 | 2 | 0 | 0 + 20 | 2 | 1 | 1 + 20 | 2 | 0 | 1 + 20 | 2 | 1 | 0 + 20 | 2 | 0 | 0 + 20 | 2 | 1 | 1 + 5 | 2 | 1 | 1 + 5 | 2 | 0 | 1 + 5 | 2 | 1 | 0 + 5 | 2 | 0 | 0 + 5 | 2 | 1 | 1 + 5 | 2 | 0 | 1 + 5 | 2 | 1 | 0 + 5 | 2 | 0 | 0 + 5 | 2 | 1 | 1 + 6 | 0 | 1 | 1 + 6 | 0 | 0 | 1 + 6 | 0 | 1 | 0 + 6 | 0 | 0 | 0 + 6 | 0 | 1 | 1 + 6 | 0 | 0 | 1 + 6 | 0 | 1 | 0 + 6 | 0 | 0 | 0 + 6 | 0 | 1 | 1 + 9 | 0 | 1 | 1 + 9 | 0 | 0 | 1 + 9 | 0 | 1 | 0 + 9 | 0 | 0 | 0 + 9 | 0 | 1 | 1 + 9 | 0 | 0 | 1 + 9 | 0 | 1 | 0 + 9 | 0 | 0 | 0 + 9 | 0 | 1 | 1 + 10 | 1 | 1 | 1 + 10 | 1 | 0 | 1 + 10 | 1 | 1 | 0 + 10 | 1 | 0 | 0 + 10 | 1 | 1 | 1 + 10 | 1 | 0 | 1 + 10 | 1 | 1 | 0 + 10 | 1 | 0 | 0 + 10 | 1 | 1 | 1 + 11 | 2 | 1 | 1 + 11 | 2 | 0 | 1 + 11 | 2 | 1 | 0 + 11 | 2 | 0 | 0 + 11 | 2 | 1 | 1 + 11 | 2 | 0 | 1 + 11 | 2 | 1 | 0 + 11 | 2 | 0 | 0 + 11 | 2 | 1 | 1 + 13 | 1 | 1 | 1 + 13 | 1 | 0 | 1 + 13 | 1 | 1 | 0 + 13 | 1 | 0 | 0 + 13 | 1 | 1 | 1 + 13 | 1 | 0 | 1 + 13 | 1 | 1 | 0 + 13 | 1 | 0 | 0 + 13 | 1 | 1 | 1 + 14 | 2 | 1 | 1 + 14 | 2 | 0 | 1 + 14 | 2 | 1 | 0 + 14 | 2 | 0 | 0 + 14 | 2 | 1 | 1 + 14 | 2 | 0 | 1 + 14 | 2 | 1 | 0 + 14 | 2 | 0 | 0 + 14 | 2 | 1 | 1 + 17 | 2 | 1 | 1 + 17 | 
2 | 0 | 1 + 17 | 2 | 1 | 0 + 17 | 2 | 0 | 0 + 17 | 2 | 1 | 1 + 17 | 2 | 0 | 1 + 17 | 2 | 1 | 0 + 17 | 2 | 0 | 0 + 17 | 2 | 1 | 1 + 2 | 2 | 1 | 1 + 2 | 2 | 0 | 1 + 2 | 2 | 1 | 0 + 2 | 2 | 0 | 0 + 2 | 2 | 1 | 1 + 2 | 2 | 0 | 1 + 2 | 2 | 1 | 0 + 2 | 2 | 0 | 0 + 2 | 2 | 1 | 1 + 3 | 0 | 1 | 1 + 3 | 0 | 0 | 1 + 3 | 0 | 1 | 0 + 3 | 0 | 0 | 0 + 3 | 0 | 1 | 1 + 3 | 0 | 0 | 1 + 3 | 0 | 1 | 0 + 3 | 0 | 0 | 0 + 3 | 0 | 1 | 1 + 4 | 1 | 1 | 1 + 4 | 1 | 0 | 1 + 4 | 1 | 1 | 0 + 4 | 1 | 0 | 0 + 4 | 1 | 1 | 1 + 4 | 1 | 0 | 1 + 4 | 1 | 1 | 0 + 4 | 1 | 0 | 0 + 4 | 1 | 1 | 1 + 7 | 1 | 1 | 1 + 7 | 1 | 0 | 1 + 7 | 1 | 1 | 0 + 7 | 1 | 0 | 0 + 7 | 1 | 1 | 1 + 7 | 1 | 0 | 1 + 7 | 1 | 1 | 0 + 7 | 1 | 0 | 0 + 7 | 1 | 1 | 1 + 8 | 2 | 1 | 1 + 8 | 2 | 0 | 1 + 8 | 2 | 1 | 0 + 8 | 2 | 0 | 0 + 8 | 2 | 1 | 1 + 8 | 2 | 0 | 1 + 8 | 2 | 1 | 0 + 8 | 2 | 0 | 0 + 8 | 2 | 1 | 1 + 16 | 1 | 1 | 1 + 16 | 1 | 0 | 1 + 16 | 1 | 1 | 0 + 16 | 1 | 0 | 0 + 16 | 1 | 1 | 1 + 16 | 1 | 0 | 1 + 16 | 1 | 1 | 0 + 16 | 1 | 0 | 0 + 16 | 1 | 1 | 1 + 18 | 0 | 1 | 1 + 18 | 0 | 0 | 1 + 18 | 0 | 1 | 0 + 18 | 0 | 0 | 0 + 18 | 0 | 1 | 1 + 18 | 0 | 0 | 1 + 18 | 0 | 1 | 0 + 18 | 0 | 0 | 0 + 18 | 0 | 1 | 1 + 19 | 1 | 1 | 1 + 19 | 1 | 0 | 1 + 19 | 1 | 1 | 0 + 19 | 1 | 0 | 0 + 19 | 1 | 1 | 1 + 19 | 1 | 0 | 1 + 19 | 1 | 1 | 0 + 19 | 1 | 0 | 0 + 19 | 1 | 1 | 1 + 1 | 1 | 5 | 1 + 1 | 1 | 6 | 0 + 1 | 1 | 5 | 0 + 1 | 1 | 6 | 1 + 1 | 1 | 5 | 1 + 1 | 1 | 6 | 0 + 1 | 1 | 5 | 0 + 1 | 1 | 6 | 1 + 12 | 0 | 5 | 1 + 12 | 0 | 6 | 0 + 12 | 0 | 5 | 0 + 12 | 0 | 6 | 1 + 12 | 0 | 5 | 1 + 12 | 0 | 6 | 0 + 12 | 0 | 5 | 0 + 12 | 0 | 6 | 1 + 15 | 0 | 5 | 1 + 15 | 0 | 6 | 0 + 15 | 0 | 5 | 0 + 15 | 0 | 6 | 1 + 15 | 0 | 5 | 1 + 15 | 0 | 6 | 0 + 15 | 0 | 5 | 0 + 15 | 0 | 6 | 1 + 20 | 2 | 5 | 1 + 20 | 2 | 6 | 0 + 20 | 2 | 5 | 0 + 20 | 2 | 6 | 1 + 20 | 2 | 5 | 1 + 20 | 2 | 6 | 0 + 20 | 2 | 5 | 0 + 20 | 2 | 6 | 1 5 | 2 | 5 | 1 5 | 2 | 6 | 0 - 5 | 2 | 3 | 0 - 5 | 2 | 4 | 1 5 | 2 | 5 | 0 5 | 2 | 6 | 1 - 5 | 2 | 3 | 1 - 5 | 2 | 4 | 0 5 | 2 | 5 | 1 5 | 2 | 6 | 0 - 5 | 2 
| 3 | 0 - 5 | 2 | 4 | 1 5 | 2 | 5 | 0 5 | 2 | 6 | 1 - 6 | 0 | 3 | 1 - 6 | 0 | 4 | 0 6 | 0 | 5 | 1 6 | 0 | 6 | 0 - 6 | 0 | 3 | 0 - 6 | 0 | 4 | 1 6 | 0 | 5 | 0 6 | 0 | 6 | 1 - 6 | 0 | 3 | 1 - 6 | 0 | 4 | 0 6 | 0 | 5 | 1 6 | 0 | 6 | 0 - 6 | 0 | 3 | 0 - 6 | 0 | 4 | 1 6 | 0 | 5 | 0 6 | 0 | 6 | 1 - 7 | 1 | 3 | 1 - 7 | 1 | 4 | 0 + 9 | 0 | 5 | 1 + 9 | 0 | 6 | 0 + 9 | 0 | 5 | 0 + 9 | 0 | 6 | 1 + 9 | 0 | 5 | 1 + 9 | 0 | 6 | 0 + 9 | 0 | 5 | 0 + 9 | 0 | 6 | 1 + 10 | 1 | 5 | 1 + 10 | 1 | 6 | 0 + 10 | 1 | 5 | 0 + 10 | 1 | 6 | 1 + 10 | 1 | 5 | 1 + 10 | 1 | 6 | 0 + 10 | 1 | 5 | 0 + 10 | 1 | 6 | 1 + 11 | 2 | 5 | 1 + 11 | 2 | 6 | 0 + 11 | 2 | 5 | 0 + 11 | 2 | 6 | 1 + 11 | 2 | 5 | 1 + 11 | 2 | 6 | 0 + 11 | 2 | 5 | 0 + 11 | 2 | 6 | 1 + 13 | 1 | 5 | 1 + 13 | 1 | 6 | 0 + 13 | 1 | 5 | 0 + 13 | 1 | 6 | 1 + 13 | 1 | 5 | 1 + 13 | 1 | 6 | 0 + 13 | 1 | 5 | 0 + 13 | 1 | 6 | 1 + 14 | 2 | 5 | 1 + 14 | 2 | 6 | 0 + 14 | 2 | 5 | 0 + 14 | 2 | 6 | 1 + 14 | 2 | 5 | 1 + 14 | 2 | 6 | 0 + 14 | 2 | 5 | 0 + 14 | 2 | 6 | 1 + 17 | 2 | 5 | 1 + 17 | 2 | 6 | 0 + 17 | 2 | 5 | 0 + 17 | 2 | 6 | 1 + 17 | 2 | 5 | 1 + 17 | 2 | 6 | 0 + 17 | 2 | 5 | 0 + 17 | 2 | 6 | 1 + 2 | 2 | 5 | 1 + 2 | 2 | 6 | 0 + 2 | 2 | 5 | 0 + 2 | 2 | 6 | 1 + 2 | 2 | 5 | 1 + 2 | 2 | 6 | 0 + 2 | 2 | 5 | 0 + 2 | 2 | 6 | 1 + 3 | 0 | 5 | 1 + 3 | 0 | 6 | 0 + 3 | 0 | 5 | 0 + 3 | 0 | 6 | 1 + 3 | 0 | 5 | 1 + 3 | 0 | 6 | 0 + 3 | 0 | 5 | 0 + 3 | 0 | 6 | 1 + 4 | 1 | 5 | 1 + 4 | 1 | 6 | 0 + 4 | 1 | 5 | 0 + 4 | 1 | 6 | 1 + 4 | 1 | 5 | 1 + 4 | 1 | 6 | 0 + 4 | 1 | 5 | 0 + 4 | 1 | 6 | 1 7 | 1 | 5 | 1 7 | 1 | 6 | 0 - 7 | 1 | 3 | 0 - 7 | 1 | 4 | 1 7 | 1 | 5 | 0 7 | 1 | 6 | 1 - 7 | 1 | 3 | 1 - 7 | 1 | 4 | 0 7 | 1 | 5 | 1 7 | 1 | 6 | 0 - 7 | 1 | 3 | 0 - 7 | 1 | 4 | 1 7 | 1 | 5 | 0 7 | 1 | 6 | 1 - 18 | 0 | 3 | 1 - 18 | 0 | 4 | 0 + 8 | 2 | 5 | 1 + 8 | 2 | 6 | 0 + 8 | 2 | 5 | 0 + 8 | 2 | 6 | 1 + 8 | 2 | 5 | 1 + 8 | 2 | 6 | 0 + 8 | 2 | 5 | 0 + 8 | 2 | 6 | 1 + 16 | 1 | 5 | 1 + 16 | 1 | 6 | 0 + 16 | 1 | 5 | 0 + 16 | 1 | 6 | 1 + 16 | 1 | 5 | 1 + 16 | 1 | 6 | 0 + 16 | 1 | 
5 | 0 + 16 | 1 | 6 | 1 18 | 0 | 5 | 1 18 | 0 | 6 | 0 - 18 | 0 | 3 | 0 - 18 | 0 | 4 | 1 18 | 0 | 5 | 0 18 | 0 | 6 | 1 - 18 | 0 | 3 | 1 - 18 | 0 | 4 | 0 18 | 0 | 5 | 1 18 | 0 | 6 | 0 - 18 | 0 | 3 | 0 - 18 | 0 | 4 | 1 18 | 0 | 5 | 0 18 | 0 | 6 | 1 - 19 | 1 | 3 | 1 - 19 | 1 | 4 | 0 19 | 1 | 5 | 1 19 | 1 | 6 | 0 - 19 | 1 | 3 | 0 - 19 | 1 | 4 | 1 19 | 1 | 5 | 0 19 | 1 | 6 | 1 - 19 | 1 | 3 | 1 - 19 | 1 | 4 | 0 19 | 1 | 5 | 1 - 19 | 1 | 6 | 0 - 19 | 1 | 3 | 0 - 19 | 1 | 4 | 1 + 19 | 1 | 6 | 0 19 | 1 | 5 | 0 19 | 1 | 6 | 1 + 1 | 1 | 2 | 0 + 1 | 1 | 3 | 1 + 1 | 1 | 4 | 0 + 1 | 1 | 2 | 1 + 1 | 1 | 3 | 0 + 1 | 1 | 4 | 1 + 1 | 1 | 2 | 0 + 1 | 1 | 3 | 1 + 1 | 1 | 4 | 0 + 1 | 1 | 2 | 1 + 1 | 1 | 3 | 0 + 1 | 1 | 4 | 1 + 1 | 1 | 2 | 0 + 12 | 0 | 2 | 0 + 12 | 0 | 3 | 1 + 12 | 0 | 4 | 0 + 12 | 0 | 2 | 1 + 12 | 0 | 3 | 0 + 12 | 0 | 4 | 1 + 12 | 0 | 2 | 0 + 12 | 0 | 3 | 1 + 12 | 0 | 4 | 0 + 12 | 0 | 2 | 1 + 12 | 0 | 3 | 0 + 12 | 0 | 4 | 1 + 12 | 0 | 2 | 0 + 15 | 0 | 2 | 0 + 15 | 0 | 3 | 1 + 15 | 0 | 4 | 0 + 15 | 0 | 2 | 1 + 15 | 0 | 3 | 0 + 15 | 0 | 4 | 1 + 15 | 0 | 2 | 0 + 15 | 0 | 3 | 1 + 15 | 0 | 4 | 0 + 15 | 0 | 2 | 1 + 15 | 0 | 3 | 0 + 15 | 0 | 4 | 1 + 15 | 0 | 2 | 0 + 20 | 2 | 2 | 0 20 | 2 | 3 | 1 20 | 2 | 4 | 0 - 20 | 2 | 5 | 1 - 20 | 2 | 6 | 0 + 20 | 2 | 2 | 1 20 | 2 | 3 | 0 20 | 2 | 4 | 1 - 20 | 2 | 5 | 0 - 20 | 2 | 6 | 1 + 20 | 2 | 2 | 0 20 | 2 | 3 | 1 20 | 2 | 4 | 0 - 20 | 2 | 5 | 1 - 20 | 2 | 6 | 0 + 20 | 2 | 2 | 1 20 | 2 | 3 | 0 20 | 2 | 4 | 1 - 20 | 2 | 5 | 0 - 20 | 2 | 6 | 1 - 8 | 2 | 3 | 1 - 8 | 2 | 4 | 0 - 8 | 2 | 5 | 1 - 8 | 2 | 6 | 0 - 8 | 2 | 3 | 0 - 8 | 2 | 4 | 1 - 8 | 2 | 5 | 0 - 8 | 2 | 6 | 1 - 8 | 2 | 3 | 1 - 8 | 2 | 4 | 0 - 8 | 2 | 5 | 1 - 8 | 2 | 6 | 0 - 8 | 2 | 3 | 0 - 8 | 2 | 4 | 1 - 8 | 2 | 5 | 0 - 8 | 2 | 6 | 1 + 20 | 2 | 2 | 0 + 5 | 2 | 2 | 0 + 5 | 2 | 3 | 1 + 5 | 2 | 4 | 0 + 5 | 2 | 2 | 1 + 5 | 2 | 3 | 0 + 5 | 2 | 4 | 1 + 5 | 2 | 2 | 0 + 5 | 2 | 3 | 1 + 5 | 2 | 4 | 0 + 5 | 2 | 2 | 1 + 5 | 2 | 3 | 0 + 5 | 2 | 4 | 1 + 5 | 2 | 2 | 0 + 6 | 0 | 2 | 0 + 6 | 0 
| 3 | 1 + 6 | 0 | 4 | 0 + 6 | 0 | 2 | 1 + 6 | 0 | 3 | 0 + 6 | 0 | 4 | 1 + 6 | 0 | 2 | 0 + 6 | 0 | 3 | 1 + 6 | 0 | 4 | 0 + 6 | 0 | 2 | 1 + 6 | 0 | 3 | 0 + 6 | 0 | 4 | 1 + 6 | 0 | 2 | 0 + 9 | 0 | 2 | 0 9 | 0 | 3 | 1 9 | 0 | 4 | 0 - 9 | 0 | 5 | 1 - 9 | 0 | 6 | 0 + 9 | 0 | 2 | 1 9 | 0 | 3 | 0 9 | 0 | 4 | 1 - 9 | 0 | 5 | 0 - 9 | 0 | 6 | 1 + 9 | 0 | 2 | 0 9 | 0 | 3 | 1 9 | 0 | 4 | 0 - 9 | 0 | 5 | 1 - 9 | 0 | 6 | 0 + 9 | 0 | 2 | 1 9 | 0 | 3 | 0 9 | 0 | 4 | 1 - 9 | 0 | 5 | 0 - 9 | 0 | 6 | 1 + 9 | 0 | 2 | 0 + 10 | 1 | 2 | 0 10 | 1 | 3 | 1 10 | 1 | 4 | 0 - 10 | 1 | 5 | 1 - 10 | 1 | 6 | 0 + 10 | 1 | 2 | 1 10 | 1 | 3 | 0 10 | 1 | 4 | 1 - 10 | 1 | 5 | 0 - 10 | 1 | 6 | 1 + 10 | 1 | 2 | 0 10 | 1 | 3 | 1 10 | 1 | 4 | 0 - 10 | 1 | 5 | 1 - 10 | 1 | 6 | 0 + 10 | 1 | 2 | 1 10 | 1 | 3 | 0 10 | 1 | 4 | 1 - 10 | 1 | 5 | 0 - 10 | 1 | 6 | 1 + 10 | 1 | 2 | 0 + 11 | 2 | 2 | 0 11 | 2 | 3 | 1 11 | 2 | 4 | 0 - 1 | 1 | 1 | 1 - 1 | 1 | 2 | 0 - 1 | 1 | 0 | 1 - 1 | 1 | 1 | 0 - 1 | 1 | 2 | 1 - 1 | 1 | 0 | 0 - 1 | 1 | 1 | 1 - 1 | 1 | 2 | 0 - 1 | 1 | 0 | 1 - 1 | 1 | 1 | 0 - 1 | 1 | 2 | 1 - 1 | 1 | 0 | 0 - 1 | 1 | 1 | 1 - 1 | 1 | 2 | 0 - 2 | 2 | 1 | 1 - 2 | 2 | 2 | 0 - 2 | 2 | 0 | 1 - 2 | 2 | 1 | 0 - 2 | 2 | 2 | 1 - 2 | 2 | 0 | 0 - 2 | 2 | 1 | 1 - 2 | 2 | 2 | 0 - 2 | 2 | 0 | 1 - 2 | 2 | 1 | 0 - 2 | 2 | 2 | 1 - 2 | 2 | 0 | 0 - 2 | 2 | 1 | 1 - 2 | 2 | 2 | 0 - 13 | 1 | 1 | 1 + 11 | 2 | 2 | 1 + 11 | 2 | 3 | 0 + 11 | 2 | 4 | 1 + 11 | 2 | 2 | 0 + 11 | 2 | 3 | 1 + 11 | 2 | 4 | 0 + 11 | 2 | 2 | 1 + 11 | 2 | 3 | 0 + 11 | 2 | 4 | 1 + 11 | 2 | 2 | 0 13 | 1 | 2 | 0 - 13 | 1 | 0 | 1 - 13 | 1 | 1 | 0 + 13 | 1 | 3 | 1 + 13 | 1 | 4 | 0 13 | 1 | 2 | 1 - 13 | 1 | 0 | 0 - 13 | 1 | 1 | 1 + 13 | 1 | 3 | 0 + 13 | 1 | 4 | 1 13 | 1 | 2 | 0 - 13 | 1 | 0 | 1 - 13 | 1 | 1 | 0 + 13 | 1 | 3 | 1 + 13 | 1 | 4 | 0 13 | 1 | 2 | 1 - 13 | 1 | 0 | 0 - 13 | 1 | 1 | 1 + 13 | 1 | 3 | 0 + 13 | 1 | 4 | 1 13 | 1 | 2 | 0 - 14 | 2 | 1 | 1 14 | 2 | 2 | 0 - 14 | 2 | 0 | 1 - 14 | 2 | 1 | 0 + 14 | 2 | 3 | 1 + 14 | 2 | 4 | 0 14 | 2 | 2 | 1 - 14 | 2 | 
0 | 0 - 14 | 2 | 1 | 1 + 14 | 2 | 3 | 0 + 14 | 2 | 4 | 1 14 | 2 | 2 | 0 - 14 | 2 | 0 | 1 - 14 | 2 | 1 | 0 + 14 | 2 | 3 | 1 + 14 | 2 | 4 | 0 14 | 2 | 2 | 1 - 14 | 2 | 0 | 0 - 14 | 2 | 1 | 1 + 14 | 2 | 3 | 0 + 14 | 2 | 4 | 1 14 | 2 | 2 | 0 - 15 | 0 | 1 | 1 - 15 | 0 | 2 | 0 - 15 | 0 | 0 | 1 - 15 | 0 | 1 | 0 - 15 | 0 | 2 | 1 - 15 | 0 | 0 | 0 - 15 | 0 | 1 | 1 - 15 | 0 | 2 | 0 - 15 | 0 | 0 | 1 - 15 | 0 | 1 | 0 - 15 | 0 | 2 | 1 - 15 | 0 | 0 | 0 - 15 | 0 | 1 | 1 - 15 | 0 | 2 | 0 - 16 | 1 | 1 | 1 - 16 | 1 | 2 | 0 - 16 | 1 | 0 | 1 - 16 | 1 | 1 | 0 - 16 | 1 | 2 | 1 - 16 | 1 | 0 | 0 - 16 | 1 | 1 | 1 - 16 | 1 | 2 | 0 - 16 | 1 | 0 | 1 - 16 | 1 | 1 | 0 - 16 | 1 | 2 | 1 - 16 | 1 | 0 | 0 - 16 | 1 | 1 | 1 - 16 | 1 | 2 | 0 - 17 | 2 | 1 | 1 17 | 2 | 2 | 0 - 17 | 2 | 0 | 1 - 17 | 2 | 1 | 0 + 17 | 2 | 3 | 1 + 17 | 2 | 4 | 0 17 | 2 | 2 | 1 - 17 | 2 | 0 | 0 - 17 | 2 | 1 | 1 + 17 | 2 | 3 | 0 + 17 | 2 | 4 | 1 17 | 2 | 2 | 0 - 17 | 2 | 0 | 1 - 17 | 2 | 1 | 0 + 17 | 2 | 3 | 1 + 17 | 2 | 4 | 0 17 | 2 | 2 | 1 - 17 | 2 | 0 | 0 - 17 | 2 | 1 | 1 + 17 | 2 | 3 | 0 + 17 | 2 | 4 | 1 17 | 2 | 2 | 0 - 3 | 0 | 1 | 1 - 3 | 0 | 2 | 0 - 3 | 0 | 0 | 1 - 3 | 0 | 1 | 0 + 2 | 2 | 2 | 0 + 2 | 2 | 3 | 1 + 2 | 2 | 4 | 0 + 2 | 2 | 2 | 1 + 2 | 2 | 3 | 0 + 2 | 2 | 4 | 1 + 2 | 2 | 2 | 0 + 2 | 2 | 3 | 1 + 2 | 2 | 4 | 0 + 2 | 2 | 2 | 1 + 2 | 2 | 3 | 0 + 2 | 2 | 4 | 1 + 2 | 2 | 2 | 0 + 3 | 0 | 2 | 0 + 3 | 0 | 3 | 1 + 3 | 0 | 4 | 0 3 | 0 | 2 | 1 - 3 | 0 | 0 | 0 - 3 | 0 | 1 | 1 + 3 | 0 | 3 | 0 + 3 | 0 | 4 | 1 3 | 0 | 2 | 0 - 3 | 0 | 0 | 1 - 3 | 0 | 1 | 0 + 3 | 0 | 3 | 1 + 3 | 0 | 4 | 0 3 | 0 | 2 | 1 - 3 | 0 | 0 | 0 - 3 | 0 | 1 | 1 + 3 | 0 | 3 | 0 + 3 | 0 | 4 | 1 3 | 0 | 2 | 0 - 4 | 1 | 1 | 1 4 | 1 | 2 | 0 - 4 | 1 | 0 | 1 - 4 | 1 | 1 | 0 + 4 | 1 | 3 | 1 + 4 | 1 | 4 | 0 4 | 1 | 2 | 1 - 4 | 1 | 0 | 0 - 4 | 1 | 1 | 1 + 4 | 1 | 3 | 0 + 4 | 1 | 4 | 1 4 | 1 | 2 | 0 - 4 | 1 | 0 | 1 - 4 | 1 | 1 | 0 + 4 | 1 | 3 | 1 + 4 | 1 | 4 | 0 4 | 1 | 2 | 1 - 4 | 1 | 0 | 0 - 4 | 1 | 1 | 1 + 4 | 1 | 3 | 0 + 4 | 1 | 4 | 1 4 | 1 | 2 | 0 - 5 | 2 | 1 
| 1 - 5 | 2 | 2 | 0 - 5 | 2 | 0 | 1 - 5 | 2 | 1 | 0 - 5 | 2 | 2 | 1 - 5 | 2 | 0 | 0 - 5 | 2 | 1 | 1 - 5 | 2 | 2 | 0 - 5 | 2 | 0 | 1 - 5 | 2 | 1 | 0 - 5 | 2 | 2 | 1 - 5 | 2 | 0 | 0 - 5 | 2 | 1 | 1 - 5 | 2 | 2 | 0 - 6 | 0 | 1 | 1 - 6 | 0 | 2 | 0 - 6 | 0 | 0 | 1 - 6 | 0 | 1 | 0 - 6 | 0 | 2 | 1 - 6 | 0 | 0 | 0 - 6 | 0 | 1 | 1 - 6 | 0 | 2 | 0 - 6 | 0 | 0 | 1 - 6 | 0 | 1 | 0 - 6 | 0 | 2 | 1 - 6 | 0 | 0 | 0 - 6 | 0 | 1 | 1 - 6 | 0 | 2 | 0 - 7 | 1 | 1 | 1 7 | 1 | 2 | 0 - 7 | 1 | 0 | 1 - 7 | 1 | 1 | 0 + 7 | 1 | 3 | 1 + 7 | 1 | 4 | 0 7 | 1 | 2 | 1 - 7 | 1 | 0 | 0 - 7 | 1 | 1 | 1 + 7 | 1 | 3 | 0 + 7 | 1 | 4 | 1 7 | 1 | 2 | 0 - 7 | 1 | 0 | 1 - 7 | 1 | 1 | 0 + 7 | 1 | 3 | 1 + 7 | 1 | 4 | 0 7 | 1 | 2 | 1 - 7 | 1 | 0 | 0 - 7 | 1 | 1 | 1 + 7 | 1 | 3 | 0 + 7 | 1 | 4 | 1 7 | 1 | 2 | 0 - 18 | 0 | 1 | 1 + 8 | 2 | 2 | 0 + 8 | 2 | 3 | 1 + 8 | 2 | 4 | 0 + 8 | 2 | 2 | 1 + 8 | 2 | 3 | 0 + 8 | 2 | 4 | 1 + 8 | 2 | 2 | 0 + 8 | 2 | 3 | 1 + 8 | 2 | 4 | 0 + 8 | 2 | 2 | 1 + 8 | 2 | 3 | 0 + 8 | 2 | 4 | 1 + 8 | 2 | 2 | 0 + 16 | 1 | 2 | 0 + 16 | 1 | 3 | 1 + 16 | 1 | 4 | 0 + 16 | 1 | 2 | 1 + 16 | 1 | 3 | 0 + 16 | 1 | 4 | 1 + 16 | 1 | 2 | 0 + 16 | 1 | 3 | 1 + 16 | 1 | 4 | 0 + 16 | 1 | 2 | 1 + 16 | 1 | 3 | 0 + 16 | 1 | 4 | 1 + 16 | 1 | 2 | 0 18 | 0 | 2 | 0 - 18 | 0 | 0 | 1 - 18 | 0 | 1 | 0 + 18 | 0 | 3 | 1 + 18 | 0 | 4 | 0 18 | 0 | 2 | 1 - 18 | 0 | 0 | 0 - 18 | 0 | 1 | 1 + 18 | 0 | 3 | 0 + 18 | 0 | 4 | 1 18 | 0 | 2 | 0 - 18 | 0 | 0 | 1 - 18 | 0 | 1 | 0 + 18 | 0 | 3 | 1 + 18 | 0 | 4 | 0 18 | 0 | 2 | 1 - 18 | 0 | 0 | 0 - 18 | 0 | 1 | 1 + 18 | 0 | 3 | 0 + 18 | 0 | 4 | 1 18 | 0 | 2 | 0 - 19 | 1 | 1 | 1 19 | 1 | 2 | 0 - 19 | 1 | 0 | 1 - 19 | 1 | 1 | 0 + 19 | 1 | 3 | 1 + 19 | 1 | 4 | 0 19 | 1 | 2 | 1 - 19 | 1 | 0 | 0 - 19 | 1 | 1 | 1 + 19 | 1 | 3 | 0 + 19 | 1 | 4 | 1 19 | 1 | 2 | 0 - 19 | 1 | 0 | 1 - 19 | 1 | 1 | 0 + 19 | 1 | 3 | 1 + 19 | 1 | 4 | 0 19 | 1 | 2 | 1 - 19 | 1 | 0 | 0 - 19 | 1 | 1 | 1 + 19 | 1 | 3 | 0 + 19 | 1 | 4 | 1 19 | 1 | 2 | 0 - 20 | 2 | 1 | 1 - 20 | 2 | 2 | 0 - 20 | 2 | 0 | 1 - 20 | 2 | 1 
| 0 - 20 | 2 | 2 | 1 - 20 | 2 | 0 | 0 - 20 | 2 | 1 | 1 - 20 | 2 | 2 | 0 - 20 | 2 | 0 | 1 - 20 | 2 | 1 | 0 - 20 | 2 | 2 | 1 - 20 | 2 | 0 | 0 - 20 | 2 | 1 | 1 - 20 | 2 | 2 | 0 - 8 | 2 | 1 | 1 - 8 | 2 | 2 | 0 - 8 | 2 | 0 | 1 - 8 | 2 | 1 | 0 - 8 | 2 | 2 | 1 - 8 | 2 | 0 | 0 - 8 | 2 | 1 | 1 - 8 | 2 | 2 | 0 - 8 | 2 | 0 | 1 - 8 | 2 | 1 | 0 - 8 | 2 | 2 | 1 - 8 | 2 | 0 | 0 - 8 | 2 | 1 | 1 - 8 | 2 | 2 | 0 - 9 | 0 | 1 | 1 - 9 | 0 | 2 | 0 - 9 | 0 | 0 | 1 - 9 | 0 | 1 | 0 - 9 | 0 | 2 | 1 - 9 | 0 | 0 | 0 - 9 | 0 | 1 | 1 - 9 | 0 | 2 | 0 - 9 | 0 | 0 | 1 - 9 | 0 | 1 | 0 - 9 | 0 | 2 | 1 - 9 | 0 | 0 | 0 - 9 | 0 | 1 | 1 - 9 | 0 | 2 | 0 - 10 | 1 | 1 | 1 - 10 | 1 | 2 | 0 - 10 | 1 | 0 | 1 - 10 | 1 | 1 | 0 - 10 | 1 | 2 | 1 - 10 | 1 | 0 | 0 - 10 | 1 | 1 | 1 - 10 | 1 | 2 | 0 - 10 | 1 | 0 | 1 - 10 | 1 | 1 | 0 - 10 | 1 | 2 | 1 - 10 | 1 | 0 | 0 - 10 | 1 | 1 | 1 - 10 | 1 | 2 | 0 - 11 | 2 | 1 | 1 - 11 | 2 | 2 | 0 - 11 | 2 | 0 | 1 - 11 | 2 | 1 | 0 - 11 | 2 | 2 | 1 - 11 | 2 | 0 | 0 - 11 | 2 | 1 | 1 - 11 | 2 | 2 | 0 - 11 | 2 | 0 | 1 - 11 | 2 | 1 | 0 - 11 | 2 | 2 | 1 - 11 | 2 | 0 | 0 - 11 | 2 | 1 | 1 - 11 | 2 | 2 | 0 - 12 | 0 | 1 | 1 - 12 | 0 | 2 | 0 - 12 | 0 | 0 | 1 - 12 | 0 | 1 | 0 - 12 | 0 | 2 | 1 - 12 | 0 | 0 | 0 - 12 | 0 | 1 | 1 - 12 | 0 | 2 | 0 - 12 | 0 | 0 | 1 - 12 | 0 | 1 | 0 - 12 | 0 | 2 | 1 - 12 | 0 | 0 | 0 - 12 | 0 | 1 | 1 - 12 | 0 | 2 | 0 - 11 | 2 | 5 | 1 - 11 | 2 | 6 | 0 - 11 | 2 | 3 | 0 - 11 | 2 | 4 | 1 - 11 | 2 | 5 | 0 - 11 | 2 | 6 | 1 - 11 | 2 | 3 | 1 - 11 | 2 | 4 | 0 - 11 | 2 | 5 | 1 - 11 | 2 | 6 | 0 - 11 | 2 | 3 | 0 - 11 | 2 | 4 | 1 - 11 | 2 | 5 | 0 - 11 | 2 | 6 | 1 - 12 | 0 | 3 | 1 - 12 | 0 | 4 | 0 - 12 | 0 | 5 | 1 - 12 | 0 | 6 | 0 - 12 | 0 | 3 | 0 - 12 | 0 | 4 | 1 - 12 | 0 | 5 | 0 - 12 | 0 | 6 | 1 - 12 | 0 | 3 | 1 - 12 | 0 | 4 | 0 - 12 | 0 | 5 | 1 - 12 | 0 | 6 | 0 - 12 | 0 | 3 | 0 - 12 | 0 | 4 | 1 - 12 | 0 | 5 | 0 - 12 | 0 | 6 | 1 (600 rows) -- empty target list select r.* from orca.r, orca.s where s.c=2; a | b ----+--- + 2 | 2 + 3 | 0 + 4 | 1 + 7 | 1 8 | 2 + 16 | 1 + 18 | 0 + 19 | 
1 + 2 | 2 + 3 | 0 + 4 | 1 + 7 | 1 8 | 2 + 16 | 1 + 18 | 0 + 19 | 1 + 2 | 2 + 3 | 0 + 4 | 1 + 7 | 1 8 | 2 + 16 | 1 + 18 | 0 + 19 | 1 + 2 | 2 + 3 | 0 + 4 | 1 + 7 | 1 8 | 2 + 16 | 1 + 18 | 0 + 19 | 1 + 2 | 2 + 3 | 0 + 4 | 1 + 7 | 1 8 | 2 - 9 | 0 - 9 | 0 - 9 | 0 - 9 | 0 - 9 | 0 - 10 | 1 - 10 | 1 - 10 | 1 - 10 | 1 - 10 | 1 - 11 | 2 - 11 | 2 - 11 | 2 - 11 | 2 - 11 | 2 - 12 | 0 - 12 | 0 - 12 | 0 - 12 | 0 - 12 | 0 + 16 | 1 + 18 | 0 + 19 | 1 1 | 1 + 12 | 0 + 15 | 0 + 20 | 2 1 | 1 + 12 | 0 + 15 | 0 + 20 | 2 1 | 1 + 12 | 0 + 15 | 0 + 20 | 2 1 | 1 + 12 | 0 + 15 | 0 + 20 | 2 1 | 1 - 2 | 2 - 2 | 2 - 2 | 2 - 2 | 2 - 2 | 2 - 13 | 1 - 13 | 1 - 13 | 1 - 13 | 1 + 12 | 0 + 15 | 0 + 20 | 2 + 5 | 2 + 6 | 0 + 9 | 0 + 10 | 1 + 11 | 2 13 | 1 14 | 2 - 14 | 2 - 14 | 2 - 14 | 2 - 14 | 2 - 15 | 0 - 15 | 0 - 15 | 0 - 15 | 0 - 15 | 0 - 16 | 1 - 16 | 1 - 16 | 1 - 16 | 1 - 16 | 1 - 17 | 2 - 17 | 2 - 17 | 2 17 | 2 - 17 | 2 - 3 | 0 - 3 | 0 - 3 | 0 - 3 | 0 - 3 | 0 - 4 | 1 - 4 | 1 - 4 | 1 - 4 | 1 - 4 | 1 - 5 | 2 5 | 2 - 5 | 2 - 5 | 2 - 5 | 2 - 6 | 0 6 | 0 + 9 | 0 + 10 | 1 + 11 | 2 + 13 | 1 + 14 | 2 + 17 | 2 + 5 | 2 6 | 0 + 9 | 0 + 10 | 1 + 11 | 2 + 13 | 1 + 14 | 2 + 17 | 2 + 5 | 2 6 | 0 + 9 | 0 + 10 | 1 + 11 | 2 + 13 | 1 + 14 | 2 + 17 | 2 + 5 | 2 6 | 0 - 7 | 1 - 7 | 1 - 7 | 1 - 7 | 1 - 7 | 1 - 18 | 0 - 18 | 0 - 18 | 0 - 18 | 0 - 18 | 0 - 19 | 1 - 19 | 1 - 19 | 1 - 19 | 1 - 19 | 1 - 20 | 2 - 20 | 2 - 20 | 2 - 20 | 2 - 20 | 2 + 9 | 0 + 10 | 1 + 11 | 2 + 13 | 1 + 14 | 2 + 17 | 2 (100 rows) create table orca.m(); @@ -3214,47 +3218,47 @@ insert into orca.r values (null, 1); select r.a, s.c from orca.r left outer join orca.s on(r.a=s.c); a | c ----+--- - 1 | 1 - 1 | 1 - 1 | 1 - 1 | 1 - 1 | 1 - 2 | 2 - 2 | 2 - 2 | 2 - 2 | 2 - 2 | 2 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 13 | 14 | - 15 | - 16 | - 17 | - 8 | - 9 | - 10 | 11 | - 12 | - 3 | 3 - 3 | 3 - 3 | 3 + 10 | + 9 | + 17 | + 2 | 2 3 | 3 4 | 4 + 2 | 2 + 3 | 3 4 | 4 + 2 | 2 + 3 | 3 4 | 4 + 2 | 2 + 3 | 3 4 | 4 - 5 | 5 - 5 | 5 - 5 | 5 - 
5 | 5 - 6 | 6 - 6 | 6 - 6 | 6 - 6 | 6 - 7 | + 2 | 2 + | 18 | + 7 | + 16 | + 8 | 19 | + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 20 | - | + 15 | + 12 | (41 rows) select r.a, s.c from orca.r left outer join orca.s on(r.a=s.c and r.a=r.b and s.c=s.d) order by r.a,s.c; @@ -3308,133 +3312,133 @@ select r.a, s.c from orca.r left outer join orca.s on(r.a=s.c) where s.d > 2 or select r.a, s.c from orca.r right outer join orca.s on(r.a=s.c); a | c ---+--- - 1 | 1 - 2 | 2 - | 0 - 1 | 1 - 2 | 2 - | 0 - 1 | 1 - 2 | 2 - | 0 - 1 | 1 - 2 | 2 - | 0 - 1 | 1 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 2 | 2 3 | 3 4 | 4 - 5 | 5 - 6 | 6 + 2 | 2 3 | 3 4 | 4 - 5 | 5 - 6 | 6 + 2 | 2 3 | 3 4 | 4 - 5 | 5 - 6 | 6 + 2 | 2 3 | 3 4 | 4 - 5 | 5 - 6 | 6 + 2 | 2 + 1 | 1 + | 0 + 1 | 1 + | 0 + 1 | 1 + | 0 + 1 | 1 + | 0 + 1 | 1 (30 rows) select * from orca.r where exists (select * from orca.s where s.c=r.a + 2); a | b ---+--- - 1 | 1 - 2 | 2 3 | 0 4 | 1 + 1 | 1 + 2 | 2 (4 rows) select * from orca.r where exists (select * from orca.s where s.c=r.b); a | b ----+--- - 1 | 1 + 20 | 2 2 | 2 - 13 | 1 + 8 | 2 + 5 | 2 + 11 | 2 14 | 2 - 15 | 0 - 16 | 1 17 | 2 + 1 | 1 + 12 | 0 + 15 | 0 3 | 0 4 | 1 - 5 | 2 - 6 | 0 7 | 1 + 16 | 1 18 | 0 19 | 1 - 20 | 2 | 1 - 8 | 2 + 6 | 0 9 | 0 10 | 1 - 11 | 2 - 12 | 0 + 13 | 1 (21 rows) select * from orca.m where m.a not in (select a from orca.m1 where a=5); a | b ----+--- - 0 | 1 - 1 | 0 - 3 | 0 4 | 1 8 | 1 - 9 | 0 - 15 | 0 - 23 | 0 + 12 | 1 + 13 | 0 + 14 | 1 + 20 | 1 + 21 | 0 + 28 | 1 30 | 1 - 2 | 1 - 6 | 1 + 32 | 1 + 34 | 1 + 0 | 1 7 | 0 + 9 | 0 10 | 1 11 | 0 - 13 | 0 16 | 1 17 | 0 18 | 1 - 20 | 1 - 21 | 0 + 19 | 0 22 | 1 - 24 | 1 + 23 | 0 + 25 | 0 26 | 1 27 | 0 - 31 | 0 - 32 | 1 - 12 | 1 - 14 | 1 - 19 | 0 - 25 | 0 - 28 | 1 29 | 0 + 31 | 0 + 1 | 0 + 2 | 1 + 3 | 0 + 6 | 1 + 15 | 0 + 24 | 1 33 | 0 - 34 | 1 (34 rows) select * from orca.m where m.a not in (select a from orca.m1); a | b ----+--- - 30 | 1 - 25 | 0 - 28 | 1 - 29 | 0 + 24 | 1 33 | 0 + 28 | 1 + 30 | 
1 + 32 | 1 34 | 1 - 24 | 1 + 25 | 0 26 | 1 27 | 0 + 29 | 0 31 | 0 - 32 | 1 (11 rows) select * from orca.m where m.a in (select a from orca.m1 where m1.a-1 = m.b); a | b ---+--- - 2 | 1 1 | 0 + 2 | 1 (2 rows) -- enable_hashjoin=off; enable_mergejoin=on @@ -3463,24 +3467,24 @@ select 1 from orca.m, orca.m1 where m.a = m1.a and m.b!=m1.b; select * from orca.r left outer join orca.s on (r.a=s.c and r.b Hash Join (cost=3.45..6.20 rows=10 width=16) - Hash Cond: s.c = r.a - -> Seq Scan on s (cost=0.00..2.30 rows=10 width=8) - -> Hash (cost=3.20..3.20 rows=7 width=8) - -> Seq Scan on r (cost=0.00..3.20 rows=7 width=8) - Settings: optimizer=off; optimizer_segments=3 - Optimizer status: Postgres query optimizer -(8 rows) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.15..2.78 rows=30 width=16) + -> Hash Join (cost=1.15..2.38 rows=10 width=16) + Hash Cond: (s.c = r.a) + -> Seq Scan on s (cost=0.00..1.10 rows=10 width=8) + -> Hash (cost=1.07..1.07 rows=7 width=8) + -> Seq Scan on r (cost=0.00..1.07 rows=7 width=8) + Optimizer: Postgres query optimizer +(7 rows) -- sort select * from orca.r join orca.s on(r.a=s.c) order by r.a, s.d; @@ -3640,174 +3643,174 @@ insert into orca.m values (1,-1), (1,2), (1,1); select a,a,a+b from orca.m; a | a | ?column? 
----+----+---------- - 12 | 12 | 13 - 14 | 14 | 15 - 19 | 19 | 19 - 25 | 25 | 25 - 28 | 28 | 29 - 29 | 29 | 29 - 33 | 33 | 33 - 34 | 34 | 35 - 1 | 1 | 0 - 0 | 0 | 1 1 | 1 | 1 + 2 | 2 | 3 3 | 3 | 3 - 4 | 4 | 5 - 8 | 8 | 9 - 9 | 9 | 9 + 6 | 6 | 7 15 | 15 | 15 - 23 | 23 | 23 - 30 | 30 | 31 + 24 | 24 | 25 + 33 | 33 | 33 + 1 | 1 | 0 + 1 | 1 | 3 1 | 1 | 2 - 2 | 2 | 3 - 5 | 5 | 5 - 6 | 6 | 7 + 0 | 0 | 1 7 | 7 | 7 + 9 | 9 | 9 10 | 10 | 11 11 | 11 | 11 - 13 | 13 | 13 16 | 16 | 17 17 | 17 | 17 18 | 18 | 19 - 20 | 20 | 21 - 21 | 21 | 21 + 19 | 19 | 19 22 | 22 | 23 - 24 | 24 | 25 + 23 | 23 | 23 + 25 | 25 | 25 26 | 26 | 27 27 | 27 | 27 + 29 | 29 | 29 31 | 31 | 31 + 4 | 4 | 5 + 5 | 5 | 5 + 8 | 8 | 9 + 12 | 12 | 13 + 13 | 13 | 13 + 14 | 14 | 15 + 20 | 20 | 21 + 21 | 21 | 21 + 28 | 28 | 29 + 30 | 30 | 31 32 | 32 | 33 - 1 | 1 | 3 + 34 | 34 | 35 (38 rows) select a,a+b,a+b from orca.m; a | ?column? | ?column? ----+----------+---------- - 12 | 13 | 13 - 14 | 15 | 15 - 19 | 19 | 19 - 25 | 25 | 25 - 28 | 29 | 29 - 29 | 29 | 29 - 33 | 33 | 33 - 34 | 35 | 35 - 1 | 0 | 0 - 0 | 1 | 1 1 | 1 | 1 + 2 | 3 | 3 3 | 3 | 3 - 4 | 5 | 5 - 8 | 9 | 9 - 9 | 9 | 9 + 6 | 7 | 7 15 | 15 | 15 - 23 | 23 | 23 - 30 | 31 | 31 + 24 | 25 | 25 + 33 | 33 | 33 + 1 | 0 | 0 + 1 | 3 | 3 1 | 2 | 2 - 2 | 3 | 3 - 5 | 5 | 5 - 6 | 7 | 7 + 0 | 1 | 1 7 | 7 | 7 + 9 | 9 | 9 10 | 11 | 11 11 | 11 | 11 - 13 | 13 | 13 16 | 17 | 17 17 | 17 | 17 18 | 19 | 19 - 20 | 21 | 21 - 21 | 21 | 21 + 19 | 19 | 19 22 | 23 | 23 - 24 | 25 | 25 + 23 | 23 | 23 + 25 | 25 | 25 26 | 27 | 27 27 | 27 | 27 + 29 | 29 | 29 31 | 31 | 31 + 4 | 5 | 5 + 5 | 5 | 5 + 8 | 9 | 9 + 12 | 13 | 13 + 13 | 13 | 13 + 14 | 15 | 15 + 20 | 21 | 21 + 21 | 21 | 21 + 28 | 29 | 29 + 30 | 31 | 31 32 | 33 | 33 - 1 | 3 | 3 + 34 | 35 | 35 (38 rows) -- func expr select * from orca.m where a=abs(b); a | b ---+---- - 1 | 1 1 | -1 + 1 | 1 (2 rows) -- grouping sets select a,b,count(*) from orca.m group by grouping sets ((a), (a,b)); a | b | count ----+----+------- - 13 | | 1 - 12 | | 1 - 
24 | 1 | 1 - 16 | 1 | 1 - 6 | 1 | 1 - 14 | | 1 - 20 | 1 | 1 - 2 | 1 | 1 - 16 | | 1 - 31 | 0 | 1 - 27 | 0 | 1 - 13 | 0 | 1 9 | 0 | 1 - 17 | | 1 - 32 | | 1 - 17 | 0 | 1 - 3 | | 1 - 34 | 1 | 1 - 19 | | 1 - 11 | | 1 9 | | 1 - 31 | | 1 - 28 | 1 | 1 - 10 | 1 | 1 - 18 | 1 | 1 - 21 | 0 | 1 - 8 | | 1 - 32 | 1 | 1 - 25 | 0 | 1 - 29 | 0 | 1 - 0 | | 1 - 18 | | 1 - 2 | | 1 - 14 | 1 | 1 - 7 | 0 | 1 - 30 | | 1 - 27 | | 1 - 5 | | 1 + 33 | 0 | 1 + 23 | 0 | 1 + 28 | | 1 + 33 | | 1 3 | 0 | 1 - 11 | 0 | 1 + 27 | | 1 + 1 | | 4 + 5 | 0 | 1 0 | 1 | 1 - 26 | | 1 - 4 | | 1 - 34 | | 1 + 12 | 1 | 1 + 11 | | 1 + 0 | | 1 + 17 | | 1 + 34 | 1 | 1 + 26 | 1 | 1 + 16 | 1 | 1 + 23 | | 1 + 30 | 1 | 1 + 14 | 1 | 1 + 19 | | 1 22 | 1 | 1 + 6 | | 1 + 13 | | 1 + 8 | 1 | 1 1 | -1 | 1 - 28 | | 1 - 19 | 0 | 1 1 | 2 | 1 - 10 | | 1 - 23 | | 1 1 | 1 | 1 - 15 | | 1 - 8 | 1 | 1 - 30 | 1 | 1 - 25 | | 1 + 20 | 1 | 1 + 2 | 1 | 1 + 2 | | 1 + 7 | | 1 + 18 | | 1 + 12 | | 1 + 17 | 0 | 1 + 8 | | 1 + 7 | 0 | 1 + 22 | | 1 21 | | 1 - 33 | | 1 - 12 | 1 | 1 - 20 | | 1 - 24 | | 1 - 1 | | 4 15 | 0 | 1 - 6 | | 1 + 29 | 0 | 1 + 32 | | 1 + 30 | | 1 29 | | 1 - 7 | | 1 - 1 | 0 | 1 - 5 | 0 | 1 - 26 | 1 | 1 + 14 | | 1 + 34 | | 1 + 13 | 0 | 1 + 15 | | 1 + 6 | 1 | 1 + 3 | | 1 + 25 | | 1 + 21 | 0 | 1 + 11 | 0 | 1 + 10 | 1 | 1 + 10 | | 1 + 18 | 1 | 1 + 24 | 1 | 1 + 31 | 0 | 1 + 19 | 0 | 1 4 | 1 | 1 - 33 | 0 | 1 - 22 | | 1 - 23 | 0 | 1 + 31 | | 1 + 27 | 0 | 1 + 16 | | 1 + 26 | | 1 + 32 | 1 | 1 + 1 | 0 | 1 + 4 | | 1 + 24 | | 1 + 25 | 0 | 1 + 5 | | 1 + 20 | | 1 + 28 | 1 | 1 (73 rows) select b,count(*) from orca.m group by grouping sets ((a), (a,b)); @@ -3816,277 +3819,277 @@ select b,count(*) from orca.m group by grouping sets ((a), (a,b)); | 1 1 | 1 -1 | 1 - | 1 - 0 | 1 2 | 1 - | 1 - | 1 1 | 1 - | 1 1 | 1 1 | 1 | 1 | 1 | 1 - 1 | 1 - | 1 | 1 - | 4 0 | 1 | 1 + 0 | 1 | 1 | 1 0 | 1 - 0 | 1 - 1 | 1 - 1 | 1 0 | 1 | 1 - 0 | 1 | 1 | 1 - 1 | 1 - 1 | 1 - 1 | 1 | 1 - 1 | 1 - 1 | 1 | 1 0 | 1 + | 1 + 1 | 1 0 | 1 + | 1 0 | 1 0 | 1 | 1 - 1 | 1 | 1 0 | 1 | 
1 - | 1 + | 4 + 0 | 1 + 1 | 1 + 1 | 1 | 1 | 1 | 1 1 | 1 1 | 1 1 | 1 - 0 | 1 | 1 - 0 | 1 1 | 1 - 0 | 1 + 1 | 1 | 1 1 | 1 | 1 | 1 + | 1 + 0 | 1 + 0 | 1 + 1 | 1 + | 1 + 1 | 1 + 1 | 1 + 0 | 1 0 | 1 + 1 | 1 | 1 + 0 | 1 | 1 | 1 + 1 | 1 0 | 1 + | 1 + | 1 0 | 1 - 1 | 1 | 1 | 1 + 1 | 1 (73 rows) select a,count(*) from orca.m group by grouping sets ((a), (a,b)); a | count ----+------- - 11 | 1 9 | 1 - 31 | 1 + 9 | 1 + 33 | 1 + 23 | 1 28 | 1 - 10 | 1 - 18 | 1 - 21 | 1 - 8 | 1 - 32 | 1 - 25 | 1 - 29 | 1 - 0 | 1 - 18 | 1 - 2 | 1 - 14 | 1 - 7 | 1 - 30 | 1 + 33 | 1 + 3 | 1 27 | 1 + 1 | 4 5 | 1 - 3 | 1 + 0 | 1 + 12 | 1 11 | 1 0 | 1 + 17 | 1 + 34 | 1 26 | 1 - 4 | 1 + 16 | 1 + 23 | 1 + 30 | 1 + 14 | 1 + 19 | 1 22 | 1 - 34 | 1 + 6 | 1 + 13 | 1 + 8 | 1 1 | 1 1 | 1 - 28 | 1 - 19 | 1 - 10 | 1 - 23 | 1 1 | 1 - 15 | 1 - 8 | 1 - 30 | 1 - 21 | 1 - 25 | 1 - 33 | 1 20 | 1 + 2 | 1 + 2 | 1 + 7 | 1 + 18 | 1 12 | 1 - 24 | 1 - 1 | 4 - 15 | 1 - 6 | 1 + 17 | 1 + 8 | 1 7 | 1 - 29 | 1 - 1 | 1 - 5 | 1 - 26 | 1 - 4 | 1 - 33 | 1 22 | 1 - 23 | 1 + 21 | 1 + 15 | 1 + 29 | 1 + 32 | 1 + 30 | 1 + 29 | 1 + 14 | 1 + 34 | 1 13 | 1 - 12 | 1 - 24 | 1 - 16 | 1 + 15 | 1 6 | 1 - 14 | 1 - 20 | 1 - 2 | 1 - 16 | 1 + 3 | 1 + 25 | 1 + 21 | 1 + 11 | 1 + 10 | 1 + 10 | 1 + 18 | 1 + 24 | 1 + 31 | 1 + 19 | 1 + 4 | 1 31 | 1 27 | 1 - 13 | 1 - 9 | 1 - 17 | 1 + 16 | 1 + 26 | 1 32 | 1 - 17 | 1 - 34 | 1 - 3 | 1 - 19 | 1 + 1 | 1 + 4 | 1 + 24 | 1 + 25 | 1 + 5 | 1 + 20 | 1 + 28 | 1 (73 rows) select a,count(*) from orca.m group by grouping sets ((a), (b)); a | count ----+------- - 11 | 1 - 9 | 1 31 | 1 - 8 | 1 - | 19 - 0 | 1 - 18 | 1 - 2 | 1 - 30 | 1 - 27 | 1 - 5 | 1 - 4 | 1 - 26 | 1 - 34 | 1 - | 1 - 28 | 1 - 10 | 1 - 23 | 1 + 29 | 1 + 3 | 1 15 | 1 + 24 | 1 | 1 - 25 | 1 21 | 1 - 33 | 1 - 20 | 1 - 24 | 1 - 1 | 4 - | 17 - 6 | 1 - 29 | 1 - 7 | 1 22 | 1 - 13 | 1 12 | 1 + | 17 + 10 | 1 + | 1 + 18 | 1 + | 19 + 2 | 1 + 4 | 1 14 | 1 + 23 | 1 + 8 | 1 + 28 | 1 + 9 | 1 + 13 | 1 + 6 | 1 + 19 | 1 + 33 | 1 + 26 | 1 + 5 | 1 + 32 | 1 + 34 | 1 16 | 1 + 0 
| 1 + 11 | 1 + 20 | 1 + 7 | 1 + 27 | 1 + 25 | 1 17 | 1 - 32 | 1 - 3 | 1 - 19 | 1 + 1 | 4 + 30 | 1 (39 rows) select a,b,count(*) from orca.m group by rollup(a, b); a | b | count ----+----+------- - 22 | 1 | 1 - 34 | | 1 - 1 | -1 | 1 - 1 | 2 | 1 + | | 38 + 9 | 0 | 1 + 9 | | 1 + 33 | 0 | 1 + 23 | 0 | 1 28 | | 1 - 19 | 0 | 1 - 10 | | 1 - 23 | | 1 - 1 | 1 | 1 - 15 | | 1 - 8 | 1 | 1 - 30 | 1 | 1 - 21 | | 1 - 25 | | 1 33 | | 1 - 20 | | 1 - 12 | 1 | 1 - 24 | | 1 + 3 | 0 | 1 + 27 | | 1 1 | | 4 - 15 | 0 | 1 - 6 | | 1 - 7 | | 1 - 29 | | 1 - 1 | 0 | 1 5 | 0 | 1 + 0 | 1 | 1 + 12 | 1 | 1 + 11 | | 1 + 0 | | 1 + 17 | | 1 + 34 | 1 | 1 26 | 1 | 1 - 4 | 1 | 1 - 33 | 0 | 1 - 22 | | 1 - 23 | 0 | 1 - 13 | | 1 - 12 | | 1 - 24 | 1 | 1 16 | 1 | 1 - 6 | 1 | 1 - 14 | | 1 + 23 | | 1 + 30 | 1 | 1 + 14 | 1 | 1 + 19 | | 1 + 22 | 1 | 1 + 6 | | 1 + 13 | | 1 + 8 | 1 | 1 + 1 | -1 | 1 + 1 | 2 | 1 + 1 | 1 | 1 20 | 1 | 1 2 | 1 | 1 - 16 | | 1 - 31 | 0 | 1 - 27 | 0 | 1 - 13 | 0 | 1 - 9 | 0 | 1 - 17 | | 1 - 32 | | 1 + 2 | | 1 + 7 | | 1 + 18 | | 1 + 12 | | 1 17 | 0 | 1 - 3 | | 1 - 34 | 1 | 1 - 19 | | 1 - 11 | | 1 - 9 | | 1 - 31 | | 1 - 28 | 1 | 1 - 10 | 1 | 1 - 18 | 1 | 1 - 21 | 0 | 1 8 | | 1 - 32 | 1 | 1 - 25 | 0 | 1 - 29 | 0 | 1 - 0 | | 1 - 18 | | 1 - 2 | | 1 - 14 | 1 | 1 7 | 0 | 1 + 22 | | 1 + 21 | | 1 + 15 | 0 | 1 + 29 | 0 | 1 + 32 | | 1 30 | | 1 - 27 | | 1 - 5 | | 1 - 3 | 0 | 1 - | | 38 + 29 | | 1 + 14 | | 1 + 34 | | 1 + 13 | 0 | 1 + 15 | | 1 + 6 | 1 | 1 + 3 | | 1 + 25 | | 1 + 21 | 0 | 1 11 | 0 | 1 - 0 | 1 | 1 + 10 | 1 | 1 + 10 | | 1 + 18 | 1 | 1 + 24 | 1 | 1 + 31 | 0 | 1 + 19 | 0 | 1 + 4 | 1 | 1 + 31 | | 1 + 27 | 0 | 1 + 16 | | 1 26 | | 1 + 32 | 1 | 1 + 1 | 0 | 1 4 | | 1 + 24 | | 1 + 25 | 0 | 1 + 5 | | 1 + 20 | | 1 + 28 | 1 | 1 (74 rows) select a,b,count(*) from orca.m group by rollup((a),(a,b)) order by 1,2,3; @@ -4177,54 +4180,54 @@ select count(*) from orca.m group by (); select a, count(*) from orca.r group by (), a; a | count ----+------- - 14 | 1 - 16 | 1 - 17 | 1 - 1 | 1 - 15 | 1 - 2 | 1 - 13 
| 1 - 7 | 1 + 8 | 1 + | 1 19 | 1 + 7 | 1 18 | 1 - | 1 - 6 | 1 4 | 1 - 20 | 1 + 2 | 1 + 16 | 1 3 | 1 - 5 | 1 - 9 | 1 + 1 | 1 12 | 1 - 8 | 1 + 20 | 1 + 15 | 1 + 17 | 1 11 | 1 + 13 | 1 10 | 1 + 9 | 1 + 5 | 1 + 6 | 1 + 14 | 1 (21 rows) select a, count(*) from orca.r group by grouping sets ((),(a)); a | count ----+------- - 14 | 1 - 5 | 1 - 9 | 1 - 10 | 1 + | 21 + 8 | 1 + 11 | 1 + | 1 19 | 1 - 18 | 1 - 1 | 1 + 4 | 1 + 14 | 1 + 3 | 1 + 17 | 1 20 | 1 - | 21 7 | 1 + 13 | 1 + 10 | 1 + 9 | 1 + 1 | 1 + 5 | 1 + 18 | 1 + 2 | 1 16 | 1 15 | 1 - 11 | 1 - 2 | 1 6 | 1 - | 1 - 17 | 1 - 4 | 1 - 8 | 1 - 3 | 1 12 | 1 - 13 | 1 (22 rows) select a, b, count(*) c from orca.r group by grouping sets ((),(a), (a,b)) order by b,a,c; @@ -4332,35 +4335,35 @@ select 1 from orca.r group by (); select a,1 from orca.r group by rollup(a); a | ?column? ----+---------- + | 1 + 11 | 1 + 8 | 1 + | 1 + 19 | 1 + 4 | 1 + 14 | 1 + 3 | 1 + 17 | 1 + 20 | 1 + 13 | 1 + 10 | 1 + 9 | 1 + 7 | 1 1 | 1 5 | 1 - 9 | 1 - 10 | 1 - 14 | 1 18 | 1 - 19 | 1 2 | 1 - 6 | 1 - 7 | 1 - 11 | 1 15 | 1 16 | 1 - 20 | 1 - | 1 - 3 | 1 - 4 | 1 - 8 | 1 + 6 | 1 12 | 1 - 13 | 1 - 17 | 1 - | 1 (22 rows) select distinct grouping(a) + grouping(b) from orca.m group by rollup(a,b); ?column? 
---------- - 0 2 + 0 1 (3 rows) @@ -4368,205 +4371,205 @@ select distinct grouping(a) + grouping(b) from orca.m group by rollup(a,b); select array[array[a,b]], array[b] from orca.r; array | array ------------+------- - {{1,1}} | {1} {{2,2}} | {2} - {{13,1}} | {1} - {{14,2}} | {2} - {{15,0}} | {0} - {{16,1}} | {1} - {{17,2}} | {2} {{3,0}} | {0} {{4,1}} | {1} - {{5,2}} | {2} - {{6,0}} | {0} {{7,1}} | {1} + {{8,2}} | {2} + {{16,1}} | {1} {{18,0}} | {0} {{19,1}} | {1} - {{20,2}} | {2} {{NULL,1}} | {1} - {{8,2}} | {2} + {{5,2}} | {2} + {{6,0}} | {0} {{9,0}} | {0} {{10,1}} | {1} {{11,2}} | {2} + {{13,1}} | {1} + {{14,2}} | {2} + {{17,2}} | {2} + {{1,1}} | {1} {{12,0}} | {0} + {{15,0}} | {0} + {{20,2}} | {2} (21 rows) -- setops select a, b from orca.m union select b,a from orca.m; a | b ----+---- - 0 | 1 - 0 | 13 - 0 | 15 - 0 | 27 - 0 | 29 0 | 31 - 1 | 8 - 1 | 10 - 1 | 12 - 1 | 24 - 1 | 26 - 1 | 32 - 3 | 0 - 7 | 0 - 10 | 1 - 11 | 0 - 14 | 1 - 18 | 1 - 21 | 0 - 25 | 0 - 28 | 1 - 29 | 0 - 32 | 1 - 0 | 3 - 0 | 5 - 0 | 17 - 0 | 19 - 0 | 21 - 0 | 33 - 1 | -1 - 1 | 0 - 1 | 1 - 1 | 2 - 1 | 14 1 | 16 - 1 | 28 - 1 | 30 - 1 | 34 - 4 | 1 - 5 | 0 + 29 | 0 8 | 1 - 12 | 1 - 15 | 0 + 33 | 0 + -1 | 1 + 1 | 32 + 1 | 26 + 32 | 1 + 1 | 18 19 | 0 + 0 | 13 + 0 | 29 22 | 1 + 2 | 1 + 25 | 0 + 0 | 1 + 1 | 22 + 4 | 1 + 13 | 0 + 1 | 0 + 0 | 27 + 0 | 19 + 6 | 1 23 | 0 + 18 | 1 + 1 | 34 + 1 | 14 + 1 | 4 + 0 | 3 + 11 | 0 + 5 | 0 26 | 1 - 30 | 1 - 33 | 0 - -1 | 1 + 0 | 17 0 | 7 - 0 | 9 - 0 | 11 0 | 23 - 0 | 25 - 1 | 4 - 1 | 6 - 1 | 18 - 1 | 20 - 1 | 22 - 2 | 1 - 6 | 1 + 1 | 28 + 15 | 0 + 21 | 0 9 | 0 - 13 | 0 - 16 | 1 + 1 | 8 + 14 | 1 + 3 | 0 + 1 | 24 + 0 | 11 + 0 | 15 + 0 | 33 + 28 | 1 17 | 0 - 20 | 1 - 24 | 1 - 27 | 0 + 30 | 1 + 0 | 5 + 7 | 0 + 1 | 10 31 | 0 + 10 | 1 + 1 | 30 + 12 | 1 + 1 | 20 + 27 | 0 + 1 | 6 + 0 | 9 + 1 | 1 + 24 | 1 + 0 | 25 + 16 | 1 + 1 | -1 + 0 | 21 + 1 | 2 + 1 | 12 + 20 | 1 34 | 1 (71 rows) SELECT a from orca.m UNION ALL select b from orca.m UNION ALL select a+b from orca.m 
group by 1; a ---- + 4 + 5 + 8 12 + 13 14 - 19 - 25 + 20 + 21 28 - 29 - 33 + 30 + 32 34 1 + 0 1 1 0 - 0 1 - 0 + 1 0 1 - -1 - 35 - 31 - 17 1 - 15 - 33 - 2 - 0 + 1 + 1 13 - 29 - 0 + 5 + 11 + 9 + 25 + 21 + 33 + 17 1 + 2 3 - 4 - 8 - 9 + 6 15 - 23 - 30 - 1 + 24 + 33 1 - 0 - 0 1 1 0 + 1 0 + 1 0 1 + 0 + -1 + 2 1 - 7 19 - 21 - 3 - 5 2 - 5 - 6 + 29 + 27 + 3 7 + 0 + 7 + 9 10 11 - 13 16 17 18 - 20 - 21 + 19 22 - 24 + 23 + 25 26 27 + 29 31 - 32 - 1 1 0 - 1 0 1 0 - 0 1 0 1 - 1 0 1 - 1 - 1 0 0 1 - 2 - 9 - 27 - 11 + 0 + 0 + 0 + 31 + 15 + 35 + 0 23 - 25 + 1 (96 rows) drop table if exists orca.foo; @@ -4584,421 +4587,421 @@ insert into orca.bar select i, i%3, i%2 from generate_series(1,30)i; SELECT distinct a, b from orca.foo; a | b ----+--- - 16 | 0 - 15 | 1 - 33 | 1 - 13 | 1 - 30 | 0 - 17 | 1 - 34 | 0 + 1 | 1 + 38 | 0 36 | 0 + 12 | 0 + 40 | 0 + 20 | 0 + 30 | 0 35 | 1 + 23 | 1 + 26 | 0 + 15 | 1 31 | 1 - 37 | 1 - 29 | 1 - 28 | 0 2 | 0 - 1 | 1 - 14 | 0 - 5 | 1 - 6 | 0 + 16 | 0 + 22 | 0 + 3 | 1 + 8 | 0 7 | 1 + 34 | 0 + 39 | 1 4 | 0 - 22 | 0 - 18 | 0 - 21 | 1 19 | 1 - 38 | 0 - 40 | 0 - 39 | 1 - 20 | 0 - 3 | 1 - 27 | 1 + 29 | 1 24 | 0 - 12 | 0 - 8 | 0 - 25 | 1 - 23 | 1 - 10 | 0 + 27 | 1 + 37 | 1 + 18 | 0 11 | 1 - 32 | 0 - 26 | 0 + 14 | 0 + 28 | 0 + 6 | 0 + 5 | 1 + 33 | 1 + 13 | 1 + 25 | 1 9 | 1 + 17 | 1 + 32 | 0 + 10 | 0 + 21 | 1 (40 rows) SELECT distinct foo.a, bar.b from orca.foo, orca.bar where foo.b = bar.a; a | b ----+--- - 5 | 1 - 33 | 1 - 19 | 1 37 | 1 - 23 | 1 + 13 | 1 1 | 1 - 9 | 1 - 15 | 1 - 7 | 1 - 35 | 1 + 17 | 1 + 29 | 1 39 | 1 25 | 1 - 29 | 1 + 9 | 1 11 | 1 - 27 | 1 - 13 | 1 - 17 | 1 - 21 | 1 - 31 | 1 + 23 | 1 + 33 | 1 3 | 1 + 31 | 1 + 5 | 1 + 19 | 1 + 15 | 1 + 7 | 1 + 21 | 1 + 27 | 1 + 35 | 1 (20 rows) SELECT distinct a, b from orca.foo; a | b ----+--- - 16 | 0 - 15 | 1 - 33 | 1 - 13 | 1 - 30 | 0 - 17 | 1 - 34 | 0 - 36 | 0 - 35 | 1 - 31 | 1 - 37 | 1 - 29 | 1 - 28 | 0 2 | 0 - 1 | 1 - 14 | 0 - 5 | 1 - 6 | 0 + 16 | 0 + 22 | 0 + 3 | 1 + 8 | 0 7 | 1 + 34 | 0 + 39 | 1 4 | 0 - 
22 | 0 - 18 | 0 - 21 | 1 19 | 1 + 29 | 1 + 24 | 0 + 27 | 1 + 37 | 1 + 18 | 0 + 1 | 1 38 | 0 + 36 | 0 + 12 | 0 40 | 0 - 39 | 1 20 | 0 - 3 | 1 - 27 | 1 - 24 | 0 - 12 | 0 - 8 | 0 - 25 | 1 + 30 | 0 + 35 | 1 23 | 1 - 10 | 0 - 11 | 1 - 32 | 0 26 | 0 + 15 | 1 + 31 | 1 + 11 | 1 + 14 | 0 + 28 | 0 + 6 | 0 + 5 | 1 + 33 | 1 + 13 | 1 + 25 | 1 9 | 1 + 17 | 1 + 32 | 0 + 10 | 0 + 21 | 1 (40 rows) SELECT distinct a, count(*) from orca.foo group by a; a | count ----+------- 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 - 6 | 1 - 7 | 1 - 8 | 1 - 9 | 1 - 10 | 1 - 11 | 1 - 12 | 1 - 13 | 1 - 14 | 1 + 26 | 1 15 | 1 - 16 | 1 - 17 | 1 - 18 | 1 - 19 | 1 - 20 | 1 - 21 | 1 - 22 | 1 23 | 1 - 24 | 1 - 25 | 1 - 26 | 1 - 27 | 1 - 28 | 1 - 29 | 1 30 | 1 + 40 | 1 31 | 1 + 38 | 1 + 12 | 1 + 35 | 1 + 20 | 1 + 36 | 1 + 17 | 1 + 25 | 1 + 10 | 1 + 28 | 1 + 13 | 1 + 21 | 1 + 9 | 1 + 11 | 1 + 5 | 1 32 | 1 33 | 1 + 14 | 1 + 6 | 1 + 7 | 1 + 3 | 1 + 22 | 1 + 27 | 1 + 29 | 1 34 | 1 - 35 | 1 - 36 | 1 - 37 | 1 - 38 | 1 39 | 1 - 40 | 1 + 24 | 1 + 37 | 1 + 8 | 1 + 4 | 1 + 19 | 1 + 16 | 1 + 18 | 1 + 2 | 1 (40 rows) SELECT distinct foo.a, bar.b, sum(bar.c+foo.c) from orca.foo, orca.bar where foo.b = bar.a group by foo.a, bar.b; a | b | sum ----+---+----- - 1 | 1 | 2 3 | 1 | 4 - 5 | 1 | 2 - 7 | 1 | 4 9 | 1 | 2 + 33 | 1 | 2 11 | 1 | 4 + 31 | 1 | 4 + 23 | 1 | 4 + 29 | 1 | 2 13 | 1 | 2 - 15 | 1 | 4 17 | 1 | 2 - 19 | 1 | 4 - 21 | 1 | 2 - 23 | 1 | 4 + 39 | 1 | 4 + 37 | 1 | 2 + 1 | 1 | 2 25 | 1 | 2 + 15 | 1 | 4 27 | 1 | 4 - 29 | 1 | 2 - 31 | 1 | 4 - 33 | 1 | 2 + 21 | 1 | 2 35 | 1 | 4 - 37 | 1 | 2 - 39 | 1 | 4 + 7 | 1 | 4 + 5 | 1 | 2 + 19 | 1 | 4 (20 rows) SELECT distinct a, count(*) from orca.foo group by a; a | count ----+------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 - 6 | 1 7 | 1 + 3 | 1 + 22 | 1 + 27 | 1 + 29 | 1 + 34 | 1 + 39 | 1 + 24 | 1 + 37 | 1 8 | 1 - 9 | 1 - 10 | 1 - 11 | 1 - 12 | 1 - 13 | 1 - 14 | 1 - 15 | 1 + 4 | 1 + 19 | 1 16 | 1 - 17 | 1 18 | 1 - 19 | 1 - 20 | 1 - 21 | 1 - 22 | 1 - 23 | 1 - 24 | 1 - 25 | 1 + 2 | 1 
+ 1 | 1 26 | 1 - 27 | 1 - 28 | 1 - 29 | 1 + 15 | 1 + 23 | 1 30 | 1 + 40 | 1 31 | 1 - 32 | 1 - 33 | 1 - 34 | 1 + 38 | 1 + 12 | 1 35 | 1 + 20 | 1 36 | 1 - 37 | 1 - 38 | 1 - 39 | 1 - 40 | 1 + 17 | 1 + 25 | 1 + 10 | 1 + 28 | 1 + 13 | 1 + 21 | 1 + 9 | 1 + 11 | 1 + 5 | 1 + 32 | 1 + 33 | 1 + 14 | 1 + 6 | 1 (40 rows) SELECT distinct foo.a, bar.b from orca.foo, orca.bar where foo.b = bar.a; a | b ----+--- - 15 | 1 - 7 | 1 - 35 | 1 + 9 | 1 + 11 | 1 + 23 | 1 + 33 | 1 + 3 | 1 + 31 | 1 + 37 | 1 + 13 | 1 + 1 | 1 + 17 | 1 + 29 | 1 39 | 1 25 | 1 - 29 | 1 - 11 | 1 5 | 1 - 33 | 1 19 | 1 - 37 | 1 - 23 | 1 - 1 | 1 - 9 | 1 - 27 | 1 - 13 | 1 - 17 | 1 + 15 | 1 + 7 | 1 21 | 1 - 31 | 1 - 3 | 1 + 27 | 1 + 35 | 1 (20 rows) SELECT distinct foo.a, bar.b, sum(bar.c+foo.c) from orca.foo, orca.bar where foo.b = bar.a group by foo.a, bar.b; a | b | sum ----+---+----- - 1 | 1 | 2 3 | 1 | 4 - 5 | 1 | 2 - 7 | 1 | 4 9 | 1 | 2 + 33 | 1 | 2 11 | 1 | 4 + 31 | 1 | 4 + 23 | 1 | 4 + 29 | 1 | 2 13 | 1 | 2 - 15 | 1 | 4 17 | 1 | 2 - 19 | 1 | 4 - 21 | 1 | 2 - 23 | 1 | 4 + 39 | 1 | 4 + 37 | 1 | 2 + 1 | 1 | 2 25 | 1 | 2 + 15 | 1 | 4 27 | 1 | 4 - 29 | 1 | 2 - 31 | 1 | 4 - 33 | 1 | 2 + 21 | 1 | 2 35 | 1 | 4 - 37 | 1 | 2 - 39 | 1 | 4 + 7 | 1 | 4 + 5 | 1 | 2 + 19 | 1 | 4 (20 rows) SELECT distinct a, b from orca.foo; a | b ----+--- - 16 | 0 - 15 | 1 - 33 | 1 - 13 | 1 - 30 | 0 - 17 | 1 - 34 | 0 - 36 | 0 - 35 | 1 - 31 | 1 - 37 | 1 - 29 | 1 - 28 | 0 2 | 0 - 1 | 1 - 14 | 0 - 5 | 1 - 6 | 0 + 16 | 0 + 22 | 0 + 3 | 1 + 8 | 0 7 | 1 + 34 | 0 + 39 | 1 4 | 0 - 22 | 0 - 18 | 0 - 21 | 1 19 | 1 + 29 | 1 + 24 | 0 + 27 | 1 + 37 | 1 + 18 | 0 + 1 | 1 38 | 0 + 36 | 0 + 12 | 0 40 | 0 - 39 | 1 20 | 0 - 3 | 1 - 27 | 1 - 24 | 0 - 12 | 0 - 8 | 0 - 25 | 1 + 30 | 0 + 35 | 1 23 | 1 - 10 | 0 - 11 | 1 - 32 | 0 26 | 0 + 15 | 1 + 31 | 1 + 11 | 1 + 14 | 0 + 28 | 0 + 6 | 0 + 5 | 1 + 33 | 1 + 13 | 1 + 25 | 1 9 | 1 + 17 | 1 + 32 | 0 + 10 | 0 + 21 | 1 (40 rows) SELECT distinct a, count(*) from orca.foo group by a; a | count ----+------- 1 | 1 - 2 | 1 - 
3 | 1 - 4 | 1 - 5 | 1 - 6 | 1 - 7 | 1 - 8 | 1 - 9 | 1 - 10 | 1 - 11 | 1 - 12 | 1 - 13 | 1 - 14 | 1 + 26 | 1 15 | 1 - 16 | 1 - 17 | 1 - 18 | 1 - 19 | 1 - 20 | 1 - 21 | 1 - 22 | 1 23 | 1 - 24 | 1 - 25 | 1 - 26 | 1 - 27 | 1 - 28 | 1 - 29 | 1 30 | 1 + 40 | 1 31 | 1 + 38 | 1 + 12 | 1 + 35 | 1 + 20 | 1 + 36 | 1 + 17 | 1 + 25 | 1 + 10 | 1 + 28 | 1 + 13 | 1 + 21 | 1 + 9 | 1 + 11 | 1 + 5 | 1 32 | 1 33 | 1 + 14 | 1 + 6 | 1 + 7 | 1 + 3 | 1 + 22 | 1 + 27 | 1 + 29 | 1 34 | 1 - 35 | 1 - 36 | 1 - 37 | 1 - 38 | 1 39 | 1 - 40 | 1 + 24 | 1 + 37 | 1 + 8 | 1 + 4 | 1 + 19 | 1 + 16 | 1 + 18 | 1 + 2 | 1 (40 rows) SELECT distinct foo.a, bar.b from orca.foo, orca.bar where foo.b = bar.a; a | b ----+--- - 15 | 1 - 7 | 1 - 35 | 1 + 37 | 1 + 13 | 1 + 1 | 1 + 17 | 1 + 29 | 1 39 | 1 25 | 1 - 29 | 1 + 9 | 1 11 | 1 - 5 | 1 + 23 | 1 33 | 1 + 3 | 1 + 31 | 1 + 5 | 1 19 | 1 - 37 | 1 - 23 | 1 - 1 | 1 - 9 | 1 - 27 | 1 - 13 | 1 - 17 | 1 + 15 | 1 + 7 | 1 21 | 1 - 31 | 1 - 3 | 1 + 27 | 1 + 35 | 1 (20 rows) SELECT distinct foo.a, bar.b, sum(bar.c+foo.c) from orca.foo, orca.bar where foo.b = bar.a group by foo.a, bar.b; a | b | sum ----+---+----- + 29 | 1 | 2 + 13 | 1 | 2 + 17 | 1 | 2 + 39 | 1 | 4 + 37 | 1 | 2 1 | 1 | 2 + 25 | 1 | 2 3 | 1 | 4 - 5 | 1 | 2 - 7 | 1 | 4 9 | 1 | 2 + 33 | 1 | 2 11 | 1 | 4 - 13 | 1 | 2 - 15 | 1 | 4 - 17 | 1 | 2 - 19 | 1 | 4 - 21 | 1 | 2 + 31 | 1 | 4 23 | 1 | 4 - 25 | 1 | 2 + 15 | 1 | 4 27 | 1 | 4 - 29 | 1 | 2 - 31 | 1 | 4 - 33 | 1 | 2 + 21 | 1 | 2 35 | 1 | 4 - 37 | 1 | 2 - 39 | 1 | 4 + 7 | 1 | 4 + 5 | 1 | 2 + 19 | 1 | 4 (20 rows) -- window operations @@ -6565,12 +6568,12 @@ select (select b from orca_w3 where a = orca_w1.a) as one, row_number() over(par | 1 | 2 | 3 - | 1 - | 2 - | 3 3 | 1 3 | 2 3 | 3 + | 1 + | 2 + | 3 (9 rows) -- aggref in subquery with window func in target list @@ -6632,9 +6635,9 @@ select (select rank() over(partition by orca_w2.a) from orca_w3 where a = orca_w select (select a+1 from (select a from orca_w2 where orca_w1.a=orca_w2.a) sq(a)) as one, row_number() 
over(partition by orca_w1.a) as two from orca_w1; one | two -----+----- - | 1 3 | 1 4 | 1 + | 1 (3 rows) -- correlated subquery in target list, mismatching varattnos @@ -6721,46 +6724,46 @@ select rank() over(partition by a, case when b = 0 then a+b end order by b asc) select foo.d from orca.foo full join orca.bar on (foo.d = bar.a) group by d; d ---- - 30 - 35 - 14 - 37 - 16 - 28 - 31 - 1 - 17 - 15 - 36 - 34 - 33 2 - 0 - 13 - 29 + 3 + 4 7 + 8 + 16 + 18 19 + 22 + 24 + 27 + 29 + 34 + 37 39 - 18 - 6 - 4 - 21 + 0 + 1 + 12 + 15 20 + 23 + 26 + 30 + 31 + 35 + 36 38 - 3 5 - 22 - 24 + 6 9 - 12 - 8 - 26 - 27 - 32 + 10 11 - 23 + 13 + 14 + 17 + 21 25 - 10 + 28 + 32 + 33 (40 rows) select 1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d; @@ -6846,87 +6849,87 @@ insert into orca.rcte select i, i%2, i%3 from generate_series(1,40)i; with x as (select * from orca.rcte where a < 10) select * from x x1, x x2; a | b | c | a | b | c ---+---+---+---+---+--- - 8 | 0 | 2 | 8 | 0 | 2 - 8 | 0 | 2 | 9 | 1 | 0 - 8 | 0 | 2 | 3 | 1 | 0 - 8 | 0 | 2 | 4 | 0 | 1 - 8 | 0 | 2 | 5 | 1 | 2 - 8 | 0 | 2 | 6 | 0 | 0 - 8 | 0 | 2 | 7 | 1 | 1 - 8 | 0 | 2 | 1 | 1 | 1 - 8 | 0 | 2 | 2 | 0 | 2 - 9 | 1 | 0 | 8 | 0 | 2 - 9 | 1 | 0 | 9 | 1 | 0 - 9 | 1 | 0 | 3 | 1 | 0 - 9 | 1 | 0 | 4 | 0 | 1 - 9 | 1 | 0 | 5 | 1 | 2 - 9 | 1 | 0 | 6 | 0 | 0 - 9 | 1 | 0 | 7 | 1 | 1 - 9 | 1 | 0 | 1 | 1 | 1 - 9 | 1 | 0 | 2 | 0 | 2 - 1 | 1 | 1 | 8 | 0 | 2 - 1 | 1 | 1 | 9 | 1 | 0 - 1 | 1 | 1 | 1 | 1 | 1 - 1 | 1 | 1 | 2 | 0 | 2 - 1 | 1 | 1 | 3 | 1 | 0 - 1 | 1 | 1 | 4 | 0 | 1 - 1 | 1 | 1 | 5 | 1 | 2 - 1 | 1 | 1 | 6 | 0 | 0 - 1 | 1 | 1 | 7 | 1 | 1 - 2 | 0 | 2 | 8 | 0 | 2 - 2 | 0 | 2 | 9 | 1 | 0 - 2 | 0 | 2 | 1 | 1 | 1 2 | 0 | 2 | 2 | 0 | 2 2 | 0 | 2 | 3 | 1 | 0 2 | 0 | 2 | 4 | 0 | 1 + 2 | 0 | 2 | 7 | 1 | 1 + 2 | 0 | 2 | 8 | 0 | 2 + 2 | 0 | 2 | 1 | 1 | 1 2 | 0 | 2 | 5 | 1 | 2 2 | 0 | 2 | 6 | 0 | 0 - 2 | 0 | 2 | 7 | 1 | 1 - 3 | 1 | 0 | 8 | 0 | 2 - 3 | 1 | 0 | 9 | 1 | 0 + 2 | 0 | 2 | 9 | 1 | 0 + 3 | 1 | 0 | 2 | 0 | 2 3 | 1 
| 0 | 3 | 1 | 0 3 | 1 | 0 | 4 | 0 | 1 - 3 | 1 | 0 | 5 | 1 | 2 - 3 | 1 | 0 | 6 | 0 | 0 3 | 1 | 0 | 7 | 1 | 1 + 3 | 1 | 0 | 8 | 0 | 2 3 | 1 | 0 | 1 | 1 | 1 - 3 | 1 | 0 | 2 | 0 | 2 - 4 | 0 | 1 | 8 | 0 | 2 - 4 | 0 | 1 | 9 | 1 | 0 + 3 | 1 | 0 | 5 | 1 | 2 + 3 | 1 | 0 | 6 | 0 | 0 + 3 | 1 | 0 | 9 | 1 | 0 + 4 | 0 | 1 | 2 | 0 | 2 4 | 0 | 1 | 3 | 1 | 0 4 | 0 | 1 | 4 | 0 | 1 - 4 | 0 | 1 | 5 | 1 | 2 - 4 | 0 | 1 | 6 | 0 | 0 4 | 0 | 1 | 7 | 1 | 1 + 4 | 0 | 1 | 8 | 0 | 2 4 | 0 | 1 | 1 | 1 | 1 - 4 | 0 | 1 | 2 | 0 | 2 - 5 | 1 | 2 | 8 | 0 | 2 - 5 | 1 | 2 | 9 | 1 | 0 + 4 | 0 | 1 | 5 | 1 | 2 + 4 | 0 | 1 | 6 | 0 | 0 + 4 | 0 | 1 | 9 | 1 | 0 + 7 | 1 | 1 | 2 | 0 | 2 + 7 | 1 | 1 | 3 | 1 | 0 + 7 | 1 | 1 | 4 | 0 | 1 + 7 | 1 | 1 | 7 | 1 | 1 + 7 | 1 | 1 | 8 | 0 | 2 + 7 | 1 | 1 | 1 | 1 | 1 + 7 | 1 | 1 | 5 | 1 | 2 + 7 | 1 | 1 | 6 | 0 | 0 + 7 | 1 | 1 | 9 | 1 | 0 + 8 | 0 | 2 | 2 | 0 | 2 + 8 | 0 | 2 | 3 | 1 | 0 + 8 | 0 | 2 | 4 | 0 | 1 + 8 | 0 | 2 | 7 | 1 | 1 + 8 | 0 | 2 | 8 | 0 | 2 + 8 | 0 | 2 | 1 | 1 | 1 + 8 | 0 | 2 | 5 | 1 | 2 + 8 | 0 | 2 | 6 | 0 | 0 + 8 | 0 | 2 | 9 | 1 | 0 + 1 | 1 | 1 | 2 | 0 | 2 + 1 | 1 | 1 | 3 | 1 | 0 + 1 | 1 | 1 | 4 | 0 | 1 + 1 | 1 | 1 | 7 | 1 | 1 + 1 | 1 | 1 | 8 | 0 | 2 + 1 | 1 | 1 | 1 | 1 | 1 + 1 | 1 | 1 | 5 | 1 | 2 + 1 | 1 | 1 | 6 | 0 | 0 + 1 | 1 | 1 | 9 | 1 | 0 + 5 | 1 | 2 | 2 | 0 | 2 5 | 1 | 2 | 3 | 1 | 0 5 | 1 | 2 | 4 | 0 | 1 - 5 | 1 | 2 | 5 | 1 | 2 - 5 | 1 | 2 | 6 | 0 | 0 5 | 1 | 2 | 7 | 1 | 1 + 5 | 1 | 2 | 8 | 0 | 2 5 | 1 | 2 | 1 | 1 | 1 - 5 | 1 | 2 | 2 | 0 | 2 - 6 | 0 | 0 | 8 | 0 | 2 - 6 | 0 | 0 | 9 | 1 | 0 + 5 | 1 | 2 | 5 | 1 | 2 + 5 | 1 | 2 | 6 | 0 | 0 + 5 | 1 | 2 | 9 | 1 | 0 + 6 | 0 | 0 | 2 | 0 | 2 6 | 0 | 0 | 3 | 1 | 0 6 | 0 | 0 | 4 | 0 | 1 - 6 | 0 | 0 | 5 | 1 | 2 - 6 | 0 | 0 | 6 | 0 | 0 6 | 0 | 0 | 7 | 1 | 1 + 6 | 0 | 0 | 8 | 0 | 2 6 | 0 | 0 | 1 | 1 | 1 - 6 | 0 | 0 | 2 | 0 | 2 - 7 | 1 | 1 | 8 | 0 | 2 - 7 | 1 | 1 | 9 | 1 | 0 - 7 | 1 | 1 | 3 | 1 | 0 - 7 | 1 | 1 | 4 | 0 | 1 - 7 | 1 | 1 | 5 | 1 | 2 - 7 | 1 | 1 | 6 | 0 | 0 - 7 | 1 | 1 | 7 | 1 | 1 - 7 | 1 | 1 | 1 | 1 | 
1 - 7 | 1 | 1 | 2 | 0 | 2 + 6 | 0 | 0 | 5 | 1 | 2 + 6 | 0 | 0 | 6 | 0 | 0 + 6 | 0 | 0 | 9 | 1 | 0 + 9 | 1 | 0 | 2 | 0 | 2 + 9 | 1 | 0 | 3 | 1 | 0 + 9 | 1 | 0 | 4 | 0 | 1 + 9 | 1 | 0 | 7 | 1 | 1 + 9 | 1 | 0 | 8 | 0 | 2 + 9 | 1 | 0 | 1 | 1 | 1 + 9 | 1 | 0 | 5 | 1 | 2 + 9 | 1 | 0 | 6 | 0 | 0 + 9 | 1 | 0 | 9 | 1 | 0 (81 rows) with x as (select * from orca.rcte where a < 10) select * from x x1, x x2 where x2.a = x1.b; @@ -6934,41 +6937,41 @@ with x as (select * from orca.rcte where a < 10) select * from x x1, x x2 where ---+---+---+---+---+--- 1 | 1 | 1 | 1 | 1 | 1 3 | 1 | 0 | 1 | 1 | 1 - 5 | 1 | 2 | 1 | 1 | 1 7 | 1 | 1 | 1 | 1 | 1 + 5 | 1 | 2 | 1 | 1 | 1 9 | 1 | 0 | 1 | 1 | 1 (5 rows) with x as (select * from orca.rcte where a < 10) select a from x union all select b from x; a --- + 2 3 4 - 5 - 6 7 - 1 + 8 0 1 0 1 - 8 + 0 + 5 + 6 9 + 1 0 1 1 - 2 1 - 0 (18 rows) with x as (select * from orca.rcte where a < 10) select * from x x1 where x1.b = any (select x2.a from x x2 group by x2.a); a | b | c ---+---+--- - 1 | 1 | 1 3 | 1 | 0 - 5 | 1 | 2 7 | 1 | 1 + 1 | 1 | 1 + 5 | 1 | 2 9 | 1 | 0 (5 rows) @@ -6980,31 +6983,31 @@ with x as (select * from orca.rcte where a < 10) select * from x x1 where x1.b = with x as (select * from orca.rcte where a < 10) select * from x x1, x x2, x x3 where x2.a = x1.b and x3.b = x2.b ; a | b | c | a | b | c | a | b | c ---+---+---+---+---+---+---+---+--- - 9 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 - 7 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 - 5 | 1 | 2 | 1 | 1 | 1 | 1 | 1 | 1 + 3 | 1 | 0 | 1 | 1 | 1 | 9 | 1 | 0 + 3 | 1 | 0 | 1 | 1 | 1 | 5 | 1 | 2 + 3 | 1 | 0 | 1 | 1 | 1 | 7 | 1 | 1 + 3 | 1 | 0 | 1 | 1 | 1 | 3 | 1 | 0 3 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 - 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 - 9 | 1 | 0 | 1 | 1 | 1 | 3 | 1 | 0 + 7 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 0 + 7 | 1 | 1 | 1 | 1 | 1 | 5 | 1 | 2 + 7 | 1 | 1 | 1 | 1 | 1 | 7 | 1 | 1 7 | 1 | 1 | 1 | 1 | 1 | 3 | 1 | 0 - 5 | 1 | 2 | 1 | 1 | 1 | 3 | 1 | 0 - 3 | 1 | 0 | 1 | 1 | 1 | 3 | 1 | 0 + 7 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 
1 + 1 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 0 + 1 | 1 | 1 | 1 | 1 | 1 | 5 | 1 | 2 + 1 | 1 | 1 | 1 | 1 | 1 | 7 | 1 | 1 1 | 1 | 1 | 1 | 1 | 1 | 3 | 1 | 0 - 9 | 1 | 0 | 1 | 1 | 1 | 5 | 1 | 2 - 7 | 1 | 1 | 1 | 1 | 1 | 5 | 1 | 2 + 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 + 5 | 1 | 2 | 1 | 1 | 1 | 9 | 1 | 0 5 | 1 | 2 | 1 | 1 | 1 | 5 | 1 | 2 - 3 | 1 | 0 | 1 | 1 | 1 | 5 | 1 | 2 - 1 | 1 | 1 | 1 | 1 | 1 | 5 | 1 | 2 - 9 | 1 | 0 | 1 | 1 | 1 | 7 | 1 | 1 - 7 | 1 | 1 | 1 | 1 | 1 | 7 | 1 | 1 5 | 1 | 2 | 1 | 1 | 1 | 7 | 1 | 1 - 3 | 1 | 0 | 1 | 1 | 1 | 7 | 1 | 1 - 1 | 1 | 1 | 1 | 1 | 1 | 7 | 1 | 1 + 5 | 1 | 2 | 1 | 1 | 1 | 3 | 1 | 0 + 5 | 1 | 2 | 1 | 1 | 1 | 1 | 1 | 1 9 | 1 | 0 | 1 | 1 | 1 | 9 | 1 | 0 - 7 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 0 - 5 | 1 | 2 | 1 | 1 | 1 | 9 | 1 | 0 - 3 | 1 | 0 | 1 | 1 | 1 | 9 | 1 | 0 - 1 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 0 + 9 | 1 | 0 | 1 | 1 | 1 | 5 | 1 | 2 + 9 | 1 | 0 | 1 | 1 | 1 | 7 | 1 | 1 + 9 | 1 | 0 | 1 | 1 | 1 | 3 | 1 | 0 + 9 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 (25 rows) with x as (select * from orca.rcte where a < 10) select * from x x2 where x2.b < (select avg(b) from x x1); @@ -7012,8 +7015,8 @@ with x as (select * from orca.rcte where a < 10) select * from x x2 where x2.b < ---+---+--- 2 | 0 | 2 4 | 0 | 1 - 6 | 0 | 0 8 | 0 | 2 + 6 | 0 | 0 (4 rows) with x as (select r.a from orca.r, orca.s where r.a < 10 and s.d < 10 and r.a = s.d) select * from x x1, x x2; @@ -7244,700 +7247,700 @@ with x as (select r.a from orca.r, orca.s where r.a < 10 and s.d < 10 and r.a = 1 | 1 1 | 1 1 | 1 -(225 rows) - -with x as (select r.a from orca.r, orca.s where r.a < 10 and s.c < 10 and r.a = s.c) select * from x x1, x x2; - a | a ----+--- - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 
- 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 +(225 rows) + +with x as (select r.a from orca.r, orca.s where r.a < 10 and s.c < 10 and r.a = s.c) select * from x x1, x x2; + a | a +---+--- + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 5 + 1 | 6 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 2 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 
4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 5 | 1 5 | 2 5 | 1 - 5 | 2 5 | 1 - 5 | 2 5 | 1 - 5 | 2 5 | 1 + 5 | 1 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 + 5 | 2 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 6 | 2 6 | 3 6 | 4 - 6 | 5 - 6 | 6 + 6 | 2 6 | 3 6 | 4 - 6 | 5 - 6 | 6 + 6 | 2 6 | 3 6 | 4 - 6 | 5 - 6 | 6 + 6 | 2 6 | 3 6 | 4 - 6 | 5 - 6 | 6 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 + 6 | 2 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 3 - 6 | 4 + 5 | 2 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 6 | 5 6 | 6 - 6 | 3 - 6 | 4 6 | 5 6 | 6 - 6 | 3 - 6 | 4 6 | 5 6 | 6 - 6 | 3 - 6 | 4 6 | 5 6 | 6 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 3 | 1 - 3 | 2 - 
3 | 1 - 3 | 2 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 3 | 3 - 3 | 4 - 3 | 5 - 3 | 6 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 4 | 3 - 4 | 4 - 4 | 5 - 4 | 6 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 - 5 | 2 - 5 | 1 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 2 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 + 5 | 5 + 5 | 6 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 + 5 | 2 5 | 3 5 | 4 - 5 | 5 - 5 | 6 - 6 | 1 - 6 | 2 - 6 | 1 - 6 | 2 - 6 | 1 + 5 | 2 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 + 5 | 1 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 + 6 | 5 + 6 | 6 6 | 2 - 6 | 1 + 6 | 3 + 6 | 4 6 | 2 - 6 | 1 + 6 | 3 + 6 | 4 6 | 2 6 | 3 6 | 4 - 6 | 5 - 6 | 6 + 6 | 2 6 | 3 6 | 4 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 2 | 1 - 2 | 2 - 2 | 1 + 6 | 2 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 6 | 1 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 2 | 3 2 | 4 + 2 | 2 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 2 | 5 2 | 6 - 2 | 3 - 2 | 4 2 | 5 2 | 6 - 2 | 
3 - 2 | 4 2 | 5 2 | 6 - 2 | 3 - 2 | 4 2 | 5 2 | 6 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 2 | 1 2 | 2 - 2 | 1 - 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 - 2 | 1 + 2 | 3 + 2 | 4 2 | 2 2 | 3 2 | 4 + 2 | 2 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 2 | 5 2 | 6 - 2 | 3 - 2 | 4 2 | 5 2 | 6 - 2 | 3 - 2 | 4 2 | 5 2 | 6 - 2 | 3 - 2 | 4 2 | 5 2 | 6 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 2 | 1 - 2 | 2 - 2 | 1 - 2 | 2 - 2 | 1 - 2 | 2 - 2 | 1 - 2 | 2 - 2 | 1 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 2 | 1 2 | 2 2 | 1 - 2 | 2 2 | 1 - 2 | 2 2 | 1 - 2 | 2 2 | 1 + 2 | 1 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 
2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 1 - 1 | 2 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 1 | 3 - 1 | 4 - 1 | 5 - 1 | 6 - 2 | 1 2 | 2 2 | 1 - 2 | 2 2 | 1 - 2 | 2 2 | 1 - 2 | 2 2 | 1 + 2 | 1 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 5 + 3 | 6 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 3 + 3 | 4 + 3 | 2 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 3 | 1 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 5 + 4 | 6 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 3 + 4 | 4 + 4 | 2 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 4 | 1 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 + 2 | 5 + 2 | 6 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 + 2 | 2 2 | 3 2 | 4 - 2 | 5 - 2 | 6 - 6 | 5 - 6 | 6 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 + 2 | 2 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 + 2 | 1 (676 rows) with x as (select * from orca.rcte where a < 10) (select a from x x2) union all (select max(a) from x x1); a --- - 1 2 3 4 - 5 - 6 7 8 + 1 + 5 + 6 9 9 (10 rows) @@ -7975,7 +7978,7 @@ ERROR: more than one row returned by a subquery used as an expression select (select generate_series(1,5)); ERROR: more than one row returned by a subquery used as an expression select (select a from orca.foo inner1 where inner1.a=outer1.a union select b from orca.foo inner2 where inner2.b=outer1.b) from orca.foo outer1; -ERROR: more than one row returned by a subquery used as an expression (seg0 slice3 192.168.0.37:25432 pid=14095) +ERROR: more than one row returned by a subquery used as an 
expression (seg0 slice1 127.0.1.1:9202 pid=1583557) select (select generate_series(1,1)) as series; series -------- @@ -8113,10 +8116,10 @@ group by ten having exists (select 1 from orca.onek b where sum(distinct a.four) = b.four); ten | sum -----+----- - 0 | 2 - 1 | 3 - 3 | 3 4 | 2 + 3 | 3 + 1 | 3 + 0 | 2 9 | 3 (5 rows) @@ -8255,13 +8258,13 @@ select * from orca.t order by 1,2; -- test EXPLAIN support of partition selection nodes, while we're at it. explain select * from orca.t order by 1,2; - QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=15864.38..16260.38 rows=158400 width=94) - Merge Key: t_1_prt_part201203.timest, t_1_prt_part201203.user_id - -> Sort (cost=15864.38..16260.38 rows=52800 width=94) - Sort Key: t_1_prt_part201203.timest, t_1_prt_part201203.user_id - -> Append (cost=0.00..2184.00 rows=52800 width=94) + QUERY PLAN +---------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=5137.70..7381.70 rows=158400 width=94) + Merge Key: t.timest, t.user_id + -> Sort (cost=5137.70..5269.70 rows=52800 width=94) + Sort Key: t.timest, t.user_id + -> Append (cost=0.00..996.00 rows=52800 width=94) -> Seq Scan on t_1_prt_part201203 t_1 (cost=0.00..122.00 rows=8800 width=94) -> Seq Scan on t_1_prt_part201204 t_2 (cost=0.00..122.00 rows=8800 width=94) -> Seq Scan on t_1_prt_part201205 t_3 (cost=0.00..122.00 rows=8800 width=94) @@ -8336,11 +8339,14 @@ insert into orca.t_date values('01-03-2012'::date,8,'tag1','tag2'); insert into orca.t_date values('01-03-2012'::date,9,'tag1','tag2'); set optimizer_enable_space_pruning=off; set optimizer_enable_constant_expression_evaluation=on; +-- start_ignore +analyze orca.t_date; +-- end_ignore explain select * from orca.t_date where user_id=9; - QUERY PLAN ------------------------------------------------------------------------------------- - 
Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..8.19 rows=6 width=21) - -> Append (cost=0.00..8.19 rows=2 width=21) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.37 rows=18 width=21) + -> Append (cost=0.00..6.13 rows=6 width=21) -> Seq Scan on t_date_1_prt_part201203 t_date_1 (cost=0.00..1.04 rows=1 width=20) Filter: (user_id = '9'::numeric) -> Seq Scan on t_date_1_prt_part201204 t_date_2 (cost=0.00..1.01 rows=1 width=21) @@ -8389,11 +8395,14 @@ insert into orca.t_text values('01-03-2012'::date,8,'bad','tag2'); insert into orca.t_text values('01-03-2012'::date,9,'ugly','tag2'); set optimizer_enable_space_pruning=off; set optimizer_enable_constant_expression_evaluation=on; +-- start_ignore +analyze orca.t_text; +-- end_ignore explain select * from orca.t_text where user_id=9; - QUERY PLAN ----------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..9.26 rows=3 width=21) - -> Append (cost=0.00..9.20 rows=3 width=21) + QUERY PLAN +------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..3.20 rows=9 width=21) + -> Append (cost=0.00..3.08 rows=3 width=21) -> Seq Scan on t_text_1_prt_partbad t_text_1 (cost=0.00..1.02 rows=1 width=21) Filter: (user_id = '9'::numeric) -> Seq Scan on t_text_1_prt_partgood t_text_2 (cost=0.00..1.02 rows=1 width=20) @@ -8428,11 +8437,14 @@ insert into orca.t_ceeval_ints values(5, 102, 'tag1', 'tag2'); set optimizer_enable_space_pruning=off; set optimizer_enable_constant_expression_evaluation=on; set optimizer_use_external_constant_expression_evaluation_for_ints = on; +-- start_ignore +analyze orca.t_ceeval_ints; +-- end_ignore explain select * from orca.t_ceeval_ints where user_id=4; - QUERY PLAN 
----------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..4.06 rows=3 width=21) - -> Append (cost=0.00..4.06 rows=1 width=21) + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..3.17 rows=9 width=21) + -> Append (cost=0.00..3.05 rows=3 width=21) -> Seq Scan on t_ceeval_ints_1_prt_part100 t_ceeval_ints_1 (cost=0.00..1.01 rows=1 width=21) Filter: (user_id = '4'::numeric) -> Seq Scan on t_ceeval_ints_1_prt_part101 t_ceeval_ints_2 (cost=0.00..1.01 rows=1 width=21) @@ -8502,10 +8514,10 @@ insert into orca.fooh2 select i%3, i%2, i from generate_series(1,20) i; select sum(f1.b) from orca.fooh1 f1 group by f1.a; sum ----- - 6 5 - 6 4 + 6 + 6 (4 rows) select f1.a + 1 from fooh1 f1 group by f1.a+1 having sum(f1.a+1) + 1 > 20; @@ -8518,8 +8530,8 @@ select 1 as one, f1.a from orca.fooh1 f1 group by f1.a having sum(f1.b) > 4; one | a -----+--- 1 | 1 - 1 | 2 1 | 0 + 1 | 2 (3 rows) select f1.a, 1 as one from orca.fooh1 f1 group by f1.a having 10 > (select f2.a from orca.fooh2 f2 group by f2.a having sum(f1.a) > count(*) order by f2.a limit 1) order by f1.a; @@ -8613,28 +8625,28 @@ select f1.a, 1 as one from orca.fooh1 f1 group by f1.a having f1.a = (select f2. select sum(f1.a+1)+1 from orca.fooh1 f1 group by f1.a+1; ?column? ---------- - 21 - 16 6 + 21 11 + 16 (4 rows) select sum(f1.a+1)+sum(f1.a+1) from orca.fooh1 f1 group by f1.a+1; ?column? ---------- 40 + 20 30 10 - 20 (4 rows) select sum(f1.a+1)+avg(f1.a+1), sum(f1.a), sum(f1.a+1) from orca.fooh1 f1 group by f1.a+1; ?column? 
| sum | sum ------------------------+-----+----- 24.0000000000000000 | 15 | 20 + 12.0000000000000000 | 5 | 10 18.0000000000000000 | 10 | 15 6.00000000000000000000 | 0 | 5 - 12.0000000000000000 | 5 | 10 (4 rows) -- @@ -8665,27 +8677,27 @@ select a, (select sum(e) from bar where foo.b = bar.f), b, count(*) from foo, ja select foo.a, (select (foo.a + foo.b) * count(bar.e) from bar), b, count(*) from foo group by foo.a, foo.b, foo.a + foo.b; a | ?column? | b | count ---+----------+---+------- + 3 | 18 | 3 | 1 2 | 12 | 2 | 1 1 | 6 | 1 | 1 - 3 | 18 | 3 | 1 (3 rows) -- aggfunc over an outer reference in a subquery select (select sum(foo.a + bar.d) from bar) from foo group by a, b; sum ----- - 9 15 12 + 9 (3 rows) -- complex expression of aggfunc over an outer reference in a subquery select (select sum(foo.a + bar.d) + 1 from bar) from foo group by a, b; ?column? ---------- - 13 10 16 + 13 (3 rows) -- aggrefs with multiple agglevelsup @@ -8719,27 +8731,27 @@ select (select max(f) from bar where d = 1 group by a, e) from foo group by a; select a, count(*), (with cte as (select min(d) dd from bar group by e) select max(a * dd) from cte) from foo group by a; a | count | max ---+-------+----- - 1 | 1 | 3 2 | 1 | 6 3 | 1 | 9 + 1 | 1 | 3 (3 rows) -- cte with an aggfunc of outer ref in an complex expression select a, count(*), (with cte as (select e, min(d) as dd from bar group by e) select max(a) * sum(dd) from cte) from foo group by a; a | count | ?column? 
---+-------+---------- - 1 | 1 | 6 2 | 1 | 12 3 | 1 | 18 + 1 | 1 | 6 (3 rows) -- subquery in group by select max(a) from foo group by (select e from bar where bar.e = foo.a); max ----- - 1 2 3 + 1 (3 rows) -- nested subquery in group by @@ -8762,8 +8774,8 @@ select max(a) from foo group by (select min(g) from jazz where foo.a = (select m select max(a) from foo group by b, (with cte as (select min(g) from jazz group by h) select a from cte); max ----- - 3 2 + 3 1 (3 rows) @@ -8790,8 +8802,8 @@ select b + (a+1) from foo group by b, a+1; ?column? ---------- 5 - 3 7 + 3 (3 rows) -- subselects inside aggs @@ -8839,9 +8851,9 @@ insert into orca.t77 select 'orange'::text; SELECT to_char(AVG( char_length(DT466.C952) ), '9999999.9999999'), MAX( char_length(DT466.C952) ) FROM orca.t77 DT466 GROUP BY char_length(DT466.C952); to_char | max ------------------+----- - 6.0000000 | 6 4.0000000 | 4 5.0000000 | 5 + 6.0000000 | 6 (3 rows) create table orca.prod9 (sale integer, prodnm varchar,price integer); @@ -8864,7 +8876,7 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into orca.toanalyze values (1,1), (2,2), (3,3); alter table orca.toanalyze drop column a; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy analyze orca.toanalyze; -- union create table orca.ur (a int, b int); @@ -8971,7 +8983,7 @@ create table orca.tab1 (i, j) as select i,i%2 from generate_series(1,10) i; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table orca.tab2 (a, b) as select 1, 2; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '?column?' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select * from orca.tab1 where 0 < (select count(*) from generate_series(1,i)) order by 1; i | j @@ -8993,10 +9005,10 @@ select * from orca.tab1 where i > (select b from orca.tab2); ----+--- 3 | 1 4 | 0 - 5 | 1 - 6 | 0 7 | 1 8 | 0 + 5 | 1 + 6 | 0 9 | 1 10 | 0 (8 rows) @@ -9706,15 +9718,15 @@ select 1 as x where 1 in (2, 3); SELECT generate_series(1,10) EXCEPT SELECT 1; generate_series ----------------- - 2 - 3 - 4 - 5 - 6 - 7 8 - 9 10 + 9 + 7 + 5 + 4 + 2 + 6 + 3 (9 rows) -- MPP-23932: SetOp of const table and volatile function @@ -9727,16 +9739,16 @@ SELECT generate_series(1,10) INTERSECT SELECT 1; SELECT generate_series(1,10) UNION SELECT 1; generate_series ----------------- + 8 + 10 + 9 + 7 1 - 2 - 3 - 4 5 + 4 + 2 6 - 7 - 8 - 9 - 10 + 3 (10 rows) -- warning messages for missing stats @@ -9899,14 +9911,13 @@ analyze orca.bm_test; create index bm_test_idx on orca.bm_test using bitmap (i); set optimizer_enable_bitmapscan=on; explain select * from orca.bm_test where i=2 and t='2'; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..4.50 rows=4 width=6) - -> Seq Scan on bm_test (cost=0.00..4.50 rows=2 width=6) - Filter: i = 2 AND t = '2'::text - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..1.54 rows=3 width=6) + 
-> Seq Scan on bm_test (cost=0.00..1.50 rows=1 width=6) + Filter: ((i = 2) AND (t = '2'::text)) + Optimizer: Postgres query optimizer +(4 rows) select * from orca.bm_test where i=2 and t='2'; i | t @@ -9938,12 +9949,15 @@ alter table orca.bm_dyn_test drop column to_be_dropped; alter table orca.bm_dyn_test add partition part5 values(5); insert into orca.bm_dyn_test values(2, 5, '2'); set optimizer_enable_dynamicbitmapscan=on; +-- start_ignore +analyze orca.bm_dyn_test; +-- end_ignore -- gather on 1 segment because of direct dispatch explain select * from orca.bm_dyn_test where i=2 and t='2'; - QUERY PLAN ------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..13.52 rows=13 width=10) - -> Append (cost=0.00..13.52 rows=5 width=10) + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.85 rows=22 width=10) + -> Append (cost=0.00..6.55 rows=7 width=10) -> Seq Scan on bm_dyn_test_1_prt_part0 bm_dyn_test_1 (cost=0.00..1.10 rows=1 width=10) Filter: ((i = 2) AND (t = '2'::text)) -> Seq Scan on bm_dyn_test_1_prt_part1 bm_dyn_test_2 (cost=0.00..1.10 rows=1 width=10) @@ -9992,12 +10006,15 @@ alter table orca.bm_dyn_test_onepart add partition part5 values(5); insert into orca.bm_dyn_test_onepart values(2, 5, '2'); set optimizer_enable_bitmapscan=on; set optimizer_enable_dynamictablescan = off; +-- start_ignore +analyze orca.bm_dyn_test_onepart; +-- end_ignore -- gather on 1 segment because of direct dispatch explain select * from orca.bm_dyn_test_onepart where i=2 and t='2'; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..13.52 rows=13 width=10) - -> Append (cost=0.00..13.52 rows=5 width=10) + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.85 rows=22 width=10) + -> Append (cost=0.00..6.55 rows=7 width=10) -> Seq Scan on bm_dyn_test_onepart_1_prt_part0 bm_dyn_test_onepart_1 (cost=0.00..1.10 rows=1 width=10) Filter: ((i = 2) AND (t = '2'::text)) -> Seq Scan on bm_dyn_test_onepart_1_prt_part1 bm_dyn_test_onepart_2 (cost=0.00..1.10 rows=1 width=10) @@ -10047,10 +10064,10 @@ create index bm_multi_test_idx_part on orca.bm_dyn_test_multilvl_part using bitm analyze orca.bm_dyn_test_multilvl_part; -- print name of parent index explain select * from orca.bm_dyn_test_multilvl_part where year = 2019; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..7.69 rows=53 width=18) - -> Append (cost=0.00..6.62 rows=18 width=18) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..5.13 rows=59 width=18) + -> Append (cost=0.00..4.34 rows=20 width=18) -> Seq Scan on bm_dyn_test_multilvl_part_1_prt_2_2_prt_1_3_prt_usa bm_dyn_test_multilvl_part_1 (cost=0.00..1.01 rows=1 width=44) Filter: (year = 2019) -> Seq Scan on bm_dyn_test_multilvl_part_1_prt_2_2_prt_1_3_prt_other_regions bm_dyn_test_multilvl_part_2 (cost=0.00..1.01 rows=1 width=44) @@ -10289,12 +10306,12 @@ GROUP BY 1 ORDER BY 1 asc ; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=39937.77..39951.93 rows=1000 width=16) + Gather Motion 3:1 (slice1; segments: 3) (cost=39962.80..39989.46 rows=1000 width=16) Merge Key: ((((tt.event_ts / 100000) / 
5) * 5)) - -> Sort (cost=39937.77..39938.60 rows=333 width=16) - Sort Key: ((((tt.event_ts / 100000) / 5) * 5)) - -> Finalize HashAggregate (cost=39917.97..39923.80 rows=333 width=16) - Group Key: ((((tt.event_ts / 100000) / 5) * 5)) + -> Finalize GroupAggregate (cost=39962.80..39976.13 rows=333 width=16) + Group Key: ((((tt.event_ts / 100000) / 5) * 5)) + -> Sort (cost=39962.80..39965.30 rows=1000 width=16) + Sort Key: ((((tt.event_ts / 100000) / 5) * 5)) -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=39875.47..39912.97 rows=1000 width=16) Hash Key: ((((tt.event_ts / 100000) / 5) * 5)) -> Streaming Partial HashAggregate (cost=39875.47..39892.97 rows=1000 width=16) @@ -10350,27 +10367,26 @@ analyze idxscan_inner; set optimizer_enable_hashjoin = off; explain select id, comment from idxscan_outer as o join idxscan_inner as i on o.id = i.productid where ordernum between 10 and 20; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=2.12..5.28 rows=4 width=9) - -> Hash Join (cost=2.12..5.28 rows=2 width=9) - Hash Cond: o.id = i.productid - -> Seq Scan on idxscan_outer o (cost=0.00..3.09 rows=3 width=4) - -> Hash (cost=2.09..2.09 rows=1 width=9) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..2.09 rows=1 width=9) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.05..2.14 rows=3 width=9) + -> Hash Join (cost=1.05..2.10 rows=1 width=9) + Hash Cond: (o.id = i.productid) + -> Seq Scan on idxscan_outer o (cost=0.00..1.03 rows=3 width=4) + -> Hash (cost=1.03..1.03 rows=1 width=9) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=9) Hash Key: i.productid - -> Seq Scan on idxscan_inner i (cost=0.00..2.04 rows=1 width=9) - Filter: ordernum >= 10 AND ordernum <= 20 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(11 rows) + -> Seq Scan on idxscan_inner i (cost=0.00..1.01 rows=1 
width=9) + Filter: ((ordernum >= 10) AND (ordernum <= 20)) + Optimizer: Postgres query optimizer +(10 rows) select id, comment from idxscan_outer as o join idxscan_inner as i on o.id = i.productid where ordernum between 10 and 20; id | comment ----+--------- - 1 | xxxx 3 | zzzz + 1 | xxxx (2 rows) reset optimizer_enable_hashjoin; @@ -10409,36 +10425,33 @@ insert into orca.index_test select i,i%2,i%3,i%4,i%5 from generate_series(1,100) analyze orca.index_test; -- force_explain explain select * from orca.index_test where a = 5; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=1 width=20) - -> Seq Scan on index_test (cost=0.00..4.25 rows=1 width=20) - Filter: a = 5 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.44 rows=1 width=20) + -> Seq Scan on index_test (cost=0.00..1.42 rows=1 width=20) + Filter: (a = 5) + Optimizer: Postgres query optimizer +(4 rows) -- force_explain explain select * from orca.index_test where c = 5; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=1 width=20) - -> Seq Scan on index_test (cost=0.00..4.25 rows=1 width=20) - Filter: c = 5 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.44 rows=1 width=20) + -> Seq Scan on index_test (cost=0.00..1.42 rows=1 width=20) + Filter: (c = 5) + Optimizer: Postgres query optimizer +(4 rows) -- force_explain explain select * from orca.index_test where a = 5 and c = 5; - QUERY PLAN 
--------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.50 rows=1 width=20) - -> Seq Scan on index_test (cost=0.00..4.50 rows=1 width=20) - Filter: a = 5 AND c = 5 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.52 rows=1 width=20) + -> Seq Scan on index_test (cost=0.00..1.50 rows=1 width=20) + Filter: ((a = 5) AND (c = 5)) + Optimizer: Postgres query optimizer +(4 rows) -- renaming columns select * from (values (2),(null)) v(k); @@ -10500,6 +10513,7 @@ drop table can_set_tag_target; drop table can_set_tag_audit; -- start_ignore create language plpython3u; +ERROR: extension "plpython3u" already exists -- end_ignore -- Checking if ORCA uses parser's canSetTag for CREATE TABLE AS SELECT create or replace function canSetTag_Func(x int) returns int as $$ @@ -10531,12 +10545,18 @@ select disable_xform('CXformSelect2IndexGet'); CXformSelect2IndexGet is disabled (1 row) +select disable_xform('CXformSelect2IndexOnlyGet'); + disable_xform +--------------------------------------- + CXformSelect2IndexOnlyGet is disabled +(1 row) + -- end_ignore EXPLAIN SELECT * FROM btree_test WHERE a in (1, 47); QUERY PLAN ---------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=8) - -> Seq Scan on btree_test (cost=0.00..4.25 rows=1 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=8) + -> Seq Scan on btree_test (cost=0.00..1.42 rows=1 width=8) Filter: (a = ANY ('{1,47}'::integer[])) Optimizer: Postgres query optimizer (4 rows) @@ -10544,8 +10564,8 @@ EXPLAIN SELECT * FROM btree_test WHERE a in (1, 47); EXPLAIN SELECT * FROM btree_test WHERE a in ('2', 47); QUERY PLAN 
---------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=8) - -> Seq Scan on btree_test (cost=0.00..4.25 rows=1 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=8) + -> Seq Scan on btree_test (cost=0.00..1.42 rows=1 width=8) Filter: (a = ANY ('{2,47}'::integer[])) Optimizer: Postgres query optimizer (4 rows) @@ -10553,8 +10573,8 @@ EXPLAIN SELECT * FROM btree_test WHERE a in ('2', 47); EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2'); QUERY PLAN ---------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=8) - -> Seq Scan on btree_test (cost=0.00..4.25 rows=1 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=8) + -> Seq Scan on btree_test (cost=0.00..1.42 rows=1 width=8) Filter: (a = ANY ('{1,2}'::integer[])) Optimizer: Postgres query optimizer (4 rows) @@ -10562,8 +10582,8 @@ EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2'); EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2', 47); QUERY PLAN ---------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.38 rows=3 width=8) - -> Seq Scan on btree_test (cost=0.00..4.38 rows=1 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.50 rows=3 width=8) + -> Seq Scan on btree_test (cost=0.00..1.46 rows=1 width=8) Filter: (a = ANY ('{1,2,47}'::integer[])) Optimizer: Postgres query optimizer (4 rows) @@ -10571,17 +10591,17 @@ EXPLAIN SELECT * FROM btree_test WHERE a in ('1', '2', 47); SELECT * FROM btree_test WHERE a in ('1', '2', 47); a | b ----+---- + 2 | 2 1 | 1 47 | 47 - 2 | 2 (3 rows) CREATE INDEX btree_test_index_ab ON btree_test using btree(a,b); EXPLAIN SELECT * FROM btree_test WHERE a in (1, 2, 47) AND b > 1; QUERY PLAN 
---------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.62 rows=3 width=8) - -> Seq Scan on btree_test (cost=0.00..4.62 rows=1 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.58 rows=3 width=8) + -> Seq Scan on btree_test (cost=0.00..1.54 rows=1 width=8) Filter: ((b > 1) AND (a = ANY ('{1,2,47}'::integer[]))) Optimizer: Postgres query optimizer (4 rows) @@ -10600,6 +10620,12 @@ select enable_xform('CXformSelect2IndexGet'); CXformSelect2IndexGet is enabled (1 row) +select enable_xform('CXformSelect2IndexOnlyGet'); + enable_xform +-------------------------------------- + CXformSelect2IndexOnlyGet is enabled +(1 row) + -- end_ignore reset optimizer_enable_tablescan; -- Test Bitmap index scan with in list @@ -10607,54 +10633,49 @@ CREATE TABLE bitmap_test as SELECT * FROM generate_series(1,100) as a distribute ANALYZE bitmap_test; CREATE INDEX bitmap_index ON bitmap_test USING BITMAP(a); EXPLAIN SELECT * FROM bitmap_test WHERE a in (1); - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=1 width=4) - -> Seq Scan on bitmap_test (cost=0.00..4.25 rows=1 width=4) - Filter: a = 1 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.44 rows=1 width=4) + -> Seq Scan on bitmap_test (cost=0.00..1.42 rows=1 width=4) + Filter: (a = 1) + Optimizer: Postgres query optimizer +(4 rows) EXPLAIN SELECT * FROM bitmap_test WHERE a in (1, 47); - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=4) - -> Seq Scan on bitmap_test (cost=0.00..4.25 rows=1 width=4) - Filter: a = ANY 
('{1,47}'::integer[]) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=4) + -> Seq Scan on bitmap_test (cost=0.00..1.42 rows=1 width=4) + Filter: (a = ANY ('{1,47}'::integer[])) + Optimizer: Postgres query optimizer +(4 rows) EXPLAIN SELECT * FROM bitmap_test WHERE a in ('2', 47); - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=4) - -> Seq Scan on bitmap_test (cost=0.00..4.25 rows=1 width=4) - Filter: a = ANY ('{2,47}'::integer[]) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=4) + -> Seq Scan on bitmap_test (cost=0.00..1.42 rows=1 width=4) + Filter: (a = ANY ('{2,47}'::integer[])) + Optimizer: Postgres query optimizer +(4 rows) EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2'); - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.25 rows=2 width=4) - -> Seq Scan on bitmap_test (cost=0.00..4.25 rows=1 width=4) - Filter: a = ANY ('{1,2}'::integer[]) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.45 rows=2 width=4) + -> Seq Scan on bitmap_test (cost=0.00..1.42 rows=1 width=4) + Filter: (a = ANY ('{1,2}'::integer[])) + Optimizer: Postgres query optimizer +(4 rows) EXPLAIN SELECT * FROM bitmap_test WHERE a in ('1', '2', 47); - QUERY PLAN 
--------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4.38 rows=3 width=4) - -> Seq Scan on bitmap_test (cost=0.00..4.38 rows=1 width=4) - Filter: a = ANY ('{1,2,47}'::integer[]) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.50 rows=3 width=4) + -> Seq Scan on bitmap_test (cost=0.00..1.46 rows=1 width=4) + Filter: (a = ANY ('{1,2,47}'::integer[])) + Optimizer: Postgres query optimizer +(4 rows) -- Test Logging for unsupported features in ORCA -- start_ignore @@ -10671,20 +10692,19 @@ set client_min_messages='log'; explain select count(*) from foo group by cube(a,b); QUERY PLAN ------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=1319.44..1496.29 rows=10611 width=16) - -> Finalize HashAggregate (cost=1319.44..1354.81 rows=3537 width=16) + Gather Motion 3:1 (slice1; segments: 3) (cost=1313.47..1490.32 rows=10611 width=16) + -> Finalize HashAggregate (cost=1313.47..1348.84 rows=3537 width=16) Group Key: a, b, (GROUPINGSET_ID()) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1213.33 rows=10611 width=16) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1208.85 rows=10462 width=16) Hash Key: a, b, (GROUPINGSET_ID()) - -> Partial MixedAggregate (cost=0.00..1001.11 rows=10611 width=16) + -> Partial MixedAggregate (cost=0.00..999.62 rows=10462 width=16) Hash Key: a, b Hash Key: a Hash Key: b Group Key: () - Planned Partitions: 4 -> Seq Scan on foo (cost=0.00..321.00 rows=28700 width=8) Optimizer: Postgres query optimizer -(9 rows) +(12 rows) reset client_min_messages; reset log_statement; @@ -10791,14 +10811,13 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of 
data. Make sur -- end_ignore -- Query should not fallback to planner explain select * from foo where b in ('1', '2'); - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..667.50 rows=91 width=42) - -> Seq Scan on foo (cost=0.00..667.50 rows=31 width=42) - Filter: b::text = ANY ('{1,2}'::text[]) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(5 rows) + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..224.38 rows=91 width=42) + -> Seq Scan on foo (cost=0.00..223.17 rows=30 width=42) + Filter: ((b)::text = ANY ('{1,2}'::text[])) + Optimizer: Postgres query optimizer +(4 rows) set optimizer_enable_ctas = off; set log_statement='none'; @@ -10818,8 +10837,8 @@ create table input_tab2 (c int, d int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into input_tab1 values (1, 1); -insert into input_tab1 values (NULL, NULL); analyze input_tab1; +insert into input_tab1 values (NULL, NULL); set optimizer_force_multistage_agg = off; set optimizer_force_three_stage_scalar_dqa = off; -- end_ignore @@ -10882,17 +10901,17 @@ FROM (SELECT * FROM tab_3)a; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=4.28..4.29 rows=1 width=8) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=4.25..4.28 rows=1 width=8) - -> Partial Aggregate (cost=4.25..4.26 rows=1 width=8) - -> Append (cost=1.04..4.20 rows=2 width=64) - -> Hash Left Join (cost=1.04..3.14 rows=2 width=6) + Finalize Aggregate (cost=3.21..3.22 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=3.16..3.21 rows=3 width=8) + -> Partial Aggregate (cost=3.16..3.17 rows=1 width=8) + -> Append (cost=1.04..3.13 rows=2 width=64) + -> Hash Left Join (cost=1.04..2.09 rows=1 width=6) Hash Cond: ((tab_1.id)::text = (tab_2.id)::text) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..2.06 rows=1 width=5) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=5) Hash Key: tab_1.id - -> Seq Scan on tab_1 (cost=0.00..2.02 rows=1 width=5) + -> Seq Scan on tab_1 (cost=0.00..1.01 rows=1 width=5) -> Hash (cost=1.03..1.03 rows=1 width=7) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=7) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=7) Hash Key: tab_2.id -> Seq Scan on tab_2 (cost=0.00..1.01 rows=1 width=7) -> Subquery Scan on "*SELECT* 2" (cost=0.00..1.02 rows=1 width=64) @@ -10935,32 +10954,32 @@ set optimizer_enable_streaming_material = on; select c1 from t_outer where not c1 =all (select c2 from t_inner); c1 ---- - 8 - 9 - 10 - 1 2 3 4 + 7 + 8 + 1 5 6 - 7 + 9 + 10 (10 rows) set optimizer_enable_streaming_material = off; select c1 from t_outer where not 
c1 =all (select c2 from t_inner); c1 ---- - 8 - 9 - 10 1 2 3 4 + 7 + 8 5 6 - 7 + 9 + 10 (10 rows) reset optimizer_enable_streaming_material; @@ -11060,10 +11079,10 @@ insert into y_tab select 1 union all select a from x_tab limit 10; select * from y_tab; a --- + 0 1 1 1 - 0 (4 rows) -- @@ -11091,6 +11110,7 @@ select * from x_tab; -- start_ignore drop table bar; +ERROR: table "bar" does not exist -- end_ignore -- TVF with a subplan that generates an RTABLE entry create table bar(name text); @@ -11131,18 +11151,17 @@ INSERT INTO csq_cast_param_inner VALUES (11, '11'), (101, '12'); EXPLAIN SELECT a FROM csq_cast_param_outer WHERE b in (SELECT CASE WHEN a > 1 THEN d ELSE '42' END FROM csq_cast_param_inner); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) (cost=1.11..2.49 rows=4 width=4) - -> Nested Loop Semi Join (cost=1.11..2.49 rows=2 width=4) - Join Filter: CASE WHEN csq_cast_param_outer.a > 1 THEN csq_cast_param_inner.d ELSE '42'::myint END::bigint = csq_cast_param_outer.b::bigint - -> Seq Scan on csq_cast_param_outer (cost=0.00..1.02 rows=1 width=8) - -> Materialize (cost=1.11..1.17 rows=2 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.10 rows=2 width=4) - -> Seq Scan on csq_cast_param_inner (cost=0.00..1.02 rows=1 width=4) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(9 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10046320507.70 rows=43050 width=4) + -> Nested Loop Semi Join (cost=10000000000.00..10046319933.70 rows=14350 width=4) + Join Filter: ((CASE WHEN (csq_cast_param_outer.a > 1) THEN csq_cast_param_inner.d ELSE '42'::myint END)::bigint = (csq_cast_param_outer.b)::bigint) + -> Seq Scan on 
csq_cast_param_outer (cost=0.00..321.00 rows=28700 width=8) + -> Materialize (cost=0.00..1899.50 rows=86100 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=4) + -> Seq Scan on csq_cast_param_inner (cost=0.00..321.00 rows=28700 width=4) + Optimizer: Postgres query optimizer +(8 rows) SELECT a FROM csq_cast_param_outer WHERE b in (SELECT CASE WHEN a > 1 THEN d ELSE '42' END FROM csq_cast_param_inner); a @@ -11155,18 +11174,17 @@ DROP CAST (myint as int8); CREATE FUNCTION myint_numeric(myint) RETURNS numeric AS 'int4_numeric' LANGUAGE INTERNAL STRICT IMMUTABLE; CREATE CAST (myint AS numeric) WITH FUNCTION myint_numeric(myint) AS IMPLICIT; EXPLAIN SELECT a FROM csq_cast_param_outer WHERE b in (SELECT CASE WHEN a > 1 THEN d ELSE '42' END FROM csq_cast_param_inner); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=1.11..2.49 rows=4 width=4) - -> Nested Loop Semi Join (cost=1.11..2.49 rows=2 width=4) - Join Filter: CASE WHEN csq_cast_param_outer.a > 1 THEN csq_cast_param_inner.d ELSE '42'::myint END::numeric = csq_cast_param_outer.b::numeric - -> Seq Scan on csq_cast_param_outer (cost=0.00..1.02 rows=1 width=8) - -> Materialize (cost=1.11..1.17 rows=2 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.10 rows=2 width=4) - -> Seq Scan on csq_cast_param_inner (cost=0.00..1.02 rows=1 width=4) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(9 rows) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10046320507.70 rows=43050 width=4) + -> Nested Loop Semi Join (cost=10000000000.00..10046319933.70 rows=14350 width=4) + Join Filter: ((CASE WHEN (csq_cast_param_outer.a > 1) THEN 
csq_cast_param_inner.d ELSE '42'::myint END)::numeric = (csq_cast_param_outer.b)::numeric) + -> Seq Scan on csq_cast_param_outer (cost=0.00..321.00 rows=28700 width=8) + -> Materialize (cost=0.00..1899.50 rows=86100 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=4) + -> Seq Scan on csq_cast_param_inner (cost=0.00..321.00 rows=28700 width=4) + Optimizer: Postgres query optimizer +(8 rows) SELECT a FROM csq_cast_param_outer WHERE b in (SELECT CASE WHEN a > 1 THEN d ELSE '42' END FROM csq_cast_param_inner); a @@ -11192,14 +11210,13 @@ EXPLAIN SELECT a FROM ggg WHERE a NOT IN (NULL, ''); (3 rows) EXPLAIN SELECT a FROM ggg WHERE a IN (NULL, 'x'); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=2) - -> Seq Scan on ggg (cost=0.00..1.01 rows=1 width=2) + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..241.33 rows=50 width=8) + -> Seq Scan on ggg (cost=0.00..240.67 rows=17 width=8) Filter: (a = ANY ('{NULL,x}'::bpchar[])) - Planning time: 0.038 ms Optimizer: Postgres query optimizer -(5 rows) +(4 rows) -- result node with one time filter and filter CREATE TABLE onetimefilter1 (a int, b int); @@ -11264,15 +11281,15 @@ WITH abc AS (SELECT onetimefilter1.a, onetimefilter1.b FROM onetimefilter1, onet ?column? 
| coalesce | b ----------+----------+---- 1 | 0 | 1 - 1 | 0 | 3 - 1 | 0 | 4 - 1 | 0 | 7 - 1 | 0 | 8 - 1 | 0 | 2 1 | 0 | 5 1 | 0 | 6 1 | 0 | 9 1 | 0 | 10 + 1 | 0 | 2 + 1 | 0 | 3 + 1 | 0 | 4 + 1 | 0 | 7 + 1 | 0 | 8 (10 rows) -- full joins with predicates @@ -11303,28 +11320,28 @@ CREATE INDEX tinnerbitmap_ix ON tinnerbitmap USING bitmap(a); CREATE INDEX tinnerbtree_ix ON tinnerbtree USING btree(a); SELECT * FROM touter LEFT JOIN tinnerbitmap ON touter.a = tinnerbitmap.a; a | b | a | b -----+---+----+--- - 1 | 1 | 1 | 1 - 6 | 0 | 6 | 0 - 5 | 5 | 5 | 5 - 9 | 3 | 9 | 3 - 10 | 4 | 10 | 4 +----+---+----+--- 2 | 2 | 2 | 2 7 | 1 | 7 | 1 8 | 2 | 8 | 2 3 | 3 | 3 | 3 4 | 4 | 4 | 4 + 6 | 0 | 6 | 0 + 5 | 5 | 5 | 5 + 9 | 3 | 9 | 3 + 10 | 4 | 10 | 4 + 1 | 1 | 1 | 1 (10 rows) SELECT * FROM touter LEFT JOIN tinnerbitmap ON touter.a = tinnerbitmap.a AND tinnerbitmap.b=10; a | b | a | b ----+---+---+--- + 1 | 1 | | 2 | 2 | | 3 | 3 | | 4 | 4 | | 7 | 1 | | 8 | 2 | | - 1 | 1 | | 5 | 5 | | 6 | 0 | | 9 | 3 | | @@ -11334,31 +11351,31 @@ SELECT * FROM touter LEFT JOIN tinnerbitmap ON touter.a = tinnerbitmap.a AND tin SELECT * FROM touter LEFT JOIN tinnerbtree ON touter.a = tinnerbtree.a; a | b | a | b ----+---+----+--- - 1 | 1 | 1 | 1 - 6 | 0 | 6 | 0 - 5 | 5 | 5 | 5 - 9 | 3 | 9 | 3 - 10 | 4 | 10 | 4 2 | 2 | 2 | 2 7 | 1 | 7 | 1 8 | 2 | 8 | 2 3 | 3 | 3 | 3 4 | 4 | 4 | 4 + 6 | 0 | 6 | 0 + 5 | 5 | 5 | 5 + 9 | 3 | 9 | 3 + 10 | 4 | 10 | 4 + 1 | 1 | 1 | 1 (10 rows) SELECT * FROM touter LEFT JOIN tinnerbtree ON touter.a = tinnerbtree.a AND tinnerbtree.b=10; a | b | a | b ----+---+---+--- 1 | 1 | | - 5 | 5 | | - 6 | 0 | | - 9 | 3 | | - 10 | 4 | | 2 | 2 | | 3 | 3 | | 4 | 4 | | 7 | 1 | | 8 | 2 | | + 5 | 5 | | + 6 | 0 | | + 9 | 3 | | + 10 | 4 | | (10 rows) -- test subplan in a qual under dynamic scan @@ -11384,36 +11401,46 @@ SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = (0 rows) explain analyze SELECT * FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND 
non_part2.f = 10 AND a IN ( SELECT b + 1 FROM non_part1); - QUERY PLAN -------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000000004.33..10000000039.45 rows=7 width=20) - -> Nested Loop Semi Join (cost=10000000004.33..10000000039.32 rows=3 width=20) - -> Hash Join (cost=4.33..28.41 rows=2 width=20) - Hash Cond: (ds_part_1_prt_2.c = non_part2.e) - -> Append (cost=0.00..24.03 rows=2 width=12) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000001.45..10000000015.71 rows=7 width=20) (actual time=4.000..4.000 rows=0 loops=1) + -> Nested Loop Semi Join (cost=10000000001.45..10000000015.62 rows=2 width=20) (actual time=4.000..4.000 rows=0 loops=1) + -> Hash Join (cost=1.45..12.54 rows=1 width=20) (actual time=4.000..4.000 rows=0 loops=1) + Hash Cond: (ds_part.c = non_part2.e) + -> Append (cost=0.00..11.06 rows=6 width=12) (actual time=0.000..0.000 rows=0 loops=1) Partition Selectors: $0 - -> Seq Scan on ds_part_1_prt_2 ds_part_1 (cost=0.00..1.01 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_2 ds_part_1 (cost=0.00..1.01 rows=1 width=12) (never executed) Filter: (a = (b + 1)) - -> Seq Scan on ds_part_1_prt_3 ds_part_2 (cost=0.00..1.01 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_3 ds_part_2 (cost=0.00..1.01 rows=1 width=12) (never executed) Filter: (a = (b + 1)) - -> Seq Scan on ds_part_1_prt_4 ds_part_3 (cost=0.00..1.01 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_4 ds_part_3 (cost=0.00..1.01 rows=1 width=12) (never executed) Filter: (a = (b + 1)) - -> Seq Scan on ds_part_1_prt_5 ds_part_4 (cost=0.00..1.01 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_5 ds_part_4 (cost=0.00..1.01 rows=1 width=12) (never executed) Filter: (a = (b + 1)) - -> Seq Scan on ds_part_1_prt_6 ds_part_5 
(cost=0.00..1.01 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_6 ds_part_5 (cost=0.00..1.01 rows=1 width=12) (never executed) Filter: (a = (b + 1)) - -> Seq Scan on ds_part_1_prt_deflt ds_part_6 (cost=0.00..5.95 rows=1 width=12) + -> Seq Scan on ds_part_1_prt_deflt ds_part_6 (cost=0.00..5.95 rows=1 width=12) (actual time=0.000..0.000 rows=0 loops=1) Filter: (a = (b + 1)) - -> Hash (cost=4.29..4.29 rows=1 width=8) - -> Partition Selector (selector id: $0) (cost=0.00..4.29 rows=1 width=8) - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..4.29 rows=1 width=8) - -> Seq Scan on non_part2 (cost=0.00..4.25 rows=1 width=8) + Rows Removed by Filter: 333 + -> Hash (cost=1.44..1.44 rows=1 width=8) (actual time=0.000..0.000 rows=1 loops=1) + Buckets: 262144 Batches: 1 Memory Usage: 2049kB + -> Partition Selector (selector id: $0) (cost=0.00..1.44 rows=1 width=8) (actual time=0.000..0.000 rows=1 loops=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.44 rows=1 width=8) (actual time=0.000..0.000 rows=1 loops=1) + -> Seq Scan on non_part2 (cost=0.00..1.42 rows=1 width=8) (actual time=0.000..0.000 rows=1 loops=1) Filter: (f = 10) - -> Materialize (cost=0.00..9.50 rows=100 width=0) - -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..8.00 rows=100 width=0) - -> Seq Scan on non_part1 (cost=0.00..4.00 rows=34 width=0) + Rows Removed by Filter: 24 + -> Materialize (cost=0.00..3.17 rows=100 width=0) (actual time=0.000..0.000 rows=1 loops=1) + -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..2.67 rows=100 width=0) (actual time=0.000..0.000 rows=100 loops=1) + -> Seq Scan on non_part1 (cost=0.00..1.33 rows=33 width=0) (actual time=0.000..0.000 rows=38 loops=1) + Planning Time: 1.217 ms + (slice0) Executor memory: 160K bytes. + (slice1) Executor memory: 2166K bytes avg x 3x(0) workers, 2166K bytes max (seg0). Work_mem: 2049K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). 
+ (slice3) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB Optimizer: Postgres query optimizer -(27 rows) + Execution Time: 3.612 ms +(37 rows) SELECT *, a IN ( SELECT b + 1 FROM non_part1) FROM ds_part, non_part2 WHERE ds_part.c = non_part2.e AND non_part2.f = 10 AND a IN ( SELECT b FROM non_part1); a | b | c | e | f | ?column? @@ -11436,15 +11463,15 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO varchar_sc_array_cmp VALUES ('a'), ('b'), ('c'), ('d'); EXPLAIN SELECT * FROM varchar_sc_array_cmp t1, varchar_sc_array_cmp t2 where t1.a = t2.a and t1.a in ('b', 'c'); - QUERY PLAN ------------------------------------------------------------------------------------------ - Gather Motion 2:1 (slice1; segments: 2) (cost=3.07..6.17 rows=4 width=4) - -> Hash Join (cost=3.07..6.17 rows=2 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) (cost=254.44..508.71 rows=11 width=64) + -> Hash Join (cost=254.44..508.57 rows=4 width=64) Hash Cond: ((t1.a)::text = (t2.a)::text) - -> Seq Scan on varchar_sc_array_cmp t1 (cost=0.00..3.05 rows=1 width=2) + -> Seq Scan on varchar_sc_array_cmp t1 (cost=0.00..254.00 rows=35 width=32) Filter: ((a)::text = ANY ('{b,c}'::text[])) - -> Hash (cost=3.05..3.05 rows=1 width=2) - -> Seq Scan on varchar_sc_array_cmp t2 (cost=0.00..3.05 rows=1 width=2) + -> Hash (cost=254.00..254.00 rows=35 width=32) + -> Seq Scan on varchar_sc_array_cmp t2 (cost=0.00..254.00 rows=35 width=32) Filter: ((a)::text = ANY ('{b,c}'::text[])) Optimizer: Postgres query optimizer (9 rows) @@ -11452,20 +11479,20 @@ EXPLAIN SELECT * FROM varchar_sc_array_cmp t1, varchar_sc_array_cmp t2 where t1. 
SELECT * FROM varchar_sc_array_cmp t1, varchar_sc_array_cmp t2 where t1.a = t2.a and t1.a in ('b', 'c'); a | a ---+--- - c | c b | b + c | c (2 rows) SET optimizer_array_constraints=on; EXPLAIN SELECT * FROM varchar_sc_array_cmp t1, varchar_sc_array_cmp t2 where t1.a = t2.a and (t1.a in ('b', 'c') OR t1.a = 'a'); QUERY PLAN ---------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=3.09..6.18 rows=4 width=4) - -> Hash Join (cost=3.09..6.18 rows=2 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=298.66..692.31 rows=8358 width=64) + -> Hash Join (cost=298.66..580.87 rows=2786 width=64) Hash Cond: ((t2.a)::text = (t1.a)::text) - -> Seq Scan on varchar_sc_array_cmp t2 (cost=0.00..3.04 rows=2 width=2) - -> Hash (cost=3.06..3.06 rows=1 width=2) - -> Seq Scan on varchar_sc_array_cmp t1 (cost=0.00..3.06 rows=1 width=2) + -> Seq Scan on varchar_sc_array_cmp t2 (cost=0.00..210.00 rows=17600 width=32) + -> Hash (cost=298.00..298.00 rows=53 width=32) + -> Seq Scan on varchar_sc_array_cmp t1 (cost=0.00..298.00 rows=53 width=32) Filter: (((a)::text = ANY ('{b,c}'::text[])) OR ((a)::text = 'a'::text)) Optimizer: Postgres query optimizer (8 rows) @@ -11554,10 +11581,10 @@ insert into tt values (1, 'b'), (1, 'B'); select * from tc, tt where c = v; a | c | b | v ---+---+---+--- - 1 | a | 1 | a - 1 | A | 1 | A 1 | B | 1 | B 1 | b | 1 | b + 1 | a | 1 | a + 1 | A | 1 | A (4 rows) -- bitmap scan on bitmap index @@ -11602,12 +11629,12 @@ update gp_distribution_policy set numsegments = numsegments-1 where localoid = ' reset allow_system_table_mods; -- populate the tables on this smaller cluster explain insert into gpexp_hash select i, i from generate_series(1,50) i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Insert on gpexp_hash (cost=0.00..30.00 rows=500 width=8) - -> Redistribute Motion 1:2 (slice1; segments: 1) (cost=0.00..30.00 
rows=1000 width=8) + QUERY PLAN +----------------------------------------------------------------------------------------- + Insert on gpexp_hash (cost=0.00..1.25 rows=0 width=0) + -> Redistribute Motion 1:2 (slice1; segments: 1) (cost=0.00..1.25 rows=25 width=8) Hash Key: i.i - -> Function Scan on generate_series i (cost=0.00..10.00 rows=500 width=8) + -> Function Scan on generate_series i (cost=0.00..0.50 rows=50 width=8) Optimizer: Postgres query optimizer (5 rows) @@ -11675,14 +11702,14 @@ select b, count(*) from gpexp_hash group by b order by b; explain update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); QUERY PLAN ---------------------------------------------------------------------------------------------------------- - Update on gpexp_rand (cost=0.00..216.00 rows=25 width=18) - -> Seq Scan on gpexp_rand (cost=0.00..215.00 rows=25 width=18) + Update on gpexp_rand (cost=0.00..70.00 rows=0 width=0) + -> Seq Scan on gpexp_rand (cost=0.00..70.00 rows=25 width=14) SubPlan 1 - -> Result (cost=0.00..4.25 rows=50 width=4) + -> Result (cost=0.00..2.75 rows=50 width=4) Filter: (gpexp_rand.a = gpexp_hash.a) - -> Materialize (cost=0.00..3.75 rows=50 width=8) - -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..3.50 rows=25 width=8) - -> Seq Scan on gpexp_hash (cost=0.00..2.50 rows=25 width=8) + -> Materialize (cost=0.00..2.25 rows=50 width=8) + -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..2.00 rows=50 width=8) + -> Seq Scan on gpexp_hash (cost=0.00..1.25 rows=25 width=8) Optimizer: Postgres query optimizer (9 rows) @@ -11708,7 +11735,7 @@ delete from gpexp_repl where b >= 20; explain insert into gpexp_repl values (20, 20); QUERY PLAN -------------------------------------------------------- - Insert on gpexp_repl (cost=0.00..0.01 rows=1 width=8) + Insert on gpexp_repl (cost=0.00..0.01 rows=0 width=0) -> Result (cost=0.00..0.01 rows=1 width=8) Optimizer: Postgres query optimizer (3 rows) @@ -11776,22 +11803,22 @@ 
analyze part2_1_prt_2; explain select * from part1, part2 where part1.b = part2.b limit 5; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- - Limit (cost=410984.11..410984.89 rows=5 width=16) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=410984.11..410984.89 rows=5 width=16) - -> Limit (cost=410984.11..410984.79 rows=2 width=16) - -> Hash Join (cost=7528.75..4042082.35 rows=9947454 width=16) - Hash Cond: (part1_1_prt_1.b = part2_1_prt_1.b) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..5402.00 rows=57734 width=8) - Hash Key: part1_1_prt_1.b - -> Append (cost=0.00..1938.00 rows=57734 width=8) + Limit (cost=2798.08..2798.24 rows=5 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=2798.08..2798.54 rows=15 width=16) + -> Limit (cost=2798.08..2798.34 rows=5 width=16) + -> Hash Join (cost=2798.08..518984.84 rows=9947453 width=16) + Hash Cond: (part1.b = part2.b) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..2090.67 rows=57733 width=8) + Hash Key: part1.b + -> Append (cost=0.00..936.00 rows=57733 width=8) -> Seq Scan on part1_1_prt_1 part1_1 (cost=0.00..2.67 rows=167 width=8) -> Seq Scan on part1_1_prt_2 part1_2 (cost=0.00..2.67 rows=167 width=8) -> Seq Scan on part1_1_prt_3 part1_3 (cost=0.00..321.00 rows=28700 width=8) -> Seq Scan on part1_1_prt_4 part1_4 (cost=0.00..321.00 rows=28700 width=8) - -> Hash (cost=5375.00..5375.00 rows=57434 width=8) - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..5375.00 rows=57434 width=8) - Hash Key: part2_1_prt_1.b - -> Append (cost=0.00..1929.00 rows=57434 width=8) + -> Hash (cost=2080.17..2080.17 rows=57433 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..2080.17 rows=57433 width=8) + Hash Key: part2.b + -> Append (cost=0.00..931.50 rows=57433 width=8) -> Seq Scan on part2_1_prt_1 part2_1 (cost=0.00..1.17 rows=17 width=8) -> Seq Scan on part2_1_prt_2 part2_2 
(cost=0.00..1.17 rows=17 width=8) -> Seq Scan on part2_1_prt_3 part2_3 (cost=0.00..321.00 rows=28700 width=8) @@ -11800,6 +11827,16 @@ explain select * from part1, part2 where part1.b = part2.b limit 5; (21 rows) -- test opfamily handling in ORCA +-- start_ignore +DROP FUNCTION abseq(int, int) CASCADE; +ERROR: function abseq(integer, integer) does not exist +DROP FUNCTION abslt(int, int) CASCADE; +ERROR: function abslt(integer, integer) does not exist +DROP FUNCTION absgt(int, int) CASCADE; +ERROR: function absgt(integer, integer) does not exist +DROP FUNCTION abscmp(int, int) CASCADE; +ERROR: function abscmp(integer, integer) does not exist +-- end_ignore CREATE FUNCTION abseq(int, int) RETURNS BOOL AS $$ begin return abs($1) = abs($2); end; @@ -11848,18 +11885,18 @@ ANALYZE btab_old_hash; EXPLAIN SELECT a, b FROM atab_old_hash INNER JOIN btab_old_hash ON a |=| b; QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.26..5.99 rows=6 width=8) - -> Hash Join (cost=2.26..5.87 rows=2 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.09..2.37 rows=6 width=8) + -> Hash Join (cost=1.09..2.29 rows=2 width=8) Hash Cond: (btab_old_hash.b |=| atab_old_hash.a) - -> Seq Scan on btab_old_hash (cost=0.00..3.04 rows=2 width=4) - -> Hash (cost=2.15..2.15 rows=3 width=4) - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.15 rows=3 width=4) - -> Seq Scan on atab_old_hash (cost=0.00..2.03 rows=1 width=4) + -> Seq Scan on btab_old_hash (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=1.05..1.05 rows=3 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.05 rows=3 width=4) + -> Seq Scan on atab_old_hash (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (8 rows) SELECT a, b FROM atab_old_hash INNER JOIN btab_old_hash ON a |=| b; -ERROR: could not find hash function for hash operator 73890 (nodeHash.c:389) (seg0 slice2 
127.0.0.1:6002 pid=18942) (nodeHash.c:389) +ERROR: could not find hash function for hash operator 59061 (nodeHash.c:685) (seg1 slice1 127.0.1.1:9203 pid=1583558) (nodeHash.c:685) CREATE OPERATOR CLASS abs_int_hash_ops FOR TYPE int4 USING hash AS OPERATOR 1 |=|, @@ -11874,16 +11911,16 @@ CREATE OPERATOR CLASS abs_int_btree_ops FOR TYPE int4 EXPLAIN SELECT a, b FROM atab_old_hash INNER JOIN btab_old_hash ON a |=| b; QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=3.17..5.70 rows=6 width=8) - -> Hash Join (cost=3.17..5.70 rows=2 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.31 rows=6 width=8) + -> Hash Join (cost=1.06..2.23 rows=2 width=8) Hash Cond: (atab_old_hash.a |=| btab_old_hash.b) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..2.09 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) Hash Key: atab_old_hash.a - -> Seq Scan on atab_old_hash (cost=0.00..2.03 rows=1 width=4) - -> Hash (cost=3.12..3.12 rows=2 width=4) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..3.12 rows=2 width=4) + -> Seq Scan on atab_old_hash (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.04 rows=1 width=4) Hash Key: btab_old_hash.b - -> Seq Scan on btab_old_hash (cost=0.00..3.04 rows=2 width=4) + -> Seq Scan on btab_old_hash (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (11 rows) @@ -11891,37 +11928,37 @@ SELECT a, b FROM atab_old_hash INNER JOIN btab_old_hash ON a |=| b; a | b ----+---- 0 | 0 - 1 | 1 1 | -1 - -1 | 1 + 1 | 1 -1 | -1 + -1 | 1 (5 rows) EXPLAIN SELECT a, b FROM btab_old_hash LEFT OUTER JOIN atab_old_hash ON a |=| b; QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; 
segments: 3) (cost=3.17..5.70 rows=6 width=8) - -> Hash Right Join (cost=3.17..5.70 rows=2 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.31 rows=6 width=8) + -> Hash Right Join (cost=1.06..2.23 rows=2 width=8) Hash Cond: (atab_old_hash.a |=| btab_old_hash.b) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..2.09 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) Hash Key: atab_old_hash.a - -> Seq Scan on atab_old_hash (cost=0.00..2.03 rows=1 width=4) - -> Hash (cost=3.12..3.12 rows=2 width=4) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..3.12 rows=2 width=4) + -> Seq Scan on atab_old_hash (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.04 rows=1 width=4) Hash Key: btab_old_hash.b - -> Seq Scan on btab_old_hash (cost=0.00..3.04 rows=2 width=4) + -> Seq Scan on btab_old_hash (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (11 rows) SELECT a, b FROM btab_old_hash LEFT OUTER JOIN atab_old_hash ON a |=| b; a | b ----+---- - | 2 0 | 0 - 1 | -1 1 | 1 - -1 | -1 + 1 | -1 -1 | 1 + -1 | -1 + | 2 (6 rows) set optimizer_expand_fulljoin = on; @@ -11934,28 +11971,28 @@ select disable_xform('CXformFullOuterJoin2HashJoin'); EXPLAIN SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b; QUERY PLAN ---------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=3.17..5.70 rows=6 width=8) - -> Hash Full Join (cost=3.17..5.70 rows=2 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.31 rows=6 width=8) + -> Hash Full Join (cost=1.06..2.23 rows=2 width=8) Hash Cond: (atab_old_hash.a |=| btab_old_hash.b) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..2.09 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) 
Hash Key: atab_old_hash.a - -> Seq Scan on atab_old_hash (cost=0.00..2.03 rows=1 width=4) - -> Hash (cost=3.12..3.12 rows=2 width=4) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..3.12 rows=2 width=4) + -> Seq Scan on atab_old_hash (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.04 rows=1 width=4) Hash Key: btab_old_hash.b - -> Seq Scan on btab_old_hash (cost=0.00..3.04 rows=2 width=4) + -> Seq Scan on btab_old_hash (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (11 rows) SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b; a | b ----+---- - | 2 0 | 0 - 1 | -1 1 | 1 - -1 | -1 + 1 | -1 -1 | 1 + -1 | -1 + | 2 (6 rows) reset optimizer_expand_fulljoin; @@ -12063,8 +12100,8 @@ FROM t55 L1 CROSS JOIN META WHERE L1.lid = int4in(textout(meta.load_id)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------- Unique (cost=5.25..5.28 rows=3 width=8) Output: l1.c, l1.lid Group Key: l1.c, l1.lid @@ -12082,8 +12119,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sur Output: meta.load_id -> Result (cost=0.00..0.01 rows=1 width=64) Output: '2020-01-01'::text, '99'::text + Settings: enable_incremental_sort = 'on', optimizer = 'off', optimizer_enable_dynamicbitmapscan = 'on', optimizer_join_order = 'query' Optimizer: Postgres query optimizer - Settings: optimizer=off, optimizer_join_order=query (19 rows) CREATE TABLE TP AS @@ -12106,10 +12143,10 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into lossycastrangepart (values (5.1,5.1), (9.9,9.9), (10.1,10.1), (9.1,9.1), (10.9,10.9), (11.1,11.1), (21.0,21.0)); explain select * from lossycastrangepart where b::int = 10; - QUERY PLAN -------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4666.00 rows=285 width=16) - -> Append (cost=0.00..4666.00 rows=95 width=16) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1562.27 rows=284 width=16) + -> Append (cost=0.00..1558.47 rows=95 width=16) -> Seq Scan on lossycastrangepart_1_prt_1 lossycastrangepart_1 (cost=0.00..389.50 rows=24 width=16) Filter: ((b)::integer = 10) -> Seq Scan on lossycastrangepart_1_prt_2 lossycastrangepart_2 (cost=0.00..389.50 rows=24 width=16) @@ -12129,10 +12166,10 @@ select * from lossycastrangepart where b::int = 10; (2 rows) explain select * from lossycastrangepart where b::int = 11; - QUERY PLAN -------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4666.00 rows=285 width=16) - -> Append (cost=0.00..4666.00 rows=95 width=16) + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1562.27 rows=284 width=16) + -> Append (cost=0.00..1558.47 rows=95 width=16) -> Seq Scan on lossycastrangepart_1_prt_1 lossycastrangepart_1 (cost=0.00..389.50 rows=24 width=16) Filter: ((b)::integer = 11) -> Seq Scan on lossycastrangepart_1_prt_2 lossycastrangepart_2 (cost=0.00..389.50 rows=24 width=16) @@ -12152,10 +12189,10 @@ select * from lossycastrangepart where b::int = 11; (2 rows) explain select * from lossycastrangepart where b::int < 10; - QUERY PLAN ---------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4666.00 rows=94800 width=16) - -> Append (cost=0.00..4666.00 rows=31600 width=16) + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2980.00 rows=94800 width=16) + -> Append (cost=0.00..1716.00 rows=31600 width=16) -> Seq Scan on lossycastrangepart_1_prt_1 lossycastrangepart_1 (cost=0.00..389.50 rows=7900 width=16) Filter: ((b)::integer < 10) -> Seq Scan on lossycastrangepart_1_prt_2 lossycastrangepart_2 (cost=0.00..389.50 rows=7900 width=16) @@ -12175,10 +12212,10 @@ select * from lossycastrangepart where b::int < 10; (2 rows) explain select * from lossycastrangepart where b::int < 11; - QUERY PLAN ---------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..4666.00 rows=94800 width=16) - -> Append (cost=0.00..4666.00 rows=31600 width=16) + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2980.00 rows=94800 width=16) + -> Append (cost=0.00..1716.00 
rows=31600 width=16) -> Seq Scan on lossycastrangepart_1_prt_1 lossycastrangepart_1 (cost=0.00..389.50 rows=7900 width=16) Filter: ((b)::integer < 11) -> Seq Scan on lossycastrangepart_1_prt_2 lossycastrangepart_2 (cost=0.00..389.50 rows=7900 width=16) @@ -12193,10 +12230,10 @@ explain select * from lossycastrangepart where b::int < 11; select * from lossycastrangepart where b::int < 11; a | b ------+------ - 9.9 | 9.9 9.1 | 9.1 5.1 | 5.1 10.1 | 10.1 + 9.9 | 9.9 (4 rows) create table lossycastlistpart( a int, b float) partition by list(b) (partition l1 values(1.7, 2.1), partition l2 values(1.3, 2.7), partition l3 values(1.8, 2.8)); @@ -12204,10 +12241,10 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into lossycastlistpart (values (1.0,2.1), (1.0,1.3), (10.1,2.1), (9.1,2.7), (10.9,1.8), (11.1,2.8), (21.0,1.7)); explain select * from lossycastlistpart where b::int < 2; - QUERY PLAN ---------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..5753.00 rows=77900 width=12) - -> Append (cost=0.00..4195.00 rows=25967 width=12) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2439.00 rows=77900 width=12) + -> Append (cost=0.00..1400.33 rows=25967 width=12) -> Seq Scan on lossycastlistpart_1_prt_l2 lossycastlistpart_1 (cost=0.00..423.50 rows=8656 width=12) Filter: ((b)::integer < 2) -> Seq Scan on lossycastlistpart_1_prt_l1 lossycastlistpart_2 (cost=0.00..423.50 rows=8656 width=12) @@ -12224,10 +12261,10 @@ select * from lossycastlistpart where b::int < 2; (1 row) explain select * from lossycastlistpart where b::int = 2; - QUERY PLAN 
-------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..3811.34 rows=234 width=12) - -> Append (cost=0.00..3806.67 rows=78 width=12) + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1274.01 rows=234 width=12) + -> Append (cost=0.00..1270.89 rows=78 width=12) -> Seq Scan on lossycastlistpart_1_prt_l2 lossycastlistpart_1 (cost=0.00..423.50 rows=26 width=12) Filter: ((b)::integer = 2) -> Seq Scan on lossycastlistpart_1_prt_l1 lossycastlistpart_2 (cost=0.00..423.50 rows=26 width=12) @@ -12253,39 +12290,6 @@ partition by range(sales_ts) (start (timestamp '2010-01-01 00:00:00') end(timest every (interval '1 day')); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-NOTICE: CREATE TABLE will create partition "sales_1_prt_1" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_2" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_3" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_4" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_5" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_6" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_7" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_8" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_9" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_10" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_11" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_12" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_13" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_14" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_15" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_16" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_17" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_18" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_19" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_20" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_21" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_22" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_23" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_24" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_25" for table "sales" -NOTICE: CREATE TABLE will create 
partition "sales_1_prt_26" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_27" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_28" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_29" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_30" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_31" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_32" for table "sales" -NOTICE: CREATE TABLE will create partition "sales_1_prt_33" for table "sales" insert into sales select i, i%100, i%1000, timestamp '2010-01-01 00:00:00' + i * interval '1 day' from generate_series(1,20) i; select * from sales where sales_ts::date != '2010-01-05' order by sales_ts; id | prod_id | cust_id | sales_ts @@ -12654,16 +12658,16 @@ select * from foo join tbtree on foo.a=tbtree.a; select * from foo join tbtree on foo.a=tbtree.a; a | b | c | a | b | c -------+-------+-------+-------+-------+------- - 3000 | 3000 | 3000 | 3000 | 3000 | 3000 - 4000 | 4000 | 4000 | 4000 | 4000 | 4000 - 9000 | 9000 | 9000 | 9000 | 9000 | 9000 - 10000 | 10000 | 10000 | 10000 | 10000 | 10000 1000 | 1000 | 1000 | 1000 | 1000 | 1000 2000 | 2000 | 2000 | 2000 | 2000 | 2000 6000 | 6000 | 6000 | 6000 | 6000 | 6000 7000 | 7000 | 7000 | 7000 | 7000 | 7000 8000 | 8000 | 8000 | 8000 | 8000 | 8000 2000 | 2000 | 2000 | 2000 | -1 | -1 + 3000 | 3000 | 3000 | 3000 | 3000 | 3000 + 4000 | 4000 | 4000 | 4000 | 4000 | 4000 + 9000 | 9000 | 9000 | 9000 | 9000 | 9000 + 10000 | 10000 | 10000 | 10000 | 10000 | 10000 5000 | 5000 | 5000 | 5000 | 5000 | 5000 (11 rows) @@ -12684,17 +12688,17 @@ select * from foo join tbitmap on foo.a=tbitmap.a; select * from foo join tbitmap on foo.a=tbitmap.a; a | b | c | a | b | c -------+-------+-------+-------+-------+------- - 5000 | 5000 | 5000 | 5000 | 5000 | 5000 - 3000 | 3000 | 3000 | 3000 | 3000 | 3000 - 4000 | 4000 | 4000 | 4000 | 4000 | 4000 - 9000 | 
9000 | 9000 | 9000 | 9000 | 9000 - 10000 | 10000 | 10000 | 10000 | 10000 | 10000 1000 | 1000 | 1000 | 1000 | 1000 | 1000 2000 | 2000 | 2000 | 2000 | 2000 | 2000 6000 | 6000 | 6000 | 6000 | 6000 | 6000 7000 | 7000 | 7000 | 7000 | 7000 | 7000 8000 | 8000 | 8000 | 8000 | 8000 | 8000 2000 | 2000 | 2000 | 2000 | -1 | -1 + 3000 | 3000 | 3000 | 3000 | 3000 | 3000 + 4000 | 4000 | 4000 | 4000 | 4000 | 4000 + 9000 | 9000 | 9000 | 9000 | 9000 | 9000 + 10000 | 10000 | 10000 | 10000 | 10000 | 10000 + 5000 | 5000 | 5000 | 5000 | 5000 | 5000 (11 rows) -- 3 btree with select pred @@ -12744,11 +12748,11 @@ select * from foo join tbitmap on foo.a=tbitmap.a where tbitmap.a < 5000; select * from foo join tbitmap on foo.a=tbitmap.a where tbitmap.a < 5000; a | b | c | a | b | c ------+------+------+------+------+------ + 3000 | 3000 | 3000 | 3000 | 3000 | 3000 + 4000 | 4000 | 4000 | 4000 | 4000 | 4000 1000 | 1000 | 1000 | 1000 | 1000 | 1000 2000 | 2000 | 2000 | 2000 | 2000 | 2000 2000 | 2000 | 2000 | 2000 | -1 | -1 - 3000 | 3000 | 3000 | 3000 | 3000 | 3000 - 4000 | 4000 | 4000 | 4000 | 4000 | 4000 (5 rows) -- 5 btree with project @@ -12772,13 +12776,13 @@ select * from foo join (select a, b+c as bc from tbtree) proj on foo.a=proj.a; 4000 | 4000 | 4000 | 4000 | 8000 9000 | 9000 | 9000 | 9000 | 18000 10000 | 10000 | 10000 | 10000 | 20000 - 5000 | 5000 | 5000 | 5000 | 10000 1000 | 1000 | 1000 | 1000 | 2000 2000 | 2000 | 2000 | 2000 | 4000 6000 | 6000 | 6000 | 6000 | 12000 7000 | 7000 | 7000 | 7000 | 14000 8000 | 8000 | 8000 | 8000 | 16000 2000 | 2000 | 2000 | 2000 | -2 + 5000 | 5000 | 5000 | 5000 | 10000 (11 rows) -- 6 bitmap with project @@ -12802,13 +12806,13 @@ select * from foo join (select a, b+c as bc from tbitmap) proj on foo.a=proj.a; 4000 | 4000 | 4000 | 4000 | 8000 9000 | 9000 | 9000 | 9000 | 18000 10000 | 10000 | 10000 | 10000 | 20000 + 5000 | 5000 | 5000 | 5000 | 10000 1000 | 1000 | 1000 | 1000 | 2000 2000 | 2000 | 2000 | 2000 | 4000 6000 | 6000 | 6000 | 6000 | 12000 7000 | 
7000 | 7000 | 7000 | 14000 8000 | 8000 | 8000 | 8000 | 16000 2000 | 2000 | 2000 | 2000 | -2 - 5000 | 5000 | 5000 | 5000 | 10000 (11 rows) -- 7 btree with grby @@ -12834,13 +12838,13 @@ select * from foo join (select a, count(*) as cnt from tbtree group by a,b) grby 9000 | 9000 | 9000 | 9000 | 1 4000 | 4000 | 4000 | 4000 | 1 3000 | 3000 | 3000 | 3000 | 1 - 5000 | 5000 | 5000 | 5000 | 1 6000 | 6000 | 6000 | 6000 | 1 2000 | 2000 | 2000 | 2000 | 1 8000 | 8000 | 8000 | 8000 | 1 7000 | 7000 | 7000 | 7000 | 1 2000 | 2000 | 2000 | 2000 | 1 1000 | 1000 | 1000 | 1000 | 1 + 5000 | 5000 | 5000 | 5000 | 1 (11 rows) -- 8 bitmap with grby @@ -12862,23 +12866,23 @@ select * from foo join (select a, count(*) as cnt from tbitmap group by a) grby select * from foo join (select a, count(*) as cnt from tbitmap group by a) grby on foo.a=grby.a; a | b | c | a | cnt -------+-------+-------+-------+----- - 5000 | 5000 | 5000 | 5000 | 1 + 10000 | 10000 | 10000 | 10000 | 1 + 3000 | 3000 | 3000 | 3000 | 1 + 9000 | 9000 | 9000 | 9000 | 1 + 4000 | 4000 | 4000 | 4000 | 1 8000 | 8000 | 8000 | 8000 | 1 6000 | 6000 | 6000 | 6000 | 1 2000 | 2000 | 2000 | 2000 | 2 7000 | 7000 | 7000 | 7000 | 1 1000 | 1000 | 1000 | 1000 | 1 - 10000 | 10000 | 10000 | 10000 | 1 - 3000 | 3000 | 3000 | 3000 | 1 - 9000 | 9000 | 9000 | 9000 | 1 - 4000 | 4000 | 4000 | 4000 | 1 + 5000 | 5000 | 5000 | 5000 | 1 (10 rows) -- 9 btree with proj select grby select explain (costs off) select * from foo join (select a, count(*) + 5 as cnt from tbtree where tbtree.a < 5000 group by a having count(*) < 2) proj_sel_grby_sel on foo.a=proj_sel_grby_sel.a; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (tbtree.a = foo.a) @@ -12895,9 +12899,9 @@ select * from foo join (select a, count(*) + 5 as cnt from tbtree where tbtree.a select * from foo join (select a, count(*) + 5 as cnt 
from tbtree where tbtree.a < 5000 group by a having count(*) < 2) proj_sel_grby_sel on foo.a=proj_sel_grby_sel.a; a | b | c | a | cnt ------+------+------+------+----- - 1000 | 1000 | 1000 | 1000 | 6 - 4000 | 4000 | 4000 | 4000 | 6 3000 | 3000 | 3000 | 3000 | 6 + 4000 | 4000 | 4000 | 4000 | 6 + 1000 | 1000 | 1000 | 1000 | 6 (3 rows) -- 10 bitmap with proj select grby select @@ -12949,16 +12953,16 @@ select * from foo join (select a, count(*) as cnt from (select distinct a, b fro select * from foo join (select a, count(*) as cnt from (select distinct a, b from tbitmap) grby1 group by a) grby2 on foo.a=grby2.a; a | b | c | a | cnt -------+-------+-------+-------+----- - 8000 | 8000 | 8000 | 8000 | 1 - 6000 | 6000 | 6000 | 6000 | 1 - 2000 | 2000 | 2000 | 2000 | 2 - 7000 | 7000 | 7000 | 7000 | 1 - 1000 | 1000 | 1000 | 1000 | 1 10000 | 10000 | 10000 | 10000 | 1 3000 | 3000 | 3000 | 3000 | 1 9000 | 9000 | 9000 | 9000 | 1 4000 | 4000 | 4000 | 4000 | 1 5000 | 5000 | 5000 | 5000 | 1 + 8000 | 8000 | 8000 | 8000 | 1 + 6000 | 6000 | 6000 | 6000 | 1 + 2000 | 2000 | 2000 | 2000 | 2 + 7000 | 7000 | 7000 | 7000 | 1 + 1000 | 1000 | 1000 | 1000 | 1 (10 rows) -- 12 btree with proj select 2*grby select @@ -12966,8 +12970,8 @@ explain (costs off) select * from foo join (select a, count(*) + cnt1 as cnt2 from (select a, count(*) as cnt1 from tbtree group by a) grby1 where grby1.a < 5000 group by a, cnt1 having count(*) < 2) proj_sel_grby_sel on foo.a=proj_sel_grby_sel.a; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (tbtree.a = foo.a) @@ -12988,10 +12992,10 @@ select * from foo join (select a, count(*) + cnt1 as cnt2 from (select a, count( on foo.a=proj_sel_grby_sel.a; a | b | c | a | cnt2 ------+------+------+------+------ - 2000 | 2000 | 2000 | 2000 | 3 - 1000 | 1000 | 1000 | 1000 | 2 3000 | 3000 | 3000 | 3000 | 
2 4000 | 4000 | 4000 | 4000 | 2 + 2000 | 2000 | 2000 | 2000 | 3 + 1000 | 1000 | 1000 | 1000 | 2 (4 rows) -- 13 join pred accesses a projected column - no index scan @@ -13094,7 +13098,7 @@ select * from foo join (select min_a, count(*) as cnt from (select min(a) as min -> Hash -> Seq Scan on foo Optimizer: Postgres query optimizer -(16 rows) +(17 rows) reset optimizer_join_order; reset optimizer_enable_hashjoin; @@ -13149,7 +13153,7 @@ explain (costs off) select count(*), t2.c from roj1 t1 left join roj2 t2 on t1.a -> Seq Scan on roj2 t2 -> Hash -> Seq Scan on roj1 t1 - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (11 rows) reset optimizer_enable_right_outer_join; @@ -13376,8 +13380,8 @@ create INDEX y_idx on y (j); set optimizer_enable_indexjoin=on; set optimizer_enable_dynamicindexscan=on; explain (costs off) select count(*) from x, y where (x.i > y.j AND x.j <= y.i); - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------- Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate @@ -13420,8 +13424,8 @@ CREATE TABLE infer_part_vc (id int, gender varchar(1)) insert into infer_part_vc select i, substring(i::varchar, 1, 1) from generate_series(1, 1000) i; analyze infer_part_vc; explain (costs off) select * from infer_part_vc inner join infer_txt on (infer_part_vc.gender = infer_txt.a) and infer_txt.a = 'M'; - QUERY PLAN ---------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop -> Seq Scan on infer_part_vc_1_prt_boys infer_part_vc @@ -13555,8 +13559,8 @@ where d.msisdn=f.subscriberaddress and f.sessioncreationtimestamp >= d.start_dtm and f.sessioncreationtimestamp Hash Join Hash Cond: ((stg_xdr_crce_cdr.subscriberaddress)::text = 
(d.msisdn)::text) @@ -13670,8 +13674,8 @@ with v(year) as ( select * from v where year > 1; year ------ - 2018 2020 + 2018 (2 rows) with v(year) as ( @@ -13681,10 +13685,10 @@ with v(year) as ( select * from v where year > 1; year ------ - 2019 - 2018 2020 2020 + 2018 + 2019 (4 rows) with v(year) as ( @@ -13773,10 +13777,10 @@ explain select r.a, r.b, r.c, l.c from left_outer_index_nl_foo r left outer join select r.a, r.b, r.c, l.c from left_outer_index_nl_foo r left outer join left_outer_index_nl_bar l on r.b=l.b; a | b | c | c ---+---+---+--- - 1 | 1 | 1 | - 3 | 3 | 3 | 3 2 | 2 | 2 | 2 4 | 4 | 4 | 4 + 3 | 3 | 3 | 3 + 1 | 1 | 1 | (4 rows) create table left_outer_index_nl_foo_hash (a integer, b integer, c text); @@ -13915,7 +13919,7 @@ ANALYZE tt2; EXPLAIN SELECT b FROM tt1 WHERE NOT EXISTS (SELECT * FROM tt2 WHERE (tt2.d = tt1.b) IS DISTINCT FROM false); QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.56 rows=3 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.57 rows=4 width=4) -> Nested Loop Anti Join (cost=10000000000.00..10000000002.52 rows=1 width=4) Join Filter: ((tt2.d = tt1.b) IS DISTINCT FROM false) -> Seq Scan on tt1 (cost=0.00..1.01 rows=1 width=4) @@ -13933,7 +13937,7 @@ SELECT b FROM tt1 WHERE NOT EXISTS (SELECT * FROM tt2 WHERE (tt2.d = tt1.b) IS D EXPLAIN SELECT b FROM tt1 WHERE NOT EXISTS (SELECT * FROM tt2 WHERE (tt2.d = tt1.b) IS DISTINCT FROM true); QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.56 rows=3 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.57 rows=4 width=4) -> Nested Loop Anti Join (cost=10000000000.00..10000000002.52 rows=1 width=4) Join Filter: ((tt2.d = tt1.b) IS DISTINCT FROM true) -> 
Seq Scan on tt1 (cost=0.00..1.01 rows=1 width=4) @@ -14077,16 +14081,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.a SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.a; a | b | c | a | b | c ----+----+----+----+----+---- - 5 | 5 | 5 | 5 | 5 | 5 - 6 | 6 | 6 | 6 | 6 | 6 - 9 | 9 | 9 | 9 | 9 | 9 - 10 | 10 | 10 | 10 | 10 | 10 + 1 | 1 | 1 | 1 | 1 | 1 2 | 2 | 2 | 2 | 2 | 2 3 | 3 | 3 | 3 | 3 | 3 4 | 4 | 4 | 4 | 4 | 4 7 | 7 | 7 | 7 | 7 | 7 8 | 8 | 8 | 8 | 8 | 8 - 1 | 1 | 1 | 1 | 1 | 1 + 5 | 5 | 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 | 6 | 6 + 9 | 9 | 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 | 10 | 10 (10 rows) --- if the inner child is not distributed on the join column, orca should @@ -14108,15 +14112,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.b SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.b; a | b | c | a | b | c ----+----+----+----+----+---- - 5 | 5 | 5 | 5 | 5 | 5 - 6 | 6 | 6 | 6 | 6 | 6 - 9 | 9 | 9 | 9 | 9 | 9 - 10 | 10 | 10 | 10 | 10 | 10 2 | 2 | 2 | 2 | 2 | 2 3 | 3 | 3 | 3 | 3 | 3 4 | 4 | 4 | 4 | 4 | 4 7 | 7 | 7 | 7 | 7 | 7 8 | 8 | 8 | 8 | 8 | 8 + 5 | 5 | 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 | 6 | 6 + 9 | 9 | 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 | 10 | 10 1 | 1 | 1 | 1 | 1 | 1 (10 rows) @@ -14137,16 +14141,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM tone t1 LEFT OUTER JOIN (SELECT 1+t2.b as b fr SELECT * FROM tone t1 LEFT OUTER JOIN (SELECT 1+t2.b as b from tone t2) t2 ON t1.a = t2.b; a | b | c | b ----+----+----+---- - 2 | 2 | 2 | 2 - 7 | 7 | 7 | 7 - 3 | 3 | 3 | 3 - 4 | 4 | 4 | 4 - 8 | 8 | 8 | 8 - 6 | 6 | 6 | 6 - 10 | 10 | 10 | 10 5 | 5 | 5 | 5 9 | 9 | 9 | 9 + 6 | 6 | 6 | 6 + 10 | 10 | 10 | 10 1 | 1 | 1 | + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 8 | 8 | 8 | 8 + 2 | 2 | 2 | 2 + 7 | 7 | 7 | 7 (10 rows) --- if the join condition does not involve a simple scalar ident, orca must @@ -14167,16 +14171,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.b SELECT * FROM tone 
t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.b-t1.a; a | b | c | a | b | c ----+----+----+----+----+---- - 1 | 1 | 1 | 2 | 2 | 2 - 5 | 5 | 5 | 10 | 10 | 10 - 6 | 6 | 6 | | | - 9 | 9 | 9 | | | - 10 | 10 | 10 | | | 2 | 2 | 2 | 4 | 4 | 4 3 | 3 | 3 | 6 | 6 | 6 4 | 4 | 4 | 8 | 8 | 8 7 | 7 | 7 | | | 8 | 8 | 8 | | | + 5 | 5 | 5 | 10 | 10 | 10 + 6 | 6 | 6 | | | + 9 | 9 | 9 | | | + 10 | 10 | 10 | | | + 1 | 1 | 1 | 2 | 2 | 2 (10 rows) --- orca should broadcast the inner child if the guc is set off @@ -14225,16 +14229,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.a SELECT * FROM tone t1 LEFT OUTER JOIN tone t2 ON t1.a = t2.a; a | b | c | a | b | c ----+----+----+----+----+---- - 5 | 5 | 5 | 5 | 5 | 5 - 6 | 6 | 6 | 6 | 6 | 6 - 9 | 9 | 9 | 9 | 9 | 9 - 10 | 10 | 10 | 10 | 10 | 10 2 | 2 | 2 | 2 | 2 | 2 3 | 3 | 3 | 3 | 3 | 3 4 | 4 | 4 | 4 | 4 | 4 7 | 7 | 7 | 7 | 7 | 7 8 | 8 | 8 | 8 | 8 | 8 1 | 1 | 1 | 1 | 1 | 1 + 5 | 5 | 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 | 6 | 6 + 9 | 9 | 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 | 10 | 10 (10 rows) RESET optimizer_enable_redistribute_nestloop_loj_inner_child; @@ -14265,9 +14269,9 @@ CREATE TABLE result_tab AS SELECT gp_segment_id, * FROM result_tab; gp_segment_id | a | b ---------------+-------+--- - 0 | 3 | 3 - 2 | 1 | 1 1 | 2 | 2 + 2 | 1 | 1 + 0 | 3 | 3 (3 rows) DROP TABLE IF EXISTS dist_tab_a; @@ -14380,7 +14384,11 @@ DROP TABLE t_clientinstrumentind2, t_clientproductind2; -- eval_const_expressions() call for subplan. ORCA has only one call to -- fold_constants() at the very beginning and doesn't perform folding later. CREATE TABLE join_null_rej1(i int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE TABLE join_null_rej2(i int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO join_null_rej1(i) VALUES (1), (2), (3); INSERT INTO join_null_rej2 SELECT i FROM join_null_rej1; CREATE OR REPLACE FUNCTION join_null_rej_func() RETURNS int AS $$ @@ -14480,16 +14488,16 @@ explain select c2 from quad_func_cast(); (2 rows) explain select (c1).r from quad_func_cast(); - QUERY PLAN --------------------------------------------------------------------- - Function Scan on quad_func_cast (cost=0.00..0.01 rows=1 width=32) + QUERY PLAN +------------------------------------------------------------------- + Function Scan on quad_func_cast (cost=0.00..0.01 rows=1 width=8) Optimizer: Postgres query optimizer (2 rows) explain select (c2).i from quad_func_cast(); - QUERY PLAN --------------------------------------------------------------------- - Function Scan on quad_func_cast (cost=0.00..0.01 rows=1 width=32) + QUERY PLAN +------------------------------------------------------------------- + Function Scan on quad_func_cast (cost=0.00..0.01 rows=1 width=8) Optimizer: Postgres query optimizer (2 rows) @@ -14581,7 +14589,7 @@ explain (costs off) select sum((item).b) from comp_table where (item).c=20; -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on comp_table Filter: ((item).c = 20) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (5 rows) select distinct (item).b from comp_table where (item).c=20; @@ -14603,7 +14611,7 @@ explain (costs off) select distinct (item).b from comp_table where (item).c=20; Hash Key: ((item).b) -> Seq Scan on comp_table Filter: ((item).c = 20) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (11 rows) -- verify the query output using 
predicate with the same composite type @@ -14619,7 +14627,7 @@ explain (costs off) select (item).a from comp_table where (item).c=20 and (item) Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on comp_table Filter: (((item).e > 10) AND ((item).c = 20)) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (4 rows) -- verify the query output using predicate with the different composite type @@ -14635,18 +14643,18 @@ explain (costs off) select * from comp_table where (item).c>(item).d ; Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on comp_table Filter: (((item).c)::double precision > (item).d) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (4 rows) -- verify the query output by using a composite type in a join query select (x.item).a from comp_table x join comp_table y on (x.item).c=(y.item).c; a ---- + VM DB DB GP GP - VM (5 rows) explain (costs off) select (x.item).a from comp_table x join comp_table y on (x.item).c=(y.item).c; @@ -14662,16 +14670,16 @@ explain (costs off) select (x.item).a from comp_table x join comp_table y on (x. 
-> Redistribute Motion 3:3 (slice3; segments: 3) Hash Key: (y.item).c -> Seq Scan on comp_table y - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (11 rows) -- verify the query output by using a composite type in a TVF query select (x.item).a, (select count(*) from generate_series(1, (x.item).c)) from comp_table x; a | count ----+------- - GP | 10 VM | 20 DB | 10 + GP | 10 (3 rows) explain (costs off) select (x.item).a, (select count(*) from generate_series(1, (x.item).c)) from comp_table x; @@ -14682,7 +14690,7 @@ explain (costs off) select (x.item).a, (select count(*) from generate_series(1, SubPlan 1 -> Aggregate -> Function Scan on generate_series - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (6 rows) -- verify the query output by using a composite type in a cte query @@ -14699,15 +14707,15 @@ explain (costs off) with cte1 as (select * from comp_table where (item).c>10) se -> Subquery Scan on cte1 -> Seq Scan on comp_table Filter: ((item).c > 10) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (5 rows) -- verify the query output by using a composite type in a subquery select (item).a from comp_table where (item).c=10 and (item).e IN (SELECT (item).e FROM comp_table WHERE (item).c = 10); a ---- - GP DB + GP (2 rows) explain (costs off) select (item).a from comp_table where (item).c=10 and (item).e IN (SELECT (item).e FROM comp_table WHERE (item).c = 10); @@ -14725,17 +14733,17 @@ explain (costs off) select (item).a from comp_table where (item).c=10 and (item) Hash Key: (comp_table_1.item).e -> Seq Scan on comp_table comp_table_1 Filter: ((item).c = 10) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (13 rows) -- verify the query output by using a composite type in a partition table query select (x.item).a from comp_part x join comp_part y on (X.item).c=(Y.item).c; a ---- - DB - DB GP GP + DB + DB VM (5 rows) @@ -14811,13 +14819,13 @@ insert into 
array_coerce_bar values (1, ARRAY['abcde']); explain insert into array_coerce_foo select * from array_coerce_bar; QUERY PLAN ----------------------------------------------------------------------------- - Insert on array_coerce_foo (cost=0.00..654.00 rows=16533 width=36) + Insert on array_coerce_foo (cost=0.00..654.00 rows=0 width=0) -> Seq Scan on array_coerce_bar (cost=0.00..654.00 rows=16533 width=36) Optimizer: Postgres query optimizer (3 rows) insert into array_coerce_foo select * from array_coerce_bar; -ERROR: value too long for type character varying(2) (seg1 127.0.0.1:7003 pid=55908) +ERROR: value too long for type character varying(2) (seg1 127.0.1.1:9203 pid=1583558) WITH conf AS ( SELECT setting FROM pg_catalog.pg_config @@ -14832,11 +14840,13 @@ reset optimizer_trace_fallback; -- These testcases will fallback to postgres when "PexprConvert2In" is enabled if -- underlying issues are not fixed create table baz (a int,b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
explain select baz.* from baz where baz.a=1 OR baz.b = 1 OR baz.b = 2 OR baz.b = 3 OR baz.b = 4 OR baz.b = 5 OR baz.b = 6 OR baz.b = 7 OR baz.b = 8 OR baz.b = 9 OR baz.b = 10 OR baz.b = 11 OR baz.b = 12 OR baz.b = 13 OR baz.b = 14 OR baz.b = 15 OR baz.b = 16 OR baz.b = 17 OR baz.b = 18 OR baz.b = 19 OR baz.b = 20; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1851.62 rows=1790 width=8) -> Seq Scan on baz (cost=0.00..1827.75 rows=597 width=8) @@ -14846,8 +14856,10 @@ baz.b = 11 OR baz.b = 12 OR baz.b = 13 OR baz.b = 14 OR baz.b = 15 OR baz.b = 16 drop table baz; create table baz ( a varchar); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
explain select * from baz where baz.a::bpchar='b' or baz.a='c'; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..299.41 rows=106 width=32) -> Seq Scan on baz (cost=0.00..298.00 rows=35 width=32) @@ -14876,6 +14888,7 @@ explain (analyze, costs off, summary off, timing off) with cte as (select * from -- stats that cover all the child tables should be used -- start_ignore create language plpython3u; +ERROR: extension "plpython3u" already exists -- end_ignore create or replace function check_col_width(query text, operator text, width text) returns int as $$ @@ -14936,7 +14949,7 @@ EXPLAIN SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; ------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1882.00 rows=52800 width=32) -> Seq Scan on array_coerceviaio (cost=0.00..1178.00 rows=17600 width=32) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (3 rows) SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; @@ -14947,31 +14960,32 @@ SELECT CAST(a AS TEXT[]) FROM array_coerceviaio; --------------------------------------------------------------------------------- DROP TABLE IF EXISTS schema_test_table; +NOTICE: table "schema_test_table" does not exist, skipping CREATE TABLE schema_test_table(a numeric, b numeric(5,2), c char(10) NOT NULL) distributed by (a); -- In 7x, redundant Result nodes in planned_stmt are being removed by ORCA, -- which caused the loss of typmod info of column type in plan. -- Below query is used by external libraries to fetch schema of table. -- Test that the typmod of column type is correct in explain plan. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM schema_test_table WHERE 1=0; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------- Result Output: a, b, c One-Time Filter: false - Optimizer: Postgres-based planner - Settings: optimizer = 'off', optimizer_enable_coordinator_only_queries = 'on', optimizer_enable_master_only_queries = 'on', optimizer_segments = '3' + Settings: enable_incremental_sort = 'on', optimizer = 'off', optimizer_enable_dynamicbitmapscan = 'on', optimizer_enable_dynamicindexscan = 'on' + Optimizer: Postgres query optimizer (5 rows) --------------------------------------------------------------------------------- -- Test ALL NULL scalar array compare create table DatumSortedSet_core (a int, b character varying NOT NULL) distributed by (a); explain select * from DatumSortedSet_core where b in (NULL, NULL); - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..240.69 rows=1 width=36) -> Seq Scan on datumsortedset_core (cost=0.00..240.67 rows=1 width=36) Filter: ((b)::text = ANY ('{NULL,NULL}'::text[])) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (4 rows) --------------------------------------------------------------------------------- @@ -15006,7 +15020,7 @@ explain (costs off) select max(s1) from foo inner join bar on j1 = j2 group by g -> Seq Scan on foo -> Hash -> Seq Scan on bar - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (13 rows) drop table foo; diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 5222cc22b70..48d38c91171 100644 --- 
a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -916,19 +916,25 @@ select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 or explain (costs off) select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: a - Group Key: () - Filter: (a IS DISTINCT FROM 1) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: a + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Finalize GroupAggregate + Group Key: a, (GROUPINGSET_ID()) + Filter: (a IS DISTINCT FROM 1) -> Sort - Sort Key: a - -> Seq Scan on gstest2 + Sort Key: a, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a, (GROUPINGSET_ID()) + -> Partial GroupAggregate + Group Key: a + Group Key: () + -> Sort + Sort Key: a + -> Seq Scan on gstest2 Optimizer: Postgres query optimizer -(10 rows) +(16 rows) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; @@ -1483,21 +1489,22 @@ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); explain (costs off) select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); - QUERY PLAN --------------------------------------------------- - Finalize GroupAggregate - Group Key: (GROUPINGSET_ID()) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (GROUPINGSET_ID()) + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize GroupAggregate + Group Key: (GROUPINGSET_ID()) -> Sort Sort Key: (GROUPINGSET_ID()) - -> Partial Aggregate - Group Key: () - Group Key: () - Group Key: () - -> Seq Scan on gstest_empty + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> 
Partial Aggregate + Group Key: () + Group Key: () + Group Key: () + -> Seq Scan on gstest_empty Optimizer: Postgres query optimizer -(12 rows) +(13 rows) -- check that functionally dependent cols are not nulled select a, d, grouping(a,b,c) @@ -1697,22 +1704,24 @@ explain (costs off) BEGIN; SET LOCAL enable_hashagg = false; EXPLAIN (COSTS OFF) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; - QUERY PLAN ------------------------------------------------------------------- - Finalize GroupAggregate - Group Key: a, b, (GROUPINGSET_ID()) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: a, b, (GROUPINGSET_ID()) + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a, b + -> Finalize GroupAggregate + Group Key: a, b, (GROUPINGSET_ID()) -> Sort Sort Key: a, b, (GROUPINGSET_ID()) - -> Partial GroupAggregate - Group Key: a - Group Key: () - Sort Key: b - Group Key: b - -> Index Scan using gstest3_pkey on gstest3 + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a, b, (GROUPINGSET_ID()) + -> Partial GroupAggregate + Group Key: a + Group Key: () + Sort Key: b + Group Key: b + -> Index Scan using gstest3_pkey on gstest3 Optimizer: Postgres query optimizer -(13 rows) +(15 rows) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; a | b | count | max | max @@ -1726,22 +1735,24 @@ SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,( SET LOCAL enable_seqscan = false; EXPLAIN (COSTS OFF) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; - QUERY PLAN ------------------------------------------------------------------- - Finalize GroupAggregate - Group Key: a, b, (GROUPINGSET_ID()) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: a, b, (GROUPINGSET_ID()) + QUERY PLAN 
+------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a, b + -> Finalize GroupAggregate + Group Key: a, b, (GROUPINGSET_ID()) -> Sort Sort Key: a, b, (GROUPINGSET_ID()) - -> Partial GroupAggregate - Group Key: a - Group Key: () - Sort Key: b - Group Key: b - -> Index Scan using gstest3_pkey on gstest3 + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a, b, (GROUPINGSET_ID()) + -> Partial GroupAggregate + Group Key: a + Group Key: () + Sort Key: b + Group Key: b + -> Index Scan using gstest3_pkey on gstest3 Optimizer: Postgres query optimizer -(13 rows) +(15 rows) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; a | b | count | max | max @@ -1916,23 +1927,24 @@ explain (costs off) count(hundred), count(thousand), count(twothousand), count(*) from tenk1 group by grouping sets (unique1,hundred,ten,four,two); - QUERY PLAN ------------------------------------------------------------------------------- - Finalize GroupAggregate - Group Key: unique1, hundred, ten, four, two, (GROUPINGSET_ID()) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: unique1, hundred, ten, four, two, (GROUPINGSET_ID()) + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize GroupAggregate + Group Key: unique1, hundred, ten, four, two, (GROUPINGSET_ID()) -> Sort Sort Key: unique1, hundred, ten, four, two, (GROUPINGSET_ID()) - -> Partial HashAggregate - Hash Key: unique1 - Hash Key: hundred - Hash Key: ten - Hash Key: four - Hash Key: two - -> Seq Scan on tenk1 + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: unique1, hundred, ten, four, two, (GROUPINGSET_ID()) + -> Partial HashAggregate + Hash Key: unique1 + Hash Key: hundred + Hash Key: ten + Hash Key: four + Hash Key: two + -> Seq Scan on tenk1 Optimizer: Postgres query optimizer -(14 rows) 
+(15 rows) set work_mem = '384kB'; explain (costs off) diff --git a/src/test/regress/expected/matview_data.out b/src/test/regress/expected/matview_data.out index 9a9074cd2d5..f23550bd623 100644 --- a/src/test/regress/expected/matview_data.out +++ b/src/test/regress/expected/matview_data.out @@ -1339,23 +1339,25 @@ explain(costs off) select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id group by c.region; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: c.region - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: c.region - -> Streaming Partial HashAggregate - Group Key: c.region - -> Hash Join - Hash Cond: (o.customer_id = c.customer_id) - -> Seq Scan on aqj_orders o - -> Hash - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on aqj_customers c + -> Sort + Sort Key: c.region + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region + -> Streaming Partial HashAggregate + Group Key: c.region + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on aqj_customers c Optimizer: Postgres query optimizer -(14 rows) +(16 rows) select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id @@ -1470,23 +1472,25 @@ explain(costs off) select c.region, sum(o.amount) as total_amount from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id group by c.region; - QUERY PLAN 
---------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: c.region - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: c.region - -> Streaming Partial HashAggregate - Group Key: c.region - -> Hash Join - Hash Cond: (o.customer_id = c.customer_id) - -> Seq Scan on aqj_orders o - -> Hash - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on aqj_customers c + -> Sort + Sort Key: c.region + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: c.region + -> Streaming Partial HashAggregate + Group Key: c.region + -> Hash Join + Hash Cond: (o.customer_id = c.customer_id) + -> Seq Scan on aqj_orders o + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on aqj_customers c Optimizer: Postgres query optimizer -(14 rows) +(16 rows) -- 18. 
Non-match: different join order (o JOIN c vs c JOIN o) explain(costs off) select o.order_id, o.amount, c.name, c.region @@ -2363,12 +2367,12 @@ insert into par_1_prt_1 values (1, 1, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | i + mv_par1 | i + mv_par1_1 | i mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1_1 | i - mv_par1 | i - mv_par | i (6 rows) insert into par values (1, 2, 2); @@ -2376,12 +2380,12 @@ insert into par values (1, 2, 2); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u - mv_par2_1 | u - mv_par1_1 | i - mv_par1 | i mv_par | i + mv_par1 | i + mv_par1_1 | i + mv_par1_2 | u mv_par2 | i + mv_par2_1 | u (6 rows) refresh materialized view mv_par; @@ -2396,12 +2400,12 @@ insert into par_1_prt_2_2_prt_1 values (1, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u + mv_par | i mv_par1 | u mv_par1_1 | u - mv_par2_1 | i + mv_par1_2 | u mv_par2 | i - mv_par | i + mv_par2_1 | i (6 rows) abort; @@ -2411,12 +2415,12 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u + mv_par | e mv_par1 | u mv_par1_1 | u - mv_par2_1 | e + mv_par1_2 | u mv_par2 | e - mv_par | e + mv_par2_1 | e (6 rows) abort; @@ -2425,12 +2429,12 @@ truncate par_1_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par1_2 | u + mv_par | e mv_par1 | u mv_par1_1 | u - mv_par2_1 | e + mv_par1_2 | u mv_par2 | e - mv_par | e + mv_par2_1 | e (6 rows) refresh materialized view mv_par; @@ -2443,12 +2447,12 @@ vacuum full par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus 
-----------+------------ + mv_par | r + mv_par1 | r + mv_par1_1 | r mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1_1 | r - mv_par1 | r - mv_par | r (6 rows) refresh materialized view mv_par; @@ -2462,12 +2466,12 @@ vacuum full par; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | r mv_par | r - mv_par2_1 | r - mv_par1_2 | r mv_par1 | r mv_par1_1 | r + mv_par1_2 | r + mv_par2 | r + mv_par2_1 | r (6 rows) refresh materialized view mv_par; @@ -2483,12 +2487,12 @@ NOTICE: table has parent, setting distribution columns to match parent table select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e mv_par1_1 | u mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1 | e - mv_par | e (6 rows) abort; @@ -2502,9 +2506,9 @@ drop cascades to materialized view mv_par1 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e mv_par2 | u mv_par2_1 | u - mv_par | e (3 rows) abort; @@ -2514,12 +2518,12 @@ alter table par_1_prt_1 detach partition par_1_prt_1_2_prt_1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e mv_par1_1 | u mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1 | e - mv_par | e (6 rows) abort; @@ -2532,12 +2536,12 @@ alter table par_1_prt_1 attach partition new_par for values from (4) to (5); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e mv_par1_1 | u mv_par1_2 | u mv_par2 | u mv_par2_1 | u - mv_par1 | e - mv_par | e (6 rows) abort; @@ -2567,12 +2571,12 @@ insert into par values(1, 1, 1), (1, 1, 2); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ - mv_par2 | 
u - mv_par2_1 | u - mv_par1_2 | i - mv_par1 | i mv_par | i + mv_par1 | i mv_par1_1 | i + mv_par1_2 | i + mv_par2 | u + mv_par2_1 | u (6 rows) abort; @@ -2592,12 +2596,12 @@ insert into par_1_prt_2_2_prt_1 values(2, 2, 1); select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | i mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2_1 | i mv_par2 | i - mv_par | i + mv_par2_1 | i (6 rows) abort; @@ -2617,12 +2621,12 @@ delete from par where b = 2 and c = 1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e mv_par1 | u mv_par1_1 | u mv_par1_2 | u - mv_par2_1 | e mv_par2 | e - mv_par | e + mv_par2_1 | e (6 rows) abort; @@ -2642,12 +2646,12 @@ delete from par_1_prt_1_2_prt_2; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e mv_par1_1 | u + mv_par1_2 | e mv_par2 | u mv_par2_1 | u - mv_par1_2 | e - mv_par1 | e - mv_par | e (6 rows) abort; @@ -2668,12 +2672,12 @@ update par set c = 2 where b = 1 and c = 1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e + mv_par1_1 | e + mv_par1_2 | i mv_par2 | u mv_par2_1 | u - mv_par1_2 | i - mv_par1_1 | e - mv_par1 | e - mv_par | e (6 rows) abort; @@ -2694,12 +2698,12 @@ update par set c = 2, a = 2 where b = 1 and c = 1; select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; mvname | datastatus -----------+------------ + mv_par | e + mv_par1 | e + mv_par1_1 | e + mv_par1_2 | i mv_par2 | u mv_par2_1 | u - mv_par1_2 | i - mv_par1_1 | e - mv_par1 | e - mv_par | e (6 rows) abort; diff --git a/src/test/regress/expected/olap_plans.out b/src/test/regress/expected/olap_plans.out index 8861e4a5c5f..fda51512e98 100644 --- a/src/test/regress/expected/olap_plans.out +++ 
b/src/test/regress/expected/olap_plans.out @@ -79,18 +79,20 @@ select a, b, c, sum(d) from olap_test group by a, b, c; -- If it's not a superset, redistribution is needed. explain select a, sum(d) from olap_test group by a; - QUERY PLAN -------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=56.11..56.16 rows=3 width=12) - -> Finalize HashAggregate (cost=56.11..56.12 rows=1 width=12) + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=56.11..56.19 rows=3 width=12) + -> Finalize GroupAggregate (cost=56.11..56.15 rows=1 width=12) Group Key: a - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=56.00..56.09 rows=3 width=12) - Hash Key: a - -> Streaming Partial HashAggregate (cost=56.00..56.03 rows=3 width=12) - Group Key: a - -> Seq Scan on olap_test (cost=0.00..39.33 rows=3333 width=8) + -> Sort (cost=56.11..56.12 rows=3 width=12) + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=56.00..56.09 rows=3 width=12) + Hash Key: a + -> Streaming Partial HashAggregate (cost=56.00..56.03 rows=3 width=12) + Group Key: a + -> Seq Scan on olap_test (cost=0.00..39.33 rows=3333 width=8) Optimizer: Postgres query optimizer -(9 rows) +(11 rows) select a, sum(d) from olap_test group by a; a | sum diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index e8d78fc6643..a43153b7a7a 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -27,29 +27,33 @@ ANALYZE pagg_tab; -- When GROUP BY clause matches; full aggregation is performed for each partition. 
EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Append - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: pagg_tab.c Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: pagg_tab.c - -> Streaming Partial HashAggregate - Group Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab - -> Finalize HashAggregate + -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> Partial HashAggregate + Group Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Finalize GroupAggregate Group Key: pagg_tab_1.c Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: pagg_tab_1.c - -> Streaming Partial HashAggregate - Group Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: pagg_tab_1.c + -> Partial HashAggregate + Group Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Finalize GroupAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) @@ -57,11 +61,11 @@ SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVI Sort Key: pagg_tab_2.c -> Redistribute Motion 3:3 (slice4; segments: 3) Hash Key: pagg_tab_2.c - -> Streaming Partial HashAggregate + -> Partial HashAggregate Group Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(30 rows) +(36 rows) SELECT c, sum(a), avg(b), 
count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count | min | max @@ -77,8 +81,8 @@ SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVI -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN ------------------------------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) -> Sort @@ -207,35 +211,44 @@ SET enable_hashagg TO false; -- When GROUP BY clause matches full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------ Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) - -> Append - -> Finalize GroupAggregate - Group Key: pagg_tab.c - Filter: (avg(pagg_tab.d) < '15'::numeric) - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: pagg_tab.c - -> Partial GroupAggregate - Group Key: pagg_tab.c - -> Sort - Sort Key: pagg_tab.c - -> Seq Scan on pagg_tab_p1 pagg_tab - -> Finalize GroupAggregate - Group Key: pagg_tab_1.c - Filter: (avg(pagg_tab_1.d) < '15'::numeric) - -> Gather Motion 3:1 (slice2; segments: 3) - Merge Key: pagg_tab_1.c - -> Partial GroupAggregate - Group Key: pagg_tab_1.c - -> Sort - Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 + -> Merge Append + Sort Key: pagg_tab.c + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pagg_tab.c + -> Finalize GroupAggregate + Group Key: pagg_tab.c + Filter: 
(avg(pagg_tab.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> Partial GroupAggregate + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab + -> Gather Motion 3:1 (slice3; segments: 3) + Merge Key: pagg_tab_1.c + -> Finalize GroupAggregate + Group Key: pagg_tab_1.c + Filter: (avg(pagg_tab_1.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: pagg_tab_1.c + -> Partial GroupAggregate + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Finalize GroupAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) - -> Gather Motion 3:1 (slice3; segments: 3) + -> Gather Motion 3:1 (slice5; segments: 3) Merge Key: pagg_tab_2.c -> Partial GroupAggregate Group Key: pagg_tab_2.c @@ -243,7 +256,7 @@ SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 O Sort Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(34 rows) +(43 rows) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count @@ -306,39 +319,48 @@ SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 O -- Test partitionwise grouping without any aggregates EXPLAIN (COSTS OFF) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; - QUERY PLAN -------------------------------------------------------- - Merge Append - Sort Key: pagg_tab.c - -> GroupAggregate - Group Key: pagg_tab.c - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: pagg_tab.c + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Append -> GroupAggregate Group Key: pagg_tab.c -> Sort Sort Key: pagg_tab.c - 
-> Seq Scan on pagg_tab_p1 pagg_tab - -> GroupAggregate - Group Key: pagg_tab_1.c - -> Gather Motion 3:1 (slice2; segments: 3) - Merge Key: pagg_tab_1.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: pagg_tab.c + -> GroupAggregate + Group Key: pagg_tab.c + -> Sort + Sort Key: pagg_tab.c + -> Seq Scan on pagg_tab_p1 pagg_tab -> GroupAggregate Group Key: pagg_tab_1.c -> Sort Sort Key: pagg_tab_1.c - -> Seq Scan on pagg_tab_p2 pagg_tab_1 - -> GroupAggregate - Group Key: pagg_tab_2.c - -> Gather Motion 3:1 (slice3; segments: 3) - Merge Key: pagg_tab_2.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: pagg_tab_1.c + -> GroupAggregate + Group Key: pagg_tab_1.c + -> Sort + Sort Key: pagg_tab_1.c + -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> GroupAggregate Group Key: pagg_tab_2.c -> Sort Sort Key: pagg_tab_2.c - -> Seq Scan on pagg_tab_p3 pagg_tab_2 + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: pagg_tab_2.c + -> GroupAggregate + Group Key: pagg_tab_2.c + -> Sort + Sort Key: pagg_tab_2.c + -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer -(30 rows) +(39 rows) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; c diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 98431e7a104..3289e60b7df 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -1875,10 +1875,10 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c --------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: t1.c - -> Sort - Sort Key: t1.c - -> Finalize HashAggregate - Group Key: t1.c, t2.c + -> Finalize GroupAggregate + Group Key: t1.c, t2.c + -> Sort + Sort Key: t1.c -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: t1.c, t1.c -> Streaming Partial HashAggregate diff --git 
a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 2fc722f5c41..2d9ee511ffe 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -19,12 +19,19 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE; SET client_min_messages TO 'notice'; -- initial setup CREATE USER regress_rls_alice NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE USER regress_rls_bob NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE USER regress_rls_carol NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE USER regress_rls_dave NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE ROLE regress_rls_group1 NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE ROLE regress_rls_group2 NOLOGIN; +NOTICE: resource queue required -- using default resource queue "pg_default" GRANT regress_rls_group1 TO regress_rls_bob; GRANT regress_rls_group2 TO regress_rls_carol; CREATE SCHEMA regress_rls_schema; @@ -66,6 +73,7 @@ CREATE TABLE document ( dauthor name, dtitle text ); +WARNING: referential integrity (FOREIGN KEY) constraints are not supported in Apache Cloudberry, will not be enforced GRANT ALL ON document TO public; INSERT INTO document VALUES ( 1, 11, 1, 'regress_rls_bob', 'my first novel'), @@ -143,6 +151,7 @@ Policies: POLICY "p2r" AS RESTRICTIVE TO regress_rls_dave USING (((cid <> 44) AND (cid < 50))) +Distributed by: (did) SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename = 'document' ORDER BY policyname; schemaname | tablename | policyname | permissive | roles | cmd | qual | with_check @@ -158,11 +167,11 @@ SELECT * FROM pg_policies WHERE 
schemaname = 'regress_rls_schema' AND tablename SET SESSION AUTHORIZATION regress_rls_bob; SET row_security TO ON; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my first manga -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great manga -NOTICE: f_leak => awesome science fiction +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -173,11 +182,11 @@ NOTICE: f_leak => awesome science fiction (5 rows) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my first manga -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great manga -NOTICE: f_leak => awesome science fiction +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) cid | did | dlevel | dauthor | dtitle | cname -----+-----+--------+-------------------+-------------------------+----------------- 11 | 1 | 1 | regress_rls_bob | my first novel | novel @@ -190,7 +199,7 @@ NOTICE: f_leak => awesome science fiction -- try a sampled version SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) WHERE f_leak(dtitle) ORDER BY did; -NOTICE: 
f_leak => great manga +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------- 8 | 44 | 1 | regress_rls_carol | great manga @@ -199,16 +208,16 @@ NOTICE: f_leak => great manga -- viewpoint from regress_rls_carol SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my science fiction -NOTICE: f_leak => my first manga -NOTICE: f_leak => my second manga -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => great manga -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my second manga (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -224,16 +233,16 @@ NOTICE: f_leak => awesome technology book (10 rows) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my science fiction 
-NOTICE: f_leak => my first manga -NOTICE: f_leak => my second manga -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => great manga -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my second manga (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) cid | did | dlevel | dauthor | dtitle | cname -----+-----+--------+-------------------+-------------------------+----------------- 11 | 1 | 1 | regress_rls_bob | my first novel | novel @@ -251,9 +260,9 @@ NOTICE: f_leak => awesome technology book -- try a sampled version SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => great technology book -NOTICE: f_leak => great manga -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 7 | 33 | 2 | regress_rls_carol | great technology book @@ -262,8 +271,8 @@ NOTICE: f_leak => awesome technology book 
(3 rows) EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); - QUERY PLAN ------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) InitPlan 1 (returns $0) (slice2) -> Gather Motion 1:1 (slice3; segments: 1) @@ -295,13 +304,13 @@ EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dt -- viewpoint from regress_rls_dave SET SESSION AUTHORIZATION regress_rls_dave; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my science fiction -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -314,13 +323,13 @@ NOTICE: f_leak => awesome technology book (7 rows) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my science fiction -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => awesome technology book +NOTICE: 
f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) cid | did | dlevel | dauthor | dtitle | cname -----+-----+--------+-------------------+-------------------------+----------------- 11 | 1 | 1 | regress_rls_bob | my first novel | novel @@ -367,10 +376,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dt -- 44 would technically fail for both p2r and p1r, but we should get an error -- back from p1r for this because it sorts first INSERT INTO document VALUES (100, 44, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail -ERROR: new row violates row-level security policy "p1r" for table "document" +ERROR: new row violates row-level security policy "p1r" for table "document" (seg2 127.0.1.1:9204 pid=1527275) -- Just to see a p2r error INSERT INTO document VALUES (100, 55, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail -ERROR: new row violates row-level security policy "p2r" for table "document" +ERROR: new row violates row-level security policy "p2r" for table "document" (seg2 127.0.1.1:9204 pid=1527275) -- only owner can change policies ALTER POLICY p1 ON document USING (true); --fail ERROR: must be owner of table document @@ -381,11 +390,11 @@ ALTER POLICY p1 ON document USING (dauthor = current_user); -- viewpoint from regress_rls_bob again SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: 
f_leak => my science fiction -NOTICE: f_leak => my first manga -NOTICE: f_leak => my second manga +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second manga (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-----------------+-------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -396,11 +405,11 @@ NOTICE: f_leak => my second manga (5 rows) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER by did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my science fiction -NOTICE: f_leak => my first manga -NOTICE: f_leak => my second manga +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second manga (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my science fiction (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first manga (seg0 slice1 127.0.1.1:9202 pid=1527273) cid | did | dlevel | dauthor | dtitle | cname -----+-----+--------+-----------------+--------------------+----------------- 11 | 1 | 1 | regress_rls_bob | my first novel | novel @@ -413,9 +422,9 @@ NOTICE: f_leak => my second manga -- viewpoint from rls_regres_carol again SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => great manga +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great 
science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------- 6 | 22 | 1 | regress_rls_carol | great science fiction @@ -424,9 +433,9 @@ NOTICE: f_leak => great manga (3 rows) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER by did; -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book -NOTICE: f_leak => great manga +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great manga (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) cid | did | dlevel | dauthor | dtitle | cname -----+-----+--------+-------------------+-----------------------+----------------- 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction @@ -435,8 +444,8 @@ NOTICE: f_leak => great manga (3 rows) EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); - QUERY PLAN ---------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on document Filter: ((dauthor = 'regress_rls_carol'::name) AND f_leak(dtitle)) @@ -444,8 +453,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (category.cid = document.cid) @@ -479,9 +488,7 @@ SELECT * FROM document d FULL OUTER JOIN category c on d.cid = c.cid ORDER BY d. 
-- GPDB: referential integrity checks are not enforced -- start_ignore -DELETE FROM category WHERE cid = 33; -- fails with FK violation -ERROR: update or delete on table "category" violates foreign key constraint "document_cid_fkey" on table "document" -DETAIL: Key is still referenced from table "document". +-- DELETE FROM category WHERE cid = 33; -- fails with FK violation -- end_ignore -- can insert FK referencing invisible PK SET SESSION AUTHORIZATION regress_rls_carol; @@ -497,7 +504,7 @@ INSERT INTO document VALUES (11, 33, 1, current_user, 'hoge'); -- UNIQUE or PRIMARY KEY constraint violation DOES reveal presence of row SET SESSION AUTHORIZATION regress_rls_bob; INSERT INTO document VALUES (8, 44, 1, 'regress_rls_bob', 'my third manga'); -- Must fail with unique violation, revealing presence of did we can't see -ERROR: duplicate key value violates unique constraint "document_pkey" +ERROR: duplicate key value violates unique constraint "document_pkey" (seg0 127.0.1.1:9202 pid=1527273) SELECT * FROM document WHERE did = 8; -- and confirm we can't see it did | cid | dlevel | dauthor | dtitle -----+-----+--------+---------+-------- @@ -505,11 +512,11 @@ SELECT * FROM document WHERE did = 8; -- and confirm we can't see it -- RLS policies are checked before constraints INSERT INTO document VALUES (8, 44, 1, 'regress_rls_carol', 'my third manga'); -- Should fail with RLS check violation, not duplicate key violation -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table "document" (seg0 127.0.1.1:9202 pid=1527273) -- GPDB: UPDATE on distributed key column not allowed on relation with update triggers -- start_ignore UPDATE document SET did = 8, dauthor = 'regress_rls_carol' WHERE did = 5; -- Should fail with RLS check violation, not duplicate key violation -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table 
"document" (seg0 127.0.1.1:9202 pid=1527273) -- end_ignore -- database superuser does bypass RLS policy when enabled RESET SESSION AUTHORIZATION; @@ -517,26 +524,26 @@ SET row_security TO ON; SELECT * FROM document; did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- - 1 | 11 | 1 | regress_rls_bob | my first novel 2 | 11 | 2 | regress_rls_bob | my second novel 3 | 22 | 2 | regress_rls_bob | my science fiction 4 | 44 | 1 | regress_rls_bob | my first manga - 5 | 44 | 2 | regress_rls_bob | my second manga - 6 | 22 | 1 | regress_rls_carol | great science fiction 7 | 33 | 2 | regress_rls_carol | great technology book 8 | 44 | 1 | regress_rls_carol | great manga + 5 | 44 | 2 | regress_rls_bob | my second manga + 6 | 22 | 1 | regress_rls_carol | great science fiction 9 | 22 | 1 | regress_rls_dave | awesome science fiction 10 | 33 | 2 | regress_rls_dave | awesome technology book 11 | 33 | 1 | regress_rls_carol | hoge + 1 | 11 | 1 | regress_rls_bob | my first novel (11 rows) SELECT * FROM category; cid | cname -----+----------------- - 11 | novel 22 | science fiction - 33 | technology 44 | manga + 11 | novel + 33 | technology (4 rows) -- database superuser does bypass RLS policy when disabled @@ -545,14 +552,14 @@ SET row_security TO OFF; SELECT * FROM document; did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- - 1 | 11 | 1 | regress_rls_bob | my first novel 2 | 11 | 2 | regress_rls_bob | my second novel 3 | 22 | 2 | regress_rls_bob | my science fiction 4 | 44 | 1 | regress_rls_bob | my first manga - 5 | 44 | 2 | regress_rls_bob | my second manga - 6 | 22 | 1 | regress_rls_carol | great science fiction 7 | 33 | 2 | regress_rls_carol | great technology book 8 | 44 | 1 | regress_rls_carol | great manga + 1 | 11 | 1 | regress_rls_bob | my first novel + 5 | 44 | 2 | regress_rls_bob | my second manga + 6 | 22 | 1 | regress_rls_carol | great science fiction 9 | 22 | 1 | 
regress_rls_dave | awesome science fiction 10 | 33 | 2 | regress_rls_dave | awesome technology book 11 | 33 | 1 | regress_rls_carol | hoge @@ -561,8 +568,8 @@ SELECT * FROM document; SELECT * FROM category; cid | cname -----+----------------- - 11 | novel 22 | science fiction + 11 | novel 33 | technology 44 | manga (4 rows) @@ -573,26 +580,26 @@ SET row_security TO OFF; SELECT * FROM document; did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- + 5 | 44 | 2 | regress_rls_bob | my second manga + 6 | 22 | 1 | regress_rls_carol | great science fiction + 9 | 22 | 1 | regress_rls_dave | awesome science fiction + 10 | 33 | 2 | regress_rls_dave | awesome technology book + 11 | 33 | 1 | regress_rls_carol | hoge 1 | 11 | 1 | regress_rls_bob | my first novel 2 | 11 | 2 | regress_rls_bob | my second novel 3 | 22 | 2 | regress_rls_bob | my science fiction 4 | 44 | 1 | regress_rls_bob | my first manga - 5 | 44 | 2 | regress_rls_bob | my second manga - 6 | 22 | 1 | regress_rls_carol | great science fiction 7 | 33 | 2 | regress_rls_carol | great technology book 8 | 44 | 1 | regress_rls_carol | great manga - 9 | 22 | 1 | regress_rls_dave | awesome science fiction - 10 | 33 | 2 | regress_rls_dave | awesome technology book - 11 | 33 | 1 | regress_rls_carol | hoge (11 rows) SELECT * FROM category; cid | cname -----+----------------- - 11 | novel 22 | science fiction - 33 | technology 44 | manga + 11 | novel + 33 | technology (4 rows) -- RLS policy does not apply to table owner when RLS enabled. 
@@ -602,23 +609,23 @@ SELECT * FROM document; did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel - 2 | 11 | 2 | regress_rls_bob | my second novel - 3 | 22 | 2 | regress_rls_bob | my science fiction - 4 | 44 | 1 | regress_rls_bob | my first manga 5 | 44 | 2 | regress_rls_bob | my second manga 6 | 22 | 1 | regress_rls_carol | great science fiction - 7 | 33 | 2 | regress_rls_carol | great technology book - 8 | 44 | 1 | regress_rls_carol | great manga 9 | 22 | 1 | regress_rls_dave | awesome science fiction 10 | 33 | 2 | regress_rls_dave | awesome technology book 11 | 33 | 1 | regress_rls_carol | hoge + 2 | 11 | 2 | regress_rls_bob | my second novel + 3 | 22 | 2 | regress_rls_bob | my science fiction + 4 | 44 | 1 | regress_rls_bob | my first manga + 7 | 33 | 2 | regress_rls_carol | great technology book + 8 | 44 | 1 | regress_rls_carol | great manga (11 rows) SELECT * FROM category; cid | cname -----+----------------- - 11 | novel 22 | science fiction + 11 | novel 33 | technology 44 | manga (4 rows) @@ -629,14 +636,14 @@ SET row_security TO OFF; SELECT * FROM document; did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- - 1 | 11 | 1 | regress_rls_bob | my first novel 2 | 11 | 2 | regress_rls_bob | my second novel 3 | 22 | 2 | regress_rls_bob | my science fiction 4 | 44 | 1 | regress_rls_bob | my first manga - 5 | 44 | 2 | regress_rls_bob | my second manga - 6 | 22 | 1 | regress_rls_carol | great science fiction 7 | 33 | 2 | regress_rls_carol | great technology book 8 | 44 | 1 | regress_rls_carol | great manga + 1 | 11 | 1 | regress_rls_bob | my first novel + 5 | 44 | 2 | regress_rls_bob | my second manga + 6 | 22 | 1 | regress_rls_carol | great science fiction 9 | 22 | 1 | regress_rls_dave | awesome science fiction 10 | 33 | 2 | regress_rls_dave | awesome technology book 11 | 33 | 1 | regress_rls_carol | hoge @@ -645,8 
+652,8 @@ SELECT * FROM document; SELECT * FROM category; cid | cname -----+----------------- - 11 | novel 22 | science fiction + 11 | novel 33 | technology 44 | manga (4 rows) @@ -662,6 +669,7 @@ GRANT ALL ON t1 TO public; COPY t1 FROM stdin WITH ; ANALYZE t1; CREATE TABLE t2 (c float) INHERITS (t1); +NOTICE: table has parent, setting distribution columns to match parent table GRANT ALL ON t2 TO public; COPY t2 FROM stdin; ANALYZE t2; @@ -678,10 +686,10 @@ SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM t1; id | a | b -----+---+----- + 204 | 4 | def 102 | 2 | bbb 104 | 4 | dad 202 | 2 | bcd - 204 | 4 | def 302 | 2 | yyy (5 rows) @@ -700,17 +708,17 @@ EXPLAIN (COSTS OFF) SELECT * FROM t1; (9 rows) SELECT * FROM t1 WHERE f_leak(b); -NOTICE: f_leak => bbb -NOTICE: f_leak => dad -NOTICE: f_leak => bcd -NOTICE: f_leak => def -NOTICE: f_leak => yyy +NOTICE: f_leak => def (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => yyy (seg1 slice1 127.0.1.1:9203 pid=1527274) id | a | b -----+---+----- + 204 | 4 | def 102 | 2 | bbb 104 | 4 | dad 202 | 2 | bcd - 204 | 4 | def 302 | 2 | yyy (5 rows) @@ -732,10 +740,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE f_leak(b); SELECT tableoid::regclass, * FROM t1; tableoid | id | a | b ----------+-----+---+----- + t2 | 204 | 4 | def t1 | 102 | 2 | bbb t1 | 104 | 4 | dad t2 | 202 | 2 | bcd - t2 | 204 | 4 | def t3 | 302 | 2 | yyy (5 rows) @@ -750,16 +758,17 @@ EXPLAIN (COSTS OFF) SELECT *, t1 FROM t1; Filter: ((a % 2) = 0) -> Seq Scan on t3 t1_3 Filter: ((a % 2) = 0) + Optimizer: Postgres query optimizer (9 rows) -- reference to whole-row reference SELECT *, t1 FROM t1; id | a | b | t1 -----+---+-----+------------- + 204 | 4 | def | (204,4,def) 102 | 2 | bbb | (102,2,bbb) 104 | 4 | dad | (104,4,dad) 202 | 2 | bcd | (202,2,bcd) - 
204 | 4 | def | (204,4,def) 302 | 2 | yyy | (302,2,yyy) (5 rows) @@ -781,10 +790,10 @@ EXPLAIN (COSTS OFF) SELECT *, t1 FROM t1; SELECT * FROM t1 FOR SHARE; id | a | b -----+---+----- + 204 | 4 | def 102 | 2 | bbb 104 | 4 | dad 202 | 2 | bcd - 204 | 4 | def 302 | 2 | yyy (5 rows) @@ -803,17 +812,17 @@ EXPLAIN (COSTS OFF) SELECT * FROM t1 FOR SHARE; (9 rows) SELECT * FROM t1 WHERE f_leak(b) FOR SHARE; -NOTICE: f_leak => bbb -NOTICE: f_leak => dad -NOTICE: f_leak => bcd -NOTICE: f_leak => def -NOTICE: f_leak => yyy +NOTICE: f_leak => def (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => yyy (seg1 slice1 127.0.1.1:9203 pid=1527274) id | a | b -----+---+----- + 204 | 4 | def 102 | 2 | bbb 104 | 4 | dad 202 | 2 | bcd - 204 | 4 | def 302 | 2 | yyy (5 rows) @@ -836,10 +845,10 @@ SELECT a, b, tableoid::regclass FROM t2 UNION ALL SELECT a, b, tableoid::regclas a | b | tableoid ---+-----+---------- 1 | abc | t2 - 3 | cde | t2 - 1 | xxx | t3 2 | yyy | t3 3 | zzz | t3 + 3 | cde | t2 + 1 | xxx | t3 (5 rows) EXPLAIN (COSTS OFF) SELECT a, b, tableoid::regclass FROM t2 UNION ALL SELECT a, b, tableoid::regclass FROM t3; @@ -857,30 +866,30 @@ EXPLAIN (COSTS OFF) SELECT a, b, tableoid::regclass FROM t2 UNION ALL SELECT a, RESET SESSION AUTHORIZATION; SET row_security TO OFF; SELECT * FROM t1 WHERE f_leak(b); -NOTICE: f_leak => aba -NOTICE: f_leak => bbb -NOTICE: f_leak => ccc -NOTICE: f_leak => dad -NOTICE: f_leak => abc -NOTICE: f_leak => bcd -NOTICE: f_leak => cde -NOTICE: f_leak => def -NOTICE: f_leak => xxx -NOTICE: f_leak => yyy -NOTICE: f_leak => zzz +NOTICE: f_leak => def (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => aba (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcd (seg1 slice1 
127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => yyy (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => zzz (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => xxx (seg0 slice1 127.0.1.1:9202 pid=1527273) id | a | b -----+---+----- + 204 | 4 | def + 201 | 1 | abc + 202 | 2 | bcd + 302 | 2 | yyy + 303 | 3 | zzz 101 | 1 | aba 102 | 2 | bbb 103 | 3 | ccc 104 | 4 | dad - 201 | 1 | abc - 202 | 2 | bcd 203 | 3 | cde - 204 | 4 | def 301 | 1 | xxx - 302 | 2 | yyy - 303 | 3 | zzz (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE f_leak(b); @@ -901,30 +910,30 @@ EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE f_leak(b); SET SESSION AUTHORIZATION regress_rls_exempt_user; SET row_security TO OFF; SELECT * FROM t1 WHERE f_leak(b); -NOTICE: f_leak => aba -NOTICE: f_leak => bbb -NOTICE: f_leak => ccc -NOTICE: f_leak => dad -NOTICE: f_leak => abc -NOTICE: f_leak => bcd -NOTICE: f_leak => cde -NOTICE: f_leak => def -NOTICE: f_leak => xxx -NOTICE: f_leak => yyy -NOTICE: f_leak => zzz +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => def (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => yyy (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => zzz (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => aba (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => xxx (seg0 slice1 127.0.1.1:9202 pid=1527273) id | a | b -----+---+----- + 204 | 4 | def + 
201 | 1 | abc + 202 | 2 | bcd + 302 | 2 | yyy + 303 | 3 | zzz 101 | 1 | aba 102 | 2 | bbb 103 | 3 | ccc 104 | 4 | dad - 201 | 1 | abc - 202 | 2 | bcd 203 | 3 | cde - 204 | 4 | def 301 | 1 | xxx - 302 | 2 | yyy - 303 | 3 | zzz (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE f_leak(b); @@ -952,11 +961,16 @@ CREATE TABLE part_document ( dauthor name, dtitle text ) PARTITION BY RANGE (cid); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'did' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. GRANT ALL ON part_document TO public; -- Create partitions for document categories CREATE TABLE part_document_fiction PARTITION OF part_document FOR VALUES FROM (11) to (12); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE part_document_satire PARTITION OF part_document FOR VALUES FROM (55) to (56); +NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE part_document_nonfiction PARTITION OF part_document FOR VALUES FROM (99) to (100); +NOTICE: table has parent, setting distribution columns to match parent table GRANT ALL ON part_document_fiction TO public; GRANT ALL ON part_document_satire TO public; GRANT ALL ON part_document_nonfiction TO public; @@ -1000,6 +1014,7 @@ Policies: Partitions: part_document_fiction FOR VALUES FROM (11) TO (12), part_document_nonfiction FOR VALUES FROM (99) TO (100), part_document_satire FOR VALUES FROM (55) TO (56) +Distributed by: (did) SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename like '%part_document%' ORDER BY policyname; schemaname | tablename | policyname | permissive | roles | cmd | qual | with_check @@ -1014,10 +1029,10 @@ SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename SET SESSION AUTHORIZATION 
regress_rls_bob; SET row_security TO ON; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => my first satire +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1027,8 +1042,8 @@ NOTICE: f_leak => my first satire (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) InitPlan 1 (returns $0) (slice2) -> Gather Motion 1:1 (slice3; segments: 1) @@ -1047,16 +1062,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- viewpoint from regress_rls_carol SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => my first satire -NOTICE: f_leak => great satire -NOTICE: f_leak => my science textbook -NOTICE: f_leak => my history book -NOTICE: f_leak => great technology book -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: 
f_leak => my history book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my science textbook (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1072,8 +1087,8 @@ NOTICE: f_leak => awesome technology book (10 rows) EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) InitPlan 1 (returns $0) (slice2) -> Gather Motion 1:1 (slice3; segments: 1) @@ -1092,10 +1107,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- viewpoint from regress_rls_dave SET SESSION AUTHORIZATION regress_rls_dave; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1119,22 +1134,22 @@ EXPLAIN (COSTS 
OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- pp1 ERROR INSERT INTO part_document VALUES (100, 11, 5, 'regress_rls_dave', 'testing pp1'); -- fail -ERROR: new row violates row-level security policy for table "part_document" +ERROR: new row violates row-level security policy for table "part_document" (seg2 127.0.1.1:9204 pid=1527275) -- pp1r ERROR INSERT INTO part_document VALUES (100, 99, 1, 'regress_rls_dave', 'testing pp1r'); -- fail -ERROR: new row violates row-level security policy "pp1r" for table "part_document" +ERROR: new row violates row-level security policy "pp1r" for table "part_document" (seg2 127.0.1.1:9204 pid=1527275) -- Show that RLS policy does not apply for direct inserts to children -- This should fail with RLS POLICY pp1r violation. INSERT INTO part_document VALUES (100, 55, 1, 'regress_rls_dave', 'testing RLS with partitions'); -- fail -ERROR: new row violates row-level security policy "pp1r" for table "part_document" +ERROR: new row violates row-level security policy "pp1r" for table "part_document" (seg2 127.0.1.1:9204 pid=1527275) -- But this should succeed. 
INSERT INTO part_document_satire VALUES (100, 55, 1, 'regress_rls_dave', 'testing RLS with partitions'); -- success -- We still cannot see the row using the parent SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1145,9 +1160,9 @@ NOTICE: f_leak => awesome science fiction -- But we can if we look directly SELECT * FROM part_document_satire WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first satire -NOTICE: f_leak => great satire -NOTICE: f_leak => testing RLS with partitions +NOTICE: f_leak => my first satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => testing RLS with partitions (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great satire (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------------- 4 | 55 | 1 | regress_rls_bob | my first satire @@ -1163,7 +1178,7 @@ CREATE POLICY pp3 ON part_document_satire AS RESTRICTIVE -- This should fail with RLS violation now. 
SET SESSION AUTHORIZATION regress_rls_dave; INSERT INTO part_document_satire VALUES (101, 55, 1, 'regress_rls_dave', 'testing RLS with partitions'); -- fail -ERROR: new row violates row-level security policy for table "part_document_satire" +ERROR: new row violates row-level security policy for table "part_document_satire" (seg0 127.0.1.1:9202 pid=1527273) -- And now we cannot see directly into the partition either, due to RLS SELECT * FROM part_document_satire WHERE f_leak(dtitle) ORDER BY did; did | cid | dlevel | dauthor | dtitle @@ -1173,10 +1188,10 @@ SELECT * FROM part_document_satire WHERE f_leak(dtitle) ORDER BY did; -- The parent looks same as before -- viewpoint from regress_rls_dave SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1201,17 +1216,17 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- viewpoint from regress_rls_carol SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => great science fiction -NOTICE: f_leak => awesome science fiction -NOTICE: f_leak => my first satire -NOTICE: f_leak => great satire -NOTICE: f_leak => testing RLS with partitions -NOTICE: f_leak => my science textbook -NOTICE: f_leak => my history book -NOTICE: f_leak => great 
technology book -NOTICE: f_leak => awesome technology book +NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my first satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => awesome science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my science textbook (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => testing RLS with partitions (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my history book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => awesome technology book (seg2 slice1 127.0.1.1:9204 pid=1527275) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1228,8 +1243,8 @@ NOTICE: f_leak => awesome technology book (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) InitPlan 1 (returns $0) (slice2) -> Gather Motion 1:1 (slice3; segments: 1) @@ -1255,11 +1270,11 @@ ALTER POLICY pp1 ON part_document USING (dauthor = current_user); -- viewpoint from regress_rls_bob again SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel -NOTICE: f_leak => my first satire -NOTICE: f_leak => my science textbook -NOTICE: f_leak => my history book +NOTICE: f_leak => my second novel (seg0 slice1 127.0.1.1:9202 pid=1527273) 
+NOTICE: f_leak => my first novel (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => my history book (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => my first satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => my science textbook (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-----------------+--------------------- 1 | 11 | 1 | regress_rls_bob | my first novel @@ -1272,9 +1287,9 @@ NOTICE: f_leak => my history book -- viewpoint from rls_regres_carol again SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => great science fiction -NOTICE: f_leak => great satire -NOTICE: f_leak => great technology book +NOTICE: f_leak => great satire (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => great science fiction (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => great technology book (seg0 slice1 127.0.1.1:9202 pid=1527273) did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------- 6 | 11 | 1 | regress_rls_carol | great science fiction @@ -1283,8 +1298,8 @@ NOTICE: f_leak => great technology book (3 rows) EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); - QUERY PLAN ---------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Append -> Seq Scan on part_document_fiction part_document_1 @@ -1391,12 +1406,16 @@ CREATE POLICY pp3 ON part_document AS RESTRICTIVE USING ((SELECT dlevel <= seclv FROM uaccount WHERE pguser = current_user)); SET SESSION AUTHORIZATION regress_rls_carol; INSERT INTO part_document VALUES (100, 11, 5, 'regress_rls_carol', 'testing pp3'); -- fail -ERROR: new row violates row-level security policy "pp3" for table "part_document" +ERROR: new row violates row-level 
security policy "pp3" for table "part_document" (seg2 127.0.1.1:9204 pid=1527275) ----- Dependencies ----- SET SESSION AUTHORIZATION regress_rls_alice; SET row_security TO ON; CREATE TABLE dependee (x integer, y integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE dependent (x integer, y integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY d1 ON dependent FOR ALL TO PUBLIC USING (x = (SELECT d.x FROM dependee d WHERE d.y = y)); @@ -1420,6 +1439,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM dependent; -- After drop, should be unqualifie -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE rec1 (x integer, y integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY r1 ON rec1 USING (x = (SELECT r.x FROM rec1 r WHERE y = r.y)); ALTER TABLE rec1 ENABLE ROW LEVEL SECURITY; SET SESSION AUTHORIZATION regress_rls_bob; @@ -1430,6 +1451,8 @@ ERROR: infinite recursion detected in policy for relation "rec1" -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE rec2 (a integer, b integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ALTER POLICY r1 ON rec1 USING (x = (SELECT a FROM rec2 WHERE b = y)); CREATE POLICY r2 ON rec2 USING (a = (SELECT x FROM rec1 WHERE y = b)); ALTER TABLE rec2 ENABLE ROW LEVEL SECURITY; @@ -1469,8 +1492,12 @@ ERROR: infinite recursion detected in policy for relation "rec1" -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE s1 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO s1 (SELECT x, md5(x::text) FROM generate_series(-10,10) x); CREATE TABLE s2 (x int, y text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
INSERT INTO s2 (SELECT x, md5(x::text) FROM generate_series(-6,6) x); GRANT SELECT ON s1, s2 TO regress_rls_bob; CREATE POLICY p1 ON s1 USING (a in (select x from s2 where y like '%2f%')); @@ -1489,8 +1516,8 @@ DROP POLICY p3 on s1; ALTER POLICY p2 ON s2 USING (x % 2 = 0); SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM s1 WHERE f_leak(b); -- OK -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c +NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+---------------------------------- 2 | c81e728d9d4c2f636f067f89cc14862c @@ -1514,8 +1541,8 @@ SET SESSION AUTHORIZATION regress_rls_alice; ALTER POLICY p1 ON s1 USING (a in (select x from v2)); -- using VIEW in RLS policy SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM s1 WHERE f_leak(b); -- OK -NOTICE: f_leak => 0267aaf632e87a63288a08331f22c7c3 -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc +NOTICE: f_leak => 0267aaf632e87a63288a08331f22c7c3 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 slice1 127.0.1.1:9204 pid=1527275) a | b ----+---------------------------------- -4 | 0267aaf632e87a63288a08331f22c7c3 @@ -1539,8 +1566,8 @@ SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; xx | x | y ----+----+---------------------------------- -6 | -6 | 596a3d04481816330f07e4f97510c28f - -4 | -4 | 0267aaf632e87a63288a08331f22c7c3 2 | 2 | c81e728d9d4c2f636f067f89cc14862c + -4 | -4 | 0267aaf632e87a63288a08331f22c7c3 (3 rows) EXPLAIN (COSTS OFF) SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; @@ -1597,30 +1624,30 @@ EXPLAIN (COSTS OFF) EXECUTE p1(2); RESET SESSION AUTHORIZATION; SET row_security TO OFF; SELECT * FROM t1 WHERE f_leak(b); -NOTICE: f_leak => aba -NOTICE: f_leak => bbb -NOTICE: f_leak => ccc -NOTICE: f_leak => dad 
-NOTICE: f_leak => abc -NOTICE: f_leak => bcd -NOTICE: f_leak => cde -NOTICE: f_leak => def -NOTICE: f_leak => xxx -NOTICE: f_leak => yyy -NOTICE: f_leak => zzz +NOTICE: f_leak => aba (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => bcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => def (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => yyy (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => zzz (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => xxx (seg0 slice1 127.0.1.1:9202 pid=1527273) id | a | b -----+---+----- + 204 | 4 | def + 201 | 1 | abc + 202 | 2 | bcd + 302 | 2 | yyy + 303 | 3 | zzz 101 | 1 | aba 102 | 2 | bbb 103 | 3 | ccc 104 | 4 | dad - 201 | 1 | abc - 202 | 2 | bcd 203 | 3 | cde - 204 | 4 | def 301 | 1 | xxx - 302 | 2 | yyy - 303 | 3 | zzz (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE f_leak(b); @@ -1643,9 +1670,9 @@ EXECUTE p1(2); -----+---+----- 101 | 1 | aba 102 | 2 | bbb + 301 | 1 | xxx 201 | 1 | abc 202 | 2 | bcd - 301 | 1 | xxx 302 | 2 | yyy (6 rows) @@ -1730,29 +1757,31 @@ EXPLAIN (COSTS OFF) UPDATE t1 SET b = b || b WHERE f_leak(b); Filter: (((a % 2) = 0) AND f_leak(b)) -> Seq Scan on t3 t1_3 Filter: (((a % 2) = 0) AND f_leak(b)) -(12 rows) + Optimizer: Postgres query optimizer +(13 rows) UPDATE t1 SET b = b || b WHERE f_leak(b); -NOTICE: f_leak => bbb -NOTICE: f_leak => dad -NOTICE: f_leak => bcd -NOTICE: f_leak => def -NOTICE: f_leak => yyy +NOTICE: f_leak => bbb (seg0 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcd (seg1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => def (seg2 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => dad (seg0 127.0.1.1:9202 pid=1527273) 
+NOTICE: f_leak => yyy (seg1 127.0.1.1:9203 pid=1527274) EXPLAIN (COSTS OFF) UPDATE only t1 SET b = b || '_updt' WHERE f_leak(b); QUERY PLAN ----------------------------------------------- Update on t1 -> Seq Scan on t1 Filter: (((a % 2) = 0) AND f_leak(b)) -(3 rows) + Optimizer: Postgres query optimizer +(4 rows) UPDATE only t1 SET b = b || '_updt' WHERE f_leak(b); -NOTICE: f_leak => bbbbbb -NOTICE: f_leak => daddad +NOTICE: f_leak => bbbbbb (seg0 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => daddad (seg0 127.0.1.1:9202 pid=1527273) -- returning clause with system column UPDATE only t1 SET b = b WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; -NOTICE: f_leak => bbbbbb_updt -NOTICE: f_leak => daddad_updt +NOTICE: f_leak => bbbbbb_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => daddad_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) tableoid | id | a | b | t1 ----------+-----+---+-------------+--------------------- t1 | 102 | 2 | bbbbbb_updt | (102,2,bbbbbb_updt) @@ -1760,32 +1789,32 @@ NOTICE: f_leak => daddad_updt (2 rows) UPDATE t1 SET b = b WHERE f_leak(b) RETURNING *; -NOTICE: f_leak => bbbbbb_updt -NOTICE: f_leak => daddad_updt -NOTICE: f_leak => bcdbcd -NOTICE: f_leak => defdef -NOTICE: f_leak => yyyyyy +NOTICE: f_leak => bbbbbb_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => defdef (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => daddad_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcdbcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => yyyyyy (seg1 slice1 127.0.1.1:9203 pid=1527274) id | a | b -----+---+------------- + 204 | 4 | defdef 102 | 2 | bbbbbb_updt 104 | 4 | daddad_updt 202 | 2 | bcdbcd - 204 | 4 | defdef 302 | 2 | yyyyyy (5 rows) UPDATE t1 SET b = b WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; -NOTICE: f_leak => bbbbbb_updt -NOTICE: f_leak => daddad_updt -NOTICE: f_leak => bcdbcd -NOTICE: f_leak => defdef -NOTICE: f_leak => yyyyyy +NOTICE: f_leak => bbbbbb_updt 
(seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcdbcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => defdef (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => daddad_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => yyyyyy (seg1 slice1 127.0.1.1:9203 pid=1527274) tableoid | id | a | b | t1 ----------+-----+---+-------------+--------------------- + t2 | 204 | 4 | defdef | (204,4,defdef) t1 | 102 | 2 | bbbbbb_updt | (102,2,bbbbbb_updt) t1 | 104 | 4 | daddad_updt | (104,4,daddad_updt) t2 | 202 | 2 | bcdbcd | (202,2,bcdbcd) - t2 | 204 | 4 | defdef | (204,4,defdef) t3 | 302 | 2 | yyyyyy | (302,2,yyyyyy) (5 rows) @@ -1807,12 +1836,12 @@ WHERE t2.a = 3 and t3.a = 2 AND f_leak(t2.b) AND f_leak(t3.b); UPDATE t2 SET b=t2.b FROM t3 WHERE t2.a = 3 and t3.a = 2 AND f_leak(t2.b) AND f_leak(t3.b); -NOTICE: f_leak => cde -NOTICE: f_leak => yyyyyy +NOTICE: f_leak => cde (seg0 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => yyyyyy (seg1 slice1 127.0.1.1:9203 pid=1527342) EXPLAIN (COSTS OFF) UPDATE t1 SET b=t1.b FROM t2 WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); - QUERY PLAN ------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------- Update on t1 Update on t1 t1_1 Update on t2 t1_2 @@ -1833,11 +1862,11 @@ WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); UPDATE t1 SET b=t1.b FROM t2 WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); -NOTICE: f_leak => cde +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527343) EXPLAIN (COSTS OFF) UPDATE t2 SET b=t2.b FROM t1 WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------- Update on t2 -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) -> 
Nested Loop @@ -1856,7 +1885,7 @@ WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); UPDATE t2 SET b=t2.b FROM t1 WHERE t1.a = 3 and t2.a = 3 AND f_leak(t1.b) AND f_leak(t2.b); -NOTICE: f_leak => cde +NOTICE: f_leak => cde (seg0 slice2 127.0.1.1:9202 pid=1527348) -- updates with from clause self join EXPLAIN (COSTS OFF) UPDATE t2 t2_1 SET b = t2_2.b FROM t2 t2_2 WHERE t2_1.a = 3 AND t2_2.a = t2_1.a AND t2_2.b = t2_1.b @@ -1880,8 +1909,8 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2; UPDATE t2 t2_1 SET b = t2_2.b FROM t2 t2_2 WHERE t2_1.a = 3 AND t2_2.a = t2_1.a AND t2_2.b = t2_1.b AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2; -NOTICE: f_leak => cde -NOTICE: f_leak => cde +NOTICE: f_leak => cde (seg0 slice3 127.0.1.1:9202 pid=1527348) +NOTICE: f_leak => cde (seg0 slice2 127.0.1.1:9202 pid=1527343) id | a | b | c | id | a | b | c | t2_1 | t2_2 -----+---+-----+-----+-----+---+-----+-----+-----------------+----------------- 203 | 3 | cde | 3.3 | 203 | 3 | cde | 3.3 | (203,3,cde,3.3) | (203,3,cde,3.3) @@ -1922,10 +1951,10 @@ AND f_leak(t1_1.b) AND f_leak(t1_2.b) RETURNING *, t1_1, t1_2; UPDATE t1 t1_1 SET b = t1_2.b FROM t1 t1_2 WHERE t1_1.a = 4 AND t1_2.a = t1_1.a AND t1_2.b = t1_1.b AND f_leak(t1_1.b) AND f_leak(t1_2.b) RETURNING *, t1_1, t1_2; -NOTICE: f_leak => daddad_updt -NOTICE: f_leak => daddad_updt -NOTICE: f_leak => defdef -NOTICE: f_leak => defdef +NOTICE: f_leak => daddad_updt (seg0 slice3 127.0.1.1:9202 pid=1527348) +NOTICE: f_leak => daddad_updt (seg0 slice2 127.0.1.1:9202 pid=1527343) +NOTICE: f_leak => defdef (seg2 slice3 127.0.1.1:9204 pid=1527350) +NOTICE: f_leak => defdef (seg2 slice2 127.0.1.1:9204 pid=1527344) id | a | b | id | a | b | t1_1 | t1_2 -----+---+-------------+-----+---+-------------+---------------------+--------------------- 104 | 4 | daddad_updt | 104 | 4 | daddad_updt | (104,4,daddad_updt) | (104,4,daddad_updt) @@ -1975,11 +2004,12 @@ EXPLAIN (COSTS OFF) DELETE FROM t1 WHERE f_leak(b); 
Filter: (((a % 2) = 0) AND f_leak(b)) -> Seq Scan on t3 t1_3 Filter: (((a % 2) = 0) AND f_leak(b)) -(11 rows) + Optimizer: Postgres query optimizer +(12 rows) DELETE FROM only t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; -NOTICE: f_leak => bbbbbb_updt -NOTICE: f_leak => daddad_updt +NOTICE: f_leak => bbbbbb_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => daddad_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) tableoid | id | a | b | t1 ----------+-----+---+-------------+--------------------- t1 | 102 | 2 | bbbbbb_updt | (102,2,bbbbbb_updt) @@ -1987,13 +2017,13 @@ NOTICE: f_leak => daddad_updt (2 rows) DELETE FROM t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; -NOTICE: f_leak => bcdbcd -NOTICE: f_leak => defdef -NOTICE: f_leak => yyyyyy +NOTICE: f_leak => defdef (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bcdbcd (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => yyyyyy (seg1 slice1 127.0.1.1:9203 pid=1527274) tableoid | id | a | b | t1 ----------+-----+---+--------+---------------- - t2 | 202 | 2 | bcdbcd | (202,2,bcdbcd) t2 | 204 | 4 | defdef | (204,4,defdef) + t2 | 202 | 2 | bcdbcd | (202,2,bcdbcd) t3 | 302 | 2 | yyyyyy | (302,2,yyyyyy) (3 rows) @@ -2002,6 +2032,8 @@ NOTICE: f_leak => yyyyyy -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE b1 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
INSERT INTO b1 (SELECT x, md5(x::text) FROM generate_series(-10,10) x); CREATE POLICY p1 ON b1 USING (a % 2 = 0); ALTER TABLE b1 ENABLE ROW LEVEL SECURITY; @@ -2022,24 +2054,24 @@ EXPLAIN (COSTS OFF) SELECT * FROM bv1 WHERE f_leak(b); (6 rows) SELECT * FROM bv1 WHERE f_leak(b); -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 +NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ----+---------------------------------- + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 10 | d3d9446802a44259755d38e6d163e820 2 | c81e728d9d4c2f636f067f89cc14862c 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 (5 rows) INSERT INTO bv1 VALUES (-1, 'xxx'); -- should fail view WCO -ERROR: new row violates row-level security policy for table "b1" +ERROR: new row violates row-level security policy for table "b1" (seg2 127.0.1.1:9204 pid=1527275) INSERT INTO bv1 VALUES (11, 'xxx'); -- should fail RLS check -ERROR: new row violates row-level security policy for table "b1" +ERROR: new row violates row-level security policy for table "b1" (seg2 127.0.1.1:9204 pid=1527275) INSERT INTO bv1 VALUES (12, 'xxx'); -- ok EXPLAIN (COSTS OFF) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND f_leak(b); QUERY PLAN @@ -2047,44 +2079,46 @@ EXPLAIN (COSTS OFF) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND 
f_leak(b); Update on b1 -> Seq Scan on b1 Filter: ((a > 0) AND (a = 4) AND ((a % 2) = 0) AND f_leak(b)) -(3 rows) + Optimizer: Postgres query optimizer +(4 rows) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND f_leak(b); -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c +NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c (seg0 127.0.1.1:9202 pid=1527273) EXPLAIN (COSTS OFF) DELETE FROM bv1 WHERE a = 6 AND f_leak(b); QUERY PLAN ----------------------------------------------------------------------- Delete on b1 -> Seq Scan on b1 Filter: ((a > 0) AND (a = 6) AND ((a % 2) = 0) AND f_leak(b)) -(3 rows) + Optimizer: Postgres query optimizer +(4 rows) DELETE FROM bv1 WHERE a = 6 AND f_leak(b); -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 127.0.1.1:9204 pid=1527275) SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM b1; a | b -----+---------------------------------- - -10 | 1b0fd9efa5279c4203b7c70233f86dbf - -9 | 252e691406782824eec43d7eadc3d256 -8 | a8d2ec85eaf98407310b72eb73dda247 - -7 | 74687a12d3915d3c4d83f1af7b3683d5 - -6 | 596a3d04481816330f07e4f97510c28f + -1 | 6bb61e3b7bce0931da574d19d1d82c88 + 5 | e4da3b7fbbce2345d7772b0674a318d5 + 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 10 | d3d9446802a44259755d38e6d163e820 + -9 | 252e691406782824eec43d7eadc3d256 -5 | 47c1b025fa18ea96c33fbb6718688c0f -4 | 0267aaf632e87a63288a08331f22c7c3 -3 | b3149ecea4628efd23d2f86e5a723472 -2 | 5d7b9adcbe1c629ec722529dd12e5129 - -1 | 6bb61e3b7bce0931da574d19d1d82c88 0 | cfcd208495d565ef66e7dff9f98764da 1 | c4ca4238a0b923820dcc509a6f75849b + 12 | xxx + -10 | 1b0fd9efa5279c4203b7c70233f86dbf + -7 | 74687a12d3915d3c4d83f1af7b3683d5 + -6 | 596a3d04481816330f07e4f97510c28f 2 | c81e728d9d4c2f636f067f89cc14862c 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 7 | 8f14e45fceea167a5a36dedd4bea2543 8 | c9f0f895fb98ab9159f51fd0297e236d - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 10 | 
d3d9446802a44259755d38e6d163e820 - 12 | xxx 4 | yyy (21 rows) @@ -2111,7 +2145,7 @@ SELECT * FROM document WHERE did = 2; -- alternative UPDATE path happens to be taken): INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, dauthor = EXCLUDED.dauthor; -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table "document" (seg0 127.0.1.1:9202 pid=1527273) -- Violates USING qual for UPDATE policy p3. -- -- UPDATE path is taken, but UPDATE fails purely because *existing* row to be @@ -2120,7 +2154,7 @@ ERROR: new row violates row-level security policy for table "document" INSERT INTO document VALUES (33, 22, 1, 'regress_rls_bob', 'okay science fiction'); -- preparation for next statement INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'Some novel, replaces sci-fi') -- takes UPDATE path ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; -ERROR: new row violates row-level security policy (USING expression) for table "document" +ERROR: new row violates row-level security policy (USING expression) for table "document" (seg2 127.0.1.1:9204 pid=1527275) -- Fine (we UPDATE, since INSERT WCOs and UPDATE security barrier quals + WCOs -- not violated): INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') @@ -2151,7 +2185,7 @@ INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel' -- passing quals: INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; -ERROR: new row violates row-level security policy (USING expression) for table "document" +ERROR: new row violates 
row-level security policy (USING expression) for table "document" (seg1 slice1 127.0.1.1:9203 pid=1527274) -- Don't fail just because INSERT doesn't satisfy WITH CHECK option that -- originated as a barrier/USING() qual from the UPDATE. Note that the UPDATE -- path *isn't* taken, and so UPDATE-related policy does not apply: @@ -2168,7 +2202,7 @@ INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'techno -- irrelevant, in fact. INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; -ERROR: new row violates row-level security policy (USING expression) for table "document" +ERROR: new row violates row-level security policy (USING expression) for table "document" (seg1 slice1 127.0.1.1:9203 pid=1527274) -- Test default USING qual enforced as WCO SET SESSION AUTHORIZATION regress_rls_alice; DROP POLICY p1 ON document; @@ -2190,14 +2224,14 @@ SET SESSION AUTHORIZATION regress_rls_bob; -- UPDATE to make this fail: INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table "document" (seg1 slice1 127.0.1.1:9203 pid=1527274) -- UPDATE path is taken here. 
Existing tuple passes, since its cid -- corresponds to "novel", but default USING qual is enforced against -- post-UPDATE tuple too (as always when updating with a policy that lacks an -- explicit WCO), and so this fails: INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'my first novel') ON CONFLICT (did) DO UPDATE SET cid = EXCLUDED.cid, dtitle = EXCLUDED.dtitle RETURNING *; -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table "document" (seg0 slice1 127.0.1.1:9202 pid=1527273) SET SESSION AUTHORIZATION regress_rls_alice; DROP POLICY p3_with_default ON document; -- @@ -2211,22 +2245,26 @@ SET SESSION AUTHORIZATION regress_rls_bob; -- Fails, since ALL WCO is enforced in insert path: INSERT INTO document VALUES (80, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33; -ERROR: new row violates row-level security policy for table "document" +ERROR: new row violates row-level security policy for table "document" (seg0 127.0.1.1:9202 pid=1527273) -- Fails, since ALL policy USING qual is enforced (existing, target tuple is in -- violation, since it has the "manga" cid): INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; -ERROR: new row violates row-level security policy (USING expression) for table "document" +ERROR: new row violates row-level security policy (USING expression) for table "document" (seg0 127.0.1.1:9202 pid=1527273) -- Fails, since ALL WCO are enforced: INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') ON CONFLICT (did) DO UPDATE SET dauthor = 'regress_rls_carol'; -ERROR: new row violates row-level security policy 
for table "document" +ERROR: new row violates row-level security policy for table "document" (seg1 127.0.1.1:9203 pid=1527274) -- -- ROLE/GROUP -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE z1 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE z2 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. GRANT SELECT ON z1,z2 TO regress_rls_group1, regress_rls_group2, regress_rls_bob, regress_rls_carol; INSERT INTO z1 VALUES @@ -2241,8 +2279,8 @@ analyze z1; analyze z2; SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM z1 WHERE f_leak(b); -NOTICE: f_leak => bbb -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 2 | bbb @@ -2300,8 +2338,8 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test3; SET ROLE regress_rls_group1; SELECT * FROM z1 WHERE f_leak(b); -NOTICE: f_leak => bbb -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 2 | bbb @@ -2356,12 +2394,12 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test3; SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM z1 WHERE f_leak(b); -NOTICE: f_leak => aba -NOTICE: f_leak => ccc +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => aba (seg1 slice1 127.0.1.1:9203 pid=1527274) a | b ---+----- - 1 | aba 3 | ccc + 1 | aba (2 rows) 
EXPLAIN (COSTS OFF) SELECT * FROM z1 WHERE f_leak(b); @@ -2412,8 +2450,8 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test3; SET ROLE regress_rls_group2; SELECT * FROM z1 WHERE f_leak(b); -NOTICE: f_leak => aba -NOTICE: f_leak => ccc +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => aba (seg1 slice1 127.0.1.1:9203 pid=1527274) a | b ---+----- 1 | aba @@ -2476,10 +2514,10 @@ GRANT SELECT ON rls_view TO regress_rls_bob; -- Query as role that is not owner of view or table. Should return all records. SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM rls_view; -NOTICE: f_leak => aba -NOTICE: f_leak => bbb -NOTICE: f_leak => ccc -NOTICE: f_leak => dad +NOTICE: f_leak => aba (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 1 | aba @@ -2500,16 +2538,16 @@ EXPLAIN (COSTS OFF) SELECT * FROM rls_view; -- Query as view/table owner. Should return all records. SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM rls_view; -NOTICE: f_leak => aba -NOTICE: f_leak => bbb -NOTICE: f_leak => ccc -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => ccc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => aba (seg1 slice1 127.0.1.1:9203 pid=1527274) a | b ---+----- - 1 | aba 2 | bbb 3 | ccc 4 | dad + 1 | aba (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM rls_view; @@ -2530,8 +2568,8 @@ GRANT SELECT ON rls_view TO regress_rls_alice; -- Should return records based on view owner policies. 
SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM rls_view; -NOTICE: f_leak => bbb -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 2 | bbb @@ -2551,8 +2589,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM rls_view; -- Should return records based on view owner policies. SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM rls_view; -NOTICE: f_leak => bbb -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 2 | bbb @@ -2578,8 +2616,8 @@ ERROR: permission denied for view rls_view SET SESSION AUTHORIZATION regress_rls_bob; GRANT SELECT ON rls_view TO regress_rls_carol; SELECT * FROM rls_view; -NOTICE: f_leak => bbb -NOTICE: f_leak => dad +NOTICE: f_leak => bbb (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => dad (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ---+----- 2 | bbb @@ -2602,6 +2640,8 @@ DROP VIEW rls_view; -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE x1 (a int, b text, c text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
GRANT ALL ON x1 TO PUBLIC; INSERT INTO x1 VALUES (1, 'abc', 'regress_rls_bob'), @@ -2620,12 +2660,12 @@ CREATE POLICY p4 ON x1 FOR DELETE USING (a < 8); ALTER TABLE x1 ENABLE ROW LEVEL SECURITY; SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM x1 WHERE f_leak(b) ORDER BY a ASC; -NOTICE: f_leak => abc -NOTICE: f_leak => bcd -NOTICE: f_leak => def -NOTICE: f_leak => efg -NOTICE: f_leak => fgh -NOTICE: f_leak => fgh +NOTICE: f_leak => bcd (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => def (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => efg (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => fgh (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh (seg2 slice1 127.0.1.1:9204 pid=1527275) a | b | c ---+-----+------------------- 1 | abc | regress_rls_bob @@ -2637,30 +2677,30 @@ NOTICE: f_leak => fgh (6 rows) UPDATE x1 SET b = b || '_updt' WHERE f_leak(b) RETURNING *; -NOTICE: f_leak => abc -NOTICE: f_leak => bcd -NOTICE: f_leak => def -NOTICE: f_leak => efg -NOTICE: f_leak => fgh -NOTICE: f_leak => fgh +NOTICE: f_leak => bcd (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => efg (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => def (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => fgh (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh (seg2 slice1 127.0.1.1:9204 pid=1527275) a | b | c ---+----------+------------------- 1 | abc_updt | regress_rls_bob 2 | bcd_updt | regress_rls_bob 4 | def_updt | regress_rls_carol + 8 | fgh_updt | regress_rls_carol 5 | efg_updt | regress_rls_bob 6 | fgh_updt | regress_rls_bob - 8 | fgh_updt | regress_rls_carol (6 rows) SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM x1 WHERE f_leak(b) ORDER BY a ASC; -NOTICE: f_leak => cde -NOTICE: f_leak => fgh -NOTICE: f_leak => bcd_updt -NOTICE: f_leak => def_updt -NOTICE: 
f_leak => fgh_updt -NOTICE: f_leak => fgh_updt +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bcd_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => def_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b | c ---+----------+------------------- 2 | bcd_updt | regress_rls_bob @@ -2672,36 +2712,36 @@ NOTICE: f_leak => fgh_updt (6 rows) UPDATE x1 SET b = b || '_updt' WHERE f_leak(b) RETURNING *; -NOTICE: f_leak => cde -NOTICE: f_leak => fgh -NOTICE: f_leak => bcd_updt -NOTICE: f_leak => def_updt -NOTICE: f_leak => fgh_updt -NOTICE: f_leak => fgh_updt +NOTICE: f_leak => cde (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => fgh (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => bcd_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => def_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b | c ---+---------------+------------------- + 6 | fgh_updt_updt | regress_rls_bob 3 | cde_updt | regress_rls_carol 7 | fgh_updt | regress_rls_carol 2 | bcd_updt_updt | regress_rls_bob 4 | def_updt_updt | regress_rls_carol - 6 | fgh_updt_updt | regress_rls_bob 8 | fgh_updt_updt | regress_rls_carol (6 rows) DELETE FROM x1 WHERE f_leak(b) RETURNING *; -NOTICE: f_leak => cde_updt -NOTICE: f_leak => fgh_updt -NOTICE: f_leak => bcd_updt_updt -NOTICE: f_leak => def_updt_updt -NOTICE: f_leak => fgh_updt_updt -NOTICE: f_leak => fgh_updt_updt +NOTICE: f_leak => cde_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt_updt (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => bcd_updt_updt 
(seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => def_updt_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => fgh_updt_updt (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b | c ---+---------------+------------------- + 6 | fgh_updt_updt | regress_rls_bob 3 | cde_updt | regress_rls_carol 7 | fgh_updt | regress_rls_carol 2 | bcd_updt_updt | regress_rls_bob 4 | def_updt_updt | regress_rls_carol - 6 | fgh_updt_updt | regress_rls_bob 8 | fgh_updt_updt | regress_rls_carol (6 rows) @@ -2710,7 +2750,11 @@ NOTICE: f_leak => fgh_updt_updt -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE y1 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE y2 (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
GRANT ALL ON y1, y2 TO regress_rls_bob; CREATE POLICY p1 ON y1 FOR ALL USING (a % 2 = 0); CREATE POLICY p2 ON y1 FOR SELECT USING (a > 2); @@ -2760,36 +2804,36 @@ CREATE POLICY p2 ON y2 USING (a % 3 = 0); CREATE POLICY p3 ON y2 USING (a % 4 = 0); SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM y2 WHERE f_leak(b); -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 98f13708210194c475687be6106a3b84 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak 
=> a87ff679a2f3e71d9181a67b7542122c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ----+---------------------------------- 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 4 | a87ff679a2f3e71d9181a67b7542122c + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 20 | 98f13708210194c475687be6106a3b84 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 14 | aab3238922bcc25a6f606eb525ffdc56 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 2 | c81e728d9d4c2f636f067f89cc14862c + 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 + 4 | a87ff679a2f3e71d9181a67b7542122c + 8 | c9f0f895fb98ab9159f51fd0297e236d 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 (14 rows) EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak(b); @@ -2805,43 +2849,43 @@ EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak(b); -- Qual push-down of leaky functions, when not referring to table -- SELECT * FROM y2 WHERE f_leak('abc'); -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc -NOTICE: f_leak => abc +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 
pid=1527274) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => abc (seg2 slice1 127.0.1.1:9204 pid=1527275) a | b ----+---------------------------------- 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 20 | 98f13708210194c475687be6106a3b84 2 | c81e728d9d4c2f636f067f89cc14862c 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d + 16 | c74d97b01eae257e44aa9d5bade97baf + 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 14 | aab3238922bcc25a6f606eb525ffdc56 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 16 
| c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 (14 rows) EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak('abc'); @@ -2856,11 +2900,13 @@ EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak('abc'); CREATE TABLE test_qual_pushdown ( abc text ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'abc' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO test_qual_pushdown VALUES ('abc'),('def'); ANALYZE test_qual_pushdown; SELECT * FROM y2 JOIN test_qual_pushdown ON (b = abc) WHERE f_leak(abc); -NOTICE: f_leak => abc -NOTICE: f_leak => def +NOTICE: f_leak => abc (seg0 slice2 127.0.1.1:9202 pid=1527372) +NOTICE: f_leak => def (seg2 slice2 127.0.1.1:9204 pid=1527373) a | b | abc ---+---+----- (0 rows) @@ -2881,20 +2927,20 @@ EXPLAIN (COSTS OFF) SELECT * FROM y2 JOIN test_qual_pushdown ON (b = abc) WHERE (10 rows) SELECT * FROM y2 JOIN test_qual_pushdown ON (b = abc) WHERE f_leak(b); -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 
c20ad4d76fe97759aa27a0c99bff6710 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 98f13708210194c475687be6106a3b84 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b | abc ---+---+----- (0 rows) @@ -2923,6 +2969,8 @@ NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table t2 drop cascades to table t3 CREATE TABLE t1 (a integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
GRANT SELECT ON t1 TO regress_rls_bob, regress_rls_carol; CREATE POLICY p1 ON t1 TO regress_rls_bob USING ((a % 2) = 0); CREATE POLICY p2 ON t1 TO regress_rls_carol USING ((a % 4) = 0); @@ -2970,35 +3018,37 @@ EXPLAIN (COSTS OFF) EXECUTE role_inval; RESET SESSION AUTHORIZATION; DROP TABLE t1 CASCADE; CREATE TABLE t1 (a integer, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY p1 ON t1 USING (a % 2 = 0); ALTER TABLE t1 ENABLE ROW LEVEL SECURITY; GRANT ALL ON t1 TO regress_rls_bob; INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x); SET SESSION AUTHORIZATION regress_rls_bob; WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 98f13708210194c475687be6106a3b84 (seg1 slice1 127.0.1.1:9203 pid=1527274) +NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: 
f_leak => d3d9446802a44259755d38e6d163e820 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 (seg2 slice1 127.0.1.1:9204 pid=1527275) +NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf (seg0 slice1 127.0.1.1:9202 pid=1527273) +NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 (seg0 slice1 127.0.1.1:9202 pid=1527273) a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 14 | aab3238922bcc25a6f606eb525ffdc56 + 2 | c81e728d9d4c2f636f067f89cc14862c + 4 | a87ff679a2f3e71d9181a67b7542122c + 8 | c9f0f895fb98ab9159f51fd0297e236d 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 20 | 98f13708210194c475687be6106a3b84 (11 rows) @@ -3014,25 +3064,25 @@ WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; (5 rows) WITH cte1 AS (UPDATE t1 SET a = a + 1 RETURNING *) SELECT * FROM cte1; --fail -ERROR: new row violates row-level security policy for table "t1" +ERROR: new row violates row-level security policy for table "t1" (seg1 slice1 127.0.1.1:9203 pid=1527274) WITH cte1 AS (UPDATE t1 SET a = a RETURNING *) SELECT * FROM cte1; --ok a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da 2 | c81e728d9d4c2f636f067f89cc14862c 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | 
aab3238922bcc25a6f606eb525ffdc56 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 20 | 98f13708210194c475687be6106a3b84 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 10 | d3d9446802a44259755d38e6d163e820 + 14 | aab3238922bcc25a6f606eb525ffdc56 (11 rows) WITH cte1 AS (INSERT INTO t1 VALUES (21, 'Fail') RETURNING *) SELECT * FROM cte1; --fail -ERROR: new row violates row-level security policy for table "t1" +ERROR: new row violates row-level security policy for table "t1" (seg2 slice1 127.0.1.1:9204 pid=1527275) WITH cte1 AS (INSERT INTO t1 VALUES (20, 'Success') RETURNING *) SELECT * FROM cte1; --ok a | b ----+--------- @@ -3069,6 +3119,8 @@ SELECT polname, relname -- SET SESSION AUTHORIZATION regress_rls_bob; CREATE TABLE t2 (a integer, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
INSERT INTO t2 (SELECT * FROM t1); EXPLAIN (COSTS OFF) INSERT INTO t2 (SELECT * FROM t1); QUERY PLAN @@ -3082,18 +3134,18 @@ EXPLAIN (COSTS OFF) INSERT INTO t2 (SELECT * FROM t1); SELECT * FROM t2; a | b ----+---------------------------------- + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 10 | d3d9446802a44259755d38e6d163e820 + 14 | aab3238922bcc25a6f606eb525ffdc56 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 20 | 98f13708210194c475687be6106a3b84 + 20 | Success 2 | c81e728d9d4c2f636f067f89cc14862c 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 - 20 | Success (12 rows) EXPLAIN (COSTS OFF) SELECT * FROM t2; @@ -3105,37 +3157,41 @@ EXPLAIN (COSTS OFF) SELECT * FROM t2; (3 rows) CREATE TABLE t3 AS SELECT * FROM t1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
SELECT * FROM t3; a | b ----+---------------------------------- 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c + 12 | c20ad4d76fe97759aa27a0c99bff6710 + 20 | 98f13708210194c475687be6106a3b84 + 20 | Success 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 14 | aab3238922bcc25a6f606eb525ffdc56 + 2 | c81e728d9d4c2f636f067f89cc14862c + 4 | a87ff679a2f3e71d9181a67b7542122c + 8 | c9f0f895fb98ab9159f51fd0297e236d 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 - 20 | Success (12 rows) SELECT * INTO t4 FROM t1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
SELECT * FROM t4; a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da 2 | c81e728d9d4c2f636f067f89cc14862c 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 10 | d3d9446802a44259755d38e6d163e820 + 14 | aab3238922bcc25a6f606eb525ffdc56 + 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 20 | 98f13708210194c475687be6106a3b84 20 | Success (12 rows) @@ -3145,7 +3201,11 @@ SELECT * FROM t4; -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE blog (id integer, author text, post text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE comment (blog_id integer, message text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'blog_id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
GRANT ALL ON blog, comment TO regress_rls_bob; CREATE POLICY blog_1 ON blog USING (id % 2 = 0); ALTER TABLE blog ENABLE ROW LEVEL SECURITY; @@ -3209,26 +3269,26 @@ RESET SESSION AUTHORIZATION; SELECT * FROM t1; a | b ----+---------------------------------- - 1 | c4ca4238a0b923820dcc509a6f75849b 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 7 | 8f14e45fceea167a5a36dedd4bea2543 + 19 | 1f0e3dad99908345f7439f8ffabdffc4 + 2 | c81e728d9d4c2f636f067f89cc14862c + 4 | a87ff679a2f3e71d9181a67b7542122c + 8 | c9f0f895fb98ab9159f51fd0297e236d + 16 | c74d97b01eae257e44aa9d5bade97baf + 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 5 | e4da3b7fbbce2345d7772b0674a318d5 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 11 | 6512bd43d9caa6e02c990b0a82652dca 13 | c51ce410c124a10e0db5e4b97fc2af39 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 17 | 70efdf2ec9b086079795c442636b55fb - 19 | 1f0e3dad99908345f7439f8ffabdffc4 - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 1 | c4ca4238a0b923820dcc509a6f75849b + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 20 | 98f13708210194c475687be6106a3b84 20 | Success (22 rows) @@ -3246,28 +3306,28 @@ SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM t1; a | b ----+---------------------------------- - 1 | c4ca4238a0b923820dcc509a6f75849b 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 7 | 8f14e45fceea167a5a36dedd4bea2543 - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 11 | 6512bd43d9caa6e02c990b0a82652dca - 13 | c51ce410c124a10e0db5e4b97fc2af39 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 17 | 
70efdf2ec9b086079795c442636b55fb 19 | 1f0e3dad99908345f7439f8ffabdffc4 - 0 | cfcd208495d565ef66e7dff9f98764da 2 | c81e728d9d4c2f636f067f89cc14862c 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 16 | c74d97b01eae257e44aa9d5bade97baf 18 | 6f4922f45568161a8cdf4ad2299f6d23 + 1 | c4ca4238a0b923820dcc509a6f75849b + 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 + 0 | cfcd208495d565ef66e7dff9f98764da + 12 | c20ad4d76fe97759aa27a0c99bff6710 20 | 98f13708210194c475687be6106a3b84 20 | Success + 5 | e4da3b7fbbce2345d7772b0674a318d5 + 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 + 11 | 6512bd43d9caa6e02c990b0a82652dca + 13 | c51ce410c124a10e0db5e4b97fc2af39 + 17 | 70efdf2ec9b086079795c442636b55fb + 6 | 1679091c5a880faf6fb5e6087eb1b2dc + 10 | d3d9446802a44259755d38e6d163e820 + 14 | aab3238922bcc25a6f606eb525ffdc56 (22 rows) EXPLAIN (COSTS OFF) SELECT * FROM t1; @@ -3315,6 +3375,8 @@ RESET SESSION AUTHORIZATION; DROP TABLE copy_t CASCADE; ERROR: table "copy_t" does not exist CREATE TABLE copy_t (a integer, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY p1 ON copy_t USING (a % 2 = 0); ALTER TABLE copy_t ENABLE ROW LEVEL SECURITY; GRANT ALL ON copy_t TO regress_rls_bob, regress_rls_exempt_user; @@ -3400,6 +3462,8 @@ ERROR: permission denied for table copy_t RESET SESSION AUTHORIZATION; SET row_security TO ON; CREATE TABLE copy_rel_to (a integer, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY p1 ON copy_rel_to USING (a % 2 = 0); ALTER TABLE copy_rel_to ENABLE ROW LEVEL SECURITY; GRANT ALL ON copy_rel_to TO regress_rls_bob, regress_rls_exempt_user; @@ -3468,6 +3532,8 @@ DROP TABLE copy_rel_to CASCADE; -- Check WHERE CURRENT OF SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE current_check (currentid int, payload text, rlsuser text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'currentid' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. GRANT ALL ON current_check TO PUBLIC; INSERT INTO current_check VALUES (1, 'abc', 'regress_rls_bob'), @@ -3537,8 +3603,15 @@ EXPLAIN (COSTS OFF) UPDATE current_check SET payload = payload WHERE CURRENT OF -> Tid Scan on current_check TID Cond: CURRENT OF current_check_cursor Filter: ((currentid = 4) AND ((currentid % 2) = 0)) -(4 rows) + Optimizer: Postgres query optimizer +(5 rows) +-- start_ignore +-- GPDB: does not support backwards scans, commit and restart +COMMIT; +BEGIN; +DECLARE current_check_cursor SCROLL CURSOR FOR SELECT * FROM current_check; +-- end_ignore -- Similarly can only delete row 4 FETCH ABSOLUTE 1 FROM current_check_cursor; currentid | payload | rlsuser @@ -3613,6 +3686,8 @@ SELECT attname, most_common_vals FROM pg_stats -- BEGIN; CREATE TABLE coll_t (c) AS VALUES ('bar'::text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'column1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE POLICY coll_p ON coll_t USING (c < ('foo'::text COLLATE "C")); ALTER TABLE coll_t ENABLE ROW LEVEL SECURITY; GRANT SELECT ON coll_t TO regress_rls_alice; @@ -3636,8 +3711,12 @@ ROLLBACK; RESET SESSION AUTHORIZATION; BEGIN; CREATE ROLE regress_rls_eve; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE ROLE regress_rls_frank; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE TABLE tbl1 (c) AS VALUES ('bar'::text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'column1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. GRANT SELECT ON TABLE tbl1 TO regress_rls_eve; CREATE POLICY P ON tbl1 TO regress_rls_eve, regress_rls_frank USING (true); SELECT refclassid::regclass, deptype @@ -3690,6 +3769,8 @@ ROLLBACK; -- cleanup -- BEGIN; CREATE TABLE t (c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY p ON t USING (c % 2 = 1); ALTER TABLE t ENABLE ROW LEVEL SECURITY; SAVEPOINT q; @@ -3712,6 +3793,8 @@ ROLLBACK; -- BEGIN; CREATE TABLE t (c) AS VALUES ('bar'::text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'column1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE POLICY p ON t USING (max(c)); -- fails: aggregate functions are not allowed in policy expressions ERROR: aggregate functions are not allowed in policy expressions ROLLBACK; @@ -3720,7 +3803,11 @@ ROLLBACK; -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE r1 (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE r2 (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO r1 VALUES (10), (20); INSERT INTO r2 VALUES (10), (20); GRANT ALL ON r1, r2 TO regress_rls_bob; @@ -3735,8 +3822,8 @@ SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM r1; a ---- - 10 20 + 10 (2 rows) SELECT * FROM r2; @@ -3748,7 +3835,7 @@ SELECT * FROM r2; -- r2 is read-only INSERT INTO r2 VALUES (2); -- Not allowed -ERROR: new row violates row-level security policy for table "r2" +ERROR: new row violates row-level security policy for table "r2" (seg0 127.0.1.1:9202 pid=1527273) UPDATE r2 SET a = 2 RETURNING *; -- Updates nothing a --- @@ -3763,15 +3850,15 @@ DELETE FROM r2 RETURNING *; -- Deletes nothing INSERT INTO r1 SELECT a + 1 FROM r2 RETURNING *; -- OK a ---- - 11 21 + 11 (2 rows) UPDATE r1 SET a = r2.a + 2 FROM r2 WHERE r1.a = r2.a RETURNING *; -- OK a | a ----+---- - 12 | 10 22 | 20 + 12 | 10 (2 rows) DELETE FROM r1 USING r2 WHERE r1.a = r2.a + 2 RETURNING *; -- OK @@ -3784,15 +3871,15 @@ DELETE FROM r1 USING r2 WHERE r1.a = r2.a + 2 RETURNING *; -- OK SELECT * FROM r1; a ---- - 11 21 + 11 (2 rows) SELECT * FROM r2; a ---- - 10 20 + 10 (2 rows) SET 
SESSION AUTHORIZATION regress_rls_alice; @@ -3804,6 +3891,8 @@ DROP TABLE r2; SET SESSION AUTHORIZATION regress_rls_alice; SET row_security = on; CREATE TABLE r1 (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO r1 VALUES (10), (20); CREATE POLICY p1 ON r1 USING (false); ALTER TABLE r1 ENABLE ROW LEVEL SECURITY; @@ -3816,7 +3905,7 @@ TABLE r1; -- RLS error INSERT INTO r1 VALUES (1); -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg1 127.0.1.1:9203 pid=1527274) -- No error (unable to see any rows to update) UPDATE r1 SET a = 1; TABLE r1; @@ -3850,6 +3939,9 @@ SET SESSION AUTHORIZATION regress_rls_alice; SET row_security = on; CREATE TABLE r1 (a int PRIMARY KEY); CREATE TABLE r2 (a int REFERENCES r1); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +WARNING: referential integrity (FOREIGN KEY) constraints are not supported in Apache Cloudberry, will not be enforced INSERT INTO r1 VALUES (10), (20); INSERT INTO r2 VALUES (10), (20); -- Create policies on r2 which prevent the @@ -3884,6 +3976,9 @@ DROP TABLE r1; -- Ensure cascaded DELETE works CREATE TABLE r1 (a int PRIMARY KEY); CREATE TABLE r2 (a int REFERENCES r1 ON DELETE CASCADE); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. +WARNING: referential integrity (FOREIGN KEY) constraints are not supported in Apache Cloudberry, will not be enforced INSERT INTO r1 VALUES (10), (20); INSERT INTO r2 VALUES (10), (20); -- Create policies on r2 which prevent the @@ -3902,8 +3997,8 @@ ALTER TABLE r2 NO FORCE ROW LEVEL SECURITY; TABLE r2; a ---- - 20 10 + 20 (2 rows) DROP TABLE r2; @@ -3911,6 +4006,9 @@ DROP TABLE r1; -- Ensure cascaded UPDATE works CREATE TABLE r1 (a int PRIMARY KEY); CREATE TABLE r2 (a int REFERENCES r1 ON UPDATE CASCADE); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +WARNING: referential integrity (FOREIGN KEY) constraints are not supported in Apache Cloudberry, will not be enforced INSERT INTO r1 VALUES (10), (20); INSERT INTO r2 VALUES (10), (20); -- Create policies on r2 which prevent the @@ -3942,6 +4040,8 @@ DROP TABLE r1; SET SESSION AUTHORIZATION regress_rls_alice; SET row_security = on; CREATE TABLE r1 (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE POLICY p1 ON r1 FOR SELECT USING (false); CREATE POLICY p2 ON r1 FOR INSERT WITH CHECK (true); ALTER TABLE r1 ENABLE ROW LEVEL SECURITY; @@ -3962,7 +4062,7 @@ HINT: To disable the policy for the table's owner, use ALTER TABLE NO FORCE ROW SET row_security = on; -- Error INSERT INTO r1 VALUES (10), (20) RETURNING *; -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg1 slice1 127.0.1.1:9203 pid=1527274) DROP TABLE r1; -- -- Test UPDATE+RETURNING applies SELECT policies as @@ -3999,24 +4099,28 @@ TABLE r1; ALTER TABLE r1 FORCE ROW LEVEL SECURITY; -- Error UPDATE r1 SET a = 30 RETURNING *; -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg1 slice1 127.0.1.1:9203 pid=1527274) -- UPDATE path of INSERT ... ON CONFLICT DO UPDATE should also error out INSERT INTO r1 VALUES (10) ON CONFLICT (a) DO UPDATE SET a = 30 RETURNING *; -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg2 slice1 127.0.1.1:9204 pid=1527275) -- Should still error out without RETURNING (use of arbiter always requires -- SELECT permissions) INSERT INTO r1 VALUES (10) ON CONFLICT (a) DO UPDATE SET a = 30; -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg0 127.0.1.1:9202 pid=1527273) INSERT INTO r1 VALUES (10) ON CONFLICT ON CONSTRAINT r1_pkey DO UPDATE SET a = 30; -ERROR: new row violates row-level security policy for table "r1" +ERROR: new row violates row-level security policy for table "r1" (seg0 127.0.1.1:9202 pid=1527273) DROP TABLE r1; -- Check dependency handling RESET SESSION AUTHORIZATION; CREATE TABLE dep1 (c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this 
table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE dep2 (c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY dep_p1 ON dep1 TO regress_rls_bob USING (c1 > (select max(dep2.c1) from dep2)); ALTER POLICY dep_p1 ON dep1 TO regress_rls_bob,regress_rls_carol; -- Should return one @@ -4059,9 +4163,15 @@ SELECT count(*) = 0 FROM pg_depend -- DROP OWNED BY testing RESET SESSION AUTHORIZATION; CREATE ROLE regress_rls_dob_role1; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE ROLE regress_rls_dob_role2; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE TABLE dob_t1 (c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE POLICY p1 ON dob_t1 TO regress_rls_dob_role1 USING (true); DROP OWNED BY regress_rls_dob_role1; DROP POLICY p1 ON dob_t1; -- should fail, already gone @@ -4085,8 +4195,12 @@ DROP USER regress_rls_dob_role1; DROP USER regress_rls_dob_role2; -- Bug #15708: view + table with RLS should check policies as view owner CREATE TABLE ref_tbl (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO ref_tbl VALUES (1); CREATE TABLE rls_tbl (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO rls_tbl VALUES (10); ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY; CREATE POLICY p1 ON rls_tbl USING (EXISTS (SELECT 1 FROM ref_tbl)); @@ -4112,6 +4226,8 @@ DROP TABLE rls_tbl; DROP TABLE ref_tbl; -- Leaky operator test CREATE TABLE rls_tbl (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
INSERT INTO rls_tbl SELECT x/10 FROM generate_series(1, 100) x; ANALYZE rls_tbl; ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY; @@ -4134,6 +4250,8 @@ DROP TABLE rls_tbl; -- Bug #16006: whole-row Vars in a policy don't play nice with sub-selects SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE rls_tbl (a int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE POLICY p1 ON rls_tbl USING (rls_tbl >= ROW(1,1,1)); ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY; ALTER TABLE rls_tbl FORCE ROW LEVEL SECURITY; @@ -4155,8 +4273,8 @@ INSERT INTO rls_tbl -> Seq Scan on regress_rls_schema.rls_tbl rls_tbl_1 Output: rls_tbl_1.b, rls_tbl_1.c, rls_tbl_1.a Filter: (rls_tbl_1.* >= ROW(1, 1, 1)) + Settings: gp_enable_relsize_collection = 'on', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: gp_enable_relsize_collection=on (14 rows) INSERT INTO rls_tbl @@ -4164,14 +4282,16 @@ INSERT INTO rls_tbl SELECT * FROM rls_tbl; a | b | c ----+----+---- - 10 | 20 | 30 20 | 30 | + 10 | 20 | 30 (2 rows) DROP TABLE rls_tbl; RESET SESSION AUTHORIZATION; -- CVE-2023-2455: inlining an SRF may introduce an RLS dependency create table rls_t (c text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into rls_t values ('invisible to bob'); alter table rls_t enable row level security; grant select on rls_t to regress_rls_alice, regress_rls_bob; @@ -4243,12 +4363,16 @@ DROP ROLE regress_rls_group2; -- pg_dump/pg_restore CREATE SCHEMA regress_rls_schema; CREATE TABLE rls_tbl (c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY; CREATE POLICY p1 ON rls_tbl USING (c1 > 5); CREATE POLICY p2 ON rls_tbl FOR SELECT USING (c1 <= 3); CREATE POLICY p3 ON rls_tbl FOR UPDATE USING (c1 <= 3) WITH CHECK (c1 > 5); CREATE POLICY p4 ON rls_tbl FOR DELETE USING (c1 <= 3); CREATE TABLE rls_tbl_force (c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
ALTER TABLE rls_tbl_force ENABLE ROW LEVEL SECURITY; ALTER TABLE rls_tbl_force FORCE ROW LEVEL SECURITY; CREATE POLICY p1 ON rls_tbl_force USING (c1 = 5) WITH CHECK (c1 < 5); diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index ce0f24e9f8f..b11ad34707b 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -130,28 +130,31 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >; EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*) FROM (SELECT DISTINCT two, four, two FROM tenk1) ss; - QUERY PLAN --------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------- Finalize Aggregate Output: count(*) -> Gather Motion 3:1 (slice1; segments: 3) Output: (PARTIAL count(*)) -> Partial Aggregate Output: PARTIAL count(*) - -> HashAggregate + -> GroupAggregate Output: tenk1.two, tenk1.four, tenk1.two Group Key: tenk1.two, tenk1.four, tenk1.two - -> Redistribute Motion 3:3 (slice2; segments: 3) + -> Sort Output: tenk1.two, tenk1.four, tenk1.two - Hash Key: tenk1.two, tenk1.four, tenk1.two - -> Streaming HashAggregate + Sort Key: tenk1.two, tenk1.four + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: tenk1.two, tenk1.four, tenk1.two - Group Key: tenk1.two, tenk1.four, tenk1.two - -> Seq Scan on public.tenk1 + Hash Key: tenk1.two, tenk1.four, tenk1.two + -> Streaming HashAggregate Output: tenk1.two, tenk1.four, tenk1.two + Group Key: tenk1.two, tenk1.four, tenk1.two + -> Seq Scan on public.tenk1 + Output: tenk1.two, tenk1.four, tenk1.two + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: optimizer=off -(19 rows) +(22 rows) SELECT count(*) FROM (SELECT DISTINCT two, four, two FROM tenk1) ss; diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 
4de01f4f632..042574710c7 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -147,16 +147,16 @@ explain (costs off) QUERY PLAN ------------------------------------------------------------------------------- Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) + -> Gather Motion 12:1 (slice1; segments: 12) -> Partial Aggregate - -> Append - -> Seq Scan on part_pa_test_p1 pa2_1 - -> Seq Scan on part_pa_test_p2 pa2_2 + -> Parallel Append + -> Parallel Seq Scan on part_pa_test_p1 pa2_1 + -> Parallel Seq Scan on part_pa_test_p2 pa2_2 SubPlan 1 -> Result Filter: (pa1.a = pa2.a) -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Broadcast Motion 3:12 (slice2; segments: 3) -> Append -> Seq Scan on part_pa_test_p1 pa1_1 -> Seq Scan on part_pa_test_p2 pa1_2 @@ -240,9 +240,11 @@ explain (costs off) -> Redistribute Motion 12:12 (slice2; segments: 12) Hash Key: (length((stringu1)::text)) Hash Module: 3 - -> Parallel Seq Scan on tenk1 + -> Streaming Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 Optimizer: Postgres query optimizer -(8 rows) +(10 rows) select length(stringu1) from tenk1 group by length(stringu1); length @@ -258,14 +260,16 @@ explain (costs off) Merge Key: stringu1 -> Sort Sort Key: stringu1 - -> HashAggregate + -> Finalize HashAggregate Group Key: stringu1 -> Redistribute Motion 12:12 (slice2; segments: 12) Hash Key: stringu1 Hash Module: 3 - -> Parallel Seq Scan on tenk1 + -> Streaming Partial HashAggregate + Group Key: stringu1 + -> Parallel Seq Scan on tenk1 Optimizer: Postgres query optimizer -(11 rows) +(13 rows) -- test that parallel plan for aggregates is not selected when -- target list contains parallel restricted clause. 
@@ -308,19 +312,20 @@ alter table tenk2 set (parallel_workers = 0); explain (costs off) select count(*) from tenk1 where (two, four) not in (select hundred, thousand from tenk2 where thousand > 100); - QUERY PLAN --------------------------------------------------------------------------------------------- - Aggregate + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Finalize Aggregate -> Gather Motion 12:1 (slice1; segments: 12) - -> Nested Loop Left Anti Semi (Not-In) Join - Join Filter: ((tenk1.two = tenk2.hundred) AND (tenk1.four = tenk2.thousand)) - -> Parallel Seq Scan on tenk1 - -> Materialize - -> Broadcast Motion 3:12 (slice2; segments: 3) - -> Seq Scan on tenk2 - Filter: (thousand > 100) + -> Partial Aggregate + -> Nested Loop Left Anti Semi (Not-In) Join + Join Filter: ((tenk1.two = tenk2.hundred) AND (tenk1.four = tenk2.thousand)) + -> Parallel Seq Scan on tenk1 + -> Materialize + -> Broadcast Motion 3:12 (slice2; segments: 3) + -> Seq Scan on tenk2 + Filter: (thousand > 100) Optimizer: Postgres query optimizer -(10 rows) +(11 rows) select count(*) from tenk1 where (two, four) not in (select hundred, thousand from tenk2 where thousand > 100); @@ -356,17 +361,18 @@ explain (costs off) where tenk1.unique1 = (Select max(tenk2.unique1) from tenk2); QUERY PLAN -------------------------------------------------------- - Aggregate + Finalize Aggregate InitPlan 1 (returns $1) (slice2) -> Finalize Aggregate -> Gather Motion 6:1 (slice3; segments: 6) -> Partial Aggregate -> Parallel Seq Scan on tenk2 -> Gather Motion 12:1 (slice1; segments: 12) - -> Parallel Seq Scan on tenk1 - Filter: (unique1 = $1) + -> Partial Aggregate + -> Parallel Seq Scan on tenk1 + Filter: (unique1 = $1) Optimizer: Postgres query optimizer -(10 rows) +(11 rows) select count(*) from tenk1 where tenk1.unique1 = (Select max(tenk2.unique1) from tenk2); @@ -1195,9 +1201,9 @@ ORDER BY 1, 2, 3; EXPLAIN (VERBOSE, COSTS OFF) SELECT 
generate_series(1, two), array(select generate_series(1, two)) FROM tenk1 ORDER BY tenthous; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 6:1 (slice1; segments: 6) Output: (generate_series(1, tenk1.two)), ((SubPlan 1)), tenk1.tenthous Merge Key: tenk1.tenthous -> ProjectSet @@ -1207,13 +1213,13 @@ SELECT generate_series(1, two), array(select generate_series(1, two)) -> Sort Output: tenk1.tenthous, tenk1.two Sort Key: tenk1.tenthous - -> Seq Scan on public.tenk1 + -> Parallel Seq Scan on public.tenk1 Output: tenk1.tenthous, tenk1.two SubPlan 1 -> ProjectSet Output: generate_series(1, tenk1.two) -> Result - Settings: enable_parallel = 'on', min_parallel_table_scan_size = '0', optimizer = 'off', parallel_setup_cost = '0', parallel_tuple_cost = '0' + Settings: cbdb_dedup_semi_damping_factor = '1', cbdb_eager_subplan = 'off', cbdb_enable_dynamic_shared_scan = 'off', cbdb_enable_multi_window_agg = 'off', cbdb_enable_setop_pre_dedup = 'off', cbdb_inner_join_selectivity_damping_factor = '1', cbdb_streaming_damping_factor = '1', enable_parallel = 'on', gp_cte_sharing = 'off', min_parallel_table_scan_size = '0', optimizer = 'off', parallel_setup_cost = '0', parallel_tuple_cost = '0' Optimizer: Postgres query optimizer (18 rows) @@ -1253,16 +1259,16 @@ SELECT 1 FROM tenk1_vw_sec WHERE (SELECT sum(f1) FROM int4_tbl WHERE f1 < unique1) < 100; QUERY PLAN 
----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 6:1 (slice1; segments: 6) -> Subquery Scan on tenk1_vw_sec Filter: ((SubPlan 1) < 100) - -> Seq Scan on tenk1 + -> Parallel Seq Scan on tenk1 SubPlan 1 -> Aggregate -> Result Filter: (int4_tbl.f1 < tenk1_vw_sec.unique1) -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Broadcast Motion 3:6 (slice2; segments: 3) -> Seq Scan on int4_tbl Optimizer: Postgres query optimizer (12 rows) diff --git a/src/test/regress/expected/shared_scan.out b/src/test/regress/expected/shared_scan.out index 83a3815fb0e..f9322db4d8e 100644 --- a/src/test/regress/expected/shared_scan.out +++ b/src/test/regress/expected/shared_scan.out @@ -91,8 +91,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur -- ORCA plan contains a Shared Scan producer with a unsorted Motion below it EXPLAIN (COSTS OFF) WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 LIMIT 10) SELECT a, 1, 1 FROM cte JOIN t2 USING (a); - QUERY PLAN ------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (t2.a = cte.a) @@ -101,13 +101,15 @@ WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 LIMIT 10) SELECT a, 1, 1 FROM -> Redistribute Motion 1:3 (slice2; segments: 1) Hash Key: cte.a -> Subquery Scan on cte - -> Limit - -> Gather Motion 3:1 (slice3; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Result -> Limit - -> Seq Scan on t1 - Filter: (random() < '0.1'::double precision) + -> Gather Motion 3:1 (slice3; segments: 3) + -> Limit + -> Seq Scan on t1 + Filter: (random() < '0.1'::double precision) Optimizer: Postgres query optimizer -(14 rows) +(16 rows) -- This functions returns one more column than expected. 
CREATE OR REPLACE FUNCTION col_mismatch_func1() RETURNS TABLE (field1 int, field2 int) @@ -129,8 +131,8 @@ PL/pgSQL function col_mismatch_func1() line 6 at RETURN QUERY -- ORCA plan contains a Shared Scan producer with a sorted Motion below it EXPLAIN (COSTS OFF) WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 ORDER BY b LIMIT 10) SELECT a, 1, 1 FROM cte JOIN t2 USING (a); - QUERY PLAN ------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (t2.a = cte.a) @@ -139,16 +141,18 @@ WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 ORDER BY b LIMIT 10) SELECT a -> Redistribute Motion 1:3 (slice2; segments: 1) Hash Key: cte.a -> Subquery Scan on cte - -> Limit - -> Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t1.b + -> Shared Scan (share slice:id 2:0) + -> Result -> Limit - -> Sort - Sort Key: t1.b - -> Seq Scan on t1 - Filter: (random() < '0.1'::double precision) + -> Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t1.b + -> Limit + -> Sort + Sort Key: t1.b + -> Seq Scan on t1 + Filter: (random() < '0.1'::double precision) Optimizer: Postgres query optimizer -(17 rows) +(19 rows) --- This functions returns one more column than expected. 
CREATE OR REPLACE FUNCTION col_mismatch_func2() RETURNS TABLE (field1 int, field2 int) diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 8c3f7dfe580..e05e8c257d7 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1684,7 +1684,7 @@ with x as materialized (select * from (select f1 from subselect_tbl) ss) select * from x where f1 = 1; QUERY PLAN ---------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 1:1 (slice1; segments: 1) Output: x.f1 -> Subquery Scan on x Output: x.f1 @@ -1693,9 +1693,10 @@ select * from x where f1 = 1; Output: share0_ref1.f1 -> Seq Scan on public.subselect_tbl Output: subselect_tbl.f1 + Filter: (subselect_tbl.f1 = 1) + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: gp_cte_sharing=on -(11 rows) +(12 rows) -- Stable functions are safe to inline explain (verbose, costs off) @@ -1720,7 +1721,7 @@ with x as (select * from (select f1, random() from subselect_tbl) ss) select * from x where f1 = 1; QUERY PLAN ---------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 1:1 (slice1; segments: 1) Output: x.f1, x.random -> Subquery Scan on x Output: x.f1, x.random @@ -1729,8 +1730,10 @@ select * from x where f1 = 1; Output: share0_ref1.f1, share0_ref1.random -> Seq Scan on public.subselect_tbl Output: subselect_tbl.f1, random() + Filter: (subselect_tbl.f1 = 1) + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(10 rows) +(12 rows) create temporary sequence ts; create table vol_test(a int, b int); @@ -1741,7 +1744,7 @@ with x as (select * from (select a, nextval('ts') from vol_test) ss) select * from x where a = 1; QUERY PLAN ----------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 1:1 (slice1; segments: 1) Output: x.a, x.nextval -> 
Subquery Scan on x Output: x.a, x.nextval @@ -1750,8 +1753,10 @@ select * from x where a = 1; Output: share0_ref1.a, share0_ref1.nextval -> Seq Scan on public.vol_test Output: vol_test.a, nextval('ts'::regclass) + Filter: (vol_test.a = 1) + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(10 rows) +(12 rows) drop sequence ts; drop table vol_test; @@ -1801,7 +1806,7 @@ select * from x, x x2 where x.n = x2.n; Output: share0_ref1.f1, share0_ref1.n -> Seq Scan on public.subselect_tbl Output: subselect_tbl.f1, 'regression'::name - Settings: gp_cte_sharing=on + Settings: gp_cte_sharing = 'on', optimizer = 'off' Optimizer: Postgres query optimizer (21 rows) @@ -1944,16 +1949,16 @@ select * from (with y as (select * from x) select * from y) ss; explain (verbose, costs off) with x as materialized (select * from int4_tbl) select * from (with y as (select * from x) select * from y) ss; - QUERY PLAN --------------------------------------------- + QUERY PLAN +---------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: share0_ref1.f1 -> Shared Scan (share slice:id 1:0) Output: share0_ref1.f1 -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 + Settings: gp_cte_sharing = 'on', optimizer = 'off' Optimizer: Postgres query optimizer - Settings: gp_cte_sharing=on, optimizer=off (8 rows) -- Ensure that we inline the currect CTE when there are diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index b62bf04d803..8af8f9e6fea 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -1,3 +1,7 @@ +-- start_ignore +create schema subselect_gp; +set search_path to subselect_gp, public; +-- end_ignore set optimizer_enable_master_only_queries = on; set optimizer_segments = 3; set optimizer_nestloop_factor = 1.0; @@ -196,14 +200,14 @@ analyze mrs_t1; explain select * from mrs_t1 where exists (select x from mrs_t1 where x < -1); QUERY PLAN 
------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=3.27..6.87 rows=20 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.10..2.44 rows=20 width=4) InitPlan 1 (returns $0) (slice2) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..3.27 rows=1 width=0) - -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..3.25 rows=1 width=0) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1.10 rows=1 width=4) + -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..1.08 rows=1 width=4) Filter: (x < '-1'::integer) - -> Result (cost=3.27..6.47 rows=7 width=4) + -> Result (cost=0.00..1.07 rows=7 width=4) One-Time Filter: $0 - -> Seq Scan on mrs_t1 (cost=3.27..6.47 rows=7 width=4) + -> Seq Scan on mrs_t1 (cost=0.00..1.07 rows=7 width=4) Optimizer: Postgres query optimizer (9 rows) @@ -215,14 +219,14 @@ select * from mrs_t1 where exists (select x from mrs_t1 where x < -1) order by 1 explain select * from mrs_t1 where exists (select x from mrs_t1 where x = 1); QUERY PLAN ------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=3.27..6.87 rows=20 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.10..2.44 rows=20 width=4) InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice3; segments: 1) (cost=0.00..3.27 rows=1 width=0) - -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..3.25 rows=1 width=0) + -> Gather Motion 1:1 (slice3; segments: 1) (cost=0.00..1.10 rows=1 width=4) + -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..1.08 rows=1 width=4) Filter: (x = 1) - -> Result (cost=3.27..6.47 rows=7 width=4) + -> Result (cost=0.00..1.07 rows=7 width=4) One-Time Filter: $0 - -> Seq Scan on mrs_t1 (cost=3.27..6.47 rows=7 width=4) + -> Seq Scan on mrs_t1 (cost=0.00..1.07 rows=7 width=4) Optimizer: Postgres query optimizer (9 rows) @@ -254,12 +258,12 @@ select * from mrs_t1 where exists (select x from mrs_t1 where x = 1) order by 1; 
explain select * from mrs_t1 where x in (select x-95 from mrs_t1) or x < 5; QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=3.30..6.60 rows=13 width=4) - -> Seq Scan on mrs_t1 (cost=3.30..6.60 rows=5 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.24 rows=12 width=4) + -> Seq Scan on mrs_t1 (cost=0.00..1.08 rows=4 width=4) Filter: ((hashed SubPlan 1) OR (x < 5)) SubPlan 1 - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.25 rows=7 width=4) - -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..3.25 rows=7 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.08 rows=7 width=4) + -> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..1.08 rows=7 width=4) Optimizer: Postgres query optimizer (7 rows) @@ -363,12 +367,11 @@ analyze csq_d1; explain select array(select x from csq_m1); -- no initplan QUERY PLAN -------------------------------------------------------------- - Result (cost=1.01..1.02 rows=1 width=0) + Result (cost=1.01..1.02 rows=1 width=32) InitPlan 1 (returns $0) -> Seq Scan on csq_m1 (cost=0.00..1.01 rows=1 width=4) - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(5 rows) + Optimizer: Postgres query optimizer +(4 rows) select array(select x from csq_m1); -- {1} array @@ -379,13 +382,12 @@ select array(select x from csq_m1); -- {1} explain select array(select x from csq_d1); -- initplan QUERY PLAN ------------------------------------------------------------------------------------ - Result (cost=1.01..1.02 rows=1 width=0) + Result (cost=1.03..1.04 rows=1 width=32) InitPlan 1 (returns $0) (slice1) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.01 rows=1 width=4) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) -> Seq Scan on csq_d1 (cost=0.00..1.01 rows=1 width=4) - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(6 rows) + 
Optimizer: Postgres query optimizer +(5 rows) select array(select x from csq_d1); -- {1} array @@ -456,9 +458,9 @@ select * from csq_m1; select * from csq_d1; x --- - 1 2 4 + 1 (3 rows) -- @@ -467,11 +469,11 @@ select * from csq_d1; explain select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- gather motion QUERY PLAN ------------------------------------------------------------------------------------ - Seq Scan on csq_m1 (cost=2.02..3.07 rows=2 width=4) + Seq Scan on csq_m1 (cost=0.00..1.04 rows=2 width=4) Filter: ((NOT (hashed SubPlan 1)) OR (x < '-100'::integer)) SubPlan 1 - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.02 rows=2 width=4) - -> Seq Scan on csq_d1 (cost=0.00..2.02 rows=1 width=4) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.05 rows=3 width=4) + -> Seq Scan on csq_d1 (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (6 rows) @@ -487,11 +489,11 @@ select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- (3) explain select * from csq_d1 where x not in (select x from csq_m1) or x < -100; -- broadcast motion QUERY PLAN -------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=1.04..3.07 rows=2 width=4) - -> Seq Scan on csq_d1 (cost=1.04..3.07 rows=1 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.04 rows=2 width=4) + -> Seq Scan on csq_d1 (cost=0.00..1.01 rows=1 width=4) Filter: ((NOT (hashed SubPlan 1)) OR (x < '-100'::integer)) SubPlan 1 - -> Broadcast Motion 1:3 (slice1) (cost=0.00..1.03 rows=3 width=4) + -> Broadcast Motion 1:3 (slice2) (cost=0.00..1.03 rows=3 width=4) -> Seq Scan on csq_m1 (cost=0.00..1.03 rows=3 width=4) Optimizer: Postgres query optimizer (7 rows) @@ -535,10 +537,10 @@ INSERT INTO csq_r VALUES (1); -- with a correlated argument -- force_explain explain SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a)); - QUERY PLAN 
----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=4) @@ -553,10 +555,10 @@ SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a)); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=4) @@ -570,10 +572,10 @@ SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a)); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=0) @@ 
-588,10 +590,10 @@ SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a)); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (NOT (SubPlan 1)) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=0) @@ -605,10 +607,10 @@ SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1); - QUERY PLAN ------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..863.25 rows=32100 width=4) + -> Seq Scan on csq_r (cost=0.00..435.25 rows=10700 width=4) Filter: (a > (SubPlan 1)) SubPlan 1 -> Limit (cost=0.00..0.01 rows=1 width=4) @@ -623,10 +625,10 @@ SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1); -- force_explain explain SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a)); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 
(slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=4) @@ -640,10 +642,10 @@ SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a)); - QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..1.02 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=4) @@ -658,17 +660,17 @@ SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a)); -- force_explain explain SELECT * FROM csq_r WHERE a IN (SELECT csq_f FROM csq_f(csq_r.a),csq_r); - QUERY PLAN ---------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..10000000001.54 rows=1 width=4) - -> Seq Scan on csq_r (cost=0.00..10000000001.54 rows=1 width=4) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on csq_r (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 - -> Nested Loop (cost=10000000000.00..10000000001.06 rows=4 width=4) + -> Nested Loop (cost=10000000000.00..10000003083.51 rows=96300 width=4) -> Function Scan on csq_f (cost=0.00..0.01 rows=1 width=4) - -> Materialize (cost=0.00..1.03 rows=1 width=0) - -> Broadcast Motion 3:3 
(slice2; segments: 3) (cost=0.00..1.03 rows=1 width=0) - -> Seq Scan on csq_r csq_r_1 (cost=0.00..1.01 rows=1 width=0) + -> Materialize (cost=0.00..2120.50 rows=96300 width=0) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=0) + -> Seq Scan on csq_r csq_r_1 (cost=0.00..355.00 rows=32100 width=0) Optimizer: Postgres query optimizer (10 rows) @@ -700,19 +702,18 @@ insert into csq_pullup values ('def',3, 1, 'abc'); explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t); QUERY PLAN ------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.11 rows=4 width=19) - -> Hash Join (cost=1.06..2.11 rows=2 width=19) - Hash Cond: t0.t = "Expr_SUBQUERY".csq_c0 - -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=19) - -> Hash (cost=1.04..1.04 rows=1 width=32) - -> Subquery Scan on "Expr_SUBQUERY" (cost=1.02..1.04 rows=1 width=32) - -> HashAggregate (cost=1.02..1.03 rows=1 width=40) - Filter: count(*) = 1 - Group By: t1.t + Gather Motion 3:1 (slice1; segments: 3) (cost=1.05..2.12 rows=3 width=17) + -> Hash Join (cost=1.05..2.07 rows=1 width=17) + Hash Cond: (t0.t = "Expr_SUBQUERY".csq_c0) + -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) + -> Hash (cost=1.04..1.04 rows=1 width=4) + -> Subquery Scan on "Expr_SUBQUERY" (cost=1.01..1.04 rows=1 width=4) + -> HashAggregate (cost=1.01..1.03 rows=1 width=12) + Group Key: t1.t + Filter: (1 = count(*)) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=4) - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(12 rows) + Optimizer: Postgres query optimizer +(11 rows) select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t); t | n | i | v @@ -728,16 +729,16 @@ select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t explain select * from csq_pullup t0 where 1= 
(select count(*) from csq_pullup t1 where t0.t=t1.v); QUERY PLAN ----------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=1.06..2.11 rows=4 width=17) - -> Hash Join (cost=1.06..2.11 rows=2 width=17) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.07..2.14 rows=3 width=17) + -> Hash Join (cost=1.07..2.09 rows=1 width=17) Hash Cond: (t0.t = "Expr_SUBQUERY".csq_c0) -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) - -> Hash (cost=1.05..1.05 rows=1 width=32) - -> Subquery Scan on "Expr_SUBQUERY" (cost=1.03..1.05 rows=1 width=32) - -> HashAggregate (cost=1.03..1.04 rows=1 width=40) + -> Hash (cost=1.06..1.06 rows=1 width=32) + -> Subquery Scan on "Expr_SUBQUERY" (cost=1.03..1.06 rows=1 width=32) + -> HashAggregate (cost=1.03..1.05 rows=1 width=40) Group Key: ((t1.v)::text) Filter: (1 = count(*)) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=32) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=32) Hash Key: ((t1.v)::text) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=32) Optimizer: Postgres query optimizer @@ -757,17 +758,17 @@ select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n); QUERY PLAN ---------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=1.13..2.17 rows=4 width=17) - -> Hash Join (cost=1.13..2.17 rows=2 width=17) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.14..2.20 rows=3 width=17) + -> Hash Join (cost=1.14..2.16 rows=1 width=17) Hash Cond: (t0.n = "Expr_SUBQUERY".csq_c0) -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) - -> Hash (cost=1.09..1.09 rows=1 width=5) - -> Broadcast Motion 3:3 (slice2; segments: 3) 
(cost=1.03..1.09 rows=1 width=5) - -> Subquery Scan on "Expr_SUBQUERY" (cost=1.03..1.05 rows=1 width=5) - -> HashAggregate (cost=1.03..1.04 rows=1 width=13) + -> Hash (cost=1.10..1.10 rows=3 width=5) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=1.03..1.10 rows=3 width=5) + -> Subquery Scan on "Expr_SUBQUERY" (cost=1.03..1.06 rows=1 width=5) + -> HashAggregate (cost=1.03..1.05 rows=1 width=13) Group Key: t1.n Filter: (1 = count(*)) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=5) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=5) Hash Key: t1.n -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=5) Optimizer: Postgres query optimizer @@ -776,9 +777,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n); t | n | i | v -----+---+---+----- - def | 3 | 1 | abc - xyz | 2 | 3 | def abc | 1 | 2 | xyz + xyz | 2 | 3 | def + def | 3 | 1 | abc (3 rows) -- @@ -787,18 +788,18 @@ select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1); QUERY PLAN ----------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=1.07..2.13 rows=4 width=17) - -> Hash Join (cost=1.07..2.13 rows=2 width=17) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.07..2.16 rows=3 width=17) + -> Hash Join (cost=1.07..2.12 rows=1 width=17) Hash Cond: ((t0.n + '1'::numeric) = "Expr_SUBQUERY".csq_c0) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=17) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=17) Hash Key: (t0.n + '1'::numeric) -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) - -> Hash (cost=1.05..1.05 
rows=1 width=32) - -> Subquery Scan on "Expr_SUBQUERY" (cost=1.03..1.05 rows=1 width=32) - -> HashAggregate (cost=1.03..1.04 rows=1 width=40) + -> Hash (cost=1.06..1.06 rows=1 width=32) + -> Subquery Scan on "Expr_SUBQUERY" (cost=1.04..1.06 rows=1 width=32) + -> HashAggregate (cost=1.04..1.05 rows=1 width=40) Group Key: ((t1.n + '1'::numeric)) Filter: (1 = count(*)) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=32) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=32) Hash Key: ((t1.n + '1'::numeric)) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=32) Optimizer: Postgres query optimizer @@ -807,9 +808,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1); t | n | i | v -----+---+---+----- - def | 3 | 1 | abc - xyz | 2 | 3 | def abc | 1 | 2 | xyz + xyz | 2 | 3 | def + def | 3 | 1 | abc (3 rows) -- @@ -818,18 +819,18 @@ select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1); QUERY PLAN ----------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) (cost=1.07..2.14 rows=4 width=17) - -> Hash Join (cost=1.07..2.14 rows=2 width=17) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.08..2.17 rows=3 width=17) + -> Hash Join (cost=1.08..2.12 rows=1 width=17) Hash Cond: ((t0.n + '1'::numeric) = "Expr_SUBQUERY".csq_c0) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=17) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=17) Hash Key: (t0.n + '1'::numeric) -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) - -> Hash (cost=1.06..1.06 rows=1 width=32) - -> Subquery Scan on 
"Expr_SUBQUERY" (cost=1.03..1.06 rows=1 width=32) - -> HashAggregate (cost=1.03..1.05 rows=1 width=40) + -> Hash (cost=1.07..1.07 rows=1 width=32) + -> Subquery Scan on "Expr_SUBQUERY" (cost=1.04..1.07 rows=1 width=32) + -> HashAggregate (cost=1.04..1.06 rows=1 width=40) Group Key: (((t1.i + 1))::numeric) Filter: (1 = count(*)) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=32) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=32) Hash Key: (((t1.i + 1))::numeric) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=32) Optimizer: Postgres query optimizer @@ -838,9 +839,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1); t | n | i | v -----+---+---+----- + abc | 1 | 2 | xyz xyz | 2 | 3 | def def | 3 | 1 | abc - abc | 1 | 2 | xyz (3 rows) -- @@ -867,9 +868,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1); t | n | i | v -----+---+---+----- - xyz | 2 | 3 | def def | 3 | 1 | abc abc | 1 | 2 | xyz + xyz | 2 | 3 | def (3 rows) -- subquery contains a HAVING clause @@ -893,9 +894,9 @@ explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10); t | n | i | v -----+---+---+----- + abc | 1 | 2 | xyz xyz | 2 | 3 | def def | 3 | 1 | abc - abc | 1 | 2 | xyz (3 rows) -- subquery contains quals of form 'function(outervar, innervar1) = innvervar2' @@ -929,16 +930,15 @@ select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1); QUERY PLAN 
------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=1.02..2.07 rows=4 width=19) - -> Hash Anti Join (cost=1.02..2.07 rows=2 width=19) - Hash Cond: t0.t = t1.t - -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=19) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.02..2.09 rows=3 width=17) + -> Hash Anti Join (cost=1.02..2.05 rows=1 width=17) + Hash Cond: (t0.t = t1.t) + -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) -> Hash (cost=1.01..1.01 rows=1 width=4) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 rows=1 width=4) - Filter: t IS NOT NULL AND i = 1 - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(9 rows) + Filter: (i = 1) + Optimizer: Postgres query optimizer +(8 rows) select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1); t | n | i | v @@ -953,17 +953,15 @@ select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1); QUERY PLAN ------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=1.09..2.15 rows=4 width=19) - -> Hash Anti Join (cost=1.09..2.15 rows=2 width=19) - Hash Cond: t0.i = (t1.i + 1) - -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=19) - -> Hash (cost=1.05..1.05 rows=1 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.04..2.11 rows=3 width=17) + -> Hash Anti Join (cost=1.04..2.07 rows=1 width=17) + Hash Cond: (t0.i = (t1.i + 1)) + -> Seq Scan on csq_pullup t0 (cost=0.00..1.01 rows=1 width=17) + -> Hash (cost=1.03..1.03 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) -> Seq Scan on csq_pullup t1 (cost=0.00..1.01 
rows=1 width=4) - Filter: (i + 1) IS NOT NULL - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(10 rows) + Optimizer: Postgres query optimizer +(8 rows) select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1); t | n | i | v @@ -987,21 +985,20 @@ analyze subselect_t2; explain select * from subselect_t1 where x in (select y from subselect_t2); QUERY PLAN ------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=1.07..2.13 rows=4 width=4) - -> Hash Semi Join (cost=1.07..2.13 rows=2 width=4) - Hash Cond: subselect_t1.x = subselect_t2.y - -> Seq Scan on subselect_t1 (cost=0.00..1.02 rows=1 width=4) - -> Hash (cost=1.03..1.03 rows=1 width=4) - -> Seq Scan on subselect_t2 (cost=0.00..1.03 rows=1 width=4) - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(8 rows) + Gather Motion 3:1 (slice1; segments: 3) (cost=1.02..2.09 rows=3 width=4) + -> Hash Semi Join (cost=1.02..2.05 rows=1 width=4) + Hash Cond: (subselect_t1.x = subselect_t2.y) + -> Seq Scan on subselect_t1 (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=1.01..1.01 rows=1 width=4) + -> Seq Scan on subselect_t2 (cost=0.00..1.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(7 rows) select * from subselect_t1 where x in (select y from subselect_t2); x --- - 1 2 + 1 (2 rows) -- start_ignore @@ -1010,14 +1007,14 @@ select * from subselect_t1 where x in (select y from subselect_t2); explain select * from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2); QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=4.19..6.26 rows=4 width=4) - -> Hash Semi Join (cost=4.19..6.26 rows=2 width=4) - Hash Cond: subselect_t1.x = subselect_t2.y - -> Seq Scan on subselect_t1 (cost=0.00..2.02 rows=1 width=4) - -> Hash 
(cost=4.12..4.12 rows=2 width=4) - -> Append (cost=0.00..4.06 rows=2 width=4) - -> Seq Scan on subselect_t2 (cost=0.00..2.03 rows=1 width=4) - -> Seq Scan on subselect_t2 subselect_t2_1 (cost=0.00..2.03 rows=1 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=2.05..3.12 rows=3 width=4) + -> Hash Semi Join (cost=2.05..3.08 rows=1 width=4) + Hash Cond: (subselect_t1.x = subselect_t2.y) + -> Seq Scan on subselect_t1 (cost=0.00..1.01 rows=1 width=4) + -> Hash (cost=2.03..2.03 rows=2 width=4) + -> Append (cost=0.00..2.03 rows=2 width=4) + -> Seq Scan on subselect_t2 (cost=0.00..1.01 rows=1 width=4) + -> Seq Scan on subselect_t2 subselect_t2_1 (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (9 rows) @@ -1053,13 +1050,13 @@ select count(*) from subselect_t1 where x in (select y from subselect_t2); explain select count(*) from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2); QUERY PLAN --------------------------------------------------------------------------------------------------------- - Aggregate (cost=3.14..3.15 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.06..3.13 rows=3 width=0) - -> Hash Semi Join (cost=2.06..3.09 rows=1 width=0) + Aggregate (cost=3.13..3.14 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.05..3.12 rows=3 width=0) + -> Hash Semi Join (cost=2.05..3.08 rows=1 width=0) Hash Cond: (subselect_t1.x = subselect_t2.y) -> Seq Scan on subselect_t1 (cost=0.00..1.01 rows=1 width=4) - -> Hash (cost=2.04..2.04 rows=2 width=4) - -> Append (cost=0.00..2.02 rows=2 width=4) + -> Hash (cost=2.03..2.03 rows=2 width=4) + -> Append (cost=0.00..2.03 rows=2 width=4) -> Seq Scan on subselect_t2 (cost=0.00..1.01 rows=1 width=4) -> Seq Scan on subselect_t2 subselect_t2_1 (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer @@ -1084,9 +1081,13 @@ select count(*) from -- Query was deadlocking because of not squelching subplans (MPP-18936) -- drop 
table if exists t1; +NOTICE: table "t1" does not exist, skipping drop table if exists t2; +NOTICE: table "t2" does not exist, skipping drop table if exists t3; +NOTICE: table "t3" does not exist, skipping drop table if exists t4; +NOTICE: table "t4" does not exist, skipping CREATE TABLE t1 AS (SELECT generate_series(1, 5000) AS i, generate_series(5001, 10000) AS j); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -1138,18 +1139,17 @@ select * from t1 where a=1 and a=2 and a > (select t2.b from t2); explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2); QUERY PLAN ------------------------------------------------ - Result (cost=1063.00..1063.01 rows=1 width=0) + Result (cost=1639.00..1639.00 rows=0 width=0) One-Time Filter: false - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(4 rows) + Optimizer: Postgres query optimizer +(3 rows) explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2) union all select * from t1 where a=1 and a=2 and a > (select t2.b from t2); QUERY PLAN ------------------------------------------ - Result (cost=0.00..0.01 rows=1 width=0) + Result (cost=0.00..0.00 rows=0 width=0) One-Time Filter: false Optimizer: Postgres query optimizer (3 rows) @@ -1164,13 +1164,12 @@ select * from t1 where a=1 and a=2 and a > (select t2.b from t2); explain select * from t1, (select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo where t1.a = foo.a; - QUERY PLAN ------------------------------------------------------------------------------ - Result (cost=1063.00..1063.01 rows=1 width=0) + QUERY PLAN +------------------------------------------------ + Result (cost=1639.00..1639.00 rows=0 width=8) One-Time Filter: false - Settings: 
optimizer=off; optimizer_nestloop_factor=1; optimizer_segments=3 - Optimizer status: Postgres query optimizer -(4 rows) + Optimizer: Postgres query optimizer +(3 rows) select * from t1, (select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo @@ -1185,34 +1184,34 @@ where t1.a = foo.a; insert into t1 values (1); insert into t2 values (1); explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1); - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.53 rows=1 width=0) - -> Seq Scan on t1 (cost=0.00..1.53 rows=1 width=0) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on t1 (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 - -> Limit (cost=0.00..1.03 rows=1 width=4) - -> Result (cost=0.00..1.03 rows=1 width=4) + -> Limit (cost=0.00..0.03 rows=1 width=4) + -> Result (cost=0.00..3083.50 rows=96300 width=4) One-Time Filter: (t1.a = 1) - -> Materialize (cost=0.00..1.03 rows=1 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=4) - -> Seq Scan on t2 (cost=0.00..1.01 rows=1 width=4) + -> Materialize (cost=0.00..2120.50 rows=96300 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) + -> Seq Scan on t2 (cost=0.00..355.00 rows=32100 width=4) Optimizer: Postgres query optimizer (11 rows) explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1); - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..2.05 rows=1 width=0) - -> Seq Scan on t1 (cost=0.00..2.05 rows=1 width=0) + QUERY PLAN 
+---------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..997.00 rows=48150 width=4) + -> Seq Scan on t1 (cost=0.00..355.00 rows=16050 width=4) Filter: (SubPlan 1) SubPlan 1 - -> Limit (cost=1.03..1.03 rows=1 width=4) - -> Result (cost=0.00..1.03 rows=1 width=4) + -> Limit (cost=0.03..3083.50 rows=96299 width=4) + -> Result (cost=0.00..3083.50 rows=96300 width=4) One-Time Filter: (t1.a = 1) - -> Materialize (cost=0.00..1.03 rows=1 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=4) - -> Seq Scan on t2 (cost=0.00..1.01 rows=1 width=4) + -> Materialize (cost=0.00..2120.50 rows=96300 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) + -> Seq Scan on t2 (cost=0.00..355.00 rows=32100 width=4) Optimizer: Postgres query optimizer (11 rows) @@ -1284,26 +1283,25 @@ explain select row_number() over (order by seq asc) as id, foo.cnt from (select seq, (select count(*) from t1_mpp_24563 t1 where t1.id = t2.id) cnt from t2_mpp_24563 t2 where value = 7) foo; - QUERY PLAN ------------------------------------------------------------------------------------------------------- - WindowAgg (cost=1.02..2.12 rows=1 width=8) + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + WindowAgg (cost=359.19..232173.17 rows=78 width=20) Order By: t2.seq - -> Gather Motion 3:1 (slice2; segments: 3) (cost=1.02..1.05 rows=1 width=8) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=359.19..360.30 rows=78 width=8) Merge Key: t2.seq - -> Sort (cost=1.02..1.03 rows=1 width=8) + -> Sort (cost=359.19..359.26 rows=26 width=8) Sort Key: t2.seq - -> Seq Scan on t2_mpp_24563 t2 (cost=0.00..1.01 rows=1 width=8) - Filter: value = 7 + -> Seq Scan on t2_mpp_24563 t2 (cost=0.00..358.58 rows=26 width=8) + Filter: (value = 7) SubPlan 1 - -> 
Aggregate (cost=1.06..1.07 rows=1 width=8) - -> Result (cost=1.01..1.02 rows=1 width=0) - Filter: t1.id = $0 - -> Materialize (cost=1.01..1.02 rows=1 width=0) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=0) - -> Seq Scan on t1_mpp_24563 t1 (cost=0.00..1.01 rows=1 width=0) - Settings: optimizer_segments=3 - Optimizer status: Postgres query optimizer -(17 rows) + -> Aggregate (cost=2975.75..2975.76 rows=1 width=8) + -> Result (cost=0.00..2760.50 rows=86100 width=0) + Filter: (t1.id = t2.id) + -> Materialize (cost=0.00..1899.50 rows=86100 width=4) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1469.00 rows=86100 width=4) + -> Seq Scan on t1_mpp_24563 t1 (cost=0.00..321.00 rows=28700 width=4) + Optimizer: Postgres query optimizer +(16 rows) drop table t1_mpp_24563; drop table t2_mpp_24563; @@ -1539,17 +1537,17 @@ EXPLAIN SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL) ORDER BY 2; QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=6.66..6.84 rows=8 width=36) + Gather Motion 3:1 (slice1; segments: 3) (cost=2.20..2.31 rows=8 width=36) Merge Key: subselect_tbl.f1 - -> Sort (cost=6.66..6.68 rows=3 width=36) + -> Sort (cost=2.20..2.20 rows=3 width=36) Sort Key: subselect_tbl.f1 - -> Hash Semi Join (cost=3.34..6.54 rows=3 width=36) + -> Hash Semi Join (cost=1.11..2.18 rows=3 width=36) Hash Cond: (subselect_tbl.f1 = subselect_tbl_1.f2) - -> Seq Scan on subselect_tbl (cost=0.00..3.08 rows=3 width=4) - -> Hash (cost=3.24..3.24 rows=3 width=4) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.24 rows=3 width=4) + -> Seq Scan on subselect_tbl (cost=0.00..1.03 rows=3 width=4) + -> Hash (cost=1.08..1.08 rows=3 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.08 rows=3 width=4) Hash Key: subselect_tbl_1.f2 - -> Seq Scan on 
subselect_tbl subselect_tbl_1 (cost=0.00..3.08 rows=3 width=4) + -> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..1.03 rows=3 width=4) Optimizer: Postgres query optimizer (12 rows) @@ -1580,19 +1578,19 @@ EXPLAIN SELECT '' AS three, f1, f2 FROM SUBSELECT_TBL WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL WHERE f3 IS NOT NULL) ORDER BY 2,3; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=10000000009.64..10000000009.64 rows=4 width=8) + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000004.35..10000000004.40 rows=3 width=40) Merge Key: subselect_tbl.f1, subselect_tbl.f2 - -> Sort (cost=10000000009.64..10000000009.64 rows=2 width=8) + -> Sort (cost=10000000004.35..10000000004.35 rows=1 width=40) Sort Key: subselect_tbl.f1, subselect_tbl.f2 - -> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10000000009.61 rows=2 width=8) - Join Filter: subselect_tbl.f1 = subselect_tbl_1.f2 AND subselect_tbl.f2 = subselect_tbl_1.f3::integer - -> Seq Scan on subselect_tbl (cost=0.00..3.08 rows=3 width=8) - -> Materialize (cost=0.00..3.47 rows=7 width=12) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.36 rows=7 width=12) - -> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..3.08 rows=3 width=12) - Filter: f3 IS NOT NULL + -> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10000000004.34 rows=1 width=40) + Join Filter: ((subselect_tbl.f1 = subselect_tbl_1.f2) AND (subselect_tbl.f2 = (subselect_tbl_1.f3)::integer)) + -> Seq Scan on subselect_tbl (cost=0.00..1.03 rows=3 width=8) + -> Materialize (cost=0.00..1.15 rows=7 width=12) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.12 rows=7 width=12) + -> Seq Scan on subselect_tbl 
subselect_tbl_1 (cost=0.00..1.03 rows=2 width=12) + Filter: (f3 IS NOT NULL) Optimizer: Postgres query optimizer (12 rows) @@ -1601,22 +1599,22 @@ EXPLAIN SELECT * FROM tenk1 a, tenk1 b WHERE (a.unique1,b.unique2) IN (SELECT unique1,unique2 FROM tenk1 c); QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=356.67..706.67 rows=10000 width=488) - -> Hash Join (cost=356.67..573.33 rows=3333 width=488) + Gather Motion 3:1 (slice1; segments: 3) (cost=353.67..703.67 rows=10000 width=488) + -> Hash Join (cost=353.67..570.33 rows=3333 width=488) Hash Cond: (c.unique2 = b.unique2) - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=185.00..355.83 rows=3333 width=248) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=183.00..353.83 rows=3333 width=248) Hash Key: c.unique2 - -> Hash Join (cost=185.00..289.17 rows=3333 width=248) + -> Hash Join (cost=183.00..287.17 rows=3333 width=248) Hash Cond: (c.unique1 = a.unique1) - -> HashAggregate (cost=80.00..113.33 rows=10000 width=8) + -> HashAggregate (cost=79.00..112.33 rows=10000 width=8) Group Key: c.unique1, c.unique2 - -> Seq Scan on tenk1 c (cost=0.00..63.33 rows=3333 width=8) - -> Hash (cost=63.33..63.33 rows=3333 width=244) - -> Seq Scan on tenk1 a (cost=0.00..63.33 rows=3333 width=244) - -> Hash (cost=130.00..130.00 rows=3333 width=244) - -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..130.00 rows=3333 width=244) + -> Seq Scan on tenk1 c (cost=0.00..62.33 rows=3333 width=8) + -> Hash (cost=62.33..62.33 rows=3333 width=244) + -> Seq Scan on tenk1 a (cost=0.00..62.33 rows=3333 width=244) + -> Hash (cost=129.00..129.00 rows=3333 width=244) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..129.00 rows=3333 width=244) Hash Key: b.unique2 - -> Seq Scan on tenk1 b (cost=0.00..63.33 rows=3333 width=244) + -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=244) Optimizer: 
Postgres query optimizer (17 rows) @@ -1626,40 +1624,38 @@ EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1) ORDER BY 2,3; QUERY PLAN ------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.10..2.11 rows=4 width=8) + Gather Motion 3:1 (slice1; segments: 3) (cost=2.10..2.15 rows=3 width=40) Merge Key: upper.f1, upper.f2 - -> Sort (cost=2.10..2.11 rows=2 width=8) + -> Sort (cost=2.10..2.11 rows=1 width=40) Sort Key: upper.f1, upper.f2 - -> Hash Semi Join (cost=3.11..6.25 rows=2 width=8) - Hash Cond: upper.f1 = subselect_tbl.f1 - -> Seq Scan on subselect_tbl upper (cost=0.00..1.01 rows=1 width=8) - -> Hash (cost=1.01..1.01 rows=1 width=8) - -> Seq Scan on subselect_tbl (cost=0.00..1.01 rows=1 width=8) - Filter: f1 = f2 - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(12 rows) + -> Hash Semi Join (cost=1.05..2.09 rows=1 width=40) + Hash Cond: (upper.f1 = subselect_tbl.f1) + -> Seq Scan on subselect_tbl upper (cost=0.00..1.03 rows=3 width=8) + -> Hash (cost=1.03..1.03 rows=1 width=8) + -> Seq Scan on subselect_tbl (cost=0.00..1.03 rows=1 width=8) + Filter: (f1 = f2) + Optimizer: Postgres query optimizer +(11 rows) EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3) ORDER BY 2,3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) (cost=2.13..2.14 rows=4 width=12) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2.20..2.28 rows=6 width=44) Merge Key: upper.f1, upper.f3 - -> Sort (cost=2.13..2.14 rows=2 width=12) + -> Sort 
(cost=2.20..2.20 rows=2 width=44) Sort Key: upper.f1, upper.f3 - -> Hash Semi Join (cost=3.36..6.52 rows=2 width=12) - Hash Cond: upper.f1 = subselect_tbl.f2 AND upper.f2::double precision = subselect_tbl.f3 - -> Seq Scan on subselect_tbl upper (cost=0.00..1.01 rows=1 width=16) - -> Hash (cost=1.03..1.03 rows=1 width=12) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=12) + -> Hash Semi Join (cost=1.12..2.19 rows=2 width=44) + Hash Cond: (((upper.f2)::double precision = subselect_tbl.f3) AND (upper.f1 = subselect_tbl.f2)) + -> Seq Scan on subselect_tbl upper (cost=0.00..1.03 rows=3 width=16) + -> Hash (cost=1.08..1.08 rows=3 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.08 rows=3 width=12) Hash Key: subselect_tbl.f2 - -> Seq Scan on subselect_tbl (cost=0.00..1.01 rows=1 width=12) - Settings: optimizer=off - Optimizer status: Postgres query optimizer -(13 rows) + -> Seq Scan on subselect_tbl (cost=0.00..1.03 rows=3 width=12) + Optimizer: Postgres query optimizer +(12 rows) EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper @@ -1690,20 +1686,20 @@ EXPLAIN SELECT '' AS five, f1 AS "Correlated Field" FROM SUBSELECT_TBL WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL WHERE f3 IS NOT NULL) ORDER BY 2; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=6.67..6.69 rows=8 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2.19..2.29 rows=7 width=36) Merge Key: subselect_tbl.f1 - -> Sort (cost=6.67..6.69 rows=3 width=4) + -> Sort (cost=2.19..2.20 rows=2 width=36) Sort Key: subselect_tbl.f1 - -> Hash Semi Join (cost=3.33..6.55 rows=3 width=4) - Hash Cond: subselect_tbl.f1 = 
subselect_tbl_1.f2 AND subselect_tbl.f2 = subselect_tbl_1.f3::integer - -> Seq Scan on subselect_tbl (cost=0.00..3.08 rows=3 width=8) - -> Hash (cost=3.22..3.22 rows=3 width=12) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.22 rows=3 width=12) + -> Hash Semi Join (cost=1.11..2.18 rows=2 width=36) + Hash Cond: ((subselect_tbl.f1 = subselect_tbl_1.f2) AND (subselect_tbl.f2 = (subselect_tbl_1.f3)::integer)) + -> Seq Scan on subselect_tbl (cost=0.00..1.03 rows=3 width=8) + -> Hash (cost=1.07..1.07 rows=2 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.07 rows=2 width=12) Hash Key: subselect_tbl_1.f2 - -> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..3.08 rows=3 width=12) - Filter: f3 IS NOT NULL + -> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..1.03 rows=2 width=12) + Filter: (f3 IS NOT NULL) Optimizer: Postgres query optimizer (13 rows) @@ -1787,18 +1783,18 @@ EXPLAIN select count(*) from where unique1 IN (select hundred from tenk1 b)) ss; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=643.26..643.27 rows=1 width=8) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=643.23..643.26 rows=1 width=8) - -> Partial Aggregate (cost=643.23..643.24 rows=1 width=8) - -> Hash Join (cost=415.48..642.98 rows=34 width=0) + Finalize Aggregate (cost=210.51..210.52 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=210.45..210.50 rows=3 width=8) + -> Partial Aggregate (cost=210.45..210.46 rows=1 width=8) + -> Hash Join (cost=138.92..210.37 rows=33 width=0) Hash Cond: (a.unique1 = b.hundred) - -> Seq Scan on tenk1 a (cost=0.00..189.00 rows=3334 width=4) - -> Hash (cost=414.23..414.23 rows=34 width=4) - -> HashAggregate (cost=413.90..414.23 rows=34 width=4) + -> Seq Scan on tenk1 a (cost=0.00..62.33 rows=3333 width=4) + -> Hash (cost=137.67..137.67 rows=100 width=4) + -> HashAggregate 
(cost=137.33..137.67 rows=100 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..389.00 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..129.00 rows=3333 width=4) Hash Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..189.00 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=4) Optimizer: Postgres query optimizer (13 rows) @@ -1807,24 +1803,24 @@ EXPLAIN select count(distinct ss.ten) from where unique1 IN (select hundred from tenk1 b)) ss; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=643.64..643.65 rows=1 width=8) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=643.61..643.64 rows=1 width=8) - -> Partial Aggregate (cost=643.61..643.62 rows=1 width=8) - -> HashAggregate (cost=643.57..643.60 rows=1 width=4) + Finalize Aggregate (cost=210.98..210.99 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=210.92..210.97 rows=3 width=8) + -> Partial Aggregate (cost=210.92..210.93 rows=1 width=8) + -> HashAggregate (cost=210.88..210.91 rows=3 width=4) Group Key: a.ten - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=643.48..643.57 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=210.54..210.83 rows=10 width=4) Hash Key: a.ten - -> Streaming HashAggregate (cost=643.48..643.51 rows=1 width=4) + -> Streaming HashAggregate (cost=210.54..210.64 rows=10 width=4) Group Key: a.ten - -> Hash Join (cost=415.48..642.98 rows=34 width=4) + -> Hash Join (cost=138.92..210.37 rows=33 width=4) Hash Cond: (a.unique1 = b.hundred) - -> Seq Scan on tenk1 a (cost=0.00..189.00 rows=3334 width=8) - -> Hash (cost=414.23..414.23 rows=34 width=4) - -> HashAggregate (cost=413.90..414.23 rows=34 width=4) + -> Seq Scan on tenk1 a (cost=0.00..62.33 rows=3333 width=8) + -> Hash (cost=137.67..137.67 rows=100 width=4) + -> 
HashAggregate (cost=137.33..137.67 rows=100 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..389.00 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..129.00 rows=3333 width=4) Hash Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..189.00 rows=3334 width=4) + -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=4) Optimizer: Postgres query optimizer (19 rows) @@ -1833,18 +1829,18 @@ EXPLAIN select count(*) from where unique1 IN (select distinct hundred from tenk1 b)) ss; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=643.26..643.27 rows=1 width=8) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=643.23..643.26 rows=1 width=8) - -> Partial Aggregate (cost=643.23..643.24 rows=1 width=8) - -> Hash Join (cost=415.48..642.98 rows=34 width=0) + Finalize Aggregate (cost=210.51..210.52 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=210.45..210.50 rows=3 width=8) + -> Partial Aggregate (cost=210.45..210.46 rows=1 width=8) + -> Hash Join (cost=138.92..210.37 rows=33 width=0) Hash Cond: (a.unique1 = b.hundred) - -> Seq Scan on tenk1 a (cost=0.00..189.00 rows=3334 width=4) - -> Hash (cost=414.23..414.23 rows=34 width=4) - -> HashAggregate (cost=413.90..414.23 rows=34 width=4) + -> Seq Scan on tenk1 a (cost=0.00..62.33 rows=3333 width=4) + -> Hash (cost=137.67..137.67 rows=100 width=4) + -> HashAggregate (cost=137.33..137.67 rows=100 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..389.00 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..129.00 rows=3333 width=4) Hash Key: b.hundred - -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=4) + -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=4) Optimizer: Postgres query optimizer (13 rows) @@ -1853,22 +1849,22 @@ EXPLAIN select 
count(distinct ss.ten) from where unique1 IN (select distinct hundred from tenk1 b)) ss; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate (cost=643.64..643.65 rows=1 width=8) - -> Gather Motion 3:1 (slice3; segments: 3) (cost=643.61..643.64 rows=1 width=8) - -> Partial Aggregate (cost=643.61..643.62 rows=1 width=8) - -> HashAggregate (cost=643.57..643.60 rows=1 width=4) + Finalize Aggregate (cost=210.98..210.99 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=210.92..210.97 rows=3 width=8) + -> Partial Aggregate (cost=210.92..210.93 rows=1 width=8) + -> HashAggregate (cost=210.88..210.91 rows=3 width=4) Group Key: a.ten - -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=643.48..643.57 rows=1 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=210.54..210.83 rows=10 width=4) Hash Key: a.ten - -> Streaming HashAggregate (cost=643.48..643.51 rows=1 width=4) + -> Streaming HashAggregate (cost=210.54..210.64 rows=10 width=4) Group Key: a.ten - -> Hash Join (cost=415.48..642.98 rows=34 width=4) + -> Hash Join (cost=138.92..210.37 rows=33 width=4) Hash Cond: (a.unique1 = b.hundred) - -> Seq Scan on tenk1 a (cost=0.00..189.00 rows=3334 width=8) - -> Hash (cost=414.23..414.23 rows=34 width=4) - -> HashAggregate (cost=413.90..414.23 rows=34 width=4) + -> Seq Scan on tenk1 a (cost=0.00..62.33 rows=3333 width=8) + -> Hash (cost=137.67..137.67 rows=100 width=4) + -> HashAggregate (cost=137.33..137.67 rows=100 width=4) Group Key: b.hundred - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..389.00 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..129.00 rows=3333 width=4) Hash Key: b.hundred -> Seq Scan on tenk1 b (cost=0.00..62.33 rows=3333 width=4) Optimizer: Postgres query optimizer @@ -1883,9 +1879,9 @@ EXPLAIN select count(distinct ss.ten) from EXPLAIN SELECT 
EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1; QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Limit (cost=17.76..17.79 rows=1 width=1) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=17.76..17.85 rows=3 width=1) - -> Limit (cost=17.76..17.81 rows=1 width=1) + Limit (cost=0.00..0.03 rows=1 width=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..0.09 rows=3 width=1) + -> Limit (cost=0.00..0.05 rows=1 width=1) -> Seq Scan on tenk2 (cost=0.00..177.56 rows=3333 width=1) SubPlan 2 -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..62.33 rows=3333 width=4) @@ -1955,7 +1951,7 @@ SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1); QUERY PLAN ------------------------------------------ - Result (cost=0.00..0.01 rows=1 width=0) + Result (cost=0.00..0.00 rows=0 width=8) One-Time Filter: false Optimizer: Postgres query optimizer (3 rows) @@ -1963,7 +1959,7 @@ EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1); QUERY PLAN ------------------------------------------ - Result (cost=0.00..0.01 rows=1 width=0) + Result (cost=0.00..0.00 rows=0 width=8) One-Time Filter: false Optimizer: Postgres query optimizer (3 rows) @@ -1971,7 +1967,7 @@ EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1); QUERY PLAN ------------------------------------------ - Result (cost=1.53..0.54 rows=1 width=0) + Result (cost=0.27..0.27 rows=0 width=8) One-Time Filter: false Optimizer: Postgres query optimizer (3 rows) @@ -2161,7 +2157,7 
@@ select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t); -> Seq Scan on dedup_tab t Filter: (5 = (a / 10)) Optimizer: Postgres query optimizer -(15 rows) +(7 rows) select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t); a @@ -2180,7 +2176,7 @@ select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_ta -> Seq Scan on dedup_tab t Filter: (5 = (a / 10)) Optimizer: Postgres query optimizer -(15 rows) +(7 rows) select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_tab t); a @@ -2367,8 +2363,8 @@ select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 wh select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2460,8 +2456,8 @@ select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2534,8 +2530,8 @@ select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_s select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1); i --- - 2 1 + 2 (2 rows) explain (costs off) @@ -2612,22 +2608,22 @@ select * from foo left outer join baz on (select bar.i from bar where bar.i = fo Output: bar.i -> Seq Scan on subselect_gp.bar Output: bar.i + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: optimizer=off (29 rows) select * from foo left outer join baz on (select bar.i from bar where bar.i = foo.i) + 1 = baz.j; i | j | i | j ----+----+---+--- - 10 | 10 | | - 9 | 9 | | - 6 | 6 | | - 5 | 5 | | 8 | 8 | | 7 | 7 | | 4 | 4 | | 3 | 3 | | 2 | 2 | | + 10 | 10 | | + 9 | 9 | | + 6 | 6 | | + 5 | 5 | | 1 | 1 | | (10 rows) @@ -2669,8 +2665,8 @@ select * from foo where Output: a.i -> Seq Scan on subselect_gp.baz a 
Output: a.i + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: optimizer=off (27 rows) select * from foo where @@ -2678,12 +2674,12 @@ select * from foo where (select b.i from baz b); i | j ----+---- - 6 | 6 - 7 | 7 10 | 10 - 5 | 5 + 6 | 6 8 | 8 9 | 9 + 5 | 5 + 7 | 7 (6 rows) -- When creating plan with subquery and CTE, it sets the useless flow for the plan. @@ -2736,8 +2732,9 @@ where dt < '2010-01-01'::date; Output: extra_flow_dist.c, extra_flow_dist.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(25 rows) +(26 rows) with run_dt as ( select @@ -2750,8 +2747,8 @@ select * from run_dt, extra_flow_dist1 where dt < '2010-01-01'::date; dt | a | b ------------+----+---- - 10-01-1949 | 20 | 20 10-01-1949 | 22 | 22 + 10-01-1949 | 20 | 20 10-01-1949 | 21 | 21 (3 rows) @@ -2792,8 +2789,9 @@ where dt < '2010-01-01'::date; Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(24 rows) +(25 rows) with run_dt as ( select @@ -2850,8 +2848,9 @@ where dt < '2010-01-01'::date; Output: extra_flow_dist.c, extra_flow_dist.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(28 rows) +(29 rows) with run_dt as ( select @@ -2864,9 +2863,9 @@ select * from run_dt, extra_flow_dist1 where dt < '2010-01-01'::date; dt | a | b ------------+----+---- - 10-01-1949 | 20 | 20 - 10-01-1949 | 21 | 21 10-01-1949 | 22 | 22 + 10-01-1949 | 21 | 21 + 10-01-1949 | 20 | 20 (3 rows) -- case 4 subplan with outer segment general locus without param in subplan (CTE and subquery) @@ -2878,37 +2877,40 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < 
extra_flow_dist1.a; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) - -> Nested Loop Left Join - Output: extra_flow_rand.a, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on subselect_gp.extra_flow_rand - Output: extra_flow_rand.a, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: extra_flow_rand.a, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on subselect_gp.extra_flow_rand + Output: extra_flow_rand.a, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(28 rows) +(31 rows) -- case 5 for subplan with outer entry locus without param in subplan (CTE 
and subquery) explain (verbose, costs off) with run_dt as ( @@ -2919,39 +2921,42 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < extra_flow_dist1.a; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) -> Broadcast Motion 1:3 (slice2) - Output: pg_class.relnatts, (max(1)) - -> Nested Loop Left Join - Output: pg_class.relnatts, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.relnatts, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Gather Motion 3:1 (slice3; segments: 3) + Output: share0_ref1.x, share0_ref1.dt + -> Shared Scan (share slice:id 2:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: pg_class.relnatts, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.relnatts, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, 
extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(30 rows) +(33 rows) -- case 6 without CTE, nested subquery should not add extral flow explain (verbose, costs off) select * from ( @@ -3019,8 +3024,9 @@ where dt < '2010-01-01'::date; Output: extra_flow_dist_1.c, extra_flow_dist_1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'off' Optimizer: Postgres query optimizer -(47 rows) +(48 rows) -- Check DISTINCT ON clause and ORDER BY clause in SubLink, See https://github.com/greenplum-db/gpdb/issues/12656. -- For EXISTS SubLink, we don’t need to care about the data deduplication problem, we can delete DISTINCT ON clause and @@ -3055,8 +3061,8 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu Sort Key: issue_12656_1.i -> Seq Scan on subselect_gp.issue_12656 issue_12656_1 Output: issue_12656_1.i, issue_12656_1.j + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: optimizer=off (20 rows) select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656); @@ -3088,8 +3094,8 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu Sort Key: issue_12656_1.i, issue_12656_1.j -> Seq Scan on subselect_gp.issue_12656 issue_12656_1 Output: issue_12656_1.i, issue_12656_1.j + Settings: optimizer = 'off' Optimizer: Postgres query optimizer - Settings: optimizer=off (20 rows) select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j asc); @@ -3120,8 +3126,8 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu Sort Key: issue_12656_1.i, issue_12656_1.j DESC -> Seq Scan on subselect_gp.issue_12656 issue_12656_1 Output: issue_12656_1.i, issue_12656_1.j + Settings: optimizer = 'off' Optimizer: Postgres query 
optimizer - Settings: optimizer=off (20 rows) select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc); @@ -3251,30 +3257,33 @@ with cte(table_oid, size) as select pc.relname, ts.size from pg_class pc, cte ts where pc.oid = ts.table_oid; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Output: pc.relname, share0_ref1.size -> Nested Loop - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - Join Filter: (pc.oid = pg_class.oid) + Output: pc.relname, share0_ref1.size + Join Filter: (pc.oid = share0_ref1.table_oid) -> Redistribute Motion 1:3 (slice2) Output: pc.relname, pc.oid Hash Key: pc.oid -> Seq Scan on pg_catalog.pg_class pc Output: pc.relname, pc.oid -> Materialize - Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - -> HashAggregate - Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) - Group Key: pg_class.oid - -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) - Hash Key: pg_class.oid - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Output: share0_ref1.table_oid, share0_ref1.size + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.table_oid, share0_ref1.size + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, 
(pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: enable_bitmapscan = 'off', enable_hashjoin = 'off', enable_indexscan = 'off', enable_nestloop = 'on', gp_enable_multiphase_agg = 'on', optimizer = 'off' Optimizer: Postgres query optimizer -(21 rows) +(24 rows) set gp_enable_multiphase_agg = off; explain (verbose on, costs off) @@ -3293,30 +3302,33 @@ with cte(table_oid, size) as select pc.relname, ts.size from pg_class pc, cte ts where pc.oid = ts.table_oid; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Output: pc.relname, share0_ref1.size -> Nested Loop - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - Join Filter: (pc.oid = pg_class.oid) + Output: pc.relname, share0_ref1.size + Join Filter: (pc.oid = share0_ref1.table_oid) -> Redistribute Motion 1:3 (slice2) Output: pc.relname, pc.oid Hash Key: pc.oid -> Seq Scan on pg_catalog.pg_class pc Output: pc.relname, pc.oid -> Materialize - Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - -> HashAggregate - Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) - Group Key: pg_class.oid - -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) - Hash Key: pg_class.oid - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Output: 
share0_ref1.table_oid, share0_ref1.size + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.table_oid, share0_ref1.size + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: enable_bitmapscan = 'off', enable_hashjoin = 'off', enable_indexscan = 'off', enable_nestloop = 'on', gp_enable_multiphase_agg = 'off', optimizer = 'off' Optimizer: Postgres query optimizer -(21 rows) +(24 rows) reset gp_enable_multiphase_agg; reset enable_hashjoin; @@ -3568,10 +3580,10 @@ SELECT c FROM t0; -- Test push predicate into subquery -- more details could be found at https://github.com/greenplum-db/gpdb/issues/8429 CREATE TABLE foo_predicate_pushdown (a int, b int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE bar_predicate_pushdown (c int, d int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
explain (costs off) select * from ( select distinct (select bar.c from bar_predicate_pushdown bar where c = foo.b) as ss from foo_predicate_pushdown foo @@ -3635,7 +3647,7 @@ explain (costs off) select * from table_left where exists (select 1 from table_r -> Hash -> Broadcast Motion 3:3 (slice3; segments: 3) -> Seq Scan on table_left - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (12 rows) select * from table_left where exists (select 1 from table_right where l1 = r1); @@ -3658,7 +3670,7 @@ explain (costs off) select * from table_left where l1 in (select r1 from table_r -> Hash -> Broadcast Motion 3:3 (slice3; segments: 3) -> Seq Scan on table_left - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (12 rows) select * from table_left where exists (select 1 from table_right where l1 = r1); @@ -3680,7 +3692,7 @@ explain (costs off) select * from table_left where exists (select 1 from table_r -> Seq Scan on table_right -> Hash -> Seq Scan on table_left - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (9 rows) select * from table_left where exists (select 1 from table_right where l1 = r1); @@ -3700,7 +3712,7 @@ explain (costs off) select * from table_left where l1 in (select r1 from table_r -> Seq Scan on table_right -> Hash -> Seq Scan on table_left - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (9 rows) select * from table_left where exists (select 1 from table_right where l1 = r1); @@ -3802,7 +3814,7 @@ explain (costs off) select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1) -> Result -> Seq Scan on foo Filter: (a <= LEAST(b, $0, NULL::integer)) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (6 rows) select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1), NULL); @@ -3819,15 +3831,15 @@ explain (costs off) select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT -> Result -> Seq Scan on foo Filter: (a <= GREATEST(b, $0, 
NULL::integer)) - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (6 rows) select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT 1), NULL); a --- + 1 2 3 - 1 (3 rows) explain (costs off) select least((select 5), greatest(b, NULL, (select 1)), a) from foo; @@ -3839,7 +3851,7 @@ explain (costs off) select least((select 5), greatest(b, NULL, (select 1)), a) f InitPlan 2 (returns $1) (slice3) -> Result -> Seq Scan on foo - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (7 rows) select least((select 5), greatest(b, NULL, (select 1)), a) from foo; @@ -3864,7 +3876,7 @@ explain (costs off) select (select a from bar)[1] from bar; -> Gather Motion 3:1 (slice3; segments: 3) -> Seq Scan on bar bar_1 -> Seq Scan on bar - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (6 rows) select (select a from bar)[1] from bar; @@ -3883,7 +3895,7 @@ explain (costs off) select (select a from bar)[(select 1)] from bar; -> Gather Motion 3:1 (slice4; segments: 3) -> Seq Scan on bar bar_1 -> Seq Scan on bar - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (8 rows) select (select a from bar)[(select 1)] from bar; @@ -3900,7 +3912,7 @@ explain (costs off) select (select b from bar)[1][1:3] from bar; -> Gather Motion 3:1 (slice3; segments: 3) -> Seq Scan on bar bar_1 -> Seq Scan on bar - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (6 rows) select (select b from bar)[1][1:3] from bar; @@ -3919,7 +3931,7 @@ explain (costs off) select (select b from bar)[(select 1)][1:3] from bar; -> Gather Motion 3:1 (slice4; segments: 3) -> Seq Scan on bar bar_1 -> Seq Scan on bar - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (8 rows) select (select b from bar)[(select 1)][1:3] from bar; @@ -3949,7 +3961,7 @@ explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2. 
-> Materialize -> Gather Motion 3:1 (slice2; segments: 3) -> Seq Scan on inner_bar t2 - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (13 rows) explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q; @@ -3966,7 +3978,7 @@ explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2. -> Materialize -> Gather Motion 3:1 (slice2; segments: 3) -> Seq Scan on inner_bar t2 - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (12 rows) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1; @@ -4008,7 +4020,7 @@ select * from t where a > (select count(1) from cte where x > t.a + random()); -> Materialize -> Broadcast Motion 3:3 (slice3; segments: 3) -> Seq Scan on inner_bar t2 - Optimizer: Postgres-based planner + Optimizer: Postgres query optimizer (22 rows) with cte(x) as (select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1) diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 51344116edd..76063f8f029 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -2979,37 +2979,40 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < extra_flow_dist1.a; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: 
((max(1)) < extra_flow_dist1.a) - -> Nested Loop Left Join - Output: extra_flow_rand.a, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on subselect_gp.extra_flow_rand - Output: extra_flow_rand.a, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: extra_flow_rand.a, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on subselect_gp.extra_flow_rand + Output: extra_flow_rand.a, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(28 rows) +(31 rows) -- case 5 for subplan with outer entry locus without param in subplan (CTE and subquery) explain (verbose, costs off) with run_dt as ( @@ -3020,39 +3023,42 @@ explain (verbose, costs off) with run_dt as ( ) select * from run_dt, extra_flow_dist1 where dt < extra_flow_dist1.a; - QUERY PLAN --------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b + 
Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b -> Nested Loop - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) -> Broadcast Motion 1:3 (slice2) - Output: pg_class.relnatts, (max(1)) - -> Nested Loop Left Join - Output: pg_class.relnatts, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.relnatts, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Gather Motion 3:1 (slice3; segments: 3) + Output: share0_ref1.x, share0_ref1.dt + -> Shared Scan (share slice:id 2:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: pg_class.relnatts, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.relnatts, (SubPlan 1) + SubPlan 1 + -> Materialize Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b + Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(30 rows) +(33 rows) -- case 6 without CTE, nested subquery should not add extral flow explain (verbose, costs off) select * from ( @@ -3364,30 +3370,33 @@ with cte(table_oid, size) as select pc.relname, ts.size from pg_class pc, cte ts where pc.oid = ts.table_oid; - QUERY PLAN 
----------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Output: pc.relname, share0_ref1.size -> Nested Loop - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - Join Filter: (pc.oid = pg_class.oid) + Output: pc.relname, share0_ref1.size + Join Filter: (pc.oid = share0_ref1.table_oid) -> Redistribute Motion 1:3 (slice2) Output: pc.relname, pc.oid Hash Key: pc.oid -> Seq Scan on pg_catalog.pg_class pc Output: pc.relname, pc.oid -> Materialize - Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - -> HashAggregate - Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) - Group Key: pg_class.oid - -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) - Hash Key: pg_class.oid - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Output: share0_ref1.table_oid, share0_ref1.size + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.table_oid, share0_ref1.size + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: enable_bitmapscan = 'off', enable_hashjoin = 'off', enable_indexscan = 'off', 
enable_nestloop = 'on', gp_enable_multiphase_agg = 'on', optimizer = 'on' Optimizer: Postgres query optimizer -(21 rows) +(24 rows) set gp_enable_multiphase_agg = off; explain (verbose on, costs off) @@ -3406,30 +3415,33 @@ with cte(table_oid, size) as select pc.relname, ts.size from pg_class pc, cte ts where pc.oid = ts.table_oid; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Output: pc.relname, share0_ref1.size -> Nested Loop - Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - Join Filter: (pc.oid = pg_class.oid) + Output: pc.relname, share0_ref1.size + Join Filter: (pc.oid = share0_ref1.table_oid) -> Redistribute Motion 1:3 (slice2) Output: pc.relname, pc.oid Hash Key: pc.oid -> Seq Scan on pg_catalog.pg_class pc Output: pc.relname, pc.oid -> Materialize - Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) - -> HashAggregate - Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) - Group Key: pg_class.oid - -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) - Hash Key: pg_class.oid - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Output: share0_ref1.table_oid, share0_ref1.size + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.table_oid, share0_ref1.size + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 
(slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: enable_bitmapscan = 'off', enable_hashjoin = 'off', enable_indexscan = 'off', enable_nestloop = 'on', gp_enable_multiphase_agg = 'off', optimizer = 'on' Optimizer: Postgres query optimizer -(21 rows) +(24 rows) reset gp_enable_multiphase_agg; reset enable_hashjoin; diff --git a/src/test/regress/expected/tpcds_q04.out b/src/test/regress/expected/tpcds_q04.out index be34e94a761..f3bd6803189 100644 --- a/src/test/regress/expected/tpcds_q04.out +++ b/src/test/regress/expected/tpcds_q04.out @@ -5426,8 +5426,8 @@ INSERT INTO pg_statistic VALUES ( ,t_s_secyear.customer_last_name ,t_s_secyear.customer_login limit 100; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_login @@ -5435,97 +5435,101 @@ INSERT INTO pg_statistic VALUES ( -> Sort Sort Key: t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_login -> Hash Join - Hash Cond: ((t_c_secyear.customer_id)::text = 
(t_s_secyear.customer_id)::text) - Join Filter: (CASE WHEN (t_c_firstyear.year_total > '0'::numeric) THEN (t_c_secyear.year_total / t_c_firstyear.year_total) ELSE NULL::numeric END > CASE WHEN (t_s_firstyear.year_total > '0'::numeric) THEN (t_s_secyear.year_total / t_s_firstyear.year_total) ELSE NULL::numeric END) - -> Hash Join - Hash Cond: ((t_c_secyear.customer_id)::text = (t_w_firstyear.customer_id)::text) - Join Filter: (CASE WHEN (t_c_firstyear.year_total > '0'::numeric) THEN (t_c_secyear.year_total / t_c_firstyear.year_total) ELSE NULL::numeric END > CASE WHEN (t_w_firstyear.year_total > '0'::numeric) THEN (t_w_secyear.year_total / t_w_firstyear.year_total) ELSE NULL::numeric END) + Hash Cond: ((t_s_secyear.customer_id)::text = (t_w_secyear.customer_id)::text) + Join Filter: (CASE WHEN (t_c_firstyear.year_total > '0'::numeric) THEN (t_c_secyear.year_total / t_c_firstyear.year_total) ELSE NULL::numeric END > CASE WHEN (t_w_firstyear.year_total > '0'::numeric) THEN (t_w_secyear.year_total / t_w_firstyear.year_total) ELSE NULL::numeric END) + -> Broadcast Motion 3:3 (slice2; segments: 3) -> Hash Join - Hash Cond: ((t_c_firstyear.customer_id)::text = (t_c_secyear.customer_id)::text) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t_c_firstyear.customer_id + Hash Cond: ((t_s_secyear.customer_id)::text = (t_c_firstyear.customer_id)::text) + Join Filter: (CASE WHEN (t_c_firstyear.year_total > '0'::numeric) THEN (t_c_secyear.year_total / t_c_firstyear.year_total) ELSE NULL::numeric END > CASE WHEN (t_s_firstyear.year_total > '0'::numeric) THEN (t_s_secyear.year_total / t_s_firstyear.year_total) ELSE NULL::numeric END) + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: ((t_s_secyear.customer_id)::text = (t_c_secyear.customer_id)::text) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Hash Join + Hash Cond: ((t_s_secyear.customer_id)::text = (t_w_firstyear.customer_id)::text) + -> Broadcast Motion 3:3 (slice5; segments: 3) + 
-> Hash Join + Hash Cond: ((t_s_firstyear.customer_id)::text = (t_s_secyear.customer_id)::text) + -> Broadcast Motion 3:3 (slice6; segments: 3) + -> Subquery Scan on t_s_firstyear + Filter: ((t_s_firstyear.year_total > '0'::numeric) AND (t_s_firstyear.sale_type = 's'::text) AND (t_s_firstyear.dyear = 2001)) + -> Shared Scan (share slice:id 6:0) + -> Hash + -> Subquery Scan on t_s_secyear + Filter: ((t_s_secyear.sale_type = 's'::text) AND (t_s_secyear.dyear = 2002)) + -> Shared Scan (share slice:id 5:0) + -> Hash + -> Subquery Scan on t_w_firstyear + Filter: ((t_w_firstyear.year_total > '0'::numeric) AND (t_w_firstyear.sale_type = 'w'::text) AND (t_w_firstyear.dyear = 2001)) + -> Shared Scan (share slice:id 4:0) + -> Hash + -> Subquery Scan on t_c_secyear + Filter: ((t_c_secyear.sale_type = 'c'::text) AND (t_c_secyear.dyear = 2002)) + -> Shared Scan (share slice:id 3:0) + -> Hash -> Subquery Scan on t_c_firstyear Filter: ((t_c_firstyear.year_total > '0'::numeric) AND (t_c_firstyear.sale_type = 'c'::text) AND (t_c_firstyear.dyear = 2001)) -> Shared Scan (share slice:id 2:0) - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t_c_secyear.customer_id - -> Subquery Scan on t_c_secyear - Filter: ((t_c_secyear.sale_type = 'c'::text) AND (t_c_secyear.dyear = 2002)) - -> Shared Scan (share slice:id 3:0) - -> Hash - -> Hash Join - Hash Cond: ((t_w_firstyear.customer_id)::text = (t_w_secyear.customer_id)::text) - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: t_w_firstyear.customer_id - -> Subquery Scan on t_w_firstyear - Filter: ((t_w_firstyear.year_total > '0'::numeric) AND (t_w_firstyear.sale_type = 'w'::text) AND (t_w_firstyear.dyear = 2001)) - -> Shared Scan (share slice:id 4:0) - -> Hash - -> Redistribute Motion 3:3 (slice5; segments: 3) - Hash Key: t_w_secyear.customer_id - -> Subquery Scan on t_w_secyear - Filter: ((t_w_secyear.sale_type = 'w'::text) AND (t_w_secyear.dyear = 2002)) - -> Shared Scan (share slice:id 5:0) -> Hash - -> 
Redistribute Motion 3:3 (slice6; segments: 3) - Hash Key: t_s_secyear.customer_id - -> Hash Join - Hash Cond: ((t_s_firstyear.customer_id)::text = (t_s_secyear.customer_id)::text) - -> Broadcast Motion 3:3 (slice7; segments: 3) - -> Subquery Scan on t_s_firstyear - Filter: ((t_s_firstyear.year_total > '0'::numeric) AND (t_s_firstyear.sale_type = 's'::text) AND (t_s_firstyear.dyear = 2001)) - -> Shared Scan (share slice:id 7:0) - -> Hash - -> Subquery Scan on t_s_secyear - Filter: ((t_s_secyear.sale_type = 's'::text) AND (t_s_secyear.dyear = 2002)) - -> Shared Scan (share slice:id 6:0) - -> Append - -> HashAggregate - Group Key: customer.c_customer_id, customer.c_first_name, customer.c_last_name, customer.c_preferred_cust_flag, customer.c_birth_country, customer.c_login, customer.c_email_address, date_dim.d_year + -> Subquery Scan on t_w_secyear + Filter: ((t_w_secyear.sale_type = 'w'::text) AND (t_w_secyear.dyear = 2002)) + -> Shared Scan (share slice:id 1:0) + -> Result + -> Append + -> HashAggregate + Group Key: customer.c_customer_id, customer.c_first_name, customer.c_last_name, customer.c_preferred_cust_flag, customer.c_birth_country, customer.c_login, customer.c_email_address, date_dim.d_year + Filter: ((sum(((((store_sales.ss_ext_list_price - store_sales.ss_ext_wholesale_cost) - store_sales.ss_ext_discount_amt) + store_sales.ss_ext_sales_price) / '2'::numeric)) > '0'::numeric) OR (date_dim.d_year = 2002)) + -> Redistribute Motion 3:3 (slice7; segments: 3) + Hash Key: customer.c_customer_id, customer.c_first_name, customer.c_last_name, customer.c_preferred_cust_flag, customer.c_birth_country, customer.c_login, customer.c_email_address, date_dim.d_year + -> Hash Join + Hash Cond: (store_sales.ss_customer_sk = customer.c_customer_sk) -> Redistribute Motion 3:3 (slice8; segments: 3) - Hash Key: customer.c_customer_id, customer.c_first_name, customer.c_last_name, customer.c_preferred_cust_flag, customer.c_birth_country, customer.c_login, customer.c_email_address, 
date_dim.d_year + Hash Key: store_sales.ss_customer_sk -> Hash Join - Hash Cond: (store_sales.ss_customer_sk = customer.c_customer_sk) - -> Hash Join - Hash Cond: (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - -> Seq Scan on store_sales - -> Hash - -> Broadcast Motion 3:3 (slice9; segments: 3) - -> Seq Scan on date_dim + Hash Cond: (store_sales.ss_sold_date_sk = date_dim.d_date_sk) + -> Seq Scan on store_sales -> Hash - -> Broadcast Motion 3:3 (slice10; segments: 3) - -> Seq Scan on customer - -> HashAggregate - Group Key: customer_1.c_customer_id, customer_1.c_first_name, customer_1.c_last_name, customer_1.c_preferred_cust_flag, customer_1.c_birth_country, customer_1.c_login, customer_1.c_email_address, date_dim_1.d_year + -> Broadcast Motion 3:3 (slice9; segments: 3) + -> Seq Scan on date_dim + Filter: ((d_year = 2001) OR (d_year = 2002)) + -> Hash + -> Seq Scan on customer + -> HashAggregate + Group Key: customer_1.c_customer_id, customer_1.c_first_name, customer_1.c_last_name, customer_1.c_preferred_cust_flag, customer_1.c_birth_country, customer_1.c_login, customer_1.c_email_address, date_dim_1.d_year + Filter: ((sum(((((catalog_sales.cs_ext_list_price - catalog_sales.cs_ext_wholesale_cost) - catalog_sales.cs_ext_discount_amt) + catalog_sales.cs_ext_sales_price) / '2'::numeric)) > '0'::numeric) OR (date_dim_1.d_year = 2002)) + -> Redistribute Motion 3:3 (slice10; segments: 3) + Hash Key: customer_1.c_customer_id, customer_1.c_first_name, customer_1.c_last_name, customer_1.c_preferred_cust_flag, customer_1.c_birth_country, customer_1.c_login, customer_1.c_email_address, date_dim_1.d_year + -> Hash Join + Hash Cond: (catalog_sales.cs_bill_customer_sk = customer_1.c_customer_sk) -> Redistribute Motion 3:3 (slice11; segments: 3) - Hash Key: customer_1.c_customer_id, customer_1.c_first_name, customer_1.c_last_name, customer_1.c_preferred_cust_flag, customer_1.c_birth_country, customer_1.c_login, customer_1.c_email_address, date_dim_1.d_year + Hash Key: 
catalog_sales.cs_bill_customer_sk -> Hash Join - Hash Cond: (catalog_sales.cs_bill_customer_sk = customer_1.c_customer_sk) - -> Hash Join - Hash Cond: (catalog_sales.cs_sold_date_sk = date_dim_1.d_date_sk) - -> Seq Scan on catalog_sales - -> Hash - -> Broadcast Motion 3:3 (slice12; segments: 3) - -> Seq Scan on date_dim date_dim_1 + Hash Cond: (catalog_sales.cs_sold_date_sk = date_dim_1.d_date_sk) + -> Seq Scan on catalog_sales -> Hash - -> Broadcast Motion 3:3 (slice13; segments: 3) - -> Seq Scan on customer customer_1 - -> HashAggregate - Group Key: customer_2.c_customer_id, customer_2.c_first_name, customer_2.c_last_name, customer_2.c_preferred_cust_flag, customer_2.c_birth_country, customer_2.c_login, customer_2.c_email_address, date_dim_2.d_year + -> Broadcast Motion 3:3 (slice12; segments: 3) + -> Seq Scan on date_dim date_dim_1 + Filter: ((d_year = 2001) OR (d_year = 2002)) + -> Hash + -> Seq Scan on customer customer_1 + -> HashAggregate + Group Key: customer_2.c_customer_id, customer_2.c_first_name, customer_2.c_last_name, customer_2.c_preferred_cust_flag, customer_2.c_birth_country, customer_2.c_login, customer_2.c_email_address, date_dim_2.d_year + Filter: ((sum(((((web_sales.ws_ext_list_price - web_sales.ws_ext_wholesale_cost) - web_sales.ws_ext_discount_amt) + web_sales.ws_ext_sales_price) / '2'::numeric)) > '0'::numeric) OR (date_dim_2.d_year = 2002)) + -> Redistribute Motion 3:3 (slice13; segments: 3) + Hash Key: customer_2.c_customer_id, customer_2.c_first_name, customer_2.c_last_name, customer_2.c_preferred_cust_flag, customer_2.c_birth_country, customer_2.c_login, customer_2.c_email_address, date_dim_2.d_year + -> Hash Join + Hash Cond: (web_sales.ws_bill_customer_sk = customer_2.c_customer_sk) -> Redistribute Motion 3:3 (slice14; segments: 3) - Hash Key: customer_2.c_customer_id, customer_2.c_first_name, customer_2.c_last_name, customer_2.c_preferred_cust_flag, customer_2.c_birth_country, customer_2.c_login, customer_2.c_email_address, 
date_dim_2.d_year + Hash Key: web_sales.ws_bill_customer_sk -> Hash Join Hash Cond: (web_sales.ws_sold_date_sk = date_dim_2.d_date_sk) - -> Hash Join - Hash Cond: (web_sales.ws_bill_customer_sk = customer_2.c_customer_sk) - -> Seq Scan on web_sales - -> Hash - -> Broadcast Motion 3:3 (slice15; segments: 3) - -> Seq Scan on customer customer_2 + -> Seq Scan on web_sales -> Hash - -> Broadcast Motion 3:3 (slice16; segments: 3) + -> Broadcast Motion 3:3 (slice15; segments: 3) -> Seq Scan on date_dim date_dim_2 + Filter: ((d_year = 2001) OR (d_year = 2002)) + -> Hash + -> Seq Scan on customer customer_2 Optimizer: Postgres query optimizer -(99 rows) +(103 rows) diff --git a/src/test/regress/expected/window_parallel.out b/src/test/regress/expected/window_parallel.out index 55fd0e820e0..0a7f456f5ce 100644 --- a/src/test/regress/expected/window_parallel.out +++ b/src/test/regress/expected/window_parallel.out @@ -146,16 +146,16 @@ SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; depname | empno | salary | rank -----------+-------+--------+------ - personnel | 5 | 3500 | 1 - personnel | 2 | 3900 | 2 - sales | 4 | 4800 | 1 - sales | 3 | 4800 | 1 - sales | 1 | 5000 | 3 develop | 7 | 4200 | 1 develop | 9 | 4500 | 2 develop | 10 | 5200 | 3 develop | 11 | 5200 | 3 develop | 8 | 6000 | 5 + personnel | 5 | 3500 | 1 + personnel | 2 | 3900 | 2 + sales | 4 | 4800 | 1 + sales | 3 | 4800 | 1 + sales | 1 | 5000 | 3 (10 rows) -- w3 @@ -886,27 +886,33 @@ select b, count(*) from t1 group by b union all select b, count(*) from t2 group Gather Motion 6:1 (slice1; segments: 6) Output: t1.b, (count(*)) -> Append - -> HashAggregate + -> Finalize HashAggregate Output: t1.b, count(*) Group Key: t1.b -> Redistribute Motion 6:6 (slice2; segments: 6) - Output: t1.b + Output: t1.b, (PARTIAL count(*)) Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on 
window_parallel.t1 - Output: t1.b - -> HashAggregate + -> Streaming Partial HashAggregate + Output: t1.b, PARTIAL count(*) + Group Key: t1.b + -> Parallel Seq Scan on window_parallel.t1 + Output: t1.a, t1.b + -> Finalize HashAggregate Output: t2.b, count(*) Group Key: t2.b -> Redistribute Motion 6:6 (slice3; segments: 6) - Output: t2.b + Output: t2.b, (PARTIAL count(*)) Hash Key: t2.b Hash Module: 3 - -> Parallel Seq Scan on window_parallel.t2 - Output: t2.b + -> Streaming Partial HashAggregate + Output: t2.b, PARTIAL count(*) + Group Key: t2.b + -> Parallel Seq Scan on window_parallel.t2 + Output: t2.a, t2.b Settings: enable_parallel = 'on', enable_parallel_append = 'on', min_parallel_table_scan_size = '0', optimizer = 'off' Optimizer: Postgres query optimizer -(23 rows) +(29 rows) set local enable_parallel_append = off; -- Naturally, use parallel-oblivious append directly when parallel-aware mode is disabled. @@ -917,27 +923,33 @@ select b, count(*) from t1 group by b union all select b, count(*) from t2 group Gather Motion 6:1 (slice1; segments: 6) Output: t1.b, (count(*)) -> Append - -> HashAggregate + -> Finalize HashAggregate Output: t1.b, count(*) Group Key: t1.b -> Redistribute Motion 6:6 (slice2; segments: 6) - Output: t1.b + Output: t1.b, (PARTIAL count(*)) Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on window_parallel.t1 - Output: t1.b - -> HashAggregate + -> Streaming Partial HashAggregate + Output: t1.b, PARTIAL count(*) + Group Key: t1.b + -> Parallel Seq Scan on window_parallel.t1 + Output: t1.a, t1.b + -> Finalize HashAggregate Output: t2.b, count(*) Group Key: t2.b -> Redistribute Motion 6:6 (slice3; segments: 6) - Output: t2.b + Output: t2.b, (PARTIAL count(*)) Hash Key: t2.b Hash Module: 3 - -> Parallel Seq Scan on window_parallel.t2 - Output: t2.b + -> Streaming Partial HashAggregate + Output: t2.b, PARTIAL count(*) + Group Key: t2.b + -> Parallel Seq Scan on window_parallel.t2 + Output: t2.a, t2.b Settings: enable_parallel = 'on', 
enable_parallel_append = 'off', min_parallel_table_scan_size = '0', optimizer = 'off' Optimizer: Postgres query optimizer -(23 rows) +(29 rows) -- Ensure compatibility between different paths when using parallel workers set local enable_parallel_append = on; @@ -950,27 +962,33 @@ select b, count(*) from t1 group by b union all select b, count(*) from t2 group Gather Motion 9:1 (slice1; segments: 9) Output: t1.b, (count(*)) -> Append - -> HashAggregate + -> Finalize HashAggregate Output: t1.b, count(*) Group Key: t1.b -> Redistribute Motion 6:9 (slice2; segments: 6) - Output: t1.b + Output: t1.b, (PARTIAL count(*)) Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on window_parallel.t1 - Output: t1.b - -> HashAggregate + -> Streaming Partial HashAggregate + Output: t1.b, PARTIAL count(*) + Group Key: t1.b + -> Parallel Seq Scan on window_parallel.t1 + Output: t1.a, t1.b + -> Finalize HashAggregate Output: t2.b, count(*) Group Key: t2.b -> Redistribute Motion 9:9 (slice3; segments: 9) - Output: t2.b + Output: t2.b, (PARTIAL count(*)) Hash Key: t2.b Hash Module: 3 - -> Parallel Seq Scan on window_parallel.t2 - Output: t2.b + -> Streaming Partial HashAggregate + Output: t2.b, PARTIAL count(*) + Group Key: t2.b + -> Parallel Seq Scan on window_parallel.t2 + Output: t2.a, t2.b Settings: enable_parallel = 'on', enable_parallel_append = 'on', min_parallel_table_scan_size = '0', optimizer = 'off' Optimizer: Postgres query optimizer -(23 rows) +(29 rows) -- Could not drive a parallel plan if no partial paths are avaliable alter table t2 set(parallel_workers=0); @@ -1037,7 +1055,10 @@ abort; -- -- start_ignore drop schema window_parallel cascade; -NOTICE: drop cascades to table empsalary +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table empsalary +drop cascades to table t1 +drop cascades to table t2 -- end_ignore reset min_parallel_table_scan_size; reset enable_parallel; diff --git a/src/test/regress/expected/with_clause.out 
b/src/test/regress/expected/with_clause.out index 2c3aff9951f..c2269c70ea0 100644 --- a/src/test/regress/expected/with_clause.out +++ b/src/test/regress/expected/with_clause.out @@ -2269,23 +2269,27 @@ SELECT count(a1.i) -> Gather Motion 3:1 (slice1; segments: 3) -> Subquery Scan on "*SELECT* 1" -> Hash Join - Hash Cond: (share1_ref1.i = a2.i) - -> Shared Scan (share slice:id 1:1) - -> Seq Scan on foo + Hash Cond: (a1.i = a2.i) + -> Subquery Scan on a1 + -> Shared Scan (share slice:id 1:1) + -> Result + -> Seq Scan on foo -> Hash -> Subquery Scan on a2 -> Shared Scan (share slice:id 1:0) - -> Seq Scan on foo foo_1 + -> Result + -> Seq Scan on foo foo_1 -> Aggregate -> Gather Motion 3:1 (slice2; segments: 3) -> Hash Join - Hash Cond: (share1_ref2.i = a2_1.i) - -> Shared Scan (share slice:id 2:1) + Hash Cond: (a1_1.i = a2_1.i) + -> Subquery Scan on a1_1 + -> Shared Scan (share slice:id 2:1) -> Hash -> Subquery Scan on a2_1 -> Shared Scan (share slice:id 2:0) Optimizer: Postgres query optimizer -(20 rows) +(24 rows) -- Another cross-slice ShareInputScan test. There is one producing slice, -- and two consumers in second slice. 
Make sure the Share Input Scan diff --git a/src/test/regress/input/external_table.source b/src/test/regress/input/external_table.source index 5efa68919ac..b795e28e321 100644 --- a/src/test/regress/input/external_table.source +++ b/src/test/regress/input/external_table.source @@ -1277,6 +1277,9 @@ SELECT * FROM cte1, exttab_limit_2 e3 where cte1.i = e3.i ORDER BY cte1.i LIMIT SELECT count(*) FROM gp_read_error_log('exttab_limit_2'); SELECT gp_truncate_error_log('exttab_limit_1'); SELECT gp_truncate_error_log('exttab_limit_2'); +-- start_ignore +set cbdb_enable_setop_pre_dedup=off; +-- end_ignore -- Note that even though we use exttab_limit_2 here , the LIMIT 3 will not throw a segment reject limit error SELECT * FROM ( @@ -3703,3 +3706,6 @@ DROP EXTERNAL TABLE ext_persistently; CREATE EXTERNAL WEB TEMP TABLE test_program_not_exist(content text) EXECUTE '/bin/bash /xx/seq 1 5' ON MASTER FORMAT 'TEXT'; SELECT * FROM test_program_not_exist; +-- start_ignore +reset cbdb_enable_setop_pre_dedup; +-- end_ignore diff --git a/src/test/regress/sql/cbdb_parallel.sql b/src/test/regress/sql/cbdb_parallel.sql index 08e7aa198f9..a31b687f65e 100644 --- a/src/test/regress/sql/cbdb_parallel.sql +++ b/src/test/regress/sql/cbdb_parallel.sql @@ -1014,7 +1014,7 @@ set enable_parallel = on; -- first stage HashAgg, second stage GroupAgg explain(costs off) select distinct a from t_distinct_0; -set parallel_query_use_streaming_hashagg = off; +set gp_use_streaming_hashagg = off; explain(costs off) select distinct a from t_distinct_0; -- GroupAgg @@ -1026,7 +1026,7 @@ set enable_hashagg = on; set enable_groupagg = off; explain(costs off) select distinct a from t_distinct_0; -set parallel_query_use_streaming_hashagg = on; +set gp_use_streaming_hashagg = on; explain(costs off) select distinct a from t_distinct_0; -- multi DISTINCT tlist @@ -1200,7 +1200,7 @@ select count(*) from (pj_t1 full join pj_t2 using (id)) fj full join pj_t3 using abort; -- start_ignore -drop schema test_parallel cascade; 
+--drop schema test_parallel cascade; -- end_ignore reset gp_appendonly_insert_files; diff --git a/src/test/regress/sql/cte_prune.sql b/src/test/regress/sql/cte_prune.sql index 2083a4b7e10..b178fd0f0e4 100644 --- a/src/test/regress/sql/cte_prune.sql +++ b/src/test/regress/sql/cte_prune.sql @@ -13,71 +13,71 @@ insert into t2 values(generate_series(0, 100), generate_series(100, 200), genera analyze t2; -- should pruned both seq scan and shared scan -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; -- * also should be pruned -explain verbose with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; +explain(costs off, verbose) with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; 
with c1 as (select * from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 where c11.v1 < 5; -- no push filter -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v1=c22.v2; -- distribution col can be pruned which is better than do redistribute in CTE consumer -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; with c1 as (select v1, v2, v3 from t1) select c11.v2 from c1 as c11 left join c1 as c22 on c11.v2=c22.v2; -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; with c1 as (select v1, v2, v3 from t1) select c11.v3 from c1 as c11 left join c1 as c22 on c11.v3=c22.v3; -- groupby/order by/window function/grouping set should be contains in CTE output -- group by -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) 
from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v1; -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; with c1 as (select v1, v2, v3 from t1) select sum(c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by c11.v2; -- order by -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v1; -explain verbose with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; with c1 as (select v1, v2, v3 from t1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 order by c22.v3; -- window function -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY 
c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; with c1 as (select v1, v2, v3 from t1) select sum(c11.v1) OVER (ORDER BY c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; -- grouping set -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v1,c11.v2); -explain verbose with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); +explain(costs off, verbose) with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1 group by ROLLUP(c11.v2,c11.v3); with c1 as (select v1, v2, v3 from t1) select sum(c11.v2) OVER (ORDER BY c11.v3) from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; -- CTE producer should have right output -explain verbose with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) +explain(costs off, verbose) with c1 as (select t1.v1 as v1, t2.v1 as t21, 
t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; with c1 as (select t1.v1 as v1, t2.v1 as t21, t2.v2 as t22, t2.v3 as t23 from t1 join t2 on t1.v1 = t2.v1) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; -explain verbose with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) +explain(costs off, verbose) with c1 as (select sum(v1) as v1, sum(v2) as v2, v3 from t1 group by v3) select c11.v1 from c1 as c11 left join c1 as c22 on c11.v1=c22.v1; with c1 as (select lt1.v3 as v3, lt1.v1 as lo1, rt1.v1 as ro1 from t1 lt1, t1 rt1 where lt1.v2 = rt1.v2 and lt1.v1 = rt1.v1) @@ -213,7 +213,7 @@ primary key (ws_item_sk, ws_order_number) ); -- sql 23 -explain verbose with frequent_ss_items as +explain (costs off, verbose) with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from tpcds_store_sales ,tpcds_date_dim @@ -227,14 +227,14 @@ select t1.v1 from t1 where t1.v1 in (select item_sk from frequent_ss_items where and t1.v1 in (select item_sk from frequent_ss_items where item_sk > 0); -- sql 95 -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) select * from t1 where t1.v1 in (select ws_order_number from ws_wh where true) and t1.v1 in (select ws_order_number from ws_wh where ws_order_number > 0); -explain verbose with ws_wh as +explain (costs off, verbose) with ws_wh as (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 from tpcds_web_sales ws1,tpcds_web_sales ws2 where ws1.ws_order_number = ws2.ws_order_number @@ -261,7 +261,7 @@ analyze t4; -- Additional filtering conditions are added to the consumer. 
-- This is caused by `PexprInferPredicates` in the ORCA preprocessor. -explain verbose WITH t(a,b,d) AS +explain(costs off, verbose) WITH t(a,b,d) AS ( SELECT t3.a,t3.b,t4.d FROM t3,t4 WHERE t3.a = t4.d ) @@ -315,7 +315,7 @@ SELECT END) AS string4 FROM generate_series(0, 99) AS i; -explain verbose select four, x +explain(costs off, verbose) select four, x from (select four, ten, 'foo'::text as x from cte_prune_tenk1) as t group by grouping sets (four, x) having x = 'foo'; @@ -376,7 +376,7 @@ ALTER TABLE ONLY countrylanguage ADD CONSTRAINT countrylanguage_pkey PRIMARY KEY (countrycode, "language"); -- CTE1(inlined) in CTE2(no-inlined) case -explain verbose with country as +explain(costs off, verbose) with country as (select country.code,country.name COUNTRY, city.name CAPITAL, language, isofficial, percentage FROM country,city,countrylanguage WHERE country.code = countrylanguage.countrycode @@ -393,7 +393,7 @@ select * from where country.percentage = countrylanguage.percentage order by countrylanguage.COUNTRY,country.language LIMIT 40; -- CTE in the main query and subqueries within the main query -explain verbose with bad_headofstates as +explain(costs off, verbose) with bad_headofstates as ( select country.code,country.name,country.headofstate,countrylanguage.language from @@ -424,7 +424,7 @@ CREATE TABLE t5 AS SELECT i as c, i+1 as d from generate_series(1,10)i; CREATE TABLE t6 AS SELECT i as a, i+1 as b from generate_series(1,10)i; -- inlined CTEs should have not unused columns(ex. 
t5.*, t6.* in output) -explain verbose WITH w AS (SELECT a, b from t6 where b < 5) +explain(costs off, verbose) WITH w AS (SELECT a, b from t6 where b < 5) SELECT * FROM t6, (WITH v AS (SELECT c, d FROM t5, w WHERE c = w.a AND c < 2) diff --git a/src/test/regress/sql/dboptions.sql b/src/test/regress/sql/dboptions.sql index 3a634e1620d..7cbdd9c226b 100644 --- a/src/test/regress/sql/dboptions.sql +++ b/src/test/regress/sql/dboptions.sql @@ -2,6 +2,10 @@ -- Test create/alter database options -- +-- start_matchsubs +-- m/\"\/tmp\/.s.PGSQL.\d+\"/ +-- s/\"\/tmp\/.s.PGSQL.\d+\"/\"\/tmp\/.s.PGSQL.xxx\"/ +-- end_matchsubs -- Test CONNECTION LIMIT -- create a regular user as superusers are exempt from limits diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index b00385019ae..d6cd51f1123 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -18,6 +18,9 @@ SET max_parallel_workers_per_gather TO 0; -- Disable incremental sort, which can influence selected plans due to fuzz factor. SET enable_incremental_sort TO off; +-- start_ignore +set gp_use_streaming_hashagg=off; +-- end_ignore -- -- Tests for list partitioned tables. 
-- @@ -336,3 +339,7 @@ RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + +-- start_ignore +reset gp_use_streaming_hashagg; +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/sql/qp_with_clause.sql b/src/test/regress/sql/qp_with_clause.sql index fbff6575d89..e923e50ff78 100644 --- a/src/test/regress/sql/qp_with_clause.sql +++ b/src/test/regress/sql/qp_with_clause.sql @@ -10351,7 +10351,7 @@ EXPLAIN (COSTS OFF) WITH q AS (SELECT * FROM (WITH cte AS (SELECT * FROM car) SE WITH q AS (SELECT * FROM (WITH cte AS (SELECT * FROM car) SELECT * FROM car WHERE a > 7) t) SELECT * FROM q; -- start_ignore -drop schema qp_with_clause cascade; +--drop schema qp_with_clause cascade; -- end_ignore RESET optimizer_trace_fallback; diff --git a/src/test/singlenode_regress/expected/partition_aggregate.out b/src/test/singlenode_regress/expected/partition_aggregate.out index e5658fb09f3..d57ec5e924d 100644 --- a/src/test/singlenode_regress/expected/partition_aggregate.out +++ b/src/test/singlenode_regress/expected/partition_aggregate.out @@ -67,13 +67,13 @@ SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVI Group Key: pagg_tab.a Filter: (avg(pagg_tab.d) < '15'::numeric) -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 Optimizer: Postgres query optimizer @@ -603,21 +603,21 @@ SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP B -> Sort Sort Key: b.y -> Append - -> Partial HashAggregate 
+ -> Streaming Partial HashAggregate Group Key: b.y -> Hash Left Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: b_1.y -> Hash Left Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: b_2.y -> Hash Right Join Hash Cond: (b_2.y = a_2.x) @@ -697,21 +697,21 @@ SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y G -> Sort Sort Key: a.x -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: a.x -> Hash Full Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: a_1.x -> Hash Full Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: a_2.x -> Hash Full Join Hash Cond: (b_2.y = a_2.x) @@ -862,13 +862,13 @@ SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 Group Key: pagg_tab_m.a Filter: (avg(pagg_tab_m.c) < '22'::numeric) -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_m.a -> Seq Scan on pagg_tab_m_p1 pagg_tab_m - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_m_1.a -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_m_2.a -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 Optimizer: Postgres query optimizer @@ -1070,10 +1070,10 @@ SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER B -> Sort Sort Key: pagg_tab_ml_2.a -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_2.a -> Seq Scan on 
pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_3.a -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -> Finalize GroupAggregate @@ -1082,10 +1082,10 @@ SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER B -> Sort Sort Key: pagg_tab_ml_5.a -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_5.a -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_6.a -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 Optimizer: Postgres query optimizer @@ -1118,19 +1118,19 @@ SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; -> Sort Sort Key: pagg_tab_ml.b -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml.b -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_1.b -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_2.b -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_3.b -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_4.b -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 Optimizer: Postgres query optimizer @@ -1217,10 +1217,10 @@ SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER B -> Sort Sort Key: pagg_tab_ml_2.a -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_2.a -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_3.a -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -> Finalize GroupAggregate @@ -1229,10 +1229,10 @@ SELECT 
a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER B -> Sort Sort Key: pagg_tab_ml_5.a -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_5.a -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_6.a -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 Optimizer: Postgres query optimizer @@ -1265,19 +1265,19 @@ SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; -> Sort Sort Key: pagg_tab_ml.b -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml.b -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_1.b -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_2.b -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_3.b -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_ml_4.b -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 Optimizer: Postgres query optimizer @@ -1403,13 +1403,13 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < -> Sort Sort Key: pagg_tab_para.y -> Append - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_para.y -> Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_para_1.y -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate + -> Streaming Partial HashAggregate Group Key: pagg_tab_para_2.y -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 Optimizer: Postgres query optimizer diff --git a/src/test/singlenode_regress/expected/partition_prune.out 
b/src/test/singlenode_regress/expected/partition_prune.out index 3f0b9d69e42..7c28c2f429b 100644 --- a/src/test/singlenode_regress/expected/partition_prune.out +++ b/src/test/singlenode_regress/expected/partition_prune.out @@ -2401,59 +2401,98 @@ select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 QUERY PLAN ------------------------------------------------------------------------- Append (actual rows=0 loops=1) - InitPlan 1 (returns $0) - -> Aggregate (actual rows=1 loops=1) - -> Seq Scan on lprt_a (actual rows=102 loops=1) - InitPlan 2 (returns $1) - -> Aggregate (actual rows=1 loops=1) - -> Seq Scan on lprt_a lprt_a_1 (actual rows=102 loops=1) - -> Bitmap Heap Scan on ab_a1_b1 ab_1 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a1_b1_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a1_b2 ab_2 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a1_b3 ab_3 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a2_b1 ab_4 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a2_b1_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a2_b2 ab_5 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a2_b2_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a2_b3 ab_6 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a2_b3_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a3_b1 ab_7 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a3_b1_a_idx (never executed) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a3_b2 ab_8 (actual 
rows=0 loops=1) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a3_b2_a_idx (actual rows=0 loops=1) - Index Cond: (a = $0) - -> Bitmap Heap Scan on ab_a3_b3 ab_9 (never executed) - Recheck Cond: (a = $0) - Filter: (b = $1) - -> Bitmap Index Scan on ab_a3_b3_a_idx (never executed) - Index Cond: (a = $0) - Optimizer: Postgres query optimizer -(53 rows) + -> Seq Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 3) (copy 5))) AND (b = (SubPlan 2 (copy 4) (copy 6)))) + SubPlan 1 (copy 3) (copy 5) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a (never executed) + SubPlan 2 (copy 4) (copy 6) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_1 (never executed) + -> Seq Scan on ab_a1_b2 ab_2 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 3) (copy 7))) AND (b = (SubPlan 2 (copy 4) (copy 8)))) + SubPlan 1 (copy 3) (copy 7) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_2 (never executed) + SubPlan 2 (copy 4) (copy 8) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_3 (never executed) + -> Seq Scan on ab_a1_b3 ab_3 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 3) (copy 9))) AND (b = (SubPlan 2 (copy 4) (copy 10)))) + SubPlan 1 (copy 3) (copy 9) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_4 (never executed) + SubPlan 2 (copy 4) (copy 10) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_5 (never executed) + -> Seq Scan on ab_a2_b1 ab_4 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 11) (copy 13))) AND (b = (SubPlan 2 (copy 12) (copy 14)))) + SubPlan 1 (copy 11) (copy 13) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_6 (never executed) + SubPlan 2 (copy 12) (copy 
14) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_7 (never executed) + -> Seq Scan on ab_a2_b2 ab_5 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 11) (copy 15))) AND (b = (SubPlan 2 (copy 12) (copy 16)))) + SubPlan 1 (copy 11) (copy 15) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_8 (never executed) + SubPlan 2 (copy 12) (copy 16) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_9 (never executed) + -> Seq Scan on ab_a2_b3 ab_6 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 11) (copy 17))) AND (b = (SubPlan 2 (copy 12) (copy 18)))) + SubPlan 1 (copy 11) (copy 17) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_10 (never executed) + SubPlan 2 (copy 12) (copy 18) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_11 (never executed) + -> Seq Scan on ab_a3_b1 ab_7 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 19) (copy 21))) AND (b = (SubPlan 2 (copy 20) (copy 22)))) + SubPlan 1 (copy 19) (copy 21) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_12 (never executed) + SubPlan 2 (copy 20) (copy 22) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_13 (never executed) + -> Seq Scan on ab_a3_b2 ab_8 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 19) (copy 23))) AND (b = (SubPlan 2 (copy 20) (copy 24)))) + SubPlan 1 (copy 19) (copy 23) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_14 (never executed) + SubPlan 2 (copy 20) (copy 24) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_15 (never executed) + -> Seq Scan on ab_a3_b3 ab_9 (actual rows=0 loops=1) + Filter: ((a = (SubPlan 1 (copy 19) (copy 
25))) AND (b = (SubPlan 2 (copy 20) (copy 26)))) + SubPlan 1 (copy 19) (copy 25) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_16 (never executed) + SubPlan 2 (copy 20) (copy 26) + -> Materialize (never executed) + -> Aggregate (never executed) + -> Seq Scan on lprt_a lprt_a_17 (never executed) + Optimizer: Postgres query optimizer +(92 rows) -- Test run-time partition pruning with UNION ALL parents explain (analyze, costs off, summary off, timing off) diff --git a/src/test/singlenode_regress/expected/rangefuncs.out b/src/test/singlenode_regress/expected/rangefuncs.out index 4e16ec1d5d2..81a9de2e71b 100644 --- a/src/test/singlenode_regress/expected/rangefuncs.out +++ b/src/test/singlenode_regress/expected/rangefuncs.out @@ -2129,12 +2129,16 @@ select * from testrngfunc(); Output: (1), (2) Sort Key: (1), (2) -> Append - -> Result + -> Unique Output: 1, 2 - -> Result + -> Result + Output: 1, 2 + -> Unique Output: 3, 4 + -> Result + Output: 3, 4 Optimizer: Postgres query optimizer -(14 rows) +(18 rows) select * from testrngfunc(); f1 | f2 diff --git a/src/test/singlenode_regress/expected/select_parallel.out b/src/test/singlenode_regress/expected/select_parallel.out index 59be57dfb96..aab568643e4 100644 --- a/src/test/singlenode_regress/expected/select_parallel.out +++ b/src/test/singlenode_regress/expected/select_parallel.out @@ -317,13 +317,14 @@ explain (costs off) QUERY PLAN ------------------------------------- Aggregate - InitPlan 1 (returns $1) - -> Aggregate - -> Seq Scan on tenk2 -> Seq Scan on tenk1 - Filter: (unique1 = $1) + Filter: (unique1 = (SubPlan 1)) + SubPlan 1 + -> Materialize + -> Aggregate + -> Seq Scan on tenk2 Optimizer: Postgres query optimizer -(7 rows) +(8 rows) select count(*) from tenk1 where tenk1.unique1 = (Select max(tenk2.unique1) from tenk2); diff --git a/src/test/singlenode_regress/expected/subselect.out b/src/test/singlenode_regress/expected/subselect.out index 
7cd68de4dab..fe4bc71ab2b 100644 --- a/src/test/singlenode_regress/expected/subselect.out +++ b/src/test/singlenode_regress/expected/subselect.out @@ -1151,13 +1151,21 @@ where o.ten = 1; -> HashSetOp Except -> Append -> Subquery Scan on "*SELECT* 1" - -> Index Scan using onek_unique1 on onek i1 - Index Cond: (unique1 = o.unique1) + -> Unique + Group Key: i1.unique2, i1.two, i1.four, i1.ten, i1.twenty, i1.hundred, i1.thousand, i1.twothousand, i1.fivethous, i1.tenthous, i1.odd, i1.even, i1.stringu1, i1.stringu2, i1.string4 + -> Sort + Sort Key: i1.unique2, i1.two, i1.four, i1.ten, i1.twenty, i1.hundred, i1.thousand, i1.twothousand, i1.fivethous, i1.tenthous, i1.odd, i1.even, i1.stringu1, i1.stringu2, i1.string4 + -> Index Scan using onek_unique1 on onek i1 + Index Cond: (unique1 = o.unique1) -> Subquery Scan on "*SELECT* 2" - -> Index Scan using onek_unique1 on onek i2 - Index Cond: (unique1 = o.unique2) + -> Unique + Group Key: i2.unique2, i2.two, i2.four, i2.ten, i2.twenty, i2.hundred, i2.thousand, i2.twothousand, i2.fivethous, i2.tenthous, i2.odd, i2.even, i2.stringu1, i2.stringu2, i2.string4 + -> Sort + Sort Key: i2.unique2, i2.two, i2.four, i2.ten, i2.twenty, i2.hundred, i2.thousand, i2.twothousand, i2.fivethous, i2.tenthous, i2.odd, i2.even, i2.stringu1, i2.stringu2, i2.string4 + -> Index Scan using onek_unique1 on onek i2 + Index Cond: (unique1 = o.unique2) Optimizer: Postgres query optimizer -(15 rows) +(23 rows) select count(*) from onek o cross join lateral ( @@ -1686,8 +1694,9 @@ select * from x where f1 = 1; Output: share0_ref1.f1 -> Seq Scan on public.subselect_tbl Output: subselect_tbl.f1 + Filter: (subselect_tbl.f1 = 1) Optimizer: Postgres query optimizer -(8 rows) +(10 rows) -- Stable functions are safe to inline explain (verbose, costs off) @@ -1718,8 +1727,9 @@ select * from x where f1 = 1; Output: share0_ref1.f1, share0_ref1.random -> Seq Scan on public.subselect_tbl Output: subselect_tbl.f1, random() + Filter: (subselect_tbl.f1 = 1) Optimizer: 
Postgres query optimizer -(4 rows) +(10 rows) -- SELECT FOR UPDATE cannot be inlined -- GPDB: select statement with locking clause is not easy to fully supported @@ -1746,18 +1756,16 @@ select * from x, x x2 where x.n = x2.n; QUERY PLAN ------------------------------------------------------------------------ Hash Join - Output: share0_ref2.f1, share0_ref2.n, share0_ref1.f1, share0_ref1.n - Hash Cond: (share0_ref2.n = share0_ref1.n) - -> Shared Scan (share slice:id -1:0) - Output: share0_ref2.f1, share0_ref2.n + Output: subselect_tbl.f1, ('regression'::name), subselect_tbl_1.f1, ('regression'::name) + Hash Cond: (('regression'::name) = ('regression'::name)) + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, 'regression'::name -> Hash - Output: share0_ref1.f1, share0_ref1.n - -> Shared Scan (share slice:id -1:0) - Output: share0_ref1.f1, share0_ref1.n - -> Seq Scan on public.subselect_tbl - Output: subselect_tbl.f1, 'regression'::name + Output: subselect_tbl_1.f1, ('regression'::name) + -> Seq Scan on public.subselect_tbl subselect_tbl_1 + Output: subselect_tbl_1.f1, 'regression'::name Optimizer: Postgres query optimizer -(12 rows) +(11 rows) explain (verbose, costs off) with x as not materialized (select * from (select f1, current_database() as n from subselect_tbl) ss) diff --git a/src/test/singlenode_regress/expected/subselect_gp.out b/src/test/singlenode_regress/expected/subselect_gp.out index 7538ea4b4d2..4966909fc57 100644 --- a/src/test/singlenode_regress/expected/subselect_gp.out +++ b/src/test/singlenode_regress/expected/subselect_gp.out @@ -2410,30 +2410,32 @@ where dt < extra_flow_dist1.a; QUERY PLAN ------------------------------------------------------------------------------- Nested Loop - Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) - -> Nested Loop Left Join - Output: extra_flow_rand.a, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan 
on subselect_gp.extra_flow_rand - Output: extra_flow_rand.a, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: (share0_ref1.dt < extra_flow_dist1.a) + -> Shared Scan (share slice:id -1:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: extra_flow_rand.a, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on subselect_gp.extra_flow_rand + Output: extra_flow_rand.a, (SubPlan 1) + SubPlan 1 + -> Materialize + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b Optimizer: Postgres query optimizer -(25 rows) +(27 rows) -- case 5 for subplan with outer entry locus without param in subplan (CTE and subquery) explain (verbose, costs off) with run_dt as ( @@ -2447,30 +2449,32 @@ where dt < extra_flow_dist1.a; QUERY PLAN ------------------------------------------------------------------------------- Nested Loop - Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b - Join Filter: ((max(1)) < extra_flow_dist1.a) - -> Nested Loop Left Join - Output: pg_class.relnatts, (max(1)) - Inner Unique: true - Join Filter: ((SubPlan 1)) - -> Seq Scan on pg_catalog.pg_class - Output: pg_class.relnatts, (SubPlan 1) - SubPlan 1 - -> Materialize - Output: (random()) - -> Seq Scan on subselect_gp.extra_flow_dist - Output: random() - -> Materialize - Output: (max(1)) - -> Aggregate - Output: max(1) - -> Result + Output: share0_ref1.x, share0_ref1.dt, extra_flow_dist1.a, extra_flow_dist1.b + Join Filter: 
(share0_ref1.dt < extra_flow_dist1.a) + -> Shared Scan (share slice:id -1:0) + Output: share0_ref1.x, share0_ref1.dt + -> Nested Loop Left Join + Output: pg_class.relnatts, (max(1)) + Inner Unique: true + Join Filter: ((SubPlan 1)) + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.relnatts, (SubPlan 1) + SubPlan 1 + -> Materialize + Output: (random()) + -> Seq Scan on subselect_gp.extra_flow_dist + Output: random() + -> Materialize + Output: (max(1)) + -> Aggregate + Output: max(1) + -> Result -> Materialize Output: extra_flow_dist1.a, extra_flow_dist1.b -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b Optimizer: Postgres query optimizer -(25 rows) +(27 rows) -- case 6 without CTE, nested subquery should not add extral flow explain (verbose, costs off) select * from ( @@ -2504,28 +2508,40 @@ where dt < '2010-01-01'::date; Output: ((SubPlan 1)) Sort Key: ((SubPlan 1)) -> Append - -> Subquery Scan on a - Output: (SubPlan 1) - -> Aggregate - Output: max(1) - -> Result - SubPlan 1 - -> Seq Scan on subselect_gp.extra_flow_dist - Output: extra_flow_dist.c - Filter: (extra_flow_dist.b = a.x) - -> Subquery Scan on aa - Output: (SubPlan 2) - -> Aggregate - Output: max(1) - -> Result - SubPlan 2 - -> Seq Scan on subselect_gp.extra_flow_dist extra_flow_dist_1 - Output: extra_flow_dist_1.c - Filter: (extra_flow_dist_1.b = aa.x) + -> Unique + Output: ((SubPlan 1)) + Group Key: ((SubPlan 1)) + -> Sort + Output: ((SubPlan 1)) + Sort Key: ((SubPlan 1)) + -> Subquery Scan on a + Output: (SubPlan 1) + -> Aggregate + Output: max(1) + -> Result + SubPlan 1 + -> Seq Scan on subselect_gp.extra_flow_dist + Output: extra_flow_dist.c + Filter: (extra_flow_dist.b = a.x) + -> Unique + Output: ((SubPlan 2)) + Group Key: ((SubPlan 2)) + -> Sort + Output: ((SubPlan 2)) + Sort Key: ((SubPlan 2)) + -> Subquery Scan on aa + Output: (SubPlan 2) + -> Aggregate + Output: max(1) + -> Result + SubPlan 2 + -> Seq Scan on subselect_gp.extra_flow_dist 
extra_flow_dist_1 + Output: extra_flow_dist_1.c + Filter: (extra_flow_dist_1.b = aa.x) -> Seq Scan on subselect_gp.extra_flow_dist1 Output: extra_flow_dist1.a, extra_flow_dist1.b Optimizer: Postgres query optimizer -(34 rows) +(45 rows) -- Check DISTINCT ON clause and ORDER BY clause in SubLink, See https://github.com/greenplum-db/gpdb/issues/12656. -- For EXISTS SubLink, we don’t need to care about the data deduplication problem, we can delete DISTINCT ON clause and diff --git a/src/test/singlenode_regress/expected/union.out b/src/test/singlenode_regress/expected/union.out index 73158320abd..639826cacf4 100644 --- a/src/test/singlenode_regress/expected/union.out +++ b/src/test/singlenode_regress/expected/union.out @@ -356,10 +356,14 @@ select count(*) from -> HashAggregate Group Key: tenk1.unique1 -> Append - -> Seq Scan on tenk1 - -> Seq Scan on tenk1 tenk1_1 + -> Unique + Group Key: tenk1.unique1 + -> Index Only Scan using tenk1_unique1 on tenk1 + -> HashAggregate + Group Key: tenk1_1.fivethous + -> Seq Scan on tenk1 tenk1_1 Optimizer: Postgres query optimizer -(7 rows) +(11 rows) select count(*) from ( select unique1 from tenk1 union select fivethous from tenk1 ) ss; @@ -378,11 +382,15 @@ select count(*) from -> HashSetOp Intersect -> Append -> Subquery Scan on "*SELECT* 2" - -> Seq Scan on tenk1 + -> HashAggregate + Group Key: tenk1.fivethous + -> Seq Scan on tenk1 -> Subquery Scan on "*SELECT* 1" - -> Seq Scan on tenk1 tenk1_1 + -> Unique + Group Key: tenk1_1.unique1 + -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1 Optimizer: Postgres query optimizer -(9 rows) +(13 rows) select count(*) from ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; @@ -423,10 +431,16 @@ select count(*) from -> Sort Sort Key: tenk1.unique1 -> Append - -> Seq Scan on tenk1 - -> Seq Scan on tenk1 tenk1_1 + -> Unique + Group Key: tenk1.unique1 + -> Index Only Scan using tenk1_unique1 on tenk1 + -> Unique + Group Key: tenk1_1.fivethous + -> Sort + Sort Key: 
tenk1_1.fivethous + -> Seq Scan on tenk1 tenk1_1 Optimizer: Postgres query optimizer -(9 rows) +(15 rows) select count(*) from ( select unique1 from tenk1 union select fivethous from tenk1 ) ss; @@ -447,11 +461,17 @@ select count(*) from Sort Key: "*SELECT* 2".fivethous -> Append -> Subquery Scan on "*SELECT* 2" - -> Seq Scan on tenk1 + -> Unique + Group Key: tenk1.fivethous + -> Sort + Sort Key: tenk1.fivethous + -> Seq Scan on tenk1 -> Subquery Scan on "*SELECT* 1" - -> Seq Scan on tenk1 tenk1_1 + -> Unique + Group Key: tenk1_1.unique1 + -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1 Optimizer: Postgres query optimizer -(11 rows) +(17 rows) select count(*) from ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; @@ -1144,12 +1164,14 @@ explain (costs off) HashAggregate Group Key: ((t1.a || t1.b)) -> Append - -> Index Scan using t1_ab_idx on t1 - Index Cond: ((a || b) = 'ab'::text) - -> Index Only Scan using t2_pkey on t2 - Index Cond: (ab = 'ab'::text) + -> Unique + -> Index Scan using t1_ab_idx on t1 + Index Cond: ((a || b) = 'ab'::text) + -> Unique + -> Index Only Scan using t2_pkey on t2 + Index Cond: (ab = 'ab'::text) Optimizer: Postgres query optimizer -(8 rows) +(10 rows) -- -- Test that ORDER BY for UNION ALL can be pushed down to inheritance @@ -1255,11 +1277,13 @@ ORDER BY x; -> Sort Sort Key: (1), (2) -> Append - -> Result - -> Result - One-Time Filter: false + -> Unique + -> Result + -> Unique + -> Result + One-Time Filter: false Optimizer: Postgres query optimizer -(11 rows) +(13 rows) SELECT * FROM (SELECT 1 AS t, 2 AS x @@ -1286,13 +1310,17 @@ ORDER BY x; -> Subquery Scan on ss Filter: (ss.x < 4) -> HashAggregate - Group Key: (1), (generate_series(1, 10)) + Group Key: (1), "*SELECT* 1".x -> Append - -> ProjectSet + -> Subquery Scan on "*SELECT* 1" + -> HashAggregate + Group Key: 1, generate_series(1, 10) + -> ProjectSet + -> Result + -> Unique -> Result - -> Result Optimizer: Postgres query optimizer -(11 rows) +(15 
rows) SELECT * FROM (SELECT 1 AS t, generate_series(1,10) AS x @@ -1325,12 +1353,14 @@ ORDER BY x; -> Sort Sort Key: (1), ((($0 * '3'::double precision))::integer) -> Append - -> Result + -> Unique InitPlan 1 (returns $0) -> Result - -> Result + -> Result + -> Unique + -> Result Optimizer: Postgres query optimizer -(14 rows) +(16 rows) SELECT * FROM (SELECT 1 AS t, (random()*3)::int AS x diff --git a/src/test/singlenode_regress/expected/update_gp.out b/src/test/singlenode_regress/expected/update_gp.out index edc15ade2b2..e4920eda469 100644 --- a/src/test/singlenode_regress/expected/update_gp.out +++ b/src/test/singlenode_regress/expected/update_gp.out @@ -118,28 +118,31 @@ WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk; QUERY PLAN ------------------------------------------------------------------------------------------------------- Update on keo1 - InitPlan 3 (returns $2) - -> Aggregate - InitPlan 2 (returns $1) - -> Seq Scan on keo4 keo4_1 - Filter: ((keo_para_budget_date)::text = $0) - InitPlan 1 (returns $0) - -> Aggregate - -> Seq Scan on keo4 - -> Seq Scan on keo3 - Filter: ((bky_per)::text = ($1)::text) -> Hash Join - Hash Cond: ((a.user_vie_project_code_pk)::text = (b.projects_pk)::text) + Hash Cond: ((b.projects_pk)::text = (keo1.user_vie_project_code_pk)::text) -> Hash Join - Hash Cond: ((a.user_vie_project_code_pk)::text = (keo1.user_vie_project_code_pk)::text) + Hash Cond: ((a.user_vie_project_code_pk)::text = (b.projects_pk)::text) + Join Filter: ((SubPlan 3) = (a.user_vie_fiscal_year_period_sk)::text) -> Seq Scan on keo1 a - Filter: ((user_vie_fiscal_year_period_sk)::text = $2) -> Hash - -> Seq Scan on keo1 + -> Seq Scan on keo2 b + SubPlan 3 + -> Materialize + -> Aggregate + -> Seq Scan on keo3 + Filter: ((bky_per)::text = ((SubPlan 2))::text) + SubPlan 2 + -> Materialize + -> Seq Scan on keo4 keo4_1 + Filter: ((keo_para_budget_date)::text = (SubPlan 1)) + SubPlan 1 + -> Materialize + -> Aggregate + -> Seq Scan on keo4 -> Hash - -> 
Seq Scan on keo2 b + -> Seq Scan on keo1 Optimizer: Postgres query optimizer -(22 rows) +(25 rows) UPDATE keo1 SET user_vie_act_cntr_marg_cum = 234.682 FROM ( SELECT a.user_vie_project_code_pk FROM keo1 a INNER JOIN keo2 b