diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out index 707be189e497..9a5350c2e0f2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out @@ -48,10 +48,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean) + filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out index e32e34094e80..6701fbaf4109 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out +++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out @@ -150,27 +150,27 @@ Stage-0 File Output Operator [FS_61] Limit [LIM_60] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=473 width=447) + Select Operator [SEL_59] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=473 width=447) + Top N Key Operator [TNK_57] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=473 width=447) + Map Join Operator [MAPJOIN_56] (rows=791 width=447) BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_53] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=387 width=178) + Select Operator [SEL_52] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=387 width=178) - predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null) + Filter Operator [FIL_51] (rows=500 width=178) + predicate:((key <> '0') and (key <> '100') and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=387 width=269) + <-Select Operator [SEL_55] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=387 width=269) - predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null) + Filter Operator [FIL_54] (rows=500 width=269) + predicate:((key1 <> '0') and (key1 <> '100') and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -346,27 +346,27 @@ Stage-0 File Output Operator [FS_41] Limit [LIM_40] (rows=20 
width=447) Number of rows:20 - Select Operator [SEL_39] (rows=473 width=447) + Select Operator [SEL_39] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=473 width=447) + Top N Key Operator [TNK_37] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=473 width=447) + Map Join Operator [MAPJOIN_36] (rows=791 width=447) BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_33] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=178) + Select Operator [SEL_32] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=387 width=178) - predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100'))) + Filter Operator [FIL_31] (rows=500 width=178) + predicate:((key <> '0') and (key <> '100')) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=387 width=269) + <-Select Operator [SEL_35] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=387 width=269) - predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100'))) + Filter Operator [FIL_34] (rows=500 width=269) + predicate:((key1 <> '0') and (key1 <> '100')) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -435,40 +435,40 @@ POSTHOOK: Input: default@srcbucket_big Plan optimized by CBO. 
Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE) -Reducer 3 <- Map 2 (SIMPLE_EDGE) +Map 1 <- Map 3 (CUSTOM_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:20 Stage-1 - Reducer 3 vectorized, llap + Reducer 2 vectorized, llap File Output Operator [FS_41] Limit [LIM_40] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=612 width=447) + Select Operator [SEL_39] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 2 [SIMPLE_EDGE] vectorized, llap + <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=612 width=447) + Top N Key Operator [TNK_37] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=612 width=447) - Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + Map Join Operator [MAPJOIN_36] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 3 [CUSTOM_EDGE] vectorized, llap + MULTICAST [RS_33] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=269) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=387 width=269) - predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null) - TableScan [TS_0] (rows=500 width=269) - default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_35] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_32] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + 
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_35] (rows=500 width=269) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=500 width=269) + predicate:((key2 <> 'val_0') and (key2 <> 'val_100') and key1 is not null) + TableScan [TS_0] (rows=500 width=269) + default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] PREHOOK: query: SELECT * FROM srcbucket_big a diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out index 5d4e328faf21..d0ba154e1464 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out @@ -71,10 +71,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean) + filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out index 6a149603f73a..150fa60ce166 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean) + filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((a = 22) or (b) IN ('four', 'one')) (type: boolean) @@ -93,7 +93,7 @@ STAGE PLANS: Map-reduce partition columns: FILE__PATH (type: string) Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java index 8ea25a91a0bf..565479734b42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java @@ -72,30 +72,44 @@ public SearchTransformer(RexBuilder rexBuilder, RexCall search, final RexUnknown this.unknownContext = unknownContext; } + /** + * Transforms the SEARCH expression into an equivalent RexNode expression. + * Warning: when called from a shuttle, callers of this method should consider flattening AND/OR expressions + * afterward, to get the same result as applying {@link SearchTransformer.Shuttle}. + */ public RexNode transform() { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.SEARCH_TRANSFORMER); - RangeConverter consumer = new RangeConverter<>(rexBuilder, operandType, ref); - RangeSets.forEach(sarg.rangeSet, consumer); - List orList = new ArrayList<>(); if (sarg.nullAs == RexUnknownAs.TRUE && unknownContext != RexUnknownAs.TRUE) { orList.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref)); } - switch (consumer.inLiterals.size()) { - case 0: - break; - case 1: - orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0))); - break; - default: - List operands = new ArrayList<>(consumer.inLiterals.size() + 1); - operands.add(ref); - operands.addAll(consumer.inLiterals); - orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands)); + + if (sarg.isComplementedPoints()) { + // Generate 'ref <> value1 AND ... 
AND ref <> valueN' + List list = sarg.rangeSet.complement().asRanges().stream().map( + range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref, + rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true))).toList(); + orList.add(RexUtil.composeConjunction(rexBuilder, list)); + } else { + RangeConverter consumer = new RangeConverter<>(rexBuilder, operandType, ref); + RangeSets.forEach(sarg.rangeSet, consumer); + + switch (consumer.inLiterals.size()) { + case 0: + break; + case 1: + orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0))); + break; + default: + List operands = new ArrayList<>(consumer.inLiterals.size() + 1); + operands.add(ref); + operands.addAll(consumer.inLiterals); + orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands)); + } + orList.addAll(consumer.nodes); } - orList.addAll(consumer.nodes); RexNode x = RexUtil.composeDisjunction(rexBuilder, orList); if (sarg.nullAs == RexUnknownAs.FALSE && unknownContext != RexUnknownAs.FALSE) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index 477e7fca984c..1e66a896d0bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -628,14 +628,23 @@ private RexNode makeLiteral(C value) { private double compute() { final List inLiterals = new ArrayList<>(); final List rangeSelectivities = new ArrayList<>(); - for (Range range : sarg.rangeSet.asRanges()) { - if (!range.hasLowerBound() && !range.hasUpperBound()) { - return 1.0; // "all" range + final List searchSelectivities = new ArrayList<>(); + + if (sarg.isComplementedPoints()) { + // Generate 'ref <> value1 AND ... 
AND ref <> valueN' + List notEq = sarg.rangeSet.complement().asRanges().stream() + .map(range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref, makeLiteral(range.lowerEndpoint()))) + .toList(); + searchSelectivities.add(RexUtil.composeConjunction(rexBuilder, notEq).accept(FilterSelectivityEstimator.this)); + } else { + for (Range range : sarg.rangeSet.asRanges()) { + if (!range.hasLowerBound() && !range.hasUpperBound()) { + return 1.0; // "all" range + } + processRangeSelectivity(range, rangeSelectivities, inLiterals); } - processRangeSelectivity(range, rangeSelectivities, inLiterals); } - final List searchSelectivities = new ArrayList<>(); if (!rangeSelectivities.isEmpty() && rangeSelectivities.stream().noneMatch(Objects::isNull)) { // Aggregate all ranges selectivity, respecting the max value of 1 double total = Math.min(1.0, rangeSelectivities.stream().mapToDouble(Double::doubleValue).sum()); @@ -655,7 +664,8 @@ private double compute() { List operands = new ArrayList<>(inLiterals.size() + 1); operands.add(ref); operands.addAll(inLiterals); - searchSelectivities.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this)); + searchSelectivities.add( + rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this)); } } @@ -664,7 +674,9 @@ private double compute() { rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref).accept(FilterSelectivityEstimator.this)); } - return searchSelectivities.size() == 1 ? searchSelectivities.get(0) : computeDisjunctionSelectivity(searchSelectivities); + return searchSelectivities.size() == 1 + ? 
searchSelectivities.get(0) + : computeDisjunctionSelectivity(searchSelectivities); } private void processRangeSelectivity(Range range, List rangeSelectivities, List inLiterals) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index b582c62997e9..2098f29a7a63 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -214,6 +215,12 @@ public ExprNodeDesc visitCall(RexCall call) { && SqlTypeUtil.equalSansNullability(dTFactory, call.getType(), call.operands.get(0).getType())) { return args.get(0); + } else if (call.isA(SqlKind.AND)) { + // Make sure AND is flattened (we may have nested ANDs due to SearchTransformer conversion above) + return ExprNodeDescUtils.and(args); + } else if (call.isA(SqlKind.OR)) { + // Make sure OR is flattened (we may have nested ORs due to SearchTransformer conversion above) + return ExprNodeDescUtils.or(args); } else { GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), args.size()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 541ce20f5180..34d5f0ba0b38 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -64,6 +64,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Predicate; public class ExprNodeDescUtils { @@ -243,6 +244,21 @@ public static ExprNodeGenericFuncDesc and(List exps) { return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", flatExps); } + /** + * Creates a disjunction (OR) of the given expressions flattening nested disjunctions if possible. + *
+   * Input: AND(A, B), C, OR(D, OR(E, F))
+   * Output: OR(AND(A, B), C, D, E, F)
+   * 
+ */ + public static ExprNodeGenericFuncDesc or(List exps) { + List flatExps = new ArrayList<>(); + for (ExprNodeDesc e : exps) { + split(e, flatExps, FunctionRegistry::isOpOr); + } + return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPOr(), "or", flatExps); + } + /** * Create an expression for computing a murmur hash by recursively hashing given expressions by two: *
@@ -305,9 +321,17 @@ public static List split(ExprNodeDesc current) {
    * split predicates by AND op
    */
   public static List split(ExprNodeDesc current, List splitted) {
-    if (FunctionRegistry.isOpAnd(current)) {
+    return split(current, splitted, FunctionRegistry::isOpAnd);
+  }
+
+  /**
+   * Recursively splits predicates, descending into the children of every expression that satisfies the condition
+   */
+  private static List split(ExprNodeDesc current, List splitted,
+      Predicate condition) {
+    if (condition.test(current)) {
       for (ExprNodeDesc child : current.getChildren()) {
-        split(child, splitted);
+        split(child, splitted, condition);
       }
       return splitted;
     }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
index 39c6ca8f80c4..4e39be818e60 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
@@ -371,6 +371,17 @@ public void testBetweenSelectivityLeftEqualsRight_KO() {
     betweenSelectivity(KLL, 2, 2);
   }
 
+  @Test
+  public void testComputeNotEqualsPredicateSelectivity() {
+    RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND, // inputRef0 <> int3 AND inputRef0 <> int7
+        REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int3),
+        REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int7));
+    filter = simplify(filter); // expected to fold both <> comparisons into one SEARCH call, asserted next
+    Assert.assertEquals(SqlKind.SEARCH, filter.getKind()); // guard: the estimator must receive a SEARCH
+    FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq);
+    Assert.assertEquals(0.7346938775510203, estimator.estimateSelectivity(filter), DELTA);
+  }
+
   @Test
   public void testComputeRangePredicateSelectivityWhenNoStats() {
     RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int3);
diff --git a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out b/ql/src/test/results/clientpositive/llap/folder_predicate.q.out
index f8b2ef3663ef..1e67ce4271a4 100644
--- a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out
+++ b/ql/src/test/results/clientpositive/llap/folder_predicate.q.out
@@ -41,9 +41,9 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: predicate_fold_tb
-          filterExpr: (value is null or (value < 3) or (value > 3)) (type: boolean)
+          filterExpr: ((value <> 3) or value is null) (type: boolean)
           Filter Operator
-            predicate: (value is null or (value < 3) or (value > 3)) (type: boolean)
+            predicate: ((value <> 3) or value is null) (type: boolean)
             Select Operator
               expressions: value (type: int)
               outputColumnNames: _col0
diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
index dcc7c103b771..cb2d50d73666 100644
--- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
@@ -627,7 +627,7 @@ STAGE PLANS:
                   alias: orc_pred
                   Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                     Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
@@ -695,10 +695,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: orc_pred
-                  filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                  filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                   Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                     Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
index d7a825b592a6..4858f10aa63a 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
@@ -561,7 +561,7 @@ STAGE PLANS:
                   alias: tbl_pred
                   Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                     Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
@@ -629,10 +629,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tbl_pred
-                  filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                  filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                   Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean)
+                    predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean)
                     Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: t (type: tinyint), s (type: string)
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index 23e8a82b7a2e..1edc82eeeded 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_date_test
-                  filterExpr: ((cdate < DATE'1969-07-14') or (cdate > DATE'1970-01-21') or ((cdate > DATE'1969-07-14') and (cdate < DATE'1969-10-26')) or ((cdate > DATE'1969-10-26') and (cdate < DATE'1970-01-21'))) (type: boolean)
+                  filterExpr: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean)
                   Statistics: Num rows: 12289 Data size: 339304 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
@@ -161,8 +161,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprOrExpr(children: FilterDateColLessDateScalar(col 3:date, val -171), FilterDateColGreaterDateScalar(col 3:date, val 20), FilterExprAndExpr(children: FilterDateColGreaterDateScalar(col 3:date, val -171), FilterDateColLessDateScalar(col 3:date, val -67)), FilterExprAndExpr(children: FilterDateColGreaterDateScalar(col 3:date, val -67), FilterDateColLessDateScalar(col 3:date, val 20)))
-                    predicate: ((cdate < DATE'1969-07-14') or (cdate > DATE'1970-01-21') or ((cdate > DATE'1969-07-14') and (cdate < DATE'1969-10-26')) or ((cdate > DATE'1969-10-26') and (cdate < DATE'1970-01-21'))) (type: boolean)
+                        predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val 20))
+                    predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean)
                     Statistics: Num rows: 12289 Data size: 339304 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       Select Vectorization:
@@ -370,7 +370,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_date_test
-                  filterExpr: ((cdecimal1 < -3367.6517567568) or (cdecimal1 > 2365.8945945946) or ((cdecimal1 > -3367.6517567568) and (cdecimal1 < 881.0135135135)) or ((cdecimal1 > 881.0135135135) and (cdecimal1 < 2365.8945945946))) (type: boolean)
+                  filterExpr: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> 2365.8945945946)) (type: boolean)
                   Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
@@ -378,8 +378,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprOrExpr(children: FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val 2365.8945945946), FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val 881.0135135135)), FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val 2365.8945945946)))
-                    predicate: ((cdecimal1 < -3367.6517567568) or (cdecimal1 > 2365.8945945946) or ((cdecimal1 > -3367.6517567568) and (cdecimal1 < 881.0135135135)) or ((cdecimal1 > 881.0135135135) and (cdecimal1 < 2365.8945945946))) (type: boolean)
+                        predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946))
+                    predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> 2365.8945945946)) (type: boolean)
                     Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       Select Vectorization: