diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 830b40d255..1d473b168e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -18,6 +18,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; import javax.annotation.Nullable; @@ -84,11 +85,13 @@ import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.SubsearchUtils; import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.CalciteUnsupportedException; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.CoercionUtils; import org.opensearch.sql.expression.function.PPLFuncImpTable; @RequiredArgsConstructor @@ -225,24 +228,41 @@ public RexNode visitIn(In node, CalcitePlanContext context) { final RexNode field = analyze(node.getField(), context); final List valueList = node.getValueList().stream().map(value -> analyze(value, context)).toList(); + // When the field is a temporal type, do NOT use leastRestrictive + rexBuilder.makeIn. For a + // temporal field tested against string/date literals, leastRestrictive collapses the common + // type to VARCHAR (the EXPR_DATE / EXPR_TIMESTAMP UDTs are VARCHAR-backed), so makeIn casts the + // field DOWN to VARCHAR and string-compares mismatched renderings (e.g. '2018-06-23 00:00:00' + // against '2018-06-23') — silently matching nothing. Rewrite the membership test as an OR of + // PPL `=` comparisons, the same temporal-aware comparison path visitCompare takes for `=`, so + // each value is coerced to the field's timestamp domain before comparison. + ExprType fieldExprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(field.getType()); + if (TEMPORAL_TYPES.contains(fieldExprType)) { + List equalities = + valueList.stream() + .map(value -> PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, "=", field, value)) + .toList(); + return context.relBuilder.or(equalities); + } final List dataTypes = - new java.util.ArrayList<>(valueList.stream().map(RexNode::getType).toList()); + new ArrayList<>(valueList.stream().map(RexNode::getType).toList()); dataTypes.add(field.getType()); RelDataType commonType = context.rexBuilder.getTypeFactory().leastRestrictive(dataTypes); if (commonType != null) { List newValueList = valueList.stream().map(value -> context.rexBuilder.makeCast(commonType, value)).toList(); return context.rexBuilder.makeIn(field, newValueList); - } else { - List exprTypes = - dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList(); - throw new SemanticCheckException( - StringUtils.format( - "In expression types are incompatible: fields type %s, values type %s", - exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1))); } + List exprTypes = + dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList(); + throw new SemanticCheckException( + StringUtils.format( + "In expression types are incompatible: fields type %s, values type %s", + exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1))); } + private static final Set TEMPORAL_TYPES = + Set.of(ExprCoreType.DATE, ExprCoreType.TIME, ExprCoreType.TIMESTAMP); + @Override public RexNode visitCompare(Compare node, CalcitePlanContext context) { RexNode left = analyze(node.getLeft(), context); @@ -258,6 +278,25 @@ public RexNode visitCompare(Compare node, CalcitePlanContext context) { return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right); } + /** + * Widens a set of operands to a common temporal type when, and only when, every operand is a + * temporal type (DATE / TIME / TIMESTAMP), including the EXPR_DATE / EXPR_TIME / EXPR_TIMESTAMP + * UDTs. Returns {@code null} otherwise so non-temporal incompatible mixes still fail the type + * check. The widening reuses {@link CoercionUtils#widenArguments} — the same path comparison + * operators take — which resolves DATE / TIME to TIMESTAMP via the shared widening graph. + */ + private static @Nullable List widenTemporalOperands( + CalcitePlanContext context, List operands) { + boolean allTemporal = + operands.stream() + .map(node -> OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType())) + .allMatch(TEMPORAL_TYPES::contains); + if (!allTemporal) { + return null; + } + return CoercionUtils.widenArguments(context.rexBuilder, operands); + } + @Override public RexNode visitBetween(Between node, CalcitePlanContext context) { RexNode value = analyze(node.getValue(), context); @@ -268,12 +307,25 @@ public RexNode visitBetween(Between node, CalcitePlanContext context) { lowerBound = context.rexBuilder.makeCast(commonType, lowerBound); upperBound = context.rexBuilder.makeCast(commonType, upperBound); } else { - throw new SemanticCheckException( - StringUtils.format( - "BETWEEN expression types are incompatible: [%s, %s, %s]", - OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()), - OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()), - OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType()))); + // leastRestrictive() has no common type for mixed temporal representations — e.g. a standard + // Calcite TIMESTAMP field compared against EXPR_DATE UDT bounds (`ts between date('...') and + // date('...')`). Comparison operators coerce these through CoercionUtils; BETWEEN calls + // leastRestrictive directly and would otherwise reject them. Fall back to the same temporal + // widening, scoped to all-temporal operands so genuinely incompatible mixes (e.g. + // `age between '35' and 38.5`) still raise SemanticCheckException. + List widened = + widenTemporalOperands(context, List.of(value, lowerBound, upperBound)); + if (widened == null) { + throw new SemanticCheckException( + StringUtils.format( + "BETWEEN expression types are incompatible: [%s, %s, %s]", + OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()), + OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()), + OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType()))); + } + value = widened.get(0); + lowerBound = widened.get(1); + upperBound = widened.get(2); } return context.relBuilder.between(value, lowerBound, upperBound); } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CoercionUtilsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CoercionUtilsTest.java index 373881e854..5533fd9091 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/CoercionUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/CoercionUtilsTest.java @@ -17,6 +17,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -51,6 +52,40 @@ public void findCommonWidestType( expectedCommonType, CoercionUtils.resolveCommonType(left, right).orElseGet(() -> null)); } + @Test + void widenArgumentsUnifiesPlainTimestampWithDateUdtBounds() { + // Reproduces the BETWEEN/IN operand set seen on a non-UDT path (e.g. the analytics engine's + // parquet scan): a standard Calcite TIMESTAMP field against EXPR_DATE UDT bounds. + // leastRestrictive + // returns no common type for this mix, but the temporal widening used by comparison operators + // must resolve all three to a single timestamp type so the predicate can run. + RexNode plainTimestampField = + REX_BUILDER.makeInputRef( + OpenSearchTypeFactory.TYPE_FACTORY.createTypeWithNullability( + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), true), + 0); + RexNode dateLower = + REX_BUILDER.makeNullLiteral( + OpenSearchTypeFactory.TYPE_FACTORY.createUDT( + OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true)); + RexNode dateUpper = + REX_BUILDER.makeNullLiteral( + OpenSearchTypeFactory.TYPE_FACTORY.createUDT( + OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true)); + + List widened = + CoercionUtils.widenArguments( + REX_BUILDER, List.of(plainTimestampField, dateLower, dateUpper)); + + assertNotNull(widened); + assertEquals(3, widened.size()); + for (RexNode node : widened) { + assertEquals( + ExprCoreType.TIMESTAMP, + OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType())); + } + } + @Test void castArgumentsReturnsExactMatchWhenAvailable() { PPLTypeChecker typeChecker = new StubTypeChecker(List.of(List.of(INTEGER), List.of(DOUBLE))); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java index 4c2c817669..27d971a461 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java @@ -500,7 +500,10 @@ public void testNotBetween3() throws IOException { verifyDataRows(actual, rows("Hattie", 36), rows("Elinor", 36)); } + @Test public void testDateBetween() throws IOException { + // birthdate is a TIMESTAMP-typed field; the bounds are DATE literals. BETWEEN must coerce the + // mixed temporal operands rather than reject them with "expression types are incompatible". JSONObject actual = executeQuery( String.format( @@ -512,6 +515,55 @@ public void testDateBetween() throws IOException { actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00")); } + @Test + public void testDateIn() throws IOException { + // birthdate is a TIMESTAMP-typed field; the IN values are DATE literals. visitIn must compare + // each value in the field's temporal domain (via PPL `=`) rather than letting leastRestrictive + // collapse the common type to VARCHAR and string-compare mismatched renderings — which silently + // matched nothing without pushdown. See visitIn in CalciteRexNodeVisitor. + JSONObject actual = + executeQuery( + String.format( + "source=%s | where birthdate in (DATE '2018-06-23', DATE '2018-06-27') |" + + " fields firstname, birthdate", + TEST_INDEX_BANK)); + verifySchema(actual, schema("firstname", "string"), schema("birthdate", "timestamp")); + verifyDataRows( + actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00")); + } + + @Test + public void testDateNotIn() throws IOException { + // Complement of testDateIn: NOT IN over a temporal field. Exercises the complemented-points + // pushdown branch, which must also format timestamp values rather than emit a flat terms query. + JSONObject actual = + executeQuery( + String.format( + "source=%s | where birthdate not in (DATE '2018-06-23', DATE '2018-06-27') |" + + " fields firstname | sort firstname", + TEST_INDEX_BANK)); + verifySchema(actual, schema("firstname", "string")); + verifyDataRows( + actual, + rows("Amber JOHnny"), + rows("Dale"), + rows("Dillard"), + rows("Hattie"), + rows("Virginia")); + } + + @Test + public void testIn() throws IOException { + // Non-temporal IN keeps the leastRestrictive + makeIn path; guards against the temporal-field + // special-case in visitIn regressing membership tests over ordinary columns. + JSONObject actual = + executeQuery( + String.format( + "source=%s | where age in (32, 36) | fields firstname, age", TEST_INDEX_BANK)); + verifySchema(actual, schema("firstname", "string"), schema("age", "int")); + verifyDataRows(actual, rows("Amber JOHnny", 32), rows("Hattie", 36), rows("Elinor", 36)); + } + @Test public void testXor() throws IOException { JSONObject result = diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 6389fb2839..29f44adf08 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -784,16 +784,29 @@ private QueryExpression like(RexCall call) { private static QueryExpression constructQueryExpressionForSearch( RexCall call, SwapResult pair) { + boolean isTimeStamp = + (pair.getKey() instanceof NamedFieldExpression namedField) + && namedField.isTimeStampType(); if (isSearchWithComplementedPoints(call)) { + // A NOT IN over a timestamp field cannot use a flat terms query: timestamp values must be + // normalized + formatted (see in()/termsQuery, which does neither). Decompose into the + // complement of an OR of formatted per-point range queries, mirroring the range branch. + if (isTimeStamp) { + return pointsAsTimestampOr(call, pair).not(); + } return QueryExpression.create(pair.getKey()).notIn(pair.getValue()); } else if (isSearchWithPoints(call)) { + // IN over a timestamp field: the flat terms query in in() emits unformatted values like + // '2018-06-23 00:00:00' that the date field cannot parse. Decompose into an OR of formatted + // per-point range queries (equals(point, true) builds gte/lte + format="date_time"), the + // same path the range branch below and visitCompare's `=` already take. + if (isTimeStamp) { + return pointsAsTimestampOr(call, pair); + } return QueryExpression.create(pair.getKey()).in(pair.getValue()); } else { Sarg sarg = pair.getValue().literal.getValueAs(Sarg.class); Set> rangeSet = requireNonNull(sarg).rangeSet.asRanges(); - boolean isTimeStamp = - (pair.getKey() instanceof NamedFieldExpression namedField) - && namedField.isTimeStampType(); List queryExpressions = rangeSet.stream() .map( @@ -811,6 +824,30 @@ private static QueryExpression constructQueryExpressionForSearch( } } + /** + * Builds an OR of per-point {@code equals(point, true)} query expressions for a points-only + * Sarg over a timestamp field. Each point becomes a {@code gte/lte} range query with {@code + * format="date_time"}, so the timestamp value is normalized to the date field's accepted format + * — unlike a flat terms query, which emits unformatted values the date field rejects. + */ + private static QueryExpression pointsAsTimestampOr(RexCall call, SwapResult pair) { + RexLiteral literal = (RexLiteral) call.getOperands().get(1); + Sarg sarg = requireNonNull(literal.getValueAs(Sarg.class), "Sarg"); + Set> ranges = + (sarg.isComplementedPoints() ? sarg.negate() : sarg).rangeSet.asRanges(); + List queryExpressions = + ranges.stream() + .map( + range -> + QueryExpression.create(pair.getKey()) + .equals(sargPointValue(range.lowerEndpoint()), true)) + .toList(); + if (queryExpressions.size() == 1) { + return queryExpressions.getFirst(); + } + return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0])); + } + private QueryExpression andOr(RexCall call) { QueryExpression[] expressions = new QueryExpression[call.getOperands().size()]; PredicateAnalyzerException firstError = null;