Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -84,11 +85,13 @@
import org.opensearch.sql.calcite.utils.PlanUtils;
import org.opensearch.sql.calcite.utils.SubsearchUtils;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.CalciteUnsupportedException;
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.CoercionUtils;
import org.opensearch.sql.expression.function.PPLFuncImpTable;

@RequiredArgsConstructor
Expand Down Expand Up @@ -225,24 +228,41 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
final RexNode field = analyze(node.getField(), context);
final List<RexNode> valueList =
node.getValueList().stream().map(value -> analyze(value, context)).toList();
// When the field is a temporal type, do NOT use leastRestrictive + rexBuilder.makeIn. For a
// temporal field tested against string/date literals, leastRestrictive collapses the common
// type to VARCHAR (the EXPR_DATE / EXPR_TIMESTAMP UDTs are VARCHAR-backed), so makeIn casts the
// field DOWN to VARCHAR and string-compares mismatched renderings (e.g. '2018-06-23 00:00:00'
// against '2018-06-23') — silently matching nothing. Rewrite the membership test as an OR of
// PPL `=` comparisons, the same temporal-aware comparison path visitCompare takes for `=`, so
// each value is coerced to the field's timestamp domain before comparison.
ExprType fieldExprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(field.getType());
if (TEMPORAL_TYPES.contains(fieldExprType)) {
List<RexNode> equalities =
valueList.stream()
.map(value -> PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, "=", field, value))
.toList();
return context.relBuilder.or(equalities);
}
final List<RelDataType> dataTypes =
new java.util.ArrayList<>(valueList.stream().map(RexNode::getType).toList());
new ArrayList<>(valueList.stream().map(RexNode::getType).toList());
dataTypes.add(field.getType());
RelDataType commonType = context.rexBuilder.getTypeFactory().leastRestrictive(dataTypes);
if (commonType != null) {
List<RexNode> newValueList =
valueList.stream().map(value -> context.rexBuilder.makeCast(commonType, value)).toList();
return context.rexBuilder.makeIn(field, newValueList);
} else {
List<ExprType> exprTypes =
dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList();
throw new SemanticCheckException(
StringUtils.format(
"In expression types are incompatible: fields type %s, values type %s",
exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1)));
}
List<ExprType> exprTypes =
dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList();
throw new SemanticCheckException(
StringUtils.format(
"In expression types are incompatible: fields type %s, values type %s",
exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1)));
}

private static final Set<ExprType> TEMPORAL_TYPES =
Set.of(ExprCoreType.DATE, ExprCoreType.TIME, ExprCoreType.TIMESTAMP);

@Override
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
RexNode left = analyze(node.getLeft(), context);
Expand All @@ -258,6 +278,25 @@ public RexNode visitCompare(Compare node, CalcitePlanContext context) {
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
}

/**
* Widens a set of operands to a common temporal type when, and only when, every operand is a
* temporal type (DATE / TIME / TIMESTAMP), including the EXPR_DATE / EXPR_TIME / EXPR_TIMESTAMP
* UDTs. Returns {@code null} otherwise so non-temporal incompatible mixes still fail the type
* check. The widening reuses {@link CoercionUtils#widenArguments} — the same path comparison
* operators take — which resolves DATE / TIME to TIMESTAMP via the shared widening graph.
*/
private static @Nullable List<RexNode> widenTemporalOperands(
CalcitePlanContext context, List<RexNode> operands) {
boolean allTemporal =
operands.stream()
.map(node -> OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType()))
.allMatch(TEMPORAL_TYPES::contains);
if (!allTemporal) {
return null;
}
return CoercionUtils.widenArguments(context.rexBuilder, operands);
}

@Override
public RexNode visitBetween(Between node, CalcitePlanContext context) {
RexNode value = analyze(node.getValue(), context);
Expand All @@ -268,12 +307,25 @@ public RexNode visitBetween(Between node, CalcitePlanContext context) {
lowerBound = context.rexBuilder.makeCast(commonType, lowerBound);
upperBound = context.rexBuilder.makeCast(commonType, upperBound);
} else {
throw new SemanticCheckException(
StringUtils.format(
"BETWEEN expression types are incompatible: [%s, %s, %s]",
OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()),
OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()),
OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType())));
// leastRestrictive() has no common type for mixed temporal representations — e.g. a standard
// Calcite TIMESTAMP field compared against EXPR_DATE UDT bounds (`ts between date('...') and
// date('...')`). Comparison operators coerce these through CoercionUtils; BETWEEN calls
// leastRestrictive directly and would otherwise reject them. Fall back to the same temporal
// widening, scoped to all-temporal operands so genuinely incompatible mixes (e.g.
// `age between '35' and 38.5`) still raise SemanticCheckException.
List<RexNode> widened =
widenTemporalOperands(context, List.of(value, lowerBound, upperBound));
if (widened == null) {
throw new SemanticCheckException(
StringUtils.format(
"BETWEEN expression types are incompatible: [%s, %s, %s]",
OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()),
OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()),
OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType())));
}
value = widened.get(0);
lowerBound = widened.get(1);
upperBound = widened.get(2);
}
return context.relBuilder.between(value, lowerBound, upperBound);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
Expand Down Expand Up @@ -51,6 +52,40 @@ public void findCommonWidestType(
expectedCommonType, CoercionUtils.resolveCommonType(left, right).orElseGet(() -> null));
}

@Test
void widenArgumentsUnifiesPlainTimestampWithDateUdtBounds() {
// Reproduces the BETWEEN/IN operand set seen on a non-UDT path (e.g. the analytics engine's
// parquet scan): a standard Calcite TIMESTAMP field against EXPR_DATE UDT bounds.
// leastRestrictive
// returns no common type for this mix, but the temporal widening used by comparison operators
// must resolve all three to a single timestamp type so the predicate can run.
RexNode plainTimestampField =
REX_BUILDER.makeInputRef(
OpenSearchTypeFactory.TYPE_FACTORY.createTypeWithNullability(
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), true),
0);
RexNode dateLower =
REX_BUILDER.makeNullLiteral(
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(
OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true));
RexNode dateUpper =
REX_BUILDER.makeNullLiteral(
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(
OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true));

List<RexNode> widened =
CoercionUtils.widenArguments(
REX_BUILDER, List.of(plainTimestampField, dateLower, dateUpper));

assertNotNull(widened);
assertEquals(3, widened.size());
for (RexNode node : widened) {
assertEquals(
ExprCoreType.TIMESTAMP,
OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType()));
}
}

@Test
void castArgumentsReturnsExactMatchWhenAvailable() {
PPLTypeChecker typeChecker = new StubTypeChecker(List.of(List.of(INTEGER), List.of(DOUBLE)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,10 @@ public void testNotBetween3() throws IOException {
verifyDataRows(actual, rows("Hattie", 36), rows("Elinor", 36));
}

@Test
public void testDateBetween() throws IOException {
// birthdate is a TIMESTAMP-typed field; the bounds are DATE literals. BETWEEN must coerce the
// mixed temporal operands rather than reject them with "expression types are incompatible".
JSONObject actual =
executeQuery(
String.format(
Expand All @@ -512,6 +515,55 @@ public void testDateBetween() throws IOException {
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
}

@Test
public void testDateIn() throws IOException {
// birthdate is a TIMESTAMP-typed field; the IN values are DATE literals. visitIn must compare
// each value in the field's temporal domain (via PPL `=`) rather than letting leastRestrictive
// collapse the common type to VARCHAR and string-compare mismatched renderings — which silently
// matched nothing without pushdown. See visitIn in CalciteRexNodeVisitor.
JSONObject actual =
executeQuery(
String.format(
"source=%s | where birthdate in (DATE '2018-06-23', DATE '2018-06-27') |"
+ " fields firstname, birthdate",
TEST_INDEX_BANK));
verifySchema(actual, schema("firstname", "string"), schema("birthdate", "timestamp"));
verifyDataRows(
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
}

@Test
public void testDateNotIn() throws IOException {
// Complement of testDateIn: NOT IN over a temporal field. Exercises the complemented-points
// pushdown branch, which must also format timestamp values rather than emit a flat terms query.
JSONObject actual =
executeQuery(
String.format(
"source=%s | where birthdate not in (DATE '2018-06-23', DATE '2018-06-27') |"
+ " fields firstname | sort firstname",
TEST_INDEX_BANK));
verifySchema(actual, schema("firstname", "string"));
verifyDataRows(
actual,
rows("Amber JOHnny"),
rows("Dale"),
rows("Dillard"),
rows("Hattie"),
rows("Virginia"));
}

@Test
public void testIn() throws IOException {
// Non-temporal IN keeps the leastRestrictive + makeIn path; guards against the temporal-field
// special-case in visitIn regressing membership tests over ordinary columns.
JSONObject actual =
executeQuery(
String.format(
"source=%s | where age in (32, 36) | fields firstname, age", TEST_INDEX_BANK));
verifySchema(actual, schema("firstname", "string"), schema("age", "int"));
verifyDataRows(actual, rows("Amber JOHnny", 32), rows("Hattie", 36), rows("Elinor", 36));
}

@Test
public void testXor() throws IOException {
JSONObject result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -784,16 +784,29 @@ private QueryExpression like(RexCall call) {

private static QueryExpression constructQueryExpressionForSearch(
RexCall call, SwapResult pair) {
boolean isTimeStamp =
(pair.getKey() instanceof NamedFieldExpression namedField)
&& namedField.isTimeStampType();
if (isSearchWithComplementedPoints(call)) {
// A NOT IN over a timestamp field cannot use a flat terms query: timestamp values must be
// normalized + formatted (see in()/termsQuery, which does neither). Decompose into the
// complement of an OR of formatted per-point range queries, mirroring the range branch.
if (isTimeStamp) {
return pointsAsTimestampOr(call, pair).not();
}
return QueryExpression.create(pair.getKey()).notIn(pair.getValue());
} else if (isSearchWithPoints(call)) {
// IN over a timestamp field: the flat terms query in in() emits unformatted values like
// '2018-06-23 00:00:00' that the date field cannot parse. Decompose into an OR of formatted
// per-point range queries (equals(point, true) builds gte/lte + format="date_time"), the
// same path the range branch below and visitCompare's `=` already take.
if (isTimeStamp) {
return pointsAsTimestampOr(call, pair);
}
return QueryExpression.create(pair.getKey()).in(pair.getValue());
} else {
Sarg<?> sarg = pair.getValue().literal.getValueAs(Sarg.class);
Set<? extends Range<?>> rangeSet = requireNonNull(sarg).rangeSet.asRanges();
boolean isTimeStamp =
(pair.getKey() instanceof NamedFieldExpression namedField)
&& namedField.isTimeStampType();
List<QueryExpression> queryExpressions =
rangeSet.stream()
.map(
Expand All @@ -811,6 +824,30 @@ private static QueryExpression constructQueryExpressionForSearch(
}
}

/**
* Builds an OR of per-point {@code equals(point, true)} query expressions for a points-only
* Sarg over a timestamp field. Each point becomes a {@code gte/lte} range query with {@code
* format="date_time"}, so the timestamp value is normalized to the date field's accepted format
* — unlike a flat terms query, which emits unformatted values the date field rejects.
*/
private static QueryExpression pointsAsTimestampOr(RexCall call, SwapResult pair) {
RexLiteral literal = (RexLiteral) call.getOperands().get(1);
Sarg<?> sarg = requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
Set<? extends Range<?>> ranges =
(sarg.isComplementedPoints() ? sarg.negate() : sarg).rangeSet.asRanges();
List<QueryExpression> queryExpressions =
ranges.stream()
.map(
range ->
QueryExpression.create(pair.getKey())
.equals(sargPointValue(range.lowerEndpoint()), true))
.toList();
if (queryExpressions.size() == 1) {
return queryExpressions.getFirst();
}
return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0]));
}

private QueryExpression andOr(RexCall call) {
QueryExpression[] expressions = new QueryExpression[call.getOperands().size()];
PredicateAnalyzerException firstError = null;
Expand Down
Loading