From ef0844f8c6973e87db825ea6b1748671de493f2b Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Thu, 4 Jun 2026 12:39:22 +0530 Subject: [PATCH] WIP --- .../PostgresFromTypeExpressionVisitor.java | 183 +++++++++++++++++- .../query/v1/PostgresQueryParserTest.java | 114 +++++++++++ 2 files changed, 290 insertions(+), 7 deletions(-) diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java index 640637bf8..01ccb33fc 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java @@ -1,14 +1,28 @@ package org.hypertrace.core.documentstore.postgres.query.v1.vistors; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import lombok.Getter; import org.hypertrace.core.documentstore.DocumentType; +import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; +import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression; +import org.hypertrace.core.documentstore.expression.impl.LogicalExpression; +import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression; import org.hypertrace.core.documentstore.expression.impl.UnnestExpression; +import org.hypertrace.core.documentstore.expression.operators.LogicalOperator; +import org.hypertrace.core.documentstore.expression.operators.RelationalOperator; +import 
org.hypertrace.core.documentstore.expression.type.FilterTypeExpression; +import org.hypertrace.core.documentstore.expression.type.FromTypeExpression; import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor; import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser; +import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresDataType; import org.hypertrace.core.documentstore.postgres.utils.PostgresUtils; public class PostgresFromTypeExpressionVisitor implements FromTypeExpressionVisitor { @@ -136,20 +150,175 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; if (isFlatCollection) { - // For flat collections with unnest, skip filters in table0 - return String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()); + // For flat collections with unnest, scalar/array-column filters cannot be applied verbatim in + // table0 (unnested scalar columns don't exist yet, and array columns reject scalar operators + // like LIKE). However, an equality/IN filter on a native array column has an equivalent, + // GIN-indexable array-overlap (&&) form that CAN be evaluated on the base array column before + // the row-multiplying unnest. Emit that as a pruning pre-filter when available. 
+ Optional arrayOverlapPrefilterMaybe = + buildFlatArrayOverlapPrefilter(postgresQueryParser); + return arrayOverlapPrefilterMaybe + .map( + prefilter -> + String.format( + TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), prefilter)) + .orElseGet( + () -> String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier())); } else { - // For nested collections, apply filters in table0 as usual (preserves existing behavior) + // For nested collections, apply filters in table0 as usual Optional whereFilter = PostgresFilterTypeExpressionVisitor.getFilterClause(postgresQueryParser); - return whereFilter.isPresent() - ? String.format( - TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), whereFilter.get()) - : String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()); + return whereFilter + .map( + s -> + String.format( + TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), s)) + .orElseGet( + () -> String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier())); } } + /** + * Builds an array-overlap ({@code &&}) pre-filter for flat collections so rows can be pruned at + * the base array column before the row-multiplying unnest. Only equality/IN predicates on a + * native (non-JSONB) array column are translated, since only those have a correct, GIN-indexable + * array-membership equivalent. The generated predicate is necessary-but-not-sufficient: the + * post-unnest element filter in the outer WHERE still selects the matching element and is left + * unchanged. + * + *

<p>Example. For a query that unnests the native array column {@code + * attributes.domainIds} with an {@code IN} filter on the unnested element: + * + *

<pre>{@code
+   * Without this pre-filter (filter only applied after unnest, so the GIN index on the array
+   * column cannot be used and every candidate row is expanded first):
+   *
+   *   With
+   *     table0 as (SELECT * FROM "entities_api"),
+   *     table1 as (SELECT * FROM table0 t0
+   *       LEFT JOIN LATERAL unnest("attributes.domainIds")
+   *         p1("attributes_dot_domainIds_unnested") ON TRUE)
+   *   SELECT ... FROM table1
+   *   WHERE "attributes_dot_domainIds_unnested" = ANY(?)
+   *
+   * With this pre-filter (rows pruned at the base array column before unnest, via GIN):
+   *
+   *   With
+   *     table0 as (SELECT * FROM "entities_api"
+   *       WHERE "attributes.domainIds" && ?),          -- added by this method
+   *     table1 as (SELECT * FROM table0 t0
+   *       LEFT JOIN LATERAL unnest("attributes.domainIds")
+   *         p1("attributes_dot_domainIds_unnested") ON TRUE)
+   *   SELECT ... FROM table1
+   *   WHERE "attributes_dot_domainIds_unnested" = ANY(?)   -- unchanged, still selects the element
+   * }</pre>
+ * + *

Here the {@code &&} parameter is the same value list as the post-unnest {@code = ANY(?)} + * filter, bound as a single array parameter. The outer filter is intentionally left in place to + * pick the matching unnested element from the surviving rows. + */ + private static Optional buildFlatArrayOverlapPrefilter( + PostgresQueryParser postgresQueryParser) { + List overlapClauses = new ArrayList<>(); + Set seenClauses = new LinkedHashSet<>(); + + for (FromTypeExpression fromTypeExpression : + postgresQueryParser.getQuery().getFromTypeExpressions()) { + // && applies to native array columns only (JSONB arrays are excluded). Eligible element + // predicates are moved onto the unnest's own filter by PostgresUnnestQueryTransformer, so the + // unnest filter is the only place we need to look. + if (!(fromTypeExpression instanceof UnnestExpression)) { + continue; + } + UnnestExpression unnest = (UnnestExpression) fromTypeExpression; + if (unnest.getIdentifierExpression() instanceof JsonIdentifierExpression + || unnest.getFilterTypeExpression() == null) { + continue; + } + + String fieldName = unnest.getIdentifierExpression().getName(); + for (RelationalExpression predicate : + findConjunctiveEqualityPredicates(unnest.getFilterTypeExpression(), fieldName)) { + addOverlapClause(predicate, fieldName, postgresQueryParser, overlapClauses, seenClauses); + } + } + + return overlapClauses.isEmpty() + ? Optional.empty() + : Optional.of(String.join(" AND ", overlapClauses)); + } + + /** + * Returns the AND-connected equality/IN predicates on {@code fieldName} that have a constant RHS. + * + *

Only AND nodes are traversed: a predicate under OR/NOT is not implied by the overall filter, + * so pre-filtering on it would be incorrect. Returning fewer predicates is always safe, since the + * pre-filter is purely additive pruning. + */ + private static List findConjunctiveEqualityPredicates( + FilterTypeExpression filter, String fieldName) { + if (filter instanceof LogicalExpression) { + LogicalExpression logicalExpression = (LogicalExpression) filter; + if (logicalExpression.getOperator() != LogicalOperator.AND) { + return List.of(); + } + List predicates = new ArrayList<>(); + for (FilterTypeExpression operand : logicalExpression.getOperands()) { + predicates.addAll(findConjunctiveEqualityPredicates(operand, fieldName)); + } + return predicates; + } + + if (filter instanceof RelationalExpression) { + RelationalExpression predicate = (RelationalExpression) filter; + RelationalOperator operator = predicate.getOperator(); + boolean isEqualityOrIn = + operator == RelationalOperator.EQ || operator == RelationalOperator.IN; + boolean isOnField = + predicate.getLhs() instanceof IdentifierExpression + && ((IdentifierExpression) predicate.getLhs()).getName().equals(fieldName); + if (isEqualityOrIn && isOnField && predicate.getRhs() instanceof ConstantExpression) { + return List.of(predicate); + } + } + + return List.of(); + } + + /** + * Renders a single equality/IN predicate into an array-overlap clause ({@code "col" && ?}), binds + * its value(s) as one array parameter, and appends the clause. Equivalent predicates (a query can + * carry the same condition more than once) are emitted at most once, since each clause consumes a + * positional parameter. + */ + private static void addOverlapClause( + RelationalExpression predicate, + String fieldName, + PostgresQueryParser postgresQueryParser, + List overlapClauses, + Set seenClauses) { + Object constantValue = ((ConstantExpression) predicate.getRhs()).getValue(); + List values = + constantValue instanceof List + ? 
new ArrayList<>((List) constantValue) + : Collections.singletonList(constantValue); + if (values.isEmpty() || values.get(0) == null) { + return; + } + + PostgresDataType elementType = PostgresDataType.fromJavaValue(values.get(0)); + if (elementType == PostgresDataType.UNKNOWN || !seenClauses.add(fieldName + "::" + values)) { + return; + } + + String arrayColumn = postgresQueryParser.transformField(fieldName).getPgColumn(); + postgresQueryParser + .getParamsBuilder() + .addArrayParam(values.toArray(), elementType.getSqlType()); + overlapClauses.add(arrayColumn + " && ?"); + } + /* Returns the column name with double quotes if the collection is flat to prevent folding to lower-case by PG */ diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 5a42e1cd7..7f81f173d 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -19,6 +19,7 @@ import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GT; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GTE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.IN; +import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LIKE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LTE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NEQ; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NOT_CONTAINS; @@ -27,6 +28,8 @@ import static org.hypertrace.core.documentstore.expression.operators.SortOrder.ASC; import static 
org.hypertrace.core.documentstore.expression.operators.SortOrder.DESC; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.List; @@ -1560,6 +1563,117 @@ void testContainsWithFlatCollectionNonJsonField() { assertEquals("java", params.getObjectParams().get(1)); } + @Nested + class FlatCollectionUnnestArrayOverlapPrefilterTest { + + @Test + void testEqualityOnUnnestedNativeArrayAddsOverlapPrefilterToTable0() { + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(true) + .filterTypeExpression( + RelationalExpression.of( + IdentifierExpression.of("tags"), EQ, ConstantExpression.of("java"))) + .build()) + .build(); + + PostgresQueryParser postgresQueryParser = + new PostgresQueryParser( + TEST_TABLE, + PostgresQueryTransformer.transform(query), + new FlatPostgresFieldTransformer()); + + String sql = postgresQueryParser.parse(); + + assertEquals( + "With \n" + + "table0 as (SELECT * from \"testCollection\" WHERE \"tags\" && ?),\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL unnest(\"tags\") p1(\"tags_unnested\") on TRUE)\n" + + "SELECT \"tags_unnested\" AS \"tags\" FROM table1 WHERE \"tags_unnested\" = ?", + sql); + + Params params = postgresQueryParser.getParamsBuilder().build(); + assertEquals(2, params.getObjectParams().size()); + Object prefilterParam = params.getObjectParams().get(1); + assertTrue(prefilterParam instanceof Params.ArrayParam); + Params.ArrayParam arrayParam = (Params.ArrayParam) prefilterParam; + assertEquals("text", arrayParam.getSqlType()); + assertEquals(List.of("java"), List.of(arrayParam.getValues())); + assertEquals("java", params.getObjectParams().get(2)); + } + + @Test + void 
testInOnUnnestedNativeArrayAddsOverlapPrefilterToTable0() { + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(false) + .filterTypeExpression( + RelationalExpression.of( + IdentifierExpression.of("tags"), + IN, + ConstantExpression.ofStrings(List.of("java", "go")))) + .build()) + .build(); + + PostgresQueryParser postgresQueryParser = + new PostgresQueryParser( + TEST_TABLE, + PostgresQueryTransformer.transform(query), + new FlatPostgresFieldTransformer()); + + String sql = postgresQueryParser.parse(); + + assertTrue( + sql.contains("table0 as (SELECT * from \"testCollection\" WHERE \"tags\" && ?)"), + "Expected array-overlap pre-filter in table0, but got: " + sql); + + Params params = postgresQueryParser.getParamsBuilder().build(); + Object prefilterParam = params.getObjectParams().get(1); + assertTrue(prefilterParam instanceof Params.ArrayParam); + Params.ArrayParam arrayParam = (Params.ArrayParam) prefilterParam; + assertEquals("text", arrayParam.getSqlType()); + assertEquals(List.of("java", "go"), List.of(arrayParam.getValues())); + } + + @Test + void testNonTranslatableOperatorDoesNotAddPrefilterToTable0() { + // LIKE has no correct array-membership equivalent, so table0 must remain unfiltered. 
+ Query query = + Query.builder() + .addSelection(IdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(true) + .filterTypeExpression( + RelationalExpression.of( + IdentifierExpression.of("tags"), LIKE, ConstantExpression.of("jav"))) + .build()) + .build(); + + PostgresQueryParser postgresQueryParser = + new PostgresQueryParser( + TEST_TABLE, + PostgresQueryTransformer.transform(query), + new FlatPostgresFieldTransformer()); + + String sql = postgresQueryParser.parse(); + + assertTrue( + sql.contains("table0 as (SELECT * from \"testCollection\"),"), + "Expected unfiltered table0 for non-translatable operator, but got: " + sql); + assertFalse(sql.contains("&&"), "Did not expect an overlap pre-filter, but got: " + sql); + } + } + @Nested class FlatCollectionExistsNotExistsParserTest {