Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
package org.hypertrace.core.documentstore.postgres.query.v1.vistors;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Getter;
import org.hypertrace.core.documentstore.DocumentType;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.LogicalExpression;
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression;
import org.hypertrace.core.documentstore.expression.impl.UnnestExpression;
import org.hypertrace.core.documentstore.expression.operators.LogicalOperator;
import org.hypertrace.core.documentstore.expression.operators.RelationalOperator;
import org.hypertrace.core.documentstore.expression.type.FilterTypeExpression;
import org.hypertrace.core.documentstore.expression.type.FromTypeExpression;
import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor;
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresDataType;
import org.hypertrace.core.documentstore.postgres.utils.PostgresUtils;

public class PostgresFromTypeExpressionVisitor implements FromTypeExpressionVisitor {
Expand Down Expand Up @@ -136,20 +150,175 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser
postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT;

if (isFlatCollection) {
// For flat collections with unnest, skip filters in table0
return String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier());
// For flat collections with unnest, scalar/array-column filters cannot be applied verbatim in
// table0 (unnested scalar columns don't exist yet, and array columns reject scalar operators
// like LIKE). However, an equality/IN filter on a native array column has an equivalent,
// GIN-indexable array-overlap (&&) form that CAN be evaluated on the base array column before
// the row-multiplying unnest. Emit that as a pruning pre-filter when available.
Optional<String> arrayOverlapPrefilterMaybe =
buildFlatArrayOverlapPrefilter(postgresQueryParser);
return arrayOverlapPrefilterMaybe
.map(
prefilter ->
String.format(
TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), prefilter))
.orElseGet(
() -> String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()));
} else {
// For nested collections, apply filters in table0 as usual (preserves existing behavior)
// For nested collections, apply filters in table0 as usual
Optional<String> whereFilter =
PostgresFilterTypeExpressionVisitor.getFilterClause(postgresQueryParser);

return whereFilter.isPresent()
? String.format(
TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), whereFilter.get())
: String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier());
return whereFilter
.map(
s ->
String.format(
TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), s))
.orElseGet(
() -> String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()));
}
}

/**
 * Derives a pruning predicate for {@code table0} of a flat collection from the filters attached to
 * the query's unnest expressions.
 *
 * <p>Each {@code UnnestExpression} in the FROM clause whose identifier is not a {@code
 * JsonIdentifierExpression} and which carries a filter is inspected. The predicates selected by
 * {@code findConjunctiveEqualityPredicates} (AND-connected equality/IN with a constant RHS on the
 * unnested field) are passed to {@code addOverlapClause}, which renders each one as an
 * array-overlap test ({@code "col" && ?}) and binds its value list as one array parameter. The
 * clauses are then joined with {@code AND}.
 *
 * <p>The result is a necessary-but-not-sufficient condition: it only discards rows whose base
 * array cannot contain a matching element. The post-unnest element filter in the outer WHERE is
 * not touched here and still picks the matching element from the surviving rows.
 *
 * <p><b>Illustration.</b> Unnesting the native array column {@code attributes.domainIds} with an
 * equality filter on the unnested element:
 *
 * <pre>{@code
 * Filter applied only after the unnest (every candidate row is expanded first, so a GIN index on
 * the array column cannot help):
 *
 * With
 * table0 as (SELECT * FROM "entities_api"),
 * table1 as (SELECT * FROM table0 t0
 *              LEFT JOIN LATERAL unnest("attributes.domainIds")
 *              p1("attributes_dot_domainIds_unnested") ON TRUE)
 * SELECT ... FROM table1
 * WHERE "attributes_dot_domainIds_unnested" = ANY(?)
 *
 * With the pre-filter produced by this method (rows pruned on the base array column first):
 *
 * With
 * table0 as (SELECT * FROM "entities_api"
 *              WHERE "attributes.domainIds" && ?),        -- produced here
 * table1 as (SELECT * FROM table0 t0
 *              LEFT JOIN LATERAL unnest("attributes.domainIds")
 *              p1("attributes_dot_domainIds_unnested") ON TRUE)
 * SELECT ... FROM table1
 * WHERE "attributes_dot_domainIds_unnested" = ANY(?)      -- unchanged
 * }</pre>
 *
 * <p>The {@code &&} parameter carries the same value list as the post-unnest filter, bound as a
 * single array parameter.
 *
 * @param postgresQueryParser parser that supplies the query, the field transformer and the
 *     parameter builder the overlap parameters are appended to
 * @return the AND-joined overlap clauses, or {@link Optional#empty()} when nothing is translatable
 */
private static Optional<String> buildFlatArrayOverlapPrefilter(
    PostgresQueryParser postgresQueryParser) {
  List<String> prefilterClauses = new ArrayList<>();
  Set<String> emittedKeys = new LinkedHashSet<>();

  for (FromTypeExpression fromClause :
      postgresQueryParser.getQuery().getFromTypeExpressions()) {
    if (fromClause instanceof UnnestExpression) {
      UnnestExpression unnestClause = (UnnestExpression) fromClause;

      // The overlap operator is only emitted for native array columns; JSONB arrays are left
      // alone. Eligible element predicates are moved onto the unnest's own filter by
      // PostgresUnnestQueryTransformer, so that filter is the only place inspected.
      boolean isNativeArray =
          !(unnestClause.getIdentifierExpression() instanceof JsonIdentifierExpression);
      if (isNativeArray && unnestClause.getFilterTypeExpression() != null) {
        String arrayField = unnestClause.getIdentifierExpression().getName();
        List<RelationalExpression> translatablePredicates =
            findConjunctiveEqualityPredicates(unnestClause.getFilterTypeExpression(), arrayField);
        for (RelationalExpression translatablePredicate : translatablePredicates) {
          addOverlapClause(
              translatablePredicate,
              arrayField,
              postgresQueryParser,
              prefilterClauses,
              emittedKeys);
        }
      }
    }
  }

  if (prefilterClauses.isEmpty()) {
    return Optional.empty();
  }
  return Optional.of(String.join(" AND ", prefilterClauses));
}

/**
 * Collects the equality/IN predicates on {@code fieldName} that are reachable from {@code filter}
 * through AND nodes only and whose right-hand side is a constant.
 *
 * <p>A logical node with any operator other than AND contributes nothing: a predicate beneath
 * OR/NOT is not implied by the filter as a whole, so pre-filtering on it would be incorrect.
 * Returning fewer predicates is always safe because the pre-filter is purely additive pruning.
 *
 * @param filter filter (sub)tree to inspect
 * @param fieldName name the predicate's left-hand identifier must equal
 * @return matching predicates in left-to-right traversal order; empty when there are none
 */
private static List<RelationalExpression> findConjunctiveEqualityPredicates(
    FilterTypeExpression filter, String fieldName) {
  if (filter instanceof LogicalExpression) {
    LogicalExpression logicalNode = (LogicalExpression) filter;
    if (logicalNode.getOperator() != LogicalOperator.AND) {
      return List.of();
    }
    // Recurse into every conjunct and concatenate what each one yields.
    List<RelationalExpression> collected = new ArrayList<>();
    for (FilterTypeExpression conjunct : logicalNode.getOperands()) {
      collected.addAll(findConjunctiveEqualityPredicates(conjunct, fieldName));
    }
    return collected;
  }

  if (!(filter instanceof RelationalExpression)) {
    return List.of();
  }
  RelationalExpression candidate = (RelationalExpression) filter;

  // Left-hand side must be an identifier naming exactly the unnested field.
  if (!(candidate.getLhs() instanceof IdentifierExpression)) {
    return List.of();
  }
  String lhsName = ((IdentifierExpression) candidate.getLhs()).getName();
  if (!lhsName.equals(fieldName)) {
    return List.of();
  }

  // Only EQ and IN are accepted as translatable operators.
  RelationalOperator candidateOperator = candidate.getOperator();
  if (candidateOperator != RelationalOperator.EQ && candidateOperator != RelationalOperator.IN) {
    return List.of();
  }

  if (candidate.getRhs() instanceof ConstantExpression) {
    return List.of(candidate);
  }
  return List.of();
}

/**
 * Renders one equality/IN predicate as an array-overlap clause ({@code "col" && ?}), binds its
 * value(s) as a single array parameter, and appends the clause to {@code overlapClauses}.
 *
 * <p>Nothing is emitted (and no parameter is bound) when the value list is empty, when its first
 * element is {@code null}, when {@code PostgresDataType.fromJavaValue} reports {@code UNKNOWN} for
 * the first element, or when an equivalent predicate was already emitted. Skipping is safe because
 * the pre-filter only prunes rows ahead of the unnest.
 *
 * <p>Equivalent predicates (a query can carry the same condition more than once) are emitted at
 * most once, since each clause consumes a positional parameter. The de-duplication key is built
 * from the field name, the element type and every value prefixed with its rendered length. A key
 * based on {@code List.toString()} would be ambiguous: {@code ["a, b"]} and {@code ["a", "b"]}
 * both render as {@code [a, b]}, which would wrongly drop the second predicate's pre-filter.
 *
 * @param predicate predicate to translate; its RHS is cast to {@code ConstantExpression}
 *     unconditionally, so callers must pass only predicates with a constant RHS
 * @param fieldName name of the unnested array field the predicate targets
 * @param postgresQueryParser parser used to resolve the column and to bind the array parameter
 * @param overlapClauses output list receiving the rendered clause
 * @param seenClauses de-duplication keys of clauses emitted so far; updated by this call
 */
private static void addOverlapClause(
    RelationalExpression predicate,
    String fieldName,
    PostgresQueryParser postgresQueryParser,
    List<String> overlapClauses,
    Set<String> seenClauses) {
  Object constantValue = ((ConstantExpression) predicate.getRhs()).getValue();
  List<Object> values =
      constantValue instanceof List
          ? new ArrayList<>((List<?>) constantValue)
          : Collections.singletonList(constantValue);
  if (values.isEmpty() || values.get(0) == null) {
    return;
  }

  // The array's SQL element type is inferred from the first value only.
  PostgresDataType elementType = PostgresDataType.fromJavaValue(values.get(0));
  if (elementType == PostgresDataType.UNKNOWN) {
    return;
  }

  // Length-prefix each value so separators inside a value cannot make two different lists
  // produce the same key; nulls get a marker that no length-prefixed value can equal.
  StringBuilder dedupKey = new StringBuilder().append(fieldName).append("::").append(elementType);
  for (Object value : values) {
    if (value == null) {
      dedupKey.append("|null");
      continue;
    }
    String rendered = value.toString();
    dedupKey.append('|').append(rendered.length()).append(':').append(rendered);
  }
  if (!seenClauses.add(dedupKey.toString())) {
    return;
  }

  String arrayColumn = postgresQueryParser.transformField(fieldName).getPgColumn();
  // Bind the parameter and append the clause together so positional order stays aligned.
  postgresQueryParser
      .getParamsBuilder()
      .addArrayParam(values.toArray(), elementType.getSqlType());
  overlapClauses.add(arrayColumn + " && ?");
}

/*
Returns the column name with double quotes if the collection is flat to prevent folding to lower-case by PG
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GT;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GTE;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.IN;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LIKE;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LTE;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NEQ;
import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NOT_CONTAINS;
Expand All @@ -27,6 +28,8 @@
import static org.hypertrace.core.documentstore.expression.operators.SortOrder.ASC;
import static org.hypertrace.core.documentstore.expression.operators.SortOrder.DESC;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.List;
Expand Down Expand Up @@ -1560,6 +1563,117 @@ void testContainsWithFlatCollectionNonJsonField() {
assertEquals("java", params.getObjectParams().get(1));
}

@Nested
class FlatCollectionUnnestArrayOverlapPrefilterTest {

  /**
   * Builds a flat-collection parser for a query that selects {@code tags} and unnests it with the
   * given element filter attached to the unnest expression.
   */
  private PostgresQueryParser flatParserForUnnestedTags(
      boolean preserveNullAndEmptyArrays, RelationalExpression elementFilter) {
    Query query =
        Query.builder()
            .addSelection(IdentifierExpression.of("tags"))
            .addFromClause(
                UnnestExpression.builder()
                    .identifierExpression(IdentifierExpression.of("tags"))
                    .preserveNullAndEmptyArrays(preserveNullAndEmptyArrays)
                    .filterTypeExpression(elementFilter)
                    .build())
            .build();

    return new PostgresQueryParser(
        TEST_TABLE, PostgresQueryTransformer.transform(query), new FlatPostgresFieldTransformer());
  }

  @Test
  void testEqualityOnUnnestedNativeArrayAddsOverlapPrefilterToTable0() {
    PostgresQueryParser postgresQueryParser =
        flatParserForUnnestedTags(
            true,
            RelationalExpression.of(
                IdentifierExpression.of("tags"), EQ, ConstantExpression.of("java")));

    String expectedSql =
        "With \n"
            + "table0 as (SELECT * from \"testCollection\" WHERE \"tags\" && ?),\n"
            + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL unnest(\"tags\") p1(\"tags_unnested\") on TRUE)\n"
            + "SELECT \"tags_unnested\" AS \"tags\" FROM table1 WHERE \"tags_unnested\" = ?";
    String sql = postgresQueryParser.parse();
    assertEquals(expectedSql, sql);

    // Parameter 1 is the table0 overlap array; parameter 2 is the post-unnest element filter.
    Params params = postgresQueryParser.getParamsBuilder().build();
    assertEquals(2, params.getObjectParams().size());

    Object prefilterParam = params.getObjectParams().get(1);
    assertTrue(prefilterParam instanceof Params.ArrayParam);
    Params.ArrayParam arrayParam = (Params.ArrayParam) prefilterParam;
    assertEquals("text", arrayParam.getSqlType());
    assertEquals(List.of("java"), List.of(arrayParam.getValues()));

    assertEquals("java", params.getObjectParams().get(2));
  }

  @Test
  void testInOnUnnestedNativeArrayAddsOverlapPrefilterToTable0() {
    PostgresQueryParser postgresQueryParser =
        flatParserForUnnestedTags(
            false,
            RelationalExpression.of(
                IdentifierExpression.of("tags"),
                IN,
                ConstantExpression.ofStrings(List.of("java", "go"))));

    String sql = postgresQueryParser.parse();
    boolean hasOverlapPrefilter =
        sql.contains("table0 as (SELECT * from \"testCollection\" WHERE \"tags\" && ?)");
    assertTrue(
        hasOverlapPrefilter, "Expected array-overlap pre-filter in table0, but got: " + sql);

    // The whole IN list is bound as one array parameter in the first position.
    Params params = postgresQueryParser.getParamsBuilder().build();
    Object prefilterParam = params.getObjectParams().get(1);
    assertTrue(prefilterParam instanceof Params.ArrayParam);
    Params.ArrayParam arrayParam = (Params.ArrayParam) prefilterParam;
    assertEquals("text", arrayParam.getSqlType());
    assertEquals(List.of("java", "go"), List.of(arrayParam.getValues()));
  }

  @Test
  void testNonTranslatableOperatorDoesNotAddPrefilterToTable0() {
    // LIKE has no correct array-membership equivalent, so table0 must remain unfiltered.
    PostgresQueryParser postgresQueryParser =
        flatParserForUnnestedTags(
            true,
            RelationalExpression.of(
                IdentifierExpression.of("tags"), LIKE, ConstantExpression.of("jav")));

    String sql = postgresQueryParser.parse();

    boolean hasUnfilteredTable0 = sql.contains("table0 as (SELECT * from \"testCollection\"),");
    assertTrue(
        hasUnfilteredTable0,
        "Expected unfiltered table0 for non-translatable operator, but got: " + sql);
    assertFalse(sql.contains("&&"), "Did not expect an overlap pre-filter, but got: " + sql);
  }
}

@Nested
class FlatCollectionExistsNotExistsParserTest {

Expand Down
Loading