From 9142d612043197a138fac8cb68ca6cafbc49e5d2 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 22 Apr 2026 19:12:18 +0500 Subject: [PATCH] Propagate null through unnest and single() three-valued logic Two related defects in how AGE handled cypher null inside list-iterating constructs. age_unnest packaged every iterated element as a non-SQL-NULL agtype datum, even AGTV_NULL scalars. SQL `IS NULL` / `IS NOT NULL` then couldn't see those nulls, so `[x IN [null, 1] WHERE x IS NULL]` dropped the null it was meant to keep, and `WHERE x IS NOT NULL` kept the null it was meant to drop. The same mismatch surfaced in UNWIND. AGE already treats SQL NULL as the row-level representation of cypher null elsewhere (`RETURN null AS v` yields SQL NULL, strict operators short-circuit on it); age_unnest now does the same by emitting the row with `nulls[0] = true` when the element is AGTV_NULL. single() previously transformed to `SELECT count(*) FROM unnest(list) AS x WHERE pred IS TRUE`, with the grammar wrapping the result as `(subquery) = 1`. With the unnest fix, `[null, 5] WHERE x > 0` left one definite true after the WHERE filter -> count = 1 -> true. Neo4j returns null because the unknown predicate could itself be a second match. Rewritten to a CASE built on `count(*) FILTER (WHERE pred IS TRUE)` and `bool_or(pred IS NULL)`: CASE WHEN count(*) FILTER (WHERE pred IS TRUE) >= 2 THEN false WHEN bool_or(pred IS NULL) THEN NULL WHEN count(*) FILTER (WHERE pred IS TRUE) = 1 THEN true ELSE false END The >=2 arm runs first so two definite trues dominate any unknowns. Fits inside the existing make_predicate_case_expr helper alongside all/any/none, removes the special-case transform branch and the grammar `= 1` wrap. A small `make_count_star_filter_agg` helper mirrors the existing `make_bool_or_agg`. Verified against Neo4j for the new edge cases (one-true-plus-null, two-trues-plus-null, all-nulls, mixed-true-false-null). 
The predicate_functions regression also picks up the corrected behavior of any/all/none over null elements: `null > 0` now yields SQL NULL instead of being silently treated as true, so the three-valued combinators in those functions produce the openCypher results the comments previously documented as buggy. Co-Authored-By: Claude Opus 4.7 --- regress/expected/list_comprehension.out | 33 ++++ regress/expected/predicate_functions.out | 116 ++++++++++++-- regress/sql/list_comprehension.sql | 8 + regress/sql/predicate_functions.sql | 75 +++++++-- src/backend/parser/cypher_clause.c | 195 ++++++++++++++--------- src/backend/parser/cypher_gram.y | 37 ++--- src/backend/utils/adt/agtype.c | 20 ++- 7 files changed, 351 insertions(+), 133 deletions(-) diff --git a/regress/expected/list_comprehension.out b/regress/expected/list_comprehension.out index e12ad621d..fc25beb4f 100644 --- a/regress/expected/list_comprehension.out +++ b/regress/expected/list_comprehension.out @@ -721,6 +721,39 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10 {"id": 281474976710668, "label": "", "properties": {"b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex (2 rows) +-- Issue 2393 - WHERE filter over null elements should use openCypher's +-- three-valued logic: IS NULL must keep nulls, IS NOT NULL must drop them. 
+SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1] WHERE x IS NULL] $$) AS (result agtype); + result +-------- + [null] +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1, null] WHERE x IS NULL] $$) AS (result agtype); + result +-------------- + [null, null] +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1] WHERE x IS NOT NULL] $$) AS (result agtype); + result +-------- + [1] +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [1, 2, 3] WHERE x IS NULL] $$) AS (result agtype); + result +-------- + [] +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ UNWIND [null, 1] AS x RETURN x, x IS NULL, x IS NOT NULL $$) AS (x agtype, a agtype, b agtype); + x | a | b +---+-------+------- + | true | false + 1 | false | true +(2 rows) + -- Clean up SELECT * FROM drop_graph('list_comprehension', true); NOTICE: drop cascades to 4 other objects diff --git a/regress/expected/predicate_functions.out b/regress/expected/predicate_functions.out index 47226453d..9a821d675 100644 --- a/regress/expected/predicate_functions.out +++ b/regress/expected/predicate_functions.out @@ -194,20 +194,20 @@ $$) AS (result agtype); -- -- NULL predicate results: three-valued logic -- --- Note: In AGE's agtype, null is a first-class value. The comparison --- agtype_null > agtype_integer evaluates to true (not SQL NULL). --- Three-valued logic only applies when the predicate itself is a --- literal null constant, which becomes SQL NULL after coercion. --- agtype null in list: null > 0 = true in AGE, so any() = true +-- Null list elements arrive at the predicate as SQL NULL, so the usual +-- strict-operator short-circuit applies: `null > 0` yields NULL, and the +-- predicate functions combine NULLs with the openCypher three-valued +-- logic (true trumps null in any(), false trumps null in all(), etc.). 
+-- [null]: only null predicate, no true -> any() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN any(x IN [null] WHERE x > 0) $$) AS (result agtype); result -------- - true + (1 row) --- agtype null + real values: all comparisons are true +-- one true (1 > 0) is enough: any() = true SELECT * FROM cypher('predicate_functions', $$ RETURN any(x IN [null, 1, 2] WHERE x > 0) $$) AS (result agtype); @@ -226,16 +226,16 @@ $$) AS (result agtype); (1 row) --- agtype null in list: null > 0 = true in AGE, so all() = true +-- no false, but one null -> all() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN all(x IN [1, null, 2] WHERE x > 0) $$) AS (result agtype); result -------- - true + (1 row) --- -1 > 0 = false, so all() = false +-- -1 > 0 = false dominates the null -> all() = false SELECT * FROM cypher('predicate_functions', $$ RETURN all(x IN [1, null, -1] WHERE x > 0) $$) AS (result agtype); @@ -244,16 +244,16 @@ $$) AS (result agtype); false (1 row) --- agtype null > 0 = true in AGE, so none() = false +-- [null]: only null predicate, no true -> none() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN none(x IN [null] WHERE x > 0) $$) AS (result agtype); result -------- - false + (1 row) --- 5 > 0 = true, so none() = false +-- one true (5 > 0) dominates: none() = false SELECT * FROM cypher('predicate_functions', $$ RETURN none(x IN [null, 5] WHERE x > 0) $$) AS (result agtype); @@ -262,24 +262,108 @@ $$) AS (result agtype); false (1 row) --- agtype null > 0 = true AND 5 > 0 = true: 2 matches, single = false +-- one definite true (5 > 0) and one null predicate: the null could also +-- be a match, so we cannot conclude exactly-one -> single() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN single(x IN [null, 5] WHERE x > 0) $$) AS (result agtype); result -------- + +(1 row) + +-- two definite trues dominate any null -> single() = false +SELECT * FROM cypher('predicate_functions', $$ + RETURN single(x IN [null, 5, 6] WHERE x 
> 0) +$$) AS (result agtype); + result +-------- false (1 row) --- single() with null list: NULL (same as other predicate functions) +-- only null predicates -> single() = NULL SELECT * FROM cypher('predicate_functions', $$ - RETURN single(x IN null WHERE x > 0) + RETURN single(x IN [null, null] WHERE x > 0) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- one true + one false + one null: the null could be the second true +-- so we cannot conclude exactly-one -> single() = NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN single(x IN [1, null, -1] WHERE x > 0) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- +-- Additional null/three-valued coverage for any()/all()/none() +-- +-- any() with no true and one null -> NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN any(x IN [null, -1] WHERE x > 0) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- all() with one true and one null -> NULL (the null might be false) +SELECT * FROM cypher('predicate_functions', $$ + RETURN all(x IN [1, null] WHERE x > 0) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- all() with one definite false dominates any null -> false +SELECT * FROM cypher('predicate_functions', $$ + RETURN all(x IN [null, -1] WHERE x > 0) +$$) AS (result agtype); + result +-------- + false +(1 row) + +-- none() with no true and one null -> NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN none(x IN [null, -1] WHERE x > 0) $$) AS (result agtype); result -------- (1 row) +-- IS NULL predicate now sees unwound null elements (issue #2393) +SELECT * FROM cypher('predicate_functions', $$ + RETURN any(x IN [1, null] WHERE x IS NULL) +$$) AS (result agtype); + result +-------- + true +(1 row) + +SELECT * FROM cypher('predicate_functions', $$ + RETURN none(x IN [1, null] WHERE x IS NULL) +$$) AS (result agtype); + result +-------- + false +(1 row) + +SELECT * FROM cypher('predicate_functions', $$ + RETURN all(x IN [null, null] WHERE x IS 
NULL) +$$) AS (result agtype); + result +-------- + true +(1 row) + -- -- Integration with graph data -- diff --git a/regress/sql/list_comprehension.sql b/regress/sql/list_comprehension.sql index 572b2e6bb..813dcd1de 100644 --- a/regress/sql/list_comprehension.sql +++ b/regress/sql/list_comprehension.sql @@ -174,5 +174,13 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10 SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list = [u IN [1, u]] RETURN u $$) AS (u agtype); SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list IN [u IN [1, u.list]] RETURN u $$) AS (u agtype); +-- Issue 2393 - WHERE filter over null elements should use openCypher's +-- three-valued logic: IS NULL must keep nulls, IS NOT NULL must drop them. +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1] WHERE x IS NULL] $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1, null] WHERE x IS NULL] $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [null, 1] WHERE x IS NOT NULL] $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ RETURN [x IN [1, 2, 3] WHERE x IS NULL] $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ UNWIND [null, 1] AS x RETURN x, x IS NULL, x IS NOT NULL $$) AS (x agtype, a agtype, b agtype); + -- Clean up SELECT * FROM drop_graph('list_comprehension', true); \ No newline at end of file diff --git a/regress/sql/predicate_functions.sql b/regress/sql/predicate_functions.sql index 7466cc2a4..e48d27b52 100644 --- a/regress/sql/predicate_functions.sql +++ b/regress/sql/predicate_functions.sql @@ -123,17 +123,17 @@ $$) AS (result agtype); -- -- NULL predicate results: three-valued logic -- --- Note: In AGE's agtype, null is a first-class value. The comparison --- agtype_null > agtype_integer evaluates to true (not SQL NULL). 
--- Three-valued logic only applies when the predicate itself is a --- literal null constant, which becomes SQL NULL after coercion. +-- Null list elements arrive at the predicate as SQL NULL, so the usual +-- strict-operator short-circuit applies: `null > 0` yields NULL, and the +-- predicate functions combine NULLs with the openCypher three-valued +-- logic (true trumps null in any(), false trumps null in all(), etc.). --- agtype null in list: null > 0 = true in AGE, so any() = true +-- [null]: only null predicate, no true -> any() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN any(x IN [null] WHERE x > 0) $$) AS (result agtype); --- agtype null + real values: all comparisons are true +-- one true (1 > 0) is enough: any() = true SELECT * FROM cypher('predicate_functions', $$ RETURN any(x IN [null, 1, 2] WHERE x > 0) $$) AS (result agtype); @@ -144,34 +144,83 @@ SELECT * FROM cypher('predicate_functions', $$ RETURN all(x IN [1] WHERE null) $$) AS (result agtype); --- agtype null in list: null > 0 = true in AGE, so all() = true +-- no false, but one null -> all() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN all(x IN [1, null, 2] WHERE x > 0) $$) AS (result agtype); --- -1 > 0 = false, so all() = false +-- -1 > 0 = false dominates the null -> all() = false SELECT * FROM cypher('predicate_functions', $$ RETURN all(x IN [1, null, -1] WHERE x > 0) $$) AS (result agtype); --- agtype null > 0 = true in AGE, so none() = false +-- [null]: only null predicate, no true -> none() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN none(x IN [null] WHERE x > 0) $$) AS (result agtype); --- 5 > 0 = true, so none() = false +-- one true (5 > 0) dominates: none() = false SELECT * FROM cypher('predicate_functions', $$ RETURN none(x IN [null, 5] WHERE x > 0) $$) AS (result agtype); --- agtype null > 0 = true AND 5 > 0 = true: 2 matches, single = false +-- one definite true (5 > 0) and one null predicate: the null could also +-- be a match, so we 
cannot conclude exactly-one -> single() = NULL SELECT * FROM cypher('predicate_functions', $$ RETURN single(x IN [null, 5] WHERE x > 0) $$) AS (result agtype); --- single() with null list: NULL (same as other predicate functions) +-- two definite trues dominate any null -> single() = false SELECT * FROM cypher('predicate_functions', $$ - RETURN single(x IN null WHERE x > 0) + RETURN single(x IN [null, 5, 6] WHERE x > 0) +$$) AS (result agtype); + +-- only null predicates -> single() = NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN single(x IN [null, null] WHERE x > 0) +$$) AS (result agtype); + +-- one true + one false + one null: the null could be the second true +-- so we cannot conclude exactly-one -> single() = NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN single(x IN [1, null, -1] WHERE x > 0) +$$) AS (result agtype); + +-- +-- Additional null/three-valued coverage for any()/all()/none() +-- + +-- any() with no true and one null -> NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN any(x IN [null, -1] WHERE x > 0) +$$) AS (result agtype); + +-- all() with one true and one null -> NULL (the null might be false) +SELECT * FROM cypher('predicate_functions', $$ + RETURN all(x IN [1, null] WHERE x > 0) +$$) AS (result agtype); + +-- all() with one definite false dominates any null -> false +SELECT * FROM cypher('predicate_functions', $$ + RETURN all(x IN [null, -1] WHERE x > 0) +$$) AS (result agtype); + +-- none() with no true and one null -> NULL +SELECT * FROM cypher('predicate_functions', $$ + RETURN none(x IN [null, -1] WHERE x > 0) +$$) AS (result agtype); + +-- IS NULL predicate now sees unwound null elements (issue #2393) +SELECT * FROM cypher('predicate_functions', $$ + RETURN any(x IN [1, null] WHERE x IS NULL) +$$) AS (result agtype); + +SELECT * FROM cypher('predicate_functions', $$ + RETURN none(x IN [1, null] WHERE x IS NULL) +$$) AS (result agtype); + +SELECT * FROM cypher('predicate_functions', $$ + RETURN 
all(x IN [null, null] WHERE x IS NULL) $$) AS (result agtype); -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 3083c52e1..83573f872 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -1676,25 +1676,71 @@ static Node *make_bool_or_agg(ParseState *pstate, Node *arg) return (Node *) agg; } +/* + * Helper: build a fully-transformed `count(*) FILTER (WHERE filter)` Aggref. + * + * The filter must already be a transformed boolean expression. + */ +static Node *make_count_star_filter_agg(ParseState *pstate, Node *filter) +{ + Aggref *agg; + Oid count_oid; + + /* count() -- the zero-argument count-star form */ + count_oid = LookupFuncName(list_make1(makeString("count")), + 0, NULL, false); + + agg = makeNode(Aggref); + agg->aggfnoid = count_oid; + agg->aggtype = INT8OID; + agg->aggcollid = InvalidOid; + agg->inputcollid = InvalidOid; + agg->aggtranstype = InvalidOid; /* filled by planner */ + agg->aggargtypes = NIL; + agg->aggdirectargs = NIL; + agg->args = NIL; + agg->aggorder = NIL; + agg->aggdistinct = NIL; + agg->aggfilter = (Expr *) filter; + agg->aggstar = true; + agg->aggvariadic = false; + agg->aggkind = AGGKIND_NORMAL; + agg->aggpresorted = false; + agg->agglevelsup = 0; + agg->aggsplit = AGGSPLIT_SIMPLE; + agg->aggno = -1; + agg->aggtransno = -1; + agg->location = -1; + + pstate->p_hasAggs = true; + + return (Node *) agg; +} + /* * Helper: build a transformed CASE expression implementing three-valued - * predicate logic for all(), any(), and none(). + * predicate logic for all(), any(), none(), and single(). 
* - * any(): CASE WHEN bool_or(pred IS TRUE) THEN true - * WHEN bool_or(pred IS NULL) THEN NULL - * ELSE false END + * any(): CASE WHEN bool_or(pred IS TRUE) THEN true + * WHEN bool_or(pred IS NULL) THEN NULL + * ELSE false END * - * all(): CASE WHEN bool_or(pred IS FALSE) THEN false - * WHEN bool_or(pred IS NULL) THEN NULL - * ELSE true END + * all(): CASE WHEN bool_or(pred IS FALSE) THEN false + * WHEN bool_or(pred IS NULL) THEN NULL + * ELSE true END * - * none(): CASE WHEN bool_or(pred IS TRUE) THEN false - * WHEN bool_or(pred IS NULL) THEN NULL - * ELSE true END + * none(): CASE WHEN bool_or(pred IS TRUE) THEN false + * WHEN bool_or(pred IS NULL) THEN NULL + * ELSE true END * - * Empty list: both bool_or calls return NULL (no rows), so the CASE - * falls through to the default: false for any(), true for all()/none(). - * This matches Cypher's vacuous truth semantics. + * single(): CASE WHEN count(*) FILTER (WHERE pred IS TRUE) >= 2 THEN false + * WHEN bool_or(pred IS NULL) THEN NULL + * WHEN count(*) FILTER (WHERE pred IS TRUE) = 1 THEN true + * ELSE false END + * + * Empty list: on zero rows bool_or() is NULL and count(*) is 0, so no WHEN + * arm fires and the CASE falls to its default: false for any()/single(), + * true for all()/none(). This matches Cypher's vacuous truth semantics. 
*/ static Node *make_predicate_case_expr(ParseState *pstate, Node *pred, cypher_predicate_function_kind kind) @@ -1752,7 +1798,7 @@ static Node *make_predicate_case_expr(ParseState *pstate, Node *pred, cexpr->defresult = (Expr *) false_const; cexpr->location = -1; } - else /* CPFK_NONE */ + else if (kind == CPFK_NONE) { /* bool_or(pred IS TRUE) -> false */ bool_or_first = make_bool_or_agg(pstate, @@ -1769,6 +1815,62 @@ static Node *make_predicate_case_expr(ParseState *pstate, Node *pred, cexpr->defresult = (Expr *) true_const; cexpr->location = -1; } + else /* CPFK_SINGLE */ + { + /* + * Three WHEN arms, in order: + * count(*) FILTER (pred IS TRUE) >= 2 -> false (definitely >1) + * bool_or(pred IS NULL) -> NULL (unknown arm) + * count(*) FILTER (pred IS TRUE) = 1 -> true (exactly one) + * else -> false (zero true) + * + * The >=2 arm is tested first so that 2-or-more definite trues + * win over any null predicates, matching Neo4j semantics. + */ + CaseWhen *when0, *when3; + Node *count_ge_2, *count_eq_1; + Node *count_true_a, *count_true_b; + Node *int_2 = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), + Int64GetDatum(2), false, FLOAT8PASSBYVAL); + Node *int_1 = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), + Int64GetDatum(1), false, FLOAT8PASSBYVAL); + + /* + * Two separate Aggref nodes for count(*) FILTER (pred IS TRUE); + * the planner will common-subexpression them back to a single + * aggregate evaluation. 
+ */ + count_true_a = make_count_star_filter_agg(pstate, + make_boolean_test(pred, IS_TRUE)); + count_true_b = make_count_star_filter_agg(pstate, + make_boolean_test(pred, IS_TRUE)); + + count_ge_2 = (Node *) make_op(pstate, list_make1(makeString(">=")), + count_true_a, int_2, + pstate->p_last_srf, -1); + count_eq_1 = (Node *) make_op(pstate, list_make1(makeString("=")), + count_true_b, int_1, + pstate->p_last_srf, -1); + + /* first arm: count >= 2 -> false */ + when0 = makeNode(CaseWhen); + when0->expr = (Expr *) count_ge_2; + when0->result = (Expr *) false_const; + when0->location = -1; + + /* third arm: count = 1 -> true */ + when3 = makeNode(CaseWhen); + when3->expr = (Expr *) count_eq_1; + when3->result = (Expr *) true_const; + when3->location = -1; + + cexpr = makeNode(CaseExpr); + cexpr->casetype = BOOLOID; + cexpr->arg = NULL; + cexpr->args = list_make3(when0, when2, when3); + cexpr->defresult = (Expr *) false_const; + cexpr->location = -1; + } return (Node *) cexpr; } @@ -1845,67 +1947,12 @@ static Query *transform_cypher_predicate_function(cypher_parsestate *cpstate, pred = coerce_to_boolean(child_pstate, pred, "WHERE"); } - if (pred_func->kind == CPFK_SINGLE) - { - /* - * single(): SELECT count(*) FROM unnest(list) AS x - * WHERE pred IS TRUE - * - * Using IS TRUE ensures NULL predicates are not counted as - * matches, preserving correct semantics. The grammar layer - * compares the result = 1. - * - * Note: a LIMIT 2 optimization (to short-circuit after two - * matches) would require a nested subquery that breaks - * correlated variable references. Deferred to a future - * optimization pass. 
- */ - FuncCall *count_call; - Node *count_expr; - Node *is_true_qual; - - /* WHERE pred IS TRUE -- NULLs are not counted */ - is_true_qual = make_boolean_test(pred, IS_TRUE); - - count_call = makeFuncCall(list_make1(makeString("count")), - NIL, COERCE_SQL_SYNTAX, -1); - count_call->agg_star = true; - - count_expr = transformExpr(child_pstate, (Node *) count_call, - EXPR_KIND_SELECT_TARGET); - - te = makeTargetEntry((Expr *) count_expr, - (AttrNumber) child_pstate->p_next_resno++, - "count", false); - - query->targetList = lappend(query->targetList, te); - query->jointree = makeFromExpr(child_pstate->p_joinlist, - is_true_qual); - query->rtable = child_pstate->p_rtable; - query->rteperminfos = child_pstate->p_rteperminfos; - query->hasAggs = child_pstate->p_hasAggs; - query->hasSubLinks = child_pstate->p_hasSubLinks; - query->hasTargetSRFs = child_pstate->p_hasTargetSRFs; - - assign_query_collations(child_pstate, query); - - if (child_pstate->p_hasAggs || - query->groupClause || query->groupingSets || query->havingQual) - { - parse_check_aggregates(child_pstate, query); - } - - free_cypher_parsestate(child_cpstate); - - return query; - } - else + /* + * Build a CASE expression that preserves three-valued NULL semantics. + * No WHERE clause -- the logic is entirely in the SELECT list, so the + * CASE can see null predicates and react to them. + */ { - /* - * all()/any()/none(): Build a CASE expression with bool_or() - * aggregates that preserves three-valued NULL semantics. - * No WHERE clause -- the logic is entirely in the SELECT list. 
- */ Node *case_expr; case_expr = make_predicate_case_expr(child_pstate, pred, diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index c857724dd..4de3f0920 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -3445,12 +3445,10 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, * none(x IN list WHERE predicate) * single(x IN list WHERE predicate) * - * All four use EXPR_SUBLINK (scalar subquery). The transform layer - * generates aggregate-based queries (using bool_or + CASE) for - * all/any/none to preserve three-valued NULL semantics, and - * count(*) with IS TRUE filtering for single(). - * - * For single(), the subquery result is compared = 1. + * All four use EXPR_SUBLINK (scalar subquery). The transform layer + * generates aggregate-based queries that preserve three-valued NULL + * semantics: bool_or + CASE for all/any/none, and a CASE built on + * count(*) FILTER for single(). */ static Node *build_predicate_function_node(cypher_predicate_function_kind kind, Node *var, Node *expr, @@ -3490,28 +3488,11 @@ static Node *build_predicate_function_node(cypher_predicate_function_kind kind, sub->location = location; sub->subLinkType = EXPR_SUBLINK; - if (kind == CPFK_SINGLE) - { - /* - * single() -> (subquery) = 1 - * The subquery returns count(*) with IS TRUE filtering. - */ - Node *eq_expr; - - eq_expr = (Node *) makeSimpleA_Expr(AEXPR_OP, "=", - (Node *) sub, - make_int_const(1, location), - location); - result = (Node *) node_to_agtype(eq_expr, "boolean", location); - } - else - { - /* - * all()/any()/none(): the subquery returns a boolean directly - * from the CASE+bool_or() aggregate expression. - */ - result = (Node *) node_to_agtype((Node *) sub, "boolean", location); - } + /* + * The subquery returns a boolean directly from the CASE+bool_or() + * aggregate expression (count(*) FILTER for single()). 
+ */ + result = (Node *) node_to_agtype((Node *) sub, "boolean", location); /* * NULL-list guard: CASE WHEN expr IS NULL THEN NULL ELSE result END diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 6700be3f3..fd4aa1175 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -12425,12 +12425,28 @@ Datum age_unnest(PG_FUNCTION_ARGS) HeapTuple tuple; Datum values[1]; bool nulls[1] = {false}; - agtype *val = agtype_value_to_agtype(&v); /* use the tmp context so we can clean up after each tuple is done */ old_cxt = MemoryContextSwitchTo(tmp_cxt); - values[0] = PointerGetDatum(val); + /* + * Emit an agtype AGTV_NULL element as a SQL-NULL row so that + * `IS NULL` / `IS NOT NULL` on the unwound variable match + * openCypher's null semantics (issue #2393). Wrapping it as a + * non-SQL-NULL agtype container would leave SQL's IS NULL blind + * to it, dropping `WHERE x IS NULL` filters and passing through + * `WHERE x IS NOT NULL` filters. + */ + if (v.type == AGTV_NULL) + { + values[0] = (Datum) 0; + nulls[0] = true; + } + else + { + agtype *val = agtype_value_to_agtype(&v); + values[0] = PointerGetDatum(val); + } tuple = heap_form_tuple(ret_tdesc, values, nulls);