From f686e93423fc42e0ffe78d993997466ad75e4bbd Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Tue, 28 Apr 2026 21:54:42 +0000 Subject: [PATCH] Allow safe Cypher reserved keywords in alias positions (#2355) Cypher productions in `cypher_gram.y` that bind an alias via the AS keyword (RETURN/WITH/YIELD ... AS x and UNWIND ... AS x) only accepted plain identifiers. As a result, completely valid Cypher such as SELECT * FROM cypher('g', $$ RETURN 1 AS count $$) AS (a agtype); failed with `syntax error at or near "count"`, even though `count` is already accepted in other identifier positions (it appears in the existing `safe_keywords` list and is permitted in `func_name`; `schema_name` accepts the broader `reserved_keyword` set). This patch introduces a dedicated `var_name_alias` non-terminal used only in the three alias-binding sites (yield_item, return_item, unwind). It accepts everything `var_name` accepts, plus the entire `safe_keywords` set, so the 49 non-conflicting reserved keywords (count, exists, coalesce, match, return, where, order, limit, distinct, optional, detach, contains, starts, ends, in, is, not, ...) are now usable as aliases. The change is intentionally scoped to alias positions: * `var_name` itself (used by pattern-variable bindings like `(x:Label)`, edge bindings, named paths, and `expr_var` references) is unchanged. Allowing safe_keywords there triggers 156 shift/reduce conflicts because keyword tokens collide with their roles inside expressions and patterns. * `conflicted_keywords` (END, NULL, TRUE, FALSE) remain rejected in every position; they are genuinely ambiguous with literal/CASE productions. Reading a keyword-named alias back through `expr_var` still fails (e.g. `WITH 1 AS count RETURN count`) because `expr_var` reads through `var_name`. That asymmetry is captured as a known limitation in the regression suite and tracked separately in #2416. Regression coverage lives in `regress/sql/reserved_keyword_alias.sql` and `regress/expected/reserved_keyword_alias.out`, exercising: * the original repro, * representative safe_keywords across RETURN/WITH/UNWIND, * multiple keyword aliases in one projection, * a backtick-quoted alias positive case, * the known read-back limitation as a negative test, and * explicit negatives proving END/NULL/TRUE/FALSE and pattern-position keywords still error out. Closes #2355. --- Makefile | 3 +- regress/expected/reserved_keyword_alias.out | 248 ++++++++++++++++++++ regress/sql/reserved_keyword_alias.sql | 102 ++++++++ src/backend/parser/cypher_gram.y | 44 +++- 4 files changed, 392 insertions(+), 5 deletions(-) create mode 100644 regress/expected/reserved_keyword_alias.out create mode 100755 regress/sql/reserved_keyword_alias.sql diff --git a/Makefile b/Makefile index 5405665d8..cb33e1047 100644 --- a/Makefile +++ b/Makefile @@ -177,7 +177,8 @@ REGRESS = scan \ predicate_functions \ map_projection \ direct_field_access \ - security + security \ + reserved_keyword_alias ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/reserved_keyword_alias.out b/regress/expected/reserved_keyword_alias.out new file mode 100644 index 000000000..0127dbb0e --- /dev/null +++ b/regress/expected/reserved_keyword_alias.out @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Regression coverage for issue #2355: + * Cypher reserved keywords from the `safe_keywords` set must be accepted + * in alias positions (RETURN/WITH/YIELD ... AS , UNWIND ... AS ). + * Conflicting tokens (END / NULL / TRUE / FALSE) must remain rejected. + * Pattern variable bindings are intentionally still restricted to plain + * identifiers. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT * FROM create_graph('issue_2355'); +NOTICE: graph "issue_2355" has been created + create_graph +-------------- + +(1 row) + +-- The exact reproducer from the issue (previously failed with +-- "syntax error at or near "count""). +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS count $$) AS (a agtype); + a +--- + 1 +(1 row) + +-- Representative coverage across the safe_keywords set as RETURN aliases. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS exists $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS coalesce $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS match $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS return $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS where $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS order $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS limit $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS distinct $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS optional $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS detach $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS contains $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS starts $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS ends $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS in $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS is $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS not $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS yield $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS call $$) AS (a agtype); + a +--- + 1 +(1 row) + +-- Multiple keyword aliases in one projection. +SELECT * FROM cypher('issue_2355', + $$ RETURN 1 AS count, 2 AS exists, 3 AS where $$ +) AS (count agtype, ex agtype, w agtype); + count | ex | w +-------+----+--- + 1 | 2 | 3 +(1 row) + +-- WITH ... AS : alias binding works. +SELECT * FROM cypher('issue_2355', + $$ WITH 1 AS count RETURN 1 AS x $$ +) AS (a agtype); + a +--- + 1 +(1 row) + +-- UNWIND ... AS : alias binding works. +SELECT * FROM cypher('issue_2355', + $$ UNWIND [1, 2, 3] AS row RETURN 1 AS x $$ +) AS (a agtype); + a +--- + 1 + 1 + 1 +(3 rows) + +-- conflicted_keywords (END, NULL, TRUE, FALSE) MUST still be rejected +-- because they introduce real grammar ambiguity with literal/expression +-- productions. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS null $$) AS (a agtype); +ERROR: syntax error at or near "null" +LINE 1: SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS null $$) ... + ^ +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS true $$) AS (a agtype); +ERROR: syntax error at or near "true" +LINE 1: SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS true $$) ... + ^ +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS false $$) AS (a agtype); +ERROR: syntax error at or near "false" +LINE 1: SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS false $$) ... + ^ +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS end $$) AS (a agtype); +ERROR: syntax error at or near "end" +LINE 1: SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS end $$) ... + ^ +-- Pattern-variable positions are intentionally NOT broadened (would +-- create shift/reduce conflicts). Confirm they still error. +SELECT * FROM cypher('issue_2355', + $$ MATCH (count) RETURN 1 AS x $$ +) AS (a agtype); +ERROR: syntax error at or near "count" +LINE 2: $$ MATCH (count) RETURN 1 AS x $$ + ^ +-- Plain identifiers naturally remain unaffected. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS my_alias $$) AS (a agtype); + a +--- + 1 +(1 row) + +-- Backtick-quoted alias positive case: forces the IDENTIFIER token +-- path, so future grammar refactors don't accidentally regress quoted +-- identifiers when the unquoted form is also a keyword. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS `count` $$) AS (a agtype); + a +--- + 1 +(1 row) + +SELECT * FROM cypher('issue_2355', $$ WITH 1 AS `count` RETURN `count` $$) AS (a agtype); + a +--- + 1 +(1 row) + +-- Known limitation: reading a keyword-named alias back fails because +-- expr_var reads through var_name (which is unchanged here). Tracked +-- in issue #2416. Captured in the expected output so the next +-- contributor who fixes expr_var has a precise file to update. +SELECT * FROM cypher('issue_2355', $$ WITH 1 AS count RETURN count $$) AS (a agtype); +ERROR: syntax error at end of input +LINE 1: ...her('issue_2355', $$ WITH 1 AS count RETURN count $$) AS (a ... + ^ +SELECT * FROM drop_graph('issue_2355', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2355._ag_label_vertex +drop cascades to table issue_2355._ag_label_edge +NOTICE: graph "issue_2355" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/reserved_keyword_alias.sql b/regress/sql/reserved_keyword_alias.sql new file mode 100755 index 000000000..eab27ee37 --- /dev/null +++ b/regress/sql/reserved_keyword_alias.sql @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Regression coverage for issue #2355: + * Cypher reserved keywords from the `safe_keywords` set must be accepted + * in alias positions (RETURN/WITH/YIELD ... AS , UNWIND ... AS ). + * Conflicting tokens (END / NULL / TRUE / FALSE) must remain rejected. + * Pattern variable bindings are intentionally still restricted to plain + * identifiers. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT * FROM create_graph('issue_2355'); + +-- The exact reproducer from the issue (previously failed with +-- "syntax error at or near "count""). +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS count $$) AS (a agtype); + +-- Representative coverage across the safe_keywords set as RETURN aliases. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS exists $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS coalesce $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS match $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS return $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS where $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS order $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS limit $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS distinct $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS optional $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS detach $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS contains $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS starts $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS ends $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS in $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS is $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS not $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS yield $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS call $$) AS (a agtype); + +-- Multiple keyword aliases in one projection. +SELECT * FROM cypher('issue_2355', + $$ RETURN 1 AS count, 2 AS exists, 3 AS where $$ +) AS (count agtype, ex agtype, w agtype); + +-- WITH ... AS : alias binding works. +SELECT * FROM cypher('issue_2355', + $$ WITH 1 AS count RETURN 1 AS x $$ +) AS (a agtype); + +-- UNWIND ... AS : alias binding works. +SELECT * FROM cypher('issue_2355', + $$ UNWIND [1, 2, 3] AS row RETURN 1 AS x $$ +) AS (a agtype); + +-- conflicted_keywords (END, NULL, TRUE, FALSE) MUST still be rejected +-- because they introduce real grammar ambiguity with literal/expression +-- productions. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS null $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS true $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS false $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS end $$) AS (a agtype); + +-- Pattern-variable positions are intentionally NOT broadened (would +-- create shift/reduce conflicts). Confirm they still error. +SELECT * FROM cypher('issue_2355', + $$ MATCH (count) RETURN 1 AS x $$ +) AS (a agtype); + +-- Plain identifiers naturally remain unaffected. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS my_alias $$) AS (a agtype); + +-- Backtick-quoted alias positive case: forces the IDENTIFIER token +-- path, so future grammar refactors don't accidentally regress quoted +-- identifiers when the unquoted form is also a keyword. +SELECT * FROM cypher('issue_2355', $$ RETURN 1 AS `count` $$) AS (a agtype); +SELECT * FROM cypher('issue_2355', $$ WITH 1 AS `count` RETURN `count` $$) AS (a agtype); + +-- Known limitation: reading a keyword-named alias back fails because +-- expr_var reads through var_name (which is unchanged here). Tracked +-- in issue #2416. Captured in the expected output so the next +-- contributor who fixes expr_var has a precise file to update. +SELECT * FROM cypher('issue_2355', $$ WITH 1 AS count RETURN count $$) AS (a agtype); + +SELECT * FROM drop_graph('issue_2355', true); diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index c857724dd..c614e1dbe 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -175,7 +175,7 @@ %type expr_subquery /* names */ -%type property_key_name var_name var_name_opt label_name +%type property_key_name var_name var_name_alias var_name_opt label_name %type symbolic_name schema_name type_name %type reserved_keyword safe_keywords conflicted_keywords %type func_name @@ -480,7 +480,7 @@ yield_item_list: ; yield_item: - expr AS var_name + expr AS var_name_alias { ResTarget *n; @@ -791,7 +791,7 @@ return_item_list: ; return_item: - expr AS var_name + expr AS var_name_alias { ResTarget *n; @@ -985,7 +985,7 @@ optional_opt: unwind: - UNWIND expr AS var_name + UNWIND expr AS var_name_alias { ResTarget *res; cypher_unwind *n; @@ -2337,6 +2337,42 @@ var_name: } ; +/* + * var_name_alias is used in alias positions (RETURN/WITH/YIELD ... AS x, + * UNWIND ... AS x) where the AS keyword removes any lookahead ambiguity. + * Beyond plain identifiers, it permits the same set of non-conflicting + * reserved keywords accepted by safe_keywords (already accepted in + * func_name; schema_name accepts the broader reserved_keyword), so that + * legitimate Cypher such as + * RETURN 1 AS count + * RETURN n AS exists + * UNWIND [1, 2] AS row + * is parsed correctly. Truly conflicting tokens (END, NULL, TRUE, FALSE) + * are listed under conflicted_keywords (not safe_keywords) and remain + * rejected here. See issue #2355. + * + * It is intentionally NOT used in pattern variable positions + * ((x:Label), [r:REL]) or named-path bindings (p = ...), because + * allowing reserved keywords there introduces shift/reduce ambiguity. + * + * NOTE: Reading a keyword-named alias back (e.g. WITH 1 AS count + * RETURN count) is intentionally still rejected -- broadening expr_var + * (which reads through var_name) to accept safe_keywords reintroduces + * ~156 shift/reduce conflicts in bison. That asymmetry (writable but + * not readable) is tracked in issue #2416. + */ +var_name_alias: + var_name + | safe_keywords + { + /* safe_keywords already returns a pnstrdup-allocated copy via + * KEYWORD_STRDUP, so no further pstrdup is needed. Mirrors the + * established pattern used by schema_name's reserved_keyword + * branch. */ + $$ = (char *) $1; + } + ; + var_name_opt: /* empty */ {