From dce84e02763a836e15aed70af3bc512dbc94a2a2 Mon Sep 17 00:00:00 2001 From: Dejan Krakovic Date: Sun, 31 May 2026 22:25:50 +0000 Subject: [PATCH 1/2] [SPARK-57188][SQL] Parameterless function takes precedence over UDF parameter ### What changes were proposed in this pull request? A parameterless built-in function (`current_user`, `current_date`, `current_time`, `current_timestamp`, `user`, `session_user`, `grouping__id`) now takes precedence over a SQL UDF parameter that shares its name. Previously the UDF parameter alias shadowed the built-in, e.g. `CREATE FUNCTION f(current_user STRING) RETURNS STRING RETURN current_user; SELECT f('alice')` returned `'alice'` instead of the actual current user. SQL UDF input-parameter aliases are marked with a named `Metadata` key, `SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY`. Producers are `SessionCatalog.makeSQLFunctionPlan` and `makeSQLTableFunctionPlan` (CALL time, scalar and table UDFs) and `CreateSQLFunctionCommand` (CREATE time). Storing the marker in `Metadata` (rather than a `TreeNodeTag`) lets it propagate automatically through `Alias.toAttribute` and `AttributeReference` copies. In `ColumnResolutionHelper`, when name-based resolution returns an attribute carrying this metadata, `LiteralFunctionResolution` is preferred over it. Real columns from relations (no marker) still win, preserving the overall precedence (column > parameterless function > UDF parameter). The new behavior is gated behind a legacy kill-switch, `spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction` (default `false`); set to `true` to restore the previous behavior. ### Why are the changes needed? To match the documented SQL name resolution rules: a parameterless built-in function should not be shadowed by a same-named UDF parameter. ### Does this PR introduce _any_ user-facing change? Yes. A SQL UDF parameter named like a parameterless built-in function no longer shadows that function in the function body. 
The legacy conf restores the old behavior. Note that a parameter named `grouping__id` now resolves to the grouping-ID function, so a UDF whose body references it outside a grouping context (e.g. `RETURN grouping__id`) fails analysis at CREATE time with `UNSUPPORTED_GROUPING_EXPRESSION`. ### How was this patch tested? Added golden-file tests `sql-udf-name-precedence(-legacy)` and `parameterless-function-name-precedence(-legacy)` covering column > param, LCA > param, outer-ref > param, parameterless function > param (scalar and table UDF), param > session variable, nested-UDF inner-scope binding, and the legacy (flag-on) behavior. All 4 golden-file suites pass. --- .../analysis/ColumnResolutionHelper.scala | 31 +- .../sql/catalyst/catalog/SessionCatalog.scala | 29 +- .../apache/spark/sql/internal/SQLConf.scala | 13 + .../command/CreateSQLFunctionCommand.scala | 9 +- ...ss-function-name-precedence-legacy.sql.out | 127 ++++++ ...meterless-function-name-precedence.sql.out | 127 ++++++ .../sql-udf-name-precedence-legacy.sql.out | 293 ++++++++++++++ .../sql-udf-name-precedence.sql.out | 320 ++++++++++++++++ ...erless-function-name-precedence-legacy.sql | 3 + ...parameterless-function-name-precedence.sql | 48 +++ .../inputs/sql-udf-name-precedence-legacy.sql | 3 + .../inputs/sql-udf-name-precedence.sql | 94 +++++ ...ss-function-name-precedence-legacy.sql.out | 131 +++++++ ...meterless-function-name-precedence.sql.out | 131 +++++++ .../sql-udf-name-precedence-legacy.sql.out | 327 ++++++++++++++++ .../results/sql-udf-name-precedence.sql.out | 360 ++++++++++++++++++ 16 files changed, 2034 insertions(+), 12 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql create mode 100644 
sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index 0c8ecbab48ecb..eaa61a2a91633 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -23,6 +23,7 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils.wrapOuterReference import org.apache.spark.sql.catalyst.plans.logical._ @@ -147,8 +148,20 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { // When strict DataFrame column resolution is disabled, we also allow name-based // resolution as a fallback for tagged attributes. 
val result = withPosition(u) { - resolveColumnByName(nameParts) - .orElse(LiteralFunctionResolution.resolve(nameParts)) + // A parameterless built-in function takes precedence over a SQL UDF parameter + // that happens to share its name (per the documented SQL name resolution rules). + // Real columns from relations -- which don't carry the + // SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY -- continue to win as before. + // Gated by a legacy kill-switch conf so the pre-fix behavior can be restored. + val column = resolveColumnByName(nameParts) + val resolved = column match { + case Some(c) if isSQLFunctionParameterAlias(c) && !conf.getConf( + SQLConf.LEGACY_ALLOW_UDF_PARAMETER_TO_SHADOW_PARAMETERLESS_FUNCTION) => + LiteralFunctionResolution.resolve(nameParts).orElse(column) + case Some(_) => column + case None => LiteralFunctionResolution.resolve(nameParts) + } + resolved .map { // We trim unnecessary alias here. Note that, we cannot trim the alias at top-level, // as we should resolve `UnresolvedAttribute` to a named expression. The caller side @@ -696,4 +709,18 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { r.expressions.forall(_.references.subsetOf(outputSet)) } } + + /** + * True if `e` originates from a SQL UDF input parameter alias, as marked by + * `SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY` at parameter-alias + * construction sites. Unwraps `OuterReference` so callers that pass post-outer-resolution + * expressions still get a correct answer; the metadata lives on the underlying named + * expression. 
+ */ + private def isSQLFunctionParameterAlias(e: Expression): Boolean = e match { + case OuterReference(inner) => isSQLFunctionParameterAlias(inner) + case n: NamedExpression => + n.metadata.contains(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY) + case _ => false + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 1b36a29438c92..f44113c6c189f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -60,6 +60,13 @@ import org.apache.spark.util.Utils object SessionCatalog { val DEFAULT_DATABASE = "default" + /** + * Metadata key marking an Alias / Attribute as originating from a SQL UDF input parameter. + * Consumed by name resolution: a parameterless built-in function takes precedence over a + * tagged alias of the same name. + */ + val SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY: String = "__funcInputAlias" + /** * Kind of session-scoped function namespace for lookup/resolve. * Used by the kind-based API to avoid separate methods per @@ -1880,11 +1887,6 @@ class SessionCatalog( name: String, function: SQLFunction, input: Seq[Expression]): LogicalPlan = { - def metaForFuncInputAlias = { - new MetadataBuilder() - .putString("__funcInputAlias", "true") - .build() - } assert(!function.isTableFunc, "Function '" + function.name + "' is a table function. 
" + "Use makeSQLTableFunctionPlan() instead of makeSQLFunctionPlan().") @@ -1941,6 +1943,9 @@ class SessionCatalog( } } + val funcInputMetadata = new MetadataBuilder() + .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) + .build() paddedInput.zip(param.fields).map { case (expr, param) => // Add outer references to all resolved attributes and outer references in the function @@ -1950,10 +1955,11 @@ class SessionCatalog( case a: Attribute if a.resolved => OuterReference(a) case o: OuterReference => OuterReference(o) } + // Mark the alias as function input so name resolution can give a parameterless + // built-in function precedence over a same-named UDF parameter. Alias(Cast(outer, param.dataType), param.name)( qualifier = qualifier, - // mark the alias as function input - explicitMetadata = Some(metaForFuncInputAlias)) + explicitMetadata = Some(funcInputMetadata)) } }.getOrElse(Nil) @@ -2040,13 +2046,20 @@ class SessionCatalog( } } + val funcInputMetadata = new MetadataBuilder() + .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) + .build() val inputCast = paddedInput.zip(param.fields).map { case (expr, param) => // Add outer references to all attributes in the function input. val outer = expr.transform { case a: Attribute => OuterReference(a) } - Alias(Cast(outer, param.dataType), param.name)(qualifier = qualifier) + // Mark the alias as function input so name resolution can give a parameterless + // built-in function precedence over a same-named UDF parameter. 
+ Alias(Cast(outer, param.dataType), param.name)( + qualifier = qualifier, + explicitMetadata = Some(funcInputMetadata)) } val inputPlan = Project(inputCast, OneRowRelation()) LateralJoin(inputPlan, LateralSubquery(query.get), Inner, None) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 41a511666af82..7fadd084fe72d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3651,6 +3651,19 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_ALLOW_UDF_PARAMETER_TO_SHADOW_PARAMETERLESS_FUNCTION = + buildConf("spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction") + .internal() + .doc("When true (legacy behavior), a SQL UDF parameter alias shadows a parameterless " + + "built-in function (current_user, current_date, current_time, current_timestamp, " + + "user, session_user, grouping__id) of the same name. 
When false (the default), the " + + "parameterless built-in function takes precedence, matching the documented name " + + "resolution rules.") + .version("4.2.0") + .withBindingPolicy(ConfigBindingPolicy.SESSION) + .booleanConf + .createWithDefault(false) + val ALLOW_NON_EMPTY_LOCATION_IN_CTAS = buildConf("spark.sql.legacy.allowNonEmptyLocationInCTAS") .internal() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala index 9bfdff127c5a5..501bd7a38f6b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CreateUserDefinedFunctionCommand._ -import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.types.{DataType, MetadataBuilder, StructField, StructType} /** * The DDL command that creates a SQL function. @@ -109,6 +109,11 @@ case class CreateSQLFunctionCommand( // Qualify the input parameters with the function name so that attributes referencing // the function input parameters can be resolved correctly. val qualifier = Seq(name.funcName) + // Mark the parameter aliases as function input so name resolution can give a + // parameterless built-in function precedence over a same-named UDF parameter. 
+ val funcInputMetadata = new MetadataBuilder() + .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) + .build() val input = param.map(p => Alias( { val defaultExpr = p.getDefault() @@ -131,7 +136,7 @@ case class CreateSQLFunctionCommand( } Cast(defaultPlan, p.dataType) } - }, p.name)(qualifier = qualifier)) + }, p.name)(qualifier = qualifier, explicitMetadata = Some(funcInputMetadata))) Project(input, OneRowRelation()) } else { OneRowRelation() diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out new file mode 100644 index 0000000000000..4b5ed0896741f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out @@ -0,0 +1,127 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user +-- !query analysis +CreateViewCommand `v_user`, SELECT 'admin.admin' AS current_user, false, true, LocalTempView, UNSUPPORTED, true + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time +-- !query analysis +CreateViewCommand `v_time`, SELECT CAST(0 AS INT) AS current_time, false, true, LocalTempView, UNSUPPORTED, true + +- Project [cast(0 as int) AS current_time#x] + +- OneRowRelation + + +-- !query +SELECT current_user FROM v_user +-- !query analysis +Project [current_user#x] ++- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +SELECT current_time FROM v_time +-- !query analysis +Project [current_time#x] ++- SubqueryAlias v_time + +- View (`v_time`, [current_time#x]) + +- 
Project [cast(current_time#x as int) AS current_time#x] + +- Project [cast(0 as int) AS current_time#x] + +- OneRowRelation + + +-- !query +SELECT 'abc' AS current_user, current_user = current_user() AS function_won +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT (SELECT current_user) = current_user() AS function_won FROM v_user +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +DECLARE current_user = 'abc' +-- !query analysis +CreateVariable default(abc, sql=''abc''), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user + + +-- !query +SELECT current_user, current_user FROM v_user +-- !query analysis +Project [current_user#x, current_user#x] ++- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +DROP TEMPORARY VARIABLE current_user +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user + + +-- !query +WITH t1 AS (SELECT 1 AS current_date) +SELECT typeof((SELECT current_date)) FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +WITH t1 AS (SELECT 1 AS current_timestamp) +SELECT typeof((SELECT current_timestamp)) FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +WITH t1 AS (SELECT 1 AS user) +SELECT (SELECT user) = current_user() AS function_won FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +WITH t1 AS (SELECT 1 AS session_user) +SELECT (SELECT session_user) = current_user() AS function_won FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(grouping__id) FROM 
v_user GROUP BY current_user GROUPING SETS ((current_user)) +-- !query analysis +Aggregate [current_user#x, spark_grouping_id#xL], [typeof(spark_grouping_id#xL) AS typeof(grouping_id())#x] ++- Expand [[current_user#x, current_user#x, 0]], [current_user#x, current_user#x, spark_grouping_id#xL] + +- Project [current_user#x, current_user#x AS current_user#x] + +- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +DROP VIEW v_user +-- !query analysis +DropTempViewCommand v_user, false + + +-- !query +DROP VIEW v_time +-- !query analysis +DropTempViewCommand v_time, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out new file mode 100644 index 0000000000000..4b5ed0896741f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out @@ -0,0 +1,127 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user +-- !query analysis +CreateViewCommand `v_user`, SELECT 'admin.admin' AS current_user, false, true, LocalTempView, UNSUPPORTED, true + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time +-- !query analysis +CreateViewCommand `v_time`, SELECT CAST(0 AS INT) AS current_time, false, true, LocalTempView, UNSUPPORTED, true + +- Project [cast(0 as int) AS current_time#x] + +- OneRowRelation + + +-- !query +SELECT current_user FROM v_user +-- !query analysis +Project [current_user#x] ++- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS 
current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +SELECT current_time FROM v_time +-- !query analysis +Project [current_time#x] ++- SubqueryAlias v_time + +- View (`v_time`, [current_time#x]) + +- Project [cast(current_time#x as int) AS current_time#x] + +- Project [cast(0 as int) AS current_time#x] + +- OneRowRelation + + +-- !query +SELECT 'abc' AS current_user, current_user = current_user() AS function_won +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT (SELECT current_user) = current_user() AS function_won FROM v_user +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +DECLARE current_user = 'abc' +-- !query analysis +CreateVariable default(abc, sql=''abc''), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user + + +-- !query +SELECT current_user, current_user FROM v_user +-- !query analysis +Project [current_user#x, current_user#x] ++- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +DROP TEMPORARY VARIABLE current_user +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user + + +-- !query +WITH t1 AS (SELECT 1 AS current_date) +SELECT typeof((SELECT current_date)) FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +WITH t1 AS (SELECT 1 AS current_timestamp) +SELECT typeof((SELECT current_timestamp)) FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +WITH t1 AS (SELECT 1 AS user) +SELECT (SELECT user) = current_user() AS function_won FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- 
!query +WITH t1 AS (SELECT 1 AS session_user) +SELECT (SELECT session_user) = current_user() AS function_won FROM t1 +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user)) +-- !query analysis +Aggregate [current_user#x, spark_grouping_id#xL], [typeof(spark_grouping_id#xL) AS typeof(grouping_id())#x] ++- Expand [[current_user#x, current_user#x, 0]], [current_user#x, current_user#x, spark_grouping_id#xL] + +- Project [current_user#x, current_user#x AS current_user#x] + +- SubqueryAlias v_user + +- View (`v_user`, [current_user#x]) + +- Project [cast(current_user#x as string) AS current_user#x] + +- Project [admin.admin AS current_user#x] + +- OneRowRelation + + +-- !query +DROP VIEW v_user +-- !query analysis +DropTempViewCommand v_user, false + + +-- !query +DROP VIEW v_time +-- !query analysis +DropTempViewCommand v_time, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out new file mode 100644 index 0000000000000..04fb7715961d0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out @@ -0,0 +1,293 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x +-- !query analysis +CreateViewCommand `v1`, SELECT 1 AS x, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x +-- !query analysis +CreateSQLFunctionCommand identity_fn, x INT, INT, x, false, true, false, true + + +-- !query +SELECT identity_fn(42) +-- !query analysis +Project [identity_fn(x#x) AS identity_fn(42)#x] ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query 
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1) +-- !query analysis +CreateSQLFunctionCommand col_vs_param, x INT, INT, (SELECT x FROM v1), false, true, false, true + + +-- !query +SELECT col_vs_param(42) +-- !query analysis +Project [col_vs_param(x#x) AS col_vs_param(42)#x] +: +- Project [x#x] +: +- SubqueryAlias v1 +: +- View (`v1`, [x#x]) +: +- Project [cast(x#x as int) AS x#x] +: +- Project [1 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING) +RETURNS STRING RETURN current_user +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param, current_user STRING, STRING, current_user, false, true, false, true + + +-- !query +SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT) +RETURNS STRING RETURN typeof(current_date) +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param_date, current_date INT, STRING, typeof(current_date), false, true, false, true + + +-- !query +SELECT paramless_vs_param_date(42) +-- !query analysis +Project [paramless_vs_param_date(current_date#x) AS paramless_vs_param_date(42)#x] ++- Project [cast(42 as int) AS current_date#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT) +RETURNS STRING RETURN typeof(current_time) +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param_time, current_time INT, STRING, typeof(current_time), false, true, false, true + + +-- !query +SELECT paramless_vs_param_time(42) +-- !query analysis +Project [paramless_vs_param_time(current_time#x) AS paramless_vs_param_time(42)#x] ++- Project [cast(42 as int) AS current_time#x] + +- OneRowRelation + + +-- !query 
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT) +RETURNS INT RETURN grouping__id +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param_grouping, grouping__id INT, INT, grouping__id, false, true, false, true + + +-- !query +SELECT paramless_vs_param_grouping(42) +-- !query analysis +Project [paramless_vs_param_grouping(grouping__id#x) AS paramless_vs_param_grouping(42)#x] ++- Project [cast(42 as int) AS grouping__id#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT) +RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)) +-- !query analysis +CreateSQLFunctionCommand lca_vs_param, x INT, INT, (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)), false, true, false, true + + +-- !query +SELECT lca_vs_param(42) +-- !query analysis +Project [lca_vs_param(x#x) AS lca_vs_param(42)#x] +: +- Project [y#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [x#x, (x#x + 1) AS y#x] +: +- Project [999 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT) +RETURNS INT RETURN (SELECT (SELECT x) FROM v1) +-- !query analysis +CreateSQLFunctionCommand outer_vs_param, x INT, INT, (SELECT (SELECT x) FROM v1), false, true, false, true + + +-- !query +SELECT outer_vs_param(42) +-- !query analysis +Project [outer_vs_param(x#x) AS outer_vs_param(42)#x] +: +- Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x] +: : +- Project [outer(x#x)] +: : +- OneRowRelation +: +- SubqueryAlias v1 +: +- View (`v1`, [x#x]) +: +- Project [cast(x#x as int) AS x#x] +: +- Project [1 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT) +RETURNS INT RETURN (SELECT (SELECT x)) +-- !query analysis +CreateSQLFunctionCommand outer_param_pure, x INT, INT, (SELECT (SELECT x)), false, true, 
false, true + + +-- !query +SELECT outer_param_pure(42) +-- !query analysis +Project [outer_param_pure(x#x) AS outer_param_pure(42)#x] +: +- Project [outer(x#x)] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +DECLARE x = 999 +-- !query analysis +CreateVariable default(999, sql='999'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x + + +-- !query +SELECT identity_fn(42) +-- !query analysis +Project [identity_fn(x#x) AS identity_fn(42)#x] ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x +-- !query analysis +CreateSQLFunctionCommand inner_fn, y INT, INT, x, false, true, false, true + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x) +-- !query analysis +CreateSQLFunctionCommand outer_fn, x INT, INT, inner_fn(x), false, true, false, true + + +-- !query +SELECT outer_fn(42) +-- !query analysis +Project [outer_fn(x#x) AS outer_fn(42)#x] ++- Project [x#x, cast(x#x as int) AS y#x] + +- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING) +RETURNS TABLE(c STRING) RETURN SELECT current_user AS c +-- !query analysis +CreateSQLFunctionCommand tvf_paramless_vs_param, current_user STRING, c STRING, SELECT current_user AS c, true, true, false, true + + +-- !query +SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +DROP TEMPORARY VARIABLE x +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x + + +-- !query +DROP VIEW v1 +-- !query analysis +DropTempViewCommand v1, false + + +-- !query +DROP TEMPORARY FUNCTION identity_fn 
+-- !query analysis +DropFunctionCommand identity_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION col_vs_param +-- !query analysis +DropFunctionCommand col_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param +-- !query analysis +DropFunctionCommand paramless_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_date +-- !query analysis +DropFunctionCommand paramless_vs_param_date, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_time +-- !query analysis +DropFunctionCommand paramless_vs_param_time, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_grouping +-- !query analysis +DropFunctionCommand paramless_vs_param_grouping, false, true + + +-- !query +DROP TEMPORARY FUNCTION lca_vs_param +-- !query analysis +DropFunctionCommand lca_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_vs_param +-- !query analysis +DropFunctionCommand outer_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_param_pure +-- !query analysis +DropFunctionCommand outer_param_pure, false, true + + +-- !query +DROP TEMPORARY FUNCTION inner_fn +-- !query analysis +DropFunctionCommand inner_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_fn +-- !query analysis +DropFunctionCommand outer_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION tvf_paramless_vs_param +-- !query analysis +DropFunctionCommand tvf_paramless_vs_param, false, true diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out new file mode 100644 index 0000000000000..03e2b3c6d8cb3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out @@ -0,0 +1,320 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x +-- !query analysis 
+CreateViewCommand `v1`, SELECT 1 AS x, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x +-- !query analysis +CreateSQLFunctionCommand identity_fn, x INT, INT, x, false, true, false, true + + +-- !query +SELECT identity_fn(42) +-- !query analysis +Project [identity_fn(x#x) AS identity_fn(42)#x] ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1) +-- !query analysis +CreateSQLFunctionCommand col_vs_param, x INT, INT, (SELECT x FROM v1), false, true, false, true + + +-- !query +SELECT col_vs_param(42) +-- !query analysis +Project [col_vs_param(x#x) AS col_vs_param(42)#x] +: +- Project [x#x] +: +- SubqueryAlias v1 +: +- View (`v1`, [x#x]) +: +- Project [cast(x#x as int) AS x#x] +: +- Project [1 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING) +RETURNS STRING RETURN current_user +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param, current_user STRING, STRING, current_user, false, true, false, true + + +-- !query +SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT) +RETURNS STRING RETURN typeof(current_date) +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param_date, current_date INT, STRING, typeof(current_date), false, true, false, true + + +-- !query +SELECT paramless_vs_param_date(42) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT) +RETURNS STRING RETURN 
typeof(current_time) +-- !query analysis +CreateSQLFunctionCommand paramless_vs_param_time, current_time INT, STRING, typeof(current_time), false, true, false, true + + +-- !query +SELECT paramless_vs_param_time(42) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT) +RETURNS INT RETURN grouping__id +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_GROUPING_EXPRESSION", + "sqlState" : "42K0E", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 12, + "fragment" : "grouping__id" + } ] +} + + +-- !query +SELECT paramless_vs_param_grouping(42) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`paramless_vs_param_grouping`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 38, + "fragment" : "paramless_vs_param_grouping(42)" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT) +RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)) +-- !query analysis +CreateSQLFunctionCommand lca_vs_param, x INT, INT, (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)), false, true, false, true + + +-- !query +SELECT lca_vs_param(42) +-- !query analysis +Project [lca_vs_param(x#x) AS lca_vs_param(42)#x] +: +- Project [y#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [x#x, (x#x + 1) AS y#x] +: +- Project [999 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT) +RETURNS INT RETURN (SELECT (SELECT x) FROM v1) +-- !query analysis 
+CreateSQLFunctionCommand outer_vs_param, x INT, INT, (SELECT (SELECT x) FROM v1), false, true, false, true + + +-- !query +SELECT outer_vs_param(42) +-- !query analysis +Project [outer_vs_param(x#x) AS outer_vs_param(42)#x] +: +- Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x] +: : +- Project [outer(x#x)] +: : +- OneRowRelation +: +- SubqueryAlias v1 +: +- View (`v1`, [x#x]) +: +- Project [cast(x#x as int) AS x#x] +: +- Project [1 AS x#x] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT) +RETURNS INT RETURN (SELECT (SELECT x)) +-- !query analysis +CreateSQLFunctionCommand outer_param_pure, x INT, INT, (SELECT (SELECT x)), false, true, false, true + + +-- !query +SELECT outer_param_pure(42) +-- !query analysis +Project [outer_param_pure(x#x) AS outer_param_pure(42)#x] +: +- Project [outer(x#x)] +: +- OneRowRelation ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +DECLARE x = 999 +-- !query analysis +CreateVariable default(999, sql='999'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x + + +-- !query +SELECT identity_fn(42) +-- !query analysis +Project [identity_fn(x#x) AS identity_fn(42)#x] ++- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x +-- !query analysis +CreateSQLFunctionCommand inner_fn, y INT, INT, x, false, true, false, true + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x) +-- !query analysis +CreateSQLFunctionCommand outer_fn, x INT, INT, inner_fn(x), false, true, false, true + + +-- !query +SELECT outer_fn(42) +-- !query analysis +Project [outer_fn(x#x) AS outer_fn(42)#x] ++- Project [x#x, cast(x#x as int) AS y#x] + +- Project [cast(42 as int) AS x#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY 
FUNCTION tvf_paramless_vs_param(current_user STRING) +RETURNS TABLE(c STRING) RETURN SELECT current_user AS c +-- !query analysis +CreateSQLFunctionCommand tvf_paramless_vs_param, current_user STRING, c STRING, SELECT current_user AS c, true, true, false, true + + +-- !query +SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +DROP TEMPORARY VARIABLE x +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x + + +-- !query +DROP VIEW v1 +-- !query analysis +DropTempViewCommand v1, false + + +-- !query +DROP TEMPORARY FUNCTION identity_fn +-- !query analysis +DropFunctionCommand identity_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION col_vs_param +-- !query analysis +DropFunctionCommand col_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param +-- !query analysis +DropFunctionCommand paramless_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_date +-- !query analysis +DropFunctionCommand paramless_vs_param_date, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_time +-- !query analysis +DropFunctionCommand paramless_vs_param_time, false, true + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_grouping +-- !query analysis +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`paramless_vs_param_grouping`" + } +} + + +-- !query +DROP TEMPORARY FUNCTION lca_vs_param +-- !query analysis +DropFunctionCommand lca_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_vs_param +-- !query analysis +DropFunctionCommand outer_vs_param, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_param_pure +-- !query analysis 
+DropFunctionCommand outer_param_pure, false, true + + +-- !query +DROP TEMPORARY FUNCTION inner_fn +-- !query analysis +DropFunctionCommand inner_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION outer_fn +-- !query analysis +DropFunctionCommand outer_fn, false, true + + +-- !query +DROP TEMPORARY FUNCTION tvf_paramless_vs_param +-- !query analysis +DropFunctionCommand tvf_paramless_vs_param, false, true diff --git a/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql new file mode 100644 index 0000000000000..b7f70266cc8f2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql @@ -0,0 +1,3 @@ +--IMPORT parameterless-function-name-precedence.sql + +--SET spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction=true diff --git a/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql new file mode 100644 index 0000000000000..6d2222cf3308c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql @@ -0,0 +1,48 @@ +-- Precedence between parameterless built-in functions and other resolution candidates +-- (column, LCA, outer reference, session variable). The outer-reference cases for +-- `current_time` are omitted because the value is non-deterministic; other `current_time` +-- patterns are covered (column-wins below, and the UDF-param case in the companion +-- sql-udf-name-precedence.sql). + +CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user; +CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time; + +-- Column wins over parameterless function. 
+SELECT current_user FROM v_user; +SELECT current_time FROM v_time; + +-- Parameterless function wins over LCA. Compared against `current_user()` (with alias) so +-- the golden stays stable across test envs that return different user names. +SELECT 'abc' AS current_user, current_user = current_user() AS function_won; + +-- Parameterless function wins over outer reference. +SELECT (SELECT current_user) = current_user() AS function_won FROM v_user; + +DECLARE current_user = 'abc'; + +-- Column wins over both the parameterless function and the session variable. +SELECT current_user, current_user FROM v_user; + +DROP TEMPORARY VARIABLE current_user; + +-- Parameterless function wins over outer reference (current_date / current_timestamp). +-- typeof keeps the golden stable across clock changes. +WITH t1 AS (SELECT 1 AS current_date) +SELECT typeof((SELECT current_date)) FROM t1; + +WITH t1 AS (SELECT 1 AS current_timestamp) +SELECT typeof((SELECT current_timestamp)) FROM t1; + +-- Parameterless function wins over outer reference (user / session_user). +WITH t1 AS (SELECT 1 AS user) +SELECT (SELECT user) = current_user() AS function_won FROM t1; + +WITH t1 AS (SELECT 1 AS session_user) +SELECT (SELECT session_user) = current_user() AS function_won FROM t1; + +-- grouping__id: the resolution rule applies but the function itself only makes sense +-- inside grouping analytics. Assert the rule fires structurally via type/error pattern. 
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user)); + +DROP VIEW v_user; +DROP VIEW v_time; diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql new file mode 100644 index 0000000000000..6b2fb51f4e6a9 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql @@ -0,0 +1,3 @@ +--IMPORT sql-udf-name-precedence.sql + +--SET spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction=true diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql new file mode 100644 index 0000000000000..f120052baad7e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql @@ -0,0 +1,94 @@ +-- Precedence between a SQL UDF parameter and other resolution candidates (column, +-- parameterless built-in function, LCA, outer reference, session variable, nested UDF). + +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x; +CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x; + +-- UDF parameter resolves when no column conflict. +SELECT identity_fn(42); + +CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1); + +-- Column wins over UDF parameter. +SELECT col_vs_param(42); + +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING) +RETURNS STRING RETURN current_user; + +-- Parameterless function wins over UDF parameter (current_user). Comparison against +-- `current_user()` keeps the golden stable across envs returning different user names. 
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won; + +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT) +RETURNS STRING RETURN typeof(current_date); + +-- Parameterless function wins over UDF parameter (current_date): the body returns 'date', +-- which would be 'int' if the parameter alias had won. +SELECT paramless_vs_param_date(42); + +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT) +RETURNS STRING RETURN typeof(current_time); + +-- Parameterless function wins over UDF parameter (current_time): the body returns a +-- time-typed value, vs 'int' if the parameter alias had won. +SELECT paramless_vs_param_time(42); + +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT) +RETURNS INT RETURN grouping__id; + +-- Parameterless function (grouping__id) wins over UDF parameter. Outside a GROUPING SETS +-- context grouping__id fails analysis, so the CREATE above fails with +-- UNSUPPORTED_GROUPING_EXPRESSION and this call then reports UNRESOLVED_ROUTINE. +SELECT paramless_vs_param_grouping(42); + +CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT) +RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)); + +-- LCA wins over UDF parameter (1000 = LCA won, 43 = param won). +SELECT lca_vs_param(42); + +CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT) +RETURNS INT RETURN (SELECT (SELECT x) FROM v1); + +-- Outer column wins over UDF parameter. +SELECT outer_vs_param(42); + +CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT) +RETURNS INT RETURN (SELECT (SELECT x)); + +-- UDF parameter is visible via outer reference when no other binding is in scope. +SELECT outer_param_pure(42); + +DECLARE x = 999; + +-- UDF parameter wins over session variable. 
+SELECT identity_fn(42); + +CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x; +CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x); + +-- Nested UDF only sees innermost scope: inner_fn resolves 'x' from session variable (999), +-- not outer_fn parameter (42). +SELECT outer_fn(42); + +CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING) +RETURNS TABLE(c STRING) RETURN SELECT current_user AS c; + +-- Parameterless function wins over TVF parameter (same rule must apply on the +-- makeSQLTableFunctionPlan path). +SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored'); + +DROP TEMPORARY VARIABLE x; +DROP VIEW v1; +DROP TEMPORARY FUNCTION identity_fn; +DROP TEMPORARY FUNCTION col_vs_param; +DROP TEMPORARY FUNCTION paramless_vs_param; +DROP TEMPORARY FUNCTION paramless_vs_param_date; +DROP TEMPORARY FUNCTION paramless_vs_param_time; +DROP TEMPORARY FUNCTION paramless_vs_param_grouping; +DROP TEMPORARY FUNCTION lca_vs_param; +DROP TEMPORARY FUNCTION outer_vs_param; +DROP TEMPORARY FUNCTION outer_param_pure; +DROP TEMPORARY FUNCTION inner_fn; +DROP TEMPORARY FUNCTION outer_fn; +DROP TEMPORARY FUNCTION tvf_paramless_vs_param; diff --git a/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out new file mode 100644 index 0000000000000..c47ba461ba6cc --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out @@ -0,0 +1,131 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time +-- !query schema +struct<> +-- !query output + + 
+ +-- !query +SELECT current_user FROM v_user +-- !query schema +struct +-- !query output +admin.admin + + +-- !query +SELECT current_time FROM v_time +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT 'abc' AS current_user, current_user = current_user() AS function_won +-- !query schema +struct +-- !query output +abc true + + +-- !query +SELECT (SELECT current_user) = current_user() AS function_won FROM v_user +-- !query schema +struct +-- !query output +true + + +-- !query +DECLARE current_user = 'abc' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_user, current_user FROM v_user +-- !query schema +struct +-- !query output +admin.admin admin.admin + + +-- !query +DROP TEMPORARY VARIABLE current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH t1 AS (SELECT 1 AS current_date) +SELECT typeof((SELECT current_date)) FROM t1 +-- !query schema +struct +-- !query output +date + + +-- !query +WITH t1 AS (SELECT 1 AS current_timestamp) +SELECT typeof((SELECT current_timestamp)) FROM t1 +-- !query schema +struct +-- !query output +timestamp + + +-- !query +WITH t1 AS (SELECT 1 AS user) +SELECT (SELECT user) = current_user() AS function_won FROM t1 +-- !query schema +struct +-- !query output +true + + +-- !query +WITH t1 AS (SELECT 1 AS session_user) +SELECT (SELECT session_user) = current_user() AS function_won FROM t1 +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user)) +-- !query schema +struct +-- !query output +bigint + + +-- !query +DROP VIEW v_user +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW v_time +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out new file mode 
100644 index 0000000000000..c47ba461ba6cc --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out @@ -0,0 +1,131 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_user FROM v_user +-- !query schema +struct +-- !query output +admin.admin + + +-- !query +SELECT current_time FROM v_time +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT 'abc' AS current_user, current_user = current_user() AS function_won +-- !query schema +struct +-- !query output +abc true + + +-- !query +SELECT (SELECT current_user) = current_user() AS function_won FROM v_user +-- !query schema +struct +-- !query output +true + + +-- !query +DECLARE current_user = 'abc' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_user, current_user FROM v_user +-- !query schema +struct +-- !query output +admin.admin admin.admin + + +-- !query +DROP TEMPORARY VARIABLE current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH t1 AS (SELECT 1 AS current_date) +SELECT typeof((SELECT current_date)) FROM t1 +-- !query schema +struct +-- !query output +date + + +-- !query +WITH t1 AS (SELECT 1 AS current_timestamp) +SELECT typeof((SELECT current_timestamp)) FROM t1 +-- !query schema +struct +-- !query output +timestamp + + +-- !query +WITH t1 AS (SELECT 1 AS user) +SELECT (SELECT user) = current_user() AS function_won FROM t1 +-- !query schema +struct +-- !query output +true + + +-- !query +WITH t1 AS (SELECT 1 AS session_user) +SELECT (SELECT session_user) = current_user() AS function_won FROM t1 +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 
typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user)) +-- !query schema +struct +-- !query output +bigint + + +-- !query +DROP VIEW v_user +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW v_time +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out new file mode 100644 index 0000000000000..92d2370e2b980 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out @@ -0,0 +1,327 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT identity_fn(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT col_vs_param(42) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING) +RETURNS STRING RETURN current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won +-- !query schema +struct +-- !query output +false + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT) +RETURNS STRING RETURN typeof(current_date) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param_date(42) +-- !query schema +struct +-- !query output +int + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION 
paramless_vs_param_time(current_time INT) +RETURNS STRING RETURN typeof(current_time) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param_time(42) +-- !query schema +struct +-- !query output +int + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT) +RETURNS INT RETURN grouping__id +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param_grouping(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT) +RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT lca_vs_param(42) +-- !query schema +struct +-- !query output +1000 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT) +RETURNS INT RETURN (SELECT (SELECT x) FROM v1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_vs_param(42) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT) +RETURNS INT RETURN (SELECT (SELECT x)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_param_pure(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +DECLARE x = 999 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT identity_fn(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_fn(42) +-- !query schema +struct +-- !query output +999 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING) +RETURNS TABLE(c STRING) RETURN SELECT current_user 
AS c +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored') +-- !query schema +struct +-- !query output +true + + +-- !query +DROP TEMPORARY VARIABLE x +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW v1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION identity_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION col_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_date +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_time +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_grouping +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION lca_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_param_pure +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION inner_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION tvf_paramless_vs_param +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out new file mode 100644 index 0000000000000..2baca91aa987b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out @@ -0,0 +1,360 @@ +-- Automatically 
generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT identity_fn(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT col_vs_param(42) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING) +RETURNS STRING RETURN current_user +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won +-- !query schema +struct +-- !query output +true + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT) +RETURNS STRING RETURN typeof(current_date) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param_date(42) +-- !query schema +struct +-- !query output +date + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT) +RETURNS STRING RETURN typeof(current_time) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT paramless_vs_param_time(42) +-- !query schema +struct +-- !query output +time(6) + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT) +RETURNS INT RETURN grouping__id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_GROUPING_EXPRESSION", + "sqlState" : "42K0E", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 12, + "fragment" : "grouping__id" + } ] +} + + +-- !query 
+SELECT paramless_vs_param_grouping(42) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`paramless_vs_param_grouping`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 38, + "fragment" : "paramless_vs_param_grouping(42)" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT) +RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT lca_vs_param(42) +-- !query schema +struct +-- !query output +1000 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT) +RETURNS INT RETURN (SELECT (SELECT x) FROM v1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_vs_param(42) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT) +RETURNS INT RETURN (SELECT (SELECT x)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_param_pure(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +DECLARE x = 999 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT identity_fn(42) +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT outer_fn(42) +-- !query schema +struct +-- !query output +999 + + +-- !query +CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING) +RETURNS TABLE(c STRING) RETURN SELECT 
current_user AS c +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored') +-- !query schema +struct +-- !query output +true + + +-- !query +DROP TEMPORARY VARIABLE x +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW v1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION identity_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION col_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_date +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_time +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION paramless_vs_param_grouping +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`paramless_vs_param_grouping`" + } +} + + +-- !query +DROP TEMPORARY FUNCTION lca_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_vs_param +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_param_pure +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION inner_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION outer_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION tvf_paramless_vs_param +-- !query schema +struct<> +-- !query output + From dc4bafc3951e8f4d014cbf72a709ef5e96f3ae62 Mon Sep 17 00:00:00 2001 From: Dejan Krakovic Date: Mon, 1 Jun 2026 23:09:36 +0000 
Subject: [PATCH 2/2] [SPARK-57188][SQL] Address review: drop dead table-function parameter markers Per review feedback: the SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY marker stamped on table-UDF parameter aliases was never consumed. A table UDF body references its parameter as an outer reference (the param lives in the lateral join's left child), so `resolveColumnByName` returns None and a parameterless built-in function already wins via the pre-existing "function beats outer reference" precedence -- before `isSQLFunctionParameterAlias` is evaluated. The legacy flag therefore never restored param-shadowing for table UDFs (the legacy golden showed `function_won = true` for the TVF case, identical to non-legacy). - Remove the marker from `SessionCatalog.makeSQLTableFunctionPlan` (with a comment explaining why table UDFs need no marker). - Restrict the `CreateSQLFunctionCommand` marker to scalar functions (`!isTableFunc`). - Reword the TVF golden test comment: it is a regression guard for the pre-existing function-vs-outer-reference precedence, not this PR's marker logic. Goldens are unchanged (table-UDF behavior is identical), confirming the markers were inert. The scalar direct-body UDF fix (the bug-report shape) is unaffected. 
--- .../sql/catalyst/catalog/SessionCatalog.scala | 13 +++++-------- .../command/CreateSQLFunctionCommand.scala | 19 +++++++++++++------ .../inputs/sql-udf-name-precedence.sql | 6 ++++-- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index f44113c6c189f..fd700f1ee0130 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -2046,20 +2046,17 @@ class SessionCatalog( } } - val funcInputMetadata = new MetadataBuilder() - .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) - .build() val inputCast = paddedInput.zip(param.fields).map { case (expr, param) => // Add outer references to all attributes in the function input. val outer = expr.transform { case a: Attribute => OuterReference(a) } - // Mark the alias as function input so name resolution can give a parameterless - // built-in function precedence over a same-named UDF parameter. - Alias(Cast(outer, param.dataType), param.name)( - qualifier = qualifier, - explicitMetadata = Some(funcInputMetadata)) + // No SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY marker here: a table UDF body references + // its parameter as an outer reference (the param lives in the lateral join's left + // child), so resolveColumnByName returns None and a parameterless built-in function + // already wins via the pre-existing "function beats outer reference" precedence. 
+ Alias(Cast(outer, param.dataType), param.name)(qualifier = qualifier) } val inputPlan = Project(inputCast, OneRowRelation()) LateralJoin(inputPlan, LateralSubquery(query.get), Inner, None) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala index 501bd7a38f6b3..a597087085b42 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala @@ -109,11 +109,18 @@ case class CreateSQLFunctionCommand( // Qualify the input parameters with the function name so that attributes referencing // the function input parameters can be resolved correctly. val qualifier = Seq(name.funcName) - // Mark the parameter aliases as function input so name resolution can give a - // parameterless built-in function precedence over a same-named UDF parameter. - val funcInputMetadata = new MetadataBuilder() - .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) - .build() + // Mark scalar UDF parameter aliases as function input so name resolution can give a + // parameterless built-in function precedence over a same-named UDF parameter. Table UDF + // bodies reference parameters as outer references, where a parameterless function already + // wins via the pre-existing "function beats outer reference" precedence, so the marker is + // not applied (and would not be consumed) there. 
+ val funcInputMetadata = if (isTableFunc) { + None + } else { + Some(new MetadataBuilder() + .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true) + .build()) + } val input = param.map(p => Alias( { val defaultExpr = p.getDefault() @@ -136,7 +143,7 @@ case class CreateSQLFunctionCommand( } Cast(defaultPlan, p.dataType) } - }, p.name)(qualifier = qualifier, explicitMetadata = Some(funcInputMetadata))) + }, p.name)(qualifier = qualifier, explicitMetadata = funcInputMetadata)) Project(input, OneRowRelation()) } else { OneRowRelation() diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql index f120052baad7e..a96e98323a872 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql @@ -74,8 +74,10 @@ SELECT outer_fn(42); CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING) RETURNS TABLE(c STRING) RETURN SELECT current_user AS c; --- Parameterless function wins over TVF parameter (same rule must apply on the --- makeSQLTableFunctionPlan path). +-- Parameterless function wins over a same-named table UDF parameter. Note this is NOT the +-- SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY rule: a table UDF body references its parameter as +-- an outer reference, so the function already wins via the pre-existing "function beats outer +-- reference" precedence (the legacy flag does not change this). Kept as a regression guard. SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored'); DROP TEMPORARY VARIABLE x;