diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 32daf65a71fa4..051eb9f916ad6 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -16,7 +16,7 @@ // under the License. //! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST) -use std::collections::HashMap; +use std::collections::{HashMap, hash_map::Entry}; use std::str::FromStr; use std::sync::Arc; use std::vec; @@ -29,7 +29,7 @@ use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; use datafusion_common::{ - DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err, + DFSchemaRef, Diagnostic, SchemaError, Span, field_not_found, internal_err, plan_datafusion_err, }; use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; @@ -274,6 +274,19 @@ pub struct PlannerContext { /// (UNION/INTERSECT/EXCEPT), holds the schema of the left-most query. /// Used to alias duplicate expressions to match the left side's field names. set_expr_left_schema: Option, + + /// Relation aliases and names bound by currently planned FROM scopes. + relation_scopes: Vec, +} + +#[derive(Debug, Clone, Default)] +struct RelationScope { + bindings: HashMap, +} + +#[derive(Debug, Clone)] +struct RelationBinding { + span: Option, } impl Default for PlannerContext { @@ -292,6 +305,7 @@ impl PlannerContext { outer_from_schema: None, create_table_schema: None, set_expr_left_schema: None, + relation_scopes: vec![], } } @@ -377,6 +391,56 @@ impl PlannerContext { Ok(()) } + /// Run `f` with a fresh relation binding scope for a single FROM clause. + pub(crate) fn with_new_relation_scope( + &mut self, + f: impl FnOnce(&mut Self) -> Result, + ) -> Result { + self.relation_scopes.push(RelationScope::default()); + let result = f(self); + self.relation_scopes.pop(); + result + } + + /// Clear relation scopes inherited by cloning an outer query context. + pub(crate) fn clear_relation_scopes(&mut self) { + self.relation_scopes.clear(); + } + + pub(crate) fn insert_relation_binding( + &mut self, + display_name: impl Into, + span: Option, + ) -> Result<()> { + let Some(scope) = self.relation_scopes.last_mut() else { + return Ok(()); + }; + + let display_name = display_name.into(); + match scope.bindings.entry(display_name.clone()) { + Entry::Occupied(existing) => { + let existing_name = existing.key(); + let existing_span = existing.get().span; + let mut diagnostic = Diagnostic::new_error( + format!("duplicate relation alias or name '{display_name}'"), + span, + ); + diagnostic.add_note( + format!("'{existing_name}' was previously bound here"), + existing_span, + ); + plan_err!( + "duplicate relation alias or name '{display_name}'"; + diagnostic = diagnostic + ) + } + Entry::Vacant(entry) => { + entry.insert(RelationBinding { span }); + Ok(()) + } + } + } + /// Return the types of parameters (`$1`, `$2`, etc) if known pub fn prepare_param_data_types(&self) -> &[FieldRef] { &self.prepare_param_data_types diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index e320d2ee6e9c1..2223380f221cf 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -44,6 +44,7 @@ impl SqlToRel<'_, S> { // Each query has its own planner context, including CTEs that are visible within that query. // It also inherits the CTEs from the outer query by cloning the outer planner context. let mut query_plan_context = outer_planner_context.clone(); + query_plan_context.clear_relation_scopes(); let planner_context = &mut query_plan_context; let Query { diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 8e1a8817309f0..f61d01c23c64c 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -29,6 +29,7 @@ impl SqlToRel<'_, S> { t: TableWithJoins, planner_context: &mut PlannerContext, ) -> Result { + self.register_relation_binding(&t.relation, planner_context)?; let mut left = if is_lateral(&t.relation) { self.create_relation_subquery(t.relation, planner_context)? } else { @@ -49,6 +50,7 @@ impl SqlToRel<'_, S> { join: Join, planner_context: &mut PlannerContext, ) -> Result { + self.register_relation_binding(&join.relation, planner_context)?; let right = if is_lateral_join(&join)? { self.create_relation_subquery(join.relation, planner_context)? } else { diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 14ccab870317e..e904c129f2e53 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -29,7 +29,9 @@ use datafusion_expr::planner::{ }; use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder, expr::Unnest}; use datafusion_expr::{Subquery, SubqueryAlias}; -use sqlparser::ast::{FunctionArg, FunctionArgExpr, Spanned, TableFactor}; +use sqlparser::ast::{ + FunctionArg, FunctionArgExpr, Spanned, TableAlias, TableFactor, TableWithJoins, +}; mod join; @@ -80,6 +82,69 @@ impl<'a, 'b, S: ContextProvider> RelationPlannerContext } impl SqlToRel<'_, S> { + pub(crate) fn register_relation_binding( + &self, + relation: &TableFactor, + planner_context: &mut PlannerContext, + ) -> Result<()> { + match relation { + TableFactor::Table { + name, alias, args, .. + } => { + if let Some(alias) = alias { + self.register_table_alias_binding(alias, planner_context) + } else if args.is_none() { + let table_ref = self.object_name_to_table_reference(name.clone())?; + let display_name = table_ref.to_string(); + planner_context.insert_relation_binding( + display_name, + Span::try_from_sqlparser_span(relation.span()), + ) + } else { + Ok(()) + } + } + TableFactor::Derived { alias, .. } + | TableFactor::NestedJoin { alias, .. } + | TableFactor::UNNEST { alias, .. } + | TableFactor::Function { alias, .. } => { + if let Some(alias) = alias { + self.register_table_alias_binding(alias, planner_context) + } else { + Ok(()) + } + } + _ => Ok(()), + } + } + + fn register_table_alias_binding( + &self, + alias: &TableAlias, + planner_context: &mut PlannerContext, + ) -> Result<()> { + let display_name = self.ident_normalizer.normalize(alias.name.clone()); + planner_context.insert_relation_binding( + display_name, + Span::try_from_sqlparser_span(alias.name.span), + ) + } + + fn plan_nested_join_relation( + &self, + table_with_joins: TableWithJoins, + is_aliased_nested_join: bool, + planner_context: &mut PlannerContext, + ) -> Result { + if is_aliased_nested_join { + planner_context.with_new_relation_scope(|planner_context| { + self.plan_table_with_joins(table_with_joins, planner_context) + }) + } else { + self.plan_table_with_joins(table_with_joins, planner_context) + } + } + /// Create a `LogicalPlan` that scans the named relation. /// /// First tries any registered extension planners. If no extension handles @@ -214,10 +279,14 @@ impl SqlToRel<'_, S> { TableFactor::NestedJoin { table_with_joins, alias, - } => ( - self.plan_table_with_joins(*table_with_joins, planner_context)?, - alias, - ), + } => { + let logical_plan = self.plan_nested_join_relation( + *table_with_joins, + alias.is_some(), + planner_context, + )?; + (logical_plan, alias) + } TableFactor::UNNEST { alias, array_exprs, diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 82cca91d4e8ae..c1e73f8ff0d11 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -704,7 +704,7 @@ impl SqlToRel<'_, S> { mut from: Vec, planner_context: &mut PlannerContext, ) -> Result { - match from.len() { + planner_context.with_new_relation_scope(|planner_context| match from.len() { 0 => Ok(LogicalPlanBuilder::empty(true).build()?), 1 => { let input = from.remove(0); @@ -732,7 +732,7 @@ impl SqlToRel<'_, S> { planner_context.set_outer_from_schema(old_outer_from_schema); left.build() } - } + }) } /// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions. diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 574468f4b111f..b8f862f02a5ff 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2423,6 +2423,106 @@ fn equijoin_explicit_syntax() { ); } +#[test] +fn join_duplicate_relation_alias_errors() { + assert_duplicate_relation_alias_error( + "SELECT * FROM person p JOIN orders p ON true", + "p", + ); +} + +#[test] +fn comma_join_duplicate_relation_alias_errors() { + assert_duplicate_relation_alias_error("SELECT * FROM person p, orders p", "p"); +} + +#[test] +fn join_duplicate_unaliased_relation_and_alias_errors() { + assert_duplicate_relation_alias_error( + "SELECT * FROM person JOIN orders person ON true", + "person", + ); +} + +fn assert_duplicate_relation_alias_error(sql: &str, alias: &str) { + let err = logical_plan(sql).expect_err("query should have failed"); + assert_eq!( + err.strip_backtrace(), + format!("Error during planning: duplicate relation alias or name '{alias}'") + ); +} + +#[test] +fn join_distinct_relation_aliases_ok() { + let sql = "SELECT p.id, o.order_id FROM person p JOIN orders o ON true"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r" + Projection: p.id, o.order_id + Inner Join: Filter: Boolean(true) + SubqueryAlias: p + TableScan: person + SubqueryAlias: o + TableScan: orders + " + ); +} + +#[test] +fn join_qualified_same_leaf_relation_names_ok() { + let sql = "SELECT public.orders.order_id AS public_order_id, \ + other.orders.order_id AS other_order_id \ + FROM public.orders JOIN other.orders ON true"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r" + Projection: public.orders.order_id AS public_order_id, other.orders.order_id AS other_order_id + Inner Join: Filter: Boolean(true) + TableScan: public.orders + TableScan: other.orders + " + ); +} + +#[test] +fn nested_join_relation_scope_does_not_leak() { + let sql = "SELECT p.id FROM person p WHERE EXISTS (SELECT 1 FROM orders p)"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r" + Projection: p.id + Filter: EXISTS () + Subquery: + Projection: Int64(1) + SubqueryAlias: p + TableScan: orders + SubqueryAlias: p + TableScan: person + " + ); +} + +#[test] +fn aliased_nested_join_relation_scope_does_not_leak() { + let sql = "SELECT p.id FROM (person p JOIN orders o ON p.id = o.customer_id) p"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r" + Projection: p.id + SubqueryAlias: p + Inner Join: Filter: p.id = o.customer_id + SubqueryAlias: p + TableScan: person + SubqueryAlias: o + TableScan: orders + " + ); +} + #[test] fn equijoin_with_condition() { let sql = "SELECT id, order_id \ diff --git a/datafusion/sqllogictest/test_files/join.slt.part b/datafusion/sqllogictest/test_files/join.slt.part index b9d163d877596..9b487572883e3 100644 --- a/datafusion/sqllogictest/test_files/join.slt.part +++ b/datafusion/sqllogictest/test_files/join.slt.part @@ -1231,13 +1231,13 @@ statement ok create table t1(v1 int) as values(100); ## Query with Ambiguous column reference -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +query error DataFusion error: Error during planning: duplicate relation alias or name 't1' select count(*) from t1 right outer join t1 on t1.v1 > 0; -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +query error DataFusion error: Error during planning: duplicate relation alias or name 't1' select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1); statement ok