Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 66 additions & 2 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
use std::collections::HashMap;
use std::collections::{HashMap, hash_map::Entry};
use std::str::FromStr;
use std::sync::Arc;
use std::vec;
Expand All @@ -29,7 +29,7 @@ use datafusion_common::datatype::{DataTypeExt, FieldExt};
use datafusion_common::error::add_possible_columns_to_diag;
use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err};
use datafusion_common::{
DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err,
DFSchemaRef, Diagnostic, SchemaError, Span, field_not_found, internal_err,
plan_datafusion_err,
};
use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder};
Expand Down Expand Up @@ -274,6 +274,19 @@ pub struct PlannerContext {
/// (UNION/INTERSECT/EXCEPT), holds the schema of the left-most query.
/// Used to alias duplicate expressions to match the left side's field names.
set_expr_left_schema: Option<DFSchemaRef>,

/// Relation aliases and names bound by currently planned FROM scopes.
relation_scopes: Vec<RelationScope>,
}

#[derive(Debug, Clone, Default)]
struct RelationScope {
bindings: HashMap<String, RelationBinding>,
}

#[derive(Debug, Clone)]
struct RelationBinding {
span: Option<Span>,
}

impl Default for PlannerContext {
Expand All @@ -292,6 +305,7 @@ impl PlannerContext {
outer_from_schema: None,
create_table_schema: None,
set_expr_left_schema: None,
relation_scopes: vec![],
}
}

Expand Down Expand Up @@ -377,6 +391,56 @@ impl PlannerContext {
Ok(())
}

/// Run `f` with a fresh relation binding scope for a single FROM clause.
pub(crate) fn with_new_relation_scope<T>(
&mut self,
f: impl FnOnce(&mut Self) -> Result<T>,
) -> Result<T> {
self.relation_scopes.push(RelationScope::default());
let result = f(self);
self.relation_scopes.pop();
result
}

/// Clear relation scopes inherited by cloning an outer query context.
pub(crate) fn clear_relation_scopes(&mut self) {
self.relation_scopes.clear();
}

pub(crate) fn insert_relation_binding(
&mut self,
display_name: impl Into<String>,
span: Option<Span>,
) -> Result<()> {
let Some(scope) = self.relation_scopes.last_mut() else {
return Ok(());
};

let display_name = display_name.into();
match scope.bindings.entry(display_name.clone()) {
Entry::Occupied(existing) => {
let existing_name = existing.key();
let existing_span = existing.get().span;
let mut diagnostic = Diagnostic::new_error(
format!("duplicate relation alias or name '{display_name}'"),
span,
);
diagnostic.add_note(
format!("'{existing_name}' was previously bound here"),
existing_span,
);
plan_err!(
"duplicate relation alias or name '{display_name}'";
diagnostic = diagnostic
)
}
Entry::Vacant(entry) => {
entry.insert(RelationBinding { span });
Ok(())
}
}
}

/// Return the types of parameters (`$1`, `$2`, etc) if known
pub fn prepare_param_data_types(&self) -> &[FieldRef] {
&self.prepare_param_data_types
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
// Each query has its own planner context, including CTEs that are visible within that query.
// It also inherits the CTEs from the outer query by cloning the outer planner context.
let mut query_plan_context = outer_planner_context.clone();
query_plan_context.clear_relation_scopes();
let planner_context = &mut query_plan_context;

let Query {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/src/relation/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
t: TableWithJoins,
planner_context: &mut PlannerContext,
) -> Result<LogicalPlan> {
self.register_relation_binding(&t.relation, planner_context)?;
let mut left = if is_lateral(&t.relation) {
self.create_relation_subquery(t.relation, planner_context)?
} else {
Expand All @@ -49,6 +50,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
join: Join,
planner_context: &mut PlannerContext,
) -> Result<LogicalPlan> {
self.register_relation_binding(&join.relation, planner_context)?;
let right = if is_lateral_join(&join)? {
self.create_relation_subquery(join.relation, planner_context)?
} else {
Expand Down
79 changes: 74 additions & 5 deletions datafusion/sql/src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use datafusion_expr::planner::{
};
use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder, expr::Unnest};
use datafusion_expr::{Subquery, SubqueryAlias};
use sqlparser::ast::{FunctionArg, FunctionArgExpr, Spanned, TableFactor};
use sqlparser::ast::{
FunctionArg, FunctionArgExpr, Spanned, TableAlias, TableFactor, TableWithJoins,
};

mod join;

Expand Down Expand Up @@ -80,6 +82,69 @@ impl<'a, 'b, S: ContextProvider> RelationPlannerContext
}

impl<S: ContextProvider> SqlToRel<'_, S> {
pub(crate) fn register_relation_binding(
&self,
relation: &TableFactor,
planner_context: &mut PlannerContext,
) -> Result<()> {
match relation {
TableFactor::Table {
name, alias, args, ..
} => {
if let Some(alias) = alias {
self.register_table_alias_binding(alias, planner_context)
} else if args.is_none() {
let table_ref = self.object_name_to_table_reference(name.clone())?;
let display_name = table_ref.to_string();
planner_context.insert_relation_binding(
display_name,
Span::try_from_sqlparser_span(relation.span()),
)
} else {
Ok(())
}
}
TableFactor::Derived { alias, .. }
| TableFactor::NestedJoin { alias, .. }
| TableFactor::UNNEST { alias, .. }
| TableFactor::Function { alias, .. } => {
if let Some(alias) = alias {
self.register_table_alias_binding(alias, planner_context)
} else {
Ok(())
}
}
_ => Ok(()),
}
}

fn register_table_alias_binding(
&self,
alias: &TableAlias,
planner_context: &mut PlannerContext,
) -> Result<()> {
let display_name = self.ident_normalizer.normalize(alias.name.clone());
planner_context.insert_relation_binding(
display_name,
Span::try_from_sqlparser_span(alias.name.span),
)
}

fn plan_nested_join_relation(
&self,
table_with_joins: TableWithJoins,
is_aliased_nested_join: bool,
planner_context: &mut PlannerContext,
) -> Result<LogicalPlan> {
if is_aliased_nested_join {
planner_context.with_new_relation_scope(|planner_context| {
self.plan_table_with_joins(table_with_joins, planner_context)
})
} else {
self.plan_table_with_joins(table_with_joins, planner_context)
}
}

/// Create a `LogicalPlan` that scans the named relation.
///
/// First tries any registered extension planners. If no extension handles
Expand Down Expand Up @@ -214,10 +279,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
TableFactor::NestedJoin {
table_with_joins,
alias,
} => (
self.plan_table_with_joins(*table_with_joins, planner_context)?,
alias,
),
} => {
let logical_plan = self.plan_nested_join_relation(
*table_with_joins,
alias.is_some(),
planner_context,
)?;
(logical_plan, alias)
}
TableFactor::UNNEST {
alias,
array_exprs,
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
mut from: Vec<TableWithJoins>,
planner_context: &mut PlannerContext,
) -> Result<LogicalPlan> {
match from.len() {
planner_context.with_new_relation_scope(|planner_context| match from.len() {
0 => Ok(LogicalPlanBuilder::empty(true).build()?),
1 => {
let input = from.remove(0);
Expand Down Expand Up @@ -732,7 +732,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context.set_outer_from_schema(old_outer_from_schema);
left.build()
}
}
})
}

/// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions.
Expand Down
100 changes: 100 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2423,6 +2423,106 @@ fn equijoin_explicit_syntax() {
);
}

#[test]
fn join_duplicate_relation_alias_errors() {
assert_duplicate_relation_alias_error(
"SELECT * FROM person p JOIN orders p ON true",
"p",
);
}

#[test]
fn comma_join_duplicate_relation_alias_errors() {
assert_duplicate_relation_alias_error("SELECT * FROM person p, orders p", "p");
}

#[test]
fn join_duplicate_unaliased_relation_and_alias_errors() {
assert_duplicate_relation_alias_error(
"SELECT * FROM person JOIN orders person ON true",
"person",
);
}

fn assert_duplicate_relation_alias_error(sql: &str, alias: &str) {
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
err.strip_backtrace(),
format!("Error during planning: duplicate relation alias or name '{alias}'")
);
}

#[test]
fn join_distinct_relation_aliases_ok() {
let sql = "SELECT p.id, o.order_id FROM person p JOIN orders o ON true";
let plan = logical_plan(sql).unwrap();
assert_snapshot!(
plan,
@r"
Projection: p.id, o.order_id
Inner Join: Filter: Boolean(true)
SubqueryAlias: p
TableScan: person
SubqueryAlias: o
TableScan: orders
"
);
}

#[test]
fn join_qualified_same_leaf_relation_names_ok() {
let sql = "SELECT public.orders.order_id AS public_order_id, \
other.orders.order_id AS other_order_id \
FROM public.orders JOIN other.orders ON true";
let plan = logical_plan(sql).unwrap();
assert_snapshot!(
plan,
@r"
Projection: public.orders.order_id AS public_order_id, other.orders.order_id AS other_order_id
Inner Join: Filter: Boolean(true)
TableScan: public.orders
TableScan: other.orders
"
);
}

#[test]
fn nested_join_relation_scope_does_not_leak() {
let sql = "SELECT p.id FROM person p WHERE EXISTS (SELECT 1 FROM orders p)";
let plan = logical_plan(sql).unwrap();
assert_snapshot!(
plan,
@r"
Projection: p.id
Filter: EXISTS (<subquery>)
Subquery:
Projection: Int64(1)
SubqueryAlias: p
TableScan: orders
SubqueryAlias: p
TableScan: person
"
);
}

#[test]
fn aliased_nested_join_relation_scope_does_not_leak() {
let sql = "SELECT p.id FROM (person p JOIN orders o ON p.id = o.customer_id) p";
let plan = logical_plan(sql).unwrap();
assert_snapshot!(
plan,
@r"
Projection: p.id
SubqueryAlias: p
Inner Join: Filter: p.id = o.customer_id
SubqueryAlias: p
TableScan: person
SubqueryAlias: o
TableScan: orders
"
);
}

#[test]
fn equijoin_with_condition() {
let sql = "SELECT id, order_id \
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/join.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -1231,13 +1231,13 @@ statement ok
create table t1(v1 int) as values(100);

## Query with Ambiguous column reference
query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1
query error DataFusion error: Error during planning: duplicate relation alias or name 't1'
select count(*)
from t1
right outer join t1
on t1.v1 > 0;

query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1
query error DataFusion error: Error during planning: duplicate relation alias or name 't1'
select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1);

statement ok
Expand Down
Loading