diff --git a/prqlc/prqlc/src/semantic/resolver/flatten.rs b/prqlc/prqlc/src/semantic/resolver/flatten.rs index d28aafb80e82..37cd2004ec99 100644 --- a/prqlc/prqlc/src/semantic/resolver/flatten.rs +++ b/prqlc/prqlc/src/semantic/resolver/flatten.rs @@ -142,7 +142,27 @@ impl PlFold for Flattener { ..pipeline }); } - kind => (self.fold_expr(*t.input)?, fold_transform_kind(self, kind)?), + kind => { + let input = self.fold_expr(*t.input)?; + + // For join/append, folding the `with` sub-pipeline (which happens + // inside `fold_transform_kind`) may set `self.sort` to the sub-pipeline's + // sort. That sort references columns of the joined relation, which are not + // in scope for downstream transforms in the outer pipeline. Per the PRQL + // spec a join retains the left (input) side's order; append is handled the + // same way here. Snapshot the input's sort and restore it after the fold. + let input_sort = + matches!(kind, TransformKind::Join { .. } | TransformKind::Append(_)) + .then(|| self.sort.clone()); + + let kind = fold_transform_kind(self, kind)?; + + if let Some(input_sort) = input_sort { + self.sort = input_sort; + } + + (input, kind) + } }; // In case we're appending or joining another pipeline, we do not want to apply the diff --git a/prqlc/prqlc/src/sql/gen_query.rs b/prqlc/prqlc/src/sql/gen_query.rs index b0bbabd82147..cbd0589e77ee 100644 --- a/prqlc/prqlc/src/sql/gen_query.rs +++ b/prqlc/prqlc/src/sql/gen_query.rs @@ -904,4 +904,87 @@ mod test { let err = crate::tests::compile(query).unwrap_err(); assert!(err.to_string().contains("not accessible in this context")); } + + #[test] + fn test_sort_both_branches_join_take() { + // A `sort` on both the left and the joined branch (both wrapped in CTEs + // by a preceding `group`), followed by `take`, must not leak the joined + // branch's sort column into the implicit ORDER BY synthesized for the + // LIMIT. That ORDER BY must use the left CTE's column (`table_1.a` in the + // snapshot below), not the joined branch's out-of-scope source table. 
+ let query = r#" + from foo.bar + group {a} (aggregate {x = count c}) + sort {a} + join side:left ( + from foo.bar + group {a} (aggregate {y = count c}) + sort {a} + derive {a_r = a} + select {a_r, y} + ) (this.a == that.a_r) + derive {g = a} + take 10 + select {g, x, y} + "#; + + assert_snapshot!(crate::tests::compile(query).unwrap(), @r" + WITH table_1 AS ( + SELECT + COUNT(*) AS x, + a + FROM + foo.bar + GROUP BY + a + ), + table_3 AS ( + SELECT + COUNT(*) AS y, + a + FROM + foo.bar + GROUP BY + a + ), + table_4 AS ( + SELECT + a AS a_r, + y, + a + FROM + table_3 + ), + table_0 AS ( + SELECT + a_r, + y, + a + FROM + table_4 + ), + table_2 AS ( + SELECT + table_1.a AS g, + table_1.x, + table_0.y, + table_1.a + FROM + table_1 + LEFT OUTER JOIN table_0 ON table_1.a = table_0.a_r + ORDER BY + table_1.a + LIMIT + 10 + ) + SELECT + g, + x, + y + FROM + table_2 + ORDER BY + g + "); + } }