From 2dfc3ba42dccee191c9f2ee8c6544d6f50184f9b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 1 May 2026 14:53:03 +0200 Subject: [PATCH 1/7] Fix CREATE + VISUALISE FROM producing invalid double-wrapped SQL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit transform_global_sql concatenated side-effect SQL (CREATE, INSERT, …) with the VISUALISE FROM injection, then the caller wrapped the whole thing in another CREATE TABLE AS, producing invalid SQL like: CREATE TABLE __ggsql_global__ AS CREATE TEMP TABLE data … Split the return into side-effect statements (executed directly) and the queryable part (wrapped as the global temp table). Co-Authored-By: Claude Opus 4.6 --- src/execute/cte.rs | 70 ++++++++++++++++++++++++++++++++++++---------- src/execute/mod.rs | 37 ++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 17 deletions(-) diff --git a/src/execute/cte.rs b/src/execute/cte.rs index 0ab34848..0e6dcc5e 100644 --- a/src/execute/cte.rs +++ b/src/execute/cte.rs @@ -210,15 +210,15 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> { Some((cte_prefix, trailing)) } -/// Transform global SQL for execution with temp tables +/// Transform global SQL for execution with temp tables. /// -/// If the SQL has a WITH clause followed by SELECT, extracts just the SELECT -/// portion and transforms CTE references to temp table names. -/// For SQL without WITH clause, just transforms any CTE references. +/// Returns statements to execute directly as side effects (CREATE, INSERT, …) +/// and an optional query whose result should be wrapped as the global temp +/// table. pub fn transform_global_sql( source_tree: &SourceTree, materialized_ctes: &HashSet, -) -> Option { +) -> (Vec, Option) { // Try to extract trailing SELECT (WITH...SELECT or direct SELECT) let select_sql = split_with_query(source_tree) .map(|(_, select)| select) @@ -229,16 +229,58 @@ pub fn transform_global_sql( }); if let Some(select_sql) = select_sql { - Some(transform_cte_references(&select_sql, materialized_ctes)) - } else if does_consume_cte(source_tree) { - // Non-SELECT executable SQL (CREATE, INSERT, UPDATE, DELETE) - // OR VISUALISE FROM (which injects SELECT * FROM ) - // Extract SQL (with injection if VISUALISE FROM) and transform CTE references - let sql = source_tree.extract_sql()?; - Some(transform_cte_references(&sql, materialized_ctes)) + return ( + vec![], + Some(transform_cte_references(&select_sql, materialized_ctes)), + ); + } + + if !does_consume_cte(source_tree) { + return (vec![], None); + } + + // We have non-SELECT executable SQL (CREATE, INSERT, …) and/or + // VISUALISE FROM. Split them: side-effect statements run directly, + // VISUALISE FROM becomes the queryable part. + // + // Only actual statements (CREATE, INSERT, …) are side effects — a bare + // WITH clause without a trailing statement is not executable on its own + // (its CTEs are already materialized separately). + let root = source_tree.root(); + + let side_effect_stmts = r#" + (sql_statement + [(create_statement) + (insert_statement) + (update_statement) + (delete_statement)] @stmt) + "#; + let side_effects: Vec = source_tree + .find_texts(&root, side_effect_stmts) + .into_iter() + .map(|s| transform_cte_references(s.trim(), materialized_ctes)) + .filter(|s| !s.is_empty()) + .collect(); + + let viz_from_query = source_tree + .find_text( + &root, + r#"(visualise_statement (from_clause (table_ref) @table))"#, + ) + .map(|table| { + let q = format!("SELECT * FROM {}", table); + transform_cte_references(&q, materialized_ctes) + }); + + if !side_effects.is_empty() || viz_from_query.is_some() { + (side_effects, viz_from_query) } else { - // No executable SQL (just CTEs) - None + // does_consume_cte was true but we found no specific statements or + // VISUALISE FROM — fall back to extract_sql as the query. + let query = source_tree + .extract_sql() + .map(|s| transform_cte_references(&s, materialized_ctes)); + (vec![], query) } } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 8a1a3911..ef67a919 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1063,16 +1063,22 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result Date: Fri, 1 May 2026 14:55:02 +0200 Subject: [PATCH 2/7] add news bullet --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2b3e140..199ffbab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## [Unreleased] +### Fixed + +- Side effects like `CREATE TEMP TABLE` before the `VISUALISE` statement are now + separated from directly feeding into the visualisation data (#415) + ## 0.3.0 - 2026-04-29 ### Added From 254f400ef0af5af31fe7a46bece90a0c475b5056 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 1 May 2026 15:11:41 +0200 Subject: [PATCH 3/7] cargo fmt --- src/execute/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index ef67a919..2962e085 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1065,8 +1065,7 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result Date: Mon, 4 May 2026 12:16:43 +0200 Subject: [PATCH 4/7] Fix grammar: INSERT/UPDATE/DELETE parsed as other_sql_statement The tree-sitter lexer was misclassifying INSERT, UPDATE, and DELETE keywords as bare_identifier tokens, causing them to fall through to other_sql_statement instead of their specific statement rules. This happened because keyword prefixes (e.g. IN for INSERT) derailed the lexer's specific keyword path. Fix: wrap the three leading keywords in token(prec(1, ...)) so they get dedicated high-priority lexer tokens, and downgrade other_sql_statement's catch-all regex from token() to a bare regex. Co-Authored-By: Claude Opus 4.6 --- src/execute/mod.rs | 20 ++++ tree-sitter-ggsql/grammar.js | 8 +- tree-sitter-ggsql/test/corpus/basic.txt | 126 ++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 4 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 2962e085..13ebdda3 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1925,6 +1925,26 @@ mod tests { assert_eq!(result.data.get(key).unwrap().height(), 5); } + #[cfg(feature = "duckdb")] + #[test] + fn test_visualise_from_after_create_and_insert() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + CREATE TEMP TABLE data(x INTEGER, y INTEGER); + INSERT INTO data VALUES (1, 10), (2, 20), (3, 30); + VISUALISE x, y FROM data + DRAW point + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + let key = result.specs[0].layers[0] + .data_key + .as_ref() + .expect("Layer should have data_key"); + assert_eq!(result.data.get(key).unwrap().height(), 3); + } + /// Test that literal mappings survive stat transforms (e.g., histogram grouping). /// /// This tests the fix for issue #129 where literal aesthetic columns like diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 732cfdb6..a1b53e87 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -135,7 +135,7 @@ module.exports = grammar({ // INSERT statement insert_statement: $ => prec.right(seq( - caseInsensitive('INSERT'), + token(prec(1, caseInsensitive('INSERT'))), repeat1(choice( $.sql_keyword, $.identifier, @@ -149,7 +149,7 @@ module.exports = grammar({ // UPDATE statement update_statement: $ => prec.right(seq( - caseInsensitive('UPDATE'), + token(prec(1, caseInsensitive('UPDATE'))), repeat1(choice( $.sql_keyword, $.identifier, @@ -163,7 +163,7 @@ module.exports = grammar({ // DELETE statement delete_statement: $ => prec.right(seq( - caseInsensitive('DELETE'), + token(prec(1, caseInsensitive('DELETE'))), repeat1(choice( $.sql_keyword, $.identifier, @@ -177,7 +177,7 @@ module.exports = grammar({ other_sql_statement: $ => prec(-1, repeat1(choice( $.non_from_sql_keyword, - token(/[^\s;(),'"]+/), + /[^\s;(),'"]+/, $.string, $.number, $.subquery, diff --git a/tree-sitter-ggsql/test/corpus/basic.txt b/tree-sitter-ggsql/test/corpus/basic.txt index ec39fc83..43bd2895 100644 --- a/tree-sitter-ggsql/test/corpus/basic.txt +++ b/tree-sitter-ggsql/test/corpus/basic.txt @@ -2865,6 +2865,132 @@ VISUALISE DRAW point MAPPING x AS x, y AS y (bare_identifier)))) (aesthetic_name))))))))) +================================================================================ +CREATE statement before VISUALISE FROM +================================================================================ + +CREATE TEMP TABLE data AS SELECT 1; +VISUALISE FROM data DRAW point + +-------------------------------------------------------------------------------- + +(query + (sql_portion + (sql_statement + (create_statement + (select_statement + (select_body + (number)))))) + (visualise_statement + (visualise_keyword) + (from_clause + (table_ref + (qualified_name + (identifier + (bare_identifier))))) + (viz_clause + (draw_clause + (geom_type))))) + +================================================================================ +INSERT statement before VISUALISE FROM +================================================================================ + +INSERT INTO data VALUES (1, 2); +VISUALISE FROM data DRAW point + +-------------------------------------------------------------------------------- + +(query + (sql_portion + (sql_statement + (insert_statement))) + (visualise_statement + (visualise_keyword) + (from_clause + (table_ref + (qualified_name + (identifier + (bare_identifier))))) + (viz_clause + (draw_clause + (geom_type))))) + +================================================================================ +UPDATE statement before VISUALISE FROM +================================================================================ + +UPDATE data SET x = 1; +VISUALISE FROM data DRAW point + +-------------------------------------------------------------------------------- + +(query + (sql_portion + (sql_statement + (update_statement))) + (visualise_statement + (visualise_keyword) + (from_clause + (table_ref + (qualified_name + (identifier + (bare_identifier))))) + (viz_clause + (draw_clause + (geom_type))))) + +================================================================================ +DELETE statement before VISUALISE FROM +================================================================================ + +DELETE FROM data WHERE x = 1; +VISUALISE FROM data DRAW point + +-------------------------------------------------------------------------------- + +(query + (sql_portion + (sql_statement + (delete_statement))) + (visualise_statement + (visualise_keyword) + (from_clause + (table_ref + (qualified_name + (identifier + (bare_identifier))))) + (viz_clause + (draw_clause + (geom_type))))) + +================================================================================ +CREATE and INSERT before VISUALISE FROM +================================================================================ + +CREATE TABLE data (x INT); +INSERT INTO data VALUES (1); +VISUALISE FROM data DRAW point + +-------------------------------------------------------------------------------- + +(query + (sql_portion + (sql_statement + (create_statement)) + (sql_statement + (insert_statement))) + (visualise_statement + (visualise_keyword) + (from_clause + (table_ref + (qualified_name + (identifier + (bare_identifier))))) + (viz_clause + (draw_clause + (geom_type))))) + ================================================================================ Arbitrary SQL setup statements ================================================================================ From ba38f645a207b4423534c156dea40af77cd5fb67 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 4 May 2026 12:19:50 +0200 Subject: [PATCH 5/7] Add cross-references between the two preamble execution paths Co-Authored-By: Claude Opus 4.6 --- src/execute/cte.rs | 7 ++++--- src/execute/mod.rs | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/execute/cte.rs b/src/execute/cte.rs index 0e6dcc5e..7f93939c 100644 --- a/src/execute/cte.rs +++ b/src/execute/cte.rs @@ -243,9 +243,10 @@ pub fn transform_global_sql( // VISUALISE FROM. Split them: side-effect statements run directly, // VISUALISE FROM becomes the queryable part. // - // Only actual statements (CREATE, INSERT, …) are side effects — a bare - // WITH clause without a trailing statement is not executable on its own - // (its CTEs are already materialized separately). + // Only structured DML is handled here — other_sql_statement nodes + // (INSTALL, LOAD, SET, …) are pre-executed in prepare_data_with_reader. + // A bare WITH clause without a trailing statement is not executable on + // its own (its CTEs are already materialized separately). let root = source_tree.root(); let side_effect_stmts = r#" diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 13ebdda3..1d049cc8 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1043,7 +1043,9 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result Date: Mon, 4 May 2026 12:23:32 +0200 Subject: [PATCH 6/7] Rename does_consume_cte to has_executable_sql Co-Authored-By: Claude Opus 4.6 --- src/execute/cte.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/execute/cte.rs b/src/execute/cte.rs index 7f93939c..54e68ee7 100644 --- a/src/execute/cte.rs +++ b/src/execute/cte.rs @@ -235,7 +235,7 @@ pub fn transform_global_sql( ); } - if !does_consume_cte(source_tree) { + if !has_executable_sql(source_tree) { return (vec![], None); } @@ -276,7 +276,7 @@ pub fn transform_global_sql( if !side_effects.is_empty() || viz_from_query.is_some() { (side_effects, viz_from_query) } else { - // does_consume_cte was true but we found no specific statements or + // has_executable_sql was true but we found no specific statements or // VISUALISE FROM — fall back to extract_sql as the query. let query = source_tree .extract_sql() @@ -291,7 +291,7 @@ pub fn transform_global_sql( /// This handles cases like `WITH a AS (...), b AS (...) VISUALISE` where the WITH /// clause has no trailing SELECT - these CTEs are still extracted for layer use /// but shouldn't be executed as global data. -pub fn does_consume_cte(source_tree: &SourceTree) -> bool { +pub fn has_executable_sql(source_tree: &SourceTree) -> bool { let root = source_tree.root(); // Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE, From b6805de6d98346d796d36efca08e58a38caaca25 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 4 May 2026 12:28:57 +0200 Subject: [PATCH 7/7] Execute side-effect statements before trailing SELECT transform_global_sql returned empty side-effects when a SELECT was present, so CREATE/INSERT preambles were skipped. Hoist side-effect extraction so it runs regardless of the query type. Co-Authored-By: Claude Opus 4.6 --- src/execute/cte.rs | 49 +++++++++++++++++++++++----------------------- src/execute/mod.rs | 21 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/execute/cte.rs b/src/execute/cte.rs index 54e68ee7..9b544988 100644 --- a/src/execute/cte.rs +++ b/src/execute/cte.rs @@ -219,18 +219,39 @@ pub fn transform_global_sql( source_tree: &SourceTree, materialized_ctes: &HashSet, ) -> (Vec, Option) { + // Collect side-effect statements (CREATE, INSERT, UPDATE, DELETE) that + // need to run before the main query. These appear alongside a trailing + // SELECT or VISUALISE FROM. + // + // Only structured DML is handled here — other_sql_statement nodes + // (INSTALL, LOAD, SET, …) are pre-executed in prepare_data_with_reader. + let root = source_tree.root(); + + let side_effect_stmts = r#" + (sql_statement + [(create_statement) + (insert_statement) + (update_statement) + (delete_statement)] @stmt) + "#; + let side_effects: Vec = source_tree + .find_texts(&root, side_effect_stmts) + .into_iter() + .map(|s| transform_cte_references(s.trim(), materialized_ctes)) + .filter(|s| !s.is_empty()) + .collect(); + // Try to extract trailing SELECT (WITH...SELECT or direct SELECT) let select_sql = split_with_query(source_tree) .map(|(_, select)| select) .or_else(|| { // Fallback: direct SELECT statement (no WITH clause) - let root = source_tree.root(); source_tree.find_text(&root, "(sql_statement (select_statement) @select)") }); if let Some(select_sql) = select_sql { return ( - vec![], + side_effects, Some(transform_cte_references(&select_sql, materialized_ctes)), ); } @@ -239,30 +260,10 @@ pub fn transform_global_sql( return (vec![], None); } - // We have non-SELECT executable SQL (CREATE, INSERT, …) and/or - // VISUALISE FROM. Split them: side-effect statements run directly, - // VISUALISE FROM becomes the queryable part. - // - // Only structured DML is handled here — other_sql_statement nodes - // (INSTALL, LOAD, SET, …) are pre-executed in prepare_data_with_reader. + // We have non-SELECT executable SQL and/or VISUALISE FROM. + // Side-effects run directly, VISUALISE FROM becomes the queryable part. // A bare WITH clause without a trailing statement is not executable on // its own (its CTEs are already materialized separately). - let root = source_tree.root(); - - let side_effect_stmts = r#" - (sql_statement - [(create_statement) - (insert_statement) - (update_statement) - (delete_statement)] @stmt) - "#; - let side_effects: Vec = source_tree - .find_texts(&root, side_effect_stmts) - .into_iter() - .map(|s| transform_cte_references(s.trim(), materialized_ctes)) - .filter(|s| !s.is_empty()) - .collect(); - let viz_from_query = source_tree .find_text( &root, diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 1d049cc8..0ad813cc 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1947,6 +1947,27 @@ mod tests { assert_eq!(result.data.get(key).unwrap().height(), 3); } + #[cfg(feature = "duckdb")] + #[test] + fn test_select_after_create_and_insert() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + CREATE TEMP TABLE data(x INTEGER, y INTEGER); + INSERT INTO data VALUES (1, 10), (2, 20), (3, 30); + SELECT * FROM data + VISUALISE x, y + DRAW point + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + let key = result.specs[0].layers[0] + .data_key + .as_ref() + .expect("Layer should have data_key"); + assert_eq!(result.data.get(key).unwrap().height(), 3); + } + /// Test that literal mappings survive stat transforms (e.g., histogram grouping). /// /// This tests the fix for issue #129 where literal aesthetic columns like