From e5d28c3f7243574165b68c5c2911fa0f4195a9a9 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Fri, 5 Jun 2026 16:34:28 -0400 Subject: [PATCH 1/9] interactive: import/export in the IR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `import "name"` and `export "name" = expr` to the IR and both surface syntaxes, and remove the `result` statement: `export` is now the sole way to declare a program output, and only at the root scope. `Program.export` is a named list (was a single result id); `Node::Import` resolves against a registry at install time. `Node::Import` is a stub outside the server (the example renderers panic on it, no example uses it); the intended end-state is one named-source substrate that subsumes `Input(usize)` — there should not be two ways to bring in a source. `survey_sources` (was `count_inputs`) returns both kinds until that cutover. Threaded through lower, both parsers, explain, and the ddir_vec/ddir_col/dump_explain examples; example programs now use `export "result" = …`. The string lexer is read-until-quote (no escapes — names don't use them). 
Co-Authored-By: Claude Opus 4.8 --- interactive/examples/ddir_col.rs | 18 ++- interactive/examples/ddir_vec.rs | 22 +++- interactive/examples/dump_explain.rs | 20 +++- interactive/examples/programs/kcore.ddir | 2 +- interactive/examples/programs/reach.ddir | 2 +- interactive/examples/programs/reach.ddp | 2 +- interactive/examples/programs/scc.ddir | 2 +- interactive/examples/programs/scc.ddp | 2 +- interactive/examples/programs/stable.ddir | 2 +- interactive/examples/programs/stable.ddp | 2 +- interactive/src/explain.rs | 127 +++++++++++++++++----- interactive/src/ir.rs | 32 ++++-- interactive/src/lib.rs | 40 ++++--- interactive/src/lower.rs | 56 +++++++--- interactive/src/parse/applicative.rs | 38 ++++++- interactive/src/parse/mod.rs | 7 +- interactive/src/parse/pipe.rs | 38 ++++++- 17 files changed, 314 insertions(+), 98 deletions(-) diff --git a/interactive/examples/ddir_col.rs b/interactive/examples/ddir_col.rs index 8be9ad863..eb7e54898 100644 --- a/interactive/examples/ddir_col.rs +++ b/interactive/examples/ddir_col.rs @@ -147,6 +147,7 @@ mod render { Node::Input(i) => { nodes.insert(id, Rendered::Collection(inputs[*i].clone())); }, + Node::Import { name } => panic!("ddir_col: Import {:?} not supported in this harness (no trace registry).", name), Node::Linear { input, ops } => { let c = nodes[input].collection(); let ops = ops.clone(); @@ -305,10 +306,18 @@ fn run(name: &str, stmts: Vec, n_inputs: usize, nodes: u64, edges: let mut compiled: Program = lower::lower(stmts); println!("{}: {} IR nodes (before optimize)", name, compiled.nodes.len()); compiled.optimize(); - println!("{}: {} IR nodes (after optimize), result = {}", name, compiled.nodes.len(), compiled.result); + println!("{}: {} IR nodes (after optimize), exports = {:?}", + name, compiled.nodes.len(), + compiled.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); compiled.dump(); let name = name.to_string(); - let result_id = compiled.result; + let (driven_name, result_id) = { + let pick = 
compiled.export.iter().find(|(n, _)| n == "result") + .or_else(|| compiled.export.first()) + .expect("ddir_col: program declares no exports"); + (pick.0.clone(), pick.1) + }; + println!("{}: driving export {:?} (id {})", name, driven_name, result_id); timely::execute_from_args(std::env::args().skip(4), move |worker| { use timely::dataflow::InputHandle; @@ -412,7 +421,10 @@ fn main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); + if !imports.is_empty() { + panic!("ddir_col: program references imports {:?} but this harness has no trace registry.", imports); + } let name = std::path::Path::new(&program).file_stem().map(|s| s.to_string_lossy().into_owned()).unwrap_or(program.clone()); run(&name, stmts, n_inputs, nodes, edges, arity, batch, rounds); } diff --git a/interactive/examples/ddir_vec.rs b/interactive/examples/ddir_vec.rs index 33b13d1b1..de8b0eb7b 100644 --- a/interactive/examples/ddir_vec.rs +++ b/interactive/examples/ddir_vec.rs @@ -53,6 +53,7 @@ fn render_program<'scope>(program: &Program, scope: Scope<'scope, DdirTime>, inp for (&id, node) in program.nodes.iter() { match node { Node::Input(i) => { nodes.insert(id, Rendered::Collection(inputs[*i].clone())); }, + Node::Import { name } => panic!("ddir_vec: Import {:?} not supported in this harness (no trace registry).", name), Node::Linear { input, ops } => { use differential_dataflow::AsCollection; use differential_dataflow::lattice::Lattice; @@ -172,14 +173,24 @@ fn run( // seeded from `QUERY=`. 
if explain { let input_arities = vec![(arity, 0usize); n_inputs]; - compiled = interactive::explain::explain(&compiled, &input_arities); + let import_arities = std::collections::BTreeMap::new(); + compiled = interactive::explain::explain(&compiled, &input_arities, &import_arities); } println!("{}: {} IR nodes (before optimize)", name, compiled.nodes.len()); compiled.optimize(); - println!("{}: {} IR nodes (after optimize), result = {}", name, compiled.nodes.len(), compiled.result); + println!("{}: {} IR nodes (after optimize), exports = {:?}", + name, compiled.nodes.len(), + compiled.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); compiled.dump(); let name = name.to_string(); - let result_id = compiled.result; + // Drive one export: prefer `$result`, else the first declared. + let (driven_name, result_id) = { + let pick = compiled.export.iter().find(|(n, _)| n == "result") + .or_else(|| compiled.export.first()) + .expect("ddir_vec: program declares no exports"); + (pick.0.clone(), pick.1) + }; + println!("{}: driving export {:?} (id {})", name, driven_name, result_id); let total_inputs = if explain { n_inputs + 1 } else { n_inputs }; let query_input_idx = if explain { Some(n_inputs) } else { None }; @@ -297,7 +308,10 @@ fn main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); + if !imports.is_empty() { + panic!("ddir_vec: program references imports {:?} but this harness has no trace registry.", imports); + } let name = std::path::Path::new(&program).file_stem().map(|s| s.to_string_lossy().into_owned()).unwrap_or(program.clone()); run(&name, stmts, n_inputs, nodes, edges, arity, batch, rounds, explain); } diff --git a/interactive/examples/dump_explain.rs b/interactive/examples/dump_explain.rs index 3692501be..800e27f92 100644 --- a/interactive/examples/dump_explain.rs +++ b/interactive/examples/dump_explain.rs @@ -23,7 +23,7 @@ fn 
main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); let original = lower::lower(stmts); println!("-- ===================================================="); @@ -32,7 +32,11 @@ fn main() { print_ddp(&original); let input_arities = vec![(arity, 0usize); n_inputs]; - let rewritten = explain::explain(&original, &input_arities); + let import_arities: BTreeMap = imports + .iter() + .map(|n| (n.clone(), (arity, 0usize))) + .collect(); + let rewritten = explain::explain(&original, &input_arities, &import_arities); println!(); println!("-- ===================================================="); @@ -85,6 +89,9 @@ fn print_ddp(p: &Program) { Node::Input(i) => { println!("{}let n{} = input {};", pad, id, i); } + Node::Import { name } => { + println!("{}let n{} = import {:?};", pad, id, name); + } Node::Linear { input, ops } => { println!("{}let n{} = n{} | {};", pad, id, input, fmt_linear_ops(ops)); } @@ -138,7 +145,14 @@ fn print_ddp(p: &Program) { } } } - println!("{}result n{};", " ".repeat(indent), p.result); + let pad = " ".repeat(indent); + for (name, id) in &p.export { + if name == "result" { + println!("{}result n{};", pad, id); + } else { + println!("{}export {:?} = n{};", pad, name, id); + } + } } fn fmt_linear_ops(ops: &[LinearOp]) -> String { diff --git a/interactive/examples/programs/kcore.ddir b/interactive/examples/programs/kcore.ddir index 2823adbb5..fa4540b84 100644 --- a/interactive/examples/programs/kcore.ddir +++ b/interactive/examples/programs/kcore.ddir @@ -25,4 +25,4 @@ peel: { } let core_edges = CONCAT(symm, peel::removals); -result INSPECT(REDUCE(MAP(core_edges, ($0[0] ;)), COUNT), kcore_degrees); +export "result" = INSPECT(REDUCE(MAP(core_edges, ($0[0] ;)), COUNT), kcore_degrees); diff --git a/interactive/examples/programs/reach.ddir b/interactive/examples/programs/reach.ddir index 349594cea..b606964a7 100644 --- 
a/interactive/examples/programs/reach.ddir +++ b/interactive/examples/programs/reach.ddir @@ -10,4 +10,4 @@ reach: { var reach = REDUCE(CONCAT(roots, proposals), DISTINCT); } -result INSPECT(ARRANGE(MAP(reach::reach, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(reach::reach, (;))), total); diff --git a/interactive/examples/programs/reach.ddp b/interactive/examples/programs/reach.ddp index 36d24b979..0dede6e31 100644 --- a/interactive/examples/programs/reach.ddp +++ b/interactive/examples/programs/reach.ddp @@ -10,4 +10,4 @@ reach: { var reach = roots + proposals | distinct; } -result reach::reach | key(;) | arrange | inspect(total); +export "result" = reach::reach | key(;) | arrange | inspect(total); diff --git a/interactive/examples/programs/scc.ddir b/interactive/examples/programs/scc.ddir index a219586be..121433d21 100644 --- a/interactive/examples/programs/scc.ddir +++ b/interactive/examples/programs/scc.ddir @@ -35,4 +35,4 @@ outer: { var scc_raw = CONCAT(trim_bwd, NEGATE(edges)); } -result INSPECT(ARRANGE(MAP(outer::scc, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(outer::scc, (;))), total); diff --git a/interactive/examples/programs/scc.ddp b/interactive/examples/programs/scc.ddp index 88ef7e790..91c673ddf 100644 --- a/interactive/examples/programs/scc.ddp +++ b/interactive/examples/programs/scc.ddp @@ -34,4 +34,4 @@ outer: { var trim = trim_bwd - edges; } -result outer::scc | map(;) | arrange | inspect(total); +export "result" = outer::scc | map(;) | arrange | inspect(total); diff --git a/interactive/examples/programs/stable.ddir b/interactive/examples/programs/stable.ddir index b245c7fe7..ed0918e54 100644 --- a/interactive/examples/programs/stable.ddir +++ b/interactive/examples/programs/stable.ddir @@ -19,4 +19,4 @@ matching: { var removals = ARRANGE(CONCAT(removals, by_a, NEGATE(props))); } -result INSPECT(ARRANGE(MAP(matching::props, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(matching::props, (;))), total); diff --git 
a/interactive/examples/programs/stable.ddp b/interactive/examples/programs/stable.ddp index d4e21126d..afd5d626d 100644 --- a/interactive/examples/programs/stable.ddp +++ b/interactive/examples/programs/stable.ddp @@ -12,4 +12,4 @@ matching: { var removals = removals + by_a - props | arrange; } -result matching::props | key(;) | arrange | inspect(total); +export "result" = matching::props | key(;) | arrange | inspect(total); diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index 4a9e944a1..dc536f6ef 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -184,12 +184,18 @@ use clone::CloneResult; /// demand-set explanations for queries against the original's result. /// See the module doc for the architecture. /// -/// `input_arities` gives `(key_arity, val_arity)` per input, in input -/// order. Necessary because input row shapes aren't recoverable from the -/// IR alone (Projections only invert with known input arity). -pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { +/// `input_arities` gives `(key_arity, val_arity)` per positional input; +/// `import_arities` gives the same per named import (entries needed for +/// every distinct `Import { name }` referenced in `p`). Both are +/// necessary because data-source shapes aren't recoverable from the IR +/// alone (Projections only invert with known input arity). +pub fn explain( + p: &Program, + input_arities: &[(usize, usize)], + import_arities: &BTreeMap, +) -> Program { let mut b = Builder::new(); - let arities = compute_arities(p, input_arities); + let arities = compute_arities(p, input_arities, import_arities); let depths = p.depths(); // The two user-chain lengths we track at each node: // @@ -220,21 +226,37 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { let dep_user_lens: &BTreeMap = &depths; let n_inputs = input_arities.len(); + // Distinct import names referenced by `p`, in deterministic order. 
+ let import_names: Vec = { + let mut s = std::collections::BTreeSet::new(); + for node in p.nodes.values() { + if let Node::Import { name } = node { s.insert(name.clone()); } + } + s.into_iter().collect() + }; + // ---- outer scope ---- - // Original inputs of `p`, plus one extra "query" input at index n. + // Original inputs of `p`, one outer-scope Import per referenced name, + // plus one extra "query" input. let original_inputs: Vec = (0..n_inputs).map(|i| b.input(i)).collect(); + let original_imports: BTreeMap = import_names.iter() + .map(|n| (n.clone(), b.push(Node::Import { name: n.clone() }))) + .collect(); let query_input = b.input(n_inputs); // witness: a clone of `p`, with lift_iter chains so every witness // collection has a host-visible `(data, user)` form via auto-leave at // each enclosing user scope's exit. - let witness = b.clone_with_lifts(p, &original_inputs, 0); + let witness = b.clone_with_lifts(p, &original_inputs, &original_imports, 0); // ---- explain scope ---- b.scope_open(); - // Demand-set Variables (one per input). + // Demand-set Variables (one per input, one per import). let demand_sets: Vec = (0..n_inputs).map(|_| b.variable()).collect(); + let import_demand_sets: BTreeMap = import_names.iter() + .map(|n| (n.clone(), b.variable())) + .collect(); // forward inputs: demand_set_ | semijoin(actual_input_). // Enter actual inputs into explain scope implicitly; semijoin restricts to @@ -245,11 +267,18 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { b.semijoin_data(demand_sets[i], original_inputs[i], k, v) }) .collect(); + let forward_imports: BTreeMap = import_names.iter() + .map(|n| { + let (k, v) = import_arities[n]; + let semi = b.semijoin_data(import_demand_sets[n], original_imports[n], k, v); + (n.clone(), semi) + }) + .collect(); // forward: same clone procedure as witness, with substituted inputs. 
// Offset = 1 because this clone lives INSIDE the explain scope: its real // PointStamp depth at any point is one more than its local user_level. - let forward = b.clone_with_lifts(p, &forward_inputs, 1); + let forward = b.clone_with_lifts(p, &forward_inputs, &forward_imports, 1); // Demand Variables are pre-allocated *only* for user-program `var` IR // nodes (`Node::Variable`). These are the only places where the demand @@ -271,10 +300,14 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { // from `contribs[id]`, store it in `demand_var`, then dispatch the // node's bward rule (which pushes onto its inputs' contribs). // - // Query input directly seeds `contribs[result]` — the result demand - // starts with the query rows. + // Query input directly seeds `contribs[first_export]` — the demand + // starts with the query rows against the first export. v0 only + // explains a single output; multi-export programs would need one + // query input per export and per-output dispatch in the seeding. + let primary_export = p.export.first() + .expect("explain: program has no export to seed query input against").1; let mut contribs: BTreeMap> = BTreeMap::new(); - contribs.entry(p.result).or_default().push(query_input); + contribs.entry(primary_export).or_default().push(query_input); for (&id, node) in p.nodes.iter().rev() { // Scope / EndScope carry no demand and have no bward action. @@ -319,13 +352,19 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { b.emit_reverse(id, node, &witness, &forward, &demand_var, &arities, &host_user_lens, dep_user_lens, &mut contribs); } - // Bind demand-set variables: demand_set_ := distinct(demand_set_ + (demand_ | strip | semijoin actual)). + // Bind demand-set variables for inputs and imports symmetrically: + // demand_set_X := distinct(demand_set_X + (demand_ | strip | semijoin actual)). 
// Build a Vec mapping input index `i` to its IR id in `p`, so the - // per-input loop below is O(n) total instead of O(n^2). + // per-input loop below is O(n) total instead of O(n^2). Imports are + // looked up by name; multiple `Import { name }` nodes in `p` share + // demand via the dedup pass — we route from any one of them. let mut input_ids: Vec> = vec![None; n_inputs]; + let mut import_ids: BTreeMap = BTreeMap::new(); for (&id, node) in &p.nodes { - if let Node::Input(i) = node { - input_ids[*i] = Some(id); + match node { + Node::Input(i) => { input_ids[*i] = Some(id); } + Node::Import { name } => { import_ids.entry(name.clone()).or_insert(id); } + _ => {} } } for i in 0..n_inputs { @@ -338,17 +377,30 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { let dist = b.distinct_full(combined, kx, vx); b.bind(demand_sets[i], dist); } - - // Inspects on demand-sets. - let mut last_inspect: Option = None; - for (i, &mv) in demand_sets.iter().enumerate() { - last_inspect = Some(b.inspect(mv, format!("demand_set_{}", i))); + for name in &import_names { + let imp_id = import_ids[name]; + let (kx, vx) = arities[&imp_id]; + let stripped = b.project(demand_var[&imp_id], strip_user_and_q(kx, vx)); + let semi = b.semijoin_data(stripped, original_imports[name], kx, vx); + let combined = b.concat(vec![import_demand_sets[name], semi]); + let dist = b.distinct_full(combined, kx, vx); + b.bind(import_demand_sets[name], dist); } - let result_inner = last_inspect.unwrap_or_else(|| demand_sets.first().copied().unwrap_or(0)); + + // Per-source demand-set ids inside the explain scope; we leave each + // out and register as a named export after closing the scope. 
+ let inputs_leaves: Vec<(String, Id)> = demand_sets.iter().enumerate() + .map(|(i, &mv)| (format!("demand:input{}", i), mv)) + .collect(); + let imports_leaves: Vec<(String, Id)> = import_names.iter() + .map(|n| (format!("demand:{}", n), import_demand_sets[n])) + .collect(); b.scope_close(); - let result_outer = b.leave(result_inner, 1); - b.set_result(result_outer); + for (name, inner) in inputs_leaves.into_iter().chain(imports_leaves) { + let outer = b.leave(inner, 1); + b.add_export(name, outer); + } b.into_program() } @@ -371,7 +423,7 @@ mod builder { impl Builder { pub(super) fn new() -> Self { Builder { - program: Program { nodes: BTreeMap::new(), result: 0 }, + program: Program { nodes: BTreeMap::new(), export: Vec::new() }, next_id: 0, } } @@ -414,7 +466,7 @@ mod builder { } pub(super) fn scope_open(&mut self) { self.push(Node::Scope); } pub(super) fn scope_close(&mut self) { self.push(Node::EndScope); } - pub(super) fn set_result(&mut self, id: Id) { self.program.result = id; } + pub(super) fn add_export(&mut self, name: String, id: Id) { self.program.export.push((name, id)); } pub(super) fn into_program(self) -> Program { self.program } } } @@ -426,13 +478,17 @@ mod builder { /// recoverable from the IR alone — `Projection`s only invert with known /// input arity, and lift_iter sites need to know how many user-iter coords /// already sit in the val. -mod arities { +pub mod arities { use std::collections::BTreeMap; use crate::ir::{Id, LinearOp, Node, Program}; use crate::parse::Reducer; - pub(super) fn compute_arities(p: &Program, input_arities: &[(usize, usize)]) -> BTreeMap { + pub fn compute_arities( + p: &Program, + input_arities: &[(usize, usize)], + import_arities: &BTreeMap, + ) -> BTreeMap { // Variables are referenced before their Binds appear in id order; // resolve a Variable's shape via its body. 
let var_body: BTreeMap = p.nodes.iter().filter_map(|(_, n)| { @@ -448,6 +504,10 @@ mod arities { if out.contains_key(&id) { continue; } let shape = match node { Node::Input(i) => Some(input_arities[*i]), + Node::Import { name } => Some( + *import_arities.get(name) + .unwrap_or_else(|| panic!("explain: no arity registered for import {:?}", name)) + ), Node::Linear { input, ops } => out.get(input).map(|s| apply_ops_arity(*s, ops)), // Try each input — for self-recursive Variables that appear // as `Concat([var, ...])`, the first input's shape isn't @@ -512,6 +572,7 @@ mod clone { &mut self, p: &Program, input_subst: &[Id], + import_subst: &BTreeMap, enclosing_scope_depth: usize, ) -> CloneResult { let mut in_scope: BTreeMap = BTreeMap::new(); @@ -570,6 +631,14 @@ mod clone { host.insert(id, input_subst[*i]); None } + Node::Import { name } => { + // Imports are at depth 0, host-visible directly. + let sub = *import_subst.get(name) + .unwrap_or_else(|| panic!("clone: no substitution for import {:?}", name)); + in_scope.insert(id, sub); + host.insert(id, sub); + None + } Node::Linear { input, ops } => { Some(self.linear(in_scope[input], ops.clone())) } @@ -758,7 +827,7 @@ mod reverse { let side = |inp: Id| Side::for_input(inp, witness, forward, arities, host_user_lens, dep_user_lens); match node { - Node::Input(_) => { /* terminal; feeds demand-set seeding. */ } + Node::Input(_) | Node::Import { .. } => { /* terminal; feeds demand-set seeding. */ } Node::Linear { input, ops } => { let op = match ops.as_slice() { diff --git a/interactive/src/ir.rs b/interactive/src/ir.rs index 37b8643d5..8c0fc45c4 100644 --- a/interactive/src/ir.rs +++ b/interactive/src/ir.rs @@ -56,6 +56,15 @@ pub enum LinearOp { /// Symbolic IR node. pub enum Node { Input(usize), + /// A named external trace, resolved against a registry at install time; + /// shape is inferred from the registry, not the IR. 
+ /// + /// STUB: only the server resolves this; the example renderers don't, and no + /// example program uses it yet. The intended end-state is a single + /// named-source substrate that also subsumes `Input(usize)` — there should + /// not be two ways to bring in a source. Until that cutover, `Input` is the + /// working input and `Import` is forward-looking. + Import { name: String }, /// A chain of linear operations on a stream of (data, time, diff) triples. Linear { input: Id, ops: Vec }, Concat(Vec), @@ -72,7 +81,8 @@ pub enum Node { pub struct Program { pub nodes: BTreeMap, - pub result: Id, + /// Named outputs of the program. + pub export: Vec<(String, Id)>, } impl Program { @@ -81,6 +91,7 @@ impl Program { for (&id, node) in &self.nodes { let desc = match node { Node::Input(i) => format!("Input({})", i), + Node::Import { name } => format!("Import({:?})", name), Node::Linear { input, ops } => { let ops_str: Vec = ops.iter().map(|op| match op { LinearOp::Project(_) => "Project".into(), @@ -104,7 +115,9 @@ impl Program { }; println!(" {:3}: {}", id, desc); } - println!(" result: {}", self.result); + for (name, id) in &self.export { + println!(" export {:?} = {}", name, id); + } } /// Per-node user-scope depth. Computed by walking `nodes` in id order @@ -141,7 +154,7 @@ impl Program { Node::Concat(ids) => ids.clone(), Node::Leave(id, _) => vec![*id], Node::Bind { value, .. } => vec![*value], - Node::Input(_) | Node::Variable | Node::Scope | Node::EndScope => vec![], + Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => vec![], }; for input in inputs { users.entry(input).or_default().push(user_id); @@ -173,7 +186,9 @@ impl Program { Ok(()) } - /// Replace all references to `from` with `to` across the IR. + /// Redirect every reference to node `from` so it points at `to`, across all + /// nodes' inputs and the export list. 
Used by `optimize` when it collapses + /// or fuses one node into another and the old id must be retargeted. fn rewrite(&mut self, from: Id, to: Id) { for node in self.nodes.values_mut() { match node { @@ -195,10 +210,12 @@ impl Program { if *variable == from { *variable = to; } if *value == from { *value = to; } }, - Node::Input(_) | Node::Variable | Node::Scope | Node::EndScope => {}, + Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => {}, } } - if self.result == from { self.result = to; } + for (_, id) in self.export.iter_mut() { + if *id == from { *id = to; } + } } /// Optimize the IR in place, iterating to a fixed point. @@ -247,7 +264,7 @@ impl Program { _ => {}, } } - if self.result != usize::MAX { *ref_counts.entry(self.result).or_default() += 1; } + for (_, id) in &self.export { *ref_counts.entry(*id).or_default() += 1; } let fusions: Vec<(Id, Id)> = self.nodes.iter() .filter_map(|(&id, node)| { @@ -285,6 +302,7 @@ impl Program { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.0 { Node::Input(i) => write!(f, "Input({})", i), + Node::Import { name } => write!(f, "Import({:?})", name), Node::Linear { input, ops } => write!(f, "Linear({},{:?})", input, ops), Node::Concat(ids) => write!(f, "Concat({:?})", ids), Node::Arrange(input) => write!(f, "Arrange({})", input), diff --git a/interactive/src/lib.rs b/interactive/src/lib.rs index 45c9ff9a2..0a34ead7b 100644 --- a/interactive/src/lib.rs +++ b/interactive/src/lib.rs @@ -3,33 +3,41 @@ pub mod ir; pub mod lower; pub mod explain; +use std::collections::BTreeSet; + use parse::{Stmt, Expr}; -/// Count the number of distinct inputs referenced in a program. -pub fn count_inputs(stmts: &[Stmt]) -> usize { - let mut max_input = 0usize; +/// Survey a program's external sources: the count of positional inputs (one +/// more than the largest `input N` index, zero if none appear) and the set of +/// names referenced by `import "name"`. 
Two kinds because `import` does not yet +/// subsume `input` — see `ir::Node::Import`; this returns one number when that +/// cutover happens. +pub fn survey_sources(stmts: &[Stmt]) -> (usize, BTreeSet) { + let mut positional = 0usize; + let mut imports = BTreeSet::new(); + walk_stmts(stmts, &mut positional, &mut imports); + (positional, imports) +} + +fn walk_stmts(stmts: &[Stmt], positional: &mut usize, imports: &mut BTreeSet) { for stmt in stmts { match stmt { - Stmt::Let(_, expr) | Stmt::Var(_, expr) | Stmt::Result(expr) => { - max_input = max_input.max(count_inputs_expr(expr)); - }, - Stmt::Scope(_, body) => { - max_input = max_input.max(count_inputs(body)); - }, + Stmt::Let(_, expr) | Stmt::Var(_, expr) | Stmt::Export(_, expr) => walk_expr(expr, positional, imports), + Stmt::Scope(_, body) => walk_stmts(body, positional, imports), } } - max_input } -fn count_inputs_expr(expr: &Expr) -> usize { +fn walk_expr(expr: &Expr, positional: &mut usize, imports: &mut BTreeSet) { match expr { - Expr::Input(n) => n + 1, + Expr::Input(n) => { *positional = (*positional).max(n + 1); }, + Expr::Import(name) => { imports.insert(name.clone()); }, Expr::Map(e, _) | Expr::Negate(e) | Expr::Arrange(e) | Expr::EnterAt(e, _) | Expr::LiftIter(e) | Expr::Filter(e, _) - | Expr::Reduce(e, _) | Expr::Inspect(e, _) => count_inputs_expr(e), - Expr::Join(l, r, _) => count_inputs_expr(l).max(count_inputs_expr(r)), - Expr::Concat(es) => es.iter().map(|e| count_inputs_expr(e)).max().unwrap_or(0), - Expr::Name(_) | Expr::Qualified(_, _) => 0, + | Expr::Reduce(e, _) | Expr::Inspect(e, _) => walk_expr(e, positional, imports), + Expr::Join(l, r, _) => { walk_expr(l, positional, imports); walk_expr(r, positional, imports); }, + Expr::Concat(es) => { for e in es { walk_expr(e, positional, imports); } }, + Expr::Name(_) | Expr::Qualified(_, _) => {}, } } diff --git a/interactive/src/lower.rs b/interactive/src/lower.rs index 01e8e535c..64a026c20 100644 --- a/interactive/src/lower.rs +++ 
b/interactive/src/lower.rs @@ -10,7 +10,7 @@ //! each item is lowered once all the names it transitively needs at this //! level are bound. A cycle among `let`s is an error (use a `var` to //! introduce recursion). -//! 4. Lower the (single) `result` expression, if any. +//! 4. Lower the `export` expressions (root scope only). //! 5. Lower each `var`'s body and emit a `Bind` from the placeholder to the //! resulting value. @@ -51,12 +51,15 @@ impl Lowering { } fn lower_program(mut self, stmts: Vec) -> Program { - let mut result_id = None; - self.lower_stmts(stmts, &mut result_id); - Program { result: result_id.expect("No result statement"), nodes: self.nodes } + let mut exports = Vec::new(); + self.lower_stmts(stmts, &mut exports); + if exports.is_empty() { + panic!("Program has no `export` statement"); + } + Program { nodes: self.nodes, export: exports } } - fn lower_stmts(&mut self, stmts: Vec, result_id: &mut Option) { + fn lower_stmts(&mut self, stmts: Vec, exports: &mut Vec<(String, Id)>) { // ---- 1. Bucket statements; reject duplicate names. ---- // `order` records the original textual order so the topological pass // is deterministic when several items are simultaneously ready. @@ -64,7 +67,8 @@ impl Lowering { let mut lets: HashMap = HashMap::new(); let mut scopes: HashMap> = HashMap::new(); let mut order: Vec<(ItemKind, String)> = Vec::new(); - let mut results: Vec = Vec::new(); + // Exports in declaration order (root scope only — rejected below if nested). + let mut local_exports: Vec<(String, Expr)> = Vec::new(); let mut seen: BTreeSet = BTreeSet::new(); for stmt in stmts { match stmt { @@ -82,10 +86,27 @@ impl Lowering { order.push((ItemKind::Scope, name.clone())); scopes.insert(name, body); }, - Stmt::Result(expr) => results.push(expr), + Stmt::Export(name, expr) => { + // Exports are the program's output interface and only make + // sense at the root; reject nested ones rather than silently + // dropping them. 
+ if self.level > 0 { + panic!("`export {:?}` is nested; exports are only allowed at the root scope", name); + } + local_exports.push((name, expr)); + }, + } + } + // Reject duplicate export names (root-only, so this is the whole + // program's output interface). + { + let mut names: BTreeSet<&str> = BTreeSet::new(); + for (n, _) in &local_exports { + if !names.insert(n) { + panic!("Duplicate export name: {:?}", n); + } } } - if results.len() > 1 { panic!("Multiple `result` statements at the same scope level"); } // ---- 2. Pre-bind `Variable` placeholders. ---- for (name, _) in &vars { @@ -132,8 +153,10 @@ impl Lowering { self.push(Node::Scope); self.level += 1; self.scopes.push(HashMap::new()); - let mut inner_result = None; - self.lower_stmts(body, &mut inner_result); + // Exports are root-only (lower_stmts rejects nested ones), + // so this stays empty. + let mut inner_exports = Vec::new(); + self.lower_stmts(body, &mut inner_exports); let inner_scope = self.scopes.pop().unwrap(); let scope_level = self.level; self.named_scopes.insert(name, (scope_level, inner_scope)); @@ -143,10 +166,10 @@ impl Lowering { } } - // ---- 4. Lower the result expression (if any). ---- - if let Some(expr) = results.into_iter().next() { + // ---- 4. Lower export expressions (if any) and record them. ---- + for (name, expr) in local_exports { let id = self.lower_expr(expr); - *result_id = Some(id); + exports.push((name, id)); } // ---- 5. Lower var bodies and emit Bind nodes. ---- @@ -160,6 +183,7 @@ impl Lowering { fn lower_expr(&mut self, expr: Expr) -> Id { match expr { Expr::Input(n) => self.push(Node::Input(n)), + Expr::Import(name) => self.push(Node::Import { name }), Expr::Name(name) => self.resolve_name(&name), Expr::Qualified(scope_name, name) => { let (scope_level, inner_id) = { @@ -223,7 +247,7 @@ fn scope_body_deps(body: &[Stmt], defined: &BTreeSet<&str>, self_name: &str) -> /// field is resolved within that scope's environment, not the enclosing one). 
fn expr_free_names<'a>(expr: &'a Expr, out: &mut BTreeSet<&'a str>) { match expr { - Expr::Input(_) => {}, + Expr::Input(_) | Expr::Import(_) => {}, Expr::Name(n) => { out.insert(n.as_str()); }, Expr::Qualified(scope, _) => { out.insert(scope.as_str()); }, Expr::Map(e, _) | Expr::Reduce(e, _) | Expr::Filter(e, _) @@ -242,13 +266,13 @@ fn collect_body_free_names<'a>(body: &'a [Stmt], out: &mut BTreeSet<&'a str>) { for stmt in body { match stmt { Stmt::Let(n, _) | Stmt::Var(n, _) | Stmt::Scope(n, _) => { local.insert(n.as_str()); }, - Stmt::Result(_) => {}, + Stmt::Export(_, _) => {}, } } let mut inner: BTreeSet<&'a str> = BTreeSet::new(); for stmt in body { match stmt { - Stmt::Let(_, e) | Stmt::Var(_, e) | Stmt::Result(e) => expr_free_names(e, &mut inner), + Stmt::Let(_, e) | Stmt::Var(_, e) | Stmt::Export(_, e) => expr_free_names(e, &mut inner), Stmt::Scope(_, b) => collect_body_free_names(b, &mut inner), } } diff --git a/interactive/src/parse/applicative.rs b/interactive/src/parse/applicative.rs index 88b80feff..9daeb90e4 100644 --- a/interactive/src/parse/applicative.rs +++ b/interactive/src/parse/applicative.rs @@ -6,10 +6,10 @@ use super::*; #[derive(Debug, Clone, PartialEq)] enum Token { - Let, Var, Scope, Result, - Input, Map, Join, Reduce, Concat, Arrange, Filter, Negate, EnterAt, LiftIter, Inspect, + Let, Var, Scope, Export, + Input, Import, Map, Join, Reduce, Concat, Arrange, Filter, Negate, EnterAt, LiftIter, Inspect, Min, Distinct, Count, - Ident(String), Int(i64), + Ident(String), Int(i64), Str(String), Dollar, LParen, RParen, LBrace, RBrace, LBracket, RBracket, Comma, Semi, Colon, ColonColon, Eq, EqEq, NotEq, Lt, LtEq, Gt, GtEq, AndAnd, Plus, Minus, Star, Eof, @@ -40,6 +40,19 @@ fn tokenize(input: &str) -> Vec { '+' => { chars.next(); tokens.push(Token::Plus); }, '*' => { chars.next(); tokens.push(Token::Star); }, '$' => { chars.next(); tokens.push(Token::Dollar); }, + '"' => { + // String literal: the quoted name in `IMPORT "..."` / `EXPORT "..." 
= ...`. + // Names carry no escapes, so read verbatim up to the closing quote. + chars.next(); + let mut s = String::new(); + let mut closed = false; + while let Some(c) = chars.next() { + if c == '"' { closed = true; break; } + s.push(c); + } + if !closed { panic!("Unterminated string literal"); } + tokens.push(Token::Str(s)); + }, '-' => { chars.next(); tokens.push(Token::Minus); }, ':' => { chars.next(); if chars.peek() == Some(&':') { chars.next(); tokens.push(Token::ColonColon); } else { tokens.push(Token::Colon); } }, c if c.is_ascii_digit() => { @@ -51,8 +64,10 @@ fn tokenize(input: &str) -> Vec { let mut ident = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_alphanumeric() || c == '_' { ident.push(c); chars.next(); } else { break; } } tokens.push(match ident.as_str() { - "let" => Token::Let, "var" => Token::Var, "scope" => Token::Scope, "result" => Token::Result, - "INPUT" => Token::Input, "MAP" => Token::Map, "JOIN" => Token::Join, + "let" => Token::Let, "var" => Token::Var, "scope" => Token::Scope, + "export" => Token::Export, + "INPUT" => Token::Input, "IMPORT" => Token::Import, + "MAP" => Token::Map, "JOIN" => Token::Join, "REDUCE" => Token::Reduce, "CONCAT" => Token::Concat, "ARRANGE" => Token::Arrange, "FILTER" => Token::Filter, "NEGATE" => Token::Negate, "ENTER_AT" => Token::EnterAt, "INSPECT" => Token::Inspect, "MIN" => Token::Min, "DISTINCT" => Token::Distinct, "COUNT" => Token::Count, @@ -85,7 +100,17 @@ impl Parser { match self.peek().clone() { Token::Let => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Let(n, e) }, Token::Var => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Var(n, e) }, - Token::Result => { self.next(); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Result(e) }, + Token::Export => { + self.next(); + let name = match self.next() { + 
Token::Str(s) => s, + o => panic!("Expected string literal after `export`, got {:?}", o), + }; + self.expect(&Token::Eq); + let e = self.parse_expr(); + self.expect(&Token::Semi); + Stmt::Export(name, e) + }, Token::Ident(_) => { let n = self.parse_ident(); self.expect(&Token::Colon); if *self.peek() == Token::Scope { self.next(); } @@ -100,6 +125,7 @@ impl Parser { fn parse_expr(&mut self) -> Expr { match self.peek().clone() { Token::Input => { self.next(); match self.next() { Token::Int(n) => Expr::Input(n as usize), o => panic!("Expected int, got {:?}", o) } }, + Token::Import => { self.next(); match self.next() { Token::Str(s) => Expr::Import(s), o => panic!("Expected string literal after IMPORT, got {:?}", o) } }, Token::Map => { self.next(); self.expect(&Token::LParen); let i = self.parse_expr(); self.expect(&Token::Comma); let p = self.parse_projection(); self.expect(&Token::RParen); Expr::Map(Box::new(i), p) }, Token::Join => { self.next(); self.expect(&Token::LParen); let l = self.parse_expr(); self.expect(&Token::Comma); let r = self.parse_expr(); self.expect(&Token::Comma); let p = self.parse_projection(); self.expect(&Token::RParen); Expr::Join(Box::new(l), Box::new(r), p) }, Token::Reduce => { self.next(); self.expect(&Token::LParen); let i = self.parse_expr(); self.expect(&Token::Comma); let r = self.parse_reducer(); self.expect(&Token::RParen); Expr::Reduce(Box::new(i), r) }, diff --git a/interactive/src/parse/mod.rs b/interactive/src/parse/mod.rs index 4804301d5..867026afd 100644 --- a/interactive/src/parse/mod.rs +++ b/interactive/src/parse/mod.rs @@ -37,6 +37,9 @@ pub enum Reducer { Min, Distinct, Count } #[derive(Debug, Clone)] pub enum Expr { Input(usize), + /// Named external trace resolved at install time. Carries only the name; + /// shape comes from the registry the program is installed against. 
+ Import(String), Name(String), Qualified(String, String), Map(Box, Projection), @@ -65,5 +68,7 @@ pub enum Stmt { Let(String, Expr), Var(String, Expr), Scope(String, Vec), - Result(Expr), + /// `export "name" = expr;` — registers a named output in the program. + /// Only valid at the root scope. + Export(String, Expr), } diff --git a/interactive/src/parse/pipe.rs b/interactive/src/parse/pipe.rs index f9bcefbc5..44f91b3f7 100644 --- a/interactive/src/parse/pipe.rs +++ b/interactive/src/parse/pipe.rs @@ -6,9 +6,9 @@ use super::*; #[derive(Debug, Clone, PartialEq)] enum Token { - Let, Var, Result, - Input, Key, Map, Join, Min, Distinct, Count, Arrange, Negate, Filter, EnterAt, LiftIter, Inspect, - Ident(String), Int(i64), + Let, Var, Export, + Input, Import, Key, Map, Join, Min, Distinct, Count, Arrange, Negate, Filter, EnterAt, LiftIter, Inspect, + Ident(String), Int(i64), Str(String), Dollar, LParen, RParen, LBrace, RBrace, LBracket, RBracket, Comma, Semi, Colon, ColonColon, Eq, EqEq, NotEq, Lt, LtEq, Gt, GtEq, AndAnd, Pipe, Plus, Minus, Eof, @@ -39,6 +39,19 @@ fn tokenize(input: &str) -> Vec { '<' => { chars.next(); if chars.peek() == Some(&'=') { chars.next(); tokens.push(Token::LtEq); } else { tokens.push(Token::Lt); } }, '>' => { chars.next(); if chars.peek() == Some(&'=') { chars.next(); tokens.push(Token::GtEq); } else { tokens.push(Token::Gt); } }, '$' => { chars.next(); tokens.push(Token::Dollar); }, + '"' => { + // String literal: the quoted name in `import "..."` / `export "..." = ...`. + // Names carry no escapes, so read verbatim up to the closing quote. 
+ chars.next(); + let mut s = String::new(); + let mut closed = false; + while let Some(c) = chars.next() { + if c == '"' { closed = true; break; } + s.push(c); + } + if !closed { panic!("Unterminated string literal"); } + tokens.push(Token::Str(s)); + }, '-' => { chars.next(); tokens.push(Token::Minus); }, ':' => { chars.next(); if chars.peek() == Some(&':') { chars.next(); tokens.push(Token::ColonColon); } else { tokens.push(Token::Colon); } }, c if c.is_ascii_digit() => { @@ -50,8 +63,10 @@ fn tokenize(input: &str) -> Vec { let mut ident = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_alphanumeric() || c == '_' { ident.push(c); chars.next(); } else { break; } } tokens.push(match ident.as_str() { - "let" => Token::Let, "var" => Token::Var, "result" => Token::Result, - "input" => Token::Input, "key" => Token::Key, "map" => Token::Map, + "let" => Token::Let, "var" => Token::Var, + "export" => Token::Export, + "input" => Token::Input, "import" => Token::Import, + "key" => Token::Key, "map" => Token::Map, "join" => Token::Join, "min" => Token::Min, "distinct" => Token::Distinct, "count" => Token::Count, "arrange" => Token::Arrange, "negate" => Token::Negate, "filter" => Token::Filter, "enter_at" => Token::EnterAt, "inspect" => Token::Inspect, @@ -84,7 +99,17 @@ impl Parser { match self.peek().clone() { Token::Let => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Let(n, e) }, Token::Var => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Var(n, e) }, - Token::Result => { self.next(); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Result(e) }, + Token::Export => { + self.next(); + let name = match self.next() { + Token::Str(s) => s, + o => panic!("Expected string literal after `export`, got {:?}", o), + }; + self.expect(&Token::Eq); + let e = self.parse_pipe_expr(); 
+ self.expect(&Token::Semi); + Stmt::Export(name, e) + }, Token::Ident(_) => { let n = self.parse_ident(); self.expect(&Token::Colon); self.expect(&Token::LBrace); let b = self.parse_program(); self.expect(&Token::RBrace); Stmt::Scope(n, b) @@ -120,6 +145,7 @@ impl Parser { fn parse_atom(&mut self) -> Expr { match self.peek().clone() { Token::Input => { self.next(); match self.next() { Token::Int(n) => Expr::Input(n as usize), o => panic!("Expected int, got {:?}", o) } }, + Token::Import => { self.next(); match self.next() { Token::Str(s) => Expr::Import(s), o => panic!("Expected string literal after `import`, got {:?}", o) } }, Token::Ident(_) => { let n = self.parse_ident(); if *self.peek() == Token::ColonColon { self.next(); let f = self.parse_ident(); Expr::Qualified(n, f) } else { Expr::Name(n) } }, Token::LParen => { self.next(); let e = self.parse_pipe_expr(); self.expect(&Token::RParen); e }, other => panic!("Unexpected token in atom: {:?}", other), From d009a14af61aa6b9b71709bb7460b841e17ec41d Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 19:59:34 -0400 Subject: [PATCH 2/9] interactive/explain: fix two level errors that made SCC explanation unsound Two depth/index bugs in the reverse-tracing rewrite, both surfacing only at user-scope depth >= 2 (reach and single-scope programs were spared): 1. Projection arity counted field-exprs, not the sum of field widths. `Pos(r)` is a whole-row reference of width = input row r's arity, so a join projection like `($0 ; $1, $2)` over a 2-column value row has width 3, not 2. The undercount shifted the user-iteration coord one slot in the lossy/keyed lookups, so `filter_time_and_strip` compared a data field against the output time and rejected every contribution -- SCC's demand-set came back empty. `proj_arity` now expands each field. 2. 
`filter_time_and_strip` aligned the `user_in <= user_out` time filter from index 0, but user_chain is innermost-first; crossing a `Leave` drops the innermost coord, so the shared scopes are the outer ones. Compare at the outer ends instead. This repaired depth-2 min-label explanation (a nested connected-components reproducer). With both fixes, querying an SCC-internal edge returns a demand-set that regenerates it (the whole cycle, not just the edge). reach and nested CC remain sound; interactive lib tests pass. Co-Authored-By: Claude Opus 4.8 --- interactive/src/explain.rs | 52 ++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index dc536f6ef..7ca051022 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -482,7 +482,7 @@ pub mod arities { use std::collections::BTreeMap; use crate::ir::{Id, LinearOp, Node, Program}; - use crate::parse::Reducer; + use crate::parse::{FieldExpr, Reducer}; pub fn compute_arities( p: &Program, @@ -515,7 +515,17 @@ pub mod arities { // shape and let fixed-point iteration propagate. Node::Concat(ids) => ids.iter().find_map(|i| out.get(i).copied()), Node::Arrange(input) => out.get(input).copied(), - Node::Join { projection, .. } => Some((projection.key.len(), projection.val.len())), + // A projection's arity is the sum of each field's *width*, + // not the count of field-exprs: `Pos(r)` is a whole-row + // reference that expands to input row `r`'s arity. The + // join's input rows are [key, left_val, right_val]. 
+ Node::Join { left, right, projection } => match (out.get(left), out.get(right)) { + (Some(&(kl, vl)), Some(&(_kr, vr))) => { + let rows = [kl, vl, vr]; + Some((proj_arity(&projection.key, &rows), proj_arity(&projection.val, &rows))) + } + _ => None, + }, Node::Reduce { input, reducer } => out.get(input).map(|s| match reducer { Reducer::Distinct => (s.0, 0), Reducer::Min => (s.0, s.1), @@ -536,13 +546,37 @@ pub mod arities { fn apply_ops_arity((mut k, mut v): (usize, usize), ops: &[LinearOp]) -> (usize, usize) { for op in ops { match op { - LinearOp::Project(p) => { k = p.key.len(); v = p.val.len(); } + // Project's input rows are [key, val]; expand `Pos` refs to + // their row arities rather than counting field-exprs. + LinearOp::Project(p) => { + let rows = [k, v]; + k = proj_arity(&p.key, &rows); + v = proj_arity(&p.val, &rows); + } LinearOp::Filter(_) | LinearOp::Negate | LinearOp::EnterAt(_) => {} LinearOp::LiftIter => { v += 1; } } } (k, v) } + + /// Width (output columns) a single `FieldExpr` expands to, given the + /// arities of the input rows it may reference. `Pos(r)` is a whole-row + /// reference of width `rows[r]`; index/const are single columns. + fn field_width(f: &FieldExpr, rows: &[usize]) -> usize { + match f { + FieldExpr::Pos(r) => rows.get(*r).copied().unwrap_or(0), + FieldExpr::Index(_, _) | FieldExpr::Const(_) => 1, + FieldExpr::Neg(inner) => field_width(inner, rows), + FieldExpr::Sub(a, _) => field_width(a, rows), + } + } + + /// Total arity of one projection side (`key`/`val`): the sum of its + /// fields' widths. + fn proj_arity(fields: &[FieldExpr], rows: &[usize]) -> usize { + fields.iter().map(|f| field_width(f, rows)).sum() + } } /// IR cloning with implicit lift_iter at scope exits. @@ -995,11 +1029,19 @@ mod reverse { let mut cur = coll; let cmp_len = in_len.min(out_len); if cmp_len > 0 { + // user_chain is innermost-first. 
When `in_len != out_len` (a + // `Leave` crossing drops the innermost coord), the *shared* + // scopes are the outer ones — the last `cmp_len` coords of each + // chain. Align both to their outer ends so we compare the same + // scope's iter coord on each side; comparing from index 0 would + // pair the just-left inner scope against an enclosing scope. + let in_off = in_len - cmp_len; + let out_off = out_len - cmp_len; let mut acc: Option = None; for i in 0..cmp_len { let cond = Condition::Le( - FieldExpr::Index(1, v_pre + i), // user_in[i] - FieldExpr::Index(1, v_pre + in_len + i), // user_out[i] + FieldExpr::Index(1, v_pre + in_off + i), // user_in[in_off + i] + FieldExpr::Index(1, v_pre + in_len + out_off + i), // user_out[out_off + i] ); acc = Some(match acc { None => cond, From 34fffd56a0a68a5f5f4e2826d5d5287eea14730b Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 19:59:34 -0400 Subject: [PATCH 3/9] interactive/folded: centralize user_chain algebra (locks out bug-2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reverse rewrite folds iteration time into a demand row's value (the user_chain). Hand-indexing those columns produced SCC explain's off-by-one: comparing a just-left inner scope's coord against an enclosing scope's, instead of aligning at the shared outer ends. New `folded::Joined` owns that arithmetic — `time_le` (outer-end aligned) and `strip` — in one place, with tests pinning the alignment (the depth-2 cross-Leave case yields user_in[1] vs user_out[0], not [0] vs [0]). `explain::filter_time_and_strip` now delegates to it, so the real rewrite uses the single correct implementation and future scope-builder ports can't re-introduce the miscount. SCC explanation verified unchanged (same 7-edge demand for the 4->1 query). 
Co-Authored-By: Claude Opus 4.8 --- interactive/src/explain.rs | 42 ++---------- interactive/src/folded.rs | 133 +++++++++++++++++++++++++++++++++++++ interactive/src/lib.rs | 1 + 3 files changed, 141 insertions(+), 35 deletions(-) create mode 100644 interactive/src/folded.rs diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index 7ca051022..7e5284b87 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -1025,43 +1025,15 @@ mod reverse { out_len: usize, keep_in_len: usize, ) -> Id { - assert!(keep_in_len <= in_len); + // The user_chain index arithmetic (the outer-end alignment whose + // off-by-one made SCC explain unsound) lives in `folded::Joined`, the + // single home shared with the scope-builder ports. + let layout = crate::folded::Joined { v_pre, in_len, out_len }; let mut cur = coll; - let cmp_len = in_len.min(out_len); - if cmp_len > 0 { - // user_chain is innermost-first. When `in_len != out_len` (a - // `Leave` crossing drops the innermost coord), the *shared* - // scopes are the outer ones — the last `cmp_len` coords of each - // chain. Align both to their outer ends so we compare the same - // scope's iter coord on each side; comparing from index 0 would - // pair the just-left inner scope against an enclosing scope. - let in_off = in_len - cmp_len; - let out_off = out_len - cmp_len; - let mut acc: Option = None; - for i in 0..cmp_len { - let cond = Condition::Le( - FieldExpr::Index(1, v_pre + in_off + i), // user_in[in_off + i] - FieldExpr::Index(1, v_pre + in_len + out_off + i), // user_out[out_off + i] - ); - acc = Some(match acc { - None => cond, - Some(prev) => Condition::And(Box::new(prev), Box::new(cond)), - }); - } - cur = self.filter(cur, acc.unwrap()); + if let Some(cond) = layout.time_le() { + cur = self.filter(cur, cond); } - // Strip user_out and the innermost coords of user_in past `keep_in_len`, - // preserving (K; V_pre ++ user_in[in_len - keep_in_len .. in_len] ++ [q]). 
- // user_chain is innermost-first, so the *last* `keep_in_len` coords - // correspond to the outer scopes that contribs at the input side care - // about; the dropped innermost coords belong to scope(s) we've left. - let key: Vec = (0..k_out).map(|i| FieldExpr::Index(0, i)).collect(); - let mut val: Vec = Vec::new(); - for i in 0..v_pre { val.push(FieldExpr::Index(1, i)); } - let drop_in = in_len - keep_in_len; - for i in 0..keep_in_len { val.push(FieldExpr::Index(1, v_pre + drop_in + i)); } - val.push(FieldExpr::Index(1, v_pre + in_len + out_len)); // q - self.project(cur, Projection { key, val }) + self.project(cur, layout.strip(k_out, keep_in_len)) } /// Keyed lookup (Reduce-style): demand on `(K; V_out ++ user_out ++ q)` diff --git a/interactive/src/folded.rs b/interactive/src/folded.rs new file mode 100644 index 000000000..19d6143b1 --- /dev/null +++ b/interactive/src/folded.rs @@ -0,0 +1,133 @@ +//! Centralized `user_chain` (folded iteration-time) algebra. +//! +//! The reverse-tracing explanation rewrite moves iteration time *into data*: a +//! demand row's value carries some number of user-chain coordinates +//! (innermost-first) plus a trailing query id. Indexing those by hand — +//! `FieldExpr::Index(1, v_pre + …)` scattered across the reverse rules — is +//! what produced SCC explain's off-by-one (comparing a just-left inner scope's +//! coord against an enclosing scope's). This module owns that arithmetic so it +//! exists once, correct, and every rule (and the scope builder's ports) routes +//! through it. + +use crate::parse::{Condition, FieldExpr, Projection}; + +/// The value layout of a *joined* demand row, as the reverse lookups produce it +/// before filtering: in the value row (`$1`), +/// `[V_pre (v_pre)] [user_in (in_len)] [user_out (out_len)] [q]`, +/// with each user-chain innermost-first. +#[derive(Clone, Copy, Debug)] +pub struct Joined { + /// Non-time value columns carried through (e.g. `V_in`). 
+ pub v_pre: usize, + /// user-chain length of the input side. + pub in_len: usize, + /// user-chain length of the output (demand) side. + pub out_len: usize, +} + +impl Joined { + /// Value-row index of input-side user coord `i` (0 = innermost). + fn user_in(self, i: usize) -> usize { + self.v_pre + i + } + /// Value-row index of output-side user coord `i` (0 = innermost). + fn user_out(self, i: usize) -> usize { + self.v_pre + self.in_len + i + } + /// Value-row index of the trailing query id. + fn q(self) -> usize { + self.v_pre + self.in_len + self.out_len + } + + /// The soundness filter `user_in.time ≤ user_out.time`, aligned at the + /// **outer** ends. user_chain is innermost-first, so when the two sides + /// differ in length (a `Leave` crossing drops the innermost coord) the + /// *shared* scopes are the outer ones — the last `min(in,out)` coords of + /// each side. Aligning from index 0 instead is exactly the bug that made + /// SCC explain unsound. Returns `None` when there's nothing to compare. + pub fn time_le(self) -> Option { + let n = self.in_len.min(self.out_len); + let in_off = self.in_len - n; + let out_off = self.out_len - n; + let mut acc: Option = None; + for i in 0..n { + let cond = Condition::Le( + FieldExpr::Index(1, self.user_in(in_off + i)), + FieldExpr::Index(1, self.user_out(out_off + i)), + ); + acc = Some(match acc { + None => cond, + Some(prev) => Condition::And(Box::new(prev), Box::new(cond)), + }); + } + acc + } + + /// The projection that strips `user_out` and the innermost `user_in` coords + /// past `keep_in_len`, leaving `(K[k_out]; V_pre ++ outer keep_in_len ++ [q])`. + /// The kept coords are the *outer* ones (last `keep_in_len`), matching the + /// outer alignment in `time_le`. 
+ pub fn strip(self, k_out: usize, keep_in_len: usize) -> Projection { + assert!(keep_in_len <= self.in_len, "strip: keep_in_len exceeds in_len"); + let key = (0..k_out).map(|i| FieldExpr::Index(0, i)).collect(); + let mut val: Vec = Vec::new(); + for i in 0..self.v_pre { + val.push(FieldExpr::Index(1, i)); + } + let drop_in = self.in_len - keep_in_len; + for i in 0..keep_in_len { + val.push(FieldExpr::Index(1, self.user_in(drop_in + i))); + } + val.push(FieldExpr::Index(1, self.q())); + Projection { key, val } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn le_indices(c: &Condition) -> (usize, usize) { + match c { + Condition::Le(FieldExpr::Index(1, a), FieldExpr::Index(1, b)) => (*a, *b), + other => panic!("expected a single value-row Le, got {:?}", other), + } + } + + #[test] + fn time_le_aligns_outer_ends_across_a_leave() { + // Leave crossing: input is one scope deeper (in_len 2) than the demand + // (out_len 1). The shared scope is the OUTER one, so compare + // user_in[1] (not user_in[0], the just-left inner scope) vs user_out[0]. + let j = Joined { v_pre: 3, in_len: 2, out_len: 1 }; + // user_in[1] = 3 + 1 = 4 ; user_out[0] = 3 + 2 + 0 = 5 + assert_eq!(le_indices(&j.time_le().unwrap()), (4, 5)); + } + + #[test] + fn time_le_equal_depth_aligns_from_zero() { + // Same depth (e.g. a Concat input): align from index 0. + let j = Joined { v_pre: 2, in_len: 1, out_len: 1 }; + // user_in[0] = 2 ; user_out[0] = 2 + 1 = 3 + assert_eq!(le_indices(&j.time_le().unwrap()), (2, 3)); + } + + #[test] + fn time_le_empty_when_no_shared_coords() { + assert!(Joined { v_pre: 1, in_len: 1, out_len: 0 }.time_le().is_none()); + } + + #[test] + fn strip_keeps_outer_coords_and_q() { + // in_len 2, keep 1 → drop the innermost input coord, keep the outer one. 
+ let j = Joined { v_pre: 1, in_len: 2, out_len: 1 }; + let p = j.strip(1, 1); + let idx = |f: &FieldExpr| match f { + FieldExpr::Index(r, c) => (*r, *c), + other => panic!("expected Index, got {:?}", other), + }; + assert_eq!(p.key.iter().map(idx).collect::>(), vec![(0, 0)]); + // val = [V_pre[0]=(1,0), user_in[1]=(1,2) (outer kept coord), q=(1,4)] + assert_eq!(p.val.iter().map(idx).collect::>(), vec![(1, 0), (1, 2), (1, 4)]); + } +} diff --git a/interactive/src/lib.rs b/interactive/src/lib.rs index 0a34ead7b..2a84882dc 100644 --- a/interactive/src/lib.rs +++ b/interactive/src/lib.rs @@ -2,6 +2,7 @@ pub mod parse; pub mod ir; pub mod lower; pub mod explain; +pub mod folded; use std::collections::BTreeSet; From b0d0dba027c16a4ed88437bd62d96d1cf8d0f328 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 19:59:35 -0400 Subject: [PATCH 4/9] interactive/explain: narrow lossy-project reverse by output value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Linear[Project] reverse via emit_lookup_lossy joined the demand against the witness keyed on K_out *only*. When a re-key drops a field from the key (e.g. `by_b = tc | key($0[1]; $0[0])`, keying tc by its destination), that key-only match recovered every input row sharing the coarsened key — all same-destination pairs regardless of source. Those siblings then resolved to their own input edges, so explaining a multi-hop `tc(0,1)` pulled spurious dead-end edges (e.g. `5→3`, `2→1`, `3→1`) that aren't on any path 0⇝1. Fix: extend the existing pure-map shortcut from depth-0 projects to same-scope projects of any depth. 
A Linear[Project] doesn't cross a scope boundary, so the input's user_chain equals the output's (in_user_len == out_user_len == keep_in_len when the input isn't a Leave); when the projection is also total (every input field recoverable from the output) the whole reverse is a direct map from dep_y that narrows by *all* demanded fields — key and value — so the source is pinned and the siblings vanish. Verified: plain TC `0→1` → {0→3,3→1} (was {0→3,3→1,5→3}); shortest-path TC `0→2`@2 → {0→1,1→2} (was +{2→1,3→1}); both sound (reproduce the query). SCC is structurally unaffected (its `filter|key` projects are non-total, `trans` is depth-0) and still explains internal edges soundly; reach unaffected (no total depth>0 lossy project). Lib tests green. Co-Authored-By: Claude Opus 4.8 --- interactive/src/explain.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index 7e5284b87..de7c8de3d 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -1148,7 +1148,18 @@ mod reverse { let total = (0..k_in).all(|c| known.contains_key(&(0, c))) && (0..v_in).all(|c| known.contains_key(&(1, c))); - if total && in_user_len == 0 { + // Pure-map shortcut, extended to same-scope projects of any depth. + // A Linear[Project] doesn't cross a scope boundary, so the input's + // user_chain equals the output's (in_user_len == out_user_len == + // keep_in_len when the input isn't a Leave). When every input field is + // also recoverable from the output, the whole reverse is a direct map + // from dep_y, narrowing by *all* demanded fields — including value + // fields the pair-table join (keyed on K_out only) ignores. That + // key-only match is what let sibling rows leak through a re-key that + // drops a field from the key (e.g. by_b = tc | key($0[1]; …), keyed by + // tc's destination only → fallback recovered every same-destination + // pair regardless of source). 
+ if total && in_user_len == out_user_len && keep_in_len == in_user_len { // Map flat output position p (into [K_out ++ V_out]) to an access // expression against dep_y's (key, val) layout: // p < k_out → $0[p] (key) @@ -1158,9 +1169,11 @@ mod reverse { else { FieldExpr::Index(1, p - k_out) } }; let key: Vec = (0..k_in).map(|c| access(known[&(0, c)])).collect(); - let mut val: Vec = Vec::with_capacity(v_in + 1); + let mut val: Vec = Vec::with_capacity(v_in + in_user_len + 1); for c in 0..v_in { val.push(access(known[&(1, c)])); } - // user_in is empty (in_user_len == 0); q is at $1[v_out + out_user_len]. + // user_in == user_out (same scope), so copy it through. The filter + // user_in ≤ user_out then holds with equality. q at $1[v_out+out_user_len]. + for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); } val.push(FieldExpr::Index(1, v_out + out_user_len)); return self.project(dep_y, Projection { key, val }); } From 8ed45ba6c607bcbaaf3081406984eacd29fe54bf Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 19:59:35 -0400 Subject: [PATCH 5/9] interactive/explain: document EnterAt reverse over-approximation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EnterAt is a data→time lift (sets a new innermost user_chain coord t_in = delay($field)), so its output is one scope deeper than its input and the correct reverse would drop that coord 1:1. Today the reverse is a pass-through: tenable only because depths() is positional and treats EnterAt as depth-neutral, so the entry coord is stripped UNCONSTRAINED by the neighboring Project that crosses the scope boundary (no time filter, since in_len=0). Net: the input demand is a superset — sound (kept so by the semijoin(actual_input) at seeding) but over-broad (inflates e.g. SCC label cones); it never drops a needed edge. Not fixing now (the proper fix is structural — make depths() let EnterAt own its level and the reverse a coord-dropping projection). 
Instead: split EnterAt out of the Negate pass-through arm with a TRIAGE note, and cross-reference it from Program::depths(), so a later pass can find it. Behavior unchanged; lib tests green. Co-Authored-By: Claude Opus 4.8 --- interactive/src/explain.rs | 20 +++++++++++++++++--- interactive/src/ir.rs | 8 ++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index de7c8de3d..52801b4fb 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -886,9 +886,23 @@ mod reverse { let contrib = self.filter(dep_this, cond.clone()); contribs.entry(*input).or_default().push(contrib); } - LinearOp::Negate | LinearOp::EnterAt(_) => { - // Pure pass-through: data unchanged, scope - // unchanged. + LinearOp::Negate => { + // Pure pass-through: data unchanged, scope unchanged. + contribs.entry(*input).or_default().push(dep_this); + } + LinearOp::EnterAt(_) => { + // Sound but over-broad. EnterAt is a data→time lift: it sets a new + // innermost user_chain coord `t_in = delay($field)` from the value, + // so its output is one scope deeper than its input. The 1:1 reverse + // would DROP that coord (recoverable as `delay($field)` from the + // preserved value): out ((k,v),[t_in,t_out..]) -> in ((k,v),[t_out..]). + // We instead pass demand through unchanged — tenable only because + // `depths()` is positional and treats EnterAt as depth-neutral, so + // the coord is stripped *unconstrained* by the neighboring Project + // that crosses the scope boundary. The result is a superset (kept + // sound by the `semijoin(actual_input)` at seeding); it never drops a + // needed edge. Tight fix: let `depths()` give EnterAt its own level + // and make this arm drop the innermost coord — see `depths()` (ir.rs). 
contribs.entry(*input).or_default().push(dep_this); } LinearOp::LiftIter => { diff --git a/interactive/src/ir.rs b/interactive/src/ir.rs index 8c0fc45c4..88d3d1946 100644 --- a/interactive/src/ir.rs +++ b/interactive/src/ir.rs @@ -125,6 +125,14 @@ impl Program { /// active at the moment it was lowered; `Scope` itself sits at its /// outer depth (the increment applies to subsequent nodes), and /// `EndScope` sits at its inner depth (the decrement applies after). + /// + /// Note: this is purely positional, so `enter_at` — a data→time lift that + /// semantically adds one scope coordinate (its output is one level deeper + /// than its input) — is counted depth-NEUTRAL here. The coordinate it + /// introduces is instead absorbed by a neighboring Project's depth jump and + /// stripped *unconstrained* in the reverse rewrite: sound but over-broad. + /// The fix is to let `enter_at` own its level (output = input depth + 1); + /// see the `LinearOp::EnterAt` arm in `explain.rs::emit_reverse`. pub fn depths(&self) -> BTreeMap { let mut out = BTreeMap::new(); let mut depth = 0usize; From 47fd5053bc16945c8f8fe61a9057b12ccec5d079 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 20:00:26 -0400 Subject: [PATCH 6/9] interactive/ir: explicit scope tree from Scope/EndScope markers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `Program::scopes()` — a computed view (like `depths()`, no constructor change) that surfaces the scope structure the IR only encodes implicitly via `Scope`/`EndScope` markers: a scope tree (`parent`/`children`), a per-node scope assignment (`of_node`, same scope convention as `depths()`), and an `iterative` tag per scope (true iff it carries a `Variable` feedback; false for a purely structural boundary). This is the substrate for the scope-typed builder (compile-time scoping discipline) and for drawing scope structure in the viz. 
Non-breaking; includes a unit test on a single nested iterative scope. Co-Authored-By: Claude Opus 4.8 --- interactive/src/ir.rs | 95 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/interactive/src/ir.rs b/interactive/src/ir.rs index 88d3d1946..de5a1fee0 100644 --- a/interactive/src/ir.rs +++ b/interactive/src/ir.rs @@ -85,6 +85,31 @@ pub struct Program { pub export: Vec<(String, Id)>, } +/// Identifier for a scope (a scope nesting). Scope 0 is the root. +pub type ScopeId = usize; + +/// Structural view of the program's scope nesting, derived from the +/// `Scope`/`EndScope` markers — the explicit form of what `depths()` only +/// counts. Scope 0 is the top level; each `Scope` opens a child scope. +/// +/// `iterative[r]` is true when scope `r` carries an iteration feedback (a +/// `Variable`); a scope with none is purely *structural* — a nesting +/// boundary that does not advance the timestamp. The scope-typed builder +/// (see `scope_builder`) leverages the latter for cheap, lifetime- +/// based scoping discipline, and a viz can draw scopes as nested boxes. +#[derive(Debug, Clone)] +pub struct ScopeTree { + /// Parent of each scope; `None` for the root. + pub parent: Vec>, + /// Direct children of each scope, in the order they were opened. + pub children: Vec>, + /// The scope each node sits in (same scope convention as `depths()`: + /// a `Scope` marker sits in the outer scope, an `EndScope` in the inner). + pub of_node: BTreeMap, + /// Whether each scope carries an iteration feedback (a `Variable`). + pub iterative: Vec, +} + impl Program { /// Print a human-readable summary of the IR. pub fn dump(&self) { @@ -146,6 +171,44 @@ impl Program { out } + /// Derive the scope tree and per-node scope assignment from the + /// `Scope`/`EndScope` markers. Companion to `depths()`: that gives the + /// numeric nesting level, this gives the structure itself plus an + /// iterative-vs-structural tag per scope. 
+ pub fn scopes(&self) -> ScopeTree { + let mut parent: Vec> = vec![None]; + let mut children: Vec> = vec![Vec::new()]; + let mut iterative: Vec = vec![false]; + let mut of_node: BTreeMap = BTreeMap::new(); + let mut stack: Vec = vec![0]; // current scope path; top = current + for (&id, node) in &self.nodes { + let cur = *stack.last().unwrap(); + match node { + Node::Scope => { + // The marker sits in the outer scope; open a child scope. + of_node.insert(id, cur); + let new = parent.len(); + parent.push(Some(cur)); + children.push(Vec::new()); + iterative.push(false); + children[cur].push(new); + stack.push(new); + } + Node::EndScope => { + // Sits in the inner scope (matches `depths()`), then closes. + of_node.insert(id, cur); + if stack.len() > 1 { stack.pop(); } + } + Node::Variable => { + of_node.insert(id, cur); + iterative[cur] = true; // this scope has a feedback + } + _ => { of_node.insert(id, cur); } + } + } + ScopeTree { parent, children, of_node, iterative } + } + /// Reject programs where a `LinearOp::LiftIter` result is referenced /// inside its own scope. See `Expr::LiftIter` for the rationale: in- /// scope use would let loop bodies branch on iter, defeating the @@ -399,3 +462,35 @@ fn eval_field_raw(field: &FieldExpr, inputs: &[&[i64]], result: &mut Vec) { } } } + +#[cfg(test)] +mod scope_tests { + use super::*; + use std::collections::BTreeMap; + + #[test] + fn scopes_one_iterative_scope() { + // n0 input; scope { n2 var; n3 linear; n4 bind } n6 leave + let mut nodes = BTreeMap::new(); + nodes.insert(0usize, Node::Input(0)); + nodes.insert(1, Node::Scope); + nodes.insert(2, Node::Variable); + nodes.insert(3, Node::Linear { input: 0, ops: vec![] }); + nodes.insert(4, Node::Bind { variable: 2, value: 3 }); + nodes.insert(5, Node::EndScope); + nodes.insert(6, Node::Leave(2, 1)); + let p = Program { nodes, export: vec![("$result".into(), 6)] }; + let r = p.scopes(); + // outer nodes (incl. the Scope marker) sit in scope 0; inner nodes + // (incl. 
the EndScope marker) in scope 1 — matching `depths()`. + assert_eq!(r.of_node[&0], 0); + assert_eq!(r.of_node[&1], 0); + assert_eq!(r.of_node[&2], 1); + assert_eq!(r.of_node[&5], 1); + assert_eq!(r.of_node[&6], 0); + // tree shape + iterative tag (scope 1 holds the Variable feedback). + assert_eq!(r.parent, vec![None, Some(0)]); + assert_eq!(r.children[0], vec![1]); + assert_eq!(r.iterative, vec![false, true]); + } +} From c005bf41c9da8875b95be71c30d84287e9819acb Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 20:00:26 -0400 Subject: [PATCH 7/9] interactive: lifetime-bounded scope builder A builder facade that enforces scoping discipline at compile time. Each `scope(...)` introduces a fresh, invariant brand `'r`; collections built inside are `Coll<'r>` and can leave only through `leave`, whose `Left<'r>` result is the only value a scope body may return. This makes the two level errors behind the explanation bugs unrepresentable: - forgetting to leave a scope is a type error (body must return Left<'r>); - an inner Coll<'r> cannot escape to an outer scope ('r is generative). Both are proven by `compile_fail` doctests (they pass = the bad programs are rejected). A unit test builds a nested iterative scope and checks the resulting IR via `scopes()`. Maps to the existing iterative-scope IR for now; the structural (no-coord) variant and outer->inner `enter` (which needs a borrow-based rather than generative relation) are the next refinements. 
Co-Authored-By: Claude Opus 4.8 --- interactive/src/lib.rs | 1 + interactive/src/scope_builder.rs | 210 +++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 interactive/src/scope_builder.rs diff --git a/interactive/src/lib.rs b/interactive/src/lib.rs index 2a84882dc..a6cd906ba 100644 --- a/interactive/src/lib.rs +++ b/interactive/src/lib.rs @@ -3,6 +3,7 @@ pub mod ir; pub mod lower; pub mod explain; pub mod folded; +pub mod scope_builder; use std::collections::BTreeSet; diff --git a/interactive/src/scope_builder.rs b/interactive/src/scope_builder.rs new file mode 100644 index 000000000..7372f6756 --- /dev/null +++ b/interactive/src/scope_builder.rs @@ -0,0 +1,210 @@ +//! Lifetime-bounded scope builder — a checked IR facade. +//! +//! Compile-time scoping discipline. A `scope(...)` introduces a fresh, +//! *invariant* scope brand `'r`; collections built inside are `Coll<'r>` and +//! can leave the scope only through `leave`, whose result `Left<'r>` is the +//! only value a scope body may return. The borrow checker then rejects, at +//! compile time, the two mistakes behind the explanation rewrite's level +//! errors: +//! +//! - **forgetting to leave** — returning a `Coll<'r>` from the body is a type +//! error (the body must return `Left<'r>`); +//! - **smuggling an inner collection out** — a `Coll<'r>` cannot be stored in +//! or returned to an outer scope, because `'r` is generative and invariant. +//! +//! See the `compile_fail` doctests below for both, made executable. +//! +//! Scope of this increment: it establishes the *discipline*. The scope maps +//! to the existing iterative scope IR (`Scope`/`EndScope` + a `Leave` at +//! exit); the purely-structural (timestamp-unchanged, no-`Leave`) variant and +//! importing an outer collection *into* a scope (`enter`, which needs a +//! borrow-based lifetime relation rather than a generative one) are the next +//! refinements. 
The constructs built here are self-contained, so no `enter` +//! is required yet. + +use std::collections::BTreeMap; +use std::marker::PhantomData; + +use crate::ir::{Id, LinearOp, Node, Program}; + +/// Invariant brand for a scope's lifetime: fresh per `scope`/`build` call, +/// un-unifiable with any other, so a handle can't silently cross scopes. +type Brand<'r> = PhantomData &'r ()>; + +/// A collection handle confined to scope `'r` (wraps an IR node id). +#[derive(Clone, Copy)] +pub struct Coll<'r> { + id: Id, + _brand: Brand<'r>, +} + +impl<'r> Coll<'r> { + /// The underlying IR node id. (Read-only; minting a `Coll` is the + /// builder's job, so a raw id can't be re-branded into a scope.) + pub fn id(self) -> Id { + self.id + } +} + +/// Proof that a `Coll<'r>` was surrendered via `leave` — the only value a +/// scope body is allowed to return. +pub struct Left<'r> { + id: Id, + _brand: Brand<'r>, +} + +/// A scope brand token (Copy), threaded into constructors so their results +/// are branded with this scope. +#[derive(Clone, Copy)] +pub struct Scope<'r> { + _brand: Brand<'r>, +} + +/// IR builder: owns the node map and hands out scope-branded handles. +pub struct Builder { + nodes: BTreeMap, + next: Id, +} + +impl Builder { + fn push(&mut self, n: Node) -> Id { + let id = self.next; + self.next += 1; + self.nodes.insert(id, n); + id + } + + fn wrap<'r>(id: Id) -> Coll<'r> { + Coll { id, _brand: PhantomData } + } + + /// A positional input. + pub fn input<'r>(&mut self, _r: Scope<'r>, i: usize) -> Coll<'r> { + let id = self.push(Node::Input(i)); + Self::wrap(id) + } + + /// An iteration variable (the feedback edge of a `var`). + pub fn variable<'r>(&mut self, _r: Scope<'r>) -> Coll<'r> { + let id = self.push(Node::Variable); + Self::wrap(id) + } + + /// A linear (map/filter/…) transform of one collection. 
+ pub fn linear<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>, ops: Vec) -> Coll<'r> { + let id = self.push(Node::Linear { input: c.id, ops }); + Self::wrap(id) + } + + /// Concatenation of collections (all in the same scope). + pub fn concat<'r>(&mut self, _r: Scope<'r>, cs: &[Coll<'r>]) -> Coll<'r> { + let id = self.push(Node::Concat(cs.iter().map(|c| c.id).collect())); + Self::wrap(id) + } + + /// Close the loop: bind an iteration `variable` to its `value`. + pub fn bind<'r>(&mut self, _r: Scope<'r>, variable: Coll<'r>, value: Coll<'r>) { + self.push(Node::Bind { variable: variable.id, value: value.id }); + } + + /// Surrender a scope collection so the scope body can return it. + pub fn leave<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>) -> Left<'r> { + Left { id: c.id, _brand: PhantomData } + } + + /// Open a nested scope. The body receives a fresh brand `'c`; whatever it + /// `leave`s comes back as a collection in the parent scope `'r`. The + /// `for<'c>` quantification is what makes `'c` generative — a `Coll<'c>` + /// cannot escape the closure. + pub fn scope<'r, F>(&mut self, _parent: Scope<'r>, body: F) -> Coll<'r> + where + F: for<'c> FnOnce(&mut Builder, Scope<'c>) -> Left<'c>, + { + self.push(Node::Scope); + let inner = body(self, Scope { _brand: PhantomData }); + self.push(Node::EndScope); + // Leave one nesting level. (For an iterative scope this strips the + // iteration coordinate; the exact level is recomputed during a real + // lowering. scopes() doesn't depend on it.) + let id = self.push(Node::Leave(inner.id, 1)); + Self::wrap(id) + } +} + +/// Build a program. The body runs in the root scope and returns the result, +/// which is exported as `"$result"`. 
+pub fn build(body: F) -> Program +where + F: for<'root> FnOnce(&mut Builder, Scope<'root>) -> Coll<'root>, +{ + let mut b = Builder { nodes: BTreeMap::new(), next: 0 }; + let result = body(&mut b, Scope { _brand: PhantomData }); + Program { nodes: b.nodes, export: vec![("$result".into(), result.id())] } +} + +/// Discipline check: forgetting to `leave` is a type error — the scope body +/// must return `Left<'c>`, not a bare `Coll<'c>`. +/// +/// ```compile_fail +/// use interactive::scope_builder::build; +/// let _p = build(|b, root| { +/// let _ = b.scope(root, |b, r| { +/// let v = b.variable(r); +/// v // ERROR: expected `Left<'_>`, found `Coll<'_>` +/// }); +/// b.input(root, 0) +/// }); +/// ``` +/// +/// Discipline check: an inner collection cannot escape to an outer scope — +/// `'c` is generative, so it can't be stored in an outer-scoped variable. +/// +/// ```compile_fail +/// use interactive::scope_builder::build; +/// let _p = build(|b, root| { +/// let mut escaped = None; +/// let _ = b.scope(root, |b, r| { +/// let v = b.variable(r); +/// escaped = Some(v); // ERROR: `v`'s scope `'c` would escape here +/// b.leave(r, v) +/// }); +/// let _ = escaped; +/// b.input(root, 0) +/// }); +/// ``` +/// +/// And the well-formed version compiles and runs (see the unit test below). +#[cfg(doctest)] +struct DisciplineDoctests; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_nested_iterative_scope() { + // root: input e; scope { var v; v' = linear(v); bind v <- v'; leave v } + // result = concat(e, leaved) + let p = build(|b, root| { + let e = b.input(root, 0); + let out = b.scope(root, |b, r| { + let v = b.variable(r); + let step = b.linear(r, v, vec![]); + b.bind(r, v, step); + b.leave(r, v) + }); + b.concat(root, &[e, out]) + }); + + // Structure: one nested scope, tagged iterative (it holds the var). 
+ let scopes = p.scopes(); + assert_eq!(scopes.parent, vec![None, Some(0)]); + assert_eq!(scopes.children[0], vec![1]); + assert_eq!(scopes.iterative, vec![false, true]); + + // The exported result is the top-level Concat (in the root scope). + let (_, result) = &p.export[0]; + assert_eq!(scopes.of_node[result], 0); + assert!(matches!(p.nodes[result], Node::Concat(_))); + } +} From bdb3ac64c926e1e9ed19da7ed655559f70536ff9 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 20:00:26 -0400 Subject: [PATCH 8/9] interactive/scope_builder: enter via imports; port reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unblock porting real programs onto the lifetime-bounded builder: - `scope(parent, imports, body)` now takes outer collections to use inside; each is re-branded to the body's fresh `'c`. Sound — `Coll` has no public constructor from a raw id, so the re-brand happens only here, on handles the caller already holds at the outer scope. - Added `arrange`/`join`/`reduce` so iterative programs are expressible. - `ports_reach` test builds the full reach program through the facade (outer edges/roots imported into the loop, var bound, result leaved) and checks the scope structure via `scopes()`. The two compile_fail discipline proofs still hold under the new signature: inner collections can't escape, and a scope body must `leave`. Co-Authored-By: Claude Opus 4.8 --- interactive/src/scope_builder.rs | 125 +++++++++++++++++++++++++------ 1 file changed, 101 insertions(+), 24 deletions(-) diff --git a/interactive/src/scope_builder.rs b/interactive/src/scope_builder.rs index 7372f6756..d6dda8b75 100644 --- a/interactive/src/scope_builder.rs +++ b/interactive/src/scope_builder.rs @@ -12,20 +12,29 @@ //! - **smuggling an inner collection out** — a `Coll<'r>` cannot be stored in //! or returned to an outer scope, because `'r` is generative and invariant. //! -//! 
See the `compile_fail` doctests below for both, made executable. +//! Both are proven by the `compile_fail` doctests below. //! -//! Scope of this increment: it establishes the *discipline*. The scope maps -//! to the existing iterative scope IR (`Scope`/`EndScope` + a `Leave` at -//! exit); the purely-structural (timestamp-unchanged, no-`Leave`) variant and -//! importing an outer collection *into* a scope (`enter`, which needs a -//! borrow-based lifetime relation rather than a generative one) are the next -//! refinements. The constructs built here are self-contained, so no `enter` -//! is required yet. +//! ### Importing outer collections (`enter`) +//! +//! Real programs reference outer collections inside a scope (e.g. SCC's `fwd` +//! joins the outer `edges`). `scope` takes an explicit `imports: &[Coll<'r>]` +//! list; each is re-branded to the inner `'c` and handed to the body. This is +//! sound — there is no public way to mint a `Coll` from a raw id, so the +//! re-brand can only happen here, on collections the caller already holds at +//! the outer scope. (For an iterative scope an outer reference needs no IR +//! node; the import is a pure re-brand.) The `ports_reach` test below builds +//! the full reach program this way. +//! +//! Scope of this increment: the scope maps to the existing iterative scope IR +//! (`Scope`/`EndScope` + a `Leave` at exit). The purely-structural +//! (timestamp-unchanged, no-`Leave`) variant needs backend support for +//! non-iterating scopes and is the next focused change. use std::collections::BTreeMap; use std::marker::PhantomData; use crate::ir::{Id, LinearOp, Node, Program}; +use crate::parse::{Projection, Reducer}; /// Invariant brand for a scope's lifetime: fresh per `scope`/`build` call, /// un-unifiable with any other, so a handle can't silently cross scopes. @@ -39,8 +48,8 @@ pub struct Coll<'r> { } impl<'r> Coll<'r> { - /// The underlying IR node id. 
(Read-only; minting a `Coll` is the - /// builder's job, so a raw id can't be re-branded into a scope.) + /// The underlying IR node id. Read-only: there is no public constructor + /// from an id, so a raw id can't be re-branded into a scope. pub fn id(self) -> Id { self.id } @@ -102,6 +111,27 @@ impl Builder { Self::wrap(id) } + /// Arrange a collection (the IR's explicit arrangement node). + pub fn arrange<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>) -> Coll<'r> { + let id = self.push(Node::Arrange(c.id)); + Self::wrap(id) + } + + /// Join two collections (arranges both, as the IR requires). + pub fn join<'r>(&mut self, r: Scope<'r>, left: Coll<'r>, right: Coll<'r>, projection: Projection) -> Coll<'r> { + let l = self.arrange(r, left); + let rt = self.arrange(r, right); + let id = self.push(Node::Join { left: l.id, right: rt.id, projection }); + Self::wrap(id) + } + + /// Reduce a collection (arranges its input). + pub fn reduce<'r>(&mut self, r: Scope<'r>, c: Coll<'r>, reducer: Reducer) -> Coll<'r> { + let a = self.arrange(r, c); + let id = self.push(Node::Reduce { input: a.id, reducer }); + Self::wrap(id) + } + /// Close the loop: bind an iteration `variable` to its `value`. pub fn bind<'r>(&mut self, _r: Scope<'r>, variable: Coll<'r>, value: Coll<'r>) { self.push(Node::Bind { variable: variable.id, value: value.id }); @@ -112,16 +142,23 @@ impl Builder { Left { id: c.id, _brand: PhantomData } } - /// Open a nested scope. The body receives a fresh brand `'c`; whatever it - /// `leave`s comes back as a collection in the parent scope `'r`. The - /// `for<'c>` quantification is what makes `'c` generative — a `Coll<'c>` - /// cannot escape the closure. - pub fn scope<'r, F>(&mut self, _parent: Scope<'r>, body: F) -> Coll<'r> + /// Open a nested scope. `imports` are outer collections to use inside; + /// each is re-branded to the body's fresh `'c`. Whatever the body `leave`s + /// comes back as a collection in the parent scope `'r`. 
The `for<'c>` + /// quantification makes `'c` generative — a `Coll<'c>` cannot escape. + pub fn scope<'r, F>(&mut self, _parent: Scope<'r>, imports: &[Coll<'r>], body: F) -> Coll<'r> where - F: for<'c> FnOnce(&mut Builder, Scope<'c>) -> Left<'c>, + F: for<'c> FnOnce(&mut Builder, Scope<'c>, Vec>) -> Left<'c>, { + let import_ids: Vec = imports.iter().map(|c| c.id).collect(); self.push(Node::Scope); - let inner = body(self, Scope { _brand: PhantomData }); + // The import `Coll`s are minted here, at the body's `'c` (inferred from + // the closure's parameter type) — the only place an outer id crosses in. + let inner = body( + self, + Scope { _brand: PhantomData }, + import_ids.iter().map(|&id| Coll { id, _brand: PhantomData }).collect(), + ); self.push(Node::EndScope); // Leave one nesting level. (For an iterative scope this strips the // iteration coordinate; the exact level is recomputed during a real @@ -148,7 +185,7 @@ where /// ```compile_fail /// use interactive::scope_builder::build; /// let _p = build(|b, root| { -/// let _ = b.scope(root, |b, r| { +/// let _ = b.scope(root, &[], |b, r, _imp| { /// let v = b.variable(r); /// v // ERROR: expected `Left<'_>`, found `Coll<'_>` /// }); @@ -163,7 +200,7 @@ where /// use interactive::scope_builder::build; /// let _p = build(|b, root| { /// let mut escaped = None; -/// let _ = b.scope(root, |b, r| { +/// let _ = b.scope(root, &[], |b, r, _imp| { /// let v = b.variable(r); /// escaped = Some(v); // ERROR: `v`'s scope `'c` would escape here /// b.leave(r, v) @@ -172,14 +209,13 @@ where /// b.input(root, 0) /// }); /// ``` -/// -/// And the well-formed version compiles and runs (see the unit test below). 
#[cfg(doctest)] struct DisciplineDoctests; #[cfg(test)] mod tests { use super::*; + use crate::parse::{FieldExpr, Projection, Reducer}; #[test] fn builds_nested_iterative_scope() { @@ -187,7 +223,7 @@ mod tests { // result = concat(e, leaved) let p = build(|b, root| { let e = b.input(root, 0); - let out = b.scope(root, |b, r| { + let out = b.scope(root, &[], |b, r, _imp| { let v = b.variable(r); let step = b.linear(r, v, vec![]); b.bind(r, v, step); @@ -196,15 +232,56 @@ mod tests { b.concat(root, &[e, out]) }); - // Structure: one nested scope, tagged iterative (it holds the var). let scopes = p.scopes(); assert_eq!(scopes.parent, vec![None, Some(0)]); assert_eq!(scopes.children[0], vec![1]); assert_eq!(scopes.iterative, vec![false, true]); - // The exported result is the top-level Concat (in the root scope). let (_, result) = &p.export[0]; assert_eq!(scopes.of_node[result], 0); assert!(matches!(p.nodes[result], Node::Concat(_))); } + + #[test] + fn ports_reach() { + // The reach program, built through the scope facade — outer `edges` + // and `roots` are imported into the loop, the body can't leak, and the + // result must be `leave`d. (Projections are well-formed but unexercised; + // this checks structure, not execution.) 
+ let key0 = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![FieldExpr::Index(0, 1)] }; + let key0_only = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![] }; + let join_proj = Projection { key: vec![FieldExpr::Pos(2)], val: vec![] }; + + let p = build(|b, root| { + let in0 = b.input(root, 0); + let edges = b.linear(root, in0, vec![LinearOp::Project(key0.clone())]); + let in1 = b.input(root, 1); + let roots = b.linear(root, in1, vec![LinearOp::Project(key0_only.clone())]); + b.scope(root, &[edges, roots], |b, r, imp| { + let edges = imp[0]; + let roots = imp[1]; + let reach = b.variable(r); + let proposals = b.join(r, reach, edges, join_proj.clone()); + let body = b.concat(r, &[roots, proposals]); + let next = b.reduce(r, body, Reducer::Distinct); + b.bind(r, reach, next); + b.leave(r, reach) + }) + }); + + let scopes = p.scopes(); + // One iterative scope (holds `reach`); edges/roots/Scope/Leave at root. + assert_eq!(scopes.parent, vec![None, Some(0)]); + assert_eq!(scopes.iterative, vec![false, true]); + // The exported result is the Leave of the loop, sitting at the root. + let (_, result) = &p.export[0]; + assert_eq!(scopes.of_node[result], 0); + assert!(matches!(p.nodes[result], Node::Leave(_, _))); + // The reach Variable and the join live inside scope 1. + let in_scope_1 = |pred: fn(&Node) -> bool| { + p.nodes.iter().any(|(id, n)| scopes.of_node[id] == 1 && pred(n)) + }; + assert!(in_scope_1(|n| matches!(n, Node::Variable))); + assert!(in_scope_1(|n| matches!(n, Node::Join { .. }))); + } } From f54b9584daa2d6e23ffcd271f506ce2c3cb1dec3 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Mon, 8 Jun 2026 20:00:26 -0400 Subject: [PATCH 9/9] interactive/scope_builder: per-Coll shape tracking by construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Carry Shape {k,v} on every Coll, computed at each op via the same width-summing logic the IR uses (explain::arities, now pub(crate)). 
A projection field is a whole-row reference of width = that row's arity, not one column — the miscount that made SCC's explanation unsound. Building through the facade can't reintroduce it: `join_arity_sums_field_widths` asserts a whole-row-Pos join projection yields v=3, not 2. Also adds shape-consistency asserts (concat shapes equal, join key arities match, bind variable/value shapes match) and shape propagation through the existing `arrange`/`join`/`reduce` ops; the reach port now threads shapes through the loop. Lifetime discipline (leave-required, inner confinement) unchanged — both compile_fail proofs still hold. Next: fold-depth typing for the time-as-data user_chain (the other unsound miscount), then porting reverse rules onto the builder. Co-Authored-By: Claude Opus 4.8 --- interactive/src/explain.rs | 4 +- interactive/src/scope_builder.rs | 226 +++++++++++++++++-------------- 2 files changed, 127 insertions(+), 103 deletions(-) diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index 52801b4fb..2e36d4120 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -543,7 +543,7 @@ pub mod arities { out } - fn apply_ops_arity((mut k, mut v): (usize, usize), ops: &[LinearOp]) -> (usize, usize) { + pub(crate) fn apply_ops_arity((mut k, mut v): (usize, usize), ops: &[LinearOp]) -> (usize, usize) { for op in ops { match op { // Project's input rows are [key, val]; expand `Pos` refs to @@ -574,7 +574,7 @@ pub mod arities { /// Total arity of one projection side (`key`/`val`): the sum of its /// fields' widths. - fn proj_arity(fields: &[FieldExpr], rows: &[usize]) -> usize { + pub(crate) fn proj_arity(fields: &[FieldExpr], rows: &[usize]) -> usize { fields.iter().map(|f| field_width(f, rows)).sum() } } diff --git a/interactive/src/scope_builder.rs b/interactive/src/scope_builder.rs index d6dda8b75..08f291ec4 100644 --- a/interactive/src/scope_builder.rs +++ b/interactive/src/scope_builder.rs @@ -1,75 +1,80 @@ //! Lifetime-bounded scope builder — a checked IR facade. 
//! -//! Compile-time scoping discipline. A `scope(...)` introduces a fresh, -//! *invariant* scope brand `'r`; collections built inside are `Coll<'r>` and -//! can leave the scope only through `leave`, whose result `Left<'r>` is the -//! only value a scope body may return. The borrow checker then rejects, at -//! compile time, the two mistakes behind the explanation rewrite's level -//! errors: +//! Two guarantees, both at compile or construction time: //! -//! - **forgetting to leave** — returning a `Coll<'r>` from the body is a type -//! error (the body must return `Left<'r>`); -//! - **smuggling an inner collection out** — a `Coll<'r>` cannot be stored in -//! or returned to an outer scope, because `'r` is generative and invariant. +//! **Scoping discipline (lifetimes).** A `scope(...)` introduces a fresh, +//! *invariant* brand `'r`; collections built inside are `Coll<'r>` and leave +//! only through `leave` (whose `Left<'r>` is the sole value a scope body may +//! return). The borrow checker then rejects, at compile time, both mistakes +//! behind the explanation rewrite's *scope* errors: forgetting to leave, and +//! smuggling an inner collection out (see the `compile_fail` doctests). //! -//! Both are proven by the `compile_fail` doctests below. +//! **Shape correctness (by construction).** Each `Coll` carries its `Shape` +//! `{k, v}`, computed at every op via the same width-summing logic the IR uses +//! (`explain::arities`). A projection field is a *whole-row* ref of width = +//! that row's arity, not one column — the miscount that made SCC's explanation +//! unsound. Building through this facade can't reintroduce it: see +//! `join_arity_sums_field_widths`. //! -//! ### Importing outer collections (`enter`) +//! Outer collections are brought into a scope as explicit `imports`, +//! re-branded to the inner `'c` (sound: `Coll` has no public constructor from +//! a raw id). `ports_reach` builds the full reach program this way. //! -//! 
Real programs reference outer collections inside a scope (e.g. SCC's `fwd` -//! joins the outer `edges`). `scope` takes an explicit `imports: &[Coll<'r>]` -//! list; each is re-branded to the inner `'c` and handed to the body. This is -//! sound — there is no public way to mint a `Coll` from a raw id, so the -//! re-brand can only happen here, on collections the caller already holds at -//! the outer scope. (For an iterative scope an outer reference needs no IR -//! node; the import is a pure re-brand.) The `ports_reach` test below builds -//! the full reach program this way. -//! -//! Scope of this increment: the scope maps to the existing iterative scope IR -//! (`Scope`/`EndScope` + a `Leave` at exit). The purely-structural -//! (timestamp-unchanged, no-`Leave`) variant needs backend support for -//! non-iterating scopes and is the next focused change. +//! Not yet covered (next increments): fold-depth typing for the time-as-data +//! `user_chain` (the other unsound miscount), and the purely-structural +//! (no-coord) scope variant. -use std::collections::BTreeMap; use std::marker::PhantomData; +use std::collections::BTreeMap; +use crate::explain::arities::{apply_ops_arity, proj_arity}; use crate::ir::{Id, LinearOp, Node, Program}; use crate::parse::{Projection, Reducer}; -/// Invariant brand for a scope's lifetime: fresh per `scope`/`build` call, -/// un-unifiable with any other, so a handle can't silently cross scopes. +/// The (key arity, value arity) of a collection. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Shape { + pub k: usize, + pub v: usize, +} + +/// Invariant brand for a scope's lifetime: fresh per `scope`/`build` call. type Brand<'r> = PhantomData &'r ()>; -/// A collection handle confined to scope `'r` (wraps an IR node id). +/// A collection handle confined to scope `'r`, carrying its shape. #[derive(Clone, Copy)] pub struct Coll<'r> { id: Id, + shape: Shape, _brand: Brand<'r>, } impl<'r> Coll<'r> { - /// The underlying IR node id. 
Read-only: there is no public constructor - /// from an id, so a raw id can't be re-branded into a scope. + /// The underlying IR node id (read-only; no public constructor from an id). pub fn id(self) -> Id { self.id } + /// The collection's `(k, v)` shape. + pub fn shape(self) -> Shape { + self.shape + } } /// Proof that a `Coll<'r>` was surrendered via `leave` — the only value a -/// scope body is allowed to return. +/// scope body may return. Carries the shape so `scope` can re-stamp it. pub struct Left<'r> { id: Id, + shape: Shape, _brand: Brand<'r>, } -/// A scope brand token (Copy), threaded into constructors so their results -/// are branded with this scope. +/// A scope brand token (Copy), threaded into constructors. #[derive(Clone, Copy)] pub struct Scope<'r> { _brand: Brand<'r>, } -/// IR builder: owns the node map and hands out scope-branded handles. +/// IR builder: owns the node map and hands out scope-branded, shaped handles. pub struct Builder { nodes: BTreeMap, next: Id, @@ -83,93 +88,107 @@ impl Builder { id } - fn wrap<'r>(id: Id) -> Coll<'r> { - Coll { id, _brand: PhantomData } + fn wrap<'r>(id: Id, shape: Shape) -> Coll<'r> { + Coll { id, shape, _brand: PhantomData } } - /// A positional input. - pub fn input<'r>(&mut self, _r: Scope<'r>, i: usize) -> Coll<'r> { + /// A positional input of declared shape `(k, v)`. + pub fn input<'r>(&mut self, _r: Scope<'r>, i: usize, k: usize, v: usize) -> Coll<'r> { let id = self.push(Node::Input(i)); - Self::wrap(id) + Self::wrap(id, Shape { k, v }) } - /// An iteration variable (the feedback edge of a `var`). - pub fn variable<'r>(&mut self, _r: Scope<'r>) -> Coll<'r> { + /// An iteration variable of declared shape `(k, v)` (its `bind` must match). + pub fn variable<'r>(&mut self, _r: Scope<'r>, k: usize, v: usize) -> Coll<'r> { let id = self.push(Node::Variable); - Self::wrap(id) + Self::wrap(id, Shape { k, v }) } - /// A linear (map/filter/…) transform of one collection. 
+ /// A linear transform; shape follows the ops (width-summing for Project). pub fn linear<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>, ops: Vec) -> Coll<'r> { + let (k, v) = apply_ops_arity((c.shape.k, c.shape.v), &ops); let id = self.push(Node::Linear { input: c.id, ops }); - Self::wrap(id) + Self::wrap(id, Shape { k, v }) } - /// Concatenation of collections (all in the same scope). + /// Concatenation; all inputs must share a shape. pub fn concat<'r>(&mut self, _r: Scope<'r>, cs: &[Coll<'r>]) -> Coll<'r> { + assert!(!cs.is_empty(), "concat: needs at least one input"); + let shape = cs[0].shape; + assert!(cs.iter().all(|c| c.shape == shape), "concat: inputs must share a shape"); let id = self.push(Node::Concat(cs.iter().map(|c| c.id).collect())); - Self::wrap(id) + Self::wrap(id, shape) } - /// Arrange a collection (the IR's explicit arrangement node). + /// Arrange a collection (shape-preserving). pub fn arrange<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>) -> Coll<'r> { let id = self.push(Node::Arrange(c.id)); - Self::wrap(id) + Self::wrap(id, c.shape) } - /// Join two collections (arranges both, as the IR requires). + /// Join two collections; their key arities must match. Output shape is the + /// projection over input rows `[key, left_val, right_val]` (width-summed). pub fn join<'r>(&mut self, r: Scope<'r>, left: Coll<'r>, right: Coll<'r>, projection: Projection) -> Coll<'r> { + assert_eq!(left.shape.k, right.shape.k, "join: key arities must match"); + let rows = [left.shape.k, left.shape.v, right.shape.v]; + let shape = Shape { + k: proj_arity(&projection.key, &rows), + v: proj_arity(&projection.val, &rows), + }; let l = self.arrange(r, left); let rt = self.arrange(r, right); let id = self.push(Node::Join { left: l.id, right: rt.id, projection }); - Self::wrap(id) + Self::wrap(id, shape) } - /// Reduce a collection (arranges its input). + /// Reduce a collection; output value arity follows the reducer. 
pub fn reduce<'r>(&mut self, r: Scope<'r>, c: Coll<'r>, reducer: Reducer) -> Coll<'r> { + let v = match reducer { + Reducer::Distinct => 0, + Reducer::Min => c.shape.v, + Reducer::Count => 1, + }; + let shape = Shape { k: c.shape.k, v }; let a = self.arrange(r, c); let id = self.push(Node::Reduce { input: a.id, reducer }); - Self::wrap(id) + Self::wrap(id, shape) } - /// Close the loop: bind an iteration `variable` to its `value`. + /// Close the loop: bind `variable` to `value`; their shapes must match. pub fn bind<'r>(&mut self, _r: Scope<'r>, variable: Coll<'r>, value: Coll<'r>) { + assert_eq!(variable.shape, value.shape, "bind: variable and value shapes must match"); self.push(Node::Bind { variable: variable.id, value: value.id }); } /// Surrender a scope collection so the scope body can return it. pub fn leave<'r>(&mut self, _r: Scope<'r>, c: Coll<'r>) -> Left<'r> { - Left { id: c.id, _brand: PhantomData } + Left { id: c.id, shape: c.shape, _brand: PhantomData } } - /// Open a nested scope. `imports` are outer collections to use inside; - /// each is re-branded to the body's fresh `'c`. Whatever the body `leave`s - /// comes back as a collection in the parent scope `'r`. The `for<'c>` - /// quantification makes `'c` generative — a `Coll<'c>` cannot escape. + /// Open a nested scope. `imports` are outer collections to use inside, + /// each re-branded to the body's fresh `'c`. Whatever the body `leave`s + /// returns as a collection in the parent scope `'r`. pub fn scope<'r, F>(&mut self, _parent: Scope<'r>, imports: &[Coll<'r>], body: F) -> Coll<'r> where F: for<'c> FnOnce(&mut Builder, Scope<'c>, Vec>) -> Left<'c>, { - let import_ids: Vec = imports.iter().map(|c| c.id).collect(); + let imports: Vec<(Id, Shape)> = imports.iter().map(|c| (c.id, c.shape)).collect(); self.push(Node::Scope); - // The import `Coll`s are minted here, at the body's `'c` (inferred from - // the closure's parameter type) — the only place an outer id crosses in. 
+ // Import handles are minted here, at the body's `'c` (inferred from the + // closure's parameter type) — the only place an outer id crosses in. let inner = body( self, Scope { _brand: PhantomData }, - import_ids.iter().map(|&id| Coll { id, _brand: PhantomData }).collect(), + imports.iter().map(|&(id, shape)| Coll { id, shape, _brand: PhantomData }).collect(), ); self.push(Node::EndScope); - // Leave one nesting level. (For an iterative scope this strips the - // iteration coordinate; the exact level is recomputed during a real - // lowering. scopes() doesn't depend on it.) let id = self.push(Node::Leave(inner.id, 1)); - Self::wrap(id) + Self::wrap(id, inner.shape) } } -/// Build a program. The body runs in the root scope and returns the result, -/// which is exported as `"$result"`. +/// Build a program; the body runs in the root scope and returns the result, +/// exported as `"$result"`. pub fn build(body: F) -> Program where F: for<'root> FnOnce(&mut Builder, Scope<'root>) -> Coll<'root>, @@ -179,34 +198,32 @@ where Program { nodes: b.nodes, export: vec![("$result".into(), result.id())] } } -/// Discipline check: forgetting to `leave` is a type error — the scope body -/// must return `Left<'c>`, not a bare `Coll<'c>`. +/// Discipline check: forgetting to `leave` is a type error. /// /// ```compile_fail /// use interactive::scope_builder::build; /// let _p = build(|b, root| { /// let _ = b.scope(root, &[], |b, r, _imp| { -/// let v = b.variable(r); +/// let v = b.variable(r, 1, 0); /// v // ERROR: expected `Left<'_>`, found `Coll<'_>` /// }); -/// b.input(root, 0) +/// b.input(root, 0, 2, 0) /// }); /// ``` /// -/// Discipline check: an inner collection cannot escape to an outer scope — -/// `'c` is generative, so it can't be stored in an outer-scoped variable. +/// Discipline check: an inner collection can't escape to an outer scope. 
/// /// ```compile_fail /// use interactive::scope_builder::build; /// let _p = build(|b, root| { /// let mut escaped = None; /// let _ = b.scope(root, &[], |b, r, _imp| { -/// let v = b.variable(r); +/// let v = b.variable(r, 1, 0); /// escaped = Some(v); // ERROR: `v`'s scope `'c` would escape here /// b.leave(r, v) /// }); /// let _ = escaped; -/// b.input(root, 0) +/// b.input(root, 0, 2, 0) /// }); /// ``` #[cfg(doctest)] @@ -215,16 +232,14 @@ struct DisciplineDoctests; #[cfg(test)] mod tests { use super::*; - use crate::parse::{FieldExpr, Projection, Reducer}; + use crate::parse::FieldExpr; #[test] fn builds_nested_iterative_scope() { - // root: input e; scope { var v; v' = linear(v); bind v <- v'; leave v } - // result = concat(e, leaved) let p = build(|b, root| { - let e = b.input(root, 0); + let e = b.input(root, 0, 1, 0); let out = b.scope(root, &[], |b, r, _imp| { - let v = b.variable(r); + let v = b.variable(r, 1, 0); let step = b.linear(r, v, vec![]); b.bind(r, v, step); b.leave(r, v) @@ -236,48 +251,57 @@ mod tests { assert_eq!(scopes.parent, vec![None, Some(0)]); assert_eq!(scopes.children[0], vec![1]); assert_eq!(scopes.iterative, vec![false, true]); - let (_, result) = &p.export[0]; assert_eq!(scopes.of_node[result], 0); assert!(matches!(p.nodes[result], Node::Concat(_))); } + #[test] + fn join_arity_sums_field_widths() { + // The bug-1 scenario: a join projection whose val uses whole-row `Pos` + // refs to a multi-column side. Width-summing must give v = 1 + 2 = 3, + // not the field-expr count of 2 (the miscount that broke SCC explain). 
+ let _p = build(|b, root| { + let l = b.input(root, 0, 1, 1); // shape {k:1, v:1} + let r = b.input(root, 1, 1, 2); // shape {k:1, v:2} + let j = b.join(root, l, r, Projection { + key: vec![FieldExpr::Pos(0)], + val: vec![FieldExpr::Pos(1), FieldExpr::Pos(2)], + }); + assert_eq!(j.shape(), Shape { k: 1, v: 3 }); + j + }); + } + #[test] fn ports_reach() { - // The reach program, built through the scope facade — outer `edges` - // and `roots` are imported into the loop, the body can't leak, and the - // result must be `leave`d. (Projections are well-formed but unexercised; - // this checks structure, not execution.) - let key0 = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![FieldExpr::Index(0, 1)] }; - let key0_only = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![] }; + let key_kv = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![FieldExpr::Index(0, 1)] }; + let key_k = Projection { key: vec![FieldExpr::Index(0, 0)], val: vec![] }; let join_proj = Projection { key: vec![FieldExpr::Pos(2)], val: vec![] }; let p = build(|b, root| { - let in0 = b.input(root, 0); - let edges = b.linear(root, in0, vec![LinearOp::Project(key0.clone())]); - let in1 = b.input(root, 1); - let roots = b.linear(root, in1, vec![LinearOp::Project(key0_only.clone())]); + let in0 = b.input(root, 0, 2, 0); + let edges = b.linear(root, in0, vec![LinearOp::Project(key_kv.clone())]); // {1,1} + let in1 = b.input(root, 1, 2, 0); + let roots = b.linear(root, in1, vec![LinearOp::Project(key_k.clone())]); // {1,0} b.scope(root, &[edges, roots], |b, r, imp| { let edges = imp[0]; let roots = imp[1]; - let reach = b.variable(r); - let proposals = b.join(r, reach, edges, join_proj.clone()); - let body = b.concat(r, &[roots, proposals]); - let next = b.reduce(r, body, Reducer::Distinct); - b.bind(r, reach, next); + let reach = b.variable(r, 1, 0); + let proposals = b.join(r, reach, edges, join_proj.clone()); // {1,0} + let body = b.concat(r, &[roots, proposals]); // {1,0} + 
let next = b.reduce(r, body, Reducer::Distinct); // {1,0} + b.bind(r, reach, next); // shapes match b.leave(r, reach) }) }); let scopes = p.scopes(); - // One iterative scope (holds `reach`); edges/roots/Scope/Leave at root. assert_eq!(scopes.parent, vec![None, Some(0)]); assert_eq!(scopes.iterative, vec![false, true]); - // The exported result is the Leave of the loop, sitting at the root. let (_, result) = &p.export[0]; assert_eq!(scopes.of_node[result], 0); assert!(matches!(p.nodes[result], Node::Leave(_, _))); - // The reach Variable and the join live inside scope 1. let in_scope_1 = |pred: fn(&Node) -> bool| { p.nodes.iter().any(|(id, n)| scopes.of_node[id] == 1 && pred(n)) };