From e5d28c3f7243574165b68c5c2911fa0f4195a9a9 Mon Sep 17 00:00:00 2001 From: Frank McSherry Date: Fri, 5 Jun 2026 16:34:28 -0400 Subject: [PATCH] interactive: import/export in the IR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `import "name"` and `export "name" = expr` to the IR and both surface syntaxes, and remove the `result` statement: `export` is now the sole way to declare a program output, and only at the root scope. `Program.export` is a named list (was a single result id); `Node::Import` resolves against a registry at install time. `Node::Import` is a stub outside the server (the example renderers panic on it, no example uses it); the intended end-state is one named-source substrate that subsumes `Input(usize)` — there should not be two ways to bring in a source. `survey_sources` (was `count_inputs`) returns both kinds until that cutover. Threaded through lower, both parsers, explain, and the ddir_vec/ddir_col/ dump_explain examples; example programs now use `export "result" = …`. The string lexer is read-until-quote (no escapes — names don't use them). Co-Authored-By: Claude Opus 4.8 --- interactive/examples/ddir_col.rs | 18 ++- interactive/examples/ddir_vec.rs | 22 +++- interactive/examples/dump_explain.rs | 20 +++- interactive/examples/programs/kcore.ddir | 2 +- interactive/examples/programs/reach.ddir | 2 +- interactive/examples/programs/reach.ddp | 2 +- interactive/examples/programs/scc.ddir | 2 +- interactive/examples/programs/scc.ddp | 2 +- interactive/examples/programs/stable.ddir | 2 +- interactive/examples/programs/stable.ddp | 2 +- interactive/src/explain.rs | 127 +++++++++++++++++----- interactive/src/ir.rs | 32 ++++-- interactive/src/lib.rs | 40 ++++--- interactive/src/lower.rs | 56 +++++++--- interactive/src/parse/applicative.rs | 38 ++++++- interactive/src/parse/mod.rs | 7 +- interactive/src/parse/pipe.rs | 38 ++++++- 17 files changed, 314 insertions(+), 98 deletions(-) diff --git a/interactive/examples/ddir_col.rs b/interactive/examples/ddir_col.rs index 8be9ad863..eb7e54898 100644 --- a/interactive/examples/ddir_col.rs +++ b/interactive/examples/ddir_col.rs @@ -147,6 +147,7 @@ mod render { Node::Input(i) => { nodes.insert(id, Rendered::Collection(inputs[*i].clone())); }, + Node::Import { name } => panic!("ddir_col: Import {:?} not supported in this harness (no trace registry).", name), Node::Linear { input, ops } => { let c = nodes[input].collection(); let ops = ops.clone(); @@ -305,10 +306,18 @@ fn run(name: &str, stmts: Vec, n_inputs: usize, nodes: u64, edges: let mut compiled: Program = lower::lower(stmts); println!("{}: {} IR nodes (before optimize)", name, compiled.nodes.len()); compiled.optimize(); - println!("{}: {} IR nodes (after optimize), result = {}", name, compiled.nodes.len(), compiled.result); + println!("{}: {} IR nodes (after optimize), exports = {:?}", + name, compiled.nodes.len(), + compiled.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); compiled.dump(); let name = name.to_string(); - let result_id = compiled.result; + let (driven_name, result_id) = { + let pick = compiled.export.iter().find(|(n, _)| n == "result") + .or_else(|| compiled.export.first()) + .expect("ddir_col: program declares no exports"); + (pick.0.clone(), pick.1) + }; + println!("{}: driving export {:?} (id {})", name, driven_name, result_id); timely::execute_from_args(std::env::args().skip(4), move |worker| { use timely::dataflow::InputHandle; @@ -412,7 +421,10 @@ fn main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); + if !imports.is_empty() { + panic!("ddir_col: program references imports {:?} but this harness has no trace registry.", imports); + } let name = std::path::Path::new(&program).file_stem().map(|s| s.to_string_lossy().into_owned()).unwrap_or(program.clone()); run(&name, stmts, n_inputs, nodes, edges, arity, batch, rounds); } diff --git a/interactive/examples/ddir_vec.rs b/interactive/examples/ddir_vec.rs index 33b13d1b1..de8b0eb7b 100644 --- a/interactive/examples/ddir_vec.rs +++ b/interactive/examples/ddir_vec.rs @@ -53,6 +53,7 @@ fn render_program<'scope>(program: &Program, scope: Scope<'scope, DdirTime>, inp for (&id, node) in program.nodes.iter() { match node { Node::Input(i) => { nodes.insert(id, Rendered::Collection(inputs[*i].clone())); }, + Node::Import { name } => panic!("ddir_vec: Import {:?} not supported in this harness (no trace registry).", name), Node::Linear { input, ops } => { use differential_dataflow::AsCollection; use differential_dataflow::lattice::Lattice; @@ -172,14 +173,24 @@ fn run( // seeded from `QUERY=`. if explain { let input_arities = vec![(arity, 0usize); n_inputs]; - compiled = interactive::explain::explain(&compiled, &input_arities); + let import_arities = std::collections::BTreeMap::new(); + compiled = interactive::explain::explain(&compiled, &input_arities, &import_arities); } println!("{}: {} IR nodes (before optimize)", name, compiled.nodes.len()); compiled.optimize(); - println!("{}: {} IR nodes (after optimize), result = {}", name, compiled.nodes.len(), compiled.result); + println!("{}: {} IR nodes (after optimize), exports = {:?}", + name, compiled.nodes.len(), + compiled.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); compiled.dump(); let name = name.to_string(); - let result_id = compiled.result; + // Drive one export: prefer `$result`, else the first declared. + let (driven_name, result_id) = { + let pick = compiled.export.iter().find(|(n, _)| n == "result") + .or_else(|| compiled.export.first()) + .expect("ddir_vec: program declares no exports"); + (pick.0.clone(), pick.1) + }; + println!("{}: driving export {:?} (id {})", name, driven_name, result_id); let total_inputs = if explain { n_inputs + 1 } else { n_inputs }; let query_input_idx = if explain { Some(n_inputs) } else { None }; @@ -297,7 +308,10 @@ fn main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); + if !imports.is_empty() { + panic!("ddir_vec: program references imports {:?} but this harness has no trace registry.", imports); + } let name = std::path::Path::new(&program).file_stem().map(|s| s.to_string_lossy().into_owned()).unwrap_or(program.clone()); run(&name, stmts, n_inputs, nodes, edges, arity, batch, rounds, explain); } diff --git a/interactive/examples/dump_explain.rs b/interactive/examples/dump_explain.rs index 3692501be..800e27f92 100644 --- a/interactive/examples/dump_explain.rs +++ b/interactive/examples/dump_explain.rs @@ -23,7 +23,7 @@ fn main() { } else { parse::applicative::parse(&source) }; - let n_inputs = interactive::count_inputs(&stmts); + let (n_inputs, imports) = interactive::survey_sources(&stmts); let original = lower::lower(stmts); println!("-- ===================================================="); @@ -32,7 +32,11 @@ fn main() { print_ddp(&original); let input_arities = vec![(arity, 0usize); n_inputs]; - let rewritten = explain::explain(&original, &input_arities); + let import_arities: BTreeMap = imports + .iter() + .map(|n| (n.clone(), (arity, 0usize))) + .collect(); + let rewritten = explain::explain(&original, &input_arities, &import_arities); println!(); println!("-- ===================================================="); @@ -85,6 +89,9 @@ fn print_ddp(p: &Program) { Node::Input(i) => { println!("{}let n{} = input {};", pad, id, i); } + Node::Import { name } => { + println!("{}let n{} = import {:?};", pad, id, name); + } Node::Linear { input, ops } => { println!("{}let n{} = n{} | {};", pad, id, input, fmt_linear_ops(ops)); } @@ -138,7 +145,14 @@ fn print_ddp(p: &Program) { } } } - println!("{}result n{};", " ".repeat(indent), p.result); + let pad = " ".repeat(indent); + for (name, id) in &p.export { + if name == "result" { + println!("{}result n{};", pad, id); + } else { + println!("{}export {:?} = n{};", pad, name, id); + } + } } fn fmt_linear_ops(ops: &[LinearOp]) -> String { diff --git a/interactive/examples/programs/kcore.ddir b/interactive/examples/programs/kcore.ddir index 2823adbb5..fa4540b84 100644 --- a/interactive/examples/programs/kcore.ddir +++ b/interactive/examples/programs/kcore.ddir @@ -25,4 +25,4 @@ peel: { } let core_edges = CONCAT(symm, peel::removals); -result INSPECT(REDUCE(MAP(core_edges, ($0[0] ;)), COUNT), kcore_degrees); +export "result" = INSPECT(REDUCE(MAP(core_edges, ($0[0] ;)), COUNT), kcore_degrees); diff --git a/interactive/examples/programs/reach.ddir b/interactive/examples/programs/reach.ddir index 349594cea..b606964a7 100644 --- a/interactive/examples/programs/reach.ddir +++ b/interactive/examples/programs/reach.ddir @@ -10,4 +10,4 @@ reach: { var reach = REDUCE(CONCAT(roots, proposals), DISTINCT); } -result INSPECT(ARRANGE(MAP(reach::reach, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(reach::reach, (;))), total); diff --git a/interactive/examples/programs/reach.ddp b/interactive/examples/programs/reach.ddp index 36d24b979..0dede6e31 100644 --- a/interactive/examples/programs/reach.ddp +++ b/interactive/examples/programs/reach.ddp @@ -10,4 +10,4 @@ reach: { var reach = roots + proposals | distinct; } -result reach::reach | key(;) | arrange | inspect(total); +export "result" = reach::reach | key(;) | arrange | inspect(total); diff --git a/interactive/examples/programs/scc.ddir b/interactive/examples/programs/scc.ddir index a219586be..121433d21 100644 --- a/interactive/examples/programs/scc.ddir +++ b/interactive/examples/programs/scc.ddir @@ -35,4 +35,4 @@ outer: { var scc_raw = CONCAT(trim_bwd, NEGATE(edges)); } -result INSPECT(ARRANGE(MAP(outer::scc, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(outer::scc, (;))), total); diff --git a/interactive/examples/programs/scc.ddp b/interactive/examples/programs/scc.ddp index 88ef7e790..91c673ddf 100644 --- a/interactive/examples/programs/scc.ddp +++ b/interactive/examples/programs/scc.ddp @@ -34,4 +34,4 @@ outer: { var trim = trim_bwd - edges; } -result outer::scc | map(;) | arrange | inspect(total); +export "result" = outer::scc | map(;) | arrange | inspect(total); diff --git a/interactive/examples/programs/stable.ddir b/interactive/examples/programs/stable.ddir index b245c7fe7..ed0918e54 100644 --- a/interactive/examples/programs/stable.ddir +++ b/interactive/examples/programs/stable.ddir @@ -19,4 +19,4 @@ matching: { var removals = ARRANGE(CONCAT(removals, by_a, NEGATE(props))); } -result INSPECT(ARRANGE(MAP(matching::props, (;))), total); +export "result" = INSPECT(ARRANGE(MAP(matching::props, (;))), total); diff --git a/interactive/examples/programs/stable.ddp b/interactive/examples/programs/stable.ddp index d4e21126d..afd5d626d 100644 --- a/interactive/examples/programs/stable.ddp +++ b/interactive/examples/programs/stable.ddp @@ -12,4 +12,4 @@ matching: { var removals = removals + by_a - props | arrange; } -result matching::props | key(;) | arrange | inspect(total); +export "result" = matching::props | key(;) | arrange | inspect(total); diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs index 4a9e944a1..dc536f6ef 100644 --- a/interactive/src/explain.rs +++ b/interactive/src/explain.rs @@ -184,12 +184,18 @@ use clone::CloneResult; /// demand-set explanations for queries against the original's result. /// See the module doc for the architecture. /// -/// `input_arities` gives `(key_arity, val_arity)` per input, in input -/// order. Necessary because input row shapes aren't recoverable from the -/// IR alone (Projections only invert with known input arity). -pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { +/// `input_arities` gives `(key_arity, val_arity)` per positional input; +/// `import_arities` gives the same per named import (entries needed for +/// every distinct `Import { name }` referenced in `p`). Both are +/// necessary because data-source shapes aren't recoverable from the IR +/// alone (Projections only invert with known input arity). +pub fn explain( + p: &Program, + input_arities: &[(usize, usize)], + import_arities: &BTreeMap, +) -> Program { let mut b = Builder::new(); - let arities = compute_arities(p, input_arities); + let arities = compute_arities(p, input_arities, import_arities); let depths = p.depths(); // The two user-chain lengths we track at each node: // @@ -220,21 +226,37 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { let dep_user_lens: &BTreeMap = &depths; let n_inputs = input_arities.len(); + // Distinct import names referenced by `p`, in deterministic order. + let import_names: Vec = { + let mut s = std::collections::BTreeSet::new(); + for node in p.nodes.values() { + if let Node::Import { name } = node { s.insert(name.clone()); } + } + s.into_iter().collect() + }; + // ---- outer scope ---- - // Original inputs of `p`, plus one extra "query" input at index n. + // Original inputs of `p`, one outer-scope Import per referenced name, + // plus one extra "query" input. let original_inputs: Vec = (0..n_inputs).map(|i| b.input(i)).collect(); + let original_imports: BTreeMap = import_names.iter() + .map(|n| (n.clone(), b.push(Node::Import { name: n.clone() }))) + .collect(); let query_input = b.input(n_inputs); // witness: a clone of `p`, with lift_iter chains so every witness // collection has a host-visible `(data, user)` form via auto-leave at // each enclosing user scope's exit. - let witness = b.clone_with_lifts(p, &original_inputs, 0); + let witness = b.clone_with_lifts(p, &original_inputs, &original_imports, 0); // ---- explain scope ---- b.scope_open(); - // Demand-set Variables (one per input). + // Demand-set Variables (one per input, one per import). let demand_sets: Vec = (0..n_inputs).map(|_| b.variable()).collect(); + let import_demand_sets: BTreeMap = import_names.iter() + .map(|n| (n.clone(), b.variable())) + .collect(); // forward inputs: demand_set_ | semijoin(actual_input_). // Enter actual inputs into explain scope implicitly; semijoin restricts to @@ -245,11 +267,18 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { b.semijoin_data(demand_sets[i], original_inputs[i], k, v) }) .collect(); + let forward_imports: BTreeMap = import_names.iter() + .map(|n| { + let (k, v) = import_arities[n]; + let semi = b.semijoin_data(import_demand_sets[n], original_imports[n], k, v); + (n.clone(), semi) + }) + .collect(); // forward: same clone procedure as witness, with substituted inputs. // Offset = 1 because this clone lives INSIDE the explain scope: its real // PointStamp depth at any point is one more than its local user_level. - let forward = b.clone_with_lifts(p, &forward_inputs, 1); + let forward = b.clone_with_lifts(p, &forward_inputs, &forward_imports, 1); // Demand Variables are pre-allocated *only* for user-program `var` IR // nodes (`Node::Variable`). These are the only places where the demand @@ -271,10 +300,14 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { // from `contribs[id]`, store it in `demand_var`, then dispatch the // node's bward rule (which pushes onto its inputs' contribs). // - // Query input directly seeds `contribs[result]` — the result demand - // starts with the query rows. + // Query input directly seeds `contribs[first_export]` — the demand + // starts with the query rows against the first export. v0 only + // explains a single output; multi-export programs would need one + // query input per export and per-output dispatch in the seeding. + let primary_export = p.export.first() + .expect("explain: program has no export to seed query input against").1; let mut contribs: BTreeMap> = BTreeMap::new(); - contribs.entry(p.result).or_default().push(query_input); + contribs.entry(primary_export).or_default().push(query_input); for (&id, node) in p.nodes.iter().rev() { // Scope / EndScope carry no demand and have no bward action. @@ -319,13 +352,19 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { b.emit_reverse(id, node, &witness, &forward, &demand_var, &arities, &host_user_lens, dep_user_lens, &mut contribs); } - // Bind demand-set variables: demand_set_ := distinct(demand_set_ + (demand_ | strip | semijoin actual)). + // Bind demand-set variables for inputs and imports symmetrically: + // demand_set_X := distinct(demand_set_X + (demand_ | strip | semijoin actual)). // Build a Vec mapping input index `i` to its IR id in `p`, so the - // per-input loop below is O(n) total instead of O(n^2). + // per-input loop below is O(n) total instead of O(n^2). Imports are + // looked up by name; multiple `Import { name }` nodes in `p` share + // demand via the dedup pass — we route from any one of them. let mut input_ids: Vec> = vec![None; n_inputs]; + let mut import_ids: BTreeMap = BTreeMap::new(); for (&id, node) in &p.nodes { - if let Node::Input(i) = node { - input_ids[*i] = Some(id); + match node { + Node::Input(i) => { input_ids[*i] = Some(id); } + Node::Import { name } => { import_ids.entry(name.clone()).or_insert(id); } + _ => {} } } for i in 0..n_inputs { @@ -338,17 +377,30 @@ pub fn explain(p: &Program, input_arities: &[(usize, usize)]) -> Program { let dist = b.distinct_full(combined, kx, vx); b.bind(demand_sets[i], dist); } - - // Inspects on demand-sets. - let mut last_inspect: Option = None; - for (i, &mv) in demand_sets.iter().enumerate() { - last_inspect = Some(b.inspect(mv, format!("demand_set_{}", i))); + for name in &import_names { + let imp_id = import_ids[name]; + let (kx, vx) = arities[&imp_id]; + let stripped = b.project(demand_var[&imp_id], strip_user_and_q(kx, vx)); + let semi = b.semijoin_data(stripped, original_imports[name], kx, vx); + let combined = b.concat(vec![import_demand_sets[name], semi]); + let dist = b.distinct_full(combined, kx, vx); + b.bind(import_demand_sets[name], dist); } - let result_inner = last_inspect.unwrap_or_else(|| demand_sets.first().copied().unwrap_or(0)); + + // Per-source demand-set ids inside the explain scope; we leave each + // out and register as a named export after closing the scope. + let inputs_leaves: Vec<(String, Id)> = demand_sets.iter().enumerate() + .map(|(i, &mv)| (format!("demand:input{}", i), mv)) + .collect(); + let imports_leaves: Vec<(String, Id)> = import_names.iter() + .map(|n| (format!("demand:{}", n), import_demand_sets[n])) + .collect(); b.scope_close(); - let result_outer = b.leave(result_inner, 1); - b.set_result(result_outer); + for (name, inner) in inputs_leaves.into_iter().chain(imports_leaves) { + let outer = b.leave(inner, 1); + b.add_export(name, outer); + } b.into_program() } @@ -371,7 +423,7 @@ mod builder { impl Builder { pub(super) fn new() -> Self { Builder { - program: Program { nodes: BTreeMap::new(), result: 0 }, + program: Program { nodes: BTreeMap::new(), export: Vec::new() }, next_id: 0, } } @@ -414,7 +466,7 @@ mod builder { } pub(super) fn scope_open(&mut self) { self.push(Node::Scope); } pub(super) fn scope_close(&mut self) { self.push(Node::EndScope); } - pub(super) fn set_result(&mut self, id: Id) { self.program.result = id; } + pub(super) fn add_export(&mut self, name: String, id: Id) { self.program.export.push((name, id)); } pub(super) fn into_program(self) -> Program { self.program } } } @@ -426,13 +478,17 @@ mod builder { /// recoverable from the IR alone — `Projection`s only invert with known /// input arity, and lift_iter sites need to know how many user-iter coords /// already sit in the val. -mod arities { +pub mod arities { use std::collections::BTreeMap; use crate::ir::{Id, LinearOp, Node, Program}; use crate::parse::Reducer; - pub(super) fn compute_arities(p: &Program, input_arities: &[(usize, usize)]) -> BTreeMap { + pub fn compute_arities( + p: &Program, + input_arities: &[(usize, usize)], + import_arities: &BTreeMap, + ) -> BTreeMap { // Variables are referenced before their Binds appear in id order; // resolve a Variable's shape via its body. let var_body: BTreeMap = p.nodes.iter().filter_map(|(_, n)| { @@ -448,6 +504,10 @@ mod arities { if out.contains_key(&id) { continue; } let shape = match node { Node::Input(i) => Some(input_arities[*i]), + Node::Import { name } => Some( + *import_arities.get(name) + .unwrap_or_else(|| panic!("explain: no arity registered for import {:?}", name)) + ), Node::Linear { input, ops } => out.get(input).map(|s| apply_ops_arity(*s, ops)), // Try each input — for self-recursive Variables that appear // as `Concat([var, ...])`, the first input's shape isn't @@ -512,6 +572,7 @@ mod clone { &mut self, p: &Program, input_subst: &[Id], + import_subst: &BTreeMap, enclosing_scope_depth: usize, ) -> CloneResult { let mut in_scope: BTreeMap = BTreeMap::new(); @@ -570,6 +631,14 @@ mod clone { host.insert(id, input_subst[*i]); None } + Node::Import { name } => { + // Imports are at depth 0, host-visible directly. + let sub = *import_subst.get(name) + .unwrap_or_else(|| panic!("clone: no substitution for import {:?}", name)); + in_scope.insert(id, sub); + host.insert(id, sub); + None + } Node::Linear { input, ops } => { Some(self.linear(in_scope[input], ops.clone())) } @@ -758,7 +827,7 @@ mod reverse { let side = |inp: Id| Side::for_input(inp, witness, forward, arities, host_user_lens, dep_user_lens); match node { - Node::Input(_) => { /* terminal; feeds demand-set seeding. */ } + Node::Input(_) | Node::Import { .. } => { /* terminal; feeds demand-set seeding. */ } Node::Linear { input, ops } => { let op = match ops.as_slice() { diff --git a/interactive/src/ir.rs b/interactive/src/ir.rs index 37b8643d5..8c0fc45c4 100644 --- a/interactive/src/ir.rs +++ b/interactive/src/ir.rs @@ -56,6 +56,15 @@ pub enum LinearOp { /// Symbolic IR node. pub enum Node { Input(usize), + /// A named external trace, resolved against a registry at install time; + /// shape is inferred from the registry, not the IR. + /// + /// STUB: only the server resolves this; the example renderers don't, and no + /// example program uses it yet. The intended end-state is a single + /// named-source substrate that also subsumes `Input(usize)` — there should + /// not be two ways to bring in a source. Until that cutover, `Input` is the + /// working input and `Import` is forward-looking. + Import { name: String }, /// A chain of linear operations on a stream of (data, time, diff) triples. Linear { input: Id, ops: Vec }, Concat(Vec), @@ -72,7 +81,8 @@ pub enum Node { pub struct Program { pub nodes: BTreeMap, - pub result: Id, + /// Named outputs of the program. + pub export: Vec<(String, Id)>, } impl Program { @@ -81,6 +91,7 @@ impl Program { for (&id, node) in &self.nodes { let desc = match node { Node::Input(i) => format!("Input({})", i), + Node::Import { name } => format!("Import({:?})", name), Node::Linear { input, ops } => { let ops_str: Vec = ops.iter().map(|op| match op { LinearOp::Project(_) => "Project".into(), @@ -104,7 +115,9 @@ impl Program { }; println!(" {:3}: {}", id, desc); } - println!(" result: {}", self.result); + for (name, id) in &self.export { + println!(" export {:?} = {}", name, id); + } } /// Per-node user-scope depth. Computed by walking `nodes` in id order @@ -141,7 +154,7 @@ impl Program { Node::Concat(ids) => ids.clone(), Node::Leave(id, _) => vec![*id], Node::Bind { value, .. } => vec![*value], - Node::Input(_) | Node::Variable | Node::Scope | Node::EndScope => vec![], + Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => vec![], }; for input in inputs { users.entry(input).or_default().push(user_id); @@ -173,7 +186,9 @@ impl Program { Ok(()) } - /// Replace all references to `from` with `to` across the IR. + /// Redirect every reference to node `from` so it points at `to`, across all + /// nodes' inputs and the export list. Used by `optimize` when it collapses + /// or fuses one node into another and the old id must be retargeted. fn rewrite(&mut self, from: Id, to: Id) { for node in self.nodes.values_mut() { match node { @@ -195,10 +210,12 @@ impl Program { if *variable == from { *variable = to; } if *value == from { *value = to; } }, - Node::Input(_) | Node::Variable | Node::Scope | Node::EndScope => {}, + Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => {}, } } - if self.result == from { self.result = to; } + for (_, id) in self.export.iter_mut() { + if *id == from { *id = to; } + } } /// Optimize the IR in place, iterating to a fixed point. @@ -247,7 +264,7 @@ impl Program { _ => {}, } } - if self.result != usize::MAX { *ref_counts.entry(self.result).or_default() += 1; } + for (_, id) in &self.export { *ref_counts.entry(*id).or_default() += 1; } let fusions: Vec<(Id, Id)> = self.nodes.iter() .filter_map(|(&id, node)| { @@ -285,6 +302,7 @@ impl Program { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.0 { Node::Input(i) => write!(f, "Input({})", i), + Node::Import { name } => write!(f, "Import({:?})", name), Node::Linear { input, ops } => write!(f, "Linear({},{:?})", input, ops), Node::Concat(ids) => write!(f, "Concat({:?})", ids), Node::Arrange(input) => write!(f, "Arrange({})", input), diff --git a/interactive/src/lib.rs b/interactive/src/lib.rs index 45c9ff9a2..0a34ead7b 100644 --- a/interactive/src/lib.rs +++ b/interactive/src/lib.rs @@ -3,33 +3,41 @@ pub mod ir; pub mod lower; pub mod explain; +use std::collections::BTreeSet; + use parse::{Stmt, Expr}; -/// Count the number of distinct inputs referenced in a program. -pub fn count_inputs(stmts: &[Stmt]) -> usize { - let mut max_input = 0usize; +/// Survey a program's external sources: the count of positional inputs (one +/// more than the largest `input N` index, zero if none appear) and the set of +/// names referenced by `import "name"`. Two kinds because `import` does not yet +/// subsume `input` — see `ir::Node::Import`; this returns one number when that +/// cutover happens. +pub fn survey_sources(stmts: &[Stmt]) -> (usize, BTreeSet) { + let mut positional = 0usize; + let mut imports = BTreeSet::new(); + walk_stmts(stmts, &mut positional, &mut imports); + (positional, imports) +} + +fn walk_stmts(stmts: &[Stmt], positional: &mut usize, imports: &mut BTreeSet) { for stmt in stmts { match stmt { - Stmt::Let(_, expr) | Stmt::Var(_, expr) | Stmt::Result(expr) => { - max_input = max_input.max(count_inputs_expr(expr)); - }, - Stmt::Scope(_, body) => { - max_input = max_input.max(count_inputs(body)); - }, + Stmt::Let(_, expr) | Stmt::Var(_, expr) | Stmt::Export(_, expr) => walk_expr(expr, positional, imports), + Stmt::Scope(_, body) => walk_stmts(body, positional, imports), } } - max_input } -fn count_inputs_expr(expr: &Expr) -> usize { +fn walk_expr(expr: &Expr, positional: &mut usize, imports: &mut BTreeSet) { match expr { - Expr::Input(n) => n + 1, + Expr::Input(n) => { *positional = (*positional).max(n + 1); }, + Expr::Import(name) => { imports.insert(name.clone()); }, Expr::Map(e, _) | Expr::Negate(e) | Expr::Arrange(e) | Expr::EnterAt(e, _) | Expr::LiftIter(e) | Expr::Filter(e, _) - | Expr::Reduce(e, _) | Expr::Inspect(e, _) => count_inputs_expr(e), - Expr::Join(l, r, _) => count_inputs_expr(l).max(count_inputs_expr(r)), - Expr::Concat(es) => es.iter().map(|e| count_inputs_expr(e)).max().unwrap_or(0), - Expr::Name(_) | Expr::Qualified(_, _) => 0, + | Expr::Reduce(e, _) | Expr::Inspect(e, _) => walk_expr(e, positional, imports), + Expr::Join(l, r, _) => { walk_expr(l, positional, imports); walk_expr(r, positional, imports); }, + Expr::Concat(es) => { for e in es { walk_expr(e, positional, imports); } }, + Expr::Name(_) | Expr::Qualified(_, _) => {}, } } diff --git a/interactive/src/lower.rs b/interactive/src/lower.rs index 01e8e535c..64a026c20 100644 --- a/interactive/src/lower.rs +++ b/interactive/src/lower.rs @@ -10,7 +10,7 @@ //! each item is lowered once all the names it transitively needs at this //! level are bound. A cycle among `let`s is an error (use a `var` to //! introduce recursion). -//! 4. Lower the (single) `result` expression, if any. +//! 4. Lower the `export` expressions (root scope only). //! 5. Lower each `var`'s body and emit a `Bind` from the placeholder to the //! resulting value. @@ -51,12 +51,15 @@ impl Lowering { } fn lower_program(mut self, stmts: Vec) -> Program { - let mut result_id = None; - self.lower_stmts(stmts, &mut result_id); - Program { result: result_id.expect("No result statement"), nodes: self.nodes } + let mut exports = Vec::new(); + self.lower_stmts(stmts, &mut exports); + if exports.is_empty() { + panic!("Program has no `export` statement"); + } + Program { nodes: self.nodes, export: exports } } - fn lower_stmts(&mut self, stmts: Vec, result_id: &mut Option) { + fn lower_stmts(&mut self, stmts: Vec, exports: &mut Vec<(String, Id)>) { // ---- 1. Bucket statements; reject duplicate names. ---- // `order` records the original textual order so the topological pass // is deterministic when several items are simultaneously ready. @@ -64,7 +67,8 @@ impl Lowering { let mut lets: HashMap = HashMap::new(); let mut scopes: HashMap> = HashMap::new(); let mut order: Vec<(ItemKind, String)> = Vec::new(); - let mut results: Vec = Vec::new(); + // Exports in declaration order (root scope only — rejected below if nested). + let mut local_exports: Vec<(String, Expr)> = Vec::new(); let mut seen: BTreeSet = BTreeSet::new(); for stmt in stmts { match stmt { @@ -82,10 +86,27 @@ impl Lowering { order.push((ItemKind::Scope, name.clone())); scopes.insert(name, body); }, - Stmt::Result(expr) => results.push(expr), + Stmt::Export(name, expr) => { + // Exports are the program's output interface and only make + // sense at the root; reject nested ones rather than silently + // dropping them. + if self.level > 0 { + panic!("`export {:?}` is nested; exports are only allowed at the root scope", name); + } + local_exports.push((name, expr)); + }, + } + } + // Reject duplicate export names (root-only, so this is the whole + // program's output interface). + { + let mut names: BTreeSet<&str> = BTreeSet::new(); + for (n, _) in &local_exports { + if !names.insert(n) { + panic!("Duplicate export name: {:?}", n); + } } } - if results.len() > 1 { panic!("Multiple `result` statements at the same scope level"); } // ---- 2. Pre-bind `Variable` placeholders. ---- for (name, _) in &vars { @@ -132,8 +153,10 @@ impl Lowering { self.push(Node::Scope); self.level += 1; self.scopes.push(HashMap::new()); - let mut inner_result = None; - self.lower_stmts(body, &mut inner_result); + // Exports are root-only (lower_stmts rejects nested ones), + // so this stays empty. + let mut inner_exports = Vec::new(); + self.lower_stmts(body, &mut inner_exports); let inner_scope = self.scopes.pop().unwrap(); let scope_level = self.level; self.named_scopes.insert(name, (scope_level, inner_scope)); @@ -143,10 +166,10 @@ impl Lowering { } } - // ---- 4. Lower the result expression (if any). ---- - if let Some(expr) = results.into_iter().next() { + // ---- 4. Lower export expressions (if any) and record them. ---- + for (name, expr) in local_exports { let id = self.lower_expr(expr); - *result_id = Some(id); + exports.push((name, id)); } // ---- 5. Lower var bodies and emit Bind nodes. ---- @@ -160,6 +183,7 @@ impl Lowering { fn lower_expr(&mut self, expr: Expr) -> Id { match expr { Expr::Input(n) => self.push(Node::Input(n)), + Expr::Import(name) => self.push(Node::Import { name }), Expr::Name(name) => self.resolve_name(&name), Expr::Qualified(scope_name, name) => { let (scope_level, inner_id) = { @@ -223,7 +247,7 @@ fn scope_body_deps(body: &[Stmt], defined: &BTreeSet<&str>, self_name: &str) -> /// field is resolved within that scope's environment, not the enclosing one). fn expr_free_names<'a>(expr: &'a Expr, out: &mut BTreeSet<&'a str>) { match expr { - Expr::Input(_) => {}, + Expr::Input(_) | Expr::Import(_) => {}, Expr::Name(n) => { out.insert(n.as_str()); }, Expr::Qualified(scope, _) => { out.insert(scope.as_str()); }, Expr::Map(e, _) | Expr::Reduce(e, _) | Expr::Filter(e, _) @@ -242,13 +266,13 @@ fn collect_body_free_names<'a>(body: &'a [Stmt], out: &mut BTreeSet<&'a str>) { for stmt in body { match stmt { Stmt::Let(n, _) | Stmt::Var(n, _) | Stmt::Scope(n, _) => { local.insert(n.as_str()); }, - Stmt::Result(_) => {}, + Stmt::Export(_, _) => {}, } } let mut inner: BTreeSet<&'a str> = BTreeSet::new(); for stmt in body { match stmt { - Stmt::Let(_, e) | Stmt::Var(_, e) | Stmt::Result(e) => expr_free_names(e, &mut inner), + Stmt::Let(_, e) | Stmt::Var(_, e) | Stmt::Export(_, e) => expr_free_names(e, &mut inner), Stmt::Scope(_, b) => collect_body_free_names(b, &mut inner), } } diff --git a/interactive/src/parse/applicative.rs b/interactive/src/parse/applicative.rs index 88b80feff..9daeb90e4 100644 --- a/interactive/src/parse/applicative.rs +++ b/interactive/src/parse/applicative.rs @@ -6,10 +6,10 @@ use super::*; #[derive(Debug, Clone, PartialEq)] enum Token { - Let, Var, Scope, Result, - Input, Map, Join, Reduce, Concat, Arrange, Filter, Negate, EnterAt, LiftIter, Inspect, + Let, Var, Scope, Export, + Input, Import, Map, Join, Reduce, Concat, Arrange, Filter, Negate, EnterAt, LiftIter, Inspect, Min, Distinct, Count, - Ident(String), Int(i64), + Ident(String), Int(i64), Str(String), Dollar, LParen, RParen, LBrace, RBrace, LBracket, RBracket, Comma, Semi, Colon, ColonColon, Eq, EqEq, NotEq, Lt, LtEq, Gt, GtEq, AndAnd, Plus, Minus, Star, Eof, @@ -40,6 +40,19 @@ fn tokenize(input: &str) -> Vec { '+' => { chars.next(); tokens.push(Token::Plus); }, '*' => { chars.next(); tokens.push(Token::Star); }, '$' => { chars.next(); tokens.push(Token::Dollar); }, + '"' => { + // String literal: the quoted name in `IMPORT "..."` / `EXPORT "..." = ...`. + // Names carry no escapes, so read verbatim up to the closing quote. + chars.next(); + let mut s = String::new(); + let mut closed = false; + while let Some(c) = chars.next() { + if c == '"' { closed = true; break; } + s.push(c); + } + if !closed { panic!("Unterminated string literal"); } + tokens.push(Token::Str(s)); + }, '-' => { chars.next(); tokens.push(Token::Minus); }, ':' => { chars.next(); if chars.peek() == Some(&':') { chars.next(); tokens.push(Token::ColonColon); } else { tokens.push(Token::Colon); } }, c if c.is_ascii_digit() => { @@ -51,8 +64,10 @@ fn tokenize(input: &str) -> Vec { let mut ident = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_alphanumeric() || c == '_' { ident.push(c); chars.next(); } else { break; } } tokens.push(match ident.as_str() { - "let" => Token::Let, "var" => Token::Var, "scope" => Token::Scope, "result" => Token::Result, - "INPUT" => Token::Input, "MAP" => Token::Map, "JOIN" => Token::Join, + "let" => Token::Let, "var" => Token::Var, "scope" => Token::Scope, + "export" => Token::Export, + "INPUT" => Token::Input, "IMPORT" => Token::Import, + "MAP" => Token::Map, "JOIN" => Token::Join, "REDUCE" => Token::Reduce, "CONCAT" => Token::Concat, "ARRANGE" => Token::Arrange, "FILTER" => Token::Filter, "NEGATE" => Token::Negate, "ENTER_AT" => Token::EnterAt, "INSPECT" => Token::Inspect, "MIN" => Token::Min, "DISTINCT" => Token::Distinct, "COUNT" => Token::Count, @@ -85,7 +100,17 @@ impl Parser { match self.peek().clone() { Token::Let => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Let(n, e) }, Token::Var => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Var(n, e) }, - Token::Result => { self.next(); let e = self.parse_expr(); self.expect(&Token::Semi); Stmt::Result(e) }, + Token::Export => { + self.next(); + let name = match self.next() { + Token::Str(s) => s, + o => panic!("Expected string literal after `export`, got {:?}", o), + }; + self.expect(&Token::Eq); + let e = self.parse_expr(); + self.expect(&Token::Semi); + Stmt::Export(name, e) + }, Token::Ident(_) => { let n = self.parse_ident(); self.expect(&Token::Colon); if *self.peek() == Token::Scope { self.next(); } @@ -100,6 +125,7 @@ impl Parser { fn parse_expr(&mut self) -> Expr { match self.peek().clone() { Token::Input => { self.next(); match self.next() { Token::Int(n) => Expr::Input(n as usize), o => panic!("Expected int, got {:?}", o) } }, + Token::Import => { self.next(); match self.next() { Token::Str(s) => Expr::Import(s), o => panic!("Expected string literal after IMPORT, got {:?}", o) } }, Token::Map => { self.next(); self.expect(&Token::LParen); let i = self.parse_expr(); self.expect(&Token::Comma); let p = self.parse_projection(); self.expect(&Token::RParen); Expr::Map(Box::new(i), p) }, Token::Join => { self.next(); self.expect(&Token::LParen); let l = self.parse_expr(); self.expect(&Token::Comma); let r = self.parse_expr(); self.expect(&Token::Comma); let p = self.parse_projection(); self.expect(&Token::RParen); Expr::Join(Box::new(l), Box::new(r), p) }, Token::Reduce => { self.next(); self.expect(&Token::LParen); let i = self.parse_expr(); self.expect(&Token::Comma); let r = self.parse_reducer(); self.expect(&Token::RParen); Expr::Reduce(Box::new(i), r) }, diff --git a/interactive/src/parse/mod.rs b/interactive/src/parse/mod.rs index 4804301d5..867026afd 100644 --- a/interactive/src/parse/mod.rs +++ b/interactive/src/parse/mod.rs @@ -37,6 +37,9 @@ pub enum Reducer { Min, Distinct, Count } #[derive(Debug, Clone)] pub enum Expr { Input(usize), + /// Named external trace resolved at install time. Carries only the name; + /// shape comes from the registry the program is installed against. + Import(String), Name(String), Qualified(String, String), Map(Box, Projection), @@ -65,5 +68,7 @@ pub enum Stmt { Let(String, Expr), Var(String, Expr), Scope(String, Vec), - Result(Expr), + /// `export "name" = expr;` — registers a named output in the program. + /// Only valid at the root scope. + Export(String, Expr), } diff --git a/interactive/src/parse/pipe.rs b/interactive/src/parse/pipe.rs index f9bcefbc5..44f91b3f7 100644 --- a/interactive/src/parse/pipe.rs +++ b/interactive/src/parse/pipe.rs @@ -6,9 +6,9 @@ use super::*; #[derive(Debug, Clone, PartialEq)] enum Token { - Let, Var, Result, - Input, Key, Map, Join, Min, Distinct, Count, Arrange, Negate, Filter, EnterAt, LiftIter, Inspect, - Ident(String), Int(i64), + Let, Var, Export, + Input, Import, Key, Map, Join, Min, Distinct, Count, Arrange, Negate, Filter, EnterAt, LiftIter, Inspect, + Ident(String), Int(i64), Str(String), Dollar, LParen, RParen, LBrace, RBrace, LBracket, RBracket, Comma, Semi, Colon, ColonColon, Eq, EqEq, NotEq, Lt, LtEq, Gt, GtEq, AndAnd, Pipe, Plus, Minus, Eof, @@ -39,6 +39,19 @@ fn tokenize(input: &str) -> Vec { '<' => { chars.next(); if chars.peek() == Some(&'=') { chars.next(); tokens.push(Token::LtEq); } else { tokens.push(Token::Lt); } }, '>' => { chars.next(); if chars.peek() == Some(&'=') { chars.next(); tokens.push(Token::GtEq); } else { tokens.push(Token::Gt); } }, '$' => { chars.next(); tokens.push(Token::Dollar); }, + '"' => { + // String literal: the quoted name in `import "..."` / `export "..." = ...`. + // Names carry no escapes, so read verbatim up to the closing quote. + chars.next(); + let mut s = String::new(); + let mut closed = false; + while let Some(c) = chars.next() { + if c == '"' { closed = true; break; } + s.push(c); + } + if !closed { panic!("Unterminated string literal"); } + tokens.push(Token::Str(s)); + }, '-' => { chars.next(); tokens.push(Token::Minus); }, ':' => { chars.next(); if chars.peek() == Some(&':') { chars.next(); tokens.push(Token::ColonColon); } else { tokens.push(Token::Colon); } }, c if c.is_ascii_digit() => { @@ -50,8 +63,10 @@ fn tokenize(input: &str) -> Vec { let mut ident = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_alphanumeric() || c == '_' { ident.push(c); chars.next(); } else { break; } } tokens.push(match ident.as_str() { - "let" => Token::Let, "var" => Token::Var, "result" => Token::Result, - "input" => Token::Input, "key" => Token::Key, "map" => Token::Map, + "let" => Token::Let, "var" => Token::Var, + "export" => Token::Export, + "input" => Token::Input, "import" => Token::Import, + "key" => Token::Key, "map" => Token::Map, "join" => Token::Join, "min" => Token::Min, "distinct" => Token::Distinct, "count" => Token::Count, "arrange" => Token::Arrange, "negate" => Token::Negate, "filter" => Token::Filter, "enter_at" => Token::EnterAt, "inspect" => Token::Inspect, @@ -84,7 +99,17 @@ impl Parser { match self.peek().clone() { Token::Let => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Let(n, e) }, Token::Var => { self.next(); let n = self.parse_ident(); self.expect(&Token::Eq); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Var(n, e) }, - Token::Result => { self.next(); let e = self.parse_pipe_expr(); self.expect(&Token::Semi); Stmt::Result(e) }, + Token::Export => { + self.next(); + let name = match self.next() { + Token::Str(s) => s, + o => panic!("Expected string literal after `export`, got {:?}", o), + }; + self.expect(&Token::Eq); + let e = self.parse_pipe_expr(); + self.expect(&Token::Semi); + Stmt::Export(name, e) + }, Token::Ident(_) => { let n = self.parse_ident(); self.expect(&Token::Colon); self.expect(&Token::LBrace); let b = self.parse_program(); self.expect(&Token::RBrace); Stmt::Scope(n, b) @@ -120,6 +145,7 @@ impl Parser { fn parse_atom(&mut self) -> Expr { match self.peek().clone() { Token::Input => { self.next(); match self.next() { Token::Int(n) => Expr::Input(n as usize), o => panic!("Expected int, got {:?}", o) } }, + Token::Import => { self.next(); match self.next() { Token::Str(s) => Expr::Import(s), o => panic!("Expected string literal after `import`, got {:?}", o) } }, Token::Ident(_) => { let n = self.parse_ident(); if *self.peek() == Token::ColonColon { self.next(); let f = self.parse_ident(); Expr::Qualified(n, f) } else { Expr::Name(n) } }, Token::LParen => { self.next(); let e = self.parse_pipe_expr(); self.expect(&Token::RParen); e }, other => panic!("Unexpected token in atom: {:?}", other),