From 3fc5c2d43c458f2006286baae7ed17cf89a677d5 Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:32:33 +0300 Subject: [PATCH 1/2] feat: add parser of simplified sparql --- AGENTS.md | 78 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 930b5b8..74c1571 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,7 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Public modules: formats, graph, sparql, lagraph_sys, utils +│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) @@ -131,10 +131,10 @@ builders: `{ source: String, target: String, label: String }`. ### GraphSource trait -[`GraphSource`](src/graph/mod.rs:164) is implemented by any data source that knows how to +[`GraphSource`](src/graph/mod.rs:168) is implemented by any data source that knows how to feed itself into a specific [`GraphBuilder`]: -- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:165) — consumes the +- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:169) — consumes the source and returns the populated builder. [`Csv`](src/formats/csv.rs), [`MatrixMarket`](src/formats/mm.rs), and [`NTriples`](src/formats/nt.rs) @@ -143,24 +143,24 @@ can be passed to [`GraphBuilder::load`] and [`Graph::try_from`]. 
### GraphBuilder trait -[`GraphBuilder`](src/graph/mod.rs:169) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:188): +[`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a +[`GraphDecomposition`](src/graph/mod.rs:192): -- [`load>(self, source: S)`](src/graph/mod.rs:179) — primary entry point; +- [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. -- [`build(self)`](src/graph/mod.rs:184) — finalise into an immutable graph. +- [`build(self)`](src/graph/mod.rs:188) — finalise into an immutable graph. `InMemoryBuilder` also exposes lower-level helpers outside the trait: -- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:62) — ingest one edge. -- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:72) — consume an +- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:83) — ingest one edge. +- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:93) — consume an `IntoIterator>`. -- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:85) — accept +- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:106) — accept a pre-built `GrB_Matrix` for a label, wrapping it in an `LAGraph_Graph` immediately. ### Backend trait & Graph\ handle -[`Backend`](src/graph/mod.rs:217) associates a marker type with a concrete builder/graph pair: +[`Backend`](src/graph/mod.rs:221) associates a marker type with a concrete builder/graph pair: ```rust pub trait Backend { @@ -169,28 +169,28 @@ pub trait Backend { } ``` -[`Graph`](src/graph/mod.rs:229) is a zero-sized handle parameterised by a `Backend`: +[`Graph`](src/graph/mod.rs:233) is a zero-sized handle parameterised by a `Backend`: -- [`Graph::::builder()`](src/graph/mod.rs:234) — returns a fresh `InMemoryBuilder`. 
-- [`Graph::::try_from(source)`](src/graph/mod.rs:238) — builds a graph from a single +- [`Graph::::builder()`](src/graph/mod.rs:238) — returns a fresh `InMemoryBuilder`. +- [`Graph::::try_from(source)`](src/graph/mod.rs:242) — builds a graph from a single source in one call. -[`InMemory`](src/graph/inmemory.rs:26) is the concrete backend marker type. +[`InMemory`](src/graph/inmemory.rs:27) is the concrete backend marker type. ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:188) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:192) — returns `Arc` for a given edge label. -- [`get_node_id(string_id)`](src/graph/mod.rs:195) / [`get_node_name(mapped_id)`](src/graph/mod.rs:198) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:199) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. +- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. +- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. ### InMemoryBuilder / InMemoryGraph -[`InMemoryBuilder`](src/graph/inmemory.rs:35) is the primary `GraphBuilder` implementation. +[`InMemoryBuilder`](src/graph/inmemory.rs:36) is the primary `GraphBuilder` implementation. It collects edges in RAM, then [`build()`](src/graph/inmemory.rs:131) calls GraphBLAS to create one `GrB_Matrix` per label via COO format, wraps each in an -`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:173). +`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:174). Multiple CSV sources can be chained with repeated `.load()` calls; all edges are merged into a single graph. @@ -205,6 +205,7 @@ which is used by the MatrixMarket loader. 
Three built-in parsers are available, each yielding `Iterator>` and pluggable into `GraphBuilder::load()` via `GraphSource` (see [`src/graph/inmemory.rs`](src/graph/inmemory.rs)). +CSV and MatrixMarket edge loaders are available: #### `Csv` @@ -349,6 +350,41 @@ RDF files using `oxttl` and `oxrdf`. Each triple `(subject, predicate, object)` Constructor: - [`NTriples::new(reader)`](src/formats/nt.rs:56) — parses the stream; each predicate IRI is copied verbatim to the edge label. +### SPARQL parsing (`src/sparql/mod.rs`) + +The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) +crate to parse SPARQL 1.1 query strings and extract the single property-path +triple pattern that pathrex's RPQ evaluators operate on. + +**Supported query form:** `SELECT` queries with exactly one triple or property +path pattern in the `WHERE` clause, e.g.: + +```sparql +SELECT ?x ?y WHERE { ?x /* ?y . } +``` + +Key public items: + +- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a + [`spargebra::Query`]. +- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a + `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). +- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining + `parse_query` + `extract_path` in one call. +- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` + ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` + ([`TermPattern`]). +- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures + (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, + `VariablePredicate`). +- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] + wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. +- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the + default base IRI constant. 
+ +The module also handles spargebra's desugaring of sequence paths +(`?x // ?y`) from a chain of BGP triples back into a single +[`PropertyPathExpression::Sequence`]. ### FFI layer From 3ddc86d1e752daa255e9e82f09fe8a95bcb185ae Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:49:25 +0300 Subject: [PATCH 2/2] feat: add rpq evaluation with LAGraph_RegularPathQuery --- .gitattributes | 1 + AGENTS.md | 120 ++++++---- Cargo.toml | 1 + src/graph/mod.rs | 29 +++ src/lagraph_sys_generated.rs | 18 +- src/rpq/mod.rs | 1 + src/rpq/nfarpq.rs | 334 +++++++++++++++++++++++++++ tests/nfarpq_tests.rs | 431 +++++++++++++++++++++++++++++++++++ 8 files changed, 889 insertions(+), 46 deletions(-) create mode 100644 src/rpq/nfarpq.rs create mode 100644 tests/nfarpq_tests.rs diff --git a/.gitattributes b/.gitattributes index eb6e9fa..e174acd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ tests/testdata/** filter=lfs diff=lfs merge=lfs -text AGENTS.md merge=union +build.rs merge=union diff --git a/AGENTS.md b/AGENTS.md index 74c1571..959d508 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,18 +14,19 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys +│ ├── lib.rs # Modules: formats, graph, rpq, sparql, utils, lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) -│ ├── utils.rs # Internal helpers: CountingBuilder, CountOutput, VecSource, -│ │ # grb_ok! and la_ok! 
macros, build_graph │ ├── graph/ │ │ ├── mod.rs # Core traits (GraphBuilder, GraphDecomposition, GraphSource, │ │ │ # Backend, Graph), error types, RAII wrappers, GrB init │ │ └── inmemory.rs # InMemory marker, InMemoryBuilder, InMemoryGraph │ ├── rpq/ │ │ ├── mod.rs # RpqEvaluator (assoc. Result), RpqQuery, Endpoint, PathExpr, RpqError +│ │ ├── nfarpq.rs # NfaRpqEvaluator (LAGraph_RegularPathQuery) │ │ └── rpqmatrix.rs # Matrix-plan RPQ evaluator │ ├── sparql/ │ │ └── mod.rs # parse_rpq / extract_rpq → RpqQuery (spargebra) @@ -37,6 +38,7 @@ pathrex/ ├── tests/ │ ├── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph │ ├── mm_tests.rs # Integration tests for MatrixMarket format +│ ├── nfarpq_tests.rs # Integration tests for NfaRpqEvaluator │ └── rpqmatrix_tests.rs # Integration tests for matrix-plan RPQ evaluator ├── deps/ │ └── LAGraph/ # Git submodule (SparseLinearAlgebra/LAGraph) @@ -144,7 +146,7 @@ can be passed to [`GraphBuilder::load`] and [`Graph::try_from`]. ### GraphBuilder trait [`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:192): +[`GraphDecomposition`](src/graph/mod.rs:193): - [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. @@ -179,11 +181,11 @@ pub trait Backend { ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:193) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. -- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:197) — returns `Arc` for a given edge label. 
+- [`get_node_id(string_id)`](src/graph/mod.rs:200) / [`get_node_name(mapped_id)`](src/graph/mod.rs:203) — bidirectional string ↔ integer dictionary. +- [`num_nodes()`](src/graph/mod.rs:204) — total unique nodes. ### InMemoryBuilder / InMemoryGraph @@ -228,7 +230,7 @@ Name-based lookup requires `has_header: true`. [`MatrixMarket`](src/formats/mm.rs) loads an edge-labeled graph from a directory with: -- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:199) returns the matching **0-based** matrix index +- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:200) returns the matching **0-based** matrix index - `edges.txt` — one line per label: ` <1-based-index>` (selects `n.txt`) - `.txt` — MatrixMarket adjacency matrix for label with index `n` @@ -352,39 +354,61 @@ Constructor: - [`NTriples::new(reader)`](src/formats/nt.rs:56) — parses the stream; each predicate IRI is copied verbatim to the edge label. ### SPARQL parsing (`src/sparql/mod.rs`) -The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) -crate to parse SPARQL 1.1 query strings and extract the single property-path -triple pattern that pathrex's RPQ evaluators operate on. +The [`rpq`](src/rpq/mod.rs) module provides an abstraction for evaluating +Regular Path Queries (RPQs) over edge-labeled graphs using GraphBLAS/LAGraph. -**Supported query form:** `SELECT` queries with exactly one triple or property -path pattern in the `WHERE` clause, e.g.: +Key public items: -```sparql -SELECT ?x ?y WHERE { ?x /* ?y . } -``` +- [`Endpoint`](src/rpq/mod.rs) — `Variable(String)` or `Named(String)` (IRI string). +- [`PathExpr`](src/rpq/mod.rs) — `Label`, `Sequence`, `Alternative`, `ZeroOrMore`, + `OneOrMore`, `ZeroOrOne`. 
+- [`RpqQuery`](src/rpq/mod.rs) — `{ subject, path, object }` using the types above; + [`strip_base(&mut self, base)`](src/rpq/mod.rs) removes a shared IRI prefix from + named endpoints and labels. +- [`RpqEvaluator`](src/rpq/mod.rs) — trait with associated type `Result` and + [`evaluate(query, graph)`](src/rpq/mod.rs) taking `&RpqQuery` and + [`GraphDecomposition`], returning `Result`. + Each concrete evaluator exposes its own output type (see below). +- [`RpqError`](src/rpq/mod.rs) — unified error type for RPQ parsing and evaluation: + `Parse` (SPARQL syntax), `Extract` (query extraction), `UnsupportedPath`, + `VertexNotFound`, and `Graph` (wraps [`GraphError`](src/graph/mod.rs) for + label-not-found and GraphBLAS/LAGraph failures). -Key public items: +#### `NfaRpqEvaluator` (`src/rpq/nfarpq.rs`) -- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a - [`spargebra::Query`]. -- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a - `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). -- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining - `parse_query` + `extract_path` in one call. -- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` - ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` - ([`TermPattern`]). -- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures - (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, - `VariablePredicate`). -- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] - wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. -- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the - default base IRI constant. +[`NfaRpqEvaluator`](src/rpq/nfarpq.rs) implements [`RpqEvaluator`] by: + +1. 
Converting a [`PathExpr`] into an [`Nfa`](src/rpq/nfarpq.rs) via Thompson's + construction ([`Nfa::from_path_expr()`](src/rpq/nfarpq.rs)). +2. Eliminating ε-transitions via epsilon closure ([`NfaBuilder::epsilon_closure()`](src/rpq/nfarpq.rs)). +3. Building one `LAGraph_Graph` per NFA label transition + ([`Nfa::build_lagraph_matrices()`](src/rpq/nfarpq.rs)). +4. Calling [`LAGraph_RegularPathQuery`] with the NFA matrices, data-graph + matrices, start/final states, and source vertices. + +`type Result = NfaRpqResult` ([`GraphblasVector`] of reachable targets). + +Supported path operators match [`PathExpr`] variants above. `Reverse` and +`NegatedPropertySet` from SPARQL map to [`RpqError::UnsupportedPath`] when they +appear in extracted paths. + +Subject/object resolution: [`Endpoint::Variable`] means "all vertices"; +[`Endpoint::Named`] resolves to a single vertex via +[`GraphDecomposition::get_node_id()`](src/graph/mod.rs:200). + +[`NfaRpqResult`](src/rpq/nfarpq.rs) wraps a [`GraphblasVector`] of reachable **target** +vertices. When the subject is a variable, every vertex is used as a source and +`LAGraph_RegularPathQuery` returns the union of targets — individual `(source, target)` +pairs are not preserved. -The module also handles spargebra's desugaring of sequence paths -(`?x // ?y`) from a chain of BGP triples back into a single -[`PropertyPathExpression::Sequence`]. +#### `RpqMatrixEvaluator` (`src/rpq/rpqmatrix.rs`) + +[`RpqMatrixEvaluator`](src/rpq/rpqmatrix.rs) compiles [`PathExpr`] into a Boolean matrix plan +over label adjacency matrices and runs [`LAGraph_RPQMatrix`]. It returns +[`RpqMatrixResult`](src/rpq/rpqmatrix.rs): the path-relation `nnz` plus a +[`GraphblasMatrix`] duplicate of the result matrix (full reachability relation for the path). +Subject/object do not filter the matrix; a named subject is only validated to exist. +Bound objects are not supported yet ([`RpqError::UnsupportedPath`]). ### FFI layer @@ -399,7 +423,7 @@ LAGraph. 
Safe Rust wrappers live in [`graph::mod`](src/graph/mod.rs): - [`GraphblasMatrix`](src/graph/mod.rs) — RAII wrapper around `GrB_Matrix` (`dup` + `free` on drop). - [`ensure_grb_init()`](src/graph/mod.rs:39) — one-time `LAGraph_Init` via `std::sync::Once`. -### Macros (`src/utils.rs`) +### Macros & helpers (`src/utils.rs`) Two `#[macro_export]` macros handle FFI error mapping: @@ -409,6 +433,12 @@ Two `#[macro_export]` macros handle FFI error mapping: appending the required `*mut i8` message buffer, and maps failure to `GraphError::LAGraph(info, msg)`. +A convenience function is also provided: + +- [`build_graph(edges)`](src/utils.rs:184) — builds an `InMemoryGraph` from a + slice of `(&str, &str, &str)` triples (source, target, label). Used by + integration tests. + ## Coding Conventions - **Rust edition 2024**. @@ -420,14 +450,13 @@ Two `#[macro_export]` macros handle FFI error mapping: - `GraphError` converts into `RpqError` via `#[from] GraphError` on the `RpqError::Graph` variant, enabling `?` propagation in evaluators. - Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, - `graph/inmemory.rs`, `rpq/nfarpq.rs`, and `rpq/rpqmatrix.rs`. All raw pointers are wrapped in + `graph/inmemory.rs`, `rpq/nfarpq.rs`. All raw pointers are wrapped in RAII types that free resources on drop. - `unsafe impl Send + Sync` is provided for `LagraphGraph`, `GraphblasVector`, and `GraphblasMatrix` because GraphBLAS handles are thread-safe after init. - Unit tests live in `#[cfg(test)] mod tests` blocks inside each module. Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), - [`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs), - and [`tests/rpqmatrix_tests.rs`](tests/rpqmatrix_tests.rs). + [`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs). 
## Testing @@ -447,8 +476,13 @@ Tests in `src/formats/csv.rs` and `src/formats/nt.rs` are pure Rust and need no Tests in `src/sparql/mod.rs` are pure Rust and need no native dependencies. -Tests in `src/graph/inmemory.rs` and [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs) -call real GraphBLAS/LAGraph and require the native libraries to be present. +Tests in `src/rpq/nfarpq.rs` (NFA construction unit tests) are pure Rust and need no +native dependencies. + +Tests in `src/graph/inmemory.rs`, [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), +[`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs), +and [`tests/rpqmatrix_tests.rs`](tests/rpqmatrix_tests.rs) call real GraphBLAS/LAGraph and +require the native libraries to be present. ## CI diff --git a/Cargo.toml b/Cargo.toml index 9f24b40..9825c87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ egg = "0.10.0" libc = "0.2" oxrdf = "0.3.3" oxttl = "0.2.3" +rustfst = "1.2" spargebra = "0.4.6" thiserror = "1.0" diff --git a/src/graph/mod.rs b/src/graph/mod.rs index 514cbb0..5a48dfa 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -141,6 +141,35 @@ impl GraphblasVector { grb_ok!(GrB_Vector_new(&mut v, GrB_BOOL, n))?; Ok(Self { inner: v }) } + + /// Returns the number of stored values in this vector. + pub fn nvals(&self) -> Result { + let mut nvals: GrB_Index = 0; + grb_ok!(GrB_Vector_nvals(&mut nvals, self.inner))?; + Ok(nvals) + } + + /// Extracts all stored indices from boolean vector. 
+ pub fn indices(&self) -> Result, GraphError> { + let nvals = self.nvals()?; + if nvals == 0 { + return Ok(Vec::new()); + } + + let mut indices = vec![0u64; nvals as usize]; + let mut values = vec![false; nvals as usize]; + let mut actual_nvals = nvals; + + grb_ok!(GrB_Vector_extractTuples_BOOL( + indices.as_mut_ptr(), + values.as_mut_ptr(), + &mut actual_nvals, + self.inner, + ))?; + + indices.truncate(actual_nvals as usize); + Ok(indices) + } } impl Drop for GraphblasVector { diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index c1a6193..5e0de30 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -155,9 +155,6 @@ unsafe extern "C" { ncols: GrB_Index, ) -> GrB_Info; } -unsafe extern "C" { - pub fn GrB_Matrix_dup(C: *mut GrB_Matrix, A: GrB_Matrix) -> GrB_Info; -} unsafe extern "C" { pub fn GrB_Matrix_nvals(nvals: *mut GrB_Index, A: GrB_Matrix) -> GrB_Info; } @@ -272,6 +269,21 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +unsafe extern "C" { + pub fn LAGraph_RegularPathQuery( + reachable: *mut GrB_Vector, + R: *mut LAGraph_Graph, + nl: usize, + QS: *const GrB_Index, + nqs: usize, + QF: *const GrB_Index, + nqf: usize, + G: *mut LAGraph_Graph, + S: *const GrB_Index, + ns: usize, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} #[repr(u32)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub enum RPQMatrixOp { diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs index c2db0d1..a916890 100644 --- a/src/rpq/mod.rs +++ b/src/rpq/mod.rs @@ -10,6 +10,7 @@ //! let result: NfaRpqResult = NfaRpqEvaluator.evaluate(&query, &graph)?; //! ``` +pub mod nfarpq; pub mod rpqmatrix; use crate::graph::{GraphDecomposition, GraphError}; diff --git a/src/rpq/nfarpq.rs b/src/rpq/nfarpq.rs new file mode 100644 index 0000000..e52797c --- /dev/null +++ b/src/rpq/nfarpq.rs @@ -0,0 +1,334 @@ +//! NFA-based RPQ evaluation using `LAGraph_RegularPathQuery`. 
+ +use crate::graph::{GraphDecomposition, GraphblasVector, LagraphGraph, ensure_grb_init}; +use crate::la_ok; +use crate::lagraph_sys::*; +use crate::lagraph_sys::LAGraph_Kind; +use crate::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery}; +use rustfst::algorithms::closure::{ClosureType, closure}; +use rustfst::algorithms::concat::concat; +use rustfst::algorithms::rm_epsilon::rm_epsilon; +use rustfst::algorithms::union::union; +use rustfst::prelude::*; +use rustfst::semirings::TropicalWeight; +use rustfst::utils::{acceptor, epsilon_machine}; +use std::collections::HashMap; + +/// Transitions for a single edge label in the NFA. +/// +/// `rows[i]` and `cols[i]` form a parallel pair: there is a transition from +/// state `rows[i]` to state `cols[i]` on `label`. +#[derive(Debug, Clone)] +pub struct NfaLabelTransitions { + pub label: String, + pub rows: Vec, + pub cols: Vec, +} + +#[derive(Debug, Clone)] +pub struct Nfa { + pub num_states: usize, + pub start_states: Vec, + pub final_states: Vec, + pub transitions: Vec, +} + +struct SymbolTable { + label_to_id: HashMap, + id_to_label: HashMap, + next_id: Label, +} + +impl SymbolTable { + fn new() -> Self { + Self { + label_to_id: HashMap::new(), + id_to_label: HashMap::new(), + next_id: 1, + } + } + + fn get_or_insert(&mut self, label: &str) -> Label { + if let Some(&id) = self.label_to_id.get(label) { + id + } else { + let id = self.next_id; + self.next_id += 1; + self.label_to_id.insert(label.to_string(), id); + self.id_to_label.insert(id, label.to_string()); + id + } + } + + fn get_label(&self, id: Label) -> Option<&str> { + self.id_to_label.get(&id).map(|s| s.as_str()) + } +} + +fn map_fst_error(operation: &'static str, e: E) -> RpqError { + RpqError::UnsupportedPath(format!("{} failed: {}", operation, e)) +} + +impl Nfa { + /// Build an NFA from a path expression. 
+ pub fn from_path_expr(path: &PathExpr) -> Result { + let mut symbols = SymbolTable::new(); + + let mut fst = build_fst(path, &mut symbols)?; + + rm_epsilon(&mut fst).map_err(|e| map_fst_error("rm_epsilon", e))?; + + extract_nfa(&fst, &symbols) + } + + /// Convert NFA transitions to LAGraph matrices for RPQ evaluation. + pub fn build_lagraph_matrices(&self) -> Result, RpqError> { + ensure_grb_init()?; + let n = self.num_states as GrB_Index; + let mut result = Vec::with_capacity(self.transitions.len()); + + for trans in &self.transitions { + let vals: Vec = vec![true; trans.rows.len()]; + let lg = LagraphGraph::from_coo( + &trans.rows, + &trans.cols, + &vals, + n, + LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED, + )?; + result.push((trans.label.clone(), lg)); + } + + Ok(result) + } +} + +/// Build a VectorFst from a PathExpr using Thompson-like construction. +fn build_fst( + path: &PathExpr, + symbols: &mut SymbolTable, +) -> Result, RpqError> { + match path { + PathExpr::Label(label) => { + let label_id = symbols.get_or_insert(label); + Ok(acceptor(&[label_id], TropicalWeight::one())) + } + + PathExpr::Sequence(lhs, rhs) => { + let mut fst_l = build_fst(lhs, symbols)?; + let fst_r = build_fst(rhs, symbols)?; + concat(&mut fst_l, &fst_r).map_err(|e| map_fst_error("concat", e))?; + Ok(fst_l) + } + + PathExpr::Alternative(lhs, rhs) => { + let mut fst_l = build_fst(lhs, symbols)?; + let fst_r = build_fst(rhs, symbols)?; + union(&mut fst_l, &fst_r).map_err(|e| map_fst_error("union", e))?; + Ok(fst_l) + } + + PathExpr::ZeroOrMore(inner) => { + let mut fst = build_fst(inner, symbols)?; + closure(&mut fst, ClosureType::ClosureStar); + Ok(fst) + } + + PathExpr::OneOrMore(inner) => { + let mut fst = build_fst(inner, symbols)?; + closure(&mut fst, ClosureType::ClosurePlus); + Ok(fst) + } + + PathExpr::ZeroOrOne(inner) => { + let mut fst_inner = build_fst(inner, symbols)?; + let fst_eps = epsilon_machine::>() + .map_err(|e| map_fst_error("epsilon_machine", e))?; + + union(&mut 
fst_inner, &fst_eps).map_err(|e| map_fst_error("union", e))?; + Ok(fst_inner) + } + } +} + +fn extract_nfa(fst: &VectorFst, symbols: &SymbolTable) -> Result { + let num_states = fst.num_states(); + + let mut label_transitions: HashMap> = HashMap::new(); + + for state in fst.states_iter() { + for tr in fst.get_trs(state).unwrap().trs() { + if tr.ilabel == EPS_LABEL { + continue; + } + + if let Some(label) = symbols.get_label(tr.ilabel) { + label_transitions + .entry(label.to_string()) + .or_default() + .push((state as usize, tr.nextstate as usize)); + } + } + } + + let start_states: Vec = fst + .start() + .map(|s| vec![s as GrB_Index]) + .unwrap_or_default(); + + let final_states: Vec = fst + .states_iter() + .filter(|&s| fst.is_final(s).unwrap_or(false)) + .map(|s| s as GrB_Index) + .collect(); + + let transitions: Vec = label_transitions + .into_iter() + .map(|(label, pairs)| { + let mut rows = Vec::with_capacity(pairs.len()); + let mut cols = Vec::with_capacity(pairs.len()); + for (r, c) in pairs { + rows.push(r as GrB_Index); + cols.push(c as GrB_Index); + } + NfaLabelTransitions { label, rows, cols } + }) + .collect(); + + Ok(Nfa { + num_states, + start_states, + final_states, + transitions, + }) +} + +#[derive(Debug)] +pub struct NfaRpqResult { + pub reachable: GraphblasVector, +} + +/// Evaluates RPQs using `LAGraph_RegularPathQuery`. 
+pub struct NfaRpqEvaluator; + +impl RpqEvaluator for NfaRpqEvaluator { + type Result = NfaRpqResult; + + fn evaluate( + &self, + query: &RpqQuery, + graph: &G, + ) -> Result { + let nfa = Nfa::from_path_expr(&query.path)?; + let nfa_matrices = nfa.build_lagraph_matrices()?; + + let src_id = resolve_endpoint(&query.subject, graph)?; + let _dst_id = resolve_endpoint(&query.object, graph)?; + + let n = graph.num_nodes(); + + let source_vertices: Vec = match src_id { + Some(id) => vec![id as GrB_Index], + None => (0..n as GrB_Index).collect(), + }; + + let mut nfa_graph_ptrs: Vec = + nfa_matrices.iter().map(|(_, lg)| lg.inner).collect(); + + let mut data_graph_ptrs: Vec = Vec::with_capacity(nfa_matrices.len()); + for (label, _) in &nfa_matrices { + let lg = graph.get_graph(label)?; + data_graph_ptrs.push(lg.inner); + } + + let mut reachable: GrB_Vector = std::ptr::null_mut(); + + la_ok!(LAGraph_RegularPathQuery( + &mut reachable, + nfa_graph_ptrs.as_mut_ptr(), + nfa_matrices.len(), + nfa.start_states.as_ptr(), + nfa.start_states.len(), + nfa.final_states.as_ptr(), + nfa.final_states.len(), + data_graph_ptrs.as_mut_ptr(), + source_vertices.as_ptr(), + source_vertices.len(), + ))?; + + let result_vec = GraphblasVector { inner: reachable }; + + Ok(NfaRpqResult { + reachable: result_vec, + }) + } +} + +fn resolve_endpoint( + term: &Endpoint, + graph: &G, +) -> Result, RpqError> { + match term { + Endpoint::Variable(_) => Ok(None), + Endpoint::Named(id) => graph + .get_node_id(id) + .map(Some) + .ok_or_else(|| RpqError::VertexNotFound(id.clone())), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn label(s: &str) -> PathExpr { + PathExpr::Label(s.to_owned()) + } + + #[test] + fn test_single_label() { + let nfa = Nfa::from_path_expr(&label("knows")).unwrap(); + assert!(nfa.num_states >= 2, "NFA should have at least 2 states"); + assert!(!nfa.start_states.is_empty(), "should have start states"); + assert!(!nfa.final_states.is_empty(), "should have final states"); + 
assert_eq!(nfa.transitions.len(), 1); + assert_eq!(nfa.transitions[0].label, "knows"); + assert!(!nfa.transitions[0].rows.is_empty()); + } + + #[test] + fn test_sequence() { + let path = PathExpr::Sequence(Box::new(label("a")), Box::new(label("b"))); + let nfa = Nfa::from_path_expr(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_alternative() { + let path = PathExpr::Alternative(Box::new(label("a")), Box::new(label("b"))); + let nfa = Nfa::from_path_expr(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_zero_or_more() { + let path = PathExpr::ZeroOrMore(Box::new(label("knows"))); + let nfa = Nfa::from_path_expr(&path).unwrap(); + assert!(!nfa.start_states.is_empty()); + assert!(!nfa.final_states.is_empty()); + let start_set: std::collections::HashSet = + nfa.start_states.iter().copied().collect(); + let final_set: std::collections::HashSet = + nfa.final_states.iter().copied().collect(); + assert!( + !start_set.is_disjoint(&final_set), + "start and final states should overlap for zero-or-more, start={:?}, final={:?}", + start_set, + final_set + ); + } +} diff --git a/tests/nfarpq_tests.rs b/tests/nfarpq_tests.rs new file mode 100644 index 0000000..7483fc5 --- /dev/null +++ b/tests/nfarpq_tests.rs @@ -0,0 +1,431 @@ +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::Path; +use std::sync::LazyLock; + +use pathrex::formats::mm::MatrixMarket; +use pathrex::graph::{Graph, GraphDecomposition, GraphError, InMemory, InMemoryGraph}; +use pathrex::lagraph_sys::GrB_Index; +use pathrex::rpq::nfarpq::NfaRpqEvaluator; +use pathrex::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery}; +use pathrex::sparql::parse_rpq; +use pathrex::utils::build_graph; + +const 
GRAPH_DIR: &str = "tests/testdata/mm_graph"; +const CASES_DIR: &str = "tests/testdata/cases"; +const BASE_IRI: &str = "http://example.org/"; + +static LA_N_EGG_GRAPH: LazyLock = LazyLock::new(|| { + let mm = MatrixMarket::from_dir(GRAPH_DIR).with_base_iri(BASE_IRI); + Graph::::try_from(mm).expect("Failed to load la-n-egg-rpq graph") +}); + +fn convert_query_line(line: &str) -> RpqQuery { + let query_str = line + .splitn(2, ',') + .nth(1) + .unwrap_or_else(|| panic!("query line has no comma: {line:?}")) + .trim(); + + let sparql = format!("BASE <{BASE_IRI}> SELECT * WHERE {{ {query_str} . }}"); + + let query = + parse_rpq(&sparql).unwrap_or_else(|e| panic!("failed to parse query {line:?}: {e}")); + query +} + +fn load_queries(case_dir: &Path) -> Vec { + let path = case_dir.join("queries.txt"); + let reader = BufReader::new( + File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {e}", path.display())), + ); + reader + .lines() + .map(|l| l.expect("I/O error reading queries.txt")) + .filter(|l| !l.trim().is_empty()) + .map(|l| convert_query_line(&l)) + .collect() +} + +fn load_expected_nnz(case_dir: &Path) -> Vec { + let path = case_dir.join("expected.txt"); + let reader = BufReader::new( + File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {e}", path.display())), + ); + reader + .lines() + .map(|l| l.expect("I/O error reading expected.txt")) + .filter(|l| !l.trim().is_empty()) + .map(|l| { + // Format: "<id>;<nnz>;<rest>" (only <nnz> is parsed; <rest> is never read) + let mut parts = l.splitn(3, ';'); + let _id = parts.next().expect("missing id field"); + parts + .next() + .expect("missing nnz field") + .parse::() + .unwrap_or_else(|e| panic!("bad nnz in {l:?}: {e}")) + }) + .collect() +} + +/// Runs one case directory `tests/testdata/cases/<case_name>` against the shared `LA_N_EGG_GRAPH`. +/// Each query from queries.txt is evaluated with `NfaRpqEvaluator`, and `reachable.nvals()` must equal the count on the i-th non-empty line of expected.txt. +/// Panics if the two files have different numbers of non-empty lines, if a query fails to evaluate, or if a count differs. +fn run_la_n_egg_case(case_name: &str) { + let case_dir = Path::new(CASES_DIR).join(case_name); + let queries = load_queries(&case_dir); + let expected = load_expected_nnz(&case_dir); + + assert_eq!( + queries.len(), + expected.len(), + "case '{case_name}': queries.txt and expected.txt have different line counts" + ); + +
let graph = &*LA_N_EGG_GRAPH; + let evaluator = NfaRpqEvaluator; + + for (i, (query, expected_nnz)) in queries.iter().zip(expected.iter()).enumerate() { + let result = evaluator.evaluate(query, graph).unwrap_or_else(|e| { + panic!("case '{case_name}' query #{i} evaluation failed: {e}\n query: {query:?}") + }); + + let actual_nnz = result.reachable.nvals().expect("failed to get nvals"); + assert_eq!( + actual_nnz, + *expected_nnz, + "case '{case_name}' query #{i} nnz mismatch\n query: {query:?}\n expected: {expected_nnz}\n actual: {actual_nnz}", + ); + } +} + +fn label(s: &str) -> PathExpr { + PathExpr::Label(s.to_string()) +} + +fn var(name: &str) -> Endpoint { + Endpoint::Variable(name.to_string()) +} + +fn named_ep(s: &str) -> Endpoint { + Endpoint::Named(s.to_string()) +} + +fn rq(subject: Endpoint, path: PathExpr, object: Endpoint) -> RpqQuery { + RpqQuery { + subject, + path, + object, + } +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?x <knows> ?y +#[test] +fn test_single_label_variable_variable() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&rq(var("x"), label("knows"), var("y")), &graph) + .expect("evaluate should succeed"); + + let count = result.reachable.nvals().expect("failed to get nvals"); + assert_eq!(count, 2); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: <A> <knows> ?y +#[test] +fn test_single_label_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&rq(named_ep("A"), label("knows"), var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let b_id = graph.get_node_id("B").expect("B should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B (id={b_id}) should be reachable from A via 'knows', got indices: {indices:?}" + ); +} + +///
Graph: A --knows--> B --likes--> C +/// Query: ?x <knows>/<likes> ?y (two-hop sequence) +#[test] +fn test_sequence_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(var("x"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let count = result.reachable.nvals().expect("failed to get nvals"); + assert_eq!(count, 1); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: <A> <knows>/<likes> ?y +#[test] +fn test_sequence_path_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C (id={c_id}) should be reachable from A via knows/likes, got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B, A --likes--> C +/// Query: <A> <knows>|<likes> ?y +#[test] +fn test_alternative_path() { + let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::Alternative(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable via knows|likes" + ); + assert!( + indices.contains(&(c_id as
GrB_Index)), + "C should be reachable via knows|likes" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: <A> <knows>* ?y +#[test] +fn test_zero_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::ZeroOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: <A> <knows>+ ?y +#[test] +fn test_one_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::OneOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + !indices.contains(&(a_id as GrB_Index)), + "A shouldn't be reachable" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Intended query: <A> <knows>? ?y (the Query line that follows lost its IRIs) +///
Query: ? ?y +#[test] +fn test_zero_or_one_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::ZeroOrOne(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + !indices.contains(&(c_id as GrB_Index)), + "C should NOT be reachable (two hops, but path is ?)" + ); +} + +#[test] +fn test_label_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&rq(var("x"), label("nonexistent"), var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::Graph(GraphError::LabelNotFound(ref l))) if l == "nonexistent"), + "expected LabelNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&rq(named_ep("Z"), label("knows"), var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_object_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&rq(var("x"), label("knows"), named_ep("Z")), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected
VertexNotFound error for object, got: {result:?}" + ); +} + +#[test] +fn test_negated_property_set_rejected_by_sparql_conversion() { + let sparql = "BASE SELECT ?x ?y WHERE { ?x !() ?y . }"; + let r = pathrex::sparql::parse_rpq(sparql); + assert!(matches!(r, Err(RpqError::UnsupportedPath(_)))); +} + +/// Graph: A --knows--> B --knows--> C --knows--> A (cycle) +/// Query: <A> <knows>* ?y +#[test] +fn test_cycle_graph_star() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("C", "A", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::ZeroOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let count = result.reachable.nvals().expect("failed to get nvals"); + assert_eq!(count, 3, "all 3 nodes should be reachable in a cycle"); +} + +/// Graph: A --knows--> B --likes--> C --knows--> D +/// Query: <A> <knows>/<likes>*/<knows> ?y +#[test] +fn test_complex_path() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "likes"), + ("C", "D", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + // knows / likes* / knows + let path = PathExpr::Sequence( + Box::new(PathExpr::Sequence( + Box::new(label("knows")), + Box::new(PathExpr::ZeroOrMore(Box::new(label("likes")))), + )), + Box::new(label("knows")), + ); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + let indices = result.reachable.indices().expect("failed to extract indices"); + let d_id = graph.get_node_id("D").expect("D should exist"); + assert!( + indices.contains(&(d_id as GrB_Index)), + "D should be reachable via knows/likes*/knows, got indices: {indices:?}" + ); +} + +#[test] +fn test_no_matching_path() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result =
evaluator.evaluate(&rq(var("x"), path, var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::Graph(GraphError::LabelNotFound(ref l))) if l == "likes"), + "expected LabelNotFound for 'likes', got: {result:?}" + ); +} + +#[test] +fn test_la_n_egg_con_any() { + run_la_n_egg_case("con-any"); +}