diff --git a/README.md b/README.md index dcb1b08c8..46cb484d5 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Every clip runs the **real binary** against real NTFS data with unedited timings ## Benchmark snapshot (v0.5.120 · June 2026) -📖 **The story behind these numbers:** [*I benchmarked my Rust file search engine against Everything until I ran out of excuses*](https://skyllc-ai.github.io/blog/benchmarking-against-everything/) — the methodology I had to fix first, the bulk-export workload Everything's CLI can't run, and the two regressions published anyway. +📖 **The story behind these numbers:** [*I benchmarked my Rust file search engine against Everything until I ran out of excuses*](https://uffs.io/blog/benchmarking-against-everything/) — the methodology I had to fix first, the bulk-export workload Everything's CLI can't run, and the two regressions published anyway. Measured 2026-06-11 on AMD Ryzen 9 3900XT, 64 GB RAM, Windows 11 Pro 24H2 — cross-tool on four NTFS volumes (C/D/F/G, 12.8 M records, the Everything-RAM-budget-negotiated set), full-scan on all seven (25.9 M records; that workload is UFFS-only, so the negotiation doesn't constrain it). Raw data: [`cross-tool-summary.csv`](docs/benchmarks/raw/2026-06-v0.5.120_cross-tool-summary.csv) · [`full-scan-all-drives.csv`](docs/benchmarks/raw/2026-06-v0.5.120_full-scan-all-drives.csv). Publication-grade report: [**docs/benchmarks/**](docs/benchmarks/). 
diff --git a/crates/uffs-core/src/aggregate/rollup.rs b/crates/uffs-core/src/aggregate/rollup.rs index a2e749e78..28cf7e094 100644 --- a/crates/uffs-core/src/aggregate/rollup.rs +++ b/crates/uffs-core/src/aggregate/rollup.rs @@ -194,6 +194,8 @@ pub(crate) fn resolve_rollup_key(key: u32, mode: RollupMode, drive: &DriveCompac reason = "tests assert against fixtures with known shape; indexing panic = test failure" )] mod tests { + use alloc::sync::Arc; + use super::*; #[test] @@ -233,7 +235,6 @@ mod tests { use crate::compact::{ChildrenIndex, CompactRecord, ExtensionIndex, IndexSource}; use crate::trigram::TrigramIndex; - // Build names blob: concatenated UTF-8 strings. let name_strs = [ "root", @@ -254,8 +255,6 @@ mod tests { let dir = 0x0010_u32; // FILE_ATTRIBUTE_DIRECTORY let records = vec![ CompactRecord { - size: 0, - allocated: 0, name_offset: offsets[0], flags: dir, parent_idx: 0, @@ -263,8 +262,6 @@ mod tests { ..Default::default() }, CompactRecord { - size: 0, - allocated: 0, name_offset: offsets[1], flags: dir, parent_idx: 0, @@ -272,8 +269,6 @@ mod tests { ..Default::default() }, CompactRecord { - size: 0, - allocated: 0, name_offset: offsets[2], flags: dir, parent_idx: 0, @@ -290,8 +285,6 @@ mod tests { ..Default::default() }, CompactRecord { - size: 0, - allocated: 0, name_offset: offsets[4], flags: dir, parent_idx: 1, @@ -324,9 +317,9 @@ mod tests { letter: uffs_mft::platform::DriveLetter::C, records: crate::compact_storage::ColumnStorage::from_vec(records), names: crate::compact_storage::ColumnStorage::from_vec(names_blob), - trigram: TrigramIndex::empty(), - children, - ext_index: ExtensionIndex::build(&[]), + trigram: Arc::new(TrigramIndex::empty()), + children: Arc::new(children), + ext_index: Arc::new(ExtensionIndex::build(&[])), fold: uffs_text::case_fold::CaseFold::default_table(), ext_names: vec![], source: IndexSource::MftFile(PathBuf::from("C:")), @@ -335,6 +328,7 @@ mod tests { path_trie: None, // unused by aggregation tests β€” see 
compact.rs::frs_to_compact docs. frs_to_compact: Vec::new(), + delta: None, } } diff --git a/crates/uffs-core/src/compact.rs b/crates/uffs-core/src/compact.rs index 0a1ddb1a9..11aa103aa 100644 --- a/crates/uffs-core/src/compact.rs +++ b/crates/uffs-core/src/compact.rs @@ -9,13 +9,14 @@ //! `.uffs` cache file. //! //! See `docs/architecture/COMPACT_INDEX_DESIGN.md` for the full design. -//! Exception: `file_size_policy` β€” core data structures + builder, tightly -//! coupled. - -use std::time::Instant; +//! +//! This module owns [`DriveCompactIndex`] (the loaded drive + its search choke +//! points) and re-exports the row type, the CSR indexes, path-length +//! computation, and the MFTβ†’compact builder from focused submodules +//! (`record`, `children`, `extension`, `path_len`, `builder`, `delta`). -use rayon::prelude::*; -use uffs_mft::index::MftIndex; +use alloc::borrow::Cow; +use alloc::sync::Arc; use crate::bloom::Bloom; pub use crate::compact_loader::apply_usn_patch; @@ -28,386 +29,34 @@ use crate::compact_storage::ColumnStorage; use crate::path_trie::PathTrie; use crate::trigram::TrigramIndex; -/// Compact per-record data for in-memory search, filter, and sort. -/// -/// 80 bytes per record (76 data + 4 explicit tail padding). -/// Derives `bytemuck::Pod` + `Zeroable` so the entire record array can be -/// serialized/deserialized as a single bulk `memcpy` β€” no per-field encoding. -#[derive(Debug, Clone, Copy, Default, bytemuck::Pod, bytemuck::Zeroable)] -#[repr(C)] -pub struct CompactRecord { - // ── u64 fields first (8-byte aligned) ───────────────────────── - /// Logical file size in bytes. - pub size: u64, - /// Allocated size on disk in bytes ("Size on Disk" column). - pub allocated: u64, - /// Sum of logical file sizes in entire subtree. - pub treesize: u64, - /// Sum of allocated sizes in entire subtree. - pub tree_allocated: u64, - /// Creation time (Unix microseconds). - pub created: i64, - /// Last write time (Unix microseconds). 
- pub modified: i64, - /// Last access time (Unix microseconds). - pub accessed: i64, - - // ── u32 fields (4-byte aligned) ─────────────────────────────── - /// Byte offset into the names blob. - pub name_offset: u32, - /// Raw NTFS `FILE_ATTRIBUTE_*` flags. - pub flags: u32, - /// Index into the compact array of the parent directory. - /// `u32::MAX` = root or orphan. - pub parent_idx: u32, - /// Count of all descendants in subtree. 0 for files. - pub descendants: u32, - - // ── u16 fields (2-byte aligned) ─────────────────────────────── - /// UTF-8 byte length of the filename. - pub name_len: u16, - /// Interned extension ID (0 = no extension). - pub extension_id: u16, - /// Full path length in UTF-8 bytes (e.g. `C:\Windows\System32\cmd.exe` = - /// 28). Precomputed at index build time via top-down parent-chain walk. - /// Saturates at `u16::MAX` (65 535) for extremely deep paths. - pub path_len: u16, - - /// First byte of the filename (e.g. `b'$'` for NTFS metafiles). - /// - /// Cached here as a cheap hot-path *gate*: only `$`-prefixed records can be - /// NTFS metafiles, so [`is_system_metafile`](Self::is_system_metafile) can - /// reject virtually every record with one sequential field read instead of - /// a random cache-miss into the names arena. The handful of `$`-prefixed - /// candidates then pay one arena lookup for the authoritative name check. - pub name_first_byte: u8, - - /// Explicit tail padding for 8-byte struct alignment. - /// Required by `bytemuck::Pod` β€” no implicit padding allowed. - #[expect( - clippy::pub_underscore_fields, - reason = "bytemuck Pod requires all fields same visibility" - )] - pub _pad: [u8; 1], -} - -/// The fixed set of reserved NTFS metafile names: the `$`-prefixed records at -/// reserved FRS 0–15 and under the `$Extend` directory. An NTFS volume can -/// only ever contain *these* specific metafiles. 
-/// -/// Any *other* `$`-prefixed name β€” `$Recycle.Bin`, `$PatchCache`, -/// `$WinREAgent`, the `WinSxS` `$$_*.cdf-ms` filemaps, or a user file literally -/// named `$foo` β€” is an ordinary file that file managers and tools like -/// Everything display. Classifying those as metafiles is exactly the bug -/// `--hide-system` had. -/// -/// Matched case-insensitively: NTFS itself is case-insensitive, and these -/// canonical names are occasionally surfaced with varied casing. -pub(crate) const NTFS_METAFILE_NAMES: &[&str] = &[ - // Reserved FRS 0–11 (volume root metafiles) - "$MFT", - "$MFTMirr", - "$LogFile", - "$Volume", - "$AttrDef", - "$Bitmap", - "$Boot", - "$BadClus", - "$Secure", - "$UpCase", - "$Extend", - // `$Extend` directory children - "$ObjId", - "$Quota", - "$Reparse", - "$UsnJrnl", - "$RmMetadata", - "$Deleted", - // `$Extend\$RmMetadata` children - "$Repair", - "$Tops", - "$TxfLog", - "$Txf", -]; - -/// Returns whether `name` is one of the reserved `NTFS_METAFILE_NAMES` -/// (a crate-private allowlist, so no intra-doc link from this public item). -/// -/// Real metafiles are already excluded from the compact index at build time -/// (`build_compact_index` drops them via `PathResolver` FRS-validity, not by -/// name). This exact-name check is the *authoritative* classifier for the -/// `--hide-system` filter, so it can never misclassify an ordinary -/// `$`-prefixed file as a metafile. -#[must_use] -#[inline] -pub fn is_ntfs_metafile_name(name: &str) -> bool { - NTFS_METAFILE_NAMES - .iter() - .any(|reserved| name.eq_ignore_ascii_case(reserved)) -} - -impl CompactRecord { - /// Directory flag bit in raw NTFS `FILE_ATTRIBUTE_DIRECTORY`. - const DIRECTORY_BIT: u32 = 0x0010; - - /// Returns `true` if this record is a directory. 
- #[inline] - #[must_use] - pub const fn is_directory(self) -> bool { - self.flags & Self::DIRECTORY_BIT != 0 - } - - /// Returns `true` if this record is one of the reserved NTFS metafiles - /// (`$MFT`, `$LogFile`, `$Bitmap`, `$Secure`, the `$Extend` family, …). - /// - /// The cached [`name_first_byte`](Self::name_first_byte) field is a cheap - /// gate: every metafile name starts with `$`, and `$`-prefixed records are - /// a vanishing fraction of an index, so this rejects virtually every record - /// with a single byte comparison and only touches the names arena for the - /// handful of `$`-prefixed candidates. The arena lookup is *required* for - /// correctness, because an ordinary file may also start with `$` - /// (`$Recycle.Bin`, `$PatchCache`, the `WinSxS` `$$_*.cdf-ms` filemaps) β€” - /// those are NOT metafiles and must not be hidden by `--hide-system`. - /// See [`is_ntfs_metafile_name`]. - #[inline] - #[must_use] - pub fn is_system_metafile(&self, names: &[u8]) -> bool { - self.name_first_byte == b'$' && is_ntfs_metafile_name(self.name(names)) - } - - /// Get the name from a names blob as a **lossy `&str` view**. - /// - /// Valid-UTF-8 names (the common case) are returned verbatim; an ill-formed - /// (surrogate-bearing) name stored as WTF-8 returns `""` for display. Use - /// [`Self::name_bytes`] for the lossless bytes that exact/substring search - /// matches against, so a file with an ill-formed name stays findable - /// (WI-4.4). - #[inline] - #[must_use] - pub fn name<'a>(&self, names: &'a [u8]) -> &'a str { - core::str::from_utf8(self.name_bytes(names)).unwrap_or("") - } - - /// Get the name's **raw bytes** (WTF-8) from a names blob β€” the lossless - /// accessor. - /// - /// Returns exactly the stored bytes, including the byte-faithful encoding - /// of an ill-formed NTFS name (unpaired surrogates). This is what makes - /// every file matchable/findable by its true name regardless of UTF-8 - /// well-formedness (WI-4.4). 
Returns `&[]` for an out-of-range slice. - #[inline] - #[must_use] - pub fn name_bytes<'a>(&self, names: &'a [u8]) -> &'a [u8] { - let start = self.name_offset as usize; - let end = start.saturating_add(self.name_len as usize); - names.get(start..end).unwrap_or(&[]) - } -} - -// Compile-time size assertion. -const _: () = assert!( - size_of::() == 80, - "CompactRecord must be exactly 80 bytes" -); - -/// Children index in CSR (Compressed Sparse Row) layout. -/// -/// `children(i)` returns the compact indices of record i's children as -/// a contiguous `&[u32]` slice. The CSR layout avoids per-record `Vec` -/// allocations and enables bulk serialization/deserialization. -#[derive(Clone)] -pub struct ChildrenIndex { - /// CSR offsets β€” one per record + sentinel. Length = `record_count` + 1. - /// Children of record `i` are `values[offsets[i]..offsets[i+1]]`. - offsets: Vec, - /// Flat array of all child indices. - values: Vec, -} - -impl ChildrenIndex { - /// Total heap capacity (offsets + values) in bytes. - #[must_use] - pub const fn heap_size_bytes(&self) -> usize { - self.offsets.capacity() * size_of::() + self.values.capacity() * size_of::() - } - - /// Build from `CompactRecord::parent_idx` in two passes (count + scatter). 
- #[must_use] - pub fn build(records: &[CompactRecord]) -> Self { - // Count children per parent - let mut counts = vec![0_u32; records.len()]; - for rec in records { - let parent = rec.parent_idx; - if parent != u32::MAX - && let Some(cnt) = counts.get_mut(parent as usize) - { - *cnt += 1; - } - } - - // Prefix-sum β†’ offsets - let mut offsets = Vec::with_capacity(records.len() + 1); - let mut running = 0_u32; - for &cnt in &counts { - offsets.push(running); - running = running.saturating_add(cnt); - } - offsets.push(running); - - // Scatter children into values - let mut values = vec![0_u32; running as usize]; - let mut write_pos = offsets.clone(); - for (idx, rec) in records.iter().enumerate() { - let parent = rec.parent_idx; - if parent != u32::MAX - && let Some(pos) = write_pos.get_mut(parent as usize) - && let Some(slot) = values.get_mut(*pos as usize) - { - let child_idx = uffs_mft::len_to_u32(idx); - *slot = child_idx; - *pos += 1; - } - } - - Self { offsets, values } - } - - /// Construct directly from pre-built CSR arrays (cache deserialization). - #[must_use] - pub const fn from_csr(offsets: Vec, values: Vec) -> Self { - Self { offsets, values } - } - - /// Borrow the CSR components for serialization. - #[must_use] - pub(crate) fn as_csr(&self) -> (&[u32], &[u32]) { - (&self.offsets, &self.values) - } - - /// Return the children of record `idx` as a contiguous slice. - #[must_use] - pub fn get(&self, idx: usize) -> &[u32] { - let start = self.offsets.get(idx).copied().unwrap_or(0) as usize; - let end = self.offsets.get(idx + 1).copied().unwrap_or(0) as usize; - self.values.get(start..end).unwrap_or(&[]) - } - - /// Total number of child entries across all records. - #[must_use] - pub const fn total_children(&self) -> usize { - self.values.len() - } - - /// Number of records tracked (one slot per record). - #[must_use] - pub const fn record_count(&self) -> usize { - self.offsets.len().saturating_sub(1) - } - - /// Create an empty children index. 
- #[must_use] - pub fn empty() -> Self { - Self { - offsets: vec![0], - values: Vec::new(), - } - } -} - -/// Extension inverted index: `extension_id β†’ &[u32]` (record indices). -/// -/// CSR layout identical to `ChildrenIndex`. Built once at load time in a -/// single O(N) pass so `--ext rs` queries can iterate only matching records -/// instead of scanning all 25M entries. -#[derive(Clone)] -pub struct ExtensionIndex { - /// CSR offsets β€” length = `max_ext_id` + 2 (one per `ext_id` + sentinel). - offsets: Vec, - /// Flat array of record indices, grouped by `extension_id`. - values: Vec, -} - -impl ExtensionIndex { - /// Total heap capacity (offsets + values) in bytes. - #[must_use] - pub const fn heap_size_bytes(&self) -> usize { - self.offsets.capacity() * size_of::() + self.values.capacity() * size_of::() - } - - /// Build from compact records in two passes (count + scatter). - #[must_use] - pub fn build(records: &[CompactRecord]) -> Self { - // Find the maximum extension_id to size the offsets array. - let max_id = records - .iter() - .map(|rec| rec.extension_id) - .max() - .unwrap_or(0) as usize; - - // Pass 1: count records per extension_id. - let mut counts = vec![0_u32; max_id + 1]; - for rec in records { - if rec.name_len == 0 { - continue; - } - if let Some(cnt) = counts.get_mut(rec.extension_id as usize) { - *cnt += 1; - } - } - - // Prefix-sum β†’ offsets. - let mut offsets = Vec::with_capacity(max_id + 2); - let mut running = 0_u32; - for &cnt in &counts { - offsets.push(running); - running = running.saturating_add(cnt); - } - offsets.push(running); - - // Pass 2: scatter record indices into values. 
- let mut values = vec![0_u32; running as usize]; - let mut write_pos = offsets.clone(); - for (idx, rec) in records.iter().enumerate() { - if rec.name_len == 0 { - continue; - } - let eid = rec.extension_id as usize; - if let Some(pos) = write_pos.get_mut(eid) - && let Some(slot) = values.get_mut(*pos as usize) - { - let idx_u32 = uffs_mft::len_to_u32(idx); - *slot = idx_u32; - *pos += 1; - } - } - - Self { offsets, values } - } - - /// Return record indices for the given `extension_id`. - #[must_use] - pub fn get(&self, ext_id: u16) -> &[u32] { - let eid = ext_id as usize; - let start = self.offsets.get(eid).copied().unwrap_or(0) as usize; - let end = self.offsets.get(eid + 1).copied().unwrap_or(0) as usize; - self.values.get(start..end).unwrap_or(&[]) - } - - /// Create an empty extension index. - #[must_use] - pub fn empty() -> Self { - Self { - offsets: vec![0], - values: Vec::new(), - } - } - - /// Total number of indexed record entries. - #[must_use] - pub const fn total_entries(&self) -> usize { - self.values.len() - } -} +/// Mutable delta overlay over the immutable base CSR indexes (Phase 2+). +pub mod delta; + +// File-size decomposition: the row type, the CSR indexes, path-length +// computation, and the MFTβ†’compact builder live in focused submodules. Every +// public item is re-exported below so the canonical `crate::compact::X` paths +// (used across the workspace) are unchanged. 
+mod builder; +mod children; +mod extension; +mod path_len; +mod record; + +pub use builder::build_compact_index; +pub(crate) use builder::{INDEX_TTL_SECONDS, resolve_case_fold}; +pub use children::ChildrenIndex; +pub use delta::IndexDelta; +pub use extension::ExtensionIndex; +pub(crate) use path_len::{PathChange, compute_path_lengths, update_path_lengths_incremental}; +pub(crate) use record::NTFS_METAFILE_NAMES; +pub use record::{CompactRecord, is_ntfs_metafile_name}; + +/// Touched-record count (adds + tombstones since the last compaction) above +/// which [`DriveCompactIndex::apply_index_delta`] folds the delta back into +/// fresh bases (design Β§5.4). Sized to amortize the ~340 ms base rebuild +/// across many small USN applies while bounding delta memory + per-search merge +/// cost; tune from the `IDXDELTA-TIMING` WIN baseline. +pub(crate) const TRIGRAM_COMPACT_THRESHOLD: u32 = 50_000; /// A loaded drive with compact index. #[derive(Clone)] @@ -428,12 +77,21 @@ pub struct DriveCompactIndex { /// rationale. pub names: ColumnStorage, /// Trigram inverted index built from folded names (char-level, `$UpCase`). - pub trigram: TrigramIndex, + /// + /// `Arc`-shared (Phase 3): the per-apply whole-body clone the daemon takes + /// before patching pointer-clones this immutable base (a refcount bump) + /// instead of deep-copying its ~hundreds-of-MB CSR arrays. The apply path + /// never mutates it in place β€” it overlays changes on [`Self::delta`] and + /// only ever *replaces* the whole `Arc` at compaction. + pub trigram: Arc, /// CSR children index: `children.get(i)` β†’ child indices of record i. - pub children: ChildrenIndex, + /// `Arc`-shared (Phase 3) β€” see [`Self::trigram`]; rebuilt (Arc replaced) + /// each apply until Phase 4 gives it a delta overlay. + pub children: Arc, /// Extension inverted index: `ext_id β†’ record indices`. /// Enables O(K) `--ext` queries where K = matching records, not O(N). 
- pub ext_index: ExtensionIndex, + /// `Arc`-shared (Phase 3) β€” see [`Self::trigram`]. + pub ext_index: Arc, /// NTFS `$UpCase` case folding engine for this volume. pub fold: uffs_text::case_fold::CaseFold, /// Extension name table: `ext_names[extension_id]` β†’ lowercase extension @@ -490,6 +148,18 @@ pub struct DriveCompactIndex { /// silently degrades to the full-reload fallback. See the /// v9 β†’ v10 cache format bump in `compact_cache::COMPACT_VERSION`. pub frs_to_compact: Vec, + /// Incremental-index-maintenance overlay (design Β§5.1). + /// + /// `None` on a freshly built / freshly compacted / cache-loaded index: + /// the base CSR indexes ([`Self::trigram`], [`Self::children`], + /// [`Self::ext_index`]) are authoritative and search reads them with zero + /// overhead. Once [`crate::compact_loader::apply_usn_patch`] starts + /// overlaying USN deltas (Phase 2b) this becomes `Some`, and the search + /// choke points ([`Self::trigram_search`], …) merge base βˆͺ delta minus + /// tombstones. Compaction folds the delta into a fresh base and resets it + /// to `None`. Never serialized β€” the on-disk cache is always delta-free + /// (compact before save), so a cache load yields `None`. + pub delta: Option, } /// Per-component heap footprint of a [`DriveCompactIndex`]. @@ -524,6 +194,172 @@ impl AsRef for DriveCompactIndex { } impl DriveCompactIndex { + /// Trigram candidate search through the base βˆͺ delta overlay (design Β§5.2). + /// + /// The single choke point every trigram caller goes through. When + /// [`Self::delta`] is `None` (fresh / compacted index) it delegates to the + /// base [`TrigramIndex::search`] with **zero** overhead. When a delta is + /// present it merges, per needle-trigram, the base posting with the delta + /// posting, intersects across the needle's trigrams (the trigram AND), then + /// resolves tombstones on the final candidate set. 
+ /// + /// **Tombstone correctness:** a candidate whose record is tombstoned is + /// kept **iff** it appears in the delta posting of *every* needle + /// trigram β€” i.e. it was re-added (renamed-in) under a name that still + /// contains the needle. A deleted record (tombstoned, no re-add) and a + /// renamed-away record matched only via its stale base postings are + /// both dropped. Filtering the final set (not per posting list) is what + /// lets a renamed file remain visible under its new name while + /// disappearing from its old one. + /// + /// Returns `None` for needles under 3 codepoints (caller falls back to a + /// linear scan), mirroring [`TrigramIndex::search`]. + #[must_use] + pub fn trigram_search(&self, needle: &str) -> Option> { + let Some(delta) = &self.delta else { + return self.trigram.search(needle, self.fold); + }; + let trigrams = crate::trigram::needle_trigrams(needle, self.fold)?; + if trigrams.is_empty() { + return Some(Vec::new()); + } + + // Per needle-trigram effective posting = base βˆͺ delta. An absent trigram + // (empty in both) is skipped, never zeroing the result β€” the trigram + // index is a candidate pre-filter, exactly as the base search treats it. + let mut lists: Vec> = Vec::with_capacity(trigrams.len()); + for &tri in &trigrams { + let base = self.trigram.get_posting(tri).unwrap_or(&[]); + let merged = delta::merge_postings(base, delta.trigram_postings(tri)); + if !merged.is_empty() { + lists.push(merged); + } + } + if lists.is_empty() { + return Some(Vec::new()); + } + + lists.sort_unstable_by_key(Vec::len); + let mut result = lists.first().cloned().unwrap_or_default(); + for list in lists.iter().skip(1) { + crate::trigram::intersect_in_place(&mut result, list); + if result.is_empty() { + break; + } + } + + // Final tombstone resolution: keep a tombstoned candidate only if it was + // re-added under a name covering every needle trigram (see doc above). 
+ if !delta.tombstones.is_empty() { + result.retain(|&idx| { + !delta.is_tombstoned(idx) + || trigrams + .iter() + .all(|&tri| delta.trigram_postings(tri).binary_search(&idx).is_ok()) + }); + } + Some(result) + } + + /// Fold the delta overlay back into fresh bases and clear it (design Β§5.4 + /// compaction). Rebuilds the trigram (Phase 2b) and extension (Phase 4a) + /// bases from the current records β€” which already reflect every applied + /// mutation β€” then resets `delta = None` so subsequent searches take the + /// zero-overhead base fast path. + /// + /// O(total records); the per-apply path drives toward this running only + /// occasionally (every [`TRIGRAM_COMPACT_THRESHOLD`] touched records) or + /// before serialization (the on-disk cache is always delta-free). + /// + /// `children` is still rebuilt every apply (Phase 4b will move it onto the + /// overlay), so compaction does not need to touch it here. + pub(crate) fn compact_base(&mut self) { + self.trigram = Arc::new(TrigramIndex::build(&self.records, &self.names, self.fold)); + self.ext_index = Arc::new(ExtensionIndex::build(&self.records)); + self.delta = None; + } + + /// Record indices whose extension is `ext_id`, through the base βˆͺ delta + /// overlay (Phase 4a). The choke point every `--ext` query goes through. + /// + /// When [`Self::delta`] is `None` this borrows the base CSR posting slice + /// with **zero** allocation. With a delta present it merges the base and + /// delta postings, then validates each candidate against the live records β€” + /// keeping `idx` only if `records[idx].extension_id == ext_id` and the + /// record is live (`name_len != 0`). That records check is what makes a + /// renamed extension (`foo.log` β†’ `foo.pdf`) and a delete correct + /// **without** a separate ext tombstone: a stale base posting fails the + /// check. 
+ #[must_use] + pub fn records_with_ext(&self, ext_id: u16) -> Cow<'_, [u32]> { + let base = self.ext_index.get(ext_id); + let Some(delta) = &self.delta else { + return Cow::Borrowed(base); + }; + let merged = delta::merge_postings(base, delta.ext_postings(ext_id)); + let filtered: Vec = merged + .into_iter() + .filter(|&idx| { + self.records + .get(idx as usize) + .is_some_and(|rec| rec.extension_id == ext_id && rec.name_len != 0) + }) + .collect(); + Cow::Owned(filtered) + } + + /// Overlay one USN apply's changes onto the base+delta index instead of + /// rebuilding the trigram (Phase 2b) and extension (Phase 4a) bases. + /// + /// `adds` are the created / renamed / reused records (their post-mutation + /// name trigrams + extension + parent are added to the delta); `tombstones` + /// are the deleted / renamed-away / reused-slot records (their stale base + /// **trigram** postings are masked β€” the ext/children overlays validate + /// candidates against the live records instead, so they need no tombstone). + /// Returns `true` if the accumulated delta crossed + /// [`TRIGRAM_COMPACT_THRESHOLD`] and triggered a [`Self::compact_base`] + /// fold this call. + pub(crate) fn apply_index_delta(&mut self, adds: &[PathChange], tombstones: &[u32]) -> bool { + // Fast path for a batch that will cross the compaction threshold anyway + // (e.g. a 100k-file burst): populating the delta only to discard it is + // pure waste. Refold the base directly from the records β€” which already + // reflect every change in this batch β€” and drop any prior overlay. 
+ let pending = self.delta.as_ref().map_or(0, IndexDelta::len); + let batch = u32::try_from(adds.len().saturating_add(tombstones.len())).unwrap_or(u32::MAX); + if pending.saturating_add(batch) > TRIGRAM_COMPACT_THRESHOLD { + self.compact_base(); + return true; + } + + let mut delta = self.delta.take().unwrap_or_default(); + for &idx in tombstones { + delta.tombstone(idx); + } + let fold = self.fold; + for change in adds { + let Some(rec) = self.records.get(change.idx as usize) else { + continue; + }; + // A record tombstoned this same batch (e.g. created-then-deleted) is + // gone; skip its add entirely. + if rec.name_len == 0 { + continue; + } + // Trigram postings only for names β‰₯ 3 codepoints (shorter names are + // found via linear scan, not the trigram pre-filter β€” matching the + // base build); but the extension + children overlays are added for + // EVERY record regardless of name length, so an `--ext` / tree query + // never misses a short-named create/rename. + let trigrams = + crate::trigram::needle_trigrams(rec.name(&self.names), fold).unwrap_or_default(); + delta.add_record(change.idx, &trigrams, rec.extension_id, rec.parent_idx); + } + // The early check above guarantees `pending + batch ≀ threshold`, and + // the populated delta can only be ≀ that, so no compaction is due here. + self.delta = Some(delta); + false + } + /// Compute the total heap footprint of this index (in bytes). /// /// This measures *capacity* (what the allocator reserved), not *len* @@ -654,501 +490,6 @@ impl DriveCompactIndex { } } -/// Expand alternate data streams (ADS) for a single record, producing the -/// name Γ— stream cross product as extra `CompactRecord` entries. 
-#[expect( - clippy::single_call_fn, - reason = "Extracted to keep expand_links_and_ads under the too_many_lines limit" -)] -fn expand_ads_streams( - index: &MftIndex, - record: &uffs_mft::index::FileRecord, - resolve_parent: &dyn Fn(uffs_mft::ParentFrs, uffs_mft::Frs) -> u32, - names: &mut Vec, - extra: &mut Vec, -) { - // Collect all names for this record (primary + hardlinks). - let mut all_names: Vec<(&str, u32)> = Vec::new(); - let primary_name = index.get_name(record.first_name.name); - if !primary_name.is_empty() { - let pid = resolve_parent(record.first_name.parent_frs, record.frs); - all_names.push((primary_name, pid)); - } - if record.name_count > 1 { - let mut le = record.first_name.next_entry; - while le != uffs_mft::NO_ENTRY { - let Some(lnk) = index.links.get(le as usize) else { - break; - }; - let ln = index.get_name(lnk.name); - if !ln.is_empty() { - let lp = resolve_parent(lnk.parent_frs, record.frs); - all_names.push((ln, lp)); - } - le = lnk.next_entry; - } - } - - // Walk output streams (skip default $DATA at head of chain). 
- let mut se = record.first_stream.next_entry; - while se != uffs_mft::NO_ENTRY { - let Some(stream) = index.streams.get(se as usize) else { - break; - }; - if stream.is_output_stream() { - let sn = index.stream_name(stream); - if !sn.is_empty() { - for &(base_name, parent_idx) in &all_names { - let combined = format!("{base_name}:{sn}"); - let name_offset = uffs_mft::len_to_u32(names.len()); - let name_len = uffs_mft::len_to_u16(combined.len()); - names.extend_from_slice(combined.as_bytes()); - - extra.push(CompactRecord { - size: stream.size.length, - allocated: stream.size.allocated, - treesize: 0, - tree_allocated: 0, - created: record.stdinfo.created, - modified: record.stdinfo.modified, - accessed: record.stdinfo.accessed, - name_offset, - flags: record.stdinfo.flags, - parent_idx, - descendants: 0, - name_len, - extension_id: 0, - path_len: 0, - name_first_byte: combined.as_bytes().first().copied().unwrap_or(0), - _pad: [0; 1], - }); - } - } - } - se = stream.next_entry; - } -} - -/// Resolve a typed `ParentFrs` (vs an own typed `Frs`) into a compact-record -/// index, returning `u32::MAX` for the "no real parent" cases (self-reference, -/// `NO_ENTRY` sentinel, or root). -/// -/// Extracted as a free helper so the typed `ParentFrs`/`Frs` signature is -/// enforced at every call site AND so `build_compact_index` stays under -/// the clippy `too_many_lines` budget. 
-#[expect( - clippy::single_call_fn, - reason = "Wrapped by a closure in build_compact_index; kept free-standing \ - for clippy::too_many_lines budget headroom" -)] -fn resolve_parent_compact_idx( - index: &MftIndex, - parent_frs: uffs_mft::ParentFrs, - own_frs: uffs_mft::Frs, -) -> u32 { - let parent = parent_frs.as_frs(); - if parent == own_frs || parent_frs.raw() == u64::from(uffs_mft::NO_ENTRY) || parent.is_root() { - return u32::MAX; - } - let parent_usize = uffs_mft::frs_to_usize(parent.raw()); - index - .frs_to_idx - .get(parent_usize) - .copied() - .filter(|&idx| idx != uffs_mft::NO_ENTRY) - .unwrap_or(u32::MAX) -} - -/// Expand hardlinks and ADS into additional `CompactRecord` entries. -/// -/// Phase 2 (hardlinks): for each valid record with `name_count > 1`, walks the -/// link chain and creates additional records with alternate name/parent. -/// -/// Phase 3 (ADS): delegates to [`expand_ads_streams`] for each valid record -/// with `stream_count > 1`. -#[expect( - clippy::single_call_fn, - reason = "Extracted to keep build_compact_index under the too_many_lines limit" -)] -fn expand_links_and_ads( - index: &MftIndex, - resolver: &uffs_mft::index::PathResolver, - resolve_parent: &dyn Fn(uffs_mft::ParentFrs, uffs_mft::Frs) -> u32, - names: &mut Vec, -) -> Vec { - let mut extra: Vec = Vec::new(); - - for (idx, record) in index.records.iter().enumerate() { - if !resolver.is_valid_idx(idx) { - continue; - } - - // Phase 2: hardlink expansion. 
- if record.name_count > 1 { - let mut link_entry = record.first_name.next_entry; - while link_entry != uffs_mft::NO_ENTRY { - let Some(link) = index.links.get(link_entry as usize) else { - break; - }; - let link_parent = resolve_parent(link.parent_frs, record.frs); - extra.push(CompactRecord { - size: record.first_stream.size.length, - allocated: record.first_stream.size.allocated, - treesize: record.treesize, - tree_allocated: record.tree_allocated, - created: record.stdinfo.created, - modified: record.stdinfo.modified, - accessed: record.stdinfo.accessed, - name_offset: link.name.offset, - flags: record.stdinfo.flags, - parent_idx: link_parent, - descendants: record.descendants, - name_len: link.name.length(), - extension_id: link.name.extension_id(), - path_len: 0, - name_first_byte: names.get(link.name.offset as usize).copied().unwrap_or(0), - _pad: [0; 1], - }); - link_entry = link.next_entry; - } - } - - // Phase 3: ADS expansion (name Γ— stream cross product). - if record.stream_count > 1 { - expand_ads_streams(index, record, resolve_parent, names, &mut extra); - } - } - extra -} - -/// Compute `path_len` (in **characters**, not bytes) for every record -/// via top-down BFS. -/// -/// Root entries (`parent_idx == u32::MAX`) get -/// `path_len = 2 + 1 + name_chars` (e.g. `"C:\" + name`), and children -/// accumulate `parent.path_len + 1 (separator) + name_chars`. -/// Saturates at `u16::MAX` (65 535) for extremely deep paths. -/// -/// Character counting matches `str::chars().count()` so the precomputed -/// value agrees with the display-row path-length filter. -pub(crate) fn compute_path_lengths( - records: &mut [CompactRecord], - names: &[u8], - drive_letter: uffs_mft::platform::DriveLetter, -) { - // Drive prefix in characters: the letter (1 char) + colon (1 char) = 2. - // `DriveLetter` is ASCII A–Z by construction (validated in - // `DriveLetter::parse`), so the previous runtime `debug_assert!` - // is now a tautology and was removed. 
The arithmetic only cares - // about "1 letter char + 1 colon". - let _: uffs_mft::platform::DriveLetter = drive_letter; - let drive_prefix_chars: u32 = 1 /* letter */ + 1 /* ':' */; - - // Build forward adjacency list (parent β†’ children) for top-down BFS. - let record_count = records.len(); - let mut children_of: Vec> = vec![Vec::new(); record_count]; - let mut roots: Vec = Vec::new(); - - for (idx, rec) in records.iter().enumerate() { - let pi = rec.parent_idx; - if pi == u32::MAX { - roots.push(uffs_mft::len_to_u32(idx)); - } else if let Some(siblings) = children_of.get_mut(pi as usize) { - siblings.push(uffs_mft::len_to_u32(idx)); - } - } - - // BFS from roots. - let mut queue = alloc::collections::VecDeque::with_capacity(roots.len()); - for &root in &roots { - let Some(rec) = records.get(root as usize) else { - continue; - }; - let name_chars = name_char_count(rec, names); - let pl = if name_chars == 0 { - // Drive root directory: "C:\" - drive_prefix_chars + 1 - } else { - // Top-level file/dir: "C:\" - drive_prefix_chars + 1 + name_chars - }; - if let Some(slot) = records.get_mut(root as usize) { - slot.path_len = uffs_mft::len_to_u16(pl as usize); - } - queue.push_back(root); - } - - while let Some(idx) = queue.pop_front() { - let parent_pl = records - .get(idx as usize) - .map_or(0, |rec| u32::from(rec.path_len)); - let children: Vec = children_of - .get(idx as usize) - .map_or_else(Vec::new, Clone::clone); - for &child in &children { - let child_chars = records - .get(child as usize) - .map_or(0, |rec| name_char_count(rec, names)); - // path = parent_path + "\" + name - let pl = parent_pl.saturating_add(1).saturating_add(child_chars); - if let Some(slot) = records.get_mut(child as usize) { - slot.path_len = uffs_mft::len_to_u16(pl as usize); - } - queue.push_back(child); - } - } -} - -/// Count the number of Unicode characters in a record's filename. 
-/// -/// Falls back to `name_len` (byte count) if the name slice is not valid -/// UTF-8 β€” this is correct for ASCII names and a safe upper bound -/// otherwise. -fn name_char_count(rec: &CompactRecord, names: &[u8]) -> u32 { - let start = rec.name_offset as usize; - let end = start + rec.name_len as usize; - names - .get(start..end) - .and_then(|slice| core::str::from_utf8(slice).ok()) - .map_or_else( - || u32::from(rec.name_len), - |name| uffs_mft::len_to_u32(name.chars().count()), - ) -} - -/// Build a `DriveCompactIndex` from a loaded `MftIndex`. -/// -/// Returns `(DriveCompactIndex, compact_build_ms, trigram_build_ms)`. -#[must_use] -pub fn build_compact_index( - drive_letter: uffs_mft::platform::DriveLetter, - index: &MftIndex, -) -> (DriveCompactIndex, u128, u128) { - use uffs_mft::index::PathResolver; - - let compact_start = Instant::now(); - - // Build path resolver to determine which records are valid. - // This filters out system metafiles (FRS 0-15 except root) and - // propagates invalidity to descendants (e.g., $Extend children). - let resolver = PathResolver::build(index, false); - - // Closure wraps the free helper `resolve_parent_compact_idx` so the - // typed `ParentFrs`/`Frs` signature is enforced at every call site - // (own↔parent swap becomes a compile error). Keeping the helper - // free-standing also keeps `build_compact_index` under the - // clippy::too_many_lines budget. - let resolve_parent = |parent_frs: uffs_mft::ParentFrs, own_frs: uffs_mft::Frs| -> u32 { - resolve_parent_compact_idx(index, parent_frs, own_frs) - }; - - // Phase 1: build primary compact records (parallel). - let mut records: Vec = index - .records - .par_iter() - .enumerate() - .map(|(idx, record)| { - // Skip invalid records (system metafiles + descendants). 
- if !resolver.is_valid_idx(idx) { - return CompactRecord::default(); - } - - let name_ref = &record.first_name.name; - let parent_idx = resolve_parent(record.first_name.parent_frs, record.frs); - - CompactRecord { - size: record.first_stream.size.length, - allocated: record.first_stream.size.allocated, - treesize: record.treesize, - tree_allocated: record.tree_allocated, - created: record.stdinfo.created, - modified: record.stdinfo.modified, - accessed: record.stdinfo.accessed, - name_offset: name_ref.offset, - flags: record.stdinfo.flags, - parent_idx, - descendants: record.descendants, - name_len: name_ref.length(), - extension_id: name_ref.extension_id(), - path_len: 0, - name_first_byte: index - .names - .get(name_ref.offset as usize) - .copied() - .unwrap_or(0), - _pad: [0; 1], - } - }) - .collect(); - - // Phase 2+3: expand hardlinks and ADS (sequential β€” rare, <1% of records). - let mut names = index.names.clone(); - let expanded = expand_links_and_ads(index, &resolver, &resolve_parent, &mut names); - records.extend(expanded); - - // Phase 4: compute path_len (in characters) for every record via - // top-down BFS. path_len = char count of "C:\dir\name". - compute_path_lengths(&mut records, &names, drive_letter); - - let compact_elapsed = compact_start.elapsed().as_millis(); - - // Try live $UpCase from the NTFS volume; fall back to compiled-in default. - let fold = resolve_case_fold(drive_letter); - - let tri_start = Instant::now(); - let trigram = TrigramIndex::build(&records, &names, fold); - let tri_elapsed = tri_start.elapsed().as_millis(); - - // Build children CSR index from parent_idx (two-pass: count + scatter). - let children = ChildrenIndex::build(&records); - - // Copy extension name table from MftIndex (Arc β†’ Box). 
- let mut ext_names: Vec> = index - .extensions - .names - .iter() - .map(|arc| Box::from(arc.as_ref())) - .collect(); - - let ext_t0 = Instant::now(); - let ext_index = ExtensionIndex::build(&records); - let ext_build_ms = ext_t0.elapsed().as_millis(); - tracing::info!( - drive = %drive_letter, - entries = ext_index.total_entries(), - build_ms = ext_build_ms, - "ExtensionIndex built" - ); - - shrink_compact_vecs(drive_letter, &mut records, &mut names, &mut ext_names); - - // Phase 8: clone the FRS β†’ mft_idx mapping off the transient - // `MftIndex` before it goes out of scope. In the primary - // `build_compact_index` path compact_idx == mft_idx (records - // are produced 1:1 by `index.records.par_iter().enumerate()`), - // so `frs_to_idx` is exactly the FRS β†’ compact_idx mapping the - // surgical-patch path needs. Hardlink / ADS-expanded records - // append at the END with the same FRS but higher compact_idx; - // those secondary slots are not addressable from journal events - // (USN events reference primary FRS) so the primary mapping is - // sufficient. `uffs_mft::NO_ENTRY == u32::MAX` matches the - // sentinel `frs_to_compact` uses for unmapped slots. - let mut compact_index = DriveCompactIndex { - letter: drive_letter, - records: ColumnStorage::from_vec(records), - names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, - fold, - ext_names, - source: IndexSource::MftFile(std::path::PathBuf::from(format!("{drive_letter}:"))), - source_epoch: index.build_epoch, - bloom: None, - path_trie: None, - frs_to_compact: index.frs_to_idx.clone(), - }; - - // Phase 4: populate bloom + path_trie from the freshly-built - // index. These are needed for the search-skip pre-check - // (Commit F) and serialised into the v9+ cache (Commit D). 
- let bloom = compact_index.build_bloom(); - let path_trie = compact_index.build_path_trie(); - compact_index.bloom = Some(bloom); - compact_index.path_trie = Some(path_trie); - - (compact_index, compact_elapsed, tri_elapsed) -} - -/// Shrink all growable Vecs to exact fit after compact index build. -/// -/// Reclaims capacity slack from the doubling growth strategy used during -/// construction. Saves ~500 MB across 7 drives. -fn shrink_compact_vecs( - drive_letter: uffs_mft::platform::DriveLetter, - records: &mut Vec, - names: &mut Vec, - ext_names: &mut Vec>, -) { - let pre = records.capacity() * size_of::() + names.capacity(); - records.shrink_to_fit(); - names.shrink_to_fit(); - ext_names.shrink_to_fit(); - let post = records.capacity() * size_of::() + names.capacity(); - let reclaimed_mb = pre.saturating_sub(post) / (1024 * 1024); - if reclaimed_mb > 0 { - tracing::info!( - drive = %drive_letter, - reclaimed_mb, - "shrink_to_fit reclaimed memory" - ); - } -} - -/// Cache TTL in seconds (4 hours β€” same as Windows CLI). -/// -/// USN Journal handles incremental freshness; this is a safety-net full rescan. -pub(crate) const INDEX_TTL_SECONDS: u64 = 14400; - -// ── Live $UpCase resolution ────────────────────────────────────────── - -/// Try to read the live `$UpCase` table from the NTFS volume for -/// `drive_letter`. On success, log the result at `INFO` and any diffs -/// from the compiled-in default at `WARN`. On failure, log at `WARN` -/// and fall back to [`CaseFold::default_table()`]. 
-pub(crate) fn resolve_case_fold( - drive_letter: uffs_mft::platform::DriveLetter, -) -> uffs_text::case_fold::CaseFold { - let live_table = match uffs_mft::platform::upcase::read_upcase_table(drive_letter) { - Ok(table) => table, - Err(err) => { - tracing::warn!( - drive = %drive_letter, - error = %err, - "$UpCase live read failed β€” falling back to compiled-in default table" - ); - return uffs_text::case_fold::CaseFold::default_table(); - } - }; - - // Leak the box to get a `&'static [u16]` for CaseFold::from_ntfs. - let live_fold = uffs_text::case_fold::CaseFold::from_ntfs(Box::leak(live_table)); - log_upcase_comparison(drive_letter, &live_fold); - live_fold -} - -/// Log the comparison between live and compiled-in `$UpCase` tables. -fn log_upcase_comparison( - drive_letter: uffs_mft::platform::DriveLetter, - live_fold: &uffs_text::case_fold::CaseFold, -) { - let default = uffs_text::case_fold::CaseFold::default_table(); - let diffs = default.diff(live_fold); - - if diffs.is_empty() { - tracing::info!( - drive = %drive_letter, - "$UpCase loaded from live volume β€” identical to compiled-in default" - ); - return; - } - - tracing::info!( - drive = %drive_letter, - diff_count = diffs.len(), - "$UpCase loaded from live volume β€” differs from compiled-in default" - ); - for diff in &diffs { - tracing::warn!( - drive = %drive_letter, - codepoint = format_args!("U+{:04X}", diff.codepoint), - default = format_args!("U+{:04X}", diff.default_maps_to), - live = format_args!("U+{:04X}", diff.live_maps_to), - "$UpCase diff" - ); - } -} - // ════════════════════════════════════════════════════════════════════════ // REGRESSION TESTS β€” Search Pipeline Parity Guards // @@ -1159,3 +500,7 @@ fn log_upcase_comparison( #[cfg(test)] #[path = "compact_tests.rs"] mod tests; + +#[cfg(test)] +#[path = "compact_trigram_delta_tests.rs"] +mod trigram_delta_tests; diff --git a/crates/uffs-core/src/compact/builder.rs b/crates/uffs-core/src/compact/builder.rs new file mode 100644 index 
000000000..d96d44aa9 --- /dev/null +++ b/crates/uffs-core/src/compact/builder.rs @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Build a [`DriveCompactIndex`] from a loaded `MftIndex`: struct-of-arrays +//! column assembly, hardlink + ADS expansion, `$UpCase` case-fold resolution, +//! and the post-build vec shrink. + +use alloc::sync::Arc; +use std::time::Instant; + +use rayon::prelude::*; +use uffs_mft::index::MftIndex; + +use crate::compact::{ + ChildrenIndex, CompactRecord, DriveCompactIndex, ExtensionIndex, IndexSource, + compute_path_lengths, +}; +use crate::compact_storage::ColumnStorage; +use crate::trigram::TrigramIndex; + +/// Expand alternate data streams (ADS) for a single record, producing the +/// name Γ— stream cross product as extra `CompactRecord` entries. +#[expect( + clippy::single_call_fn, + reason = "Extracted to keep expand_links_and_ads under the too_many_lines limit" +)] +fn expand_ads_streams( + index: &MftIndex, + record: &uffs_mft::index::FileRecord, + resolve_parent: &dyn Fn(uffs_mft::ParentFrs, uffs_mft::Frs) -> u32, + names: &mut Vec, + extra: &mut Vec, +) { + // Collect all names for this record (primary + hardlinks). + let mut all_names: Vec<(&str, u32)> = Vec::new(); + let primary_name = index.get_name(record.first_name.name); + if !primary_name.is_empty() { + let pid = resolve_parent(record.first_name.parent_frs, record.frs); + all_names.push((primary_name, pid)); + } + if record.name_count > 1 { + let mut le = record.first_name.next_entry; + while le != uffs_mft::NO_ENTRY { + let Some(lnk) = index.links.get(le as usize) else { + break; + }; + let ln = index.get_name(lnk.name); + if !ln.is_empty() { + let lp = resolve_parent(lnk.parent_frs, record.frs); + all_names.push((ln, lp)); + } + le = lnk.next_entry; + } + } + + // Walk output streams (skip default $DATA at head of chain). 
+ let mut se = record.first_stream.next_entry; + while se != uffs_mft::NO_ENTRY { + let Some(stream) = index.streams.get(se as usize) else { + break; + }; + if stream.is_output_stream() { + let sn = index.stream_name(stream); + if !sn.is_empty() { + for &(base_name, parent_idx) in &all_names { + let combined = format!("{base_name}:{sn}"); + let name_offset = uffs_mft::len_to_u32(names.len()); + let name_len = uffs_mft::len_to_u16(combined.len()); + names.extend_from_slice(combined.as_bytes()); + + extra.push(CompactRecord { + size: stream.size.length, + allocated: stream.size.allocated, + treesize: 0, + tree_allocated: 0, + created: record.stdinfo.created, + modified: record.stdinfo.modified, + accessed: record.stdinfo.accessed, + name_offset, + flags: record.stdinfo.flags, + parent_idx, + descendants: 0, + name_len, + extension_id: 0, + path_len: 0, + name_first_byte: combined.as_bytes().first().copied().unwrap_or(0), + _pad: [0; 1], + }); + } + } + } + se = stream.next_entry; + } +} + +/// Resolve a typed `ParentFrs` (vs an own typed `Frs`) into a compact-record +/// index, returning `u32::MAX` for the "no real parent" cases (self-reference, +/// `NO_ENTRY` sentinel, or root). +/// +/// Extracted as a free helper so the typed `ParentFrs`/`Frs` signature is +/// enforced at every call site AND so `build_compact_index` stays under +/// the clippy `too_many_lines` budget. 
+#[expect( + clippy::single_call_fn, + reason = "Wrapped by a closure in build_compact_index; kept free-standing \ + for clippy::too_many_lines budget headroom" +)] +fn resolve_parent_compact_idx( + index: &MftIndex, + parent_frs: uffs_mft::ParentFrs, + own_frs: uffs_mft::Frs, +) -> u32 { + let parent = parent_frs.as_frs(); + if parent == own_frs || parent_frs.raw() == u64::from(uffs_mft::NO_ENTRY) || parent.is_root() { + return u32::MAX; + } + let parent_usize = uffs_mft::frs_to_usize(parent.raw()); + index + .frs_to_idx + .get(parent_usize) + .copied() + .filter(|&idx| idx != uffs_mft::NO_ENTRY) + .unwrap_or(u32::MAX) +} + +/// Expand hardlinks and ADS into additional `CompactRecord` entries. +/// +/// Phase 2 (hardlinks): for each valid record with `name_count > 1`, walks the +/// link chain and creates additional records with alternate name/parent. +/// +/// Phase 3 (ADS): delegates to [`expand_ads_streams`] for each valid record +/// with `stream_count > 1`. +#[expect( + clippy::single_call_fn, + reason = "Extracted to keep build_compact_index under the too_many_lines limit" +)] +fn expand_links_and_ads( + index: &MftIndex, + resolver: &uffs_mft::index::PathResolver, + resolve_parent: &dyn Fn(uffs_mft::ParentFrs, uffs_mft::Frs) -> u32, + names: &mut Vec, +) -> Vec { + let mut extra: Vec = Vec::new(); + + for (idx, record) in index.records.iter().enumerate() { + if !resolver.is_valid_idx(idx) { + continue; + } + + // Phase 2: hardlink expansion. 
+ if record.name_count > 1 { + let mut link_entry = record.first_name.next_entry; + while link_entry != uffs_mft::NO_ENTRY { + let Some(link) = index.links.get(link_entry as usize) else { + break; + }; + let link_parent = resolve_parent(link.parent_frs, record.frs); + extra.push(CompactRecord { + size: record.first_stream.size.length, + allocated: record.first_stream.size.allocated, + treesize: record.treesize, + tree_allocated: record.tree_allocated, + created: record.stdinfo.created, + modified: record.stdinfo.modified, + accessed: record.stdinfo.accessed, + name_offset: link.name.offset, + flags: record.stdinfo.flags, + parent_idx: link_parent, + descendants: record.descendants, + name_len: link.name.length(), + extension_id: link.name.extension_id(), + path_len: 0, + name_first_byte: names.get(link.name.offset as usize).copied().unwrap_or(0), + _pad: [0; 1], + }); + link_entry = link.next_entry; + } + } + + // Phase 3: ADS expansion (name Γ— stream cross product). + if record.stream_count > 1 { + expand_ads_streams(index, record, resolve_parent, names, &mut extra); + } + } + extra +} + +/// Build a `DriveCompactIndex` from a loaded `MftIndex`. +/// +/// Returns `(DriveCompactIndex, compact_build_ms, trigram_build_ms)`. +#[must_use] +pub fn build_compact_index( + drive_letter: uffs_mft::platform::DriveLetter, + index: &MftIndex, +) -> (DriveCompactIndex, u128, u128) { + use uffs_mft::index::PathResolver; + + let compact_start = Instant::now(); + + // Build path resolver to determine which records are valid. + // This filters out system metafiles (FRS 0-15 except root) and + // propagates invalidity to descendants (e.g., $Extend children). + let resolver = PathResolver::build(index, false); + + // Closure wraps the free helper `resolve_parent_compact_idx` so the + // typed `ParentFrs`/`Frs` signature is enforced at every call site + // (own↔parent swap becomes a compile error). 
Keeping the helper + // free-standing also keeps `build_compact_index` under the + // clippy::too_many_lines budget. + let resolve_parent = |parent_frs: uffs_mft::ParentFrs, own_frs: uffs_mft::Frs| -> u32 { + resolve_parent_compact_idx(index, parent_frs, own_frs) + }; + + // Phase 1: build primary compact records (parallel). + let mut records: Vec = index + .records + .par_iter() + .enumerate() + .map(|(idx, record)| { + // Skip invalid records (system metafiles + descendants). + if !resolver.is_valid_idx(idx) { + return CompactRecord::default(); + } + + let name_ref = &record.first_name.name; + let parent_idx = resolve_parent(record.first_name.parent_frs, record.frs); + + CompactRecord { + size: record.first_stream.size.length, + allocated: record.first_stream.size.allocated, + treesize: record.treesize, + tree_allocated: record.tree_allocated, + created: record.stdinfo.created, + modified: record.stdinfo.modified, + accessed: record.stdinfo.accessed, + name_offset: name_ref.offset, + flags: record.stdinfo.flags, + parent_idx, + descendants: record.descendants, + name_len: name_ref.length(), + extension_id: name_ref.extension_id(), + path_len: 0, + name_first_byte: index + .names + .get(name_ref.offset as usize) + .copied() + .unwrap_or(0), + _pad: [0; 1], + } + }) + .collect(); + + // Phase 2+3: expand hardlinks and ADS (sequential β€” rare, <1% of records). + let mut names = index.names.clone(); + let expanded = expand_links_and_ads(index, &resolver, &resolve_parent, &mut names); + records.extend(expanded); + + // Phase 4: compute path_len (in characters) for every record via + // top-down BFS. path_len = char count of "C:\dir\name". + compute_path_lengths(&mut records, &names, drive_letter); + + let compact_elapsed = compact_start.elapsed().as_millis(); + + // Try live $UpCase from the NTFS volume; fall back to compiled-in default. 
+ let fold = resolve_case_fold(drive_letter); + + let tri_start = Instant::now(); + let trigram = TrigramIndex::build(&records, &names, fold); + let tri_elapsed = tri_start.elapsed().as_millis(); + + // Build children CSR index from parent_idx (two-pass: count + scatter). + let children = ChildrenIndex::build(&records); + + // Copy extension name table from MftIndex (Arc β†’ Box). + let mut ext_names: Vec> = index + .extensions + .names + .iter() + .map(|arc| Box::from(arc.as_ref())) + .collect(); + + let ext_t0 = Instant::now(); + let ext_index = ExtensionIndex::build(&records); + let ext_build_ms = ext_t0.elapsed().as_millis(); + tracing::info!( + drive = %drive_letter, + entries = ext_index.total_entries(), + build_ms = ext_build_ms, + "ExtensionIndex built" + ); + + shrink_compact_vecs(drive_letter, &mut records, &mut names, &mut ext_names); + + // Phase 8: clone the FRS β†’ mft_idx mapping off the transient + // `MftIndex` before it goes out of scope. In the primary + // `build_compact_index` path compact_idx == mft_idx (records + // are produced 1:1 by `index.records.par_iter().enumerate()`), + // so `frs_to_idx` is exactly the FRS β†’ compact_idx mapping the + // surgical-patch path needs. Hardlink / ADS-expanded records + // append at the END with the same FRS but higher compact_idx; + // those secondary slots are not addressable from journal events + // (USN events reference primary FRS) so the primary mapping is + // sufficient. `uffs_mft::NO_ENTRY == u32::MAX` matches the + // sentinel `frs_to_compact` uses for unmapped slots. 
+ let mut compact_index = DriveCompactIndex { + letter: drive_letter, + records: ColumnStorage::from_vec(records), + names: ColumnStorage::from_vec(names), + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), + fold, + ext_names, + source: IndexSource::MftFile(std::path::PathBuf::from(format!("{drive_letter}:"))), + source_epoch: index.build_epoch, + bloom: None, + path_trie: None, + frs_to_compact: index.frs_to_idx.clone(), + // Freshly built from the MFT β€” base CSR indexes are authoritative, + // no overlay yet. apply_usn_patch (Phase 2b) starts the delta. + delta: None, + }; + + // Phase 4: populate bloom + path_trie from the freshly-built + // index. These are needed for the search-skip pre-check + // (Commit F) and serialised into the v9+ cache (Commit D). + let bloom = compact_index.build_bloom(); + let path_trie = compact_index.build_path_trie(); + compact_index.bloom = Some(bloom); + compact_index.path_trie = Some(path_trie); + + (compact_index, compact_elapsed, tri_elapsed) +} + +/// Shrink all growable Vecs to exact fit after compact index build. +/// +/// Reclaims capacity slack from the doubling growth strategy used during +/// construction. Saves ~500 MB across 7 drives. +fn shrink_compact_vecs( + drive_letter: uffs_mft::platform::DriveLetter, + records: &mut Vec, + names: &mut Vec, + ext_names: &mut Vec>, +) { + let pre = records.capacity() * size_of::() + names.capacity(); + records.shrink_to_fit(); + names.shrink_to_fit(); + ext_names.shrink_to_fit(); + let post = records.capacity() * size_of::() + names.capacity(); + let reclaimed_mb = pre.saturating_sub(post) / (1024 * 1024); + if reclaimed_mb > 0 { + tracing::info!( + drive = %drive_letter, + reclaimed_mb, + "shrink_to_fit reclaimed memory" + ); + } +} + +/// Cache TTL in seconds (4 hours β€” same as Windows CLI). +/// +/// USN Journal handles incremental freshness; this is a safety-net full rescan. 
+pub(crate) const INDEX_TTL_SECONDS: u64 = 14400; + +// ── Live $UpCase resolution ────────────────────────────────────────── + +/// Try to read the live `$UpCase` table from the NTFS volume for +/// `drive_letter`. On success, log the result at `INFO` and any diffs +/// from the compiled-in default at `WARN`. On failure, log at `WARN` +/// and fall back to [`CaseFold::default_table()`]. +pub(crate) fn resolve_case_fold( + drive_letter: uffs_mft::platform::DriveLetter, +) -> uffs_text::case_fold::CaseFold { + let live_table = match uffs_mft::platform::upcase::read_upcase_table(drive_letter) { + Ok(table) => table, + Err(err) => { + tracing::warn!( + drive = %drive_letter, + error = %err, + "$UpCase live read failed β€” falling back to compiled-in default table" + ); + return uffs_text::case_fold::CaseFold::default_table(); + } + }; + + // Leak the box to get a `&'static [u16]` for CaseFold::from_ntfs. + let live_fold = uffs_text::case_fold::CaseFold::from_ntfs(Box::leak(live_table)); + log_upcase_comparison(drive_letter, &live_fold); + live_fold +} + +/// Log the comparison between live and compiled-in `$UpCase` tables. 
+fn log_upcase_comparison( + drive_letter: uffs_mft::platform::DriveLetter, + live_fold: &uffs_text::case_fold::CaseFold, +) { + let default = uffs_text::case_fold::CaseFold::default_table(); + let diffs = default.diff(live_fold); + + if diffs.is_empty() { + tracing::info!( + drive = %drive_letter, + "$UpCase loaded from live volume β€” identical to compiled-in default" + ); + return; + } + + tracing::info!( + drive = %drive_letter, + diff_count = diffs.len(), + "$UpCase loaded from live volume β€” differs from compiled-in default" + ); + for diff in &diffs { + tracing::warn!( + drive = %drive_letter, + codepoint = format_args!("U+{:04X}", diff.codepoint), + default = format_args!("U+{:04X}", diff.default_maps_to), + live = format_args!("U+{:04X}", diff.live_maps_to), + "$UpCase diff" + ); + } +} diff --git a/crates/uffs-core/src/compact/children.rs b/crates/uffs-core/src/compact/children.rs new file mode 100644 index 000000000..882d4c688 --- /dev/null +++ b/crates/uffs-core/src/compact/children.rs @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! [`ChildrenIndex`] β€” CSR parentβ†’children adjacency, the read-side of the +//! tree walk + the Phase-1 subtree path-length propagation. + +use crate::compact::CompactRecord; + +/// Children index in CSR (Compressed Sparse Row) layout. +/// +/// `children(i)` returns the compact indices of record i's children as +/// a contiguous `&[u32]` slice. The CSR layout avoids per-record `Vec` +/// allocations and enables bulk serialization/deserialization. +#[derive(Clone)] +pub struct ChildrenIndex { + /// CSR offsets β€” one per record + sentinel. Length = `record_count` + 1. + /// Children of record `i` are `values[offsets[i]..offsets[i+1]]`. + offsets: Vec, + /// Flat array of all child indices. + values: Vec, +} + +impl ChildrenIndex { + /// Total heap capacity (offsets + values) in bytes. 
+ #[must_use] + pub const fn heap_size_bytes(&self) -> usize { + self.offsets.capacity() * size_of::() + self.values.capacity() * size_of::() + } + + /// Build from `CompactRecord::parent_idx` in two passes (count + scatter). + #[must_use] + pub fn build(records: &[CompactRecord]) -> Self { + // Count children per parent + let mut counts = vec![0_u32; records.len()]; + for rec in records { + let parent = rec.parent_idx; + if parent != u32::MAX + && let Some(cnt) = counts.get_mut(parent as usize) + { + *cnt += 1; + } + } + + // Prefix-sum β†’ offsets + let mut offsets = Vec::with_capacity(records.len() + 1); + let mut running = 0_u32; + for &cnt in &counts { + offsets.push(running); + running = running.saturating_add(cnt); + } + offsets.push(running); + + // Scatter children into values + let mut values = vec![0_u32; running as usize]; + let mut write_pos = offsets.clone(); + for (idx, rec) in records.iter().enumerate() { + let parent = rec.parent_idx; + if parent != u32::MAX + && let Some(pos) = write_pos.get_mut(parent as usize) + && let Some(slot) = values.get_mut(*pos as usize) + { + let child_idx = uffs_mft::len_to_u32(idx); + *slot = child_idx; + *pos += 1; + } + } + + Self { offsets, values } + } + + /// Construct directly from pre-built CSR arrays (cache deserialization). + #[must_use] + pub const fn from_csr(offsets: Vec, values: Vec) -> Self { + Self { offsets, values } + } + + /// Borrow the CSR components for serialization. + #[must_use] + pub(crate) fn as_csr(&self) -> (&[u32], &[u32]) { + (&self.offsets, &self.values) + } + + /// Return the children of record `idx` as a contiguous slice. + #[must_use] + pub fn get(&self, idx: usize) -> &[u32] { + let start = self.offsets.get(idx).copied().unwrap_or(0) as usize; + let end = self.offsets.get(idx + 1).copied().unwrap_or(0) as usize; + self.values.get(start..end).unwrap_or(&[]) + } + + /// Total number of child entries across all records. 
+ #[must_use] + pub const fn total_children(&self) -> usize { + self.values.len() + } + + /// Number of records tracked (one slot per record). + #[must_use] + pub const fn record_count(&self) -> usize { + self.offsets.len().saturating_sub(1) + } + + /// Create an empty children index. + #[must_use] + pub fn empty() -> Self { + Self { + offsets: vec![0], + values: Vec::new(), + } + } +} diff --git a/crates/uffs-core/src/compact/delta.rs b/crates/uffs-core/src/compact/delta.rs new file mode 100644 index 000000000..db8d9c7ee --- /dev/null +++ b/crates/uffs-core/src/compact/delta.rs @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Mutable overlay over the immutable base CSR indexes +//! (incremental-index-maintenance Β§5.1). +//! +//! The base [`crate::trigram::TrigramIndex`] / +//! [`crate::compact::ChildrenIndex`] / [`crate::compact::ExtensionIndex`] are +//! compressed-sparse-row structures: fast to +//! query, immutable, and **expensive to rebuild** (the per-apply rebuild is the +//! cost this project removes). [`IndexDelta`] holds the postings *added* since +//! the last compaction plus a tombstone set for records whose base postings are +//! stale (deleted or renamed away). A search reads `base βˆͺ delta βˆ’ tombstones`; +//! an occasional compaction folds the delta back into a fresh base and clears +//! it (`delta = None`). +//! +//! **Invariant:** every posting list is kept **sorted ascending and deduped** +//! on insert, so the baseβˆͺdelta merge at query time is a linear sorted-merge +//! and tombstone filtering is a sorted-set difference. The base CSR posting +//! lists are already sorted, so the shapes compose. +//! +//! This is Phase-2 scaffolding: the type + its merge primitives land here with +//! unit tests; `DriveCompactIndex` gains the `delta` field and the +//! `trigram_search` choke point when trigram delta is wired (design Β§4 Phase +//! 
2), so each of the ~20 `DriveCompactIndex` construction sites is touched +//! exactly once, with the change that gives the field meaning. + +use rustc_hash::{FxHashMap, FxHashSet}; + +/// Mutable overlay over the immutable base CSR indexes. A `None` +/// `delta` on [`crate::compact::DriveCompactIndex`] means "freshly compacted β€” +/// pure base, zero query overhead". +#[derive(Debug, Default, Clone)] +pub struct IndexDelta { + /// packed-trigram β†’ sorted, deduped record indices added since compaction. + pub trigram: FxHashMap>, + /// `ext_id` β†’ sorted, deduped record indices added since compaction. + pub ext: FxHashMap>, + /// parent record idx β†’ sorted, deduped child record indices added since + /// compaction. + pub children: FxHashMap>, + /// record indices whose BASE postings are stale (deleted / renamed-away). + pub tombstones: FxHashSet, + /// count of distinct records touched since compaction (the compaction + /// trigger input β€” see [`IndexDelta::len`]). + pub touched_records: u32, +} + +impl IndexDelta { + /// Register a newly created / renamed-in record's postings across every + /// index overlay. `trigrams` is the packed-trigram set of the record's name + /// (deduped by the caller is fine β€” `sorted_insert` dedups anyway). + /// + /// A renamed record is `tombstone`d at its stale base postings first, then + /// `add_record`ed at its new ones; create is `add_record` only. + pub fn add_record(&mut self, idx: u32, trigrams: &[u64], ext_id: u16, parent_idx: u32) { + for &key in trigrams { + sorted_insert(self.trigram.entry(key).or_default(), idx); + } + sorted_insert(self.ext.entry(ext_id).or_default(), idx); + // u32::MAX parent = root sentinel; root has no parent posting to add to. + if parent_idx != u32::MAX { + sorted_insert(self.children.entry(parent_idx).or_default(), idx); + } + self.touched_records = self.touched_records.saturating_add(1); + } + + /// Mark a record's BASE postings stale. Idempotent. 
The record may still + /// reappear in `delta` postings via a subsequent [`IndexDelta::add_record`] + /// (rename = tombstone-old + add-new); tombstone filtering is applied to + /// the final merged set, so that is correct. + pub fn tombstone(&mut self, idx: u32) { + if self.tombstones.insert(idx) { + self.touched_records = self.touched_records.saturating_add(1); + } + } + + /// Whether `idx`'s base postings have been tombstoned. + #[must_use] + pub fn is_tombstoned(&self, idx: u32) -> bool { + self.tombstones.contains(&idx) + } + + /// Records touched since compaction β€” the compaction-trigger input. Counts + /// distinct adds + tombstones (an add and a tombstone of the same idx, as + /// in a rename, count as two touches, which is the intended "work done" + /// signal). + #[must_use] + pub const fn len(&self) -> u32 { + self.touched_records + } + + /// Whether nothing has been overlaid since compaction. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.touched_records == 0 + } + + /// Delta postings for one packed trigram (sorted, deduped), or `&[]`. + #[must_use] + pub fn trigram_postings(&self, key: u64) -> &[u32] { + self.trigram.get(&key).map_or(&[], Vec::as_slice) + } + + /// Delta postings for one extension id (sorted, deduped), or `&[]`. + #[must_use] + pub fn ext_postings(&self, ext_id: u16) -> &[u32] { + self.ext.get(&ext_id).map_or(&[], Vec::as_slice) + } + + /// Delta child postings for one parent idx (sorted, deduped), or `&[]`. + #[must_use] + pub fn child_postings(&self, parent_idx: u32) -> &[u32] { + self.children.get(&parent_idx).map_or(&[], Vec::as_slice) + } +} + +/// Insert `value` into a sorted, deduped `Vec`, keeping it sorted and +/// deduped. No-op if already present. O(log n) search + O(n) shift β€” postings +/// are small per key (one apply batch's worth) so this is cheap. 
/// Insert `value` into a sorted, deduped `Vec<u32>`, keeping it sorted and
/// deduped. No-op if `value` is already present.
///
/// Binary search locates the slot; the insert shifts the tail, so the cost is
/// linear in the list length in the worst case.
fn sorted_insert(list: &mut Vec<u32>, value: u32) {
    if let Err(pos) = list.binary_search(&value) {
        list.insert(pos, value);
    }
}

/// Sorted-union merge of a base posting list with delta additions — the
/// per-trigram building block of
/// [`crate::compact::DriveCompactIndex::trigram_search`].
///
/// Both inputs are expected to be sorted ascending and deduped; the result is
/// then sorted ascending and deduped as well. A value present in both inputs
/// is emitted once.
///
/// Tombstones are deliberately **not** applied here: a renamed record is
/// tombstoned in base yet legitimately re-added in `delta` under its new name,
/// so tombstone validity can only be decided on the final intersected candidate
/// set, never per posting list.
#[must_use]
pub(crate) fn merge_postings(base: &[u32], delta: &[u32]) -> Vec<u32> {
    // Fast paths: with one side empty the union is just a copy of the other.
    if delta.is_empty() {
        return base.to_vec();
    }
    if base.is_empty() {
        return delta.to_vec();
    }

    let mut out: Vec<u32> = Vec::with_capacity(base.len() + delta.len());
    let (mut base_pos, mut delta_pos) = (0_usize, 0_usize);
    loop {
        let next = match (base.get(base_pos).copied(), delta.get(delta_pos).copied()) {
            (Some(bv), Some(dv)) => match bv.cmp(&dv) {
                core::cmp::Ordering::Less => {
                    base_pos += 1;
                    bv
                }
                core::cmp::Ordering::Greater => {
                    delta_pos += 1;
                    dv
                }
                core::cmp::Ordering::Equal => {
                    // Present on both sides — advance both, emit once.
                    base_pos += 1;
                    delta_pos += 1;
                    bv
                }
            },
            (Some(bv), None) => {
                base_pos += 1;
                bv
            }
            (None, Some(dv)) => {
                delta_pos += 1;
                dv
            }
            (None, None) => return out,
        };
        // Guard against a repeated value so the output stays deduped even if
        // an input accidentally carries an adjacent duplicate.
        if out.last() != Some(&next) {
            out.push(next);
        }
    }
}
+ delta.add_record(5, &[300, 100, 200, 100], 2, 4); + delta.add_record(3, &[100], 2, 4); + delta.add_record(9, &[100], 7, 4); + + assert_eq!(delta.trigram_postings(100), &[3, 5, 9], "sorted + deduped"); + assert_eq!(delta.trigram_postings(200), &[5]); + assert_eq!(delta.trigram_postings(300), &[5]); + assert_eq!(delta.ext_postings(2), &[3, 5]); + assert_eq!(delta.ext_postings(7), &[9]); + assert_eq!( + delta.child_postings(4), + &[3, 5, 9], + "all three share parent 4" + ); + assert_eq!(delta.trigram_postings(999), &[] as &[u32], "absent key"); + } + + #[test] + fn root_parent_sentinel_adds_no_child_posting() { + let mut delta = IndexDelta::default(); + delta.add_record(0, &[10], 1, u32::MAX); // root: no parent posting + assert!( + delta.children.is_empty(), + "u32::MAX parent must not create a posting" + ); + assert_eq!(delta.trigram_postings(10), &[0]); + } + + #[test] + fn tombstone_is_idempotent_and_counted_once() { + let mut delta = IndexDelta::default(); + delta.tombstone(7); + delta.tombstone(7); + assert!(delta.is_tombstoned(7)); + assert!(!delta.is_tombstoned(8)); + assert_eq!(delta.len(), 1, "duplicate tombstone is not double-counted"); + } + + #[test] + fn len_counts_distinct_touches_including_rename_as_two() { + let mut delta = IndexDelta::default(); + assert!(delta.is_empty()); + // rename: tombstone old postings, add new β€” two units of work. + delta.tombstone(4); + delta.add_record(4, &[1, 2], 0, 1); + assert_eq!(delta.len(), 2); + assert!(!delta.is_empty()); + } + + #[test] + fn merge_postings_is_sorted_deduped_union() { + use super::merge_postings; + assert_eq!(merge_postings(&[1, 3, 5, 7], &[2, 5, 6]), vec![ + 1, 2, 3, 5, 6, 7 + ]); + assert_eq!(merge_postings(&[], &[2, 4]), vec![2, 4]); + assert_eq!(merge_postings(&[1, 3], &[]), vec![1, 3]); + assert_eq!(merge_postings(&[], &[]), Vec::::new()); + // full overlap dedups to one copy. 
+ assert_eq!(merge_postings(&[1, 2, 3], &[1, 2, 3]), vec![1, 2, 3]); + } +} diff --git a/crates/uffs-core/src/compact/extension.rs b/crates/uffs-core/src/compact/extension.rs new file mode 100644 index 000000000..c4d0c3e01 --- /dev/null +++ b/crates/uffs-core/src/compact/extension.rs @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! [`ExtensionIndex`] β€” CSR `extension_id` β†’ records inverted index for O(K) +//! `--ext` queries. + +use crate::compact::CompactRecord; + +/// Extension inverted index: `extension_id β†’ &[u32]` (record indices). +/// +/// CSR layout identical to `ChildrenIndex`. Built once at load time in a +/// single O(N) pass so `--ext rs` queries can iterate only matching records +/// instead of scanning all 25M entries. +#[derive(Clone)] +pub struct ExtensionIndex { + /// CSR offsets β€” length = `max_ext_id` + 2 (one per `ext_id` + sentinel). + offsets: Vec, + /// Flat array of record indices, grouped by `extension_id`. + values: Vec, +} + +impl ExtensionIndex { + /// Total heap capacity (offsets + values) in bytes. + #[must_use] + pub const fn heap_size_bytes(&self) -> usize { + self.offsets.capacity() * size_of::() + self.values.capacity() * size_of::() + } + + /// Build from compact records in two passes (count + scatter). + #[must_use] + pub fn build(records: &[CompactRecord]) -> Self { + // Find the maximum extension_id to size the offsets array. + let max_id = records + .iter() + .map(|rec| rec.extension_id) + .max() + .unwrap_or(0) as usize; + + // Pass 1: count records per extension_id. + let mut counts = vec![0_u32; max_id + 1]; + for rec in records { + if rec.name_len == 0 { + continue; + } + if let Some(cnt) = counts.get_mut(rec.extension_id as usize) { + *cnt += 1; + } + } + + // Prefix-sum β†’ offsets. 
+ let mut offsets = Vec::with_capacity(max_id + 2); + let mut running = 0_u32; + for &cnt in &counts { + offsets.push(running); + running = running.saturating_add(cnt); + } + offsets.push(running); + + // Pass 2: scatter record indices into values. + let mut values = vec![0_u32; running as usize]; + let mut write_pos = offsets.clone(); + for (idx, rec) in records.iter().enumerate() { + if rec.name_len == 0 { + continue; + } + let eid = rec.extension_id as usize; + if let Some(pos) = write_pos.get_mut(eid) + && let Some(slot) = values.get_mut(*pos as usize) + { + let idx_u32 = uffs_mft::len_to_u32(idx); + *slot = idx_u32; + *pos += 1; + } + } + + Self { offsets, values } + } + + /// Return record indices for the given `extension_id`. + #[must_use] + pub fn get(&self, ext_id: u16) -> &[u32] { + let eid = ext_id as usize; + let start = self.offsets.get(eid).copied().unwrap_or(0) as usize; + let end = self.offsets.get(eid + 1).copied().unwrap_or(0) as usize; + self.values.get(start..end).unwrap_or(&[]) + } + + /// Create an empty extension index. + #[must_use] + pub fn empty() -> Self { + Self { + offsets: vec![0], + values: Vec::new(), + } + } + + /// Total number of indexed record entries. + #[must_use] + pub const fn total_entries(&self) -> usize { + self.values.len() + } +} diff --git a/crates/uffs-core/src/compact/path_len.rs b/crates/uffs-core/src/compact/path_len.rs new file mode 100644 index 000000000..c63bf2905 --- /dev/null +++ b/crates/uffs-core/src/compact/path_len.rs @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Per-record `path_len` computation: the cold-load top-down BFS plus the +//! Phase-1 incremental update used by the USN apply path +//! (incremental-index-maintenance Β§5.5). + +use crate::compact::{ChildrenIndex, CompactRecord}; + +/// Compute `path_len` (in **characters**, not bytes) for every record +/// via top-down BFS. 
+/// +/// Root entries (`parent_idx == u32::MAX`) get +/// `path_len = 2 + 1 + name_chars` (e.g. `"C:\" + name`), and children +/// accumulate `parent.path_len + 1 (separator) + name_chars`. +/// Saturates at `u16::MAX` (65 535) for extremely deep paths. +/// +/// Character counting matches `str::chars().count()` so the precomputed +/// value agrees with the display-row path-length filter. +pub(crate) fn compute_path_lengths( + records: &mut [CompactRecord], + names: &[u8], + drive_letter: uffs_mft::platform::DriveLetter, +) { + // Drive prefix in characters: the letter (1 char) + colon (1 char) = 2. + // `DriveLetter` is ASCII A–Z by construction (validated in + // `DriveLetter::parse`), so the previous runtime `debug_assert!` + // is now a tautology and was removed. The arithmetic only cares + // about "1 letter char + 1 colon". + let _: uffs_mft::platform::DriveLetter = drive_letter; + let drive_prefix_chars: u32 = 1 /* letter */ + 1 /* ':' */; + + // Build forward adjacency list (parent β†’ children) for top-down BFS. + let record_count = records.len(); + let mut children_of: Vec> = vec![Vec::new(); record_count]; + let mut roots: Vec = Vec::new(); + + for (idx, rec) in records.iter().enumerate() { + let pi = rec.parent_idx; + if pi == u32::MAX { + roots.push(uffs_mft::len_to_u32(idx)); + } else if let Some(siblings) = children_of.get_mut(pi as usize) { + siblings.push(uffs_mft::len_to_u32(idx)); + } + } + + // BFS from roots. 
+ let mut queue = alloc::collections::VecDeque::with_capacity(roots.len()); + for &root in &roots { + let Some(rec) = records.get(root as usize) else { + continue; + }; + let name_chars = name_char_count(rec, names); + let pl = if name_chars == 0 { + // Drive root directory: "C:\" + drive_prefix_chars + 1 + } else { + // Top-level file/dir: "C:\" + drive_prefix_chars + 1 + name_chars + }; + if let Some(slot) = records.get_mut(root as usize) { + slot.path_len = uffs_mft::len_to_u16(pl as usize); + } + queue.push_back(root); + } + + while let Some(idx) = queue.pop_front() { + let parent_pl = records + .get(idx as usize) + .map_or(0, |rec| u32::from(rec.path_len)); + let children: Vec = children_of + .get(idx as usize) + .map_or_else(Vec::new, Clone::clone); + for &child in &children { + let child_chars = records + .get(child as usize) + .map_or(0, |rec| name_char_count(rec, names)); + // path = parent_path + "\" + name + let pl = parent_pl.saturating_add(1).saturating_add(child_chars); + if let Some(slot) = records.get_mut(child as usize) { + slot.path_len = uffs_mft::len_to_u16(pl as usize); + } + queue.push_back(child); + } + } +} + +/// A record whose `path_len` a USN apply must refresh, plus whether the change +/// can shift its whole subtree. +/// +/// Phase 1 of incremental-index-maintenance (design doc Β§5.5): instead of the +/// O(total) [`compute_path_lengths`] BFS every apply, refresh only the records +/// a batch touched. A **directory rename** moves every descendant's path by a +/// constant Ξ”, so `subtree` requests the descendant walk; creates and file +/// renames are a single O(1) refresh. +#[derive(Debug, Clone, Copy)] +pub(crate) struct PathChange { + /// Compact index of the created / renamed record. + pub idx: u32, + /// `true` for a directory rename (propagate Ξ” to descendants); `false` for + /// creates and file renames (refresh this record only). 
+ pub subtree: bool, +} + +/// Refresh `path_len` for only the records a USN batch touched, instead of the +/// O(total-records) [`compute_path_lengths`] BFS β€” the Phase-1 lever of +/// incremental-index-maintenance (design doc Β§5.5). +/// +/// `children` must be the **freshly rebuilt** CSR so a directory rename can +/// walk its subtree. Caller falls back to the full [`compute_path_lengths`] +/// for cold loads and for batches large enough that incremental loses +/// (see the threshold in `compact_loader/rebuild.rs`). +pub(crate) fn update_path_lengths_incremental( + records: &mut [CompactRecord], + names: &[u8], + drive_letter: uffs_mft::platform::DriveLetter, + children: &ChildrenIndex, + changed: &[PathChange], +) { + // `DriveLetter` is ASCII A–Z by construction, so the drive prefix is always + // "X:" = 2 chars (matches `compute_path_lengths`). + let _: uffs_mft::platform::DriveLetter = drive_letter; + let drive_prefix_chars: u32 = 1 /* letter */ + 1 /* ':' */; + + for change in changed { + let idx = change.idx as usize; + let Some(rec) = records.get(idx) else { + continue; + }; + // Skip a slot tombstoned within the same batch (create then delete): + // `apply_delete` set name_len=0 + parent=MAX. + if rec.name_len == 0 && rec.parent_idx == u32::MAX { + continue; + } + let old_pl = u32::from(rec.path_len); + let new_pl = path_len_from_parent(records, names, drive_prefix_chars, change.idx); + if let Some(slot) = records.get_mut(idx) { + slot.path_len = uffs_mft::len_to_u16(new_pl as usize); + } + if change.subtree { + let delta = i64::from(new_pl) - i64::from(old_pl); + if delta != 0 { + shift_subtree_path_len(records, children, change.idx, delta); + } + } + } +} + +/// `path_len` for `idx` from its (current) parent's `path_len` + own name β€” +/// the per-node arithmetic of [`compute_path_lengths`]'s BFS, in isolation. 
+fn path_len_from_parent( + records: &[CompactRecord], + names: &[u8], + drive_prefix_chars: u32, + idx: u32, +) -> u32 { + let Some(rec) = records.get(idx as usize) else { + return 0; + }; + let name_chars = name_char_count(rec, names); + if rec.parent_idx == u32::MAX { + // Root level: "C:\" (no name) or "C:\". + if name_chars == 0 { + drive_prefix_chars.saturating_add(1) + } else { + drive_prefix_chars + .saturating_add(1) + .saturating_add(name_chars) + } + } else { + let parent_pl = records + .get(rec.parent_idx as usize) + .map_or(0, |parent| u32::from(parent.path_len)); + parent_pl.saturating_add(1).saturating_add(name_chars) + } +} + +/// Add `delta` to every descendant of `root`'s `path_len` (a directory rename +/// shifts each descendant's full path by the same amount). Iterative DFS over +/// the children CSR β€” pure arithmetic, no name/string walk. +fn shift_subtree_path_len( + records: &mut [CompactRecord], + children: &ChildrenIndex, + root: u32, + delta: i64, +) { + let mut stack: Vec = children.get(root as usize).to_vec(); + while let Some(idx) = stack.pop() { + if let Some(rec) = records.get_mut(idx as usize) { + let shifted = i64::from(rec.path_len) + .saturating_add(delta) + .clamp(0, i64::from(u16::MAX)); + rec.path_len = u16::try_from(shifted).unwrap_or(u16::MAX); + } + stack.extend_from_slice(children.get(idx as usize)); + } +} + +/// Count the number of Unicode characters in a record's filename. +/// +/// Falls back to `name_len` (byte count) if the name slice is not valid +/// UTF-8 β€” this is correct for ASCII names and a safe upper bound +/// otherwise. 
/// Compact per-record data for in-memory search, filter, and sort.
///
/// 80 bytes per record: 79 bytes of data (7 × 8-byte, 4 × 4-byte, 3 × 2-byte
/// and 1 × 1-byte fields) plus 1 byte of explicit tail padding.
/// Derives `bytemuck::Pod` + `Zeroable` so the entire record array can be
/// serialized/deserialized as a single bulk `memcpy` — no per-field encoding.
#[derive(Debug, Clone, Copy, Default, bytemuck::Pod, bytemuck::Zeroable)]
#[repr(C)]
pub struct CompactRecord {
    // ── 8-byte fields first (8-byte aligned) ──────────────────────
    /// Logical file size in bytes.
    pub size: u64,
    /// Allocated size on disk in bytes ("Size on Disk" column).
    pub allocated: u64,
    /// Sum of logical file sizes in entire subtree.
    pub treesize: u64,
    /// Sum of allocated sizes in entire subtree.
    pub tree_allocated: u64,
    /// Creation time (Unix microseconds).
    pub created: i64,
    /// Last write time (Unix microseconds).
    pub modified: i64,
    /// Last access time (Unix microseconds).
    pub accessed: i64,

    // ── u32 fields (4-byte aligned) ───────────────────────────────
    /// Byte offset into the names blob.
    pub name_offset: u32,
    /// Raw NTFS `FILE_ATTRIBUTE_*` flags.
    pub flags: u32,
    /// Index into the compact array of the parent directory.
    /// `u32::MAX` = root or orphan.
    pub parent_idx: u32,
    /// Count of all descendants in subtree. 0 for files.
    pub descendants: u32,

    // ── u16 fields (2-byte aligned) ───────────────────────────────
    /// UTF-8 byte length of the filename.
    pub name_len: u16,
    /// Interned extension ID (0 = no extension).
    pub extension_id: u16,
    /// Full path length in **characters** (not bytes), as filled in by
    /// `compute_path_lengths`, which counts `chars()` per name and adds one
    /// separator per level (e.g. `C:\Windows\System32\cmd.exe` = 28).
    /// Saturates at `u16::MAX` (65 535) for extremely deep paths.
    ///
    /// NOTE(review): the example literal is 27 characters long; confirm
    /// whether 28 reflects an extra separator contributed by the drive-root
    /// record or is simply stale.
    pub path_len: u16,

    /// First byte of the filename (e.g. `b'$'` for NTFS metafiles).
    ///
    /// Cached here as a cheap hot-path *gate*: only `$`-prefixed records can be
    /// NTFS metafiles, so [`is_system_metafile`](Self::is_system_metafile) can
    /// reject virtually every record with one sequential field read instead of
    /// a random cache-miss into the names arena. The handful of `$`-prefixed
    /// candidates then pay one arena lookup for the authoritative name check.
    pub name_first_byte: u8,

    /// Explicit 1-byte tail padding that rounds the struct up to 80 bytes
    /// (a multiple of its 8-byte alignment).
    /// Required by `bytemuck::Pod` — no implicit padding allowed.
    #[expect(
        clippy::pub_underscore_fields,
        reason = "bytemuck Pod requires all fields same visibility"
    )]
    pub _pad: [u8; 1],
}
/// Allowlist of reserved NTFS metafile names: the `$`-prefixed entries at the
/// volume root plus those under the `$Extend` directory.
///
/// Any *other* `$`-prefixed name is treated as an ordinary file. Lookups via
/// [`is_ntfs_metafile_name`] compare ASCII-case-insensitively.
pub(crate) const NTFS_METAFILE_NAMES: &[&str] = &[
    // Reserved FRS 0–11 (volume root metafiles)
    "$MFT",
    "$MFTMirr",
    "$LogFile",
    "$Volume",
    "$AttrDef",
    "$Bitmap",
    "$Boot",
    "$BadClus",
    "$Secure",
    "$UpCase",
    "$Extend",
    // `$Extend` directory children
    "$ObjId",
    "$Quota",
    "$Reparse",
    "$UsnJrnl",
    "$RmMetadata",
    "$Deleted",
    // `$Extend\$RmMetadata` children
    "$Repair",
    "$Tops",
    "$TxfLog",
    "$Txf",
];

/// Returns whether `name` exactly matches (ignoring ASCII case) one of the
/// reserved `NTFS_METAFILE_NAMES`.
///
/// This is an exact-name test, not a prefix test: a name that merely starts
/// with `$` (for example `$Recycle.Bin`) is not reported as a metafile.
#[must_use]
#[inline]
pub fn is_ntfs_metafile_name(name: &str) -> bool {
    for reserved in NTFS_METAFILE_NAMES {
        if reserved.eq_ignore_ascii_case(name) {
            return true;
        }
    }
    false
}
+ /// + /// The cached [`name_first_byte`](Self::name_first_byte) field is a cheap + /// gate: every metafile name starts with `$`, and `$`-prefixed records are + /// a vanishing fraction of an index, so this rejects virtually every record + /// with a single byte comparison and only touches the names arena for the + /// handful of `$`-prefixed candidates. The arena lookup is *required* for + /// correctness, because an ordinary file may also start with `$` + /// (`$Recycle.Bin`, `$PatchCache`, the `WinSxS` `$$_*.cdf-ms` filemaps) β€” + /// those are NOT metafiles and must not be hidden by `--hide-system`. + /// See [`is_ntfs_metafile_name`]. + #[inline] + #[must_use] + pub fn is_system_metafile(&self, names: &[u8]) -> bool { + self.name_first_byte == b'$' && is_ntfs_metafile_name(self.name(names)) + } + + /// Get the name from a names blob as a **lossy `&str` view**. + /// + /// Valid-UTF-8 names (the common case) are returned verbatim; an ill-formed + /// (surrogate-bearing) name stored as WTF-8 returns `""` for display. Use + /// [`Self::name_bytes`] for the lossless bytes that exact/substring search + /// matches against, so a file with an ill-formed name stays findable + /// (WI-4.4). + #[inline] + #[must_use] + pub fn name<'a>(&self, names: &'a [u8]) -> &'a str { + core::str::from_utf8(self.name_bytes(names)).unwrap_or("") + } + + /// Get the name's **raw bytes** (WTF-8) from a names blob β€” the lossless + /// accessor. + /// + /// Returns exactly the stored bytes, including the byte-faithful encoding + /// of an ill-formed NTFS name (unpaired surrogates). This is what makes + /// every file matchable/findable by its true name regardless of UTF-8 + /// well-formedness (WI-4.4). Returns `&[]` for an out-of-range slice. 
+ #[inline] + #[must_use] + pub fn name_bytes<'a>(&self, names: &'a [u8]) -> &'a [u8] { + let start = self.name_offset as usize; + let end = start.saturating_add(self.name_len as usize); + names.get(start..end).unwrap_or(&[]) + } +} + +// Compile-time size assertion. +const _: () = assert!( + size_of::() == 80, + "CompactRecord must be exactly 80 bytes" +); diff --git a/crates/uffs-core/src/compact_cache.rs b/crates/uffs-core/src/compact_cache.rs index 63e32bda5..5f510e966 100644 --- a/crates/uffs-core/src/compact_cache.rs +++ b/crates/uffs-core/src/compact_cache.rs @@ -791,9 +791,9 @@ where letter: parsed.drive_letter, records, names, - trigram, - children: parsed.children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(parsed.children), + ext_index: Arc::new(ext_index), fold: parsed.fold, ext_names, source: IndexSource::MftFile(PathBuf::from(format!("{}:", parsed.drive_letter))), @@ -806,6 +806,9 @@ where // covers the future-format edge case where a new cache // version omits the section. frs_to_compact: parsed.frs_to_compact_loaded.unwrap_or_default(), + // Cache load is always delta-free β€” the on-disk format stores base only + // (compact before save), so a freshly loaded index has no overlay. 
+ delta: None, }; // Phase 4 Commit D β€” v9+ caches embed the bloom + trie directly, diff --git a/crates/uffs-core/src/compact_cache/parked.rs b/crates/uffs-core/src/compact_cache/parked.rs index 7b45e4a29..6e3de4749 100644 --- a/crates/uffs-core/src/compact_cache/parked.rs +++ b/crates/uffs-core/src/compact_cache/parked.rs @@ -458,6 +458,7 @@ pub fn load_parked_body( #[cfg(test)] mod tests { + use alloc::sync::Arc; use std::path::PathBuf; use super::*; @@ -508,9 +509,9 @@ mod tests { letter: uffs_mft::platform::DriveLetter::C, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from(""), Box::from("toml")], source: IndexSource::MftFile(PathBuf::from("C:")), @@ -518,6 +519,7 @@ mod tests { bloom: None, path_trie: None, frs_to_compact: Vec::new(), + delta: None, }; index.bloom = Some(index.build_bloom()); index.path_trie = Some(index.build_path_trie()); diff --git a/crates/uffs-core/src/compact_cache/tests.rs b/crates/uffs-core/src/compact_cache/tests.rs index f755d1355..8c53b619d 100644 --- a/crates/uffs-core/src/compact_cache/tests.rs +++ b/crates/uffs-core/src/compact_cache/tests.rs @@ -73,9 +73,9 @@ fn make_test_index() -> DriveCompactIndex { letter: uffs_mft::platform::DriveLetter::T, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from("")], source: IndexSource::MftFile(PathBuf::from("T:")), @@ -83,6 +83,7 @@ fn make_test_index() -> DriveCompactIndex { bloom: None, path_trie: None, frs_to_compact, + delta: None, } } diff --git a/crates/uffs-core/src/compact_filters.rs b/crates/uffs-core/src/compact_filters.rs index 4f4df56c0..0bd8c0960 100644 --- 
a/crates/uffs-core/src/compact_filters.rs +++ b/crates/uffs-core/src/compact_filters.rs @@ -152,6 +152,7 @@ impl DriveCompactIndex { #[cfg(test)] mod tests { + use alloc::sync::Arc; use std::path::PathBuf; use super::*; @@ -203,9 +204,9 @@ mod tests { letter: uffs_mft::platform::DriveLetter::C, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from(""), Box::from("toml")], source: IndexSource::MftFile(PathBuf::from("C:")), @@ -213,6 +214,7 @@ mod tests { bloom: None, path_trie: None, frs_to_compact: Vec::new(), + delta: None, } } @@ -322,9 +324,9 @@ mod tests { letter: uffs_mft::platform::DriveLetter::X, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from("")], source: IndexSource::MftFile(PathBuf::from("X:")), @@ -332,6 +334,7 @@ mod tests { bloom: None, path_trie: None, frs_to_compact: Vec::new(), + delta: None, }; let bloom = drive.build_bloom(); diff --git a/crates/uffs-core/src/compact_loader.rs b/crates/uffs-core/src/compact_loader.rs index 8676a18c8..13679c15c 100644 --- a/crates/uffs-core/src/compact_loader.rs +++ b/crates/uffs-core/src/compact_loader.rs @@ -11,10 +11,10 @@ use std::time::Instant; use uffs_mft::index::MftIndex; -use crate::compact::{ - ChildrenIndex, CompactRecord, DriveCompactIndex, INDEX_TTL_SECONDS, build_compact_index, -}; -use crate::trigram::TrigramIndex; +use crate::compact::{DriveCompactIndex, INDEX_TTL_SECONDS, build_compact_index}; + +mod apply; +mod rebuild; /// What produced a given `DriveCompactIndex`. 
#[derive(Clone)] @@ -454,210 +454,6 @@ pub fn load_mft_file( load_drive(&MftSource::File(mft_path.to_path_buf(), drive), no_cache) } -/// A USN-created file's identity, staged into the index's names blob + -/// extension table via a mutable `drive` borrow BEFORE any record borrow. -/// -/// All fields are `Copy`, so the caller can take a `&mut CompactRecord` -/// after this returns without a borrow conflict. -struct StagedCreate { - /// Byte offset of the staged name in `drive.names`. - name_offset: u32, - /// UTF-8 byte length of the staged name. - name_len: u16, - /// Cached first byte of the name (hot-path metafile gate). - name_first_byte: u8, - /// Interned extension id for the new name (`0` = no extension). - extension_id: u16, - /// Compact index of the parent directory (`u32::MAX` if unmapped). - parent_idx: u32, - /// Real size/timestamps/flags from a targeted MFT read, or all-zero when - /// the USN-only change carried no metadata (a later re-warm fills it). - /// Representation matches `CompactRecord`, so it copies straight in. - meta: uffs_mft::usn::RecordMeta, -} - -/// Append `change`'s filename to the names blob and intern its extension, -/// resolving the parent's compact index. Mutably borrows `drive`, so it -/// must run before any `&mut CompactRecord` borrow. 
-fn stage_create(drive: &mut DriveCompactIndex, change: &uffs_mft::usn::FileChange) -> StagedCreate { - let extension_id = drive.intern_extension(&change.filename); - let name_start = drive.names.len(); - drive - .names - .as_mut_vec() - .extend_from_slice(change.filename.as_bytes()); - let parent_frs_usize = uffs_mft::frs_to_usize(change.parent_frs.raw()); - let parent_idx = drive - .frs_to_compact - .get(parent_frs_usize) - .copied() - .unwrap_or(u32::MAX); - StagedCreate { - name_offset: uffs_mft::len_to_u32(name_start), - name_len: uffs_mft::len_to_u16(change.filename.len()), - name_first_byte: change.filename.as_bytes().first().copied().unwrap_or(0), - extension_id, - parent_idx, - meta: change.meta.unwrap_or_default(), - } -} - -/// Overwrite an existing compact slot with a reused/re-animated file's -/// identity. Per-file metrics come from the staged metadata β€” real values -/// when a targeted MFT read backfilled them, else zero (a later re-warm -/// fills them; the USN `FileChange` carries only name + parent). -const fn overwrite_slot(rec: &mut CompactRecord, staged: &StagedCreate) { - rec.name_offset = staged.name_offset; - rec.name_len = staged.name_len; - rec.name_first_byte = staged.name_first_byte; - rec.extension_id = staged.extension_id; - rec.parent_idx = staged.parent_idx; - rec.size = staged.meta.size; - rec.allocated = staged.meta.allocated; - rec.created = staged.meta.created; - rec.modified = staged.meta.modified; - rec.accessed = staged.meta.accessed; - rec.flags = staged.meta.flags; - // Tree metrics are recomputed post-loop (CSR rebuild + compute_path_ - // lengths); never carried by a USN change. - rec.treesize = 0; - rec.tree_allocated = 0; - rec.descendants = 0; - rec.path_len = 0; -} - -/// Apply a delete change: tombstone the slot (`name_len = 0`, parent -/// unmapped so the CSR rebuild drops it) and unmap its FRS so a later batch -/// can't re-animate the tombstone. 
-fn apply_delete( - drive: &mut DriveCompactIndex, - frs_usize: usize, - compact_idx: u32, - stats: &mut PatchStats, -) { - if compact_idx == u32::MAX { - stats.skipped += 1; - return; - } - if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { - rec.name_len = 0; - rec.parent_idx = u32::MAX; - if let Some(slot) = drive.frs_to_compact.get_mut(frs_usize) { - *slot = u32::MAX; - } - stats.deleted += 1; - } -} - -/// Apply a create change: overwrite the mapped slot when the MFT record -/// number was reused (tombstone OR stale live record), or append a fresh -/// record + register its FRS mapping when the slot is new. -fn apply_create( - drive: &mut DriveCompactIndex, - change: &uffs_mft::usn::FileChange, - frs_usize: usize, - compact_idx: u32, - stats: &mut PatchStats, -) { - if change.filename.is_empty() { - stats.skipped += 1; - return; - } - // Stage name + interned extension up front (mutable index borrow) so the - // per-record write can take a `&mut CompactRecord` without conflict. - let staged = stage_create(drive, change); - if compact_idx == u32::MAX { - // Brand-new record: append, then register the FRS mapping. NTFS - // reuses freed record numbers and a long-running daemon can outgrow - // the build-time table, so extend + sentinel-fill any gap. - let new_rec = CompactRecord { - size: staged.meta.size, - allocated: staged.meta.allocated, - treesize: 0, - tree_allocated: 0, - created: staged.meta.created, - modified: staged.meta.modified, - accessed: staged.meta.accessed, - name_offset: staged.name_offset, - flags: staged.meta.flags, - parent_idx: staged.parent_idx, - descendants: 0, - name_len: staged.name_len, - extension_id: staged.extension_id, - // path_len filled by `compute_path_lengths` post-loop. 
- path_len: 0, - name_first_byte: staged.name_first_byte, - _pad: [0; 1], - }; - let new_compact_idx = uffs_mft::len_to_u32(drive.records.len()); - drive.records.as_mut_vec().push(new_rec); - if frs_usize >= drive.frs_to_compact.len() { - drive - .frs_to_compact - .resize(frs_usize.saturating_add(1), u32::MAX); - } - if let Some(slot) = drive.frs_to_compact.get_mut(frs_usize) { - *slot = new_compact_idx; - } - stats.created += 1; - } else if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { - // The record number is already mapped. A `created` event means NTFS - // reused that slot for a NEW file β€” the old occupant (a tombstone, OR - // a stale live record whose delete was coalesced/missed) no longer - // exists. Overwrite it wholesale. Skipping a live slot here is what - // dropped FRS-reused recreates (the "delta.pdf vanished" report). - overwrite_slot(rec, &staged); - stats.created += 1; - } -} - -/// Apply a rename change: re-point the name, **re-intern the extension** (a -/// rename can change it: `foo.log` β†’ `foo.pdf`), refresh the first-byte -/// cache, and update `parent_idx`. The FRS keeps its slot, so the mapping is -/// unchanged. 
-fn apply_rename( - drive: &mut DriveCompactIndex, - change: &uffs_mft::usn::FileChange, - compact_idx: u32, - stats: &mut PatchStats, -) { - if compact_idx == u32::MAX || change.filename.is_empty() { - stats.skipped += 1; - return; - } - let extension_id = drive.intern_extension(&change.filename); - let name_start = drive.names.len(); - drive - .names - .as_mut_vec() - .extend_from_slice(change.filename.as_bytes()); - let new_parent_frs = uffs_mft::frs_to_usize(change.parent_frs.raw()); - let new_parent_compact = drive - .frs_to_compact - .get(new_parent_frs) - .copied() - .unwrap_or(u32::MAX); - if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { - rec.name_offset = uffs_mft::len_to_u32(name_start); - rec.name_len = uffs_mft::len_to_u16(change.filename.len()); - rec.extension_id = extension_id; - rec.name_first_byte = change.filename.as_bytes().first().copied().unwrap_or(0); - rec.parent_idx = new_parent_compact; - // Apply backfilled size/timestamps/flags when a targeted MFT read - // attached them (corrects a record previously created USN-only with - // zeroed metrics); otherwise leave the existing values untouched. - if let Some(meta) = change.meta { - rec.size = meta.size; - rec.allocated = meta.allocated; - rec.created = meta.created; - rec.modified = meta.modified; - rec.accessed = meta.accessed; - rec.flags = meta.flags; - } - stats.renamed += 1; - } -} - /// Apply USN changes in-place to the compact index. /// /// Mutates records (`parent_idx`, names, flags) and the @@ -699,6 +495,19 @@ pub fn apply_usn_patch( ) -> PatchStats { let mut stats = PatchStats::default(); + // Phase 1: collect the records whose path_len must be refreshed so the + // post-loop rebuild can do an O(changed) path update instead of the + // O(total) BFS (incremental-index-maintenance Β§5.5). `path_changes` + // doubles as the Phase-2b trigram-ADD set (every created / renamed record + // re-adds its new name's trigrams to the delta). 
+ let mut path_changes: Vec = Vec::new(); + // Phase 2b: records whose stale base trigram postings must be masked β€” + // deletes, renames, and FRS-reuse overwrites. + let mut tombstones: Vec = Vec::new(); + + // IDXDELTA-TIMING: the O(changed) per-change mutation loop, measured apart + // from the O(total) rebuild below so the baseline shows where the time goes. + let t_loop = Instant::now(); for change in changes { // Typed `Frs` β†’ raw `u64` lift at the frs_to_compact CSR lookup // boundary. The mapping table is `Vec` indexed by `usize`, @@ -729,51 +538,55 @@ pub fn apply_usn_patch( // The flags are mutually-exclusive net states (resolved in // `aggregate_changes`), so a simple priority dispatch is correct. if change.deleted { - apply_delete(drive, frs_usize, compact_idx, &mut stats); + apply::apply_delete(drive, frs_usize, compact_idx, &mut stats, &mut tombstones); } else if change.created { - apply_create(drive, change, frs_usize, compact_idx, &mut stats); + apply::apply_create( + drive, + change, + frs_usize, + compact_idx, + &mut stats, + &mut path_changes, + &mut tombstones, + ); } else if change.renamed { - apply_rename(drive, change, compact_idx, &mut stats); + apply::apply_rename( + drive, + change, + compact_idx, + &mut stats, + &mut path_changes, + &mut tombstones, + ); } else { stats.skipped += 1; } } - // Rebuild derived structures from updated records + names. - // Children CSR: ~100ms for 7M records. Trigram: ~500ms for 7M records. - // Both are necessary so newly created/renamed files appear in tree - // traversal AND trigram search. - drive.children = ChildrenIndex::build(&drive.records); - // Recompute path_len for all records (picks up creates + renames). - crate::compact::compute_path_lengths(&mut drive.records, &drive.names, drive.letter); - // Rebuild trigram index using CaseFold β€” no names_lower clone needed. 
- drive.trigram = TrigramIndex::build(&drive.records, &drive.names, drive.fold); - // Rebuild extension inverted index so --ext queries reflect USN changes. - drive.ext_index = crate::compact::ExtensionIndex::build(&drive.records); - - if !changes.is_empty() { - log_batch_summary(drive, changes.len(), &stats); - } + // Rebuild the derived structures (children CSR, path lengths, trigram, + // extension index) from the mutated records + names, with per-step + // IDXDELTA-TIMING. Extracted to `rebuild.rs` β€” see that module + the + // incremental-index-maintenance design doc. + rebuild::rebuild_derived_and_log( + drive, + changes.len(), + &stats, + t_loop.elapsed(), + &path_changes, + &tombstones, + ); stats } -/// Emit the per-batch USN-apply summary (how the poll mutated the index) -/// at DEBUG. -fn log_batch_summary(drive: &DriveCompactIndex, changes: usize, stats: &PatchStats) { - tracing::debug!( - drive = %drive.letter, - changes, - created = stats.created, - deleted = stats.deleted, - renamed = stats.renamed, - skipped = stats.skipped, - records = drive.records.len(), - ext_index_entries = drive.ext_index.total_entries(), - "usn apply: batch applied" - ); -} - #[cfg(test)] #[path = "compact_loader_tests.rs"] mod tests; + +#[cfg(test)] +#[path = "compact_loader_path_oracle_tests.rs"] +mod path_oracle_tests; + +#[cfg(test)] +#[path = "compact_loader_trigram_oracle_tests.rs"] +mod trigram_oracle_tests; diff --git a/crates/uffs-core/src/compact_loader/apply.rs b/crates/uffs-core/src/compact_loader/apply.rs new file mode 100644 index 000000000..ae4d2a567 --- /dev/null +++ b/crates/uffs-core/src/compact_loader/apply.rs @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Per-change record mutation for [`super::apply_usn_patch`]: stage a created +//! file into the names blob + extension table, then apply create / delete / +//! rename to the compact records + `frs_to_compact` mapping, collecting the +//! 
path-length and trigram-delta change sets for the post-loop rebuild. + +use super::PatchStats; +use crate::compact::{CompactRecord, DriveCompactIndex}; + +/// A USN-created file's identity, staged into the index's names blob + +/// extension table via a mutable `drive` borrow BEFORE any record borrow. +/// +/// All fields are `Copy`, so the caller can take a `&mut CompactRecord` +/// after this returns without a borrow conflict. +struct StagedCreate { + /// Byte offset of the staged name in `drive.names`. + name_offset: u32, + /// UTF-8 byte length of the staged name. + name_len: u16, + /// Cached first byte of the name (hot-path metafile gate). + name_first_byte: u8, + /// Interned extension id for the new name (`0` = no extension). + extension_id: u16, + /// Compact index of the parent directory (`u32::MAX` if unmapped). + parent_idx: u32, + /// Real size/timestamps/flags from a targeted MFT read, or all-zero when + /// the USN-only change carried no metadata (a later re-warm fills it). + /// Representation matches `CompactRecord`, so it copies straight in. + meta: uffs_mft::usn::RecordMeta, +} + +/// Append `change`'s filename to the names blob and intern its extension, +/// resolving the parent's compact index. Mutably borrows `drive`, so it +/// must run before any `&mut CompactRecord` borrow. 
+fn stage_create(drive: &mut DriveCompactIndex, change: &uffs_mft::usn::FileChange) -> StagedCreate { + let extension_id = drive.intern_extension(&change.filename); + let name_start = drive.names.len(); + drive + .names + .as_mut_vec() + .extend_from_slice(change.filename.as_bytes()); + let parent_frs_usize = uffs_mft::frs_to_usize(change.parent_frs.raw()); + let parent_idx = drive + .frs_to_compact + .get(parent_frs_usize) + .copied() + .unwrap_or(u32::MAX); + StagedCreate { + name_offset: uffs_mft::len_to_u32(name_start), + name_len: uffs_mft::len_to_u16(change.filename.len()), + name_first_byte: change.filename.as_bytes().first().copied().unwrap_or(0), + extension_id, + parent_idx, + meta: change.meta.unwrap_or_default(), + } +} + +/// Overwrite an existing compact slot with a reused/re-animated file's +/// identity. Per-file metrics come from the staged metadata β€” real values +/// when a targeted MFT read backfilled them, else zero (a later re-warm +/// fills them; the USN `FileChange` carries only name + parent). +const fn overwrite_slot(rec: &mut CompactRecord, staged: &StagedCreate) { + rec.name_offset = staged.name_offset; + rec.name_len = staged.name_len; + rec.name_first_byte = staged.name_first_byte; + rec.extension_id = staged.extension_id; + rec.parent_idx = staged.parent_idx; + rec.size = staged.meta.size; + rec.allocated = staged.meta.allocated; + rec.created = staged.meta.created; + rec.modified = staged.meta.modified; + rec.accessed = staged.meta.accessed; + rec.flags = staged.meta.flags; + // Tree metrics are recomputed post-loop (CSR rebuild + compute_path_ + // lengths); never carried by a USN change. + rec.treesize = 0; + rec.tree_allocated = 0; + rec.descendants = 0; + rec.path_len = 0; +} + +/// Apply a delete change: tombstone the slot (`name_len = 0`, parent +/// unmapped so the CSR rebuild drops it) and unmap its FRS so a later batch +/// can't re-animate the tombstone. 
+pub(super) fn apply_delete( + drive: &mut DriveCompactIndex, + frs_usize: usize, + compact_idx: u32, + stats: &mut PatchStats, + tombstones: &mut Vec, +) { + if compact_idx == u32::MAX { + stats.skipped += 1; + return; + } + if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { + rec.name_len = 0; + rec.parent_idx = u32::MAX; + if let Some(slot) = drive.frs_to_compact.get_mut(frs_usize) { + *slot = u32::MAX; + } + // Phase 2b: mask the deleted record's stale base trigram postings. + tombstones.push(compact_idx); + stats.deleted += 1; + } +} + +/// Apply a create change: overwrite the mapped slot when the MFT record +/// number was reused (tombstone OR stale live record), or append a fresh +/// record + register its FRS mapping when the slot is new. +pub(super) fn apply_create( + drive: &mut DriveCompactIndex, + change: &uffs_mft::usn::FileChange, + frs_usize: usize, + compact_idx: u32, + stats: &mut PatchStats, + path_changes: &mut Vec, + tombstones: &mut Vec, +) { + if change.filename.is_empty() { + stats.skipped += 1; + return; + } + // Stage name + interned extension up front (mutable index borrow) so the + // per-record write can take a `&mut CompactRecord` without conflict. + let staged = stage_create(drive, change); + if compact_idx == u32::MAX { + // Brand-new record: append, then register the FRS mapping. NTFS + // reuses freed record numbers and a long-running daemon can outgrow + // the build-time table, so extend + sentinel-fill any gap. + let new_rec = CompactRecord { + size: staged.meta.size, + allocated: staged.meta.allocated, + treesize: 0, + tree_allocated: 0, + created: staged.meta.created, + modified: staged.meta.modified, + accessed: staged.meta.accessed, + name_offset: staged.name_offset, + flags: staged.meta.flags, + parent_idx: staged.parent_idx, + descendants: 0, + name_len: staged.name_len, + extension_id: staged.extension_id, + // path_len filled by `compute_path_lengths` post-loop. 
+ path_len: 0, + name_first_byte: staged.name_first_byte, + _pad: [0; 1], + }; + let new_compact_idx = uffs_mft::len_to_u32(drive.records.len()); + drive.records.as_mut_vec().push(new_rec); + if frs_usize >= drive.frs_to_compact.len() { + drive + .frs_to_compact + .resize(frs_usize.saturating_add(1), u32::MAX); + } + if let Some(slot) = drive.frs_to_compact.get_mut(frs_usize) { + *slot = new_compact_idx; + } + // A new record has no descendants yet β†’ O(1) path refresh, no subtree. + path_changes.push(crate::compact::PathChange { + idx: new_compact_idx, + subtree: false, + }); + stats.created += 1; + } else if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { + // The record number is already mapped. A `created` event means NTFS + // reused that slot for a NEW file β€” the old occupant (a tombstone, OR + // a stale live record whose delete was coalesced/missed) no longer + // exists. Overwrite it wholesale. Skipping a live slot here is what + // dropped FRS-reused recreates (the "delta.pdf vanished" report). + overwrite_slot(rec, &staged); + // FRS-reuse overwrite: treat as a fresh record (its old subtree, if + // any, was deleted/remapped and is handled by its own changes). + path_changes.push(crate::compact::PathChange { + idx: compact_idx, + subtree: false, + }); + // Phase 2b: the reused slot's old occupant's base postings are stale β€” + // mask them; the new name is re-added via `path_changes`. + tombstones.push(compact_idx); + stats.created += 1; + } +} + +/// Apply a rename change: re-point the name, **re-intern the extension** (a +/// rename can change it: `foo.log` β†’ `foo.pdf`), refresh the first-byte +/// cache, and update `parent_idx`. The FRS keeps its slot, so the mapping is +/// unchanged. 
+pub(super) fn apply_rename( + drive: &mut DriveCompactIndex, + change: &uffs_mft::usn::FileChange, + compact_idx: u32, + stats: &mut PatchStats, + path_changes: &mut Vec, + tombstones: &mut Vec, +) { + if compact_idx == u32::MAX || change.filename.is_empty() { + stats.skipped += 1; + return; + } + let extension_id = drive.intern_extension(&change.filename); + let name_start = drive.names.len(); + drive + .names + .as_mut_vec() + .extend_from_slice(change.filename.as_bytes()); + let new_parent_frs = uffs_mft::frs_to_usize(change.parent_frs.raw()); + let new_parent_compact = drive + .frs_to_compact + .get(new_parent_frs) + .copied() + .unwrap_or(u32::MAX); + if let Some(rec) = drive.records.as_mut_slice().get_mut(compact_idx as usize) { + rec.name_offset = uffs_mft::len_to_u32(name_start); + rec.name_len = uffs_mft::len_to_u16(change.filename.len()); + rec.extension_id = extension_id; + rec.name_first_byte = change.filename.as_bytes().first().copied().unwrap_or(0); + rec.parent_idx = new_parent_compact; + // Apply backfilled size/timestamps/flags when a targeted MFT read + // attached them (corrects a record previously created USN-only with + // zeroed metrics); otherwise leave the existing values untouched. + if let Some(meta) = change.meta { + rec.size = meta.size; + rec.allocated = meta.allocated; + rec.created = meta.created; + rec.modified = meta.modified; + rec.accessed = meta.accessed; + rec.flags = meta.flags; + } + // A directory rename shifts every descendant's path by a constant Ξ”; + // a file rename only refreshes this record. + path_changes.push(crate::compact::PathChange { + idx: compact_idx, + subtree: rec.is_directory(), + }); + // Phase 2b: mask the old-name base postings; the new name is re-added + // via `path_changes`. The trigram_search tombstone logic keeps the + // record visible under its new name and gone from its old one. 
+ tombstones.push(compact_idx); + stats.renamed += 1; + } +} diff --git a/crates/uffs-core/src/compact_loader/rebuild.rs b/crates/uffs-core/src/compact_loader/rebuild.rs new file mode 100644 index 000000000..fc7ca3e44 --- /dev/null +++ b/crates/uffs-core/src/compact_loader/rebuild.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Post-apply rebuild of the derived indexes for [`super::apply_usn_patch`]. +//! +//! After the per-change loop mutates the record columns + `frs_to_compact`, +//! the derived structures (children CSR, path lengths, trigram, extension +//! inverted index) are rebuilt from scratch so newly created / renamed / +//! deleted files appear in tree traversal AND trigram / `--ext` search. +//! +//! This is the **O(total-records)** step the incremental-index-maintenance +//! work (`docs/architecture/incremental-index-maintenance.md`) replaces with a +//! base+delta overlay; until then the per-index rebuild cost is captured here +//! under the `IDXDELTA-TIMING` dev marker so a baseline can be measured and a +//! regression detected. Extracted from `compact_loader.rs` to keep that file +//! under the workspace 800-LOC policy and to house the temporary IDXDELTA +//! timing in one place for Phase-5 removal. + +use alloc::sync::Arc; +use std::time::Instant; + +use super::PatchStats; +use crate::compact::{ + ChildrenIndex, DriveCompactIndex, PathChange, update_path_lengths_incremental, +}; + +/// Above this many touched records, the per-change incremental path update +/// loses to a single O(total) BFS (each create/rename re-walks parents), so we +/// fall back to the full [`crate::compact::compute_path_lengths`]. Sized well +/// above a normal USN poll batch; the 50k disk-save threshold is the practical +/// ceiling on a single apply anyway. +const FULL_PATH_RECOMPUTE_THRESHOLD: usize = 50_000; + +/// Rebuild the derived indexes from the mutated records + names and emit the +/// per-batch summary. 
`loop_elapsed` is how long the caller's O(changed) +/// per-change loop took, so the `IDXDELTA-TIMING` line can attribute time to +/// the loop vs. each index rebuild. +pub(super) fn rebuild_derived_and_log( + drive: &mut DriveCompactIndex, + changes_len: usize, + stats: &PatchStats, + loop_elapsed: core::time::Duration, + path_changes: &[PathChange], + tombstones: &[u32], +) { + let loop_us = dur_us(loop_elapsed); + + // IDXDELTA-TIMING: per-index full-rebuild cost of one apply β€” the + // O(total-records) baseline the incremental (base+delta) work drives + // toward O(changed). Remove with the IDXDELTA dev instrumentation (Phase 5). + // Children CSR is rebuilt FIRST so the incremental path update below can + // walk a directory rename's subtree against current adjacency. + let t_children = Instant::now(); + drive.children = Arc::new(ChildrenIndex::build(&drive.records)); + let children_us = dur_us(t_children.elapsed()); + // Phase 1: refresh path_len only for the touched records (O(changed)). + // An EMPTY change set here means the batch touched no record's path_len + // (e.g. a delete-only batch β€” a delete tombstones its record and never + // shifts any surviving record's path_len), so the correct work is *none*: + // `update_path_lengths_incremental` is a no-op over an empty slice. The + // full O(total) BFS is reserved for the cold-load builder + // (`build_compact_index`); reaching it from a live apply was a 0.5 s + // regression on delete-only batches. The only apply-time fallback is a + // pathologically huge batch where the per-record re-walk loses to one BFS. 
+ let t_paths = Instant::now(); + if path_changes.len() > FULL_PATH_RECOMPUTE_THRESHOLD { + crate::compact::compute_path_lengths(&mut drive.records, &drive.names, drive.letter); + } else { + update_path_lengths_incremental( + drive.records.as_mut_slice(), + &drive.names, + drive.letter, + &drive.children, + path_changes, + ); + } + let paths_us = dur_us(t_paths.elapsed()); + // Phase 2b + 4a: overlay this batch's trigram + extension postings onto the + // delta instead of rebuilding the ~340 ms trigram and ~58 ms ext bases. + // `apply_index_delta` adds each created/renamed record's postings and masks + // tombstoned trigrams, folding back to fresh bases only when the delta + // crosses the compaction threshold (so `trigram_us` is ~0 on most applies, + // a full build on the occasional compaction tick). Ext is served through + // `records_with_ext` (base βˆͺ delta), so no per-apply ext rebuild. + let t_trigram = Instant::now(); + let compacted = drive.apply_index_delta(path_changes, tombstones); + let trigram_us = dur_us(t_trigram.elapsed()); + // `ext_us` retained in the IDXDELTA-TIMING line for baseline comparison; + // the rebuild is gone (Phase 4a), so it now measures ~0. + let ext_us = 0_u64; + + if changes_len != 0 { + tracing::info!( + marker = "IDXDELTA-TIMING", + drive = %drive.letter, + records = drive.records.len(), + changes = changes_len, + loop_us, + children_us, + paths_us, + trigram_us, + compacted, + ext_us, + rebuild_us = children_us + .saturating_add(paths_us) + .saturating_add(trigram_us) + .saturating_add(ext_us), + total_us = loop_us + .saturating_add(children_us) + .saturating_add(paths_us) + .saturating_add(trigram_us) + .saturating_add(ext_us), + "IDXDELTA-TIMING apply: loop + children/ext rebuild + trigram delta \ + (paths incremental; trigram_usβ‰ˆ0 unless compacted)" + ); + log_batch_summary(drive, changes_len, stats); + } +} + +/// IDXDELTA-TIMING helper: a `Duration` as whole microseconds (`u64`). 
+/// Integer to satisfy uffs-core's `float_arithmetic` deny and to keep sub-ms +/// precision for the O(changed) loop; the WIN idx-delta-verify script renders +/// these as ms. Remove with the IDXDELTA dev instrumentation (Phase 5). +fn dur_us(elapsed: core::time::Duration) -> u64 { + u64::try_from(elapsed.as_micros()).unwrap_or(u64::MAX) +} + +/// Emit the per-batch USN-apply summary (how the poll mutated the index) at +/// DEBUG. +fn log_batch_summary(drive: &DriveCompactIndex, changes: usize, stats: &PatchStats) { + tracing::debug!( + drive = %drive.letter, + changes, + created = stats.created, + deleted = stats.deleted, + renamed = stats.renamed, + skipped = stats.skipped, + records = drive.records.len(), + ext_index_entries = drive.ext_index.total_entries(), + "usn apply: batch applied" + ); +} diff --git a/crates/uffs-core/src/compact_loader_path_oracle_tests.rs b/crates/uffs-core/src/compact_loader_path_oracle_tests.rs new file mode 100644 index 000000000..d36f0bbc4 --- /dev/null +++ b/crates/uffs-core/src/compact_loader_path_oracle_tests.rs @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Phase-1 path-length oracle for [`super::apply_usn_patch`] +//! (incremental-index-maintenance Β§7). +//! +//! The per-change incremental `path_len` update done inside `apply_usn_patch` +//! must be **byte-identical** to a full `compute_path_lengths` rebuild β€” the +//! correctness gate the design requires ("base+delta must be byte-identical to +//! a full rebuild"). The hardest case is a **directory rename**, whose length +//! delta must propagate to every descendant's `path_len` via the children CSR. +//! +//! Kept in a dedicated sibling submodule so neither `compact_loader.rs` nor +//! `compact_loader_tests.rs` crosses the workspace 800-LOC policy ceiling. 
+ +use alloc::sync::Arc; +use std::path::PathBuf; + +use uffs_mft::usn::FileChange; +use uffs_text::case_fold::CaseFold; + +use super::{IndexSource, apply_usn_patch}; +use crate::compact::{ + ChildrenIndex, CompactRecord, DriveCompactIndex, ExtensionIndex, compute_path_lengths, +}; +use crate::compact_storage::ColumnStorage; +use crate::trigram::TrigramIndex; + +/// Nested fixture: top dir "C" (frs 5) β†’ dir "sub" (frs 6) β†’ "deep.txt" +/// (frs 7). Names: C[0..1] sub[1..4] deep.txt[4..12]. `path_len`s are +/// initialised via the cold-load BFS so the apply path takes over from a +/// correct baseline, exactly as it does after a real cold load. +fn build_nested_fixture() -> DriveCompactIndex { + let names = b"Csubdeep.txt".to_vec(); + let records = vec![ + CompactRecord { + name_offset: 0, + flags: 0x10, + parent_idx: u32::MAX, + name_len: 1, + name_first_byte: b'C', + ..CompactRecord::default() + }, + CompactRecord { + name_offset: 1, + flags: 0x10, // directory β€” its rename must shift the subtree + parent_idx: 0, + name_len: 3, + name_first_byte: b's', + ..CompactRecord::default() + }, + CompactRecord { + name_offset: 4, + parent_idx: 1, + name_len: 8, + name_first_byte: b'd', + ..CompactRecord::default() + }, + ]; + let fold = CaseFold::default_table(); + let frs_to_compact: Vec = (0_usize..20) + .map(|frs| match frs { + 5 => 0_u32, + 6 => 1, + 7 => 2, + _ => u32::MAX, + }) + .collect(); + let mut drive = DriveCompactIndex { + letter: uffs_mft::platform::DriveLetter::T, + records: ColumnStorage::from_vec(records.clone()), + names: ColumnStorage::from_vec(names.clone()), + trigram: Arc::new(TrigramIndex::build(&records, &names, fold)), + children: Arc::new(ChildrenIndex::build(&records)), + ext_index: Arc::new(ExtensionIndex::build(&records)), + fold, + ext_names: vec![Box::from("")], + source: IndexSource::MftFile(PathBuf::from("T:")), + source_epoch: 1, + bloom: None, + path_trie: None, + frs_to_compact, + delta: None, + }; + // Cold-load init of path_lens 
(the full BFS the apply path replaces). + compute_path_lengths(drive.records.as_mut_slice(), &drive.names, drive.letter); + drive +} + +/// Assert the live (incremental) `path_len`s on `drive` equal a from-scratch +/// `compute_path_lengths` BFS over the same (now-mutated) records. +/// +/// Only **live** records are compared: a tombstoned record +/// (`name_len == 0 && parent_idx == u32::MAX`, set by `apply_delete`) never +/// surfaces in search or path resolution, so its `path_len` is meaningless β€” +/// the incremental path leaves it stale while a full BFS recomputes it as a +/// root. That divergence is correct, so it is excluded. +fn assert_path_len_matches_full_rebuild(drive: &mut DriveCompactIndex) { + let is_live = |rec: &CompactRecord| !(rec.name_len == 0 && rec.parent_idx == u32::MAX); + let incremental: Vec<(usize, u16)> = drive + .records + .iter() + .enumerate() + .filter(|(_, rec)| is_live(rec)) + .map(|(idx, rec)| (idx, rec.path_len)) + .collect(); + compute_path_lengths(drive.records.as_mut_slice(), &drive.names, drive.letter); + let full_rebuild: Vec<(usize, u16)> = drive + .records + .iter() + .enumerate() + .filter(|(_, rec)| is_live(rec)) + .map(|(idx, rec)| (idx, rec.path_len)) + .collect(); + assert_eq!( + incremental, full_rebuild, + "incremental path_len must equal the full rebuild for live records; \ + incremental={incremental:?} full={full_rebuild:?}", + ); +} + +#[test] +fn incremental_path_len_matches_full_rebuild_oracle() { + let mut drive = build_nested_fixture(); + + // Apply a batch that exercises every path op: directory rename (subtree Ξ”), + // a fresh create, and a file rename. 
+ apply_usn_patch(&mut drive, &[ + FileChange { + frs: 6_u64.into(), + parent_frs: 5_u64.into(), + filename: "subdirectory".to_owned(), // longer β†’ Ξ” > 0 + renamed: true, + ..FileChange::default() + }, + FileChange { + frs: 8_u64.into(), + parent_frs: 5_u64.into(), + filename: "new.bin".to_owned(), + created: true, + ..FileChange::default() + }, + FileChange { + frs: 7_u64.into(), + parent_frs: 6_u64.into(), + filename: "deep-renamed.txt".to_owned(), + renamed: true, + ..FileChange::default() + }, + ]); + + // `apply_usn_patch` used the INCREMENTAL path update (batch < threshold). + assert_path_len_matches_full_rebuild(&mut drive); +} + +/// Regression guard for the delete-only batch: a delete pushes **no** +/// `PathChange` (it tombstones its record and shifts no surviving record's +/// `path_len`), so the apply's `path_changes` slice is empty. The path update +/// must then be a *no-op* β€” NOT a fall-back to the full O(total) BFS, which on +/// a live 3.9 M-record drive was a 0.5 s per-apply regression. Surviving +/// records' `path_len`s must still equal a full rebuild afterwards. +#[test] +fn delete_only_batch_leaves_path_lengths_correct_without_full_recompute() { + let mut drive = build_nested_fixture(); + + // Delete the leaf "deep.txt" (frs 7). No create / rename β†’ empty + // path_changes β†’ must take the no-op incremental branch. + apply_usn_patch(&mut drive, &[FileChange { + frs: 7_u64.into(), + parent_frs: 6_u64.into(), + deleted: true, + ..FileChange::default() + }]); + + // "C" and "sub" are untouched survivors; their path_len must match a full + // rebuild over the post-delete record set. + assert_path_len_matches_full_rebuild(&mut drive); +} diff --git a/crates/uffs-core/src/compact_loader_tests.rs b/crates/uffs-core/src/compact_loader_tests.rs index 9b86f4d5e..17703e4b4 100644 --- a/crates/uffs-core/src/compact_loader_tests.rs +++ b/crates/uffs-core/src/compact_loader_tests.rs @@ -16,6 +16,7 @@ //! 
Extracted into a sibling submodule so `compact_loader.rs` stays //! well below the file-size policy ceiling. +use alloc::sync::Arc; use std::path::PathBuf; use uffs_mft::usn::FileChange; @@ -102,9 +103,9 @@ fn make_synthetic_drive() -> DriveCompactIndex { letter: uffs_mft::platform::DriveLetter::T, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from("")], source: IndexSource::MftFile(PathBuf::from("T:")), @@ -112,6 +113,7 @@ fn make_synthetic_drive() -> DriveCompactIndex { bloom: None, path_trie: None, frs_to_compact, + delta: None, } } @@ -267,8 +269,8 @@ fn apply_usn_patch_rename_reinterns_extension() { "first-byte cache must reflect the renamed name" ); assert!( - drive.ext_index.get(pdf_id).contains(&2), - "ExtensionIndex.get(pdf) must include the renamed record" + drive.records_with_ext(pdf_id).contains(&2), + "records_with_ext(pdf) must include the renamed record" ); } @@ -310,7 +312,7 @@ fn apply_usn_patch_create_replaces_live_reused_slot() { let pdf_id = *pdf_ids.first().expect("'pdf' interned"); assert_eq!(record.extension_id, pdf_id, "reused slot tagged 'pdf'"); assert!( - drive.ext_index.get(pdf_id).contains(&2), + drive.records_with_ext(pdf_id).contains(&2), "ExtensionIndex.get(pdf) must include the reused record" ); } @@ -481,12 +483,12 @@ fn apply_usn_patch_created_record_is_findable_by_extension() { "created record must be tagged with the resolved 'pdf' id" ); - // 3. The rebuilt inverted index returns the new record for that id β€” this is - // exactly what `--ext pdf` walks. - let matches = drive.ext_index.get(pdf_id); + // 3. records_with_ext (base βˆͺ delta overlay) returns the new record for that id + // β€” exactly what `--ext pdf` walks. 
+ let matches = drive.records_with_ext(pdf_id); assert!( matches.contains(&u32::try_from(new_idx).expect("idx fits u32")), - "ExtensionIndex.get(pdf) must include the USN-created record" + "records_with_ext(pdf) must include the USN-created record" ); } diff --git a/crates/uffs-core/src/compact_loader_trigram_oracle_tests.rs b/crates/uffs-core/src/compact_loader_trigram_oracle_tests.rs new file mode 100644 index 000000000..4cd689248 --- /dev/null +++ b/crates/uffs-core/src/compact_loader_trigram_oracle_tests.rs @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Phase-2b end-to-end oracle for the trigram base+delta overlay +//! (incremental-index-maintenance Β§4 Phase 2 / Β§7). +//! +//! Drives a **real** [`super::apply_usn_patch`] batch (create + rename + +//! delete) so the delta is populated exactly as the live USN path does, then +//! asserts that `trigram_search` through the base βˆͺ delta overlay returns +//! **identical** candidates to a fully **compacted** index (delta folded into a +//! fresh base). That equivalence is the Phase-2b correctness contract: "base + +//! delta must be byte-identical to a full rebuild" β€” for search results, across +//! every op. + +use alloc::sync::Arc; +use std::path::PathBuf; + +use uffs_mft::usn::FileChange; +use uffs_text::case_fold::CaseFold; + +use super::{IndexSource, apply_usn_patch}; +use crate::compact::{ChildrenIndex, CompactRecord, DriveCompactIndex, ExtensionIndex}; +use crate::compact_storage::ColumnStorage; +use crate::trigram::TrigramIndex; + +/// Push one record (root or file) and register its FRSβ†’compact mapping. 
+fn push_record( + names: &mut Vec, + records: &mut Vec, + frs_to_compact: &mut Vec, + name: &str, + frs: usize, + parent: u32, + dir: bool, +) { + let idx = u32::try_from(records.len()).expect("fixture fits u32"); + let offset = u32::try_from(names.len()).expect("fixture names fit u32"); + names.extend_from_slice(name.as_bytes()); + records.push(CompactRecord { + name_offset: offset, + flags: if dir { 0x10 } else { 0 }, + parent_idx: parent, + name_len: u16::try_from(name.len()).expect("fixture name fits u16"), + name_first_byte: name.as_bytes().first().copied().unwrap_or(0), + ..CompactRecord::default() + }); + if frs >= frs_to_compact.len() { + frs_to_compact.resize(frs + 1, u32::MAX); + } + if let Some(slot) = frs_to_compact.get_mut(frs) { + *slot = idx; + } +} + +/// Root "C" (frs 5) + four files; FRS mapping populated so `apply_usn_patch` +/// can resolve every change. +fn build_drive() -> DriveCompactIndex { + let mut names = Vec::new(); + let mut records = Vec::new(); + let mut frs_to_compact = Vec::new(); + push_record( + &mut names, + &mut records, + &mut frs_to_compact, + "C", + 5, + u32::MAX, + true, + ); + push_record( + &mut names, + &mut records, + &mut frs_to_compact, + "report.txt", + 10, + 0, + false, + ); + push_record( + &mut names, + &mut records, + &mut frs_to_compact, + "alpha.txt", + 11, + 0, + false, + ); + push_record( + &mut names, + &mut records, + &mut frs_to_compact, + "config.json", + 12, + 0, + false, + ); + push_record( + &mut names, + &mut records, + &mut frs_to_compact, + "datafile.bin", + 13, + 0, + false, + ); + + let fold = CaseFold::default_table(); + let trigram = TrigramIndex::build(&records, &names, fold); + let children = ChildrenIndex::build(&records); + let ext_index = ExtensionIndex::build(&records); + DriveCompactIndex { + letter: uffs_mft::platform::DriveLetter::T, + records: ColumnStorage::from_vec(records), + names: ColumnStorage::from_vec(names), + trigram: Arc::new(trigram), + children: Arc::new(children), + 
ext_index: Arc::new(ext_index), + fold, + ext_names: vec![Box::from("")], + source: IndexSource::MftFile(PathBuf::from("T:")), + source_epoch: 1, + bloom: None, + path_trie: None, + frs_to_compact, + delta: None, + } +} + +fn sorted_candidates(drive: &DriveCompactIndex, needle: &str) -> Vec { + let mut got = drive.trigram_search(needle).unwrap_or_default(); + got.sort_unstable(); + got +} + +#[test] +fn apply_batch_delta_search_equals_compacted_rebuild_oracle() { + let mut drive = build_drive(); + + // A batch hitting every op: create a file, rename one, delete one. + apply_usn_patch(&mut drive, &[ + FileChange { + frs: 20_u64.into(), + parent_frs: 5_u64.into(), + filename: "newfile.log".to_owned(), + created: true, + ..FileChange::default() + }, + FileChange { + frs: 10_u64.into(), + parent_frs: 5_u64.into(), + filename: "summary.txt".to_owned(), // report.txt -> summary.txt + renamed: true, + ..FileChange::default() + }, + FileChange { + frs: 11_u64.into(), + parent_frs: 5_u64.into(), + deleted: true, // alpha.txt deleted + ..FileChange::default() + }, + ]); + + // The live drive now serves search through the base βˆͺ delta overlay. + assert!( + drive.delta.is_some(), + "apply must have populated the trigram delta" + ); + + // Oracle reference: the same drive with the delta folded into a fresh base. + let mut compacted = drive.clone(); + compacted.compact_base(); + assert!(compacted.delta.is_none(), "compaction must clear the delta"); + + // Every needle must yield identical candidates from the overlay and the + // compacted rebuild β€” covering created, renamed (new + old name), deleted, + // and untouched files. 
+ for needle in [ + "summ", "summary", // renamed-in (new name) + "report", "repo", // renamed-away (old name) β€” gone from both + "newfile", "newf", // created + "alpha", "lpha", // deleted β€” gone from both + "config", "datafile", "bin", "txt", "log", // untouched / extensions + ] { + let overlay = sorted_candidates(&drive, needle); + let rebuilt = sorted_candidates(&compacted, needle); + assert_eq!( + overlay, rebuilt, + "needle {needle:?}: overlay {overlay:?} != compacted rebuild {rebuilt:?}", + ); + } + + // Spot-check the semantics concretely (compact_idx: report/summary=1, + // config=3, datafile=4, newfile appended at 5). + assert_eq!( + sorted_candidates(&drive, "summary"), + vec![1], + "renamed visible as summary" + ); + assert!( + sorted_candidates(&drive, "report").is_empty(), + "old name gone" + ); + assert_eq!( + sorted_candidates(&drive, "newfile"), + vec![5], + "created visible" + ); + assert!( + sorted_candidates(&drive, "alpha").is_empty(), + "deleted gone" + ); + + // Phase 4a ext oracle: records_with_ext through the overlay must equal the + // compacted rebuild for every extension id (the create interns ".log", the + // rename keeps ".txt", the delete drops ".txt"). Covers id 0 (no extension) + // up past the highest interned id. 
+ let max_ext = drive + .records + .iter() + .map(|rec| rec.extension_id) + .max() + .unwrap_or(0); + for ext_id in 0..=max_ext { + let mut overlay = drive.records_with_ext(ext_id).into_owned(); + overlay.sort_unstable(); + let mut rebuilt = compacted.records_with_ext(ext_id).into_owned(); + rebuilt.sort_unstable(); + assert_eq!( + overlay, rebuilt, + "ext_id {ext_id}: overlay {overlay:?} != compacted rebuild {rebuilt:?}", + ); + } +} diff --git a/crates/uffs-core/src/compact_trigram_delta_tests.rs b/crates/uffs-core/src/compact_trigram_delta_tests.rs new file mode 100644 index 000000000..a094f4118 --- /dev/null +++ b/crates/uffs-core/src/compact_trigram_delta_tests.rs @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +//! Phase-2a correctness tests for [`DriveCompactIndex::trigram_search`] β€” the +//! base βˆͺ delta overlay choke point (incremental-index-maintenance Β§5.2). +//! +//! These pin the *semantics* of the overlay by populating an [`IndexDelta`] +//! **manually** (the apply path that fills it for real lands in Phase 2b), so +//! the merge + tombstone resolution is locked down independently of the USN +//! plumbing. The hard case is a rename: the record must become visible under +//! its new name yet vanish from its old one β€” which is exactly why tombstone +//! filtering is applied to the final candidate set, never per posting list. + +use alloc::sync::Arc; +use std::path::PathBuf; + +use uffs_text::case_fold::CaseFold; + +use crate::compact::{ + ChildrenIndex, CompactRecord, DriveCompactIndex, ExtensionIndex, IndexDelta, IndexSource, +}; +use crate::compact_storage::ColumnStorage; +use crate::trigram::{TrigramIndex, needle_trigrams}; + +/// Append a record (and its name bytes) to the fixture columns. 
+fn push_record( + names: &mut Vec, + records: &mut Vec, + name: &str, + parent: u32, + dir: bool, +) { + let offset = u32::try_from(names.len()).expect("fixture names blob fits u32"); + names.extend_from_slice(name.as_bytes()); + records.push(CompactRecord { + name_offset: offset, + flags: if dir { 0x10 } else { 0 }, + parent_idx: parent, + name_len: u16::try_from(name.len()).expect("fixture name fits u16"), + name_first_byte: name.as_bytes().first().copied().unwrap_or(0), + ..CompactRecord::default() + }); +} + +/// Build a flat fixture: a root "C" (idx 0) plus one file per name, each a +/// child of the root. Returns the index with `delta = None` (pure base). +fn build_drive(file_names: &[&str]) -> DriveCompactIndex { + let mut names: Vec = Vec::new(); + let mut records: Vec = Vec::new(); + + push_record(&mut names, &mut records, "C", u32::MAX, true); + for name in file_names { + push_record(&mut names, &mut records, name, 0, false); + } + + let fold = CaseFold::default_table(); + // Build the base CSR indexes before moving the columns into storage. + let trigram = TrigramIndex::build(&records, &names, fold); + let children = ChildrenIndex::build(&records); + let ext_index = ExtensionIndex::build(&records); + let count = u32::try_from(records.len()).expect("fixture record count fits u32"); + let frs_to_compact: Vec = (0..count).collect(); + // Fields in struct-definition order (clippy::inconsistent_struct_constructor). + DriveCompactIndex { + letter: uffs_mft::platform::DriveLetter::T, + records: ColumnStorage::from_vec(records), + names: ColumnStorage::from_vec(names), + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), + fold, + ext_names: vec![Box::from("")], + source: IndexSource::MftFile(PathBuf::from("T:")), + source_epoch: 1, + bloom: None, + path_trie: None, + frs_to_compact, + delta: None, + } +} + +/// Trigram candidates as a sorted Vec for stable assertions. 
+fn candidates(drive: &DriveCompactIndex, needle: &str) -> Vec { + let mut got = drive.trigram_search(needle).unwrap_or_default(); + got.sort_unstable(); + got +} + +#[test] +fn delta_none_delegates_to_base_search() { + let drive = build_drive(&["report.txt", "alpha.txt"]); + // "repo" matches report.txt (idx 1) only; pure-base fast path. + assert_eq!(candidates(&drive, "repo"), vec![1]); + assert_eq!(candidates(&drive, "alpha"), vec![2]); +} + +#[test] +fn create_via_delta_becomes_searchable() { + let mut drive = build_drive(&["report.txt"]); + let fold = drive.fold; + // Simulate a create of "summary.log" at idx 2 (record need not exist for the + // candidate set; trigram_search is a pre-filter over postings). + let delta = drive.delta.get_or_insert_with(Default::default); + let tris = needle_trigrams("summary.log", fold).unwrap(); + delta.add_record(2, &tris, 0, 0); + + assert_eq!( + candidates(&drive, "summ"), + vec![2], + "new file visible via delta" + ); + assert_eq!( + candidates(&drive, "repo"), + vec![1], + "base file still visible" + ); +} + +#[test] +fn rename_visible_under_new_name_and_gone_from_old() { + let mut drive = build_drive(&["report.txt", "alpha.txt"]); + let fold = drive.fold; + // Rename idx 1 "report.txt" -> "summary.txt": tombstone its stale base + // postings, then re-add under the new name's trigrams. 
+ let delta = drive.delta.get_or_insert_with(Default::default); + delta.tombstone(1); + let tris = needle_trigrams("summary.txt", fold).unwrap(); + delta.add_record(1, &tris, 0, 0); + + assert_eq!( + candidates(&drive, "summ"), + vec![1], + "visible under NEW name" + ); + assert_eq!( + candidates(&drive, "repo"), + Vec::::new(), + "INVISIBLE under OLD name despite stale base postings" + ); + assert_eq!( + candidates(&drive, "alpha"), + vec![2], + "unrelated file untouched" + ); +} + +#[test] +fn delete_via_tombstone_disappears() { + let mut drive = build_drive(&["report.txt", "alpha.txt"]); + // Delete idx 2 "alpha.txt": tombstone, no re-add. + let delta = drive.delta.get_or_insert_with(Default::default); + delta.tombstone(2); + + assert_eq!( + candidates(&drive, "alpha"), + Vec::::new(), + "deleted file no longer a candidate" + ); + assert_eq!(candidates(&drive, "repo"), vec![1], "sibling unaffected"); +} + +#[test] +fn short_needle_returns_none_like_base() { + let mut drive = build_drive(&["report.txt"]); + drive.delta = Some(IndexDelta::default()); + // < 3 codepoints -> None (caller falls back to linear scan), even with a + // delta present. 
+ assert!(drive.trigram_search("re").is_none()); +} diff --git a/crates/uffs-core/src/search/query/mod.rs b/crates/uffs-core/src/search/query/mod.rs index 9d748e36a..e3bffacf4 100644 --- a/crates/uffs-core/src/search/query/mod.rs +++ b/crates/uffs-core/src/search/query/mod.rs @@ -424,7 +424,7 @@ pub(crate) fn search_compact_drive( let t_tri = std::time::Instant::now(); let candidates = if !case_sensitive && trigram_needle.len() >= 3 { - drive.trigram.search(&trigram_needle, fold) + drive.trigram_search(&trigram_needle) } else { None }; diff --git a/crates/uffs-core/src/search/query/numeric_top_n.rs b/crates/uffs-core/src/search/query/numeric_top_n.rs index 35e66c4bc..b18a8a525 100644 --- a/crates/uffs-core/src/search/query/numeric_top_n.rs +++ b/crates/uffs-core/src/search/query/numeric_top_n.rs @@ -215,7 +215,7 @@ fn scan_ext_fast_path( state: &mut DriveTopN, ) { for &ext_id in &filters.resolved_ext_ids { - for &rec_idx_u32 in drive.ext_index.get(ext_id) { + for &rec_idx_u32 in drive.records_with_ext(ext_id).iter() { let rec_idx = rec_idx_u32 as usize; let Some(rec) = drive.records.get(rec_idx) else { continue; diff --git a/crates/uffs-core/src/search/query/path_only_top_n.rs b/crates/uffs-core/src/search/query/path_only_top_n.rs index bdff3de21..ce4737df2 100644 --- a/crates/uffs-core/src/search/query/path_only_top_n.rs +++ b/crates/uffs-core/src/search/query/path_only_top_n.rs @@ -535,7 +535,7 @@ fn collect_path_only_via_ext_index + Sync>( // `.clone()` (Phase 6c category-Ξ΄) that was anticipating a // future filter push that never landed. 
for &ext_id in &search_filters.resolved_ext_ids { - for &rec_idx_u32 in drive.ext_index.get(ext_id) { + for &rec_idx_u32 in drive.records_with_ext(ext_id).iter() { let rec_idx = rec_idx_u32 as usize; let Some(rec) = drive.records.get(rec_idx) else { continue; diff --git a/crates/uffs-core/src/search/query/path_sorted_top_n.rs b/crates/uffs-core/src/search/query/path_sorted_top_n.rs index edfcafa20..43f81d17b 100644 --- a/crates/uffs-core/src/search/query/path_sorted_top_n.rs +++ b/crates/uffs-core/src/search/query/path_sorted_top_n.rs @@ -264,7 +264,7 @@ fn collect_path_via_ext_index + Sync>( // `.clone()` (Phase 6c category-Ξ΄) that was anticipating a // re-aliasing scenario that the current code doesn't hit. for &ext_id in &search_filters.resolved_ext_ids { - for &rec_idx_u32 in drive.ext_index.get(ext_id) { + for &rec_idx_u32 in drive.records_with_ext(ext_id).iter() { let rec_idx = rec_idx_u32 as usize; let Some(rec) = drive.records.get(rec_idx) else { continue; diff --git a/crates/uffs-core/src/search/query/prefix_search.rs b/crates/uffs-core/src/search/query/prefix_search.rs index bc2b892bc..fca5cb543 100644 --- a/crates/uffs-core/src/search/query/prefix_search.rs +++ b/crates/uffs-core/src/search/query/prefix_search.rs @@ -44,7 +44,7 @@ pub(crate) fn search_compact_drive_prefix( // Get trigram candidates using first 3 chars of prefix. // get() safely handles any byte boundaries; prefix is ASCII from pattern. 
let trigram_needle = prefix.get(..prefix.len().min(3)).unwrap_or(prefix); - let candidates = drive.trigram.search(trigram_needle, drive.fold); + let candidates = drive.trigram_search(trigram_needle); let tri_ms = t_tri.elapsed().as_millis(); let tri_count = candidates.as_ref().map_or(0, Vec::len); diff --git a/crates/uffs-core/src/search/tree.rs b/crates/uffs-core/src/search/tree.rs index a4489c5f6..67b595d96 100644 --- a/crates/uffs-core/src/search/tree.rs +++ b/crates/uffs-core/src/search/tree.rs @@ -528,7 +528,7 @@ fn trigram_filtered_records( limit: usize, mut predicate: impl FnMut(&crate::compact::CompactRecord) -> bool, ) -> Vec { - let candidates = drive.trigram.search(needle, drive.fold); + let candidates = drive.trigram_search(needle); match candidates { None => drive .records diff --git a/crates/uffs-core/src/trigram.rs b/crates/uffs-core/src/trigram.rs index 613b107b4..aba61eb34 100644 --- a/crates/uffs-core/src/trigram.rs +++ b/crates/uffs-core/src/trigram.rs @@ -236,9 +236,13 @@ impl TrigramIndex { self.keys.len() } - /// Look up the posting list for a single packed char-trigram key. + /// Look up the base posting list for a single packed char-trigram key. + /// + /// `pub(crate)` so [`crate::compact::DriveCompactIndex::trigram_search`] + /// can merge a base posting with its delta overlay (incremental-index + /// Β§5.2) without re-deriving the CSR lookup. #[must_use] - fn get_posting(&self, packed: u64) -> Option<&[u32]> { + pub(crate) fn get_posting(&self, packed: u64) -> Option<&[u32]> { let idx = self.keys.binary_search(&packed).ok()?; let start = *self.offsets.get(idx)? as usize; let end = *self.offsets.get(idx + 1)? as usize; @@ -252,23 +256,7 @@ impl TrigramIndex { /// linear scan). 
#[must_use] pub fn search(&self, needle: &str, fold: CaseFold) -> Option> { - let folded: Vec = needle.chars().map(|ch| fold.fold_char(ch)).collect(); - if folded.len() < 3 { - return None; - } - - let mut seen = rustc_hash::FxHashSet::default(); - let mut trigrams: Vec = Vec::new(); - for window in folded.windows(3) { - let Some(&[cp0, cp1, cp2]) = window.first_chunk::<3>() else { - continue; - }; - let packed = pack_char_trigram(cp0, cp1, cp2); - if seen.insert(packed) { - trigrams.push(packed); - } - } - + let trigrams = needle_trigrams(needle, fold)?; if trigrams.is_empty() { return Some(Vec::new()); } @@ -299,15 +287,37 @@ impl TrigramIndex { } } +/// The deduped packed char-trigrams of a search needle, or `None` if the needle +/// folds to fewer than 3 codepoints (caller falls back to a linear scan). +/// +/// Shared by [`TrigramIndex::search`] and the base+delta +/// [`crate::compact::DriveCompactIndex::trigram_search`] so the needleβ†’trigram +/// packing has exactly one definition. +#[must_use] +pub(crate) fn needle_trigrams(needle: &str, fold: CaseFold) -> Option> { + let folded: Vec = needle.chars().map(|ch| fold.fold_char(ch)).collect(); + if folded.len() < 3 { + return None; + } + let mut seen = rustc_hash::FxHashSet::default(); + let mut trigrams: Vec = Vec::new(); + for window in folded.windows(3) { + let Some(&[cp0, cp1, cp2]) = window.first_chunk::<3>() else { + continue; + }; + let packed = pack_char_trigram(cp0, cp1, cp2); + if seen.insert(packed) { + trigrams.push(packed); + } + } + Some(trigrams) +} + /// Intersect a sorted `Vec` with a sorted slice **in place**. /// /// Retains only elements present in both, preserving sorted order. /// Shrinks `result` via `truncate` β€” no allocation, no new `Vec`. 
-#[expect( - clippy::single_call_fn, - reason = "separated for clarity β€” hot-path intersection logic" -)] -fn intersect_in_place(result: &mut Vec, other: &[u32]) { +pub(crate) fn intersect_in_place(result: &mut Vec, other: &[u32]) { let mut write = 0_usize; let mut j = 0_usize; for i in 0..result.len() { diff --git a/crates/uffs-daemon/build.rs b/crates/uffs-daemon/build.rs new file mode 100644 index 000000000..79c8e1207 --- /dev/null +++ b/crates/uffs-daemon/build.rs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright (c) 2025-2026 SKY, LLC. + +// Build scripts run on the build host, not the shipping binary's target, so +// the workspace `deny(unwrap_used)` / `deny(expect_used)` runtime lints do not +// apply here; best-effort error handling with sensible fallbacks is the +// idiomatic shape for a build script whose only "failure" is "git not present". +#![allow( + clippy::expect_used, + reason = "build scripts may panic on build-host failure; workspace deny-expect targets runtime code" +)] + +//! Build script for `uffs-daemon`. +//! +//! Emits `UFFS_GIT_SHA` β€” the short commit the daemon was built from β€” so the +//! startup log can stamp **which build** is running. This exists for the +//! `IDXDELTA` development flow (incremental-index-maintenance): the USN +//! testing earlier hit a "ran the wrong/stale binary" trap, and a definitive +//! build stamp in the daemon log is how the WIN test-script confirms it +//! exercised the binary it just compiled. Remove this build script (and the +//! `option_env!("UFFS_GIT_SHA")` read in `startup.rs`) when the IDXDELTA dev +//! instrumentation is stripped in Phase 5. 
+ +use std::process::Command; + +fn main() { + let sha = Command::new("git") + .args(["rev-parse", "--short", "HEAD"]) + .output() + .ok() + .filter(|out| out.status.success()) + .and_then(|out| String::from_utf8(out.stdout).ok()) + .map(|raw| raw.trim().to_owned()) + .filter(|trimmed| !trimmed.is_empty()) + .unwrap_or_else(|| "unknown".to_owned()); + + // Append `-dirty` when the working tree has uncommitted changes, so a + // hand-tweaked local build is never mistaken for the clean commit. + let dirty = Command::new("git") + .args(["status", "--porcelain"]) + .output() + .ok() + .filter(|out| out.status.success()) + .is_some_and(|out| !out.stdout.is_empty()); + + let stamp = if dirty { format!("{sha}-dirty") } else { sha }; + println!("cargo:rustc-env=UFFS_GIT_SHA={stamp}"); + + // Re-run when HEAD moves so the stamp tracks the checked-out commit. + // Best-effort relative path from the crate dir to the repo `.git`; a wrong + // path just means the stamp can lag one commit on an exotic layout, which + // is acceptable for a dev-only marker. + println!("cargo:rerun-if-changed=../../.git/HEAD"); + println!("cargo:rerun-if-changed=build.rs"); +} diff --git a/crates/uffs-daemon/src/cache/shard.rs b/crates/uffs-daemon/src/cache/shard.rs index 3789ee112..ff7c600c0 100644 --- a/crates/uffs-daemon/src/cache/shard.rs +++ b/crates/uffs-daemon/src/cache/shard.rs @@ -535,7 +535,25 @@ impl ShardEntry { // `frs_to_compact` mapping rides along on the clone so // `apply_usn_patch` can patch it in lock-step with the // records. + // IDXDELTA-TIMING: the whole-body deep clone (records + names + every + // CSR index) the Arc-swap requires before patching. On a multi-million- + // record drive this is a large memory copy and a real fraction of the + // per-apply cost that the rebuild timing alone does not capture. The + // base+delta design should shrink it (share the immutable base CSR via + // Arc, clone only the small delta), so baseline it now. 
Remove with the + // rest of the IDXDELTA dev instrumentation in Phase 5. + let t_clone = std::time::Instant::now(); let mut owned: DriveCompactIndex = (**body_arc).clone(); + if !changes.is_empty() { + tracing::info!( + marker = "IDXDELTA-TIMING", + drive = %owned.letter, + records = owned.records.len(), + changes = changes.len(), + clone_us = u64::try_from(t_clone.elapsed().as_micros()).unwrap_or(u64::MAX), + "IDXDELTA-TIMING apply: whole-body clone (baseline)" + ); + } let stats = uffs_core::compact_loader::apply_usn_patch(&mut owned, changes); Some((Arc::new(owned), stats)) } diff --git a/crates/uffs-daemon/src/cache/shard/tests.rs b/crates/uffs-daemon/src/cache/shard/tests.rs index 93296d275..17f992d7d 100644 --- a/crates/uffs-daemon/src/cache/shard/tests.rs +++ b/crates/uffs-daemon/src/cache/shard/tests.rs @@ -89,9 +89,9 @@ fn make_test_body(letter: uffs_mft::platform::DriveLetter) -> DriveCompactIndex letter, records: ColumnStorage::from_vec(records), names: ColumnStorage::from_vec(names), - trigram, - children, - ext_index, + trigram: Arc::new(trigram), + children: Arc::new(children), + ext_index: Arc::new(ext_index), fold, ext_names: vec![Box::from("")], source: IndexSource::MftFile(PathBuf::from(format!("{letter}:"))), @@ -99,6 +99,7 @@ fn make_test_body(letter: uffs_mft::platform::DriveLetter) -> DriveCompactIndex bloom: None, path_trie: None, frs_to_compact, + delta: None, } } diff --git a/crates/uffs-daemon/src/startup.rs b/crates/uffs-daemon/src/startup.rs index 47e5a7c4c..4563ba10d 100644 --- a/crates/uffs-daemon/src/startup.rs +++ b/crates/uffs-daemon/src/startup.rs @@ -70,6 +70,17 @@ pub(crate) fn log_daemon_starting(config: &DaemonConfig) { no_retire = config.no_retire, "uffsd starting" ); + // IDXDELTA: dev build stamp for the incremental-index-maintenance flow. + // Lets the idx-delta-verify WIN test-script confirm WHICH build it is + // exercising (grep the log for "IDXDELTA build active"). 
`UFFS_GIT_SHA` + // is emitted by this crate's build.rs; "unknown" when git is unavailable. + // Remove with the rest of the IDXDELTA dev instrumentation in Phase 5. + tracing::info!( + marker = "IDXDELTA", + version = env!("CARGO_PKG_VERSION"), + git = option_env!("UFFS_GIT_SHA").unwrap_or("unknown"), + "IDXDELTA build active β€” incremental-index-maintenance dev build" + ); } /// Publish the [`events::DaemonEvent::DaemonStarting`] notification diff --git a/docs/architecture/baselines/incremental-index-2026-06-26.json b/docs/architecture/baselines/incremental-index-2026-06-26.json new file mode 100644 index 000000000..9a77e67bf --- /dev/null +++ b/docs/architecture/baselines/incremental-index-2026-06-26.json @@ -0,0 +1,26 @@ +{ + "_comment": "Incremental-index-maintenance per-apply timing baseline (design Β§8). Captured by scripts/windows/idx-delta-verify.rs on a live Windows MFT. Later phases diff against this to detect a timing regression. Means over the captured applies; values in milliseconds.", + "build_git": "629966bc2", + "version": "0.6.14", + "captured": "2026-06-26", + "drive": "C", + "drive_records": 3889117, + "apply_samples": 12, + "per_apply_ms": { + "clone": 165.576, + "loop": 61.797, + "children": 54.058, + "paths": 623.464, + "trigram": 377.988, + "ext": 84.426, + "rebuild_subtotal": 1139.936, + "full_apply": 1367.309 + }, + "targets_by_phase": { + "1_paths_incremental": "623 -> ~0 for small batches", + "2_trigram_delta": "378 -> base+delta merge cost", + "3_clone_arc_share": "166 -> records+names+delta only", + "4_ext_children_delta": "84 + 54 -> overlay", + "regression_tolerance_pct": 15 + } +} diff --git a/docs/architecture/incremental-index-maintenance.md b/docs/architecture/incremental-index-maintenance.md new file mode 100644 index 000000000..84f4e0137 --- /dev/null +++ b/docs/architecture/incremental-index-maintenance.md @@ -0,0 +1,468 @@ + + +# Incremental Index Maintenance β€” Two-Tier Base + Delta (LSM-style) + +**Status:** Phases 1 + 2 
complete (incremental paths + trigram base+delta). `compact.rs` decomposed. Phase 3 next (Arc-share the clone), pending WIN timing of Phase 2b. +**Owner:** _(assign)_ +**Branch:** `feat/incremental-index-maintenance` +**Dev marker:** `IDXDELTA` (all temporary dev-only logging / timing carries this token; grep-and-remove before merge — see §9) + +--- + +## 1. Problem + +Every live USN apply (`uffs_core::compact_loader::apply_usn_patch`) mutates the +record columns in place (O(changed)), then **rebuilds the derived structures +from scratch (O(total records))**. + +### Measured baseline (Phase 0, build `629966bc2`, live C: = 3,889,117 records) + +Captured by `scripts/windows/idx-delta-verify.rs` — mean over 12 applies +(`docs/architecture/baselines/` once committed): + +| Step | Mean | Kind | Incremental target | +|------|-----:|------|--------------------| +| **`compute_path_lengths`** | **623 ms** | per-record path-len recompute | **#1 — only changed records + renamed subtree** | +| `TrigramIndex::build` | 378 ms | CSR inverted index | base + delta overlay | +| whole-body **clone** (Arc-swap) | 166 ms | deep copy in `shard.rs` | Arc-share the immutable base CSR | +| `ExtensionIndex::build` | 84 ms | CSR | base + delta overlay | +| per-change **loop** | 62 ms | O(changed) | already incremental | +| `ChildrenIndex::build` | 54 ms | CSR | base + delta overlay | +| **rebuild subtotal** | **1140 ms** | | | +| **full apply (clone+loop+rebuild)** | **≈ 1367 ms** | | **the number to beat** | + +> **Baseline overturned the original assumption.** This doc first guessed +> *trigram* was the ~80 % win (~500 ms of ~600 ms). The measurement says the +> full apply is **~1.37 s** (not ~600 ms), and **`compute_path_lengths` (623 ms) +> is the single biggest cost — larger than trigram (378 ms)**. Instrumenting the +> *clone* separately (166 ms) was also load-bearing: the rebuild timing alone +> hid it. The phase order in §4 is sequenced from this data, not the guess. 
+ +So a single-file change pays a **~1.37 s** full apply. Consequences already +observed in production / the verify harness: + +- **Apply backlog** when the apply interval drops below the rebuild cost + (mitigated, not removed, by the apply-coalescing guard in `fix/usn-apply-coalesce`). +- **Churn CPU**: a continuously-active drive burns a bounded fraction of a core + on rebuilds. +- **Freshness/CPU tradeoff**: the production apply interval is pinned at **30 s** + precisely to keep rebuild churn down — i.e. we trade search freshness for CPU + *because* each apply is O(n). + +These CSR structures are **immutable / read-optimized**: inserting one record's +postings means shifting the flat `values`/`offsets` arrays — the same cost as a +rebuild. **You cannot cheaply mutate them in place.** This is fundamental, not a +missing optimization. + +## 2. Goal + +Turn apply from **O(total records)** into **O(changed records)** without +regressing search correctness or latency: + +- Sub-second search freshness becomes cheap (apply interval can drop to ~1 s or + event-driven). +- Churn CPU drops to ~proportional-to-changes. +- The existing full rebuild survives, but only as an **occasional compaction + step**, not a per-apply tax. (This also speeds the save-tick path.) + +**Non-goals:** changing the on-disk compact-cache format (the base CSR is still +what we serialize); changing search semantics/results; touching the +Windows-only I/O path. + +## 3.
Architecture — two-tier (base + delta + tombstones) + +The Lucene-segment / LSM pattern: + +``` +DriveCompactIndex +├── records / names (mutated in place — already O(changed)) +├── frs_to_compact (mutated in place — already O(changed)) +├── trigram: TrigramIndex (BASE) ─┐ +├── children: ChildrenIndex (BASE) │ immutable CSR, rebuilt only at compaction +├── ext_index:ExtensionIndex (BASE) ─┘ +└── delta: Option<IndexDelta> (NEW — small mutable overlay) + ├── trigram: HashMap> + ├── ext: HashMap> + ├── children: HashMap> + └── tombstones: FxHashSet (records whose BASE postings are stale) +``` + +- **Base layer** — the current immutable CSR indexes. Built at cold-load and at + compaction; never mutated between. +- **Delta layer** — per-index mutable overlays holding postings for records + created/renamed *since the last compaction*. +- **Tombstones** — record indices whose **base** postings are stale (deleted, or + renamed and re-added to the delta with a new name). Search subtracts them. + +### 3.1 Semantics by operation + +| USN op | records/names | tombstone (base idx) | delta postings | +|--------|---------------|----------------------|----------------| +| **create** | append new record (new idx) | — (idx not in base) | add new idx → trigram/ext/children | +| **delete** | mark record removed | tombstone the mapped base idx; if idx was a recent create, drop it from delta instead | remove from delta if present | +| **rename** | update name/ext/parent in place | tombstone the base idx (old-name base postings now stale) | add the same idx → trigram/ext/children **with the new name** | + +Key invariant: **a record index appears in search results iff** it is +`(in base AND not tombstoned) OR (in delta)`. A renamed record is *both* +tombstoned-in-base (old name suppressed) *and* present-in-delta (new name found) +— same idx, no data duplication. 
+ +### 3.2 Search integration (the hot path — highest risk) + +Every read that consults a base index must consult `base ∪ delta` and subtract +tombstones. Wrap each at a single choke point on `DriveCompactIndex`: + +| Base call (today) | New delta-aware accessor | Callers to migrate | +|-------------------|--------------------------|--------------------| +| `self.trigram.search(needle, fold) -> Option<Vec<u32>>` | `self.trigram_search(needle) -> Option<Vec<u32>>` | `search/tree.rs`, `search/query/mod.rs`, `search/query/prefix_search.rs` | +| `self.children.get(idx) -> &[u32]` | `self.children_of(idx) -> SmallVec/Cow<[u32]>` | `FastPathResolver`, directory listing, tree search | +| `self.ext_index.get(ext_id) -> &[u32]` | `self.records_with_ext(ext_id) -> Cow<[u32]>` | `--ext` filter dispatch | + +- **Trigram** intersects posting lists across the needle's trigrams. For each + trigram `t`, the effective posting list is `base.get_posting(t) ∪ delta.trigram[t]` + (sorted-merge, dedup). Intersect across trigrams as today; **filter tombstones + on the final result** (cheap — one `FxHashSet` lookup per surviving idx). +- **Ext / children** return `base.get(k)` filtered through tombstones, with + `delta[k]` appended. When the delta is empty (`delta == None`), every accessor + is a zero-overhead passthrough to the base — *no regression for the common, + freshly-compacted case.* + +### 3.3 Compaction + +Fold the delta back into a fresh base CSR (this **is** today's +`apply_usn_patch` rebuild path, reused verbatim) when any trigger fires: + +- delta record count `> COMPACT_THRESHOLD_RECORDS` (start at 50 000), **or** +- delta record count `> COMPACT_THRESHOLD_FRACTION` of base (start at 5 %), **or** +- the save tick fires (we already pay a rebuild there — fold the delta in then). + +After compaction: new base, `delta = None`, tombstones cleared. Compaction runs +on the existing background `spawn_blocking` applier path, never on a query. + +## 4.
Phases (each is independently shippable + reversible) + +> Each phase keeps the **full-rebuild path as the oracle** (see Β§7). A phase is +> "done" only when the oracle test passes and the baseline (Β§8) shows no search +> regression. + +Order is by **measured cost** (Β§1), biggest lever first, cheapest/riskiest last. +Cumulative "apply after this phase" assumes a small change batch on the 3.89M +baseline (clone+loop are constant-ish; each phase removes one rebuild term). + +- **Phase 0 β€” scaffolding (βœ… done on this branch):** + - Design doc; build-id stamp + per-step `IDXDELTA-TIMING` (Β§9); WIN rig + + baseline (Β§8, Β§10). **Done:** baseline captured (β‰ˆ1367 ms). + - **Still in Phase 0 (next):** `IndexDelta` struct + `delta: Option` + field on `DriveCompactIndex` (unused, `None` everywhere β†’ zero behavior + change) + the oracle harness (Β§7). Gate for every phase below. + +- **Phase 1 β€” incremental `compute_path_lengths` (623 ms β†’ ~O(changed); the #1 win):** + This is *not* a base+delta overlay β€” `path_len` is a per-`CompactRecord` + field (`= parent.path_len + 1 separator + name_len`), so it is updated + surgically. Approach (Β§5.5): + - **create / file-rename:** recompute just that record's `path_len` from its + (unchanged) parent's `path_len` + new `name_len` β€” O(1). + - **directory rename:** `Ξ” = new_dir_path_len βˆ’ old_dir_path_len`; walk the + renamed dir's subtree via the (still-fresh) children CSR and add `Ξ”` to each + descendant's `path_len` β€” O(subtree), cheap arithmetic, no string walk. + - **delete:** record is tombstoned; `path_len` irrelevant. + - Children + trigram + ext **still full-rebuild** this phase (keeps the diff + small and gives a valid children CSR for the subtree walk). + - **Acceptance:** oracle passes (path resolution identical to a full rebuild); + `paths_us` drops from ~623 ms to sub-ms for small batches; apply β‰ˆ 744 ms. 
+ +- **Phase 2 — trigram delta (378 ms; base + delta overlay):** + `IndexDelta.trigram` + tombstones + `DriveCompactIndex::trigram_search` (§3.2, + §5.1–5.3); apply stops rebuilding trigram; migrate the 3 trigram callers; + compaction folds the delta. **Acceptance:** oracle passes; trigram search + within baseline + ε; apply ≈ 366 ms. + +- **Phase 3 — shrink the clone (166 ms; Arc-share the base CSR):** + Hold the immutable base indexes as `Arc<TrigramIndex>` / `Arc<ChildrenIndex>` / + `Arc<ExtensionIndex>` on `DriveCompactIndex` so the per-apply whole-body clone + copies records + names + the small delta, **not** the large inverted indexes + (pointer-clone the Arcs). **Acceptance:** `clone_us` drops materially; oracle + unaffected (pure representation change). Best done after Phase 2 makes trigram + a shareable base. + +- **Phase 4 — extension + children delta (84 + 54 ms):** same overlay shape for + `ext_index` → `records_with_ext` and `children` → `children_of`. **Children is + the highest-care** index — it feeds `FastPathResolver` *and* the Phase-1 subtree + walk; exercise the path-resolver oracle heavily and keep the children full + rebuild until its delta + the Phase-1 walk are reconciled. + +- **Phase 5 — unify + retire per-apply rebuild + re-tune:** apply is now O(changed) + end-to-end; the full rebuild runs only at compaction. Re-evaluate the production + apply-interval default (candidate: 30 s → ~2 s or event-driven). Remove the dead + per-apply rebuild branch. + +- **Phase 6 — cleanup:** grep-remove every `IDXDELTA` dev marker + the build.rs + stamp + the dev script timing (§9); fold the baseline into a committed + perf-regression test; graduate `idx-delta-verify.rs` into `tests/` or delete. + +## 5. Detailed implementation guidelines (junior-dev executable) + +### 5.1 New types (`crates/uffs-core/src/compact/delta.rs`, new file) + +```rust +/// Mutable overlay over the immutable base CSR indexes. 
`None` on +/// DriveCompactIndex means "freshly compacted β€” pure base, zero overhead". +#[derive(Debug, Default, Clone)] +pub struct IndexDelta { + /// packed-trigram -> sorted, deduped record indices added since compaction. + pub trigram: rustc_hash::FxHashMap>, + /// ext_id -> record indices added since compaction. + pub ext: rustc_hash::FxHashMap>, + /// parent record idx -> child record indices added since compaction. + pub children: rustc_hash::FxHashMap>, + /// record indices whose BASE postings are stale (deleted / renamed-away). + pub tombstones: rustc_hash::FxHashSet, + /// running count of distinct records touched (compaction trigger input). + pub touched_records: u32, +} +``` + +- All postings kept **sorted + deduped** on insert (binary-search insert) so the + baseβˆͺdelta merge is a linear sorted-merge. +- Provide: `add_record(idx, trigrams: &[u64], ext_id, parent_idx)`, + `tombstone(idx)`, `is_tombstoned(idx)`, `len()` (for compaction trigger). + +### 5.2 `DriveCompactIndex` accessors (single choke point) + +Implement on `DriveCompactIndex` (in `compact.rs`), each a passthrough when +`self.delta.is_none()`: + +```rust +pub fn trigram_search(&self, needle: &str) -> Option> { + let base = self.trigram.search(needle, self.fold)?; // existing logic + let Some(delta) = &self.delta else { return Some(base); }; // fast path + // merge per-trigram postings from delta, re-intersect, filter tombstones + // (helper: merge_and_filter β€” see delta.rs) + Some(self.merge_trigram(needle, base, delta)) +} +``` + +> **Correctness note for trigram:** because trigram search is an **AND +> intersection** across the needle's trigrams, a delta record only survives if it +> is in the delta posting for *every* trigram of the needle. Since `add_record` +> inserts the idx into all of the record's name-trigrams, this holds. 
Tombstone +> filtering is applied to the final intersected set, never per-list (a base idx +> may legitimately appear in some lists; only the final membership matters). + +### 5.3 `apply_usn_patch` changes (`compact_loader.rs`) + +Today (per phase, replace the rebuild for the migrated index): + +```rust +// BEFORE (per apply): +drive.trigram = TrigramIndex::build(&drive.records, &drive.names, drive.fold); // ~500ms + +// AFTER (per apply): +let delta = drive.delta.get_or_insert_with(IndexDelta::default); +for &idx in &created_or_renamed_idxs { + delta.add_record(idx, &trigrams_for(idx), ext_of(idx), parent_of(idx)); +} +for &idx in &deleted_or_renamed_old { + delta.tombstone(idx); +} +if delta.len() > COMPACT_THRESHOLD { compact(drive); } // occasional full rebuild +``` + +Keep `compact(drive)` = the *current* full rebuild (children+trigram+ext+ +path-lengths), then `drive.delta = None`. + +### 5.4 Serialization + +The compact-cache (`compact_cache.rs`) serializes **base only**. Before a disk +save, **compact first** (fold delta β†’ base), then serialize. So the on-disk +format is unchanged and always delta-free. (Cold load β†’ `delta = None`.) + +### 5.5 Phase 1 β€” incremental `compute_path_lengths` (the #1 lever) + +`compute_path_lengths` today (`compact.rs`) builds a parentβ†’children adjacency +and BFS-recomputes **every** record's `path_len` where +`path_len = parent.path_len + 1 (separator) + name_char_count`. That O(n) BFS is +the 623 ms. The incremental version only touches what changed. + +**Inputs.** `apply_usn_patch`'s per-change loop already knows each touched +record's compact idx and disposition. Collect them into a small list as the loop +runs (no extra pass): `Vec<(u32 idx, PathOp)>` where +`PathOp = { Created, FileRenamed, DirRenamed, Deleted }`. The directory bit comes +from `CompactRecord::flags` (`FILE_ATTRIBUTE_DIRECTORY`). + +**New fn** (e.g. 
`compact.rs::update_path_lengths_incremental`): + +```rust +pub(crate) fn update_path_lengths_incremental( + records: &mut [CompactRecord], + names: &[u8], + drive_letter: DriveLetter, + children: &ChildrenIndex, // the freshly-rebuilt CSR (Phase 1 keeps it) + changed: &[(u32, PathOp)], +) { + for &(idx, op) in changed { + match op { + PathOp::Deleted => {} // tombstoned; path_len irrelevant + PathOp::Created | PathOp::FileRenamed => { + // parent is unchanged β†’ its path_len is valid. O(1). + set_path_len_from_parent(records, names, drive_letter, idx); + } + PathOp::DirRenamed => { + let old = records[idx as usize].path_len; + set_path_len_from_parent(records, names, drive_letter, idx); + let delta = i32::from(records[idx as usize].path_len) - i32::from(old); + if delta != 0 { + // every descendant's path runs *through* this dir, so its + // path_len shifts by exactly `delta`. DFS/BFS the subtree + // via the children CSR; pure arithmetic, no name walk. + shift_subtree_path_len(records, children, idx, delta); + } + } + } + } +} +``` + +- `set_path_len_from_parent`: `path_len = parent.path_len + 1 + name_char_count` + (root/drive cases identical to the BFS seed in `compute_path_lengths`). +- `shift_subtree_path_len`: stack/queue over `children.get(idx)` recursively, + `rec.path_len = (rec.path_len as i32 + delta) as u16` (saturating). + +**Wiring** (`compact_loader/rebuild.rs`): in Phase 1 keep the children/trigram/ext +full rebuilds, but **replace the `compute_path_lengths(...)` call with +`update_path_lengths_incremental(..., changed)`**. Children must be rebuilt +*before* the path update so the subtree walk sees current adjacency. Gate behind +a `changed.len() < FULL_RECOMPUTE_THRESHOLD` fallback to the full BFS for +pathological huge batches (and for the cold-load path, which still calls the full +`compute_path_lengths`). 
+ +**Edge cases the oracle (§7) must cover:** rename a directory with a deep subtree +(Δ propagation); FRS-reuse (create into a just-deleted slot); a file whose parent +was itself renamed in the same batch (process parents before children — sort +`changed` by depth, or rely on the BFS order the children CSR already gives); +case-only rename (`name_char_count` unchanged → Δ = 0, no subtree walk). + +## 6. Risk register + +| Risk | Mitigation | +|------|------------| +| Search correctness drift (base∪delta ≠ truth) | Oracle test (§7) is mandatory per phase; property-based over random op sequences. | +| Hot-path latency regression (delta merge cost) | Passthrough when `delta == None`; baseline timing gate (§8); keep delta small via compaction threshold. | +| Tombstone leak (memory grows on churny drive) | Compaction threshold bounds delta+tombstone size; `touched_records` trigger. | +| Rename edge cases (FRS reuse, case-only rename) | Dedicated oracle scenarios; reuse the USN net-state resolution already in `uffs-mft::usn`. | +| Path resolver fed stale children (Phase 4) | Path-resolver-specific oracle; Phase 4 isolated + last of the index deltas. | + +## 7. Oracle test harness (the core correctness guarantee) + +**Invariant:** for any sequence of USN ops, the two-tier index must be +**observationally identical** to a freshly-rebuilt full index. + +Location: `crates/uffs-core/src/compact/delta_oracle_tests.rs`. 
+ +``` +fn oracle(ops: &[Op]) { + let mut incremental = base_index(); // two-tier (delta path) + let mut rebuilt = base_index(); // control (full rebuild every apply) + for op in ops { + apply_incremental(&mut incremental, op); // delta path + apply_full_rebuild(&mut rebuilt, op); // O(n) control + for q in QUERY_BATTERY { // name / --ext / prefix / tree / path-resolve + assert_eq!(sorted(incremental.query(q)), sorted(rebuilt.query(q)), + "divergence after {op:?} on query {q:?}"); + } + } + // After a forced compaction, the base CSR must be byte-identical to a + // from-scratch rebuild of the same record set. + incremental.compact(); + assert_eq!(incremental.trigram, rebuilt.trigram); // byte-identical + assert_eq!(incremental.children, rebuilt.children); + assert_eq!(incremental.ext_index, rebuilt.ext_index); +} +``` + +- **Query battery:** exact-name, substring (trigram), `--ext`, prefix, tree/glob, + and **path resolution** (FastPathResolver) β€” one assertion per query type. +- **Op generation:** both hand-written regression scenarios (createβ†’renameβ†’delete, + FRS reuse, case-only rename, delete-then-recreate-into-same-dir) **and** a + `proptest`/seeded-random generator over `{create, delete, rename}` with a small + name alphabet (so trigrams collide and intersections are exercised). +- Runs cross-platform (no live MFT β€” synthetic records), so it gates every PR. + +## 8. Baseline + timing-regression detection + +- Add an env-gated micro-benchmark (`cargo bench` or a `#[ignore]` timing test) + that, on a synthetic N-record drive, measures: **apply latency**, **trigram / + ext / children search latency** at delta sizes `{0, 1k, 10k, 50k}`, and + **compaction latency**. +- Capture a **baseline JSON** (`docs/architecture/baselines/incremental-index-.json`) + committed at the end of Phase 0 (pure-base numbers) and refreshed per phase. 
+- The dev test-script (§10) prints a **timing table** tagged `IDXDELTA-TIMING` + and diffs against the committed baseline, flagging any search latency that + regresses beyond a tolerance (start at +15 %). This is how we catch a "delta + merge made search slower" regression on the Windows box, live. + +## 9. Dev instrumentation — `IDXDELTA` marker (removable) + +Mirror the `USNFIX` convention used for the live-USN debugging: + +- **Build identifier:** at daemon start, log + `tracing::info!(marker = "IDXDELTA", build = env!("...GIT_SHA or version"), "IDXDELTA build active")` + so the test-script can confirm *which* build it exercised (we hit the wrong-build + trap during USN testing — do not repeat it). +- **Per-apply timing:** `IDXDELTA-TIMING apply: delta_add=… tombstone=… compact=…(ms)`. +- **Per-search timing:** `IDXDELTA-TIMING search: base=… delta_merge=… total=…(ms) delta_len=…`. +- **Compaction events:** `IDXDELTA compact: folded delta_len=… into base records=… in …ms`. +- All such lines carry the literal token `IDXDELTA`. **Phase 6 removal:** + `grep -rn IDXDELTA crates/ scripts/` → delete every hit; the only survivors + become permanent `debug!`/metrics if we decide to keep them (decided in Phase 6, + not before). + +## 10. Dev test-script — `scripts/windows/idx-delta-verify.rs` + +Modeled on `scripts/windows/usn-verify.rs` (same `~/bin/uffs.exe` resolution, +`~/idxtest` scratch, `_run/` artifact dir, daemon-restart-with-logging pattern). +What it adds beyond usn-verify: + +1. **Build confirmation** — assert the daemon log contains `IDXDELTA build active` + and print the build id (fail fast on a stale binary). +2. **Churn generator** — create / rename / delete in escalating bursts (10, 100, + 1 000, 10 000 files) so the delta grows and compaction triggers, capturing each + search's result set to `_run/NN-*.csv` (correctness) AND the `IDXDELTA-TIMING` + lines to `_run/timing.log` (perf). +3. 
**Freshness probe** β€” after a burst, measure wall-clock from file-op to + search-visible (should be β‰ˆ apply interval, no backlog). +4. **Timing-regression gate** β€” parse `_run/timing.log`, build a table, diff + against the committed baseline (Β§8), and print `PASS`/`REGRESSION` per metric. +5. **Oracle cross-check (optional, on-box)** β€” re-run a search with the daemon + forced to compact, and confirm identical results pre/post compaction (the + live analogue of Β§7). + +Output: one shareable `~/idxtest/_run/` dir, exactly like the USN flow β€” so we can +"push β†’ pull on WIN β†’ run β†’ share `_run/`" each iteration. + +## 11. Tracking + +| Phase | Item | Status | PR | Notes | +|-------|------|--------|----|----| +| 0 | Design doc (+ measured baseline + data-driven re-order) | βœ… done | `2e57d6013`, this | | +| 0 | Dev markers + build-id stamp (Β§9) | βœ… done | `629966bc2` | `IDXDELTA` | +| 0 | Per-step apply timing (clone/loop/rebuild) | βœ… done | `629966bc2` | Β΅s integers | +| 0 | `idx-delta-verify.rs` WIN rig + baseline (Β§8, Β§10) | βœ… done | `629966bc2` | β‰ˆ1367 ms | +| 0 | `IndexDelta` type | βœ… done | `61dfde09d` | `compact/delta.rs`, unit-tested; posting/tombstone overlay | +| 0 | `delta: Option` field on `DriveCompactIndex` | βœ… done | `1cf72d589` | wired with `trigram_search` (Phase 2a) so each of ~20 ctor sites was touched once | +| 0 | Oracle harness (Β§7) | βœ… done | `9806bc339`, `b7c688e09` | path-len oracle + trigram base+delta oracle (overlay ≑ compacted rebuild) | +| **1** | **Incremental `compute_path_lengths` (Β§5.5)** | βœ… done | `9806bc339` | 623 ms β†’ ~O(changed); WIN-validated 0.005 ms; oracle byte-identical incl. 
dir-rename subtree Ξ” | +| **2a** | **`trigram_search` base+delta choke point (plumbing)** | βœ… done | `1cf72d589` | zero-behavior-change; field + 3 caller migration; rename-visibility unit-tested | +| **2b** | **Apply populates trigram delta; no per-tick rebuild** | βœ… done | `b7c688e09` | 338 ms β†’ ~0 (compaction at 50k touched); end-to-end oracle; awaiting WIN timing | +| β€” | *Decompose `compact.rs` 1363 β†’ 385* (refactor) | βœ… done | `c3728b0c1` | 5 submodules; off file-size exception list | +| 3 | Shrink clone β€” Arc-share base CSR indexes | ☐ todo | | 166 ms | +| 4 | Extension + children delta (`records_with_ext` / `children_of`) | ☐ todo | | 84 + 54 ms; children highest care | +| 5 | Unify; retire per-apply rebuild; re-tune apply interval | ☐ todo | | 30 s β†’ ~2 s | +| 6 | Remove `IDXDELTA` dev helpers (+ build.rs); graduate baseline β†’ perf test | ☐ todo | | grep-and-remove | + +**Done-definition (whole project):** apply is O(changes); oracle green; no search +latency regression vs baseline; production apply interval reduced; all `IDXDELTA` +dev scaffolding removed. 
diff --git a/scripts/ci/file_size_exceptions.txt b/scripts/ci/file_size_exceptions.txt index b6d8908d4..91304a425 100644 --- a/scripts/ci/file_size_exceptions.txt +++ b/scripts/ci/file_size_exceptions.txt @@ -14,7 +14,6 @@ crates/uffs-mft/src/reader/index_read.rs|PERMANENT: Single impl MftReader block crates/uffs-diag/src/bin/compare_scan_parity.rs|PERMANENT: Standalone diagnostic binary; single-file readability outweighs LOC policy for tooling crates/uffs-mcp/src/cookbook.rs|PERMANENT: Declarative JSON data (curated agent cookbook examples); splitting by line count would fragment the cohesive narrative crates/uffs-core/src/compact_cache.rs|PERMANENT: Serialize/deserialize pipeline for compact index cache (heap path + Phase 2b runtime mmap path), tightly coupled with shared parse_compact_body + assemble_compact_index helpers; tests already extracted to compact_cache/tests.rs sibling module -crates/uffs-core/src/compact.rs|PERMANENT: Core compact index data structures + builder; only 13 over limit crates/uffs-daemon/src/index/aggregation.rs|PERMANENT: Daemon-side aggregation dispatch; tightly coupled helpers, only 54 over limit crates/uffs-client/src/protocol/tests.rs|PERMANENT: Wire format round-trip test suite; splitting fragments test cohesion crates/uffs-core/src/search/backend_tests.rs|PERMANENT: Backend sort/filter integration test suite; shared fixtures require cohesion diff --git a/scripts/windows/idx-delta-verify.rs b/scripts/windows/idx-delta-verify.rs new file mode 100644 index 000000000..5ad417c40 --- /dev/null +++ b/scripts/windows/idx-delta-verify.rs @@ -0,0 +1,567 @@ +#!/usr/bin/env rust-script +//! ```cargo +//! [dependencies] +//! anyhow = "1" +//! ``` +//! +//! idx-delta-verify.rs β€” measurement rig + baseline for the incremental-index- +//! maintenance work (design: `docs/architecture/incremental-index-maintenance.md`). +//! +//! Phase 0 goal: **before** any delta work, prove the rig works on the WIN box +//! 
and capture a timing BASELINE so later phases can detect a regression. It +//! deliberately mirrors `scripts/windows/usn-verify.rs` (same `~/bin/uffs.exe` +//! resolution, `~/idxtest` scratch, `_run/` artifacts, daemon-restart-with- +//! logging) so the dev loop is identical: push -> pull on WIN -> run -> share +//! `_run/`. +//! +//! What it does: +//! 0. BIN SYNC — copies the freshly built `uffs`/`uffsd` (+ broker/mcp if +//! present) from **the build dir cargo actually uses** (`cargo metadata`'s +//! `target_directory`, honouring `CARGO_TARGET_DIR` / `.cargo/*.toml`; +//! override with `UFFS_RELEASE_DIR`) into `~/bin`, so the rig can never run +//! a stale daemon. Build, then run — no manual copy step. +//! 1. BUILD CONFIRMATION — restarts the daemon with logging, then asserts the +//! log contains `IDXDELTA build active`, prints the version + git SHA, and +//! asserts that SHA equals repo HEAD, or differs from it only in non-build +//! paths such as `scripts/` / `docs/` (hard stale-daemon guard). +//! 2. CHURN + TIMING — creates files in escalating bursts so each apply fires +//! the O(n) full rebuild, captures every `IDXDELTA-TIMING apply` line, and +//! summarises the per-index rebuild cost (children / trigram / ext / total) +//! at the drive's live record count. +//! 3. FRESHNESS — measures wall-clock from a create to the file being +//! search-visible (sanity: no backlog at the pinned apply interval). +//! 4. BASELINE — writes `_run/baseline.txt` (the numbers to commit per the +//! design doc §8) + `_run/idx-timing.log` (the raw IDXDELTA-TIMING lines). +//! +//! Usage: rust-script scripts\windows\idx-delta-verify.rs +//! +//! All `IDXDELTA` markers are dev-only; the design doc §9 / Phase 6 removes them. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread::sleep; +use std::time::{Duration, Instant}; + +use anyhow::{Context, Result, bail}; + +/// Apply-cadence override (ms) for the test daemon. 
Kept above the per-apply +/// rebuild cost (~600 ms on a multi-million-record drive) so apply ticks don't +/// outrun the rebuild and pile up — same rationale as the USN harness. +const APPLY_INTERVAL_MS: &str = "1500"; +/// Settle after `--daemon stop` so the socket / PID file clear. +const KILL_SETTLE: Duration = Duration::from_secs(2); +/// Poll cadence while waiting for a burst's files to become search-visible. +const POLL_INTERVAL: Duration = Duration::from_millis(500); +/// `info` enables the daemon build marker AND the `IDXDELTA-TIMING` apply line +/// (both logged at INFO); core trace adds per-change detail if needed. +const LOG_SPEC: &str = "info,uffs_core=info,uffs_daemon=info"; +/// Escalating create-burst sizes — bigger bursts exercise bigger apply batches. +/// The 100k burst crosses `TRIGRAM_COMPACT_THRESHOLD` (50k) so it also measures +/// a delta compaction (full trigram refold) under load, while the smaller +/// bursts measure the steady-state delta-overlay apply (trigram_us ≈ 0). +const BURSTS: &[usize] = &[1_000, 10_000, 100_000]; + +/// `~/bin/uffs.exe` — the canonical user-installed **Rust** binary. Pinned to +/// the explicit `.exe` so a bare `uffs` can't resolve the C++ `uffs.com` via +/// PATHEXT (see usn-verify.rs). On non-Windows targets the name is plain `uffs`. +/// No manual copy is needed: `main` runs `sync_bins` to refresh `~/bin` from the +/// cargo build dir before this binary is first executed — the spawned +/// `uffsd.exe` is the one next to this `uffs.exe`. +/// +/// # Panics +/// If neither `USERPROFILE` nor `HOME` is set. +fn uffs_bin() -> PathBuf { + let home = std::env::var_os("USERPROFILE") + .or_else(|| std::env::var_os("HOME")) + .map(PathBuf::from) + .expect("USERPROFILE or HOME must be set"); + let name = if cfg!(windows) { "uffs.exe" } else { "uffs" }; + home.join("bin").join(name) +} + +/// Display name for the cosmetic `$ ...` echoes — `uffs.exe` on Windows (never +/// bare `uffs` there), plain `uffs` on other targets. 
+fn uffs_display() -> &'static str { + if cfg!(windows) { "uffs.exe" } else { "uffs" } +} + +fn home_dir() -> PathBuf { + std::env::var_os("USERPROFILE") + .or_else(|| std::env::var_os("HOME")) + .map(PathBuf::from) + .expect("USERPROFILE or HOME must be set") +} + +/// Binaries the rig depends on, copied fresh from the build dir into `~/bin`. +/// `uffs` + `uffsd` are required (the daemon under test); `uffs-broker` and +/// `uffsmcp` are optional (copied only when they exist in the build dir) and +/// copied best-effort so a non-elevated box still re-syncs the two it needs. +const REQUIRED_BINS: &[&str] = &["uffs", "uffsd"]; +const OPTIONAL_BINS: &[&str] = &["uffs-broker", "uffsmcp"]; + +/// Add the platform executable suffix (`.exe` on Windows). +fn exe(name: &str) -> String { + if cfg!(windows) { format!("{name}.exe") } else { name.to_owned() } +} + +/// Resolve the `release/` dir of **the build cargo actually uses** — honouring +/// `CARGO_TARGET_DIR`, `.cargo/*.toml` `build.target-dir`, etc. — so the rig +/// copies the binary that was just built, not a stale `~/bin` copy (the +/// stale-binary trap that has bitten this dev loop repeatedly). +/// +/// Order: explicit `UFFS_RELEASE_DIR` override → `cargo metadata`'s +/// `target_directory` + `release`. +/// +/// Errors if `cargo metadata` cannot be spawned, exits non-zero, or its output +/// has no parsable `target_directory`. The override is returned as-is, without +/// checking that the directory exists. +fn release_dir() -> Result { + if let Some(dir) = std::env::var_os("UFFS_RELEASE_DIR") { + return Ok(PathBuf::from(dir)); + } + let out = Command::new("cargo") + .args(["metadata", "--format-version", "1", "--no-deps"]) + .output() + .context("failed to run `cargo metadata` to locate the build dir")?; + if !out.status.success() { + bail!( + "`cargo metadata` failed ({}). 
Run the rig from inside the repo, or set \ + UFFS_RELEASE_DIR to your build's release dir.", + out.status + ); + } + let json = String::from_utf8_lossy(&out.stdout); + let target = parse_target_directory(&json).context( + "could not find target_directory in `cargo metadata` output; \ + set UFFS_RELEASE_DIR explicitly", + )?; + Ok(PathBuf::from(target).join("release")) +} + +/// Extract the JSON string value of `"target_directory"` from one-line +/// `cargo metadata` output, unescaping `\\`/`\"`/`\/` (Windows paths arrive as +/// `C:\\rust-target\\ttapi`). No serde dependency — a focused hand-scan. +/// +/// Limits of the hand-scan: the key must appear exactly as +/// `"target_directory":"` (no whitespace around the colon); `\n` is the only +/// control escape decoded — any other escape (`\t`, `\uXXXX`, …) is emitted as +/// the literal character following the backslash. Returns `None` when the key +/// is missing or the string is never closed. +fn parse_target_directory(json: &str) -> Option { + let key = "\"target_directory\":\""; + let start = json.find(key)? + key.len(); + let mut out = String::new(); + let mut chars = json[start..].chars(); + while let Some(ch) = chars.next() { + match ch { + '"' => return Some(out), + '\\' => match chars.next()? { + 'n' => out.push('\n'), + other => out.push(other), // \\ -> \, \" -> ", \/ -> / + }, + other => out.push(other), + } + } + None +} + +/// Short HEAD SHA of the repo (`git rev-parse --short HEAD`), for the +/// build-id match guard. `None` if git cannot be spawned, exits non-zero, or +/// prints nothing. +fn git_head_short() -> Option { + let out = Command::new("git") + .args(["rev-parse", "--short", "HEAD"]) + .output() + .ok()?; + out.status + .success() + .then(|| String::from_utf8_lossy(&out.stdout).trim().to_owned()) + .filter(|sha| !sha.is_empty()) +} + +/// Whether the diff between the running daemon's build SHA and HEAD touches any +/// **build-affecting** path (anything under `crates/`, the root `Cargo.toml` / +/// `Cargo.lock`, or a `rust-toolchain*` file), i.e. the binary +/// is genuinely stale. A HEAD that advanced only through `scripts/` or `docs/` +/// (e.g. a verify-rig tweak) leaves the daemon binary current, so it is NOT +/// stale. Defaults to `true` (assume stale) if git can't answer — fail safe. 
+fn build_is_stale(daemon_sha: &str, head_sha: &str) -> bool { + let Ok(out) = Command::new("git") + .args(["diff", "--name-only", daemon_sha, head_sha]) + .output() + else { + return true; + }; + if !out.status.success() { + return true; + } + String::from_utf8_lossy(&out.stdout).lines().any(|path| { + path.starts_with("crates/") + || path == "Cargo.toml" + || path == "Cargo.lock" + || path.starts_with("rust-toolchain") + }) +} + +/// Copy freshly built binaries from the cargo build dir into `~/bin` so the rig +/// always exercises the just-built daemon. Required bins missing → bail with a +/// "build first" hint; optional bins are copied only if present, and a failed +/// optional copy only prints a `skip` line. +/// +/// Errors if the build dir cannot be resolved, `bin_dir` cannot be created, a +/// required binary is missing, or copying a required binary fails. +fn sync_bins(bin_dir: &Path) -> Result<()> { + let src_dir = release_dir()?; + println!("\n== Bin sync =="); + println!(" build dir: {}", src_dir.display()); + println!(" dest: {}", bin_dir.display()); + fs::create_dir_all(bin_dir).with_context(|| format!("create {}", bin_dir.display()))?; + + for name in REQUIRED_BINS { + let src = src_dir.join(exe(name)); + if !src.exists() { + bail!( + "required binary {} not found β€” build first \ + (e.g. `cargo build --release -p uffs-cli -p uffs-daemon`).", + src.display() + ); + } + copy_bin(&src, &bin_dir.join(exe(name)))?; + } + for name in OPTIONAL_BINS { + let src = src_dir.join(exe(name)); + if src.exists() { + // Best-effort: the broker is a running LocalSystem service, so its + // exe is legitimately locked (os error 32). The rig only needs a + // fresh uffs + uffsd, so a locked/failed optional copy just warns. + if let Err(err) = copy_bin(&src, &bin_dir.join(exe(name))) { + println!(" skip {} ({err})", exe(name)); + } + } + } + Ok(()) +} + +/// Copy one binary, reporting its source build mtime so a stale build is +/// visible at a glance (the age prints as `?` when the mtime cannot be read). 
+fn copy_bin(src: &Path, dest: &Path) -> Result<()> { + let built = src + .metadata() + .and_then(|meta| meta.modified()) + .ok() + .and_then(|time| time.elapsed().ok()) + .map_or_else(|| "?".to_owned(), |age| format!("{}s ago", age.as_secs())); + fs::copy(src, dest) + .with_context(|| format!("copy {} -> {}", src.display(), dest.display()))?; + println!(" copied {} (built {built})", dest.display()); + Ok(()) +} + +/// Run a `uffs.exe` subcommand inheriting stdout/stderr. +/// +/// Only a failure to spawn the process is an error: the child's exit status is +/// not inspected, so a non-zero exit still returns `Ok(())`. +fn run(uffs: &Path, args: &[&str]) -> Result<()> { + println!("\n$ {} {}", uffs_display(), args.join(" ")); + Command::new(uffs) + .args(args) + .status() + .with_context(|| format!("failed to spawn uffs {}", args.join(" ")))?; + Ok(()) +} + +/// Run a search, return (row_count, captured_stdout). The count is the number +/// of stdout lines starting with `"`, minus one for the header (saturating at +/// zero). Stderr and the exit status are ignored. +/// NOTE(review): the minus-one assumes the CSV header line is itself quoted; if +/// it is not, every count is one too low — confirm against the CSV writer. +fn search(uffs: &Path, term: &str) -> Result<(usize, String)> { + let output = Command::new(uffs) + .args([term, "--format", "csv"]) + .output() + .with_context(|| format!("failed to spawn uffs {term}"))?; + let text = String::from_utf8_lossy(&output.stdout).into_owned(); + let rows = text + .lines() + .filter(|line| line.starts_with('"')) + .count() + .saturating_sub(1); + Ok((rows, text)) +} + +/// Poll `search(term)` until at least `expected` rows are visible or `max_wait` +/// elapses. Returns `(rows_seen, latency, timed_out)` — the wall-clock from the +/// first poll to visibility is the true apply-to-searchable latency (vs. the old +/// fixed-sleep probe which only measured the settle constant). 
+fn poll_until_visible( + uffs: &Path, + term: &str, + expected: usize, + max_wait: Duration, +) -> Result<(usize, Duration, bool)> { + let start = Instant::now(); + loop { + let (rows, _) = search(uffs, term)?; + if rows >= expected { + return Ok((rows, start.elapsed(), false)); + } + if start.elapsed() >= max_wait { + return Ok((rows, start.elapsed(), true)); + } + sleep(POLL_INTERVAL); + } +} + +/// Poll `search(term)` until **zero** rows match (the deleted / renamed-away +/// file has left the index) or `max_wait` elapses. Returns +/// `(rows_remaining, latency, timed_out)`; `rows_remaining` is 0 unless the +/// wait timed out. At least one search always runs, even with a zero +/// `max_wait`, and a failed search aborts the poll with its error. +fn poll_until_absent(uffs: &Path, term: &str, max_wait: Duration) -> Result<(usize, Duration, bool)> { + let start = Instant::now(); + loop { + let (rows, _) = search(uffs, term)?; + if rows == 0 { + return Ok((0, start.elapsed(), false)); + } + if start.elapsed() >= max_wait { + return Ok((rows, start.elapsed(), true)); + } + sleep(POLL_INTERVAL); + } +} + +fn main() -> Result<()> { + let uffs = uffs_bin(); + + // Sync freshly built bins from the actual cargo build dir into ~/bin so the + // rig never runs a stale daemon. Capture HEAD so the build-confirmation + // step can assert the running uffsd is THIS commit. 
+ let bin_dir = home_dir().join("bin"); + sync_bins(&bin_dir)?; + let head_sha = git_head_short(); + + if !uffs.exists() { + bail!( + "uffs binary not found at {} even after bin sync β€” check the build dir.", + uffs.display() + ); + } + + let base = home_dir().join("idxtest"); + let run_dir = base.join("_run"); + println!("== UFFS incremental-index baseline rig =="); + println!("binary: {}", uffs.display()); + println!("scratch: {}", base.display()); + println!("artifacts: {}", run_dir.display()); + + let _ = fs::remove_dir_all(&base); + fs::create_dir_all(&run_dir).with_context(|| format!("create {}", run_dir.display()))?; + + run(&uffs, &["--version"])?; + + // ── Restart the daemon with logging into the artifacts dir ────────────── + let _ = Command::new(&uffs).args(["--daemon", "stop"]).status(); + sleep(KILL_SETTLE); + println!( + "\n$ {} --daemon start (UFFS_LOG={LOG_SPEC}, UFFS_USN_APPLY_INTERVAL_MS={APPLY_INTERVAL_MS})", + uffs_display() + ); + let status = Command::new(&uffs) + .args(["--daemon", "start"]) + .env("UFFS_LOG", LOG_SPEC) + .env("UFFS_LOG_DIR", &run_dir) + .env("UFFS_USN_APPLY_INTERVAL_MS", APPLY_INTERVAL_MS) + .status() + .context("failed to spawn `uffs --daemon start`")?; + if !status.success() { + bail!("`uffs --daemon start` exited with {status}"); + } + run(&uffs, &["--status"])?; + + let log_path = run_dir.join("uffsd.log"); + + // ── 1. BUILD CONFIRMATION β€” fail fast on a stale binary ───────────────── + println!("\n== Build confirmation =="); + let build_line = read_log(&log_path) + .lines() + .find(|line| line.contains("IDXDELTA build active")) + .map(str::to_owned); + let build_line = match build_line { + Some(line) => { + println!(" OK β€” {}", line.trim()); + line + } + None => bail!( + "no `IDXDELTA build active` line in {} β€” the running uffsd.exe is NOT an \ + IDXDELTA build. 
Rebuild then re-run (the rig re-syncs ~/bin for you).", + log_path.display() + ), + }; + + // Build-id match guard: the running daemon's git SHA must equal repo HEAD, + // else a stale uffsd is being exercised (the trap that has cost several + // 30-min WIN cycles). `git=""` is emitted by the IDXDELTA marker. + if let Some(head) = &head_sha { + let logged = build_line + .split("git=\"") + .nth(1) + .and_then(|rest| rest.split('"').next()) + .unwrap_or(""); + if logged == head { + println!(" build-id match: uffsd git={logged} == HEAD {head}"); + } else if build_is_stale(logged, head) { + bail!( + "STALE DAEMON: running uffsd is git={logged:?} but HEAD is {head:?} and \ + crate source / Cargo manifests differ between them β€” rebuild + re-run \ + (the rig re-syncs ~/bin, but you must `cargo build --release` first).", + ); + } else { + // HEAD advanced only through scripts/docs (e.g. this rig itself); + // the daemon binary is still current with the crate source. + println!( + " build-id OK: uffsd git={logged}, HEAD={head} differ only in \ + non-source files β€” binary is current." + ); + } + } + + // ── 2 + 3. CHURN, TIMING, FRESHNESS ───────────────────────────────────── + // Each burst is measured independently via a per-round filename prefix so + // the poll target is exactly that burst's `count` (not the running total), + // and creation throughput is reported apart from apply-to-visible latency. + for (round, &count) in BURSTS.iter().enumerate() { + println!("\n== Burst {}: create {count} files ==", round + 1); + let create_start = Instant::now(); + for i in 0..count { + fs::write(base.join(format!("idx_{round}_{i}.tmp")), b"x") + .with_context(|| format!("write idx_{round}_{i}.tmp"))?; + } + let create_elapsed = create_start.elapsed(); + + // Visibility budget scales with batch size: file-creation IO + USN poll + // + apply + (for the 100k burst) a delta compaction. ~20 s floor plus + // ~1 s per 5k files β†’ 100k allows ~40 s before flagging a backlog. 
+ let max_wait = Duration::from_secs(20 + (count as u64) / 5_000); + let term = format!("idx_{round}_"); + let (rows, latency, timed_out) = poll_until_visible(&uffs, &term, count, max_wait)?; + let rate = (count as f64) / create_elapsed.as_secs_f64().max(0.001); + println!( + " created {count} in {:.1}s ({:.0} files/s); '{term}' -> {rows}/{count} \ + visible after {:.1}s{}", + create_elapsed.as_secs_f64(), + rate, + latency.as_secs_f64(), + if timed_out { " <<< TIMED OUT (apply backlog)" } else { "" }, + ); + } + + // ── Rename + delete correctness smoke, on UNIQUE sentinel names ───────── + // `idxmutate*` shares no trigram with the bulk `idx__` files, so + // each search is unambiguous (the old `idx_0_1` probe matched 111 bulk + // files by substring β€” a false signal). Poll-until-applied, not a sleep. + println!("\n== Mutate smoke (unique sentinels) =="); + let src = base.join("idxmutate_src.tmp"); + let del = base.join("idxmutate_del.tmp"); + fs::write(&src, b"x").context("write idxmutate_src.tmp")?; + fs::write(&del, b"x").context("write idxmutate_del.tmp")?; + let (staged, _, stage_to) = + poll_until_visible(&uffs, "idxmutate", 2, Duration::from_secs(20))?; + println!( + " staged 2 sentinels; 'idxmutate' -> {staged}/2 visible{}", + if stage_to { " <<< TIMED OUT" } else { "" } + ); + + fs::rename(&src, base.join("idxmutate_renamed.tmp")).context("rename sentinel")?; + fs::remove_file(&del).context("delete sentinel")?; + + let mutate_wait = Duration::from_secs(20); + let (ren_rows, ren_lat, ren_to) = + poll_until_visible(&uffs, "idxmutate_renamed", 1, mutate_wait)?; + let (del_rows, del_lat, del_to) = poll_until_absent(&uffs, "idxmutate_del", mutate_wait)?; + let (old_rows, _, _) = poll_until_absent(&uffs, "idxmutate_src", Duration::from_secs(6))?; + println!( + " rename : 'idxmutate_renamed' -> {ren_rows} after {:.1}s (expect >=1){}", + ren_lat.as_secs_f64(), + if ren_to { " <<< FAIL/TIMED OUT" } else { "" } + ); + println!( + " delete : 'idxmutate_del' -> 
{del_rows} after {:.1}s (expect 0){}", + del_lat.as_secs_f64(), + if del_to { " <<< FAIL/TIMED OUT" } else { "" } + ); + println!(" oldname: 'idxmutate_src' -> {old_rows} (expect 0, renamed away)"); + + // ── Stop the daemon to flush, then extract + summarise the timing ─────── + println!("\n== Stopping daemon to flush the log =="); + let _ = Command::new(&uffs).args(["--daemon", "stop"]).status(); + sleep(KILL_SETTLE); + + let log = read_log(&log_path); + let timing_lines: Vec<&str> = log + .lines() + .filter(|line| line.contains("IDXDELTA-TIMING apply")) + .collect(); + fs::write(run_dir.join("idx-timing.log"), timing_lines.join("\n"))?; + + let baseline = summarise(&timing_lines); + println!("\n== BASELINE (per-apply cost breakdown) =="); + println!("{baseline}"); + fs::write(run_dir.join("baseline.txt"), &baseline)?; + + println!("\n== Done =="); + println!("Share: {}", run_dir.display()); + println!( + "Key: baseline.txt (commit per design Β§8), idx-timing.log (raw IDXDELTA-TIMING), uffsd.log." + ); + Ok(()) +} + +/// Mean (and sample count) of a numeric `key=value` tracing field across all +/// lines that carry it. Field-generic so new IDXDELTA-TIMING fields in later +/// phases need no parser change. +fn field_mean(lines: &[&str], key: &str) -> Option<(f64, usize)> { + let prefix = format!("{key}="); + let vals: Vec = lines + .iter() + .filter_map(|line| { + line.split_whitespace() + .find_map(|tok| tok.strip_prefix(&prefix)) + .and_then(|raw| raw.parse::().ok()) + }) + .collect(); + if vals.is_empty() { + None + } else { + let mean = vals.iter().sum::() / vals.len() as f64; + Some((mean, vals.len())) + } +} + +/// Build the human-readable baseline: the mean of each per-apply cost field. +/// The apply emits two lines (whole-body clone; per-change loop + rebuild), so +/// `clone_ms` and the rebuild fields come from different lines β€” we report each +/// mean and the implied full per-apply cost = clone + loop + rebuild. 
+fn summarise(lines: &[&str]) -> String { + if lines.is_empty() { + return " (no `IDXDELTA-TIMING apply` lines captured β€” did any apply fire? \ + check uffsd.log / the apply interval)" + .to_owned(); + } + let records = lines + .iter() + .filter_map(|line| { + line.split_whitespace() + .find_map(|tok| tok.strip_prefix("records=")) + .and_then(|raw| raw.parse::().ok()) + }) + .max() + .unwrap_or(0); + + // The daemon logs whole-microsecond fields (`*_us`) β€” integer, to respect + // uffs-core's no-float policy; render them as ms here (1 us = 0.001 ms). + let row = |label: &str, key: &str| -> String { + match field_mean(lines, key) { + Some((mean_us, count)) => { + format!(" mean {label:<10} {:>8.3} ms (n={count})\n", mean_us / 1000.0) + } + None => format!(" mean {label:<10} -- (no samples)\n"), + } + }; + + let mean_us = |key: &str| field_mean(lines, key).map_or(0.0, |(mean, _)| mean); + let implied_ms = (mean_us("clone_us") + mean_us("loop_us") + mean_us("rebuild_us")) / 1000.0; + + let mut out = format!( + " apply lines: {}\n drive records: {records}\n", + lines.len() + ); + out.push_str(&row("clone", "clone_us")); + out.push_str(&row("loop", "loop_us")); + out.push_str(&row("children", "children_us")); + out.push_str(&row("paths", "paths_us")); + out.push_str(&row("trigram", "trigram_us")); + out.push_str(&row("ext", "ext_us")); + out.push_str(&row("rebuild", "rebuild_us")); + out.push_str(&format!( + " ─────────────────────────────────\n \ + IMPLIED full apply β‰ˆ clone+loop+rebuild = {implied_ms:>8.3} ms \ + <- the per-apply cost to beat\n" + )); + out +} + +/// Read the daemon log, tolerating a missing file (returns empty). +fn read_log(path: &Path) -> String { + fs::read_to_string(path).unwrap_or_default() +}