Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ members = [
"crates/ruvector-graph-condense-wasm",
# Perception substrate: delta -> boundary -> coherence -> proof -> action
"crates/ruvector-perception",
# Agent memory compaction: coherence-gated graph clustering (ADR-199)
"crates/ruvector-memory-compact",
]
resolver = "2"

Expand Down
26 changes: 26 additions & 0 deletions crates/ruvector-memory-compact/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[package]
name = "ruvector-memory-compact"
version = "0.1.0"
edition = "2021"
description = "Coherence-gated agent memory compaction for ruvector: merge semantically redundant memories using graph clustering"
authors = ["ruvnet", "claude-flow"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
keywords = ["agent-memory", "vector-compaction", "coherence", "graph-clustering", "ruvector"]
categories = ["algorithms", "data-structures"]

[[bin]]
name = "benchmark"
path = "src/main.rs"

[features]
default = ["parallel"]
parallel = ["rayon"]

[dependencies]
rand = "0.8"
rayon = { version = "1.10", optional = true }
serde = { version = "1", features = ["derive"] }

[dev-dependencies]
rand = "0.8"
161 changes: 161 additions & 0 deletions crates/ruvector-memory-compact/src/coherence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//! Variant 3 — Coherence-gated compactor.
//!
//! Extends the graph-merge approach with per-cluster adaptive thresholds:
//! high-coherence clusters (tight, uniform) are merged aggressively, while
//! low-coherence clusters (mixed, heterogeneous) are preserved.
//!
//! Coherence score of a node = max(0, mean(w) - std_dev(w)) over the weights of that
//! node's k-NN edges. A candidate merge is gated on the average of its two endpoints'
//! scores. High score = tight neighbourhood.

use crate::graph::{CoherenceGraph, UnionFind};
use crate::kmeans::avg_intra_sim;
use crate::{
centroid, recall_clustered, CompactionResult, Compactor, MemoryEntry, MemoryStore,
WitnessRecord,
};

/// Coherence-gated memory compactor.
///
/// Holds the tuning knobs for a greedy, strongest-edge-first merge over a k-NN
/// similarity graph in which every candidate merge has to pass a coherence gate.
#[derive(Debug, Clone)]
pub struct CoherenceGatedCompactor {
    /// Number of nearest neighbours kept per node when the similarity graph is built.
    pub graph_k: usize,
    /// Minimum coherence score required to approve a merge (0.0–1.0).
    pub coherence_floor: f32,
    /// Max cluster size after merge.
    pub max_cluster: usize,
}

impl Default for CoherenceGatedCompactor {
    /// Defaults: 15 neighbours per node, a coherence floor of 0.50 and at most
    /// 20 members per merged cluster.
    fn default() -> Self {
        Self {
            graph_k: 15,
            coherence_floor: 0.50,
            max_cluster: 20,
        }
    }
}

impl Compactor for CoherenceGatedCompactor {
    /// Identifier of this variant; also written to the `variant` field of the result.
    fn name(&self) -> &'static str {
        "coherence-gated"
    }

    /// Compacts `store` in place, replacing its entries with one centroid entry per cluster.
    ///
    /// * `target_ratio` — the desired number of clusters is `round(len * target_ratio)`,
    ///   but never less than one. Merging may stop earlier if the gate or the size cap
    ///   rejects the remaining edges.
    /// * `queries`, `k` — forwarded to `recall_clustered` to score the compacted store
    ///   against the original entries.
    ///
    /// Every new entry receives a fresh id taken from `store.next_id`, the maximum `age`
    /// of its members, and a `WitnessRecord` listing the ids that were merged into it.
    /// `duration_ms` is always reported as 0 by this method.
    fn compact(
        &self,
        store: &mut MemoryStore,
        target_ratio: f64,
        queries: &[Vec<f32>],
        k: usize,
    ) -> CompactionResult {
        let n = store.len();
        let target_clusters = ((n as f64) * target_ratio).round().max(1.0) as usize;

        let graph = CoherenceGraph::build(&store.entries, self.graph_k);
        let clusters = self.coherence_merge(&graph, n, target_clusters);

        // The old entries are about to be replaced wholesale, so move them out instead
        // of deep-cloning every embedding. The graph owns its data and no longer
        // borrows the entries at this point.
        let before: Vec<MemoryEntry> = std::mem::take(&mut store.entries);

        let mut new_entries: Vec<MemoryEntry> = Vec::with_capacity(clusters.len());
        let mut witness: Vec<WitnessRecord> = Vec::with_capacity(clusters.len());
        let mut new_id = store.next_id;

        for cluster in &clusters {
            let embs: Vec<&[f32]> = cluster
                .iter()
                .map(|&i| before[i].embedding.as_slice())
                .collect();
            let c = centroid(&embs);
            let intra_sim = avg_intra_sim(&before, cluster);
            let merged_ids: Vec<u64> = cluster.iter().map(|&i| before[i].id).collect();
            witness.push(WitnessRecord {
                centroid_id: new_id,
                merged_ids,
                intra_sim,
            });
            new_entries.push(MemoryEntry {
                id: new_id,
                embedding: c,
                age: cluster.iter().map(|&i| before[i].age).max().unwrap_or(0),
                metadata: format!("coherence-gated({})", cluster.len()),
            });
            new_id += 1;
        }
        store.entries = new_entries;
        store.next_id = new_id;

        let recall = recall_clustered(queries, &before, &store.entries, &witness, k);
        let compacted = store.len();
        // An empty store would otherwise evaluate 0/0 and report NaN.
        let compaction_ratio = if n == 0 {
            0.0
        } else {
            1.0 - compacted as f64 / n as f64
        };
        CompactionResult {
            variant: self.name().to_string(),
            original_count: n,
            compacted_count: compacted,
            compaction_ratio,
            recall_at_k: recall,
            duration_ms: 0,
            witness_records: witness,
        }
    }
}

impl CoherenceGatedCompactor {
    /// Greedy, strongest-edge-first clustering with a coherence gate.
    ///
    /// Edges are visited in descending weight order. An edge joins the clusters of its
    /// endpoints only if
    /// * the merged cluster would hold at most `max_cluster` members,
    /// * the mean coherence of the two endpoints is at least `coherence_floor`, and
    /// * the edge weight is at least `0.8 * coherence_floor`.
    ///
    /// Merging stops as soon as `target_clusters` clusters remain. Edges with a NaN
    /// weight are ignored, and any NaN in the gate comparison rejects the merge.
    ///
    /// Returns the clusters as groups of node indices in `0..n`.
    fn coherence_merge(
        &self,
        graph: &CoherenceGraph,
        n: usize,
        target_clusters: usize,
    ) -> Vec<Vec<usize>> {
        // Pre-compute per-node neighbourhood coherence scores (read-only).
        let node_coherence = node_coherence_scores(graph, n);
        // The bridging edge is held to a slightly relaxed version of the floor.
        let edge_floor = self.coherence_floor * 0.8;

        // `total_cmp` is a total order, so sorting cannot panic; NaN weights carry no
        // information and are dropped before they can reach the gate.
        let mut candidates: Vec<(f32, usize, usize)> = graph
            .edges
            .iter()
            .filter(|e| !e.weight.is_nan())
            .map(|e| (e.weight, e.a, e.b))
            .collect();
        candidates.sort_unstable_by(|x, y| y.0.total_cmp(&x.0));

        let mut uf = UnionFind::new(n);
        // Member counts; only the slot of a current root is meaningful.
        let mut sizes: Vec<usize> = vec![1; n];
        let mut remaining = n;

        for &(weight, a, b) in &candidates {
            if remaining <= target_clusters {
                break;
            }
            let ra = uf.find(a);
            let rb = uf.find(b);
            if ra == rb {
                continue;
            }
            let merged_size = sizes[ra] + sizes[rb];
            if merged_size > self.max_cluster {
                continue;
            }
            // Coherence gate: average node coherence of the two endpoints, plus a
            // minimum strength for the bridging edge. Written as a positive test so
            // that a NaN on either side fails the gate.
            let endpoint_coherence = (node_coherence[a] + node_coherence[b]) / 2.0;
            let passes_gate =
                endpoint_coherence >= self.coherence_floor && weight >= edge_floor;
            if !passes_gate {
                continue;
            }
            uf.union(a, b);
            let root = uf.find(a);
            sizes[root] = merged_size;
            remaining -= 1;
        }

        uf.components(n)
    }
}

/// Per-node coherence for nodes `0..n`: `mean - std_dev` of the weights on the node's
/// adjacency list, clamped below at zero. A node without neighbours scores 0.
///
/// Only reads `graph.adj`; the standard deviation is the population form (divides by
/// the neighbour count).
fn node_coherence_scores(graph: &CoherenceGraph, n: usize) -> Vec<f32> {
    let mut scores: Vec<f32> = Vec::with_capacity(n);
    for node in 0..n {
        let neighbours = &graph.adj[node];
        let score = if neighbours.is_empty() {
            0.0_f32
        } else {
            let count = neighbours.len() as f32;
            let mean = neighbours.iter().map(|&(_, w)| w).sum::<f32>() / count;
            let variance = neighbours
                .iter()
                .map(|&(_, w)| (w - mean).powi(2))
                .sum::<f32>()
                / count;
            (mean - variance.sqrt()).max(0.0)
        };
        scores.push(score);
    }
    scores
}
163 changes: 163 additions & 0 deletions crates/ruvector-memory-compact/src/graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
//! k-NN coherence graph construction over a MemoryStore.
//!
//! Builds a sparse similarity graph: each node is a memory entry; each edge
//! (i, j) carries the cosine similarity between entry i and entry j. Only the
//! top-k neighbours per node are stored to keep the graph tractable.

use crate::{cosine_sim, MemoryEntry};

/// A weighted edge in the coherence graph.
///
/// `CoherenceGraph::build` stores each pair once, with `a < b`.
#[derive(Debug, Clone, PartialEq)]
pub struct Edge {
    /// Index of the lower-numbered endpoint.
    pub a: usize,
    /// Index of the higher-numbered endpoint.
    pub b: usize,
    /// Cosine similarity between the two endpoints' embeddings.
    pub weight: f32,
}

/// Sparse k-NN coherence graph.
#[derive(Debug, Clone)]
pub struct CoherenceGraph {
    /// Number of nodes (one per memory entry).
    pub n: usize,
    /// Adjacency list: for each node, its (neighbour_index, similarity) pairs.
    pub adj: Vec<Vec<(usize, f32)>>,
    /// Every k-NN pair exactly once, stored with `a < b`.
    pub edges: Vec<Edge>,
}

impl CoherenceGraph {
    /// Build from a slice of memory entries with `k` neighbours per node.
    ///
    /// For each node the cosine similarity to every other node is computed and the `k`
    /// most similar are kept in `adj` (ties broken by the lower index). Similarities
    /// that are NaN are discarded rather than ranked.
    ///
    /// k-NN relations are not symmetric: `j` can be among `i`'s neighbours without `i`
    /// being among `j`'s. `edges` therefore records a pair as soon as *either* endpoint
    /// lists the other, and records it only once.
    pub fn build(entries: &[MemoryEntry], k: usize) -> Self {
        let n = entries.len();
        let mut adj: Vec<Vec<(usize, f32)>> = vec![Vec::new(); n];
        let mut edges: Vec<Edge> = Vec::new();

        for i in 0..n {
            // Similarity to all other nodes; NaN carries no ranking information.
            let mut ranked: Vec<(f32, usize)> = (0..n)
                .filter(|&j| j != i)
                .map(|j| (cosine_sim(&entries[i].embedding, &entries[j].embedding), j))
                .filter(|(sim, _)| !sim.is_nan())
                .collect();
            // Keep top-k by similarity. `total_cmp` is a total order, so this cannot
            // panic; the index tie-break makes the selection independent of the sort
            // algorithm.
            ranked.sort_unstable_by(|x, y| y.0.total_cmp(&x.0).then_with(|| x.1.cmp(&y.1)));
            ranked.truncate(k);

            for (sim, j) in ranked {
                // Nodes below `i` are already finished. If `j` listed `i` as a
                // neighbour, the pair was recorded when `j` was processed.
                let already_recorded = j < i && adj[j].iter().any(|&(nb, _)| nb == i);
                if !already_recorded {
                    edges.push(Edge {
                        a: i.min(j),
                        b: i.max(j),
                        weight: sim,
                    });
                }
                adj[i].push((j, sim));
            }
        }
        Self { n, adj, edges }
    }

    /// Return the `(a, b)` index pairs of all edges whose weight is at least `threshold`.
    pub fn edges_above(&self, threshold: f32) -> Vec<(usize, usize)> {
        self.edges
            .iter()
            .filter_map(|e| (e.weight >= threshold).then_some((e.a, e.b)))
            .collect()
    }

    /// Intra-cluster coherence: average similarity among all pairs in `cluster`.
    ///
    /// Pairs that are not connected in the k-NN graph contribute 0 to the sum but still
    /// count towards the average (their similarity is low by assumption). Clusters with
    /// fewer than two members score 1.0.
    pub fn cluster_coherence(&self, cluster: &[usize]) -> f32 {
        if cluster.len() < 2 {
            return 1.0;
        }
        let mut sum = 0.0_f32;
        let mut pairs = 0usize;
        for (pos, &a) in cluster.iter().enumerate() {
            for &b in &cluster[pos + 1..] {
                if let Some(w) = self.edge_weight(a, b) {
                    sum += w;
                }
                pairs += 1;
            }
        }
        if pairs == 0 {
            1.0
        } else {
            sum / pairs as f32
        }
    }

    /// Coherence score for a cluster: 1 - std_dev(pairwise similarities).
    /// High score means all members are uniformly similar (tight cluster).
    ///
    /// Only pairs connected in the k-NN graph are considered. Returns 1.0 for clusters
    /// with fewer than two members and 0.0 when no member pair is connected. The result
    /// is clamped below at zero.
    pub fn cluster_coherence_score(&self, cluster: &[usize]) -> f32 {
        if cluster.len() < 2 {
            return 1.0;
        }
        let mut sims: Vec<f32> = Vec::new();
        for (pos, &a) in cluster.iter().enumerate() {
            for &b in &cluster[pos + 1..] {
                sims.extend(self.edge_weight(a, b));
            }
        }
        if sims.is_empty() {
            return 0.0;
        }
        let mean = sims.iter().sum::<f32>() / sims.len() as f32;
        let variance = sims.iter().map(|&s| (s - mean).powi(2)).sum::<f32>() / sims.len() as f32;
        (1.0 - variance.sqrt()).max(0.0)
    }

    /// Similarity stored for the pair `(a, b)`: looks in `a`'s adjacency list first,
    /// then in `b`'s. `None` when neither node lists the other.
    fn edge_weight(&self, a: usize, b: usize) -> Option<f32> {
        let lookup = |from: usize, to: usize| {
            self.adj[from]
                .iter()
                .find(|&&(nb, _)| nb == to)
                .map(|&(_, w)| w)
        };
        lookup(a, b).or_else(|| lookup(b, a))
    }
}

/// Union-Find for connected-component clustering.
///
/// Uses union by rank together with path compression.
#[derive(Debug, Clone)]
pub struct UnionFind {
    /// `parent[x]` is the parent of `x`; a root is its own parent.
    parent: Vec<usize>,
    /// Upper bound on the height of the tree rooted at each node.
    rank: Vec<usize>,
}

impl UnionFind {
    /// Creates `n` singleton sets, one per index in `0..n`.
    pub fn new(n: usize) -> Self {
        Self {
            parent: (0..n).collect(),
            rank: vec![0; n],
        }
    }

    /// Returns the representative of the set containing `x`, compressing the path
    /// from `x` to the root along the way.
    ///
    /// # Panics
    /// Panics if `x` is out of range.
    pub fn find(&mut self, x: usize) -> usize {
        // First pass: locate the root without recursion.
        let mut root = x;
        while self.parent[root] != root {
            root = self.parent[root];
        }
        // Second pass: point every node on the path straight at the root.
        let mut node = x;
        while self.parent[node] != root {
            let next = self.parent[node];
            self.parent[node] = root;
            node = next;
        }
        root
    }

    /// Merges the sets containing `x` and `y`; does nothing if they already coincide.
    pub fn union(&mut self, x: usize, y: usize) {
        let rx = self.find(x);
        let ry = self.find(y);
        if rx == ry {
            return;
        }
        match self.rank[rx].cmp(&self.rank[ry]) {
            std::cmp::Ordering::Less => self.parent[rx] = ry,
            std::cmp::Ordering::Greater => self.parent[ry] = rx,
            std::cmp::Ordering::Equal => {
                self.parent[ry] = rx;
                self.rank[rx] += 1;
            }
        }
    }

    /// Collect components as groups of node indices.
    ///
    /// Considers the nodes `0..n`. The result is deterministic: groups are ordered by
    /// their smallest member and each group lists its members in ascending order.
    pub fn components(&mut self, n: usize) -> Vec<Vec<usize>> {
        // slot_of_root[r] is the position in `groups` of the component rooted at `r`.
        let mut slot_of_root: Vec<Option<usize>> = vec![None; self.parent.len()];
        let mut groups: Vec<Vec<usize>> = Vec::new();
        for i in 0..n {
            let root = self.find(i);
            let slot = match slot_of_root[root] {
                Some(slot) => slot,
                None => {
                    groups.push(Vec::new());
                    let slot = groups.len() - 1;
                    slot_of_root[root] = Some(slot);
                    slot
                }
            };
            groups[slot].push(i);
        }
        groups
    }
}
Loading
Loading