diff --git a/src/cli/index.ts b/src/cli/index.ts index b3c29d6..62ddfbe 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -77,12 +77,17 @@ Available providers for storing and retrieving memories: Extracts memories via LLM, chunks + embeds extracted content, hybrid BM25 + vector search. Requires: OPENAI_API_KEY (for memory extraction via gpt-4o-mini + embeddings) + basic-memory Basic Memory - local-first Markdown knowledge graph + Writes sessions as notes via the local "bm" CLI; full-text + semantic search. + Requires: basic-memory CLI ("uv tool install basic-memory") + Usage: -p supermemory Use Supermemory as the memory provider -p mem0 Use Mem0 as the memory provider -p zep Use Zep as the memory provider -p filesystem Use file-based memory (CLAUDE.md style) -p rag Use hybrid RAG memory (OpenClaw/QMD style) + -p basic-memory Use Basic Memory (local Markdown knowledge graph) `) } diff --git a/src/providers/README.md b/src/providers/README.md index 42d200e..9ae608b 100644 --- a/src/providers/README.md +++ b/src/providers/README.md @@ -55,3 +55,44 @@ Example: See `src/providers/zep/prompts.ts` | `supermemory` | `supermemory` | Raw JSON sessions | | `mem0` | `mem0ai` | v2 API with graph | | `zep` | `@getzep/zep-cloud` | Graph-based, custom prompts | +| `filesystem` | OpenAI | MEMORY.md-style: LLM-extracted Markdown + text search | +| `rag` | OpenAI | OpenClaw/QMD-style: chunked + embedded, hybrid BM25 + vector | +| `basic-memory` | `bm` CLI | Local Markdown knowledge graph; hybrid FTS + semantic search | + +## Basic Memory Setup + +[Basic Memory](https://github.com/basicmachines-co/basic-memory) is a local-first +knowledge graph built from Markdown files. The provider drives the local `bm` CLI +(`bm tool ... --local`, JSON output) — no API key or hosted service is required. + +1. 
**Install the CLI** (Python, via [uv](https://docs.astral.sh/uv/)): + + ```bash + uv tool install basic-memory + # verify + bm --version + ``` + + If the executable is named differently or not on `PATH`, set `BASIC_MEMORY_CLI` + to the command name or absolute path (e.g. `export BASIC_MEMORY_CLI=basic-memory`). + +2. **Run the benchmark** — no key needed: + + ```bash + bun run src/index.ts run -p basic-memory -b locomo -s 5 + ``` + +**How it works** + +- **Isolation:** the provider points `BASIC_MEMORY_CONFIG_DIR` / `BASIC_MEMORY_HOME` + at `data/providers/basic-memory`, and creates one throwaway BM **project per + `containerTag`**. It never touches your real `~/.config/basic-memory` config or + existing projects. +- **Ingest:** each session is written as a Markdown note via `bm tool write-note`. +- **Indexing:** `awaitIndexing` polls `bm status` until the project's file/db sync + settles, then runs `bm reindex --embeddings` to build vector embeddings (these lag + the FTS index, and are needed for semantic recall). +- **Search:** `bm tool search-notes --hybrid` combines full-text and semantic search. +- **Clear:** the BM project and its on-disk data are removed. + +The first search/reindex downloads the embedding model to a local cache (one-time). 
diff --git a/src/providers/basic-memory/index.test.ts b/src/providers/basic-memory/index.test.ts new file mode 100644 index 0000000..46ad62a --- /dev/null +++ b/src/providers/basic-memory/index.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, test } from "bun:test" +import { formatSessionNote, parseJsonOutput, projectName } from "./index" +import type { UnifiedSession } from "../../types/unified" + +describe("projectName", () => { + test("passes through safe characters", () => { + expect(projectName("q1-run_abc-123")).toBe("q1-run_abc-123") + }) + + test("replaces unsafe characters with dashes", () => { + expect(projectName("q1/run abc:42")).toBe("q1-run-abc-42") + }) +}) + +describe("parseJsonOutput", () => { + test("parses clean JSON", () => { + expect(parseJsonOutput<{ a: number }>('{"a":1}')).toEqual({ a: 1 }) + }) + + test("strips leading CLI noise before the JSON object", () => { + const noisy = 'Fetching 5 files: 100%\nWarning: no HF_TOKEN\n{"results":[{"title":"x"}]}' + expect(parseJsonOutput<{ results: unknown[] }>(noisy)).toEqual({ + results: [{ title: "x" }], + }) + }) + + test("parses a JSON array preceded by noise", () => { + expect(parseJsonOutput("progress...\n[1,2,3]")).toEqual([1, 2, 3]) + }) + + test("throws when no JSON is present", () => { + expect(() => parseJsonOutput("no json here")).toThrow() + }) +}) + +describe("formatSessionNote", () => { + const session: UnifiedSession = { + sessionId: "s1", + metadata: { date: "2026-03-01T10:00:00Z", formattedDate: "March 1, 2026" }, + messages: [ + { role: "user", speaker: "Caroline", content: "I adopted Biscuit." }, + { role: "assistant", speaker: "Melanie", content: "Nice!" 
}, + ], + } + + test("includes the formatted date and conversation", () => { + const note = formatSessionNote(session) + expect(note).toContain("**Date:** March 1, 2026") + expect(note).toContain("## Conversation") + expect(note).toContain("**Caroline**: I adopted Biscuit.") + expect(note).toContain("**Melanie**: Nice!") + }) + + test("falls back to role and ISO date when speaker/formattedDate missing", () => { + const note = formatSessionNote({ + sessionId: "s2", + metadata: { date: "2026-03-02T00:00:00Z" }, + messages: [{ role: "user", content: "hi" }], + }) + expect(note).toContain("**Date:** 2026-03-02T00:00:00Z") + expect(note).toContain("**user**: hi") + }) +}) diff --git a/src/providers/basic-memory/index.ts b/src/providers/basic-memory/index.ts new file mode 100644 index 0000000..cdd98f7 --- /dev/null +++ b/src/providers/basic-memory/index.ts @@ -0,0 +1,347 @@ +import { spawn } from "node:child_process" +import { mkdir, rm } from "node:fs/promises" +import { accessSync, constants } from "node:fs" +import { delimiter, isAbsolute, join } from "node:path" +import type { + Provider, + ProviderConfig, + IngestOptions, + IngestResult, + SearchOptions, + IndexingProgressCallback, +} from "../../types/provider" +import type { UnifiedSession } from "../../types/unified" +import { logger } from "../../utils/logger" +import { BASIC_MEMORY_PROMPTS } from "./prompts" + +/** + * Root directory for this provider's isolated Basic Memory installation. + * We deliberately point BASIC_MEMORY_CONFIG_DIR / BASIC_MEMORY_HOME here so the + * benchmark never reads or writes the user's real BM config or projects. + */ +const BASE_DIR = join(process.cwd(), "data", "providers", "basic-memory") +const CONFIG_DIR = join(BASE_DIR, "config") +const PROJECTS_DIR = join(BASE_DIR, "projects") + +/** Max time to wait for BM to report a settled sync state, in ms. */ +const INDEX_SETTLE_TIMEOUT_MS = 120_000 +/** Poll interval while waiting for indexing to settle, in ms. 
/**
 * Resolve a CLI name or path to the path of an executable.
 *
 * Resolution rules (mirroring how a shell treats a command word):
 * - empty string: never resolves (returns null);
 * - absolute path: returned unchanged when executable, otherwise null;
 * - relative path containing a directory separator (e.g. "./bin/bm"):
 *   resolved against the current working directory and NOT searched on PATH;
 * - bare command name: each non-empty PATH entry is tried in order and the
 *   first executable candidate wins.
 *
 * The provider resolves the CLI up front so it can hand `spawn` a full path
 * and fail early with an actionable message when the CLI is missing.
 *
 * @param bin Command name or path of the CLI to locate.
 * @returns The resolved path, or null when nothing executable was found.
 */
function resolveBinary(bin: string): string | null {
  // join(dir, "") is the directory itself, which passes the X_OK check below,
  // so an empty name must be rejected explicitly.
  if (bin === "") return null

  if (isAbsolute(bin)) {
    return isExecutable(bin) ? bin : null
  }

  // A name with a separator is a path, not a command to look up on PATH.
  const looksLikePath =
    bin.includes("/") || (process.platform === "win32" && bin.includes("\\"))
  if (looksLikePath) {
    const fromCwd = join(process.cwd(), bin)
    return isExecutable(fromCwd) ? fromCwd : null
  }

  for (const dir of (process.env.PATH ?? "").split(delimiter)) {
    if (dir === "") continue
    const candidate = join(dir, bin)
    if (isExecutable(candidate)) return candidate
  }
  return null
}

/**
 * Report whether the current process has execute permission on `path`.
 *
 * NOTE(review): this is a pure permission probe (`accessSync` with `X_OK`); it
 * does not check that `path` is a regular file, so a searchable directory also
 * passes. Callers should only feed it file candidates.
 */
function isExecutable(path: string): boolean {
  try {
    accessSync(path, constants.X_OK)
    return true
  } catch {
    return false
  }
}
/**
 * Basic Memory provider.
 *
 * Drives a local Basic Memory install through its command-line interface.
 * Every command is spawned with BASIC_MEMORY_CONFIG_DIR / BASIC_MEMORY_HOME
 * pointing at this provider's own directories, and every containerTag is mapped
 * (via `projectName`) to its own BM project, so runs are kept apart from each
 * other and from the user's real configuration.
 *
 * - Ingest:   one `bm tool write-note` call per session, note body on stdin.
 * - Indexing: polls `bm status` until it reports "no changes" (or a timeout
 *             expires), then runs `bm reindex --full --embeddings`.
 * - Search:   `bm tool search-notes --hybrid`, parsed from JSON stdout.
 * - Clear:    `bm project remove` plus deletion of the project directory.
 *
 * Requires the `bm` (basic-memory) CLI. Install with:
 *   uv tool install basic-memory
 */
export class BasicMemoryProvider implements Provider {
  name = "basic-memory"
  prompts = BASIC_MEMORY_PROMPTS
  // Different containerTags use different projects, so cross-question
  // concurrency is allowed; ingest is kept lower to go easy on the local CLI.
  concurrency = {
    default: 10,
    ingest: 4,
  }

  /** Path of the CLI executable; replaced by the resolved path in initialize(). */
  private bmBin = "bm"

  /**
   * Locate the CLI, create the isolated config/projects directories and verify
   * that the CLI actually runs.
   *
   * @param config Provider config; a non-blank `baseUrl` overrides the CLI
   *   command name or path (defaults to "bm").
   * @throws Error when the CLI cannot be found or `--version` fails.
   */
  async initialize(config: ProviderConfig): Promise<void> {
    const requested =
      typeof config.baseUrl === "string" && config.baseUrl.trim() ? config.baseUrl.trim() : "bm"

    // Resolve to a concrete path up front so a missing CLI fails here with an
    // actionable message instead of at the first spawn.
    const resolved = resolveBinary(requested)
    if (!resolved) {
      throw new Error(
        `Basic Memory CLI ("${requested}") not found on PATH. ` +
          `Install it with "uv tool install basic-memory", or set BASIC_MEMORY_CLI ` +
          `to the absolute path of the bm executable.`
      )
    }
    this.bmBin = resolved

    await mkdir(CONFIG_DIR, { recursive: true })
    await mkdir(PROJECTS_DIR, { recursive: true })

    // Found on disk is not the same as runnable: smoke-test it.
    try {
      await this.runBm(["--version"])
    } catch (e) {
      throw new Error(
        `Basic Memory CLI ("${this.bmBin}") found but not runnable. ` +
          `Install it with "uv tool install basic-memory". Original error: ${e}`
      )
    }
    logger.info("Initialized Basic Memory provider (local bm CLI)")
  }

  /**
   * Write each session as a Markdown note into the project for
   * `options.containerTag`.
   *
   * @returns The permalink reported by the CLI for each note, falling back to
   *   the session id when no permalink is returned.
   * @throws Error when a `write-note` command fails or prints no JSON.
   */
  async ingest(sessions: UnifiedSession[], options: IngestOptions): Promise<IngestResult> {
    const project = projectName(options.containerTag)
    await this.ensureProject(project)

    const documentIds: string[] = []

    // Deliberately sequential: all notes of one ingest go to the same project.
    for (const session of sessions) {
      const content = formatSessionNote(session)
      const result = await this.runBmJson<{ permalink?: string }>(
        [
          "tool",
          "write-note",
          "--title",
          `Session ${session.sessionId}`,
          "--folder",
          "sessions",
          "--project",
          project,
          "--local",
        ],
        content
      )
      const id = result?.permalink || session.sessionId
      documentIds.push(id)
      logger.debug(`Ingested session ${session.sessionId} -> ${id}`)
    }

    return { documentIds }
  }

  /**
   * Wait for the project's index to settle, then rebuild embeddings.
   *
   * Best effort: a failing status check, an expired settle timeout or a failing
   * reindex is logged as a warning and never thrown. `onProgress` is called once
   * with nothing completed and once at the end with every document id.
   */
  async awaitIndexing(
    result: IngestResult,
    containerTag: string,
    onProgress?: IndexingProgressCallback
  ): Promise<void> {
    const project = projectName(containerTag)
    const total = result.documentIds.length

    onProgress?.({ completedIds: [], failedIds: [], total })

    // Poll `bm status` until its output contains "no changes".
    const deadline = Date.now() + INDEX_SETTLE_TIMEOUT_MS
    let settled = false
    while (Date.now() < deadline) {
      try {
        const status = await this.runBm(["status", "--project", project, "--local"])
        settled = /no changes/i.test(status)
      } catch (e) {
        logger.warn(`status check failed for ${project}: ${e}`)
      }
      if (settled) break
      await sleep(INDEX_POLL_MS)
    }
    if (!settled) {
      // Previously this fell through silently, hiding a possibly stale index.
      logger.warn(
        `index for ${project} did not settle within ${INDEX_SETTLE_TIMEOUT_MS}ms; continuing anyway`
      )
    }

    // Force a full embeddings rebuild so hybrid search has vectors to query.
    // (`reindex` is invoked with `-p` and without `--local`.)
    try {
      await this.runBm(["reindex", "-p", project, "--full", "--embeddings"])
    } catch (e) {
      logger.warn(`embeddings reindex failed for ${project}: ${e}`)
    }

    onProgress?.({
      completedIds: result.documentIds,
      failedIds: [],
      total,
    })
  }

  /**
   * Run a hybrid search in the project for `options.containerTag`.
   *
   * @returns The `results` array from the CLI's JSON output; an empty array
   *   when the command fails, prints no JSON, or has no `results` field.
   */
  async search(query: string, options: SearchOptions): Promise<unknown[]> {
    const project = projectName(options.containerTag)
    const limit = options.limit || 10

    let parsed: { results?: BmSearchResult[] }
    try {
      // NOTE(review): `query` is passed as a bare positional argument; a query
      // starting with "-" may be parsed as an option by the CLI — confirm.
      parsed = await this.runBmJson<{ results?: BmSearchResult[] }>([
        "tool",
        "search-notes",
        query,
        "--project",
        project,
        "--local",
        "--hybrid",
        "--page-size",
        String(limit),
      ])
    } catch (e) {
      logger.warn(`Search failed for ${project}: ${e}`)
      return []
    }

    return parsed?.results || []
  }

  /**
   * Remove the BM project for `containerTag` and delete its directory.
   * Both steps are best effort; failures are logged, not thrown.
   */
  async clear(containerTag: string): Promise<void> {
    const project = projectName(containerTag)
    try {
      await this.runBm(["project", "remove", project, "--local"])
    } catch (e) {
      logger.warn(`Failed to remove project ${project}: ${e}`)
    }
    try {
      await rm(join(PROJECTS_DIR, project), { recursive: true, force: true })
    } catch (e) {
      logger.warn(`Failed to remove project dir for ${project}: ${e}`)
    }
    logger.info(`Cleared Basic Memory data for: ${containerTag}`)
  }

  /** Create the project directory and register it with BM if it doesn't exist yet. */
  private async ensureProject(project: string): Promise<void> {
    const projectPath = join(PROJECTS_DIR, project)
    await mkdir(projectPath, { recursive: true })
    try {
      await this.runBm(["project", "add", project, projectPath, "--local"])
    } catch (e) {
      // "already exists" is expected on re-runs; anything else is logged.
      if (!/already exists/i.test(String(e))) {
        logger.warn(`project add for ${project} returned: ${e}`)
      }
    }
  }

  /**
   * Run a `bm` command inside the isolated environment and return its stdout.
   *
   * @param args  Arguments passed to the CLI.
   * @param stdin Optional text written to the child's stdin before it is closed.
   * @throws Error when the process cannot be spawned, exits non-zero, or is
   *   terminated by a signal; the message carries stderr (or stdout if empty).
   */
  private runBm(args: string[], stdin?: string): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      const child = spawn(this.bmBin, args, {
        env: {
          ...process.env,
          BASIC_MEMORY_CONFIG_DIR: CONFIG_DIR,
          BASIC_MEMORY_HOME: PROJECTS_DIR,
        },
      })

      // Decode as a stream: per-chunk toString() corrupts multi-byte UTF-8
      // characters that happen to be split across two chunks.
      child.stdout.setEncoding("utf8")
      child.stderr.setEncoding("utf8")

      let stdout = ""
      let stderr = ""
      child.stdout.on("data", (chunk: string) => {
        stdout += chunk
      })
      child.stderr.on("data", (chunk: string) => {
        stderr += chunk
      })

      child.on("error", reject)
      child.on("close", (code, signal) => {
        if (code === 0) {
          resolve(stdout)
          return
        }
        const how = code === null ? `was killed by ${signal}` : `exited ${code}`
        reject(new Error(`bm ${args[0]} ${how}: ${stderr || stdout}`))
      })

      // If bm exits before draining stdin the write fails (EPIPE). Without a
      // listener that becomes an unhandled stream error; the exit code handled
      // in "close" already reports the real failure.
      child.stdin.on("error", (err) => {
        logger.debug(`bm ${args[0]} stdin closed early: ${err}`)
      })

      if (stdin !== undefined) {
        child.stdin.end(stdin)
      } else {
        child.stdin.end()
      }
    })
  }

  /** Run a `bm` command and parse the JSON payload found in its stdout. */
  private async runBmJson<T>(args: string[], stdin?: string): Promise<T> {
    const out = await this.runBm(args, stdin)
    return parseJsonOutput<T>(out)
  }
}
/**
 * Parse the JSON payload out of `bm tool` stdout.
 *
 * The CLI may print non-JSON noise (progress bars, warnings) around the
 * payload, and that noise can itself contain brackets ("[info] ...",
 * "Progress {3/5}"). The whole trimmed output is tried first; if that fails,
 * every `{` / `[` is tried in order as a possible payload start, each paired
 * with the last matching closing bracket, and the first span that parses wins.
 *
 * @param out Raw stdout of a `bm` command.
 * @returns The parsed value, cast to `T` (the shape is not validated).
 * @throws Error "No JSON found ..." when the output contains no `{` or `[`,
 *   or "Malformed JSON ..." when no candidate span parses.
 */
export function parseJsonOutput<T>(out: string): T {
  const text = out.trim()
  try {
    return JSON.parse(text) as T
  } catch {
    // Not clean JSON: fall through and look for an embedded payload.
  }

  const preview = text.slice(0, 200)
  const opener = /[{[]/g
  let sawOpener = false

  for (let match = opener.exec(text); match !== null; match = opener.exec(text)) {
    sawOpener = true
    const start = match.index
    const closer = match[0] === "{" ? "}" : "]"
    const end = text.lastIndexOf(closer)
    if (end <= start) continue
    try {
      return JSON.parse(text.slice(start, end + 1)) as T
    } catch {
      // This bracket belonged to the noise (or an inner value); try the next.
    }
  }

  throw new Error(
    sawOpener
      ? `Malformed JSON in bm output: ${preview}`
      : `No JSON found in bm output: ${preview}`
  )
}
/**
 * Assemble the answer prompt for a question over retrieved Basic Memory notes.
 *
 * @param question     The question to answer.
 * @param context      Retrieved search results; rendered by
 *   `buildBasicMemoryContext` into the "Retrieved Notes" section.
 * @param questionDate Optional date of the question; shown as "Not specified"
 *   when absent or empty.
 * @returns The full prompt text.
 */
export function buildBasicMemoryAnswerPrompt(
  question: string,
  context: unknown[],
  questionDate?: string
): string {
  const notes = buildBasicMemoryContext(context)
  const dateLabel = questionDate || "Not specified"

  // One entry per output line; empty strings are the blank separator lines.
  const lines = [
    "You are a question-answering system. You have access to a Basic Memory knowledge base: a graph of Markdown notes built from conversation sessions. Based on the retrieved notes below, answer the question.",
    "",
    `Question: ${question}`,
    `Question Date: ${dateLabel}`,
    "",
    "Retrieved Notes:",
    notes,
    "",
    "**Understanding the Context:**",
    "Each note above was created from a conversation session and stored as Markdown in Basic Memory. Notes may include a session date and the verbatim conversation transcript between speakers.",
    "",
    "**How to Answer:**",
    "1. Scan the notes for facts, events, preferences, and relationships relevant to the question.",
    "2. Pay attention to the session Date for temporal reasoning.",
    '3. For time-based questions, calculate relative dates ("last week", "yesterday") using the session date, then relate them to the question date.',
    "4. Cross-reference information across multiple notes if needed.",
    "",
    "Instructions:",
    "- Base your answer ONLY on the provided notes.",
    "- If the notes contain enough information, provide a clear, concise answer.",
    `- If the notes do not contain enough information, respond with "I don't know".`,
    "- Pay attention to temporal context for time-based questions.",
    "",
    "Reasoning:",
    "[Your step-by-step reasoning process here]",
    "",
    "Answer:",
    "[Your final answer here]",
  ]

  return lines.join("\n")
}
config.openaiApiKey } // RAG provider uses OpenAI for embeddings + case "basic-memory": + // Basic Memory runs fully locally via the `bm` CLI; no API key required. + // BASIC_MEMORY_CLI optionally overrides the CLI entrypoint (e.g. "basic-memory"). + return { apiKey: "none", baseUrl: process.env.BASIC_MEMORY_CLI || "bm" } default: throw new Error(`Unknown provider: ${provider}`) }