diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 2c18f1a7..12f04554 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -19,7 +19,7 @@ Magic Context is an `@opencode-ai/plugin` (entry `src/index.ts`) that rewrites t - **Adapters** (`src/plugin/`): hook wrappers, tool registry, RPC handlers, dream-timer lifecycle, per-session hook construction. - **Runtime** (`src/hooks/magic-context/`): the transform pipeline, postprocess phase, event/command handlers, system-prompt injection, compartment runners, decay rendering, strip-and-replay, nudges, m[0]/m[1] injection. - **Feature services** (`src/features/magic-context/`): storage, scheduler, tagger, memory, dreamer, sidekick, git-commit + message FTS indexes, unified search, overflow detection, migrations. -- **Tools** (`src/tools/`): `ctx_reduce`, `ctx_expand`, `ctx_note`, `ctx_memory`, `ctx_search`. +- **Tools** (`src/tools/`): `ctx_reduce`, `ctx_expand`, `ctx_note`, `ctx_memory`, `ctx_search`, `ctx_skill_note`, `ctx_skill_recall`. - **Config + shared** (`src/config/`, `src/shared/`): Zod config (deep-merge raw JSONC before validation; invalid leaves fall back to defaults with warnings, never disable the plugin), logger, data paths, SQLite selector, harness id, RPC transport, conflict detector, tag-transcript primitive (shared with Pi). - **TUI** (`src/tui/`): sidebar + `/ctx-status` / `/ctx-recomp` dialogs, RPC-backed; shipped as raw TS via the `./tui` export (not bundled into `dist/index.js`). - **CLI** (`packages/cli/`, separate `@cortexkit/magic-context` package): `npx` setup / doctor / migrate wizard. @@ -39,6 +39,13 @@ This is the heart of the system and the part most easily gotten wrong. A "transf 6. Compartment phase: inject the `` (m[0]/m[1]) into `message[0]`. 7. **Postprocess** (`transform-postprocess-phase.ts`): the mutation gates — pending-op drain, heuristic cleanup, nudges, synthetic-todowrite, auto-search. +**Tool surface:** +- Purpose: Expose agent tools with validated schemas and storage-backed execution. +- Location: `src/tools/ctx-reduce/`, `src/tools/ctx-expand/`, `src/tools/ctx-note/`, `src/tools/ctx-memory/`, `src/tools/ctx-search/`, `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/` +- Contains: Tool definitions, argument schemas, action gating (incl. dreamer-only actions in `ctx_memory`), user-facing result formatting. The two `ctx_skill_*` tools share a `recallSkillMemoryBlock` core with the transparent after-hook path (see Skill-memory in Key Abstractions) so write-back and explicit recall both go through the same recall+format pipeline. +- Depends on: `src/features/magic-context/`, `src/features/magic-context/skill-memory/`, and `src/hooks/magic-context/read-session-chunk.ts`. +- Used by: `src/plugin/tool-registry.ts`. + ### Pass taxonomy (every pass is exactly one) - **SOFT+ (defer / `cache_hit`):** nothing new. m[0] AND m[1] replay byte-identical; the entire `system + m[0] + m[1]` prefix stays cached. Only the conversation tail moves (where `ctx_reduce`/age drops land, themselves replayed deterministically). The steady state — most passes are this. - **SOFT (cache-busting):** m[1] re-renders (new compartments / memories / user-profile surface as deltas) while m[0] stays byte-identical. `system + m[0]` stays cached; the cache busts at the m[1] breakpoint. Driven by an execute pass, `/ctx-flush`, or a deferred-history drain. @@ -102,6 +109,23 @@ The long-history pipeline. Tiered compartments + deterministic decay renderer (r `protected-tail-boundary.ts` decides, per pass, which prefix of the raw tail is eligible for the historian and which suffix stays protected — from true-raw token sizes (not user-turn counts), so sparse-user-turn sessions can't deadlock the historian (#132). Boundary anchors at `lastCompartmentEnd + 1`; token target `N` capped at `0.40 × usable` (ABS_CAP 96k); a live-prompt floor keeps it from crossing the newest meaningful user message on routine (<80%) passes. **Open tool arcs** (a tool invocation with no result in the window) only hold the boundary back when **recent** (≥ the size-walk start = the live window); a stale/interrupted open arc older than that is compactable — otherwise one dead `running` tool call at the eligible-head edge would freeze the historian indefinitely. The trigger/runner share a content-stable range fingerprint for cross-view staleness validation. +**Skill-memory flow (per-skill cross-session recall):** transparent augmentation of opencode's built-in `skill` tool — when a loaded skill declares `skill-memory: { enabled: true }` in its frontmatter, accumulated gotchas/discoveries surface in a `` block appended to the skill tool's RESULT on every load. Three opencode hooks plus two agent-callable tools implement the loop. +1. **Definition (`tool.definition` in `src/hooks/magic-context/skill-tool-definition.ts`)** — augments the `skill` tool's JSON Schema with an optional `intent` string parameter. Effect-Schema strips unknown keys (`onExcessProperty: "ignore"`) before the skill tool runs, so `intent` never reaches the skill itself; the before-hook captures it pre-validation. Idempotent (re-adds are guarded). +2. **Before (`tool.execute.before` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteBeforeHook`)** — stashes the raw `intent` by `callID` in a bounded closure-state `Map` (60s TTL sweep + 256-entry cap + full clear on session delete, so unpaired before-hooks never leak). The stash is the only place `intent` is observable; it's deleted in the after-hook's `finally`. +3. **After (`tool.execute.after` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteAfterHook`)** — runs only for `input.tool === "skill"`. Parses the `Base directory for this skill: file:///...` line from `output.output` via `parseSkillProvenance()` (`fileURLToPath`-based, cross-platform) to recover the resolved `SKILL.md` path + tier (project/global) + `skill_source`. Then re-reads `SKILL.md` from disk (opencode's skill loader strips the `skill-memory:` block from the model-facing output, so the frontmatter is unreadable from `output.output`). Populates a session-scoped `SkillLoadRegistry` keyed by `${sessionId}:${skillId}` (NOT persisted, cleaned in `onSessionDeleted`). When frontmatter has `enabled: true` and notes exist, delegates to `recallSkillMemoryBlock` (feature layer) and appends the block to `output.output` BEFORE the Channel-1 ctx_reduce nudge runs. +4. **Cache safety (keystone).** The append lands in the skill tool RESULT = conversation tail, NOT the cached m[0]/m[1] prefix. This is why the feature cannot regress the prompt-cache hit rate. Channel-1 already appends to tool output strings the same way (precedent in `maybeInjectChannel1Nudge`) — this is proven production behavior. +5. **Write-back (`ctx_skill_note`)** — `kind` is a hard gate rejecting `'general'` at the tool level; duplicates dedup on `normalized_hash` and bump `hit_count` (`computeNormalizedHash` from `memory/normalize-hash.ts`). Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry` (so the agent must load the skill first — actionable error otherwise). Inserts into the `skill_memory` table (migration v50). The injected block footer reinforces: "After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes." +6. **Explicit recall (`ctx_skill_recall`)** — companion tool to the transparent path; reuses `recallSkillMemoryBlock` so P2 embeddings upgrade both at once. Registry-first resolution (exact, free, no disk I/O when the skill was loaded this session) with a cold-start disk fallback that walks opencode's real `discoverSkills()` order (project dirs first — they shadow global — then global external + config dirs). +7. **Multi-rung recall cascade (P2)** — `recallSkillMemoryBlock` is a four-rung selector: no intent → flat recency×hit (`mode="no-intent"`); intent + model-matched embeddings → cosine blend across `intent_embedding` + `delta_embedding` with tunable `ranking_relevance`/`ranking_recency`/`ranking_hit` weights (`mode="full"`); intent + no model match → FTS5 over the content-linked `skill_memory_fts` vtable (`mode="fts5-fallback"`); intent tokenized to empty → flat fallback (`mode="flat-fts"`). Embeddings live on `skill_memory` (BLOBs scanned via `Float32Array` cosine, mirroring the memories-embed path); a programmatic no-LLM `reembedStaleSkillNotes` keeps them fresh during the `distill-skill-memory` task. Read-side `recall_count` (migration v51) is bumped on every surfaced note — distinct from write-side `hit_count` so the recency term cannot be poisoned by which notes are queried most. +8. **Global-tier unification (P3a)** — global-tier notes collapse to `project_identity = '*'` (collision-merge at migration v52). The `partitionKey(tier, projectIdentity)` helper is the single chokepoint for every global write/recall/reembed/stats call site. A historian-extracted lesson learned in repoA is recallable from repoB without per-repo replication; the originating repo is preserved in `origin_project` (`source_type='historian'` for P3b writes, `'agent'` for tool writes). +9. **Historian auto-extraction (P3b)** — the historian prompt emits an optional `` block when the chunk shows the agent USING a skill (surfaced via the `TC: skill()` marker emitted by `extractToolCallSummaries` in `read-session-formatting.ts`) and learning a UNIVERSAL, reusable lesson. Both OpenCode (`compartment-runner-incremental.ts`) and Pi (`pi-historian-runner.ts`) runners promote the validated observations post-commit via the shared `promoteSkillObservations` helper, gated by `promotionActive && !discardedLast`. Writes go to the global `'*'` partition with `source_type='historian'` and `resolved_path=''` sentinel; hash-dedup bumps `hit_count` on collisions. +10. **Dreamer distill (`distill-skill-memory` task — opt-in, NOT a default)** — `DREAMER_TASKS` enum carries it (line 25 of `src/config/schema/magic-context.ts`); `DEFAULT_DREAMER_TASKS` does NOT (mirroring the `maintain-docs` precedent). The task prompt lives in `src/features/magic-context/dreamer/task-prompts.ts` and runs the merge/prune/promote maintenance cycle documented in CONFIGURATION.md. + +**Git-commit indexing:** +- `src/features/magic-context/git-commits/indexer.ts` reads HEAD-only non-merge commits via `git log` (NUL-byte-free format separator `\x1f`), bounded by `experimental.git_commit_indexing.{since_days, max_commits}`. +- Embeddings are generated through the same embedding provider chain as memories. +- Indexing fires from the dream-timer startup tick and periodic interval; manual `/ctx-dream` does NOT trigger commit indexing. + ## Memory, search & embeddings - **Memories** (`memory/storage-memory.ts`): project-scoped durable knowledge in the 5-category taxonomy (PROJECT_RULES / ARCHITECTURE / CONSTRAINTS / CONFIG_VALUES / NAMING), with FTS + vector side tables. `ctx_memory` exposes write/archive/update/merge/list; `list` is dreamer-only; primary agents may only mutate their own project's memories (workspace-shared categories aside). @@ -131,23 +155,184 @@ Background maintenance (V2: per-task cron scheduling). A process-wide 15-min tim ## Storage & migrations -`storage-db.ts` creates the schema and runs versioned migrations (`migrations.ts`, currently v1–v44). `LATEST_SUPPORTED_VERSION` is a schema fence — it MUST be bumped with every new migration (a unit test asserts it equals the highest migration), and a stale value makes the DB refuse to open after the migration applies. `ensureColumn()` + `healAllNullColumns()` backfill upgraded DBs even if a migration row is lost. New session-scoped tables must be added to `clearSession()`. A bulletproof `MAGIC_CONTEXT_TEST_DATA_DIR` guard keeps the test suite off the live DB (running `bun test` once migrated a live DB and fail-closed running binaries). SQLite binds must use SPREAD positional args, never the array form (`bun:sqlite` binds a lone array positionally; `node:sqlite` reads it as named params and throws). +`storage-db.ts` creates the schema and runs versioned migrations (`migrations.ts`, currently v1–v52). `LATEST_SUPPORTED_VERSION` is a schema fence — it MUST be bumped with every new migration (a unit test asserts it equals the highest migration), and a stale value makes the DB refuse to open after the migration applies. `ensureColumn()` + `healAllNullColumns()` backfill upgraded DBs even if a migration row is lost. New session-scoped tables must be added to `clearSession()`. A bulletproof `MAGIC_CONTEXT_TEST_DATA_DIR` guard keeps the test suite off the live DB (running `bun test` once migrated a live DB and fail-closed running binaries). SQLite binds must use SPREAD positional args, never the array form (`bun:sqlite` binds a lone array positionally; `node:sqlite` reads it as named params and throws). ## Session modes Three effective modes; the heavier features (historian, nudges, adjunct injection) are gated, while tag/drop plumbing stays on everywhere. -| Feature | Primary + `ctx_reduce_enabled: true` | Primary + `ctx_reduce_enabled: false` | Subagents | +| Feature | Primary + `ctx_reduce_enabled: true` | Primary + `ctx_reduce_enabled: false` | Subagents (any `ctx_reduce_enabled`) | |---|---|---|---| | Tag DB records | ✓ | ✓ | ✓ | -| `§N§` prefix injection + `ctx_reduce` tool | ✓ | ✗ | ✓ (if `ctx_reduce` available) | -| Historian / compartments / decay / m[0]m[1] | ✓ | ✓ | ✗ | -| Channel 1 nudge | ✓ | ✗ | ✓ | -| Channel 2 nudge | ✓ | ✗ | ✓ | -| Synthetic-todowrite / auto-search | ✓ | ✓ | ✗ | -| Heuristic tool drops at execute | ✓ once/turn | ✓ once/turn | ✓ every execute pass | -| 85% force-materialize / 95% block | ✓ | ✓ | ✗ (overflow path only) | -| Caveman text compression | ✗ | opt-in | ✗ | +| `§N§` tag prefix injection in message text | ✓ | ✗ | ✗ | +| `ctx_reduce` tool | ✓ | ✗ | ✗ | +| Historian / compartments / decay rendering | ✓ | ✓ | ✗ | +| Compartment injection (``) | ✓ | ✓ | ✗ | +| ``, ``, `` system-prompt blocks | ✓ | ✓ | ✗ | +| Channel 1 ctx_reduce nudge (tool-output ``) | ✓ | ✗ | ✓ | +| Channel 2 ceiling nudge (synthetic-user, one-shot) | ✓ | ✗ | ✗ | +| Deferred-note nudges | ✓ | ✗ | ✗ | +| Synthetic-todowrite injection | ✓ | ✓ | ✗ | +| Skill-memory `` recall append (transparent after-hook) | ✓ | ✓ | ✓ | +| Auto-search hint | ✓ | ✓ | ✗ | +| Heuristic tool drops at execute threshold | ✓ (once per user turn) | ✓ (once per user turn) | ✓ (every execute pass — no once-per-turn guard) | +| Heuristic reasoning clearing | ✓ | ✓ | ✓ | +| 85 % force-materialization | ✓ | ✓ | ✗ | +| 95 % block + emergency recovery | ✓ | ✓ | ✗ (overflow handled via `overflow-detection.ts` only; no recovery flag persisted) | +| Experimental age-tier caveman text compression | ✗ | opt-in via `experimental.caveman_text_compression.enabled` | ✗ | + + +**Decay rendering (replaces the LLM compressor):** +- Purpose: Deterministically choose a render tier per compartment from age, importance, and live history-budget pressure — self-tuning as the model's context window changes, with zero LLM cost. +- Location: `src/hooks/magic-context/decay-curve.ts` (validated formula + tier boundaries), `src/hooks/magic-context/decay-render.ts` (shared OpenCode + Pi renderer). +- Pattern: Exponential half-life `H = H50·2^((I−50)/D)/max(p,0.10)` (`H50=24`, `D=25`); log-cost tier thresholds `[0.201,0.729,1.322,2.587]`; budget pressure computed once per pass; oldest-first demotion; archive/self-close past the last boundary. Council-validated invariants (monotonicity, finite demotion, O(budget) cost) locked by `decay-curve.test.ts`. + +**Compartment events (v2, stored-not-rendered):** +- Purpose: Persist historian-extracted `causal_incident` / `trajectory_correction` events as a corpus for future dreamer aggregation; never rendered into the prompt in v2.0. +- Location: `compartment_events` table (migration v23); `insertCompartmentEvents` / `getCompartmentEvents`. +- Pattern: Anchored to durable compartment ids (`at_compartment` → id at publish); discarded-tail events filtered; cleared on session deletion. + +**Message-history index:** +- Purpose: FTS-backed raw user/assistant message search outside the transform hot path. +- Location: `src/features/magic-context/message-index.ts`, `src/features/magic-context/message-index-async.ts` +- Pattern: Async reconciliation + live event indexing + pure-query reads. + +**Git-commit index:** +- Purpose: Per-project HEAD-only commit corpus for `ctx_search` integration. +- Location: `src/features/magic-context/git-commits/` +- Pattern: NUL-free git log reader + FTS index + embedding side table; populated by dream timer. + +**Dream queue and lease:** +- Purpose: Run at most one dream worker at a time and survive restarts. +- Location: `src/features/magic-context/dreamer/queue.ts`, `src/features/magic-context/dreamer/lease.ts`, `src/features/magic-context/dreamer/storage-dream-state.ts`, `src/features/magic-context/dreamer/storage-dream-runs.ts` +- Pattern: SQLite-backed queue plus cooperative lease lock plus durable run-history table. + +**Key-files pinning:** +- Purpose: Inject up to N project files into the system prompt as `` content for the active session. +- Location: `src/features/magic-context/key-files/identify-key-files.ts`, `src/features/magic-context/key-files/read-stats.ts`, `src/features/magic-context/key-files/storage-key-files.ts` +- Pattern: Per-session selection by Dreamer; budget-bound rendering; symlink-safe realpath check. + +**User memory pipeline:** +- Purpose: Extract user behavioral observations from historian output (the v2 `` block), collect candidates, and promote recurring patterns to stable global user memories. +- Location: `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/magic-context/user-memory/review-user-memories.ts` +- Pattern: Historian extracts candidates **only when `dreamer.user_memories.enabled`** (privacy gate, enforced post-commit best-effort on both harnesses); dreamer reviews with a multi-session recurrence gate and promotes; the baseline set renders into m[0] `` (new promotions into m[1]). user_memories are globally scoped (no `project_path`). + +**Skill-memory (motor memory for skills):** +- Purpose: Per-skill cross-session recall — when a skill declares `skill-memory: { enabled: true }` in its frontmatter, accumulated gotchas/discoveries/fixes/workflow steps surface in a `` block appended to the skill tool's RESULT on every load. Agents write back via `ctx_skill_note`; explicit recall (without re-loading) is `ctx_skill_recall`. The transparent after-hook is the primary path; the two tools are companions. +- Location: `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`; `src/hooks/magic-context/skill-tool-definition.ts` + the `skill-memory` branches in `src/hooks/magic-context/hook-handlers.ts`; `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/`. Tables: `skill_memory` (migration v50), embedding columns + content-linked `skill_memory_fts` vtable (v51), `origin_project`/`source_type` + global `'*'` collision-merge (v52). See the fuller "Skill-memory flow" subsection above for the per-phase wiring. +- Pattern: Three-hook transparent augmentation (definition → before → after). The before-hook stashes an `intent` keyed by `${sessionId}:${callID}` (bounded 60s TTL + 256-cap + per-session prefix-prune on delete). The after-hook parses the trailing `Base directory for this skill: file:///...` line (line-anchored, last-match, cross-platform via `fileURLToPath`), reads the skill's `SKILL.md` from disk to recover its `skill-memory:` frontmatter (opencode strips it from the model-facing output), populates a session-scoped `SkillLoadRegistry` (NOT persisted), and calls `recallSkillMemoryBlock` (feature layer — shared core used by the tool too) to format the injected block. Append lands in the tool RESULT (conversation tail) — cache-safe by construction. Retrieval is a multi-rung recall cascade (intent embeddings → FTS5 → flat recency×hit), unioning the skill's own partition with the global `'*'` partition; the dreamer `distill-skill-memory` task (opt-in, NOT a default) re-embeds stale vectors and runs merge/prune/promote maintenance. Per-skill opt-in via SKILL.md frontmatter (`enabled: true` required; `max_tokens` 1500 / `max_pinned_tokens` 4000 / `dedup_threshold` 0.92 + `ranking_*` weights are tunable). + +**TUI ↔ server RPC:** +- Purpose: Localhost RPC for sidebar data, status/recomp dialogs, and TUI-action consumption. +- Location: `src/shared/rpc-server.ts`, `src/shared/rpc-client.ts`, `src/shared/rpc-utils.ts`, `src/shared/rpc-types.ts`, `src/shared/rpc-notifications.ts`, `src/plugin/rpc-handlers.ts` +- Pattern: Server publishes ephemeral port; TUI plugin polls for state and pushes notifications via the message queue. + +**Plugin message bus (legacy):** +- Purpose: Historical SQLite-backed TUI ↔ server bus, retained for migration compatibility. +- Location: `src/features/magic-context/plugin-messages.ts` +- Pattern: Vestigial — superseded by RPC. Module remains for forward-compat with older TUI plugin versions that may still poll it; no active runtime callers in current code. + +**Compaction markers (deferred drain, plan v6):** +- Purpose: Inject OpenCode-compatible compaction boundaries into the message table so `filterCompacted` stops at historian's last compartment boundary, shrinking the transform-input array. Marker movement is deferred from historian publish into the next materializing transform pass so a single cache-bust cycle covers both the `` rebuild AND the marker boundary advance. +- Location: `src/features/magic-context/compaction-marker.ts`, `src/hooks/magic-context/compaction-marker-manager.ts`, `src/features/magic-context/storage-meta-persisted.ts` (pending blob helpers). +- Pattern: Historian incremental runner writes the prospective new boundary (`{ordinal, endMessageId, publishedAt}`) into `session_meta.pending_compaction_marker_state` in the same transaction that publishes new compartments. The next consuming transform pass that drains `deferredHistoryRefreshSessions` calls `applyDeferredCompactionMarker(...)`, which validates the pending target against the latest stored compartment via `getCompartmentsByEndMessageId(...)` plus an OpenCode-message existence check via `getOpenCodeMessageById(...)`, then sequences `removeCompactionMarker` → `injectCompactionMarker`. Returns a tagged `MarkerUpdateOutcome` (`applied` | `already-current` | `stale-skip` | `retryable-failure`); only `retryable-failure` preserves the deferred-history signal so the next pass retries. CAS-clear (`clearPendingCompactionMarkerStateIf`) on success guards against publish/drain races within and across processes. Eager paths (`/ctx-flush`, `/ctx-recomp`) call the marker manager directly and CAS-clear any stale pending blob. Restart-safe: hook init calls `getSessionsWithPendingMarker(...)` to rehydrate deferred sets so the next pass after restart still drains. `event-handler` CAS-clears pending state on `session.compacted` (provider already advanced the boundary) and on `session.deleted` via cascade. Raw-history readers strip `summary=true` / `finish="stop"` rows to preserve original ordinals. Stable feature, default `compaction_markers: true` since v0.16.x; deferred drain since v0.19 (plan v6). + +**Auto-update checker:** +- Purpose: Self-update the cached `@latest` plugin install once per plugin process — OpenCode's plugin cache no longer auto-updates. +- Location: `src/hooks/auto-update-checker/checker.ts`, `src/hooks/auto-update-checker/cache.ts`, `src/hooks/auto-update-checker/constants.ts` +- Pattern: Fires from plugin init with on-disk cross-process dedup; rewrites the install-directory dependency entry + `bun.lock` (or runs `npm install` under OpenCode's npm-managed cache). + +**Agent prompt pack:** +- Purpose: Keep hidden-agent identities and prompt text isolated from runtime wiring. +- Location: `src/agents/dreamer.ts`, `src/agents/historian.ts` (declares `HISTORIAN_AGENT` and `HISTORIAN_EDITOR_AGENT`), `src/agents/sidekick.ts`, `src/agents/magic-context-prompt.ts` +- Pattern: Constants plus prompt builders. + +**Content stripping and replay:** +- Purpose: Strip reasoning, inline thinking, placeholder shells, structural noise, processed images, merged-assistant reasoning, system-injected stripping, and caveman compression from messages, and replay those operations on every transform pass to maintain stable message content across OpenCode's message rebuilds. +- Location: `src/hooks/magic-context/strip-content.ts`, `src/hooks/magic-context/caveman.ts`, `src/hooks/magic-context/caveman-cleanup.ts`, `src/hooks/magic-context/sentinel.ts` +- Pattern: Stateless strip functions plus deterministic in-place sentinel replacement (preserves message-part array shape across passes); paired with persisted watermarks (`cleared_reasoning_through_tag`, `stripped_placeholder_ids`, `tags.caveman_depth`) read from `session_meta` and `tags`. Several strips are provider-aware: `stripReasoningFromMergedAssistants` runs only for `anthropic`; whole-message empty-sentinel writes a `[dropped]` placeholder for non-Anthropic providers so openai-compatible providers don't see empty assistant messages. + +**Protected-tail boundary (v3):** +- Purpose: Decide, per pass, which prefix of the raw tail is eligible for the historian and which suffix stays protected — from true-raw token sizes instead of user-turn counts, so sparse-user-turn sessions can't deadlock the historian (issue #132). +- Location: `src/hooks/magic-context/protected-tail-boundary.ts` (resolver), `src/hooks/magic-context/read-session-true-raw-tokens.ts` (ordinal-keyed token index, fed by cached per-tag token counts with live-tokenize fallback), `src/hooks/magic-context/compartment-trigger.ts` (trigger consumption). +- Pattern: Boundary offset anchors at `lastCompartmentEnd + 1`; token target `N` capped at `0.40 × usable`; pure function of (messages, usage, budget) — no persisted high-watermark (backward relaxation is the #132 fix). The trigger runs in the transform off the in-memory `args.messages` tail (zero opencode.db reads steady-state; the resolved snapshot is handed to the runner so the historian sees exactly what the fire decision saw), with content-stable range fingerprints for cross-view staleness validation. + +**ctx_reduce nudges (Channel 1 / Channel 2):** +- Purpose: Keep the agent reducing its own context without cache-busting mutations — Channel 1 appends a `` to tool outputs in `tool.execute.after` (persisted to OpenCode's DB, so the bytes are durable and replay for free); Channel 2 delivers a one-shot synthetic-user ceiling nudge near the execute threshold via the live-server client on step-boundary `message.updated` events (mid-turn "tool-calls" and final "stop") — the queued message lands at the next step so the agent is warned while the pile is still growing. +- Location: `src/hooks/magic-context/ctx-reduce-nudge.ts` (shared math: severity, `reclaimable ≥ usable/3` trigger), `src/hooks/magic-context/hook-handlers.ts` (Channel 1 injection), `src/hooks/magic-context/channel2-delivery.ts` (Channel 2 lease + delivery), `packages/pi-plugin/src/ctx-reduce-nudge-pi.ts` (Pi mirror: `tool_result` mutation + `agent_end` followUp). +- Pattern: Channel-1 baselines (per-session, in-memory) carry the measurement (`tailToolTokens`, `usableTokens`) the triggers evaluate; Channel 2 uses a cross-process CAS lease (`channel2_nudge_state`: pending → claimed → delivered) with full-predicate revalidation at delivery — unknown baseline never delivers, stale predicate cancels to re-armable, only confirmed sends consume the one-per-session cap. + +**Tiered emergency drop (≥85%):** +- Purpose: Replace need-blind routine tool drops with a target-headroom eviction at force-materialize pressure — reclaim down to `fixedFloor + 0.30 × (ceiling − fixedFloor)`, evicting tool outputs oldest-first across tiers (T3 misc → T2 edit/search → T1 navigation), with newest-20% recency reserves on T1/T2. +- Location: `src/hooks/magic-context/emergency-drop.ts` (pure planner), applied from `heuristic-cleanup.ts` / `heuristic-cleanup-pi.ts`. +- Pattern: Split tag sets — `floorTags` (FULL active live-window set, floor accounting) vs `tags` (tool-only `canDrop()` eviction candidates); `last_emergency_input_sample` is the idempotence latch (no re-drop until a fresh provider usage reading arrives). + +**Caveman text compression (experimental):** +- Purpose: Apply oldest-first age-tier text compression to user/assistant text outside the protected tail when `ctx_reduce_enabled=false`. +- Location: `src/hooks/magic-context/caveman.ts` +- Pattern: Four tiers (ultra/full/lite/untouched) keyed by raw-ordinal age within the non-protected region. Persisted per-tag `caveman_depth` enables byte-identical replay; depth escalation always recomputes from `source_contents` to avoid lossy double compression. + +**Synthetic todowrite injection:** +- Purpose: Inject a deterministic `tool_use`/`tool_result` pair so the agent sees current todo state through its native todowrite mental model, even when real todowrite tool calls have been dropped from the prefix. +- Location: `src/hooks/magic-context/todo-view.ts` (renderer + hash), `src/hooks/magic-context/transform-postprocess-phase.ts` (B7 logic), `src/features/magic-context/storage-meta-persisted.ts` (state persistence) +- Pattern: Capture-path is pure DB write; cache-busting-pass injects fresh and persists `(call_id, anchor_message_id, state_json)`; defer-pass replays from persisted state_json for byte-identical wire bytes. + +**Persisted session meta:** +- Purpose: Store per-session scalars and JSON blobs that must survive across transform passes and OpenCode restarts. +- Location: `src/features/magic-context/storage-meta-shared.ts`, `src/features/magic-context/storage-meta-persisted.ts`, `src/features/magic-context/storage-meta-session.ts`, `src/features/magic-context/storage-meta.ts` +- Pattern: `session_meta` SQLite table with `ensureColumn()` and versioned migrations; typed row interfaces with runtime guards; NULL coercion in `isSessionMetaRow()` so legacy rows don't trigger fallback-to-defaults on every read. + +**Cache-busting signals (plan v6):** +- Purpose: Surface durable per-pass facts the postprocess phase uses to decide whether the v12 deferred-history drain, the deferred-marker drain, and the deferred-materialization drain are eligible to fire — without re-reading transform state. +- Location: `src/hooks/magic-context/cache-busting-signals.ts`, threaded into `RunPostTransformPhaseArgs` (`historyRebuiltThisPass`, `historyRefreshExplicitBeforePrepare`, `compartmentInjectionRebuiltFromDb`, `canConsumeDeferredLate`, `phaseJustAwaitedPublication`, etc.). +- Pattern: Captured at well-defined points in `transform.ts` (e.g. `historyRefreshExplicitBeforePrepare` is read immediately before `prepareCompartmentInjection`, not later) so concurrent transform passes don't clobber each other's signals. The drain decision (`historyWasConsumedThisPass`) combines `historyRebuiltThisPass && (canConsumeDeferredLate || phaseJustAwaitedPublication || explicitRebuildHappened) && materializationSatisfied`. Degraded-cache state (null-boundary rebuild) is tracked by `degradedCacheCountBySession` in postprocess; entry logs in `inject-compartments.ts` and a warning at `DEGRADE_CACHE_WARNING_THRESHOLD=10` consecutive degraded rebuilds. + +## Entry Points + +**CLI entry:** +- Location: `packages/cli/src/index.ts` (separate `@cortexkit/magic-context` package). +- Triggers: Executed as the unified `magic-context` bin target via `npx @cortexkit/magic-context@latest `. +- Responsibilities: Detect installed harnesses (OpenCode, Pi) and dispatch `setup` / `doctor` / `migrate` flows; print usage on unknown commands. + +**Plugin entry:** +- Location: `src/index.ts` +- Triggers: OpenCode loads the package entry declared in `package.json`. +- Responsibilities: Load config; surface config-warning toasts/ignored-messages; disable the plugin when conflicting plugins are detected (DCP, OMO context-management, OpenCode auto-compaction); register hidden agents (`historian`, `historian-editor`, `dreamer`, `sidekick`); start RPC server; start auto-update checker; start dream-schedule timer; wire hooks, commands, and tools. + +**TUI plugin entry:** +- Location: `src/tui/index.tsx` (separate `./tui` export from `package.json`). +- Triggers: OpenCode TUI loads the entry declared in `tui.json`. +- Responsibilities: Register Magic Context command-palette entries (with dual-path fallback for `api.keymap.registerLayer` vs legacy `api.command.register`); register sidebar slot; mount RPC-backed data layer. + +**Message transform entry:** +- Location: `src/plugin/messages-transform.ts` +- Triggers: `experimental.chat.messages.transform` +- Responsibilities: Defensive wrapper around the magic-context hook's transform — catches transient `SQLITE_BUSY`/`SQLITE_LOCKED` errors and other failures, persists summary to `session_meta.last_transform_error`, and falls back to unmodified messages so OpenCode's prompt loop always proceeds. + +**System-prompt transform entry:** +- Location: `src/hooks/magic-context/system-prompt-hash.ts` +- Triggers: `experimental.chat.system.transform` +- Responsibilities: Inject ``, ``, `` adjunct blocks and Magic Context guidance text; persist `system_prompt_hash` for cache-stability decisions; skip injection for OpenCode's internal `title`/`summary`/`compaction` agents and any agents matched by user-configured `system_prompt_injection.skip_signatures`. + +**Event entry:** +- Location: `src/plugin/event.ts` +- Triggers: OpenCode session and message lifecycle events. +- Responsibilities: Forward lifecycle events to the runtime event handler — `message.updated` (usage tracking, model drift detection, message-index live updates, Channel-2 ceiling-nudge delivery on step boundaries), `message.removed` (tag/index cleanup, anchor cleanup), `session.deleted` (full-session cleanup). The historian trigger decision no longer runs here — it lives in the transform, fed by the in-memory message tail (the event handler has no message array and the old per-streaming-delta DB read froze the event loop on large sessions). + +**Tool entry:** +- Location: `src/plugin/tool-registry.ts` +- Triggers: Plugin initialization. +- Responsibilities: Open storage, normalize arg schemas, and expose the supported tool set. + +**Tool definition entry:** +- Location: `src/index.ts` (`tool.definition` hook calls `recordToolDefinition`) +- Triggers: OpenCode `tool.definition` hook (per tool per flight). +- Responsibilities: Record tool description and parameter token counts per `(provider, model, agent, tool_id)` for sidebar token attribution, with content-fingerprint short-circuit to avoid re-measuring stable definitions. + +**RPC server entry:** +- Location: `src/shared/rpc-server.ts` (started from `src/index.ts`) +- Triggers: Plugin initialization. +- Responsibilities: Bind localhost RPC server on ephemeral port; publish port via `session_meta` for TUI discovery; serve sidebar/status/recomp/notification endpoints registered by `src/plugin/rpc-handlers.ts`. Subagents run heuristic drops on every execute pass (no once-per-turn guard) because a long subagent run is effectively one parent turn and would otherwise starve; they have no provider-cache reuse to protect. @@ -155,6 +340,24 @@ Subagents run heuristic drops on every execute pass (no once-per-turn guard) bec Fail **closed** when storage is unavailable (better to disable than silently overflow the prompt). Fail **open** in per-turn handlers (log and skip). Wrap the outer transform so transient `SQLITE_BUSY`/`SQLITE_LOCKED` never crash the prompt loop (#23). `overflow-detection.ts` parses provider context-overflow errors (Anthropic / OpenAI / Copilot) and persists the detected limit so later passes use the lower value. Subagent model fallback (`model-suggestion-retry.ts`) iterates the chain on retryable failures; abort/timeout/context-overflow short-circuit. Hidden agents carry a `steps`/`maxSteps` cap and are aborted via `session.abort` on timeout so a weak local model can't loop forever (#154). +**Subagent rationale:** subagents are driven by a parent agent, have bounded lifetimes, and often run in parallel (council, historian, sidekick, dreamer child sessions). They still benefit from automatic heuristic drops on their own context at execute passes (running on EVERY execute pass, not once-per-turn — long-running subagents are effectively one parent turn, and they'd starve under the parent's once-per-turn gate), but turning on historian, nudges, or prompt-adjunct injections in each subagent would create redundant work and per-agent cache churn. Subagents that run into overflow fall back to the existing `overflow-detection.ts` path; the detected limit is recorded so future passes use the lower value, but no emergency-recovery flag is persisted because subagents don't consume that path. The skill-memory `` recall append is the one exception that IS active for subagents: it rides the same ungated `tool.execute.after` path as the Channel-1 nudge (it is a tool-result append, NOT a prompt-adjunct injection), so an implementer subagent that loads a skill benefits from that skill's accumulated gotchas — the append lands in the subagent's own tool result (cache-safe in its context) at the cost of a single DB read. + ## Tag identity Each `tags` row is one taggable source-content unit (`message`, `file`, or `tool`). `message`/`file` tags key on `(session_id, message_id)` (synthetic content id). **`tool` tags key on a COMPOSITE `(session_id, callID, tool_owner_message_id)`** — because OpenCode reuses a `callID` counter per assistant turn, so the same `read:32` recurs across turns; including the owning assistant message id gives each invocation its own row (migration v10). Owner derivation: invocation parts own themselves; result parts pop a FIFO of unpaired invocations; a result whose invocation was compacted away falls back to the nearest prior persisted owner. The same composite keying mirrors in the drop queue and heuristic cleanup so dropped keys match what the tagger persisted. Per-tag token counts (`token_count` / `input_token_count` / `reasoning_token_count`) are computed once on tag insert and summed for sidebar / boundary / nudge math (off the hot path). + +**Provider error parsing:** `src/features/magic-context/overflow-detection.ts` parses provider-specific context-overflow errors (Anthropic, OpenAI, GitHub Copilot) and persists the detected limit to `session_meta.detected_context_limit` so subsequent passes use the lower value. `needs_emergency_recovery` is set for primary sessions; subagents skip emergency-recovery state because they don't consume that path. + +**Subagent model fallback:** `promptSyncWithModelSuggestionRetry` in `src/shared/model-suggestion-retry.ts` iterates the resolved fallback chain (user-configured `fallback_models` or builtin chain) on retryable failures. Abort, timeout, and context-overflow errors short-circuit the chain — those won't succeed on a different model and the caller's emergency-recovery path handles them. Suggestion retry ("did you mean X?") runs inside each attempt. + +## Cross-Cutting Concerns + +**Logging:** Use buffered file logging from `src/shared/logger.ts` and write to the temp-file path returned by `getLogFilePath()`. Per-session logs use `sessionLog(sessionId, message)`; module-level logs use `log(message)`. Heavy logging batches to disk to avoid blocking the transform path. + +**Caching:** Use deferred reductions, cached memory-block injection, per-session TTL tracking, per-tag cached token counts (computed once on tag insert), persisted reminder-replay state, per-session live injection cache, persisted system-prompt hash, and persisted todo-snapshot replay state — all coordinated through `src/hooks/magic-context/` and `src/features/magic-context/storage-meta-*.ts`. + +**Storage:** Use the SQLite database created by `src/features/magic-context/storage-db.ts` under the cortexkit data directory resolved by `src/shared/data-path.ts` (`~/.local/share/cortexkit/magic-context/context.db` on Linux/macOS, XDG-equivalent on Windows). Legacy OpenCode-plugin-folder DBs are migrated forward on first boot. The same DB is shared cross-harness between OpenCode and Pi; session-scoped tables include a `harness` discriminator (`'opencode'` / `'pi'`) while project-scoped tables (memories, git commits) are shared. + +**Schema migrations:** `src/features/magic-context/migrations.ts` declares versioned migrations v1–v52 (`LATEST_SUPPORTED_VERSION = 52` in `storage-db.ts` is the schema-fence ceiling and MUST be bumped with every new migration; a unit test — `schema-version-fence.test.ts` — asserts `LATEST_SUPPORTED_VERSION === LATEST_MIGRATION_VERSION` so the two can't drift). Notable: v10 `tool_owner_message_id` (composite tool-tag identity); v11 `todo_synthetic_*` (synthetic-todowrite); v12 orphan `memory_embeddings` cleanup; v13 `pending_compaction_marker_state` (deferred-marker drain); v14 project-scoped key files + version counter; v15 `deferred_execute_state` (boundary execution); v16 context-limit cache sentinels; v17 multi-anchor note-nudge/auto-search JSON storage; v18 `pending_pi_compaction_marker_state`; v19 compartment-state lease table; v20 subagent invocation token accounting; v21 session lifetime work metrics; **v22 the v2.0 cache-architecture foundation (m[0]/m[1] split tables, `project_state` epoch counter, plus per-compartment `p1`–`p4` tier columns, `importance`, `episode_type`, `p1_embedding`, and `legacy` flag); v23 `compartment_events` (historian-extracted causal_incident / trajectory_correction, stored-not-rendered in v2.0); v24 `historian_runs` telemetry (per-run chunk range, compartment/fact/event counts, importance min/max/avg, status + failure reason, FK to `subagent_invocations`); v25 `pi_stable_id_scheme` (Pi stable-id cutover watermark); v26 `memory_mutation_log` + `cached_m1_bytes` (memory supersede-delta — non-additive in-session memory mutations render as an m[1] `` delta instead of bumping the project epoch, plus the frozen-m[1]-bytes cache column); v27 `tags.entry_fingerprint` (Pi fallback-tag adoption); v28 `git_sweep_coordinator` (lease/cooldown for cross-process git-commit sweeps); v29 `notes.anchor_ordinal` (note→conversation-tail traceback); v30 `cached_m0_system_hash` / `cached_m0_tool_set_hash` / `cached_m0_model_key` (HARD-bust m[0] markers — provider-side cache-eviction detection for the materialization taxonomy; the migration clears the m[0]/m[1] cache once so pre-v30 rows re-materialize cleanly); v31 ctx_reduce-nudge state (`last_nudge_undropped`, `channel2_nudge_state`, `last_emergency_input_sample` + startup heal zeroing legacy sticky/anchor nudge state); v32 protected-tail v3 boundary state + per-tag cached token counts (`tags.token_count` / `input_token_count` / `reasoning_token_count` — computed once on tag insert, summed for sidebar/boundary/nudge math); v33 `compartment_chunk_embeddings` table for cross-session semantic search across compartment windows; v34 `workspaces` / `workspace_members` tables plus `cached_m0_workspace_fingerprint` m[0] marker (with a one-shot m[0]/m[1] cache reset so pre-v34 rows re-materialize cleanly); v35 `workspaces.share_categories` default + epoch refresh for existing members; v36 `session_projects` ownership map + seed for pre-v36 embedded sessions; v37 emergency drain catch-up latch + historian drain failure backoff; v38 `transform_decisions` table for durable cache-event cause attribution; v40 index Pi fallback tool owners for stable-id cutover; v41 key detected context limits by model; v42 per-task dreamer scheduling state (Dreamer v2 A+B); v43 memory verification side table and verify watermarks; v44 memory classification scope and shareability columns; v45 retrospective content watermark and processed-window idempotence; v46 Primers v1 candidate and promoted primer storage; v47 compiled smart-note checks and runtime policy state; v48 DreamerV2 rework: memory→file mapping vs verification split, classify marker; v49 per-model embedding coexistence and active identity tracking; **v50 `skill_memory` table for per-skill cross-session recall (P1 — see "Skill-memory" in Key Abstractions) with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE, plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes for the flat-recall path; v51 skill-memory P2 — `delta_embedding` + `recall_count` columns + content-linked `skill_memory_fts` FTS5 vtable (intent+delta, porter+unicode61 tokenizer, INSERT/UPDATE/DELETE triggers, post-migration rebuild) for the multi-rung recall cascade; v52 skill-memory historian extraction — `origin_project` + `source_type` columns and global-tier `'*'` collision-merge (one row per global lesson, recallable from any repo, with `origin_project` preserved).** Migration runner uses `schema_migrations` table with version-ordered execution and sibling-startup race protection (duplicate-insert is tolerated). + +**Harness-aware behavior:** `src/shared/harness.ts` exposes `setHarness()`/`getHarness()` for the runtime to identify itself; production INSERTs into session-scoped tables tag rows with the current harness. Pi-specific session-resolution paths are skipped on OpenCode and vice versa. diff --git a/CONFIGURATION.md b/CONFIGURATION.md index 2b1bd5de..81fa3543 100644 --- a/CONFIGURATION.md +++ b/CONFIGURATION.md @@ -387,6 +387,7 @@ To disable the dreamer entirely, set `dreamer.disable: true`. To disable a singl | `refresh-primers` | `0 3 * * *` | Re-investigate stale primers against current code and refresh their answers. | | `evaluate-smart-notes` | `0 3 * * *` | Surface smart notes whose `ctx_note` conditions have come true. | | `review-user-memories` | `0 3 * * *` | Promote recurring behavioral observations into the `` block (privacy-sensitive). | +| `distill-skill-memory` | `""` (off) | **Opt-in** — add a schedule to enable. Merge near-duplicate skill notes, prune stale low-hit notes, promote recurring gotchas to `pinned=1`, enforce per-skill note caps. Requires the `skill_memory` table (migration v50) — auto-created on upgrade. | ### Retrospective privacy @@ -625,6 +626,39 @@ Tier boundaries are hardcoded to keep behavior predictable and prevent cache-bus **When to enable.** Turn it on if you run very long, edit-heavy sessions and want to reclaim more context without losing the agent's record of what it did. The default stays off while cache stability is being validated in the wild. Requires a restart to take effect. +## Skill-Memory (per-skill frontmatter) + +Skill-memory is the "motor memory" for skills — per-skill, cross-session recall of gotchas, discoveries, fixes, and workflow steps. The plugin transparently augments opencode's built-in `skill` tool: when a skill declares `skill-memory: { enabled: true }` in its YAML frontmatter, accumulated notes for that skill surface in a `` block appended to the skill tool's RESULT on every load. Agents write back via `ctx_skill_note`; explicit recall (without re-loading) is `ctx_skill_recall`. + +Unlike every other setting in this file, **skill-memory is configured per-skill in each `SKILL.md`'s frontmatter, not in `magic-context.jsonc`**. Absent or malformed block = inert. A bad config in one skill cannot break other skills. + +```yaml +--- +name: test-driven-development +description: ... +skill-memory: + enabled: true # required: true to activate + max_tokens: 1500 # default 1500 — token budget for unpinned notes + max_pinned_tokens: 4000 # default 4000 — separate cap for pinned notes + dedup_threshold: 0.92 # default 0.92 — P2 cosine near-dedup threshold +--- +``` + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | `boolean` | (required `true`) | Master switch per skill. When absent or `false`, the transparent after-hook skips this skill entirely and `ctx_skill_recall` returns "skill-memory is not enabled for ''". | +| `max_tokens` | `number` | `1500` | Hard cap on tokens for unpinned notes in the injected block. Greedy fill by composite score (P1: recency × hit_count). | +| `max_pinned_tokens` | `number` | `4000` | Separate cap for pinned notes. Pinned notes are always included first; on cap overflow, least-used pinned notes are truncated in ascending `hit_count` order with an "N pinned notes omitted" marker. | +| `dedup_threshold` | `number` | `0.92` | P2 cosine near-dedup threshold. P1 ships without embeddings, so this is reserved for the P2 rollout. Tune per-skill in the `0.85`–`0.95` range. | + +**Cache safety.** The injected block lands in the tool RESULT = conversation tail, never the cached m[0]/m[1] prefix. This is the same pattern as Channel-1 (`maybeInjectChannel1Nudge`) and is why skill-memory cannot regress the prompt-cache hit rate. + +**Write-back (`ctx_skill_note`).** The injected block's footer prompts: *"After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes."* The `kind` parameter is a hard gate: `kind: "general"` is rejected at the tool level — general observations belong in `ctx_memory` with an appropriate category. + +**Dreamer integration.** Add `"distill-skill-memory"` to your `dreamer.tasks` list to opt in to overnight maintenance (merges near-duplicates, prunes stale zero-hit notes older than 30 days, promotes recurring gotchas to pinned). It is **not** a default task — the feature is opt-in like `maintain-docs`. + +**P1 vs P2.** P1 (shipped) is flat recall (recency × hit_count, no embeddings). P2 (planned) adds intent-aware ranking via the project's existing embedding provider. The per-skill `dedup_threshold` field is reserved for P2 cosine near-dedup and has no effect on P1. + ## Commands | Command | Description | diff --git a/README.md b/README.md index b57508e8..d805e26d 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,7 @@ Because it runs during idle time, the dreamer pairs well with local models, even - **`ctx_expand`**: pull a compressed history range back to the original `U:`/`A:` transcript when the agent needs the exact details. - **`ctx_note`**: a scratchpad for deferred intentions. Notes resurface at natural boundaries (after commits, after historian runs, when todos finish). **Smart notes** carry an open-ended condition the dreamer watches for. +- **Skill-memory (motor memory for skills)**: a per-skill `` block is appended to the skill tool's RESULT on every load, surfacing accumulated gotchas, discoveries, fixes, and workflow steps the agent has recorded for that skill. Per-skill opt-in via the skill's `SKILL.md` frontmatter (`skill-memory: { enabled: true }`); write back with **`ctx_skill_note`**, recall without re-loading with **`ctx_skill_recall`**. The block lands in the tool result, not the cached prompt prefix, so it never thrashes the cache. Recall works **across sessions** (a new session inherits everything) and **across harnesses** (write a memory in OpenCode, retrieve it in Pi). @@ -223,6 +224,8 @@ Recall works **across sessions** (a new session inherits everything) and **acros | `ctx_search` | Recall | Search memories, conversation history, and git commits | | `ctx_expand` | Recall | Decompress a history range back to the transcript | | `ctx_note` | Recall | Deferred intentions and dreamer-evaluated smart notes | +| `ctx_skill_note` | Recall | Write back a per-skill note (gotcha/discovery/fix/workflow) for future loads | +| `ctx_skill_recall` | Recall | Explicitly recall skill-memory notes without re-loading the skill | --- diff --git a/STRUCTURE.md b/STRUCTURE.md index 2f09201f..bacc8d8a 100644 --- a/STRUCTURE.md +++ b/STRUCTURE.md @@ -57,14 +57,14 @@ **`src/features/`:** - Purpose: Group reusable subsystem logic by feature. -- Contains: Magic-context services (storage, scheduler, tagger, search, message-index, overflow detection, compaction markers), dreamer runtime, sidekick support, memory system, user-memory pipeline, git-commit indexer, tool-definition token measurement, schema migrations, built-in commands. -- Key subdirs: `src/features/magic-context/dreamer/`, `src/features/magic-context/memory/`, `src/features/magic-context/sidekick/`, `src/features/magic-context/user-memory/`, `src/features/magic-context/git-commits/`, `src/features/builtin-commands/` -- Key files: `src/features/magic-context/storage-db.ts`, `src/features/magic-context/storage.ts` (barrel), `src/features/magic-context/migrations.ts`, `src/features/magic-context/message-index.ts`, `src/features/magic-context/search.ts`, `src/features/magic-context/overflow-detection.ts`, `src/features/magic-context/dreamer/runner.ts`, `src/features/magic-context/memory/storage-memory.ts`, `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/builtin-commands/commands.ts` +- Contains: Magic-context services (storage, scheduler, tagger, search, message-index, overflow detection, compaction markers), dreamer runtime, sidekick support, memory system, user-memory pipeline, key-files pinning, git-commit indexer, tool-definition token measurement, schema migrations, built-in commands, **skill-memory (per-skill motor memory)**. +- Key subdirs: `src/features/magic-context/dreamer/`, `src/features/magic-context/memory/`, `src/features/magic-context/sidekick/`, `src/features/magic-context/user-memory/`, `src/features/magic-context/key-files/`, `src/features/magic-context/git-commits/`, `src/features/magic-context/skill-memory/`, `src/features/builtin-commands/` +- Key files: `src/features/magic-context/storage-db.ts`, `src/features/magic-context/storage.ts` (barrel), `src/features/magic-context/migrations.ts`, `src/features/magic-context/message-index.ts`, `src/features/magic-context/search.ts`, `src/features/magic-context/overflow-detection.ts`, `src/features/magic-context/dreamer/runner.ts`, `src/features/magic-context/memory/storage-memory.ts`, `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`, `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/builtin-commands/commands.ts` **`src/tools/`:** - Purpose: Define the agent-facing tool surface. -- Contains: One directory per tool with constants, types, implementation, and tests. Five tools: `ctx-reduce`, `ctx-expand`, `ctx-note`, `ctx-memory`, `ctx-search`. -- Key files: `src/tools/ctx-reduce/tools.ts`, `src/tools/ctx-expand/tools.ts`, `src/tools/ctx-note/tools.ts`, `src/tools/ctx-memory/tools.ts`, `src/tools/ctx-search/tools.ts` +- Contains: One directory per tool with constants, types, implementation, and tests. Seven tools: `ctx-reduce`, `ctx-expand`, `ctx-note`, `ctx-memory`, `ctx-search`, `ctx-skill-note`, `ctx-skill-recall`. The two `ctx_skill_*` tools share the `recallSkillMemoryBlock` core with the transparent after-hook path. +- Key files: `src/tools/ctx-reduce/tools.ts`, `src/tools/ctx-expand/tools.ts`, `src/tools/ctx-note/tools.ts`, `src/tools/ctx-memory/tools.ts`, `src/tools/ctx-search/tools.ts`, `src/tools/ctx-skill-note/tools.ts`, `src/tools/ctx-skill-recall/tools.ts` **`src/shared/`:** - Purpose: Keep cross-feature utilities small and dependency-light. @@ -101,16 +101,24 @@ - `src/hooks/magic-context/strip-content.ts`: Strip and replay reasoning, inline thinking, structural noise, dropped placeholders, merged-assistant reasoning, processed images, and system-injected messages. - `src/hooks/magic-context/caveman.ts`: Experimental age-tier text compression for primary sessions with `ctx_reduce_enabled=false`. - `src/hooks/magic-context/todo-view.ts`: Build the deterministic synthetic todowrite tool part and compute its hash-based `call_id`. +- `src/hooks/magic-context/skill-tool-definition.ts`: `injectSkillIntentParam` — adds an optional `intent` parameter to the `skill` tool's schema via the `tool.definition` hook (Effect-Schema strips it before the skill runs; the before-hook captures it pre-validation). - `src/hooks/magic-context/inject-compartments.ts`: m[0]/m[1] history layout — `renderM0`/`renderM1`/`materializeM0`/`mustMaterialize` (mirrored in Pi's `inject-compartments-pi.ts`). - `src/hooks/magic-context/decay-curve.ts`: Council-validated deterministic tier-decay math (half-life, log-cost tier boundaries, budget pressure). - `src/hooks/magic-context/decay-render.ts`: Shared OpenCode+Pi compartment renderer built on the decay curve (replaces the removed LLM compressor). - `src/hooks/magic-context/compartment-runner-incremental.ts`: v2 historian publish path — bounded reference blocks, tiered/scored compartments, faithful per-chunk facts, discard-last, events + `p1_embedding` on publish. - `src/hooks/magic-context/reference-retrieval.ts` (+ `reference-seeds.generated.ts`): 4 rotating seed compartments + last-6 recency references for the historian prompt. - `src/hooks/magic-context/historian-prompt.generated.ts`: Generated v8.7.3 historian system prompt (source: `.alfonso/.../historian-prompt-v8.7.3.md`; re-exported via `compartment-prompt.ts`). +- `src/hooks/magic-context/hook-handlers.ts` (skill-memory branches): `createToolExecuteBeforeHook` (stashes per-callID `intent` in a bounded closure map), `maybeInjectSkillMemory` (appends the `` block to `output.output` BEFORE the Channel-1 nudge), and the `createToolExecuteAfterHook` branch that parses the `Base directory` line + reads `SKILL.md` from disk to populate the session-scoped `SkillLoadRegistry`. - `src/features/magic-context/memory/memory-migration.ts`: `/ctx-session-upgrade` 9-cat→5-cat memory re-eval (active-only, permanent-safe, epoch-bumping). +- `src/features/magic-context/skill-memory/frontmatter.ts`: Minimal YAML frontmatter parser for the per-skill `skill-memory:` block — returns `null` (inert) when absent or malformed; a bad config in one skill cannot break other skills. +- `src/features/magic-context/skill-memory/provenance.ts`: `parseSkillProvenance` (cross-platform `fileURLToPath` parser for the `Base directory for this skill: file:///...` line), `deriveSkillTier` / `deriveSkillSource` (path-based classification), and the session-scoped `SkillLoadRegistry` (`Map` — NOT persisted, cleaned in `onSessionDeleted`). +- `src/features/magic-context/skill-memory/storage.ts`: `skill_memory` table CRUD — `insertSkillMemoryNote` (UNIQUE-violation on duplicate returns null so callers can `bumpHitCount`), `getSkillMemoryNotes` (window-function flat ranking for P1), `findExistingNote`, `bumpHitCount`. The `UNIQUE(skill_id, tier, project_identity, normalized_hash)` constraint plus the `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes live in migration v37. +- `src/features/magic-context/skill-memory/recall.ts`: `recallSkillMemoryBlock` — the shared recall+format core (used by BOTH the transparent after-hook AND `ctx_skill_recall`). `flatRecall` does P1's recency × hit_count greedy-fill; `buildSkillMemoryBlock` formats the `` XML with the `ctx_skill_note` write-back footer. Lives in the feature layer to avoid a tools→hooks layering violation. +- `src/tools/ctx-skill-note/tools.ts`: `ctx_skill_note` — write-back tool. Hard gate rejects `kind: "general"` (general observations belong in `ctx_memory`). Exact-dedup on `normalized_hash` (reuses `computeNormalizedHash` from `memory/normalize-hash.ts`) bumps `hit_count`. Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry`. +- `src/tools/ctx-skill-recall/tools.ts`: `ctx_skill_recall` — explicit recall companion. Registry-first resolution (exact, free, no disk I/O when the skill was loaded this session) with a cold-start disk fallback walking opencode's real `discoverSkills()` order (project dirs first — they shadow global). Returns distinct messages for SKILL.md-not-found vs disabled vs cold-start-no-notes. - `src/features/magic-context/storage-db.ts`: Create durable storage; run versioned migrations; resolve runtime SQLite backend. - `src/features/magic-context/storage-meta-persisted.ts`: Read and write per-session persisted scalars and JSON blobs. -- `src/features/magic-context/migrations.ts`: Versioned schema migrations v1–v44 (`LATEST_SUPPORTED_VERSION` in `storage-db.ts` must track the highest; `schema-version-fence.test.ts` asserts they stay in lockstep). +- `src/features/magic-context/migrations.ts`: Versioned schema migrations v1–v50 (`LATEST_SUPPORTED_VERSION` in `storage-db.ts` must track the highest; `schema-version-fence.test.ts` asserts they stay in lockstep). v50 adds the `skill_memory` table with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes. - `src/features/magic-context/message-index.ts`: FTS-backed raw-message index for `ctx_search`. - `src/features/magic-context/search.ts`: Unified retrieval over memories, raw messages, and git commits. diff --git a/assets/magic-context.schema.json b/assets/magic-context.schema.json index 52b0e02c..f6399635 100644 --- a/assets/magic-context.schema.json +++ b/assets/magic-context.schema.json @@ -396,6 +396,10 @@ "refresh-primers": { "schedule": "0 3 * * *", "timeout_minutes": 20 + }, + "distill-skill-memory": { + "schedule": "", + "timeout_minutes": 20 } }, "type": "object", @@ -963,6 +967,56 @@ "minimum": 5 } } + }, + "distill-skill-memory": { + "default": { + "schedule": "", + "timeout_minutes": 20 + }, + "type": "object", + "properties": { + "schedule": { + "default": "", + "type": "string", + "description": "5-field cron schedule (e.g. \"0 3 * * *\"), or \"\" to disable this task." + }, + "model": { + "description": "Per-task model override (inherits dreamer.model)", + "type": "string" + }, + "fallback_models": { + "description": "Per-task fallback chain (inherits dreamer.fallback_models)", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "thinking_level": { + "description": "Pi only: per-task thinking level", + "type": "string", + "enum": [ + "off", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + }, + "timeout_minutes": { + "default": 20, + "description": "Minutes allowed for this task before it is aborted", + "type": "number", + "minimum": 5 + } + } } }, "description": "Per-task scheduling + model config. Each task has its own cron schedule and may override the dreamer-level model." diff --git a/packages/cli/src/lib/dreamer-setup.test.ts b/packages/cli/src/lib/dreamer-setup.test.ts index 44e52db7..e043ea19 100644 --- a/packages/cli/src/lib/dreamer-setup.test.ts +++ b/packages/cli/src/lib/dreamer-setup.test.ts @@ -77,11 +77,11 @@ describe("runDreamerSetup", () => { const prompts = new MockPrompts({ confirms: [false], autos: ["x/y"], - selects: Array(11).fill("cron:0 3 * * *"), + selects: Array(12).fill("cron:0 3 * * *"), }); const result = await runDreamerSetup(prompts, ["x/y"]); expect(result.tasks).toBeDefined(); - expect(Object.keys(result.tasks ?? {}).length).toBe(11); + expect(Object.keys(result.tasks ?? {}).length).toBe(12); expect(result.tasks?.verify.schedule).toBe("0 3 * * *"); expect(result.tasks?.curate.schedule).toBe("0 3 * * *"); expect(result.tasks?.["classify-memories"].schedule).toBe("0 3 * * *"); diff --git a/packages/cli/src/lib/dreamer-setup.ts b/packages/cli/src/lib/dreamer-setup.ts index fd2de294..7d76812b 100644 --- a/packages/cli/src/lib/dreamer-setup.ts +++ b/packages/cli/src/lib/dreamer-setup.ts @@ -35,6 +35,7 @@ const TASK_DESCRIPTIONS: Record = { "review-user-memories": "Promote recurring behaviors into your user profile", "promote-primers": "Promote recurring project questions into Primers", "refresh-primers": "Refresh answers for active project Primers", + "distill-skill-memory": "Opt-in: distills per-skill memory (merge/prune/promote)", }; /** v1-behavior-preserving default schedules (must match the Zod schema defaults). */ @@ -50,6 +51,7 @@ const DEFAULT_TASK_SCHEDULES: Record = { "review-user-memories": "0 3 * * *", "promote-primers": "0 3 * * *", "refresh-primers": "0 3 * * *", + "distill-skill-memory": "", }; const PRESET_CUSTOM = "__custom__"; diff --git a/packages/dashboard/src-tauri/src/config.rs b/packages/dashboard/src-tauri/src/config.rs index c4db92bd..66b83216 100644 --- a/packages/dashboard/src-tauri/src/config.rs +++ b/packages/dashboard/src-tauri/src/config.rs @@ -28,7 +28,7 @@ pub fn resolve_project_config_path(project_path: &str) -> PathBuf { /// frontend DreamerTasksField list). The dashboard renders this fixed set so /// every project shows the same tasks regardless of its (possibly stale) per- /// project scheduler snapshot in task_schedule_state. -pub const CANONICAL_DREAM_TASKS: [&str; 11] = [ +pub const CANONICAL_DREAM_TASKS: [&str; 12] = [ "map-memories", "verify", "verify-broad", @@ -40,6 +40,7 @@ pub const CANONICAL_DREAM_TASKS: [&str; 11] = [ "review-user-memories", "promote-primers", "refresh-primers", + "distill-skill-memory", ]; /// Default cron per task (mirrors DEFAULT_TASK_SCHEDULES in the plugin schema and @@ -58,6 +59,7 @@ pub fn default_task_schedule(task: &str) -> &'static str { "review-user-memories" => "0 3 * * *", "promote-primers" => "0 3 * * *", "refresh-primers" => "0 3 * * *", + "distill-skill-memory" => "", _ => "", } } diff --git a/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx b/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx index 6dd609c3..4451f7fd 100644 --- a/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx +++ b/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx @@ -100,6 +100,12 @@ export const TASKS: TaskMeta[] = [ description: "Refresh answers for active project Primers", defaultSchedule: "0 3 * * *", }, + { + name: "distill-skill-memory", + label: "Distill skill memory", + description: "Opt-in: distills per-skill memory (merge/prune/promote)", + defaultSchedule: "", + }, ]; const PRESETS: { label: string; cron: string }[] = [ diff --git a/packages/docs/src/content/docs/reference/configuration.md b/packages/docs/src/content/docs/reference/configuration.md index 13968405..a37025df 100644 --- a/packages/docs/src/content/docs/reference/configuration.md +++ b/packages/docs/src/content/docs/reference/configuration.md @@ -199,6 +199,11 @@ Off-hours maintenance (Dreamer) and on-demand prompt augmentation (Sidekick). | `dreamer.tasks.refresh-primers.fallback_models` | string \\| string[] | — | Per-task fallback chain (inherits dreamer.fallback_models) | | `dreamer.tasks.refresh-primers.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: per-task thinking level | | `dreamer.tasks.refresh-primers.timeout_minutes` | number (5–) | `20` | Minutes allowed for this task before it is aborted | +| `dreamer.tasks.distill-skill-memory.schedule` | string | `""` | 5-field cron schedule (e.g. "0 3 * * *"), or "" to disable this task. | +| `dreamer.tasks.distill-skill-memory.model` | string | — | Per-task model override (inherits dreamer.model) | +| `dreamer.tasks.distill-skill-memory.fallback_models` | string \\| string[] | — | Per-task fallback chain (inherits dreamer.fallback_models) | +| `dreamer.tasks.distill-skill-memory.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: per-task thinking level | +| `dreamer.tasks.distill-skill-memory.timeout_minutes` | number (5–) | `20` | Minutes allowed for this task before it is aborted | | `dreamer.inject_docs` | boolean | `true` | Inject ARCHITECTURE.md and STRUCTURE.md into system prompt | | `dreamer.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: default thinking level for dreamer subagent invocations. See historian.thinking_level. | | `sidekick` | object | — | Optional sidekick agent configuration for session-start memory retrieval | diff --git a/packages/pi-plugin/src/commands/ctx-status.ts b/packages/pi-plugin/src/commands/ctx-status.ts index ef762eba..f9b5bd77 100644 --- a/packages/pi-plugin/src/commands/ctx-status.ts +++ b/packages/pi-plugin/src/commands/ctx-status.ts @@ -90,7 +90,7 @@ export function registerCtxStatusCommand( const modelKey = ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : undefined; - const statusText = executeStatus( + const statusText = await executeStatus( currentDeps.db, sessionId, currentDeps.protectedTags ?? 20, diff --git a/packages/pi-plugin/src/pi-historian-runner.test.ts b/packages/pi-plugin/src/pi-historian-runner.test.ts index 005c1db1..0299faac 100644 --- a/packages/pi-plugin/src/pi-historian-runner.test.ts +++ b/packages/pi-plugin/src/pi-historian-runner.test.ts @@ -393,6 +393,25 @@ describe("runPiHistorian", () => { closeQuietly(db); } }); + it("promotes skillObservations as global '*' notes", async () => { + const xml = `${successXml()}\n\n* council | gotcha | fast aggregator\n`; + const { db } = await runHistorianWith({ + outputs: [xml], + memoryEnabled: true, + autoPromote: true, + }); + try { + const row = db + .prepare( + "SELECT project_identity, source_type FROM skill_memory WHERE skill_id='council'", + ) + .get() as { project_identity: string; source_type: string } | undefined; + expect(row?.project_identity).toBe("*"); + expect(row?.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); it("runs the Pi subagent, parses output, and publishes compartments and facts", async () => { const { db, runner } = await runHistorianWith({ outputs: [successXml()] }); try { diff --git a/packages/pi-plugin/src/pi-historian-runner.ts b/packages/pi-plugin/src/pi-historian-runner.ts index c0d5c86e..0cd99ad1 100644 --- a/packages/pi-plugin/src/pi-historian-runner.ts +++ b/packages/pi-plugin/src/pi-historian-runner.ts @@ -52,6 +52,7 @@ import { } from "@magic-context/core/features/magic-context/memory"; import { resolveProjectIdentity } from "@magic-context/core/features/magic-context/memory/project-identity"; import { getMemoriesByProject } from "@magic-context/core/features/magic-context/memory/storage-memory"; +import { promoteSkillObservations } from "@magic-context/core/features/magic-context/skill-memory/promote"; import { clearEmergencyDrainLatch, clearEmergencyRecovery, @@ -1114,6 +1115,28 @@ export async function runPiHistorian(deps: PiHistorianDeps): Promise { } } + // Skill-memory historian extraction (Pi mirror of OpenCode): promote + // into per-skill notes. Same promotionActive + + // !discardedLast gate as facts/primers so a provisional tail does not + // double-emit. + if ( + promotionActive && + !discardedLast && + validatedPass.skillObservations && + validatedPass.skillObservations.length > 0 + ) { + try { + const written = promoteSkillObservations( + db, + projectPath, + validatedPass.skillObservations, + ); + sessionLog(sessionId, `promoted ${written} skill observation(s)`); + } catch (error) { + sessionLog(sessionId, "failed to promote skill observations:", error); + } + } + // Raw chunk embeddings: the ctx_search semantic substrate over session // history. Fire-and-forget, best-effort, memory-gated. if (embeddingActive) { @@ -1275,6 +1298,13 @@ type ValidationOutcome = ? P : never : never; + skillObservations?: ReturnType< + typeof validateHistorianOutput + > extends infer T + ? T extends { ok: true; skillObservations?: infer S } + ? S + : never + : never; events?: ReturnType extends infer T ? T extends { ok: true; events?: infer E } ? E @@ -1317,6 +1347,7 @@ async function validateHistorianResult( facts: validation.facts, userObservations: validation.userObservations, primerCandidates: validation.primerCandidates, + skillObservations: validation.skillObservations, events: validation.events, }; } diff --git a/packages/plugin/src/agents/magic-context-prompt.ts b/packages/plugin/src/agents/magic-context-prompt.ts index 4db441e4..f79e400e 100644 --- a/packages/plugin/src/agents/magic-context-prompt.ts +++ b/packages/plugin/src/agents/magic-context-prompt.ts @@ -65,6 +65,26 @@ function memoryGuidanceBlock(memoryEnabled: boolean): string { return memoryEnabled ? `${MEMORY_GUIDANCE}\n` : ""; } +// Skill-memory write-back guidance: `ctx_skill_note` captures non-obvious gotchas, +// discoveries, fixes, and workflow steps during skill use (durable across sessions); +// `ctx_skill_recall` rehydrates accumulated notes for a skill without re-loading it. +// Distinct from `ctx_memory`, which captures general project knowledge (not tied +// to a specific skill). NOT gated on memory.enabled — skill-memory is an +// independent store (its own table + tool-result-tail injection) — BUT the +// `ctx_memory` cross-reference is dropped when memory is off, since ctx_memory is +// then unregistered and pointing at it would be misleading (mirrors MEMORY_GUIDANCE). +function ctxSkillMemoryGuidance(memoryEnabled: boolean): string { + const generalObservationsLine = memoryEnabled + ? "Do NOT use `ctx_skill_note` for general project observations — those belong in `ctx_memory`." + : "Do NOT use `ctx_skill_note` for general project observations."; + return `Use \`ctx_skill_note\` after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. Skip routine successes — only record gotchas, discoveries, fixes, and workflow steps that would save time on the next use. +Example: \`ctx_skill_note({skill: 'trilium', intent: 'bulk-retag a subtree', kind: 'gotcha', delta: 'ETAPI note PUT needs Content-Type: text/plain even for HTML content'})\` +Example: \`ctx_skill_note({skill: 'test-driven-development', intent: 'fix flaky auth test', kind: 'fix', delta: 'Always mock Date.now() in auth tests — real timers cause intermittent failures'})\` +${generalObservationsLine} + +Use \`ctx_skill_recall\` to explicitly query accumulated notes for a skill without re-loading it. Call it when you want to recall gotchas/discoveries for a skill you have already loaded this session, or when you need notes without triggering a full skill load. Returns the \`\` block directly as a tool result. Example: \`ctx_skill_recall({skill: 'trilium', intent: 'bulk-retag a subtree'})\`.`; +} + const BASE_INTRO = ( protectedTags: number, memoryEnabled: boolean, @@ -72,7 +92,8 @@ const BASE_INTRO = ( Use \`ctx_reduce\` to mark spent tagged content as discardable and reclaim space. Marking is NOT an immediate delete — it queues the content, which stays fully visible until space is actually needed (as soon as the next turn if you're already under pressure, much later if not), so mark a tool output as soon as you're done with it rather than hoarding the call for the end of the turn. The last ${protectedTags} tags are protected (marking one just queues it until it ages out). Syntax: "3-5", "1,2,9", or "1-5,8,12-15". Do not announce or narrate \`ctx_reduce\` drops — just call the tool silently. Saying "I'll drop these outputs" wastes tokens the user does not care about. ${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. +${memoryGuidanceBlock(memoryEnabled)}${ctxSkillMemoryGuidance(memoryEnabled)} +Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. Examples: - Can't remember where a related codebase or dependency lives → \`ctx_search(query="opencode source code path")\` @@ -93,7 +114,8 @@ Before your turn finishes, consider using \`ctx_reduce\` to drop large tool outp * a tagging system they can't observe just wastes tokens and (empirically) primes * some models to emit malformed `§N">§` tokens at the start of their own text. */ const BASE_INTRO_NO_REDUCE = (memoryEnabled: boolean): string => `${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. +${memoryGuidanceBlock(memoryEnabled)}${ctxSkillMemoryGuidance(memoryEnabled)} +Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. Examples: - Can't remember where a related codebase or dependency lives → \`ctx_search(query="opencode source code path")\` diff --git a/packages/plugin/src/agents/skill-memory-guidance.test.ts b/packages/plugin/src/agents/skill-memory-guidance.test.ts new file mode 100644 index 00000000..d449ce03 --- /dev/null +++ b/packages/plugin/src/agents/skill-memory-guidance.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "bun:test"; +import { buildMagicContextSection } from "./magic-context-prompt"; + +describe("magic-context-prompt skill-memory guidance", () => { + it("prompt includes ctx_skill_note guidance", () => { + // Real export is buildMagicContextSection (positional args: _agent, protectedTags, ctxReduceEnabled, ...) + const prompt = buildMagicContextSection(null, 20, true, false, false, false, false); + expect(prompt).toContain("ctx_skill_note"); + }); + + it("ctx_skill_note guidance includes worked example", () => { + const prompt = buildMagicContextSection(null, 20, true, false, false, false, false); + expect(prompt).toContain("kind:"); + expect(prompt).toContain("gotcha"); + }); +}); diff --git a/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts new file mode 100644 index 00000000..a5cfe446 --- /dev/null +++ b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, test } from "bun:test"; +import { + AGENTIC_DREAM_TASKS, + CANONICAL_DREAM_TASKS, +} from "../../features/magic-context/dreamer/task-registry"; +import { DREAMER_TASKS, DreamingTaskSchema, DreamTasksSchema } from "./magic-context"; + +describe("distill-skill-memory dreamer task", () => { + test("distill-skill-memory is a canonical dream task", () => { + expect(CANONICAL_DREAM_TASKS).toContain("distill-skill-memory"); + }); + + test("distill-skill-memory is agentic (prompt-driven)", () => { + expect(AGENTIC_DREAM_TASKS).toContain("distill-skill-memory"); + // DREAMER_TASKS (the schema enum) re-exports AGENTIC_DREAM_TASKS. + expect(DREAMER_TASKS).toContain("distill-skill-memory"); + }); + + test("DreamingTaskSchema accepts distill-skill-memory", () => { + expect(() => DreamingTaskSchema.parse("distill-skill-memory")).not.toThrow(); + }); + + test("distill-skill-memory defaults to OFF (empty schedule = opt-in)", () => { + // v2 model: a task is opt-in when its default schedule is "" (disabled). + // Parse an explicit tasks object so the per-key defaults fire. + const parsed = DreamTasksSchema.parse({}); + expect(parsed["distill-skill-memory"].schedule).toBe(""); + }); + + test("maintain-docs is also opt-in (empty default schedule) — precedent", () => { + const parsed = DreamTasksSchema.parse({}); + expect(parsed["maintain-docs"].schedule).toBe(""); + }); +}); diff --git a/packages/plugin/src/config/schema/magic-context.ts b/packages/plugin/src/config/schema/magic-context.ts index fc3e0e8c..30637109 100644 --- a/packages/plugin/src/config/schema/magic-context.ts +++ b/packages/plugin/src/config/schema/magic-context.ts @@ -110,6 +110,8 @@ const DEFAULT_TASK_SCHEDULES: Record = { "review-user-memories": "0 3 * * *", "promote-primers": "0 3 * * *", "refresh-primers": "0 3 * * *", + // Opt-in (NOT a default): distills per-skill memory. Off until scheduled. + "distill-skill-memory": "", }; function defaultTaskConfig(task: DreamTaskName): z.input { @@ -159,6 +161,9 @@ export const DreamTasksSchema = z "refresh-primers": DreamTaskBaseConfigSchema.default(() => DreamTaskBaseConfigSchema.parse(defaultTaskConfig("refresh-primers")), ), + "distill-skill-memory": DreamTaskBaseConfigSchema.default(() => + DreamTaskBaseConfigSchema.parse(defaultTaskConfig("distill-skill-memory")), + ), }) .describe( "Per-task scheduling + model config. Each task has its own cron schedule and may override the dreamer-level model.", diff --git a/packages/plugin/src/features/magic-context/dreamer/task-executor.ts b/packages/plugin/src/features/magic-context/dreamer/task-executor.ts index 8866a605..b1d4ac67 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-executor.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-executor.ts @@ -23,6 +23,7 @@ import { getMemoryVerifications, type Memory, } from "../memory"; +import { reembedStaleSkillNotes } from "../skill-memory/reembed"; import { recordChildInvocation } from "../subagent-token-capture"; import { reviewUserMemories } from "../user-memory/review-user-memories"; import { getActiveUserMemories } from "../user-memory/storage-user-memory"; @@ -417,7 +418,18 @@ export function createDreamTaskExecutor(deps: DreamTaskExecutorDeps): TaskExecut }; } - // Agentic tasks: verify / curate / maintain-docs. + // Skill-memory P2: re-embed stale/NULL skill-note vectors BEFORE the + // distill agentic pass so the prompt sees a fully-embedded corpus and + // P1-era notes promote from the FTS rung to the cosine rung. + if (config.task === "distill-skill-memory") { + try { + await reembedStaleSkillNotes(db, projectIdentity); + } catch (e) { + log(`[dreamer] distill-skill-memory reembed pre-step failed: ${e}`); + } + } + + // Agentic tasks: curate / maintain-docs / distill-skill-memory. return await runAgenticTask(config, ctx, { deps, deadline, diff --git a/packages/plugin/src/features/magic-context/dreamer/task-gates.ts b/packages/plugin/src/features/magic-context/dreamer/task-gates.ts index 368473e4..7ff15a6d 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-gates.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-gates.ts @@ -156,6 +156,13 @@ export function evaluateTaskGate(task: DreamTaskName, ctx: TaskGateContext): boo (primer.lastObservedAt ?? 0) > primer.answerRefreshedAt, ); + case "distill-skill-memory": + // Agentic opt-in task: when scheduled, always eligible. The reembed + // pre-step + distill prompt no-op gracefully on an empty corpus, so a + // cheap always-true gate avoids coupling the scheduler to skill_memory + // table internals. + return true; + default: { const _exhaustive: never = task; return Boolean(_exhaustive); diff --git a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts index 044404b0..a222bf32 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts @@ -374,6 +374,38 @@ const STRUCTURE_TEMPLATE = ` **Tests:** co-located with source as \\\`*.test.ts\\\` \`\`\``; +// ── Distill Skill Memory ─────────────────────────────────────────────────── + +function buildDistillSkillMemoryPrompt(projectPath: string): string { + return `## Task: Distill Skill Memory (P2 — read-only health report) + +**Project:** ${projectPath} + +### Important context +- Embedding refresh for NULL/stale vectors already ran programmatically BEFORE this prompt — no need to re-embed. +- Merge (action="distill" + merge), prune, and promote are P3 / NOT YET IMPLEMENTED. Do NOT call ctx_skill_note with action="distill". + +### Your task: produce a short read-only summary of skill-memory corpus health +1. Query aggregate counts and flag obvious issues (scoped to THIS project's + own notes plus the cross-project global '*' partition): + \`\`\`sql + SELECT skill_id, tier, COUNT(*) as note_count, + SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END) as pinned_count, + SUM(CASE WHEN intent_embedding IS NULL OR delta_embedding IS NULL THEN 1 ELSE 0 END) as missing_embedding_count + FROM skill_memory + WHERE project_identity IN ('${projectPath.replace(/'/g, "''")}', '*') + GROUP BY skill_id, tier + ORDER BY note_count DESC + LIMIT 20; + \`\`\` +2. Note any skills with >100 notes, >30% gotcha-kind notes, or obvious near-duplicates (same skill + kind + very similar delta text). +3. Report findings as a short summary — no tool calls beyond read-only SQL queries. + +### Success criteria +- A concise health summary is logged (via ctx_memory) for the project maintainer to review. +- No tool calls to unimplemented actions.`; +} + // ── Dispatcher ───────────────────────────────────────────────────────────── export function buildDreamTaskPrompt( @@ -401,5 +433,7 @@ export function buildDreamTaskPrompt( args.lastDreamAt ?? null, args.existingDocs ?? { architecture: false, structure: false }, ); + case "distill-skill-memory": + return buildDistillSkillMemoryPrompt(args.projectPath); } } diff --git a/packages/plugin/src/features/magic-context/dreamer/task-registry.ts b/packages/plugin/src/features/magic-context/dreamer/task-registry.ts index af48dce4..c90ea99c 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-registry.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-registry.ts @@ -22,6 +22,9 @@ export const CANONICAL_DREAM_TASKS = [ "review-user-memories", "promote-primers", "refresh-primers", + // Opt-in: distills per-skill memory (merge/prune/promote). Runs only when + // scheduled (schedule != ""); requires the skill_memory table + feature. + "distill-skill-memory", ] as const; export type DreamTaskName = (typeof CANONICAL_DREAM_TASKS)[number]; @@ -33,7 +36,7 @@ export type DreamTaskName = (typeof CANONICAL_DREAM_TASKS)[number]; * primers, retrospective) have their own specialized runners and do NOT go * through the prompt builder. */ -export const AGENTIC_DREAM_TASKS = ["curate", "maintain-docs"] as const; +export const AGENTIC_DREAM_TASKS = ["curate", "maintain-docs", "distill-skill-memory"] as const; export type AgenticDreamTask = (typeof AGENTIC_DREAM_TASKS)[number]; diff --git a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts new file mode 100644 index 00000000..3ab2b9dc --- /dev/null +++ b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, test } from "bun:test"; +import { float32ArrayToBlob, toFloat32Array } from "./storage-memory-embeddings"; + +describe("vector serde round-trip", () => { + test("Float32Array → blob → Float32Array preserves values", () => { + const vec = new Float32Array([0.1, -0.5, 0.99, 0.0]); + const blob = float32ArrayToBlob(vec); + const back = toFloat32Array(blob); + expect(Array.from(back)).toEqual(Array.from(vec)); + }); +}); diff --git a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts index f10e623a..1fa57ae1 100644 --- a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts +++ b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts @@ -37,7 +37,7 @@ function isEmbeddingRow(row: unknown): row is EmbeddingRow { ); } -function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { +export function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { if (blob instanceof Uint8Array) { const buffer = blob.buffer.slice(blob.byteOffset, blob.byteOffset + blob.byteLength); return new Float32Array(buffer); @@ -46,6 +46,11 @@ function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { return new Float32Array(blob.slice(0)); } +/** Serialize a Float32Array to a SQLite BLOB (Buffer). Canonical — reuse, do not duplicate. */ +export function float32ArrayToBlob(vector: Float32Array): Buffer { + return Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength); +} + function getSaveEmbeddingStatement(db: Database): PreparedStatement { let stmt = saveEmbeddingStatements.get(db); if (!stmt) { diff --git a/packages/plugin/src/features/magic-context/migrations-v38.test.ts b/packages/plugin/src/features/magic-context/migrations-v38.test.ts index fdcef5b5..b2703e31 100644 --- a/packages/plugin/src/features/magic-context/migrations-v38.test.ts +++ b/packages/plugin/src/features/magic-context/migrations-v38.test.ts @@ -33,6 +33,7 @@ describe("migration v38 — transform decisions", () => { const db = new Database(":memory:"); try { initializeDatabase(db); + runMigrations(db); expect(tableNames(db)).toContain("transform_decisions"); diff --git a/packages/plugin/src/features/magic-context/migrations-v42.test.ts b/packages/plugin/src/features/magic-context/migrations-v42.test.ts new file mode 100644 index 00000000..5e7e0799 --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v42.test.ts @@ -0,0 +1,126 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; // ESM import (not require) — matches codebase pattern + +function columnNames(db: Database, table: string): string[] { + return (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (c) => c.name, + ); +} + +function tableExists(db: Database, name: string): boolean { + return Boolean( + db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?").get(name), + ); +} + +describe("migration v42 — skill_memory table", () => { + test("creates skill_memory table with correct columns on fresh DB, idempotently", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + runMigrations(db); // idempotency check + + expect(tableExists(db, "skill_memory")).toBe(true); + + const cols = columnNames(db, "skill_memory"); + expect(cols).toContain("id"); + expect(cols).toContain("skill_id"); + expect(cols).toContain("resolved_path"); + expect(cols).toContain("tier"); + expect(cols).toContain("skill_source"); + expect(cols).toContain("project_identity"); + expect(cols).toContain("intent"); + expect(cols).toContain("intent_embedding"); + expect(cols).toContain("embedding_model_version"); + expect(cols).toContain("kind"); + expect(cols).toContain("delta"); + expect(cols).toContain("tags"); + expect(cols).toContain("hit_count"); + expect(cols).toContain("pinned"); + expect(cols).toContain("normalized_hash"); + expect(cols).toContain("created_at"); + expect(cols).toContain("last_used_at"); + + expect( + db + .prepare("SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1") + .get(), + ).toEqual({ version: LATEST_MIGRATION_VERSION }); + } finally { + closeQuietly(db); + } + }); + + test("skill_memory CHECK constraints reject invalid tier and kind values", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + + const insert = db.prepare(` + INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?) + `); + + // Valid row + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "project", + "git:abc123", + "test intent", + "gotcha", + "test delta", + "hash1", + Date.now(), + ), + ).not.toThrow(); + + // Invalid tier + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "invalid-tier", + "git:abc123", + "test intent", + "gotcha", + "test delta", + "hash2", + Date.now(), + ), + ).toThrow(); + + // Invalid kind + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "project", + "git:abc123", + "test intent", + "general", + "test delta", + "hash3", + Date.now(), + ), + ).toThrow(); + } finally { + closeQuietly(db); + } + }); + + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v42", () => { + // This test will fail until storage-db.ts is bumped to 39. + // Belt-and-braces: mirrors schema-version-fence.test.ts but is co-located with the migration. + // If this feels redundant, keep it with this comment — co-location aids discoverability. + // NOTE: use ESM import at the top of the file (not require()) to match codebase pattern. + expect(LATEST_SUPPORTED_VERSION).toBe(LATEST_MIGRATION_VERSION); + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations-v49.test.ts b/packages/plugin/src/features/magic-context/migrations-v49.test.ts index c42a325d..ad7b80ec 100644 --- a/packages/plugin/src/features/magic-context/migrations-v49.test.ts +++ b/packages/plugin/src/features/magic-context/migrations-v49.test.ts @@ -158,7 +158,9 @@ describe("migration v49 — per-model embedding coexistence", () => { ) VALUES (999999, 'ses-orphan', 'git:orphan', 'opencode', 0, 0, 1, 'h', 'm', 4, x'01020304', 1)`, ).run(); db.exec("PRAGMA foreign_keys=ON"); - db.prepare("DELETE FROM schema_migrations WHERE version = 49").run(); + // Delete v49 AND everything above it (our skill migrations are v50-52) + // so getCurrentVersion drops below 49 and the v49 body actually re-runs. + db.prepare("DELETE FROM schema_migrations WHERE version >= 49").run(); expect(() => runMigrations(db)).not.toThrow(); expect(countRows(db, "compartment_chunk_embeddings")).toBe(0); diff --git a/packages/plugin/src/features/magic-context/migrations-v51.test.ts b/packages/plugin/src/features/magic-context/migrations-v51.test.ts new file mode 100644 index 00000000..38c2b28c --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v51.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, MIGRATIONS, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; + +function columnNames(db: Database, table: string): string[] { + return (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (c) => c.name, + ); +} +function tableExists(db: Database, name: string): boolean { + return Boolean( + db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?").get(name), + ); +} + +describe("migration v51 — skill_memory embeddings + FTS", () => { + test("fresh DB: delta_embedding column and skill_memory_fts exist, no throw", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + runMigrations(db); // idempotency + + expect(columnNames(db, "skill_memory")).toContain("delta_embedding"); + expect(tableExists(db, "skill_memory_fts")).toBe(true); + } finally { + closeQuietly(db); + } + }); + + test("FTS triggers keep skill_memory_fts in sync with skill_memory", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + db.prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?,?,?,?,?,?,?,?,0,0,?)`, + ).run( + "s1", + "/p/SKILL.md", + "global", + "git:abc", + "fix a flaky auth test", + "fix", + "mock Date.now in auth tests", + "h1", + Date.now(), + ); + + const hit = db + .prepare( + `SELECT m.id FROM skill_memory_fts f JOIN skill_memory m ON m.id = f.rowid + WHERE skill_memory_fts MATCH ?`, + ) + .get('"auth"'); + expect(hit).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + // requires `import { MIGRATIONS } from "./migrations";` (added above) + test("v51 migration backfills FTS for rows that pre-existed v40", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + // Build a PRE-v40 schema: apply every migration BELOW v40 directly via up() (runMigrations has no + // target-version param). skill_memory is created at v39; the FTS table + delta_embedding do NOT exist yet. + for (const m of MIGRATIONS.filter((x) => x.version < 51).sort( + (a, b) => a.version - b.version, + )) { + m.up(db); + } + // Insert a row under the pre-v51 schema — no FTS table yet, so no AFTER-INSERT trigger indexes it. + db.prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?,?,?,?,?,?,?,?,0,0,?)`, + ).run( + "s2", + "/p/SKILL.md", + "global", + "git:abc", + "handle oauth refresh", + "fix", + "rotate the token early", + "h2", + Date.now(), + ); + // Apply ONLY v40's up() — its body must ALTER + create the FTS table + BACKFILL the pre-existing row. + const v51 = MIGRATIONS.find((m) => m.version === 51); + if (!v51) throw new Error("v40 migration not found"); + v51.up(db); + const hit = db + .prepare( + `SELECT m.id FROM skill_memory_fts f JOIN skill_memory m ON m.id = f.rowid WHERE skill_memory_fts MATCH ?`, + ) + .get('"oauth"'); + expect(hit).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v51", () => { + expect(LATEST_SUPPORTED_VERSION).toBe(LATEST_MIGRATION_VERSION); + expect(LATEST_SUPPORTED_VERSION).toBe(52); + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations-v52.test.ts b/packages/plugin/src/features/magic-context/migrations-v52.test.ts new file mode 100644 index 00000000..d92b6400 --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v52.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, MIGRATIONS, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; + +function migratedDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +function insertGlobal( + db: Database, + skillId: string, + projectIdentity: string, + hash: string, + opts: { hit?: number; recall?: number; lastUsed?: number | null; createdAt?: number } = {}, +): void { + db.prepare( + `INSERT INTO skill_memory (skill_id, resolved_path, tier, project_identity, intent, kind, delta, hit_count, recall_count, pinned, normalized_hash, created_at, last_used_at) + VALUES (?, '/p', 'global', ?, 'i', 'fix', 'd-' || ?, ?, ?, 0, ?, ?, ?)`, + ).run( + skillId, + projectIdentity, + hash, + opts.hit ?? 0, + opts.recall ?? 0, + hash, + opts.createdAt ?? Date.now(), + opts.lastUsed ?? null, + ); +} + +describe("migration v52 — origin_project + source_type + global '*' unification", () => { + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v52", () => { + expect(LATEST_SUPPORTED_VERSION).toBe(52); + expect(LATEST_MIGRATION_VERSION).toBe(52); + }); + + test("fresh DB has origin_project + source_type columns", () => { + const db = migratedDb(); + try { + const cols = ( + db.prepare("PRAGMA table_info(skill_memory)").all() as Array<{ name: string }> + ).map((r) => r.name); + expect(cols).toContain("origin_project"); + expect(cols).toContain("source_type"); + } finally { + closeQuietly(db); + } + }); + + test("singleton global note rewritten to '*' with origin_project preserved", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "h1"); + + const v52 = MIGRATIONS.find((m) => m.version === 52); + expect(v52).toBeDefined(); + v52?.up(db); + + const row = db + .prepare( + "SELECT project_identity, origin_project FROM skill_memory WHERE normalized_hash='h1'", + ) + .get() as { project_identity: string; origin_project: string }; + expect(row.project_identity).toBe("*"); + expect(row.origin_project).toBe("git:repoA"); + } finally { + closeQuietly(db); + } + }); + + test("collision-merge: same lesson in 2 repos → one '*' row, summed counters, MAX(last_used_at)", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "dup", { + hit: 2, + recall: 3, + lastUsed: 1000, + createdAt: 500, + }); + insertGlobal(db, "council", "git:repoB", "dup", { + hit: 5, + recall: 1, + lastUsed: 9000, + createdAt: 800, + }); + + MIGRATIONS.find((m) => m.version === 52)?.up(db); + + const rows = db + .prepare( + "SELECT project_identity, hit_count, recall_count, last_used_at, created_at FROM skill_memory WHERE normalized_hash='dup'", + ) + .all() as Array<{ + project_identity: string; + hit_count: number; + recall_count: number; + last_used_at: number; + created_at: number; + }>; + expect(rows.length).toBe(1); + expect(rows[0].project_identity).toBe("*"); + expect(rows[0].hit_count).toBe(7); + expect(rows[0].recall_count).toBe(4); + expect(rows[0].last_used_at).toBe(9000); + expect(rows[0].created_at).toBe(500); + } finally { + closeQuietly(db); + } + }); + + test("idempotent: re-running v52 up() does not double-process '*' rows", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "h1", { hit: 1 }); + + const v52 = MIGRATIONS.find((m) => m.version === 52); + v52?.up(db); + v52?.up(db); + + const rows = db + .prepare("SELECT hit_count FROM skill_memory WHERE normalized_hash='h1'") + .all() as Array<{ hit_count: number }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("FTS index consistent after collision-merge (no orphans)", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "dup"); + insertGlobal(db, "council", "git:repoB", "dup"); + + MIGRATIONS.find((m) => m.version === 52)?.up(db); + + const ftsCount = db.prepare("SELECT COUNT(*) AS n FROM skill_memory_fts").get() as { + n: number; + }; + const rowCount = db.prepare("SELECT COUNT(*) AS n FROM skill_memory").get() as { + n: number; + }; + // Prove the merge actually happened (2 dup rows → 1) so the parity + // assertion below isn't trivially true on a no-op merge. + expect(rowCount.n).toBe(1); + expect(ftsCount.n).toBe(rowCount.n); + } finally { + closeQuietly(db); + } + }); + + test("project-tier rows untouched", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + db.prepare( + `INSERT INTO skill_memory (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, created_at) VALUES ('s', '/p', 'project', 'git:repoA', 'i', 'fix', 'd', 'ph', 1)`, + ).run(); + + MIGRATIONS.find((m) => m.version === 52)?.up(db); + + const row = db + .prepare("SELECT project_identity FROM skill_memory WHERE normalized_hash='ph'") + .get() as { project_identity: string }; + expect(row.project_identity).toBe("git:repoA"); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations.ts b/packages/plugin/src/features/magic-context/migrations.ts index 7321c694..4ab16f55 100644 --- a/packages/plugin/src/features/magic-context/migrations.ts +++ b/packages/plugin/src/features/magic-context/migrations.ts @@ -47,7 +47,12 @@ function assertForeignKeyIntegrity(db: Database, table?: string): void { } } -const MIGRATIONS: Migration[] = [ +function columnExists(db: Database, table: string, column: string): boolean { + const rows = db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name?: string }>; + return rows.some((row) => row.name === column); +} + +export const MIGRATIONS: Migration[] = [ { version: 1, description: "Merge session_notes + smart_notes into unified notes table", @@ -1877,6 +1882,177 @@ const MIGRATIONS: Migration[] = [ `); }, }, + { + // Skill-memory P1: per-skill cross-session recall. Originally v38, then + // v39/v42 across earlier rebases; renumbered to v50 after upstream v0.27 + // shipped its own v42–v49 (dreamer scheduling, memory verification, + // classification scope, per-model embedding coexistence, etc.). + version: 50, + description: "Add skill_memory table for per-skill cross-session recall", + up: (db: Database) => { + db.exec(` + CREATE TABLE IF NOT EXISTS skill_memory ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + skill_id TEXT NOT NULL, + resolved_path TEXT NOT NULL, + tier TEXT NOT NULL CHECK(tier IN ('project', 'global')), + skill_source TEXT CHECK(skill_source IN ( + 'opencode-project', 'opencode-global', + 'claude-skills', 'agents-skills' + )), + project_identity TEXT NOT NULL, + intent TEXT NOT NULL, + intent_embedding BLOB, + embedding_model_version TEXT, + kind TEXT NOT NULL CHECK(kind IN ('gotcha', 'discovery', 'fix', 'workflow')), + delta TEXT NOT NULL, + tags TEXT, + hit_count INTEGER NOT NULL DEFAULT 0, + pinned INTEGER NOT NULL DEFAULT 0 CHECK(pinned IN (0, 1)), + normalized_hash TEXT NOT NULL, + created_at INTEGER NOT NULL, + last_used_at INTEGER, + UNIQUE(skill_id, tier, project_identity, normalized_hash) + ); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_lookup + ON skill_memory(skill_id, tier, project_identity, last_used_at DESC); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_fts_prep + ON skill_memory(skill_id, tier, project_identity, kind); + `); + }, + }, + + { + // Skill-memory P2: was v39/v43 across earlier rebases; renumbered to v51 + // after upstream v0.27 took v42–v49 (skill-P1 is now v50). + version: 51, + description: + "Skill-memory P2: delta_embedding + recall_count columns + skill_memory_fts FTS5 vtable", + up: (db: Database) => { + // skill_memory is migration-only (created by v50); ALTER is safe here. + if (!columnExists(db, "skill_memory", "delta_embedding")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN delta_embedding BLOB;`); + } + + // recall_count: read-side usage counter, bumped each time a note is surfaced + // in a recall block (distinct from hit_count, which is write-side re-record salience). + // Answers "which notes are recalled most". NOT_NULL+DEFAULT is valid in ALTER ADD COLUMN. + if (!columnExists(db, "skill_memory", "recall_count")) { + db.exec( + `ALTER TABLE skill_memory ADD COLUMN recall_count INTEGER NOT NULL DEFAULT 0;`, + ); + } + + // FTS5 over (intent, delta), content-linked to skill_memory — mirrors memories_fts. + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS skill_memory_fts USING fts5( + intent, + delta, + content='skill_memory', + content_rowid='id', + tokenize='porter unicode61' + ); + + CREATE TRIGGER IF NOT EXISTS skill_memory_ai AFTER INSERT ON skill_memory BEGIN + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_ad AFTER DELETE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_au AFTER UPDATE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + `); + + // Backfill the FTS index for any existing skill_memory rows. External-content FTS5 tables + // expose content rowids immediately, so a `NOT IN (SELECT rowid FROM …_fts)` guard is a no-op; + // the 'rebuild' command is the correct way to (re)populate an external-content index. + db.exec(`INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild');`); + }, + }, + { + // Skill-memory historian extraction: was v41/v44 across earlier rebases; + // renumbered to v52 after upstream v0.27 took v42–v49 (skill is now v50/51). + version: 52, + description: + "Skill-memory historian extraction: origin_project + source_type columns; unify global-tier notes under project_identity='*' (collision-merge)", + up: (db: Database) => { + db.transaction(() => { + if (!columnExists(db, "skill_memory", "origin_project")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN origin_project TEXT;`); + } + if (!columnExists(db, "skill_memory", "source_type")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN source_type TEXT;`); + } + + // resolved_path stays TEXT NOT NULL; historian writes the '' sentinel + // (handled in storage layer, not here). + const groups = db + .prepare( + `SELECT skill_id, normalized_hash, COUNT(*) AS n, MIN(created_at) AS min_created, + SUM(hit_count) AS sum_hit, SUM(recall_count) AS sum_recall, MAX(last_used_at) AS max_used + FROM skill_memory + WHERE tier='global' AND project_identity != '*' + GROUP BY skill_id, normalized_hash HAVING COUNT(*) > 1`, + ) + .all() as Array<{ + skill_id: string; + normalized_hash: string; + n: number; + min_created: number; + sum_hit: number; + sum_recall: number; + max_used: number | null; + }>; + for (const g of groups) { + const survivor = db + .prepare( + `SELECT id, project_identity FROM skill_memory + WHERE skill_id=? AND normalized_hash=? AND tier='global' AND project_identity != '*' + ORDER BY created_at ASC, id ASC LIMIT 1`, + ) + .get(g.skill_id, g.normalized_hash) as { + id: number; + project_identity: string; + }; + db.prepare( + `DELETE FROM skill_memory WHERE skill_id=? AND normalized_hash=? AND tier='global' AND project_identity != '*' AND id != ?`, + ).run(g.skill_id, g.normalized_hash, survivor.id); + db.prepare( + `UPDATE skill_memory SET hit_count=?, recall_count=?, last_used_at=?, origin_project=?, project_identity='*' WHERE id=?`, + ).run( + g.sum_hit, + g.sum_recall, + g.max_used, + survivor.project_identity, + survivor.id, + ); + } + + // Defensive (S4): drop any pre-'*' row whose (skill_id, normalized_hash) + // already has a '*' sibling. Dead code in normal flow — v41 is the only + // writer of '*' rows and runs atomically, so a pre-'*' row can't coexist + // with a '*' sibling after a clean run. It only fires if a prior v41 run + // was interrupted after creating some '*' rows but before finishing; in + // that case the '*' row is canonical and the leftover pre-'*' row is + // dropped rather than colliding on the singleton UPDATE below. + db.prepare( + `DELETE FROM skill_memory AS s + WHERE s.tier='global' AND s.project_identity != '*' + AND EXISTS (SELECT 1 FROM skill_memory g WHERE g.tier='global' AND g.project_identity='*' AND g.skill_id=s.skill_id AND g.normalized_hash=s.normalized_hash)`, + ).run(); + + db.prepare( + `UPDATE skill_memory SET origin_project = project_identity, project_identity = '*' WHERE tier='global' AND project_identity != '*'`, + ).run(); + })(); + }, + }, ]; /** diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts new file mode 100644 index 00000000..58d70b85 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts @@ -0,0 +1,134 @@ +import { describe, expect, test } from "bun:test"; +import { parseFrontmatterConfig } from "./frontmatter"; + +describe("parseFrontmatterConfig", () => { + test("returns null when no frontmatter present", () => { + expect(parseFrontmatterConfig("# Skill\nNo frontmatter here.")).toBeNull(); + }); + + test("returns null when frontmatter has no skill-memory block", () => { + const content = `---\ntitle: My Skill\n---\n# Skill`; + expect(parseFrontmatterConfig(content)).toBeNull(); + }); + + test("returns null when skill-memory.enabled is false or absent", () => { + const content = `---\nskill-memory:\n enabled: false\n---\n# Skill`; + expect(parseFrontmatterConfig(content)).toBeNull(); + }); + + test("returns config when skill-memory.enabled is true", () => { + const content = `---\nskill-memory:\n enabled: true\n max_tokens: 2000\n dedup_threshold: 0.88\n---\n# Skill`; + const config = parseFrontmatterConfig(content); + expect(config).not.toBeNull(); + expect(config!.enabled).toBe(true); + expect(config!.max_tokens).toBe(2000); + expect(config!.dedup_threshold).toBe(0.88); + }); + + test("uses defaults when optional fields are absent", () => { + const content = `---\nskill-memory:\n enabled: true\n---\n# Skill`; + const config = parseFrontmatterConfig(content); + expect(config!.max_tokens).toBe(1500); + expect(config!.max_pinned_tokens).toBe(4000); + expect(config!.dedup_threshold).toBe(0.92); + }); + + test("returns null on malformed YAML (non-choke)", () => { + const content = `---\nskill-memory:\n enabled: [invalid yaml\n---\n# Skill`; + // Must not throw — malformed config = inert + expect(() => parseFrontmatterConfig(content)).not.toThrow(); + expect(parseFrontmatterConfig(content)).toBeNull(); + }); + + test("returns null when skill-memory block is not an object", () => { + const content = `---\nskill-memory: true\n---\n# Skill`; + expect(parseFrontmatterConfig(content)).toBeNull(); + }); + + test("parses flat ranking_* keys as numbers", () => { + const md = `--- +skill-memory: + enabled: true + ranking_relevance: 0.7 + ranking_recency: 0.2 + ranking_hit: 0.1 +--- +body`; + const cfg = parseFrontmatterConfig(md); + expect(cfg?.ranking_relevance).toBe(0.7); + expect(cfg?.ranking_recency).toBe(0.2); + expect(cfg?.ranking_hit).toBe(0.1); + }); + + test("ranking_* default to undefined when omitted (recall applies defaults)", () => { + const md = `--- +skill-memory: + enabled: true +--- +body`; + const cfg = parseFrontmatterConfig(md); + expect(cfg?.ranking_relevance).toBeUndefined(); + }); + + test("does NOT misparse a later --- horizontal rule as frontmatter (start-anchored)", () => { + // No real frontmatter; a horizontal-rule pair appears mid-document. With + // an `m`-flagged regex `^` would match the rule's line start and capture + // the block between the rules as config. Must return null. + const md = `# Skill\n\nSome prose.\n\n---\nskill-memory:\n enabled: true\n---\n\nMore prose.`; + expect(parseFrontmatterConfig(md)).toBeNull(); + }); + + test("honors enabled: true with a trailing inline comment", () => { + const md = `---\nskill-memory:\n enabled: true # motor memory on\n max_tokens: 2000 # bump it\n---\nbody`; + const cfg = parseFrontmatterConfig(md); + expect(cfg).not.toBeNull(); + expect(cfg!.enabled).toBe(true); + expect(cfg!.max_tokens).toBe(2000); + }); + + test("honors an inline comment on the skill-memory block header", () => { + const md = `---\nskill-memory: # procedural recall\n enabled: true\n---\nbody`; + expect(parseFrontmatterConfig(md)?.enabled).toBe(true); + }); + + test("parses the inline flow-mapping form the docs/remediation advertise", () => { + // ctx_skill_recall remediation + ARCHITECTURE/CONFIGURATION/README all tell + // users to add `skill-memory: { enabled: true }`. The parser MUST accept it + // or the guidance is dead-on-arrival. + const cfg = parseFrontmatterConfig("---\nskill-memory: { enabled: true }\n---\n# Skill"); + expect(cfg).not.toBeNull(); + expect(cfg!.enabled).toBe(true); + }); + + test("parses inline flow-mapping with multiple keys", () => { + const cfg = parseFrontmatterConfig( + "---\nskill-memory: { enabled: true, max_tokens: 2000, dedup_threshold: 0.8 }\n---\nbody", + ); + expect(cfg!.enabled).toBe(true); + expect(cfg!.max_tokens).toBe(2000); + expect(cfg!.dedup_threshold).toBe(0.8); + }); + + test("inline form with enabled: false stays inert", () => { + expect( + parseFrontmatterConfig("---\nskill-memory: { enabled: false }\n---\nbody"), + ).toBeNull(); + }); + + test("a plain quoted scalar still enables", () => { + const md = `---\nskill-memory:\n enabled: "true"\n---\nbody`; + expect(parseFrontmatterConfig(md)?.enabled).toBe(true); + }); + + test("does NOT strip a '#' inside a quoted scalar (comment-strip is unquoted-only)", () => { + // Quoted value containing a '#': the inline-comment strip must NOT fire, + // so the value stays the literal "true # x" (≠ "true") and the config is + // inert. If the '#' WERE wrongly stripped, it would collapse to "true" + // and enable — so a passing assertion proves the '#' was preserved. + const quoted = `---\nskill-memory:\n enabled: "true # x"\n---\nbody`; + expect(parseFrontmatterConfig(quoted)).toBeNull(); + // Contrast: the SAME text UNquoted is a real inline comment → stripped → enables. + const unquoted = `---\nskill-memory:\n enabled: true # x\n---\nbody`; + expect(parseFrontmatterConfig(unquoted)?.enabled).toBe(true); + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts new file mode 100644 index 00000000..c235c52d --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts @@ -0,0 +1,180 @@ +/** + * Minimal YAML frontmatter parser for the `skill-memory:` block. + * Does NOT depend on a full YAML library — parses only the specific + * `skill-memory:` sub-block using line-by-line key:value extraction. + * Malformed or absent blocks return null (inert). A bad config in one + * skill cannot break other skills. + */ + +export interface SkillMemoryConfig { + enabled: true; + max_tokens: number; + max_pinned_tokens: number; + dedup_threshold: number; + ranking_relevance?: number; + ranking_recency?: number; + ranking_hit?: number; +} + +// Anchored to the very start of the file (NO `m` flag): frontmatter is only +// valid as the first bytes of the document. With `m`, `^` matches any line +// start, so a later `--- ... ---` block (e.g. a markdown horizontal rule) could +// be misparsed as config. A leading UTF-8 BOM and leading whitespace/blank +// lines are tolerated (`\uFEFF?\s*`) so an editor-saved SKILL.md with a BOM or a +// stray blank first line still parses; this stays start-anchored because `\s*` +// only spans leading whitespace before the first `---`, never a mid-document rule. +const FRONTMATTER_REGEX = /^\uFEFF?\s*---\r?\n([\s\S]*?)\r?\n---/; + +export function parseFrontmatterConfig(content: string): SkillMemoryConfig | null { + try { + const fmMatch = content.match(FRONTMATTER_REGEX); + if (!fmMatch) return null; + + const fmText = fmMatch[1]; + const skillMemoryBlock = extractSkillMemoryBlock(fmText); + if (!skillMemoryBlock) return null; + + const enabled = skillMemoryBlock.enabled; + if (enabled !== true && enabled !== "true") return null; + + return { + enabled: true, + max_tokens: toNumber(skillMemoryBlock.max_tokens, 1500), + max_pinned_tokens: toNumber(skillMemoryBlock.max_pinned_tokens, 4000), + dedup_threshold: toNumber(skillMemoryBlock.dedup_threshold, 0.92), + ranking_relevance: toOptionalNumber(skillMemoryBlock.ranking_relevance), + ranking_recency: toOptionalNumber(skillMemoryBlock.ranking_recency), + ranking_hit: toOptionalNumber(skillMemoryBlock.ranking_hit), + }; + } catch { + // Non-choke: malformed config = inert + return null; + } +} + +function toNumber(value: unknown, defaultValue: number): number { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return defaultValue; +} + +function toOptionalNumber(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return undefined; +} + +/** + * Extract the `skill-memory:` sub-block from YAML frontmatter text. + * Returns a flat key→value map of the block's immediate children. + * Returns null if the block is absent or not an object. + */ +function extractSkillMemoryBlock(fmText: string): Record | null { + const lines = fmText.split(/\r?\n/); + let inSkillMemory = false; + const result: Record = {}; + let found = false; + + for (const line of lines) { + if (!inSkillMemory) { + // Inline flow-mapping form on the header line: + // `skill-memory: { enabled: true, max_tokens: 2000 }` + // This is the form the ctx_skill_recall remediation message and the + // root docs advertise, so it MUST parse — otherwise a user following + // the guidance silently gets an inert config. Parse the {...} body + // into the same flat map the block form produces, then stop (a flow + // mapping is self-contained on one line). + const inlineMatch = line.match(/^skill-memory:\s*\{(.*)\}\s*(#.*)?$/); + if (inlineMatch) { + found = true; + for (const pair of splitFlowEntries(inlineMatch[1])) { + const sep = pair.indexOf(":"); + if (sep < 0) continue; + const key = pair.slice(0, sep).trim(); + if (!/^\w+$/.test(key)) continue; + result[key] = parseYamlScalar(pair.slice(sep + 1).trim()); + } + break; + } + // Tolerate a trailing inline comment after the block header + // (`skill-memory: # motor memory`), which is valid YAML. + if (/^skill-memory:\s*(#.*)?$/.test(line)) { + inSkillMemory = true; + found = true; + } + continue; + } + // End of skill-memory block: a line that starts without indentation + if (/^\S/.test(line)) break; + // Parse indented key: value lines + const kvMatch = line.match(/^\s{2,}(\w+):\s*(.*)$/); + if (kvMatch) { + const key = kvMatch[1]; + const rawVal = kvMatch[2].trim(); + result[key] = parseYamlScalar(rawVal); + } + } + + return found ? result : null; +} + +/** + * Split a YAML flow-mapping body (the text inside `{...}`) on top-level commas, + * leaving quoted segments intact. Minimal — the skill-memory config is a flat + * map of scalar values, so we don't need nested {}/[] handling. + */ +function splitFlowEntries(body: string): string[] { + const entries: string[] = []; + let current = ""; + let quote: '"' | "'" | null = null; + for (const ch of body) { + if (quote) { + current += ch; + if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'") { + quote = ch; + current += ch; + continue; + } + if (ch === ",") { + entries.push(current); + current = ""; + continue; + } + current += ch; + } + if (current.trim()) entries.push(current); + return entries; +} + +function parseYamlScalar(raw: string): unknown { + // Strip an inline `# comment` for UNQUOTED scalars (YAML requires whitespace + // before the `#`). Quoted values keep their content verbatim so a literal + // "#" inside quotes survives. Without this, `enabled: true # on` would parse + // as the string "true # on" and silently fail the strict true/false check. + const isQuoted = + (raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'")); + if (!isQuoted) { + const commentIdx = raw.search(/\s#/); + if (commentIdx >= 0) raw = raw.slice(0, commentIdx).trim(); + } + + if (raw === "true") return true; + if (raw === "false") return false; + if (raw === "null" || raw === "~") return null; + const num = Number(raw); + if (raw !== "" && Number.isFinite(num)) return num; + // Strip surrounding quotes + if ((raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'"))) { + return raw.slice(1, -1); + } + return raw; +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts new file mode 100644 index 00000000..914a064e --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; +import { promoteSkillObservations } from "./promote"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("promoteSkillObservations", () => { + test("direct-writes a global '*' note with historian provenance", () => { + const db = makeDb(); + try { + const n = promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "gotcha", lesson: "aggregator needs a fast model" }, + ]); + expect(n).toBe(1); + const row = db + .prepare( + "SELECT tier, project_identity, origin_project, source_type, resolved_path, kind FROM skill_memory", + ) + .get() as Record; + expect(row.tier).toBe("global"); + expect(row.project_identity).toBe("*"); + expect(row.origin_project).toBe("git:repoA"); + expect(row.source_type).toBe("historian"); + expect(row.resolved_path).toBe(""); + expect(row.kind).toBe("gotcha"); + } finally { + closeQuietly(db); + } + }); + + test("exact-hash duplicate bumps hit_count instead of inserting", () => { + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "fix", lesson: "same lesson" }, + ]); + const n = promoteSkillObservations(db, "git:repoB", [ + { skillId: "council", kind: "fix", lesson: "same lesson" }, + ]); + expect(n).toBe(0); + const rows = db.prepare("SELECT hit_count FROM skill_memory").all() as Array<{ + hit_count: number; + }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("rejects kind='general'", () => { + const db = makeDb(); + try { + const n = promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "general" as never, lesson: "x" }, + ]); + expect(n).toBe(0); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.ts new file mode 100644 index 00000000..c94abc9e --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.ts @@ -0,0 +1,66 @@ +import { log } from "../../../shared/logger"; +import type { Database } from "../../../shared/sqlite"; +import { computeNormalizedHash } from "../memory/normalize-hash"; +import { bumpHitCount, findExistingNote, insertSkillMemoryNote, partitionKey } from "./storage"; + +const VALID_KINDS = new Set(["gotcha", "discovery", "fix", "workflow"]); + +export interface SkillObservation { + skillId: string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + lesson: string; +} + +/** + * Direct-write historian-extracted skill observations as GLOBAL-tier notes under + * the '*' partition (source_type='historian', resolved_path='' sentinel). Hash-dedup: + * an exact-hash match bumps hit_count instead of inserting. Returns the number of + * NEW notes written (dups excluded). Best-effort per item: never throws. + */ +export function promoteSkillObservations( + db: Database, + originProject: string, + observations: SkillObservation[], +): number { + let written = 0; + const tier = "global" as const; + const part = partitionKey(tier, originProject); + + for (const obs of observations) { + if (!obs.skillId || !obs.lesson || !VALID_KINDS.has(obs.kind)) continue; + + try { + const normalizedHash = computeNormalizedHash(obs.lesson); + const existing = findExistingNote(db, obs.skillId, tier, part, normalizedHash); + if (existing) { + bumpHitCount(db, obs.skillId, tier, part, normalizedHash); + continue; + } + + const id = insertSkillMemoryNote(db, { + skillId: obs.skillId, + resolvedPath: "", + tier, + skillSource: null, + projectIdentity: part, + originProject, + sourceType: "historian", + intent: obs.lesson, + kind: obs.kind, + delta: obs.lesson, + normalizedHash, + createdAt: Date.now(), + }); + if (id !== null) written++; + } catch (err) { + // Best-effort: one bad observation must not block the publish, but + // log it so silent persistence failures (schema drift, DB lock, + // constraint violation) remain observable. + log( + `[skill-memory] promoteSkillObservations: skipped observation for skill "${obs.skillId}" — ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + + return written; +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts new file mode 100644 index 00000000..48ba3797 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, test } from "bun:test"; +import { parseSkillProvenance } from "./provenance"; + +// Build fixture paths from process.env.HOME to avoid hardcoded /home/icetea paths +// that break on Mac, CI, or root environments. +const HOME = process.env.HOME ?? process.env.USERPROFILE ?? "/home/user"; + +describe("parseSkillProvenance", () => { + test("parses Base directory line from skill output (global skill)", () => { + const output = `# Skill Content\nSome content here.\nBase directory for this skill: file://${HOME}/.config/opencode/skills/trilium`; + const result = parseSkillProvenance(output, "trilium"); + expect(result).not.toBeNull(); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/trilium/SKILL.md`); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); + + test("parses Base directory line from skill output (project skill)", () => { + const output = `# Skill Content\nBase directory for this skill: file://${HOME}/projects/magic-context/.agents/skills/find-docs`; + const result = parseSkillProvenance(output, "find-docs"); + expect(result).not.toBeNull(); + expect(result!.resolvedPath).toBe( + `${HOME}/projects/magic-context/.agents/skills/find-docs/SKILL.md`, + ); + expect(result!.tier).toBe("project"); + expect(result!.skillSource).toBe("agents-skills"); + }); + + test("returns null when Base directory line is absent", () => { + const output = "# Skill Content\nNo base directory line here."; + expect(parseSkillProvenance(output, "some-skill")).toBeNull(); + }); + + test("handles ~/.claude/skills/ as global tier", () => { + const output = `Base directory for this skill: file://${HOME}/.claude/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("claude-skills"); + }); + + test("handles ~/.agents/skills/ as global tier", () => { + const output = `Base directory for this skill: file://${HOME}/.agents/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("agents-skills"); + }); + + test("handles .opencode/skills/ as project tier", () => { + const output = `Base directory for this skill: file://${HOME}/projects/foo/.opencode/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("project"); + expect(result!.skillSource).toBe("opencode-project"); + }); + + test("handles the SINGULAR ~/.config/opencode/skill/ global path (OPENCODE_SKILL_PATTERN covers both)", () => { + // Regression: opencode's pattern is {skill,skills}/**/SKILL.md, so the + // global config dir resolves under singular `skill/` too. Before the fix, + // deriveSkillTier classified it as project → notes written to the wrong + // partition + cold recall couldn't find SKILL.md. + const output = `Base directory for this skill: file://${HOME}/.config/opencode/skill/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); + + test("handles the plural ~/.config/opencode/skills/ global path", () => { + const output = `Base directory for this skill: file://${HOME}/.config/opencode/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); + + test("takes the LAST line-anchored match when skill CONTENT echoes the marker phrase", () => { + // A skill that documents skill-memory itself could contain the marker + // phrase in its body. opencode appends the REAL provenance line last, so + // last-match must win — a first-match parse would resolve the bogus URL. + const output = + `# Skill: skill-memory internals\n` + + `Example: Base directory for this skill: file:///decoy/path/evil-skill\n` + + `more prose\n` + + `Base directory for this skill: file://${HOME}/.config/opencode/skills/real-skill`; + const result = parseSkillProvenance(output, "real-skill"); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/real-skill/SKILL.md`); + expect(result!.tier).toBe("global"); + }); + + test("ignores a mid-line (non-line-anchored) marker mention", () => { + // "see the Base directory for this skill: file:///x" embedded mid-sentence + // (not at column 0) must NOT be captured. + const output = `Note: see the Base directory for this skill: file:///wrong/x for details.\nBase directory for this skill: file://${HOME}/.config/opencode/skills/right`; + const result = parseSkillProvenance(output, "right"); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/right/SKILL.md`); + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts new file mode 100644 index 00000000..3cf00000 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts @@ -0,0 +1,111 @@ +import { fileURLToPath } from "node:url"; + +export interface SkillProvenance { + resolvedPath: string; + tier: "project" | "global"; + skillSource: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills"; + skillId: string; + loadedAt: number; +} + +// Matches: "Base directory for this skill: file:///abs/path/to/skill/dir" +// Uses fileURLToPath (not naive regex capture) for cross-platform correctness. +// Anchored to line-start (`^…/gm`) AND we take the LAST match: opencode appends +// this provenance line at the END of the tool output, so if the skill's own +// CONTENT contains the same phrase (e.g. a skill documenting skill-memory +// provenance), a first-match/unanchored parse would capture the wrong URL and +// misdirect recall to a bogus skill identity. Line-anchoring rejects mid-prose +// mentions; last-match ensures the real trailing provenance line wins even if an +// example block reproduces it at column 0. +const BASE_DIR_REGEX = /^Base directory for this skill: (file:\/\/\/[^\n\r]+)/gm; + +export function parseSkillProvenance(output: string, skillId: string): SkillProvenance | null { + const matches = [...output.matchAll(BASE_DIR_REGEX)]; + if (matches.length === 0) return null; + + const fileUrl = matches[matches.length - 1][1].trim(); + let absDir: string; + try { + // Normalize OS-native separators to forward slashes: on Windows + // fileURLToPath yields backslash paths, which would fail the + // forward-slash startsWith/includes tier/source checks below and + // misclassify global skills as project-local. + absDir = fileURLToPath(new URL(fileUrl)).replace(/\\/g, "/"); + } catch { + return null; + } + + const resolvedPath = `${absDir}/SKILL.md`; + const tier = deriveSkillTier(absDir); + const skillSource = deriveSkillSource(absDir); + + return { resolvedPath, tier, skillSource, skillId, loadedAt: Date.now() }; +} + +export function deriveSkillTier(absDir: string): "project" | "global" { + // Global dirs (discovered via Global.Path.config or EXTERNAL_DIR constants): + // ~/.config/opencode/skills/ — via config.directories() + {skill,skills}/**/SKILL.md + // ~/.agents/skills/ — via AGENTS_EXTERNAL_DIR + skills/**/SKILL.md + // ~/.claude/skills/ — via CLAUDE_EXTERNAL_DIR + skills/**/SKILL.md + const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").replace(/\\/g, "/"); + if ( + // opencode's OPENCODE_SKILL_PATTERN is `{skill,skills}/**/SKILL.md`, so + // the global config dir resolves under BOTH singular and plural. + absDir.startsWith(`${home}/.config/opencode/skills/`) || + absDir.startsWith(`${home}/.config/opencode/skill/`) || + absDir.startsWith(`${home}/.agents/skills/`) || + absDir.startsWith(`${home}/.claude/skills/`) + ) { + return "global"; + } + return "project"; +} + +export function deriveSkillSource( + absDir: string, +): "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" { + const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").replace(/\\/g, "/"); + if ( + absDir.startsWith(`${home}/.config/opencode/skills/`) || + absDir.startsWith(`${home}/.config/opencode/skill/`) + ) + return "opencode-global"; + if (absDir.startsWith(`${home}/.claude/skills/`)) return "claude-skills"; + if (absDir.includes("/.agents/skills/")) return "agents-skills"; + // Both singular .opencode/skill/ and plural .opencode/skills/ are valid — + // opencode's OPENCODE_SKILL_PATTERN = "{skill,skills}/**/SKILL.md" covers both. + if (absDir.includes("/.opencode/skill/") || absDir.includes("/.opencode/skills/")) { + return "opencode-project"; + } + return "opencode-project"; // default for unknown project-local paths +} + +// ── Session-scoped skill-load registry ───────────────────────────────────── +// Key: `${sessionId}:${skillId}` — populated in tool.execute.after when +// input.tool === "skill". Cleaned up in onSessionDeleted. +// NOT persisted. No leak. + +export type SkillLoadRegistry = Map< + string, + SkillProvenance & { frontmatterConfig: import("./frontmatter").SkillMemoryConfig | null } +>; + +export function createSkillLoadRegistry(): SkillLoadRegistry { + return new Map(); +} + +export function registryKey(sessionId: string, skillId: string): string { + return `${sessionId}:${skillId}`; +} + +export function getSkillLoad( + registry: SkillLoadRegistry, + sessionId: string, + skillId: string, +): + | (SkillProvenance & { + frontmatterConfig: import("./frontmatter").SkillMemoryConfig | null; + }) + | undefined { + return registry.get(registryKey(sessionId, skillId)); +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts new file mode 100644 index 00000000..01608e0f --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -0,0 +1,551 @@ +import { describe, expect, mock, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; +import { promoteSkillObservations } from "./promote"; +import { + buildSkillMemoryBlock, + flatRecall, + rankRung1, + recallSkillMemoryBlock, + sanitizeSkillIntentForFts, +} from "./recall"; +import { insertSkillMemoryNote } from "./storage"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("flatRecall", () => { + test("returns empty array when no notes exist (cold-start rung 5)", () => { + const db = makeDb(); + try { + const notes = flatRecall(db, "nonexistent-skill", "global", "git:abc", { + maxTokens: 1500, + maxPinnedTokens: 4000, + }); + expect(notes).toEqual([]); + } finally { + closeQuietly(db); + } + }); + + test("returns notes up to token budget", () => { + const db = makeDb(); + try { + const now = Date.now(); + for (let i = 0; i < 5; i++) { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "intent", + kind: "gotcha", + delta: `note ${i} — ${"x".repeat(40)}`, + normalizedHash: `h${i}`, + createdAt: now - i * 1000, + }); + } + // Token-budget truncation test (arithmetic verified, framing-inclusive): + // delta = "note N — " (9 chars) + 40 "x"s = 49 chars → Math.ceil(49/4) = 13 tokens, + // plus NOTE_FRAMING_TOKENS (20) for the wrapper = 33 tokens each. + // maxTokens: 70 → 1st fits (33 ≤ 70), 2nd fits (33+33=66 ≤ 70), + // 3rd would exceed (66+33=99 > 70) → exactly 2 notes fit. + const notes = flatRecall(db, "tdd", "global", "git:abc", { + maxTokens: 70, // 2 notes × 33 tokens = 66 ≤ 70; 3rd would push to 99 > 70 + maxPinnedTokens: 4000, + }); + expect(notes.length).toBe(2); + } finally { + closeQuietly(db); + } + }); +}); + +describe("sanitizeSkillIntentForFts", () => { + test("quotes tokens and neutralizes FTS operators", () => { + expect(sanitizeSkillIntentForFts("debug AND fix (urgent)")).toBe( + '"debug" OR "and" OR "fix" OR "urgent"', + ); + expect(sanitizeSkillIntentForFts("!!!")).toBe(""); + expect(sanitizeSkillIntentForFts('say "hi"')).toBe('"say" OR "hi"'); + }); +}); + +describe("rankRung1", () => { + test("clamps negative cosine and guards div-by-zero", () => { + const q = new Float32Array([1, 0]); + const notes = [ + { + id: 1, + intentVec: new Float32Array([0, 1]), + deltaVec: new Float32Array([0, 1]), + ts: 5, + hit: 0, + }, + ]; + const ranked = rankRung1(q, notes, { relevance: 0.6, recency: 0.25, hit: 0.15 }); + expect(ranked.length).toBe(1); + expect(Number.isNaN(ranked[0].score)).toBe(false); + }); + + test("orders by weighted blend (relevance leads)", () => { + const q = new Float32Array([1, 0]); + const notes = [ + { + id: 1, + intentVec: new Float32Array([1, 0]), + deltaVec: new Float32Array([1, 0]), + ts: 1, + hit: 0, + }, + { + id: 2, + intentVec: new Float32Array([0, 1]), + deltaVec: new Float32Array([0, 1]), + ts: 100, + hit: 50, + }, + ]; + const ranked = rankRung1(q, notes, { relevance: 0.6, recency: 0.25, hit: 0.15 }); + expect(ranked[0].id).toBe(1); + }); +}); + +let EMBED_UP = true; +mock.module("../memory/embedding", () => ({ + embedTextForProject: async (_p: string, text: string) => + EMBED_UP + ? { + vector: text.includes("auth") + ? new Float32Array([1, 0]) + : new Float32Array([0, 1]), + modelId: "m1", + generation: 1, + } + : null, +})); + +function modeOf(block: string): string | null { + return block.match(/]*\bmode="([^"]+)"/)?.[1] ?? null; +} +const cfg = { + enabled: true as const, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, +}; + +describe("recallSkillMemoryBlock (intent-scoped rungs)", () => { + test("rung 1 full: provider up + intent + a model-matched embedded note", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix auth", + kind: "fix", + delta: "auth note", + normalizedHash: "h1", + createdAt: 1, + intentEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + deltaEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "m1", + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth bug", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("full"); + }); + + test("rung 2 no-intent: provider up, no intent → flat", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("no-intent"); + }); + + test("rung 3 fts5-fallback: provider down + intent → FTS", async () => { + const db = makeDb(); + EMBED_UP = false; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix auth flake", + kind: "fix", + delta: "mock timers", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("fts5-fallback"); + }); + + test("rung 4 flat-fts: provider down + UNINDEXABLE intent (sanitize→empty) → flat", async () => { + const db = makeDb(); + EMBED_UP = false; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "!!! ???", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("flat-fts"); + }); + + test("rung 5 cold: no notes → empty block", async () => { + const db = makeDb(); + EMBED_UP = true; + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "x", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(block).toBe(""); + }); + + test("zero model-matched → falls to rung 3, never empty full block", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "auth fix", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + intentEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "OLD-model", + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("fts5-fallback"); + }); + + test("intent-scoped recall matches a note sharing SOME (not all) intent tokens (OR semantics)", async () => { + const db = makeDb(); + EMBED_UP = false; // force rung 3 FTS path + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: null, + projectIdentity: "git:abc", + intent: "fix the flaky auth login test", + kind: "fix", + delta: "mock the system clock in auth specs", + normalizedHash: "h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: null, + projectIdentity: "git:abc", + intent: "speed up the docker build cache", + kind: "discovery", + delta: "layer ordering matters", + normalizedHash: "h2", + createdAt: Date.now(), + }); + // A multi-token NL intent that shares SOME tokens with note 1 (auth, test) but NOT all — + // under AND-join this matches ZERO notes (the bug); under OR-join + bm25 it returns note 1. + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + intent: "auth test timing clock stabilization", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + expect(block).not.toBe(""); // RED with AND (empty), GREEN with OR + expect(block).toContain('mode="fts5-fallback"'); // proves rung-3 path + expect(block).toContain("mock the system clock"); // note 1's delta — the relevant note surfaced + expect(block).not.toContain("layer ordering"); // note 2 (docker) shares no tokens → not matched + } finally { + closeQuietly(db); + } + }); + + test("pinned notes appear even when intent doesn't match them (M2)", async () => { + const db = makeDb(); + EMBED_UP = true; + db.prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','*','old auth fix','fix','rotate token','h1',0,1,1)`, + ).run(); + for (let i = 0; i < 10; i++) { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: `note ${i}`, + kind: "fix", + delta: `delta ${i}`, + normalizedHash: `h${i + 2}`, + createdAt: 1000 + i, + intentEmbedding: float32ArrayToBlob(new Float32Array([0, 1])), + deltaEmbedding: float32ArrayToBlob(new Float32Array([0, 1])), + embeddingModelVersion: "m1", + }); + } + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "frontend css", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(block).toContain("rotate token"); + expect(modeOf(block)).toBe("full"); + }); +}); + +describe("buildSkillMemoryBlock", () => { + test("returns empty string when notes array is empty (cold-start)", () => { + expect(buildSkillMemoryBlock("tdd", "no-intent", [], 0)).toBe(""); + }); + + test("builds correct XML block with notes", () => { + const notes = [ + { + id: 1, + skill_id: "tdd", + kind: "gotcha" as const, + delta: "Always mock the clock", + intent: "fix flaky test", + hit_count: 3, + recall_count: 0, + pinned: 0, + normalized_hash: "h1", + created_at: Date.now(), + last_used_at: Date.now(), + resolved_path: "/p", + tier: "global" as const, + skill_source: "opencode-global" as const, + project_identity: "git:abc", + tags: null, + intent_embedding: null, + embedding_model_version: null, + }, + ]; + const block = buildSkillMemoryBlock("tdd", "no-intent", notes, 0); + expect(block).toContain(' { + test("a global note learned in repo A surfaces when recalled from repo B", async () => { + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [ + { + skillId: "council", + kind: "gotcha", + lesson: "aggregator needs a fast model", + }, + ]); + const block = await recallSkillMemoryBlock(db, { + skill: "council", + scope: "global", + projectIdentity: "git:repoB", + frontmatterConfig: cfg, + }); + expect(block).toContain("aggregator needs a fast model"); + } finally { + closeQuietly(db); + } + }); + + test("a PROJECT-LOCAL skill surfaces a historian-written global note (P1 regression)", async () => { + // Regression: promoteSkillObservations always writes tier='global'/'*', + // but recall for a project-local skill uses scope='project'. Before the + // union-on-recall fix, the project-tier query never matched the global + // '*' rows, so historian notes were orphaned for project-local skills. + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [ + { + skillId: "tdd", + kind: "discovery", + lesson: "spike the parser before writing the plan", + }, + ]); + // Recall as a PROJECT-tier skill (the failing case pre-fix). + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "project", + projectIdentity: "git:repoA", + frontmatterConfig: cfg, + }); + expect(block).toContain("spike the parser before writing the plan"); + } finally { + closeQuietly(db); + } + }); + + test("a project-local AGENT note and a global HISTORIAN note both surface for the same skill", async () => { + const db = makeDb(); + try { + // Agent-written, project-tier note. + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/repo/.opencode/skills/tdd/SKILL.md", + tier: "project", + skillSource: "opencode-project", + projectIdentity: "git:repoLocal", + intent: "local lesson", + kind: "gotcha", + delta: "project-local agent note", + normalizedHash: "pl1", + createdAt: Date.now(), + }); + // Historian-written, global note for the same skill. + promoteSkillObservations(db, "git:repoLocal", [ + { skillId: "tdd", kind: "discovery", lesson: "global historian note" }, + ]); + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "project", + projectIdentity: "git:repoLocal", + frontmatterConfig: cfg, + }); + expect(block).toContain("project-local agent note"); + expect(block).toContain("global historian note"); + } finally { + closeQuietly(db); + } + }); +}); + +describe("recallSkillMemoryBlock bumps recall_count for surfaced notes", () => { + test("a surfaced note's recall_count increments per recall (no-intent rung)", async () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix a flaky test", + kind: "fix", + delta: "mock the clock", + normalizedHash: "rc1", + createdAt: Date.now(), + }); + const cfg = { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000 }; + // Two recalls (no intent → rung 2, which surfaces the note both times). + const b1 = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + const b2 = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + expect(b1).toContain("mock the clock"); + expect(b2).toContain("mock the clock"); + const row = db + .prepare( + "SELECT recall_count, last_used_at FROM skill_memory WHERE normalized_hash='rc1'", + ) + .get() as { recall_count: number; last_used_at: number | null }; + expect(row.recall_count).toBe(2); // bumped once per recall + expect(row.last_used_at).toBeNull(); // recall must NOT touch recency + } finally { + closeQuietly(db); + } + }); + + test("a cold-start recall (no notes) bumps nothing and returns empty", async () => { + const db = makeDb(); + try { + const block = await recallSkillMemoryBlock(db, { + skill: "ghost", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000 }, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts new file mode 100644 index 00000000..7d4c7ef7 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -0,0 +1,301 @@ +import { log } from "../../../shared/logger"; +import type { Database } from "../../../shared/sqlite"; +import { cosineSimilarity } from "../memory/cosine-similarity"; +import { embedTextForProject } from "../memory/embedding"; +import { toFloat32Array } from "../memory/storage-memory-embeddings"; +import type { SkillMemoryConfig } from "./frontmatter"; +import { + bumpRecallCountByIds, + getPinnedNotes, + getRankingCandidates, + getSkillMemoryNotes, + partitionKey, + type SkillMemoryNote, + searchSkillMemoryFts, +} from "./storage"; + +export interface FlatRecallOptions { + maxTokens: number; + maxPinnedTokens: number; +} + +// Per-note +// framing that budgetFill's delta-only estimate would otherwise ignore (~80 +// chars ≈ 20 tokens/note). Counting it keeps the rendered block within max_tokens +// instead of overshooting ~13% on a 10-note block (rev-1 S1). +const NOTE_FRAMING_TOKENS = 20; + +// Rough token estimate: 1 token ≈ 4 chars (conservative for XML overhead) +function estimateTokens(text: string): number { + return Math.ceil(text.length / 4); +} + +/** + * Flat recall (rungs 2 + 4): recency × hit_count, no embeddings. + * Greedy fill by composite score up to maxTokens budget. + * Pinned notes are always included (up to maxPinnedTokens). + */ +export function flatRecall( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + options: FlatRecallOptions, +): SkillMemoryNote[] { + const candidates = getSkillMemoryNotes(db, skillId, tier, projectIdentity, 50); + if (candidates.length === 0) return []; + return budgetFill(candidates, options.maxTokens, options.maxPinnedTokens); +} + +/** + * Build the XML block to append to the skill tool result. + * Returns empty string for cold-start (no notes) — no empty stub injected. + */ +/** Escape a natural-language intent into a safe FTS5 MATCH query: quote each alphanumeric token. */ +export function sanitizeSkillIntentForFts(intent: string): string { + const tokens = intent.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? []; + return tokens.map((t) => `"${t.replace(/"/g, '""')}"`).join(" OR "); +} + +export interface Rung1Note { + id: number; + intentVec: Float32Array | null; + deltaVec: Float32Array | null; + ts: number; + hit: number; +} +export interface Rung1Weights { + relevance: number; + recency: number; + hit: number; +} + +export function rankRung1( + q: Float32Array, + notes: Rung1Note[], + w: Rung1Weights, +): Array<{ id: number; score: number }> { + if (notes.length === 0) return []; + const tsVals = notes.map((n) => n.ts); + const minTs = Math.min(...tsVals), + maxTs = Math.max(...tsVals); + const maxHit = Math.max(...notes.map((n) => n.hit)); + const denR = maxTs - minTs; + return notes + .map((n) => { + const ic = n.intentVec ? cosineSimilarity(q, n.intentVec) : -1; + const dc = n.deltaVec ? cosineSimilarity(q, n.deltaVec) : -1; + const relevance = Math.max(0, Math.max(ic, dc)); + const recency = denR === 0 ? 1.0 : (n.ts - minTs) / denR; + const hitN = maxHit === 0 ? 0.0 : n.hit / maxHit; + return { + id: n.id, + score: w.relevance * relevance + w.recency * recency + w.hit * hitN, + }; + }) + .sort((a, b) => b.score - a.score); +} + +/** Normalize the 3 weights to sum 1; all-zero (or omitted) → defaults. */ +function resolveWeights(cfg: SkillMemoryConfig): Rung1Weights { + const r = cfg.ranking_relevance ?? 0.6, + c = cfg.ranking_recency ?? 0.25, + h = cfg.ranking_hit ?? 0.15; + const sum = r + c + h; + if (sum === 0) return { relevance: 0.6, recency: 0.25, hit: 0.15 }; + return { relevance: r / sum, recency: c / sum, hit: h / sum }; +} + +/** Prepend pinned notes (already pinned-first ordered by getPinnedNotes) ahead of `rest`, deduped by id. */ +function unionPinnedFirst(pinned: SkillMemoryNote[], rest: SkillMemoryNote[]): SkillMemoryNote[] { + const seen = new Set(pinned.map((n) => n.id)); + return [...pinned, ...rest.filter((n) => !seen.has(n.id))]; +} + +/** Greedy fill pinned-first up to token budget; pinned up to maxPinnedTokens, total up to maxTokens. */ +function budgetFill( + notes: SkillMemoryNote[], + maxTokens: number, + maxPinnedTokens: number, +): SkillMemoryNote[] { + const result: SkillMemoryNote[] = []; + let pinnedTokens = 0; + let totalTokens = 0; + // The total budget is the hard ceiling, so the pinned sub-budget can never + // exceed it — clamp so the default max_pinned_tokens (4000) > max_tokens + // (1500) can't imply pinned notes get more room than the whole block (rev-2). + const effectiveMaxPinned = Math.min(maxPinnedTokens, maxTokens); + + for (const note of notes) { + const tokens = estimateTokens(note.delta) + NOTE_FRAMING_TOKENS; + if (note.pinned === 1) { + if (pinnedTokens + tokens > effectiveMaxPinned) continue; + pinnedTokens += tokens; + } + if (totalTokens + tokens > maxTokens) continue; + result.push(note); + totalTokens += tokens; + } + + return result; +} + +export function buildSkillMemoryBlock( + skillId: string, + mode: "no-intent" | "flat-fts" | "full" | "fts5-fallback", + notes: SkillMemoryNote[], + pinnedCount: number, +): string { + if (notes.length === 0) return ""; + + const noteXml = notes + .map((n) => { + const intentAttr = n.intent ? ` intent="${escapeXml(n.intent)}"` : ""; + const pinnedAttr = n.pinned === 1 ? ` pinned="true"` : ` pinned="false"`; + return ( + `\n` + + `${escapeXml(n.delta)}\n` + + `` + ); + }) + .join("\n"); + + const footer = + `\n\n---\n` + + `*After using this skill, call \`ctx_skill_note\` — record only gotchas, novel discoveries, or error→fix; skip routine successes.*`; + + return ( + `\n` + + noteXml + + `\n` + + footer + ); +} + +function escapeXml(str: string): string { + return str + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +/** + * Shared recall core: reads notes from DB, ranks/budgets them, and formats the + * block string. Returns empty string when no notes exist or + * skill-memory is not enabled. + * + * Used by BOTH: + * - maybeInjectSkillMemory (transparent after-hook path) — appends to output.output + * - ctx_skill_recall tool (explicit agent-callable path) — returns as tool result + * + * Lives in the feature layer (not hook-handlers.ts) to avoid tools→hooks layering. + * P2 embeddings benefit both paths automatically when this function is upgraded. + */ +export async function recallSkillMemoryBlock( + db: Database, + opts: { + skill: string; + intent?: string; + scope: "project" | "global"; + projectIdentity: string; + frontmatterConfig: SkillMemoryConfig | null; + maxTokens?: number; + }, +): Promise { + if (!opts.frontmatterConfig?.enabled) return ""; + try { + const part = partitionKey(opts.scope, opts.projectIdentity); + const maxTokens = opts.maxTokens ?? opts.frontmatterConfig.max_tokens; + const maxPinned = opts.frontmatterConfig.max_pinned_tokens; + const intent = opts.intent?.trim(); + + // Single chokepoint for every rung: bump read-side recall_count for the notes + // actually surfaced, then format the block. Empty selection → empty string (no bump). + const finalize = ( + mode: "no-intent" | "flat-fts" | "full" | "fts5-fallback", + notes: SkillMemoryNote[], + ): string => { + if (notes.length === 0) return ""; + bumpRecallCountByIds( + db, + notes.map((n) => n.id), + ); + return buildSkillMemoryBlock( + opts.skill, + mode, + notes, + notes.filter((n) => n.pinned === 1).length, + ); + }; + + // Rung 2: no intent → flat recency×hit (nothing to embed/FTS-match → always "no-intent"). + if (!intent) { + const notes = flatRecall(db, opts.skill, opts.scope, part, { + maxTokens, + maxPinnedTokens: maxPinned, + }); + return finalize("no-intent", notes); + } + + const q = await embedTextForProject(opts.projectIdentity, intent); + if (q) { + const candidates = getRankingCandidates(db, opts.skill, opts.scope, part, 200); + const matched = candidates.filter( + (n) => + n.embedding_model_version === q.modelId && + (n.intent_embedding || n.delta_embedding), + ); + if (matched.length > 0) { + const weights = resolveWeights(opts.frontmatterConfig); + const ranked = rankRung1( + q.vector, + matched.map((n) => ({ + id: n.id, + intentVec: n.intent_embedding ? toFloat32Array(n.intent_embedding) : null, + deltaVec: n.delta_embedding ? toFloat32Array(n.delta_embedding) : null, + ts: n.last_used_at ?? n.created_at, + hit: n.hit_count, + })), + weights, + ); + const byId = new Map(candidates.map((n) => [n.id, n])); + const rankedNotes = ranked + .map((r) => byId.get(r.id)) + .filter((n): n is SkillMemoryNote => n != null); + const ordered = unionPinnedFirst( + getPinnedNotes(db, opts.skill, opts.scope, part), + rankedNotes, + ); + const selected = budgetFill(ordered, maxTokens, maxPinned); + return finalize("full", selected); + } + // zero model-matched → fall to FTS rung 3. + } + + const match = sanitizeSkillIntentForFts(intent); + if (match === "") { + const notes = flatRecall(db, opts.skill, opts.scope, part, { + maxTokens, + maxPinnedTokens: maxPinned, + }); + return finalize("flat-fts", notes); + } + const ftsNotes = searchSkillMemoryFts(db, opts.skill, opts.scope, part, match, 50); + const ordered = unionPinnedFirst( + getPinnedNotes(db, opts.skill, opts.scope, part), + ftsNotes, + ); + const selected = budgetFill(ordered, maxTokens, maxPinned); + return finalize("fts5-fallback", selected); + } catch (err) { + // Cache-safe + non-choking: never throw from recall (a thrown error here + // would surface in the skill tool result). But log it — a broken FTS + // table or bad vector blob would otherwise look identical to "no notes", + // making durable skill-memory data loss undiagnosable. + log( + `[skill-memory] recallSkillMemoryBlock failed for skill "${opts.skill}": ${err instanceof Error ? err.message : String(err)}`, + ); + return ""; + } +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts new file mode 100644 index 00000000..f67a8e07 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts @@ -0,0 +1,71 @@ +import { expect, mock, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; + +const embedCalls: string[] = []; + +// Provider "up" with a deterministic model — MUST precede the reembed.ts import. +mock.module("../memory/embedding", () => ({ + embedTextForProject: async (projectIdentity: string, _text: string) => { + embedCalls.push(projectIdentity); + return { + vector: new Float32Array([0.1, 0.2, 0.3]), + modelId: "test-model", + generation: 1, + }; + }, +})); +const { reembedStaleSkillNotes } = await import("./reembed"); + +test("reembedStaleSkillNotes fills NULL embeddings (bounded, idempotent)", async () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + db.prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:abc','fix auth','fix','mock Date','h1',0,0,1)`, + ).run(); + const result1 = await reembedStaleSkillNotes(db, "git:abc"); + expect(result1.reembedded).toBe(1); + const row = db + .prepare( + "SELECT intent_embedding, delta_embedding, embedding_model_version FROM skill_memory WHERE normalized_hash='h1'", + ) + .get() as { + intent_embedding: unknown; + delta_embedding: unknown; + embedding_model_version: string; + }; + expect(row.intent_embedding).not.toBeNull(); + expect(row.delta_embedding).not.toBeNull(); + expect(row.embedding_model_version).toBe("test-model"); + const result2 = await reembedStaleSkillNotes(db, "git:abc"); + expect(result2.reembedded).toBe(0); + } finally { + closeQuietly(db); + } +}); + +test("reembed selects global '*' notes and embeds them under the real identity", async () => { + const { promoteSkillObservations } = await import("./promote"); + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "fix", lesson: "L7" }, + ]); + embedCalls.length = 0; + + const res = await reembedStaleSkillNotes(db, "git:repoA"); + + expect(res.reembedded).toBe(1); + expect(embedCalls.length).toBeGreaterThan(0); + expect(embedCalls.every((c) => c === "git:repoA")).toBe(true); + } finally { + closeQuietly(db); + } +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts new file mode 100644 index 00000000..7b058d17 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts @@ -0,0 +1,43 @@ +import type { Database } from "../../../shared/sqlite"; +import { embedTextForProject } from "../memory/embedding"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; + +const REEMBED_CAP = 200; + +/** Programmatic (no-LLM) backfill: re-embed notes with NULL/stale vectors. Idempotent; skips when provider off. */ +export async function reembedStaleSkillNotes( + db: Database, + projectIdentity: string, +): Promise<{ reembedded: number }> { + // Probe the current model version once (also tells us if the provider is up). + const probe = await embedTextForProject(projectIdentity, "probe"); + if (!probe) return { reembedded: 0 }; // provider off — nothing to do + const currentModel = probe.modelId; + + // Stale = NULL embeddings OR embedded under a DIFFERENT model version. + // `embedding_model_version IS NOT ?` is NULL-safe SQL; the prior IS NULL OR-clauses already catch NULL-embedding + // rows; INSERT always sets all three embedding columns together, so non-NULL model_version + NULL embeddings can't occur. + const stale = db + .prepare( + `SELECT id, intent, delta FROM skill_memory + WHERE (project_identity = ? OR project_identity = '*') + AND (intent_embedding IS NULL OR delta_embedding IS NULL OR embedding_model_version IS NOT ?) + LIMIT ?`, + ) + .all(projectIdentity, currentModel, REEMBED_CAP) as Array<{ + id: number; + intent: string; + delta: string; + }>; + let n = 0; + for (const row of stale) { + const iv = await embedTextForProject(projectIdentity, row.intent); + const dv = await embedTextForProject(projectIdentity, row.delta); + if (!iv || !dv) break; // provider went down mid-batch — stop + db.prepare( + `UPDATE skill_memory SET intent_embedding=?, delta_embedding=?, embedding_model_version=? WHERE id=?`, + ).run(float32ArrayToBlob(iv.vector), float32ArrayToBlob(dv.vector), dv.modelId, row.id); + n++; + } + return { reembedded: n }; +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts new file mode 100644 index 00000000..9e36c2c9 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts @@ -0,0 +1,559 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; +import { + bumpHitCount, + bumpHitCountById, + bumpRecallCountByIds, + getDedupCandidates, + getPinnedNotes, + getRankingCandidates, + getSkillMemoryNotes, + getSkillMemoryStats, + type InsertSkillMemoryNoteArgs, + insertSkillMemoryNote, + partitionKey, + searchSkillMemoryFts, +} from "./storage"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("skill_memory storage", () => { + test("insertSkillMemoryNote inserts a new row", () => { + const db = makeDb(); + try { + const args: InsertSkillMemoryNoteArgs = { + skillId: "test-driven-development", + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc123", + intent: "fix a flaky test in auth", + kind: "gotcha", + delta: "Always mock the clock in auth tests — real timers cause flakiness", + normalizedHash: "hash-001", + createdAt: Date.now(), + }; + const id = insertSkillMemoryNote(db, args); + expect(typeof id).toBe("number"); + expect(id).toBeGreaterThan(0); + } finally { + closeQuietly(db); + } + }); + + test("insertSkillMemoryNote returns null on duplicate normalized_hash (UNIQUE constraint)", () => { + const db = makeDb(); + try { + const args: InsertSkillMemoryNoteArgs = { + skillId: "tdd", + resolvedPath: "/path/SKILL.md", + tier: "project", + skillSource: "opencode-project", + projectIdentity: "git:abc123", + intent: "intent", + kind: "fix", + delta: "delta content", + normalizedHash: "dup-hash", + createdAt: Date.now(), + }; + insertSkillMemoryNote(db, args); + const result = insertSkillMemoryNote(db, args); // duplicate + expect(result).toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("insertSkillMemoryNote stores intent_embedding/delta_embedding/embedding_model_version", () => { + const db = makeDb(); + try { + const iv = float32ArrayToBlob(new Float32Array([1, 0, 0])); + const dv = float32ArrayToBlob(new Float32Array([0, 1, 0])); + const id = insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "emb-hash", + createdAt: 1, + intentEmbedding: iv, + deltaEmbedding: dv, + embeddingModelVersion: "m1", + }); + const row = db + .prepare("SELECT embedding_model_version FROM skill_memory WHERE id=?") + .get(id) as { embedding_model_version: string }; + expect(row.embedding_model_version).toBe("m1"); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryNotes returns notes ordered by recency × hit_count", () => { + const db = makeDb(); + try { + const now = Date.now(); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "note A (high hit_count)", + normalizedHash: "h1", + createdAt: now - 10000, + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "discovery", + delta: "note B (recent)", + normalizedHash: "h2", + createdAt: now, + }); + // Bump hit_count on note A + bumpHitCount(db, "tdd", "global", "git:abc", "h1"); + bumpHitCount(db, "tdd", "global", "git:abc", "h1"); + + const notes = getSkillMemoryNotes(db, "tdd", "global", "git:abc", 10); + expect(notes.length).toBe(2); + // Both notes should be returned; order is recency × hit_count + expect(notes.map((n) => n.delta)).toContain("note A (high hit_count)"); + expect(notes.map((n) => n.delta)).toContain("note B (recent)"); + } finally { + closeQuietly(db); + } + }); + + test("bumpHitCountById increments by id", () => { + const db = makeDb(); + try { + const id = Number( + ( + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:x','i','fix','d','h',0,0,1) RETURNING id`, + ) + .get() as { id: number } + ).id, + ); + bumpHitCountById(db, id); + const row = db.prepare("SELECT hit_count FROM skill_memory WHERE id=?").get(id) as { + hit_count: number; + }; + expect(row.hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("bumpHitCount increments hit_count and updates last_used_at", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "workflow", + delta: "workflow note", + normalizedHash: "h-bump", + createdAt: Date.now(), + }); + bumpHitCount(db, "tdd", "global", "git:abc", "h-bump"); + bumpHitCount(db, "tdd", "global", "git:abc", "h-bump"); + const notes = getSkillMemoryNotes(db, "tdd", "global", "git:abc", 10); + expect(notes[0].hit_count).toBe(2); + expect(notes[0].last_used_at).not.toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("bumpRecallCountByIds increments recall_count without touching last_used_at or hit_count", () => { + const db = makeDb(); + try { + const mkId = (hash: string): number => + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "fix", + delta: `d-${hash}`, + normalizedHash: hash, + createdAt: Date.now(), + }) as number; + const id1 = mkId("r1"); + const id2 = mkId("r2"); + const id3 = mkId("r3"); + + // Surface only id1 + id2 twice; id3 never recalled. + bumpRecallCountByIds(db, [id1, id2]); + bumpRecallCountByIds(db, [id1, id2]); + + const rows = db + .prepare( + "SELECT id, recall_count, hit_count, last_used_at FROM skill_memory ORDER BY id", + ) + .all() as Array<{ + id: number; + recall_count: number; + hit_count: number; + last_used_at: number | null; + }>; + const byId = new Map(rows.map((r) => [r.id, r])); + expect(byId.get(id1)?.recall_count).toBe(2); + expect(byId.get(id2)?.recall_count).toBe(2); + expect(byId.get(id3)?.recall_count).toBe(0); + // read-counter must NOT pollute write-side salience or recency + expect(byId.get(id1)?.hit_count).toBe(0); + expect(byId.get(id1)?.last_used_at).toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("bumpRecallCountByIds is a no-op on empty ids", () => { + const db = makeDb(); + try { + // must not throw + bumpRecallCountByIds(db, []); + expect(true).toBe(true); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats returns totals scoped to project_identity", () => { + const db = makeDb(); + try { + // Seed 3 notes for skill "tdd" under project "git:abc", 1 of them pinned. + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i1", + kind: "gotcha", + delta: "n1", + normalizedHash: "stats-h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i2", + kind: "fix", + delta: "n2", + normalizedHash: "stats-h2", + createdAt: Date.now(), + }); + // pin the second one directly via SQL — there's no pin API in storage yet + db.prepare("UPDATE skill_memory SET pinned = 1 WHERE normalized_hash = ?").run( + "stats-h2", + ); + + // Seed 2 notes for a different skill "debugging" under the same project. + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i3", + kind: "discovery", + delta: "n3", + normalizedHash: "stats-h3", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i4", + kind: "workflow", + delta: "n4", + normalizedHash: "stats-h4", + createdAt: Date.now(), + }); + + // Seed 1 project-tier note under a DIFFERENT project — must NOT be counted. + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "project", + skillSource: "opencode-global", + projectIdentity: "git:other", + intent: "i5", + kind: "gotcha", + delta: "n5", + normalizedHash: "stats-h5", + createdAt: Date.now(), + }); + + const stats = getSkillMemoryStats(db, "git:abc"); + expect(stats.totalNotes).toBe(4); + expect(stats.skillsWithNotes).toBe(2); + expect(stats.pinnedNotes).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryNotes: equal timestamps don't break ordering (NULLIF guard)", () => { + const db = makeDb(); + try { + const ts = 1_000_000; + const ins = (hash: string, hits: number) => + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) + VALUES ('s','/p','global','*','i','fix','d',?,?,0,?,?)`, + ) + .run(hash, hits, ts, ts); + ins("a", 1); + ins("b", 5); + const notes = getSkillMemoryNotes(db, "s", "global", "git:x", 10); + expect(notes[0].normalized_hash).toBe("b"); // higher hit_count first when timestamps equal + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats returns all-zeros when no notes exist for the project", () => { + const db = makeDb(); + try { + const stats = getSkillMemoryStats(db, "git:empty"); + expect(stats.totalNotes).toBe(0); + expect(stats.skillsWithNotes).toBe(0); + expect(stats.pinnedNotes).toBe(0); + } finally { + closeQuietly(db); + } + }); + + test("getDedupCandidates returns top-N same-scope rows with delta_embedding + model version", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d1", + normalizedHash: "dedup-h1", + createdAt: 1, + deltaEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "m1", + }); + const cands = getDedupCandidates(db, "s", "global", "git:x", 200); + expect(cands.length).toBe(1); + expect(cands[0].delta_embedding).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + test("getRankingCandidates returns scope-filtered rows ordered by recency", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "rank-h1", + createdAt: 1, + }); + const cands = getRankingCandidates(db, "s", "global", "git:x", 10); + expect(cands.length).toBe(1); + expect(cands[0].skill_id).toBe("s"); + } finally { + closeQuietly(db); + } + }); + + test("searchSkillMemoryFts returns scope-filtered BM25 matches", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now", + normalizedHash: "fts-h1", + createdAt: 1, + }); + insertSkillMemoryNote(db, { + skillId: "OTHER", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "auth", + kind: "fix", + delta: "x", + normalizedHash: "fts-h2", + createdAt: 1, + }); + const hits = searchSkillMemoryFts(db, "s", "global", "git:x", '"auth"', 10); + expect(hits.every((h) => h.skill_id === "s")).toBe(true); + expect(hits.length).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("getPinnedNotes returns only pinned same-scope rows", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "unpinned", + normalizedHash: "pin-h1", + createdAt: 1, + }); + const pid = Number( + ( + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','*','i','fix','pinned','pin-h2',0,1,2) RETURNING id`, + ) + .get() as { id: number } + ).id, + ); + const pinned = getPinnedNotes(db, "s", "global", "git:x"); + expect(pinned.length).toBe(1); + expect(pinned[0].id).toBe(pid); + } finally { + closeQuietly(db); + } + }); +}); + +describe("partitionKey", () => { + test("global tier maps to '*' sentinel", () => { + expect(partitionKey("global", "git:repoA")).toBe("*"); + }); + test("project tier passes through the real identity", () => { + expect(partitionKey("project", "git:repoA")).toBe("git:repoA"); + }); +}); + +describe("storage v41 fields", () => { + test("insert stamps origin_project + source_type; null resolvedPath -> ''", () => { + const db = makeDb(); + try { + const id = insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: null, + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoA", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + expect(id).not.toBeNull(); + const row = db + .prepare( + "SELECT resolved_path, origin_project, source_type FROM skill_memory WHERE id=?", + ) + .get(id) as { resolved_path: string; origin_project: string; source_type: string }; + expect(row.resolved_path).toBe(""); + expect(row.origin_project).toBe("git:repoA"); + expect(row.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats counts global ('*') notes alongside the project's own", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "project", + skillSource: null, + projectIdentity: "git:repoA", + intent: "i", + kind: "fix", + delta: "d1", + normalizedHash: "p1", + createdAt: 1, + }); + insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: "", + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoB", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d2", + normalizedHash: "g1", + createdAt: 2, + }); + const stats = getSkillMemoryStats(db, "git:repoA"); + expect(stats.totalNotes).toBe(2); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts new file mode 100644 index 00000000..7137afdf --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -0,0 +1,354 @@ +import type { Database } from "../../../shared/sqlite"; + +/** + * The DB partition key for a note's tier. Global-tier notes are stored under the + * '*' sentinel so a lesson learned in any repo is recallable everywhere; project-tier + * notes keep their real project identity. ALL global-tier reads/writes/dedup MUST + * route through this so no call site is missed. + */ +export function partitionKey(tier: "project" | "global", projectIdentity: string): string { + return tier === "global" ? "*" : projectIdentity; +} + +/** + * RECALL-path partition predicate: a note is recallable for (tier, projectIdentity) + * when it is either in the skill's OWN partition OR in the cross-project global + * '*' partition (where historian-extracted lessons live). This is why a + * project-local skill still surfaces global historian notes — without the global + * branch, recall for a project-tier skill (`tier='project'`) would never match the + * `tier='global'/project_identity='*'` rows that promoteSkillObservations writes. + * + * For a global-tier skill the two branches are identical (both `tier='global' AND + * project_identity='*'`), so the OR is a harmless no-op — a row still matches once + * (it's a row filter, not a join, so no duplication). + * + * WRITE/dedup paths (insert, findExistingNote, bumpHitCount, getDedupCandidates) + * deliberately do NOT use this — they must target one exact partition. + * + * Returns `{ clause, binds }`; callers splice `clause` into the WHERE and spread + * `binds` (tier, ownPartition) at the matching `?` positions. + */ +function recallPartitionPredicate( + tier: "project" | "global", + projectIdentity: string, + columnPrefix = "", +): { clause: string; binds: [string, string] } { + const t = `${columnPrefix}tier`; + const p = `${columnPrefix}project_identity`; + return { + clause: `((${t} = ? AND ${p} = ?) OR (${t} = 'global' AND ${p} = '*'))`, + binds: [tier, partitionKey(tier, projectIdentity)], + }; +} + +export interface SkillMemoryNote { + id: number; + skill_id: string; + resolved_path: string; + tier: "project" | "global"; + skill_source: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; + project_identity: string; + origin_project: string | null; + source_type: string | null; + intent: string; + intent_embedding: Buffer | null; + delta_embedding: Buffer | null; + embedding_model_version: string | null; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + delta: string; + tags: string | null; + hit_count: number; + recall_count: number; + pinned: number; + normalized_hash: string; + created_at: number; + last_used_at: number | null; +} + +export interface InsertSkillMemoryNoteArgs { + skillId: string; + resolvedPath: string | null; + tier: "project" | "global"; + skillSource: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; + projectIdentity: string; + originProject?: string | null; + sourceType?: string | null; + intent: string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + delta: string; + tags?: string[]; + intentEmbedding?: Buffer | null; + deltaEmbedding?: Buffer | null; + embeddingModelVersion?: string | null; + normalizedHash: string; + createdAt: number; +} + +/** + * Insert a new skill_memory note. Returns the new row id, or null if a + * duplicate normalized_hash already exists for this (skill_id, tier, project_identity). + * On duplicate, callers should call bumpHitCount instead. + */ +export function insertSkillMemoryNote( + db: Database, + args: InsertSkillMemoryNoteArgs, +): number | null { + try { + const result = db + .prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, skill_source, project_identity, origin_project, source_type, + intent, kind, delta, tags, intent_embedding, delta_embedding, embedding_model_version, + hit_count, pinned, normalized_hash, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, + ) + .run( + args.skillId, + args.resolvedPath ?? "", + args.tier, + args.skillSource ?? null, + partitionKey(args.tier, args.projectIdentity), + args.originProject ?? null, + args.sourceType ?? null, + args.intent, + args.kind, + args.delta, + args.tags ? JSON.stringify(args.tags) : null, + args.intentEmbedding ?? null, + args.deltaEmbedding ?? null, + args.embeddingModelVersion ?? null, + args.normalizedHash, + args.createdAt, + ); + return result.lastInsertRowid as number; + } catch (err: unknown) { + // UNIQUE constraint violation = duplicate + if (err instanceof Error && err.message.includes("UNIQUE constraint failed")) { + return null; + } + throw err; + } +} + +/** + * Retrieve notes for flat recall (rungs 2 + 4). + * Ordered by normalized additive recency + hit_count score (pinned notes first). + * + * Scoring: recency_norm + hit_norm where: + * recency_norm = (ts - min_ts) / NULLIF(max_ts - min_ts, 0) — 0..1 range (0 when all timestamps equal) + * hit_norm = hit_count / NULLIF(MAX(hit_count) OVER (), 0) — 0..1 range (0 when all hit_counts 0) + * Additive (not multiplicative) so hit_count is not swamped by timestamp scale. + * + * NOTE: The window-function form requires SQLite ≥ 3.25 (2018). Bun ships SQLite ≥ 3.39. + * If the window form causes issues, fall back to the simpler: + * (COALESCE(last_used_at, created_at) / 1000000.0) + (hit_count * 0.1) DESC + * which is less precise but avoids the window function. + * + * TODO: add an ordering test that inserts notes with known recency/hit_count values + * and asserts the returned order matches the expected ranking. + */ +export function getSkillMemoryNotes( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); + return db + .prepare( + `SELECT * + FROM skill_memory + WHERE skill_id = ? AND ${clause} + ORDER BY + pinned DESC, + ( + COALESCE( + (COALESCE(last_used_at, created_at) - MIN(COALESCE(last_used_at, created_at)) OVER ()) * 1.0 + / NULLIF(MAX(COALESCE(last_used_at, created_at)) OVER () - MIN(COALESCE(last_used_at, created_at)) OVER (), 0), + 0.0 + ) + + + COALESCE( + hit_count * 1.0 / NULLIF(MAX(hit_count) OVER (), 0), + 0.0 + ) + ) DESC, + created_at DESC + LIMIT ?`, + ) + .all(skillId, ...binds, limit) as SkillMemoryNote[]; +} + +/** + * Bump hit_count and update last_used_at for a note identified by its + * normalized_hash within a (skill_id, tier, project_identity) scope. + */ +export function bumpHitCount( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + normalizedHash: string, +): void { + db.prepare( + `UPDATE skill_memory + SET hit_count = hit_count + 1, last_used_at = ? + WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, + ).run(Date.now(), skillId, tier, partitionKey(tier, projectIdentity), normalizedHash); +} + +/** + * Bump hit_count + last_used_at for a note identified by id (used by cosine dedup, which has no hash). + */ +export function bumpHitCountById(db: Database, id: number): void { + db.prepare( + `UPDATE skill_memory SET hit_count = hit_count + 1, last_used_at = ? WHERE id = ?`, + ).run(Date.now(), id); +} + +/** + * Bump recall_count for notes actually surfaced in a recall block (read-side usage). + * Distinct from hit_count (write-side re-record salience): recall_count answers + * "which notes are recalled most". Deliberately does NOT touch last_used_at — the + * ranking's recency term must reflect when a lesson was learned/re-recorded, not when + * it was surfaced, else surfaced notes would always win recency and starve new notes. + * Best-effort, no-throw: a counter write must never break recall. + */ +export function bumpRecallCountByIds(db: Database, ids: number[]): void { + if (ids.length === 0) return; + try { + const placeholders = ids.map(() => "?").join(","); + db.prepare( + `UPDATE skill_memory SET recall_count = recall_count + 1 WHERE id IN (${placeholders})`, + ).run(...ids); + } catch { + // never let usage-tracking break a recall + } +} + +/** + * Check if a note with the given normalized_hash already exists. + * Returns the existing note's id and hit_count, or null. + */ +export function findExistingNote( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + normalizedHash: string, +): { id: number; hit_count: number } | null { + return ( + (db + .prepare( + `SELECT id, hit_count FROM skill_memory + WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, + ) + .get(skillId, tier, partitionKey(tier, projectIdentity), normalizedHash) as { + id: number; + hit_count: number; + } | null) ?? null + ); +} + +/** + * Aggregate stats for the skill_memory table scoped to a project identity. + * Used by the ctx-status / TUI status dialog (mirrors the external-memory + * status surface). Sync, single query; safe to call on every status poll. + */ +export function getDedupCandidates( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): Array> { + return db + .prepare( + `SELECT id, delta_embedding, embedding_model_version FROM skill_memory + WHERE skill_id=? AND tier=? AND project_identity=? + ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, + ) + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as Array< + Pick + >; +} + +export function getRankingCandidates( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); + return db + .prepare( + `SELECT * FROM skill_memory + WHERE skill_id=? AND ${clause} + ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, + ) + .all(skillId, ...binds, limit) as SkillMemoryNote[]; +} + +export function searchSkillMemoryFts( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + matchQuery: string, + limit: number, +): SkillMemoryNote[] { + return db + .prepare( + `SELECT m.* FROM skill_memory_fts f + JOIN skill_memory m ON m.id = f.rowid + WHERE skill_memory_fts MATCH ? + AND m.skill_id=? AND ${recallPartitionPredicate(tier, projectIdentity, "m.").clause} + ORDER BY bm25(skill_memory_fts) ASC, COALESCE(m.last_used_at, m.created_at) DESC + LIMIT ?`, + ) + .all( + matchQuery, + skillId, + ...recallPartitionPredicate(tier, projectIdentity, "m.").binds, + limit, + ) as SkillMemoryNote[]; +} + +export function getPinnedNotes( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, +): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); + return db + .prepare( + `SELECT * FROM skill_memory + WHERE skill_id=? AND ${clause} AND pinned=1 + ORDER BY COALESCE(last_used_at, created_at) DESC`, + ) + .all(skillId, ...binds) as SkillMemoryNote[]; +} + +export function getSkillMemoryStats( + db: Database, + projectIdentity: string, +): { totalNotes: number; skillsWithNotes: number; pinnedNotes: number } { + const row = db + .prepare( + `SELECT + COUNT(*) AS total, + COUNT(DISTINCT skill_id) AS skills, + COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned + FROM skill_memory + WHERE project_identity = ? OR project_identity = '*'`, + ) + .get(projectIdentity) as { total: number; skills: number; pinned: number } | undefined; + return { + totalNotes: Number(row?.total ?? 0), + skillsWithNotes: Number(row?.skills ?? 0), + pinnedNotes: Number(row?.pinned ?? 0), + }; +} diff --git a/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts b/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts new file mode 100644 index 00000000..f7a998b7 --- /dev/null +++ b/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, test } from "bun:test"; + +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { runMigrations } from "./migrations"; +import { initializeDatabase } from "./storage-db"; + +function cols(db: Database, table: string): Set { + return new Set( + (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (r) => r.name, + ), + ); +} + +function objExists(db: Database, name: string): boolean { + return !!db.prepare("SELECT 1 FROM sqlite_master WHERE name=?").get(name); +} + +describe("skill_memory init-time self-heal net", () => { + test("initializeDatabase ALONE creates skill_memory with the full final column set + FTS vtable + triggers", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); // NO runMigrations + const c = cols(db, "skill_memory"); + for (const need of [ + "origin_project", + "source_type", + "delta_embedding", + "recall_count", + "skill_id", + "tier", + "project_identity", + "normalized_hash", + "intent_embedding", + ]) { + expect(c.has(need)).toBe(true); + } + expect(objExists(db, "skill_memory_fts")).toBe(true); + expect(objExists(db, "skill_memory_ai")).toBe(true); + expect(objExists(db, "skill_memory_ad")).toBe(true); + expect(objExists(db, "skill_memory_au")).toBe(true); + } finally { + closeQuietly(db); + } + }); + + test("init-only schema MATCHES fully-migrated schema (no fresh-vs-migrated divergence)", () => { + const a = new Database(":memory:"); + const b = new Database(":memory:"); + try { + initializeDatabase(a); + initializeDatabase(b); + runMigrations(b); + expect([...cols(a, "skill_memory")].sort()).toEqual( + [...cols(b, "skill_memory")].sort(), + ); + expect(objExists(a, "skill_memory_fts")).toBe(objExists(b, "skill_memory_fts")); + } finally { + closeQuietly(a); + closeQuietly(b); + } + }); + + test("heals a renumber-skip: existing skill_memory missing origin_project/source_type gets them on init", () => { + const db = new Database(":memory:"); + try { + // Simulate a DB created at the v39/v40 shape (no origin_project/source_type), + // as if v41 was skipped by a renumber collision. + db.exec(`CREATE TABLE skill_memory ( + id INTEGER PRIMARY KEY AUTOINCREMENT, skill_id TEXT NOT NULL, resolved_path TEXT NOT NULL, + tier TEXT NOT NULL, skill_source TEXT, project_identity TEXT NOT NULL, intent TEXT, + intent_embedding BLOB, embedding_model_version TEXT, kind TEXT NOT NULL, delta TEXT NOT NULL, + tags TEXT, hit_count INTEGER NOT NULL DEFAULT 0, pinned INTEGER NOT NULL DEFAULT 0, + normalized_hash TEXT NOT NULL, created_at INTEGER NOT NULL, last_used_at INTEGER, + delta_embedding BLOB, recall_count INTEGER NOT NULL DEFAULT 0, + UNIQUE(skill_id, tier, project_identity, normalized_hash) + );`); + expect(cols(db, "skill_memory").has("origin_project")).toBe(false); + initializeDatabase(db); // the net must ADD the missing columns + expect(cols(db, "skill_memory").has("origin_project")).toBe(true); + expect(cols(db, "skill_memory").has("source_type")).toBe(true); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index d1fcc9ec..3d8e364d 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -38,7 +38,7 @@ export function getSchemaFenceRejection(): { return lastSchemaFenceRejection; } -export const LATEST_SUPPORTED_VERSION = 49; +export const LATEST_SUPPORTED_VERSION = 52; // chmod is meaningless on Windows (POSIX modes are not honored), so all // permission tightening is skipped there. mkdir's `mode` is likewise ignored. @@ -632,6 +632,40 @@ export function initializeDatabase(db: Database): void { CREATE INDEX IF NOT EXISTS idx_memory_mutation_log_project ON memory_mutation_log(project_path, id); + CREATE TABLE IF NOT EXISTS skill_memory ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + skill_id TEXT NOT NULL, + resolved_path TEXT NOT NULL, + tier TEXT NOT NULL CHECK(tier IN ('project', 'global')), + skill_source TEXT CHECK(skill_source IN ( + 'opencode-project', 'opencode-global', + 'claude-skills', 'agents-skills' + )), + project_identity TEXT NOT NULL, + intent TEXT NOT NULL, + intent_embedding BLOB, + embedding_model_version TEXT, + kind TEXT NOT NULL CHECK(kind IN ('gotcha', 'discovery', 'fix', 'workflow')), + delta TEXT NOT NULL, + tags TEXT, + hit_count INTEGER NOT NULL DEFAULT 0, + pinned INTEGER NOT NULL DEFAULT 0 CHECK(pinned IN (0, 1)), + normalized_hash TEXT NOT NULL, + created_at INTEGER NOT NULL, + last_used_at INTEGER, + delta_embedding BLOB, + recall_count INTEGER NOT NULL DEFAULT 0, + origin_project TEXT, + source_type TEXT, + UNIQUE(skill_id, tier, project_identity, normalized_hash) + ); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_lookup + ON skill_memory(skill_id, tier, project_identity, last_used_at DESC); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_fts_prep + ON skill_memory(skill_id, tier, project_identity, kind); + CREATE TABLE IF NOT EXISTS dream_state ( key TEXT PRIMARY KEY, value TEXT NOT NULL @@ -787,6 +821,14 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en tokenize='porter unicode61' ); + CREATE VIRTUAL TABLE IF NOT EXISTS skill_memory_fts USING fts5( + intent, + delta, + content='skill_memory', + content_rowid='id', + tokenize='porter unicode61' + ); + CREATE VIRTUAL TABLE IF NOT EXISTS message_history_fts USING fts5( session_id UNINDEXED, message_ordinal UNINDEXED, @@ -816,6 +858,19 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en INSERT INTO memories_fts(rowid, content, category) VALUES (new.id, new.content, new.category); END; + CREATE TRIGGER IF NOT EXISTS skill_memory_ai AFTER INSERT ON skill_memory BEGIN + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_ad AFTER DELETE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_au AFTER UPDATE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + CREATE TABLE IF NOT EXISTS session_meta ( session_id TEXT PRIMARY KEY, harness TEXT NOT NULL DEFAULT 'opencode', @@ -1027,6 +1082,26 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en CREATE INDEX IF NOT EXISTS idx_message_history_index_updated_at ON message_history_index(updated_at); `); + // Self-heal: backfill skill_memory_fts if it's empty while skill_memory has + // rows. The CREATE TABLE/TRIGGER block above only indexes FUTURE writes; rows + // that predate the FTS table (e.g. a DB where v50 ran but v51 hadn't, or a + // lost migration row) would be invisible to FTS rung-3 recall until re-saved. + // Guarded so this fires once (on the gap), not on every boot. Mirrors v51's + // INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild'). + try { + const ftsCount = ( + db.prepare("SELECT COUNT(*) AS n FROM skill_memory_fts").get() as { n: number } + ).n; + const rowCount = ( + db.prepare("SELECT COUNT(*) AS n FROM skill_memory").get() as { n: number } + ).n; + if (ftsCount === 0 && rowCount > 0) { + db.exec("INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild');"); + } + } catch { + // Non-fatal: FTS rung-3 degrades gracefully (embedding + flat recall unaffected). + } + ensureColumn(db, "primer_candidates", "harness", "TEXT NOT NULL DEFAULT 'opencode'"); ensureColumn(db, "primer_candidates", "source_start_message_id", "TEXT NOT NULL DEFAULT ''"); ensureColumn(db, "primer_candidates", "source_end_message_id", "TEXT NOT NULL DEFAULT ''"); @@ -1116,6 +1191,10 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en ensureColumn(db, "compartments", "start_message_id", "TEXT DEFAULT ''"); ensureColumn(db, "compartments", "end_message_id", "TEXT DEFAULT ''"); ensureColumn(db, "memory_embeddings", "model_id", "TEXT"); + ensureColumn(db, "skill_memory", "delta_embedding", "BLOB"); + ensureColumn(db, "skill_memory", "recall_count", "INTEGER NOT NULL DEFAULT 0"); + ensureColumn(db, "skill_memory", "origin_project", "TEXT"); + ensureColumn(db, "skill_memory", "source_type", "TEXT"); ensureColumn(db, "session_meta", "memory_block_cache", "TEXT DEFAULT ''"); ensureColumn(db, "session_meta", "memory_block_count", "INTEGER DEFAULT 0"); ensureColumn(db, "session_meta", "pi_stable_id_scheme", "INTEGER"); diff --git a/packages/plugin/src/hooks/magic-context/command-handler.ts b/packages/plugin/src/hooks/magic-context/command-handler.ts index 89d16f54..4382c75b 100644 --- a/packages/plugin/src/hooks/magic-context/command-handler.ts +++ b/packages/plugin/src/hooks/magic-context/command-handler.ts @@ -552,7 +552,12 @@ export function createMagicContextCommandHandler(deps: { } const liveModelKey = deps.getLiveModelKey?.(sessionId); const liveContextLimit = deps.getContextLimit?.(sessionId); - const statusOutput = executeStatus( + // Use dreamer's directory when available (== project's working + // directory for dreamer-aware sessions); fall back to cwd so + // the new "Skill memory" section can resolve a project identity + // for sessions that don't have dreamer configured. + const statusDirectory = deps.dreamer?.projectPath ?? process.cwd(); + const statusOutput = await executeStatus( deps.db, sessionId, deps.protectedTags, @@ -562,6 +567,7 @@ export function createMagicContextCommandHandler(deps: { deps.commitClusterTrigger, deps.executeThresholdTokens, liveContextLimit, + statusDirectory, ); result += result ? `\n\n${statusOutput}` : statusOutput; } diff --git a/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts b/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts index 6ea61acf..776e984d 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts @@ -362,3 +362,43 @@ describe("parseCompartmentOutput — fact scoping (audit Fix 6)", () => { expect(parsed.facts.some((f) => f.category === "NAMING")).toBe(false); }); }); + +describe("parseCompartmentOutput — skill_observations", () => { + it("parses skill observations into {skillId, kind, lesson}", () => { + const xml = ` + + +* council | gotcha | aggregator needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests + +`; + const parsed = parseCompartmentOutput(xml); + expect(parsed.skillObservations).toEqual([ + { + skillId: "council", + kind: "gotcha", + lesson: "aggregator needs a fast non-deficit model", + }, + { + skillId: "test-driven-development", + kind: "fix", + lesson: "mock the clock in auth tests", + }, + ]); + }); + + it("absent block -> empty array", () => { + expect(parseCompartmentOutput("").skillObservations).toEqual([]); + }); + + it("malformed lines (bad kind, missing fields) are skipped, non-choke", () => { + const xml = ` +* council | general | invalid kind dropped +* onlyskill | fix +* good | discovery | kept +`; + expect(parseCompartmentOutput(xml).skillObservations).toEqual([ + { skillId: "good", kind: "discovery", lesson: "kept" }, + ]); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/compartment-parser.ts b/packages/plugin/src/hooks/magic-context/compartment-parser.ts index 55ce7551..721f7616 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-parser.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-parser.ts @@ -44,6 +44,12 @@ export interface ParsedPrimerCandidate { originCompartmentIndex?: number; } +export interface ParsedSkillObservation { + skillId: string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + lesson: string; +} + export interface ParsedCompartmentOutput { compartments: ParsedCompartment[]; facts: ParsedFact[]; @@ -51,6 +57,7 @@ export interface ParsedCompartmentOutput { unprocessedFrom: number | null; userObservations: string[]; primerCandidates: ParsedPrimerCandidate[]; + skillObservations: ParsedSkillObservation[]; } // Open tag captured separately from body so attributes (start/end/title/ @@ -84,6 +91,9 @@ const PRIMER_CANDIDATES_REGEX = /(.*?)<\/primer_candidates>/s // (*/-/1.) is still accepted and falls back to the chunk span at emission. const PRIMER_ELEMENT_REGEX = /(.*?)<\/primer>/gs; const PRIMER_ITEM_REGEX = /^\s*(?:\*|-|\d+\.)\s*(.+)$/gm; +const SKILL_OBSERVATIONS_REGEX = /(.*?)<\/skill_observations>/s; +const SKILL_OBS_ITEM_REGEX = /^\s*\*\s*([^|\n]+)\|([^|\n]+)\|([^\n]+)$/gm; +const SKILL_OBS_KINDS = new Set(["gotcha", "discovery", "fix", "workflow"]); // Events: scan the block (if any) for event elements. Kinds // are parsed kind-agnostically — any element with an `at_compartment` attr is an @@ -235,11 +245,36 @@ export function parseCompartmentOutput(text: string): ParsedCompartmentOutput { } } + const skillObservations: ParsedSkillObservation[] = []; + const skillObsMatch = text.match(SKILL_OBSERVATIONS_REGEX); + if (skillObsMatch) { + for (const itemMatch of skillObsMatch[1].matchAll(SKILL_OBS_ITEM_REGEX)) { + const skillId = unescapeXml(itemMatch[1].trim()); + const kind = itemMatch[2].trim(); + const lesson = unescapeXml(itemMatch[3].trim()); + if (skillId && lesson && SKILL_OBS_KINDS.has(kind)) { + skillObservations.push({ + skillId, + kind: kind as ParsedSkillObservation["kind"], + lesson, + }); + } + } + } + const events = parseEvents(text); compartments.sort((a, b) => a.startMessage - b.startMessage); - return { compartments, facts, events, unprocessedFrom, userObservations, primerCandidates }; + return { + compartments, + facts, + events, + unprocessedFrom, + userObservations, + primerCandidates, + skillObservations, + }; } /** diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts index d4127813..e1f81bff 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts @@ -23,6 +23,7 @@ import { } from "../../features/magic-context/memory"; import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { getMemoriesByProject } from "../../features/magic-context/memory/storage-memory"; +import { promoteSkillObservations } from "../../features/magic-context/skill-memory/promote"; import { clearEmergencyDrainLatch, clearEmergencyRecovery, @@ -874,6 +875,28 @@ export async function runCompartmentAgent(deps: CompartmentRunnerDeps): Promise< sessionLog(sessionId, "failed to store primer candidates:", error); } } + + // Skill-memory historian extraction: promote the + // historian emitted into per-skill notes. Same promotionActive + + // !discardedLast gate as facts/primers so a provisional tail does not + // double-emit. + if ( + promotionActive && + !discardedLast && + validatedPass.skillObservations && + validatedPass.skillObservations.length > 0 + ) { + try { + const written = promoteSkillObservations( + db, + promotionProjectIdentity, + validatedPass.skillObservations, + ); + sessionLog(sessionId, `promoted ${written} skill observation(s)`); + } catch (error) { + sessionLog(sessionId, "failed to promote skill observations:", error); + } + } } catch (error: unknown) { // Historian runs are fail-closed because they update durable compartment state. const desc = describeError(error); diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts index 146dcfbd..2cd326fe 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts @@ -1,6 +1,6 @@ import type { PluginContext } from "../../plugin/types"; import type { Database } from "../../shared/sqlite"; -import type { ParsedEvent } from "./compartment-parser"; +import type { ParsedEvent, ParsedSkillObservation } from "./compartment-parser"; import type { ProtectedTailBoundarySnapshot } from "./protected-tail-boundary"; import type { NotificationParams } from "./send-session-notification"; @@ -171,6 +171,7 @@ export type ValidatedHistorianPassResult = * emitted compartments (same convention as `` at_compartment); * undefined → emission falls back to the chunk span. */ primerCandidates?: Array<{ question: string; originCompartmentIndex?: number }>; + skillObservations?: ParsedSkillObservation[]; /** v2: historian-extracted events (stored, not rendered). */ events?: ParsedEvent[]; /** diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts index 4c25ebdf..11a6a1f8 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts @@ -253,3 +253,24 @@ describe("validateHistorianOutput primer candidate contract", () => { } }); }); + +describe("validateHistorianOutput — skillObservations", () => { + test("validated pass surfaces skillObservations when present", () => { + const xml = ` + +summary + + +* council | gotcha | use a fast aggregator + +1-23 +`; + const result = validateHistorianOutput(xml, "ses-test", buildChunk(1, 2), [], 0); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.skillObservations).toEqual([ + { skillId: "council", kind: "gotcha", lesson: "use a fast aggregator" }, + ]); + } + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts index b97f0a1e..ac6b2a28 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts @@ -109,6 +109,8 @@ export function validateHistorianOutput( userObservations: parsed.userObservations.length > 0 ? parsed.userObservations : undefined, primerCandidates: parsed.primerCandidates.length > 0 ? parsed.primerCandidates.slice(0, 1) : undefined, + skillObservations: + parsed.skillObservations.length > 0 ? parsed.skillObservations : undefined, // v2: surface events so the runner can persist them (stored, not rendered). events: parsed.events.length > 0 ? parsed.events : undefined, }; diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts b/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts index 3fbe4d4e..bf623168 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts @@ -1041,6 +1041,59 @@ describe("runCompartmentAgent", () => { expect(promptSession.mock.calls[0]?.[0]?.body.agent).toBe("historian"); }); + it("promotes skillObservations as global '*' notes (gated, non-discard-last)", async () => { + useTempDataHome("compartment-runner-skillobs-"); + createOpenCodeDb("ses-skillobs", [ + { id: "m-1", role: "user", text: "First" }, + { id: "m-2", role: "assistant", text: "Second" }, + { id: "m-3", role: "user", text: "protected 1" }, + { id: "m-4", role: "user", text: "protected 2" }, + { id: "m-5", role: "user", text: "protected 3" }, + { id: "m-6", role: "user", text: "protected 4" }, + { id: "m-7", role: "user", text: "protected 5" }, + ]); + const db = openDatabase(); + const client = { + session: { + get: mock(async () => ({ data: { directory: "/tmp/skillobs" } })), + create: mock(async () => ({ data: { id: "ses-agent" } })), + prompt: mock(async () => ({})), + messages: mock(async () => ({ + data: [ + { + info: { role: "assistant", time: { created: 1 } }, + parts: [ + { + type: "text", + text: `Summary\n\n* council | gotcha | fast aggregator\n`, + }, + ], + }, + ], + })), + delete: mock(async () => ({})), + }, + } as unknown as PluginContext["client"]; + try { + await runCompartmentAgentWithLease({ + client, + db, + sessionId: "ses-skillobs", + historianChunkTokens: 10_000, + directory: "/tmp", + }); + const row = db + .prepare( + "SELECT project_identity, source_type FROM skill_memory WHERE skill_id='council'", + ) + .get() as { project_identity: string; source_type: string } | undefined; + expect(row?.project_identity).toBe("*"); + expect(row?.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); + it("keeps a committed publish succeeded and signaled when post-commit project registration throws", async () => { useTempDataHome("compartment-runner-post-commit-registration-throw-"); createOpenCodeDb("ses-post-commit-registration", [ diff --git a/packages/plugin/src/hooks/magic-context/execute-status.ts b/packages/plugin/src/hooks/magic-context/execute-status.ts index cfe95ffb..f363f068 100644 --- a/packages/plugin/src/hooks/magic-context/execute-status.ts +++ b/packages/plugin/src/hooks/magic-context/execute-status.ts @@ -1,6 +1,8 @@ import { DEFAULT_EXECUTE_THRESHOLD_PERCENTAGE } from "../../config/schema/magic-context"; import { getCompartments } from "../../features/magic-context/compartment-storage"; +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { parseCacheTtl } from "../../features/magic-context/scheduler"; +import { getSkillMemoryStats } from "../../features/magic-context/skill-memory/storage"; import { getPendingOps } from "../../features/magic-context/storage"; import { getOrCreateSessionMeta } from "../../features/magic-context/storage-meta"; import { getTagsBySession } from "../../features/magic-context/storage-tags"; @@ -31,7 +33,7 @@ function formatExecuteThreshold( return `${thresholdPercentage}%`; } -export function executeStatus( +export async function executeStatus( db: Database, sessionId: string, protectedTags: number, @@ -43,7 +45,8 @@ export function executeStatus( commitClusterTrigger?: { enabled: boolean; min_clusters: number }, executeThresholdTokens?: { default?: number; [modelKey: string]: number | undefined }, contextLimit?: number, -): string { + directory?: string, +): Promise { // Single source of truth — resolver tells us both the effective percentage AND // which config source won (tokens vs percentage). Previously /ctx-status // reimplemented the token-match check here and missed progressive base-model @@ -185,6 +188,32 @@ export function executeStatus( } } + // Skill-memory stats — only when a directory is available to resolve + // the project identity (skill_memory is partitioned on + // project_identity). Mirrors the external-memory section's pattern: + // surface counts only when there is something to show, skip otherwise. + // Wrapped in try/catch so a missing skill_memory table (e.g. pre-v37 + // migration in tests) doesn't fail the whole status output — same + // defensive pattern the tags / pending_ops queries use. + if (directory) { + try { + const projectIdentity = resolveProjectIdentity(directory); + if (projectIdentity) { + const skillStats = getSkillMemoryStats(db, projectIdentity); + if (skillStats.totalNotes > 0) { + lines.push( + "", + "### Skill memory", + `- notes: ${skillStats.totalNotes} (across ${skillStats.skillsWithNotes} ${skillStats.skillsWithNotes === 1 ? "skill" : "skills"})`, + `- pinned: ${skillStats.pinnedNotes}`, + ); + } + } + } catch { + // skill_memory may not exist (pre-v37 schema) — skip silently + } + } + return lines.join("\n"); } catch (error) { sessionLog(sessionId, "ctx-status failed:", error); diff --git a/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts b/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts index f7357b8c..c30cdd95 100644 --- a/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts +++ b/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts @@ -29,13 +29,14 @@ The primary agent retains two tools — \`ctx_search\` (find a compartment by co ## What you produce -For each pass, you emit five things: +For each pass, you emit up to six things: 1. **Compartments** — completed logical work units from the raw history you just received. Each compartment is stored at four progressive verbosity tiers (\`\`/\`\`/\`\`/\`\`) and carries an \`importance\` score. The decay system renders a different tier depending on how the compartment has aged and how important it is. 2. **Facts** — durable cross-cutting **world knowledge** that survives past any single compartment: stable rules, defaults, constraints, naming choices. 3. **Events** *(optional)* — specific anchor moments worth extracting from compartment narrative: causal incidents (something broke, was investigated, got resolved) and trajectory corrections (a strategy was abandoned for another). 4. **User observations** *(optional)* — universal behavioral patterns about the human user, fed to a separate dreamer review pipeline that promotes recurring patterns into stable user-profile memories. 5. **Primer candidates** *(optional)* — durable standing questions about how the project works that this chunk helps answer, fed to a separate dreamer review pipeline that promotes recurring project primers. +6. **Skill observations** *(optional)* — universal reusable lessons about skills the agent actually used, fed to the skill-memory pipeline. You also receive two reference blocks — \`\` for calibration and \`\` for continuity with your prior work in this session. Read both before producing your output. @@ -697,6 +698,24 @@ After outputting compartments, facts, events, and user observations, also output \`\`\` If no candidates, omit the \`\` section entirely. +## Skill observations (optional, experimental) + +After outputting compartments, facts, events, and user observations, also output a \`\` section IF the chunk shows the agent USING a skill (a \`TC: skill()\` marker) and learning a UNIVERSAL, reusable lesson about that skill's behavior. + +- Each line: \`* | | \` where \`\` is exactly one of \`gotcha\`, \`discovery\`, \`fix\`, \`workflow\`. +- \`\` is the name from the \`TC: skill()\` marker in the chunk. +- Good: \`council | gotcha | the aggregator step needs a fast non-deficit model or it stalls\` — a universal fact about the skill's behavior. +- Bad (DO NOT emit): project-specific usage, one-off task detail, or anything not tied to a skill actually loaded in this chunk. +- Only emit with strong evidence the agent learned something reusable. Zero observations is the normal case. +- Do not re-emit a lesson already visible in the chunk's prior skill notes. +- The output shape gains an additional section: +\`\`\` + +* council | gotcha | the aggregator step needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests to kill flakiness + +\`\`\` +If there are no skill observations, omit the \`\` section entirely. --- @@ -762,6 +781,9 @@ Output valid XML only in this shape: How does subsystem X work? + +* skill-id | kind | lesson text + FIRST-LAST INDEX @@ -774,6 +796,7 @@ Rules: - Omit \`\` section entirely if no events were extracted (this is the normal case for most compartments). - Omit \`\` section entirely if no observations were extracted. - Omit \`\` section entirely if no primer candidates were extracted. +- Omit \`\` section entirely if no observations were extracted. - Compartments must be ordered, contiguous for the ranges they cover, and non-overlapping. - All four \`\`/\`\`/\`\`/\`\` elements must appear in every compartment, in that order. P4 may be self-closed, an anchor-only fragment, or one sentence depending on what makes the compartment recognizable (see P4 section). - \`episode_type\` may be a single activity or a comma-separated list of activities the compartment spans (e.g. \`episode_type="design,feature,release"\`). Multiple activities do not split a compartment — they describe one arc that touched multiple activity types. diff --git a/packages/plugin/src/hooks/magic-context/historian-prompt.source.md b/packages/plugin/src/hooks/magic-context/historian-prompt.source.md index ad32b7e9..c0f06eeb 100644 --- a/packages/plugin/src/hooks/magic-context/historian-prompt.source.md +++ b/packages/plugin/src/hooks/magic-context/historian-prompt.source.md @@ -22,13 +22,14 @@ The primary agent retains two tools — `ctx_search` (find a compartment by cont ## What you produce -For each pass, you emit five things: +For each pass, you emit up to six things: 1. **Compartments** — completed logical work units from the raw history you just received. Each compartment is stored at four progressive verbosity tiers (``/``/``/``) and carries an `importance` score. The decay system renders a different tier depending on how the compartment has aged and how important it is. 2. **Facts** — durable cross-cutting **world knowledge** that survives past any single compartment: stable rules, defaults, constraints, naming choices. 3. **Events** *(optional)* — specific anchor moments worth extracting from compartment narrative: causal incidents (something broke, was investigated, got resolved) and trajectory corrections (a strategy was abandoned for another). 4. **User observations** *(optional)* — universal behavioral patterns about the human user, fed to a separate dreamer review pipeline that promotes recurring patterns into stable user-profile memories. 5. **Primer candidates** *(optional)* — durable standing questions about how the project works that this chunk helps answer, fed to a separate dreamer review pipeline that promotes recurring project primers. +6. **Skill observations** *(optional)* — universal reusable lessons about skills the agent actually used, fed to the skill-memory pipeline. You also receive two reference blocks — `` for calibration and `` for continuity with your prior work in this session. Read both before producing your output. @@ -690,6 +691,24 @@ After outputting compartments, facts, events, and user observations, also output ``` If no candidates, omit the `` section entirely. +## Skill observations (optional, experimental) + +After outputting compartments, facts, events, and user observations, also output a `` section IF the chunk shows the agent USING a skill (a `TC: skill()` marker) and learning a UNIVERSAL, reusable lesson about that skill's behavior. + +- Each line: `* | | ` where `` is exactly one of `gotcha`, `discovery`, `fix`, `workflow`. +- `` is the name from the `TC: skill()` marker in the chunk. +- Good: `council | gotcha | the aggregator step needs a fast non-deficit model or it stalls` — a universal fact about the skill's behavior. +- Bad (DO NOT emit): project-specific usage, one-off task detail, or anything not tied to a skill actually loaded in this chunk. +- Only emit with strong evidence the agent learned something reusable. Zero observations is the normal case. +- Do not re-emit a lesson already visible in the chunk's prior skill notes. +- The output shape gains an additional section: +``` + +* council | gotcha | the aggregator step needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests to kill flakiness + +``` +If there are no skill observations, omit the `` section entirely. --- @@ -755,6 +774,9 @@ Output valid XML only in this shape: How does subsystem X work? + +* skill-id | kind | lesson text + FIRST-LAST INDEX @@ -767,6 +789,7 @@ Rules: - Omit `` section entirely if no events were extracted (this is the normal case for most compartments). - Omit `` section entirely if no observations were extracted. - Omit `` section entirely if no primer candidates were extracted. +- Omit `` section entirely if no observations were extracted. - Compartments must be ordered, contiguous for the ranges they cover, and non-overlapping. - All four ``/``/``/`` elements must appear in every compartment, in that order. P4 may be self-closed, an anchor-only fragment, or one sentence depending on what makes the compartment recognizable (see P4 section). - `episode_type` may be a single activity or a comma-separated list of activities the compartment spans (e.g. `episode_type="design,feature,release"`). Multiple activities do not split a compartment — they describe one arc that touched multiple activity types. diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts index 8d2c0a46..f9bab309 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts @@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test"; import { runMigrations } from "../../features/magic-context/migrations"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; import { initializeDatabase } from "../../features/magic-context/storage-db"; import { getOrCreateSessionMeta, @@ -13,7 +14,11 @@ import { } from "../../features/magic-context/storage-meta-persisted"; import { Database } from "../../shared/sqlite"; import { closeQuietly } from "../../shared/sqlite-helpers"; -import { createEventHook, createToolExecuteAfterHook } from "./hook-handlers"; +import { + createEventHook, + createToolExecuteAfterHook, + maybeInjectSkillMemory, +} from "./hook-handlers"; function createTestDb(): Database { const db = new Database(":memory:"); @@ -22,10 +27,21 @@ function createTestDb(): Database { return db; } +const CFG = { + enabled: true as const, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, +}; + function createTestHook(db: Database): ReturnType { return createToolExecuteAfterHook({ db, channel1StateBySession: new Map(), + skillLoadRegistry: new Map(), + sessionDirectoryBySession: new Map(), + defaultDirectory: "/tmp/test", + intentByCallId: new Map(), }); } @@ -235,3 +251,31 @@ describe("createEventHook mid-session model switch clears overflow state", () => } }); }); + +describe("maybeInjectSkillMemory intent threading", () => { + test("maybeInjectSkillMemory threads intent → FTS rung (vs no-intent without it)", async () => { + const db = createTestDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix the auth flake", + kind: "fix", + delta: "stub the clock", + normalizedHash: "h1", + createdAt: Date.now(), + }); + const withIntent = { output: "# tool result" }; + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", CFG, withIntent, "auth"); + expect(withIntent.output).toContain('mode="fts5-fallback"'); + const noIntent = { output: "# tool result" }; + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", CFG, noIntent, undefined); + expect(noIntent.output).toContain('mode="no-intent"'); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.ts index ee449937..affe0ccb 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.ts @@ -1,8 +1,11 @@ +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { clearSessionTracking, scheduleIncrementalIndex, scheduleReconciliation, } from "../../features/magic-context/message-index-async"; +import type { SkillMemoryConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import { recallSkillMemoryBlock } from "../../features/magic-context/skill-memory/recall"; import { clearPersistedReasoningWatermark } from "../../features/magic-context/storage"; import { getOrCreateSessionMeta, @@ -22,6 +25,7 @@ import { import { clearSidebarSnapshotCache } from "../../plugin/sidebar-snapshot-cache"; import type { PluginContext } from "../../plugin/types"; import { sessionLog } from "../../shared/logger"; +import type { Database } from "../../shared/sqlite"; import { clearAutoSearchForSession } from "./auto-search-runner"; import { buildChannel1Reminder, @@ -467,16 +471,241 @@ function maybeInjectChannel1Nudge( ); } +// ── intentByCallId stash map ──────────────────────────────────────────────── +// Keyed by callID (= options.toolCallId, identical before↔after). +// Bounded: 60s TTL + 256-entry hard cap. The after-hook deletes in a finally; +// this map is the backstop for callIDs whose after-hook never fires (crash, +// swallowed exception, tool error). +// Spike C (Task 0a) confirmed: tool.execute.before fires PRE-validation on +// raw output.args, so intent is present before Effect-Schema strips it. + +export type IntentByCallIdMap = Map; + +export function createIntentByCallIdMap(): IntentByCallIdMap { + return new Map(); +} + +/** + * Composite key for the intent stash: `${sessionId}:${callId}`. Keying by + * session (not bare callID) lets onSessionDeleted prune one session's entries + * by prefix without evicting concurrent sessions' in-flight intents. + */ +export function intentKey(sessionId: string, callId: string): string { + return `${sessionId}:${callId}`; +} + +/** Delete all stash entries belonging to one session (prefix prune on delete). */ +export function pruneIntentsForSession(map: IntentByCallIdMap, sessionId: string): void { + const prefix = `${sessionId}:`; + for (const key of map.keys()) { + if (key.startsWith(prefix)) map.delete(key); + } +} + +const INTENT_TTL_MS = 60_000; +const INTENT_MAP_CAP = 256; + +export function stashIntent(map: IntentByCallIdMap, callId: string, intent: string): void { + // Sweep stale entries (TTL backstop) + const now = Date.now(); + for (const [key, entry] of map) { + if (now - entry.ts > INTENT_TTL_MS) { + map.delete(key); + } + } + // Hard cap: evict oldest if at limit + if (map.size >= INTENT_MAP_CAP) { + let oldestKey: string | undefined; + let oldestTs = Infinity; + for (const [key, entry] of map) { + if (entry.ts < oldestTs) { + oldestTs = entry.ts; + oldestKey = key; + } + } + if (oldestKey !== undefined) map.delete(oldestKey); + } + map.set(callId, { intent, ts: now }); +} + +export function getAndDeleteIntent(map: IntentByCallIdMap, callId: string): string | null { + const entry = map.get(callId); + if (!entry) return null; + map.delete(callId); + return entry.intent; +} + +// ── createToolExecuteBeforeHook ───────────────────────────────────────────── + +/** + * Append a block to output.output when: + * 1. frontmatterConfig is non-null (skill has skill-memory: enabled: true) + * 2. Notes exist for this skill in the DB + * 3. output.output is a non-empty string + * + * Delegates to recallSkillMemoryBlock (feature layer) for the shared recall+format core. + * Append ordering: this runs BEFORE maybeInjectChannel1Nudge (skill-memory + * content before Channel-1 meta-reminder). See design §2.6. + */ +export async function maybeInjectSkillMemory( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + frontmatterConfig: SkillMemoryConfig | null, + output: { output?: unknown }, + intent?: string, +): Promise { + if (typeof output.output !== "string" || output.output.length === 0) return; + + // Delegate to shared recall core (also used by ctx_skill_recall tool) + const block = await recallSkillMemoryBlock(db, { + skill: skillId, + intent, + scope: tier, + projectIdentity, + frontmatterConfig, + }); + if (block) { + output.output = `${output.output}\n\n${block}`; + } +} + +export function createToolExecuteBeforeHook(args: { intentByCallId: IntentByCallIdMap }) { + return async (input: unknown, output?: unknown) => { + const typedInput = input as { tool?: string; callID?: string; sessionID?: string }; + const typedOutput = output as { args?: Record } | undefined; + if (typedInput.tool !== "skill") return; + if (!typedInput.callID || !typedInput.sessionID) return; + const intent = typedOutput?.args?.intent; + if (typeof intent !== "string") return; + // Key by sessionID:callID so a concurrent session's delete (which prunes + // by prefix) can't evict this session's in-flight intents. + stashIntent( + args.intentByCallId, + intentKey(typedInput.sessionID, typedInput.callID), + intent, + ); + }; +} + export function createToolExecuteAfterHook(args: { db: Parameters[0]; channel1StateBySession: Map; + skillLoadRegistry: import("../../features/magic-context/skill-memory/provenance").SkillLoadRegistry; + /** Resolved session.directory values, used to compute projectIdentity for + * the skill-memory recall. The hook's transform pass populates this on + * every message turn; on the first skill call before the map is seeded, + * we fall back to `defaultDirectory` (deps.directory). */ + sessionDirectoryBySession: Map; + defaultDirectory: string; + intentByCallId: IntentByCallIdMap; }) { return async (input: unknown, output?: unknown) => { - const typedInput = input as { tool?: string; sessionID?: string; args?: unknown }; + const typedInput = input as { + tool?: string; + sessionID?: string; + callID?: string; + args?: unknown; + }; if (!typedInput.sessionID || !typedInput.tool) { return; } + // Skill-memory: populate registry when skill tool completes. + // Frontmatter MUST be read from DISK (proven in Task 0b: opencode's + // skill loader strips the skill-memory: block from the model-facing + // output). Reading output.output would always yield null. We re-read + // SKILL.md from provenance.resolvedPath (which IS present in the + // output's "Base directory for this skill:" line). + if (typedInput.tool === "skill") { + const typedOutput = output as { output?: unknown } | undefined; + if (typeof typedOutput?.output === "string") { + const skillArgs = typedInput.args as { name?: unknown } | undefined; + const skillId = typeof skillArgs?.name === "string" ? skillArgs.name : null; + if (skillId) { + // One dynamic import of the provenance module shared by both + // the registry-populate and the injection blocks below + // (lazy-loaded only when the skill tool actually fires). + const { parseSkillProvenance, registryKey } = await import( + "../../features/magic-context/skill-memory/provenance" + ); + try { + const { parseFrontmatterConfig } = await import( + "../../features/magic-context/skill-memory/frontmatter" + ); + const provenance = parseSkillProvenance(typedOutput.output, skillId); + if (provenance) { + let frontmatterConfig: + | import("../../features/magic-context/skill-memory/frontmatter").SkillMemoryConfig + | null = null; + try { + const { readFileSync } = await import("node:fs"); + const rawSkillContent = readFileSync( + provenance.resolvedPath, + "utf-8", + ); + frontmatterConfig = parseFrontmatterConfig(rawSkillContent); + } catch { + // Non-fatal: SKILL.md unreadable → frontmatterConfig stays null + // (skill-memory disabled for this skill load) + } + args.skillLoadRegistry.set(registryKey(typedInput.sessionID, skillId), { + ...provenance, + frontmatterConfig, + }); + } + } catch { + // Non-fatal: registry miss means ctx_skill_note will surface an actionable error + } + + // Skill-memory injection (BEFORE Channel-1 nudge — design §2.6). + // Re-read skillId/args from typedInput; resolve sessionDir to + // projectIdentity; delegate to maybeInjectSkillMemory which + // appends the block to output.output. + // Non-fatal: recall failure must never block the tool result. + try { + const registryEntry = args.skillLoadRegistry.get( + registryKey(typedInput.sessionID, skillId), + ); + if (registryEntry) { + // First-turn fallback: if the map has no entry yet + // (skill tool fires before sessionDirectoryBySession + // is populated), fall back to args.defaultDirectory. + // Intentional: multi-project / Desktop-launched sessions + // may misattribute on the very first skill call; + // subsequent calls resolve correctly. + const sessionDir = + args.sessionDirectoryBySession.get(typedInput.sessionID) ?? + args.defaultDirectory; + const projectIdentity = resolveProjectIdentity(sessionDir); + const stashed = typedInput.callID + ? (getAndDeleteIntent( + args.intentByCallId, + intentKey(typedInput.sessionID, typedInput.callID), + ) ?? undefined) + : undefined; + await maybeInjectSkillMemory( + args.db, + skillId, + registryEntry.tier, + projectIdentity, + registryEntry.frontmatterConfig, + output as { output?: unknown }, + stashed, + ); + } + } catch (error) { + sessionLog( + typedInput.sessionID, + "skill-memory injection failed (ignored):", + error, + ); + } + } + } + } + if (typedInput.tool === "ctx_reduce") { // Mark the Channel 1 baseline dirty so the next nudge re-measures the // (now smaller) reclaimable tail instead of replaying a stale band. diff --git a/packages/plugin/src/hooks/magic-context/hook.ts b/packages/plugin/src/hooks/magic-context/hook.ts index bd823c60..d6802695 100644 --- a/packages/plugin/src/hooks/magic-context/hook.ts +++ b/packages/plugin/src/hooks/magic-context/hook.ts @@ -27,6 +27,10 @@ import { getEmbeddingCoverageStatus, } from "../../features/magic-context/project-embedding-registry"; import type { Scheduler } from "../../features/magic-context/scheduler"; +import { + createSkillLoadRegistry, + type SkillLoadRegistry, +} from "../../features/magic-context/skill-memory/provenance"; import { getDatabasePersistenceError, getSessionsWithPendingMarker, @@ -73,8 +77,11 @@ import { createChatMessageHook, createCommandExecuteBeforeHook, createEventHook, + createIntentByCallIdMap, createToolExecuteAfterHook, + createToolExecuteBeforeHook, getLiveNotificationParams, + pruneIntentsForSession, } from "./hook-handlers"; import type { LiveSessionState } from "./live-session-state"; import { sendIgnoredMessage } from "./send-session-notification"; @@ -282,6 +289,17 @@ export function createMagicContextHook(deps: MagicContextDeps) { // Written at the end of each transform pass (post-drop), read in // tool.execute.after. Only populated for primary sessions. const channel1StateBySession = new Map(); + // intentByCallId: stash for skill tool intent captured pre-validation in + // tool.execute.before. Bounded: 60s TTL + 256-entry hard cap + finally-delete + // in after-hook. Cleared in onSessionDeleted. + const intentByCallId = createIntentByCallIdMap(); + // skillLoadRegistry: session-scoped registry of (skillId → SkillProvenance + + // frontmatterConfig), populated in tool.execute.after for the skill tool. + // Per-session cleanup in onSessionDeleted (keyed as `${sessionId}:${skillId}`). + // Exposed on the hook's return value so the same instance flows to + // createCtxSkillNoteTool (index.ts, Task 8) — otherwise the tool sees a + // disconnected empty Map and recall is dead on arrival. + const skillLoadRegistry: SkillLoadRegistry = createSkillLoadRegistry(); /** * Return the live provider/model for a session. @@ -664,6 +682,19 @@ export function createMagicContextHook(deps: MagicContextDeps) { internalChildSessions.delete(sessionId); channel1StateBySession.delete(sessionId); clearEmbedSessionState(sessionId); + // intentByCallId is keyed `${sessionID}:${callID}` — prune only THIS + // session's entries by prefix so a concurrent session's delete can't + // evict another session's in-flight intents (which would silently + // degrade its skill-memory recall to the flat rung). The 60s TTL + + // 256-entry cap remain the leak backstops. + pruneIntentsForSession(intentByCallId, sessionId); + // skillLoadRegistry is keyed as `${sessionId}:${skillId}` so we can prune + // per-session entries without cross-session bleed. Without this, deleted + // sessions' skill loads would persist in the registry for the plugin's + // lifetime (potentially days/weeks), slowly leaking memory. + for (const key of skillLoadRegistry.keys()) { + if (key.startsWith(`${sessionId}:`)) skillLoadRegistry.delete(key); + } }, }); @@ -927,6 +958,19 @@ export function createMagicContextHook(deps: MagicContextDeps) { "tool.execute.after": createToolExecuteAfterHook({ db, channel1StateBySession, + skillLoadRegistry, + // Resolve session-specific directory from the map populated by the + // transform pass; fall back to the hook's own directory (deps.directory) + // for the first-turn case where the map isn't seeded yet. + sessionDirectoryBySession, + defaultDirectory: deps.directory, + intentByCallId, }), + "tool.execute.before": createToolExecuteBeforeHook({ intentByCallId }), + // Exposed so index.ts can pass the SAME instance to createCtxSkillNoteTool. + // The after-hook populates this registry; the tool reads from it. Without + // this, the tool would receive a fresh empty Map and ctx_skill_note would + // always return "No recent skill load found". + skillLoadRegistry, }; } diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts new file mode 100644 index 00000000..ec3accc6 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, test } from "bun:test"; + +import { extractToolCallSummaries } from "./read-session-formatting"; + +describe("extractToolCallSummaries — skill tool", () => { + test("surfaces the skill name as TC: skill()", () => { + const parts = [ + { + type: "tool", + tool: "skill", + state: { input: { name: "test-driven-development" }, metadata: {} }, + }, + ]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(test-driven-development)"]); + }); + + test("skill branch wins even if metadata.description is present (regression-proof)", () => { + const parts = [ + { + type: "tool", + tool: "skill", + state: { input: { name: "council" }, metadata: { description: "Load skill" } }, + }, + ]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(council)"]); + }); + + test("skill with no name falls through to bare TC: skill", () => { + const parts = [{ type: "tool", tool: "skill", state: { input: {}, metadata: {} } }]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill"]); + }); + + test("preserves a skill name containing ')' verbatim (does not drop the marker)", () => { + // A ")" doesn't break the single-line marker and the historian reads it as + // natural language — preserve the name (identity key) rather than drop it. + const parts = [ + { + type: "tool", + tool: "skill", + state: { input: { name: "weird(name)" }, metadata: {} }, + }, + ]; + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(weird(name))"]); + }); + + test("drops the name only when it contains a real line-breaker (CR/LF/tab)", () => { + const parts = [ + { type: "tool", tool: "skill", state: { input: { name: "bad\nname" }, metadata: {} } }, + ]; + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill"]); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts index 2e6f75db..338f210c 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts @@ -70,6 +70,29 @@ export function extractToolCallSummaries(parts: unknown[]): string[] { const input = state.input as Record | null; const metadata = state.metadata as Record | null; + // Skill tool: surface the skill name (input.name) before the description + // fallback, which would otherwise mask it if metadata.description exists. + // The name is an IDENTITY key — the historian extracts the skill-id from + // this marker and recall keys on the raw input.name, so the marker name + // MUST equal the raw name (no truncation, no mutation) or the stored id + // won't match recall. Emit it VERBATIM when marker-safe; if it contains a + // marker-breaking char (CR/LF/tab/")") — which a real skill directory name + // never does — drop the name (`TC: skill`) rather than emit a corrupted or + // mutated identity. + if (p.tool === "skill") { + const rawName = input && typeof input.name === "string" ? input.name : ""; + // Only CR/LF/tab genuinely corrupt the single-line `TC: skill()` + // marker; a ")" does NOT break the line and the historian reads the + // marker as natural language (not a strict paren-matched parse), so a + // ")"-containing name is preserved VERBATIM rather than dropped — + // dropping the name loses historian attribution + recall keying, which + // is worse than a cosmetically-ambiguous paren. (Real skill directory + // names are slugs; this is defensive.) + const markerSafe = rawName !== "" && !/[\r\n\t]/.test(rawName); + summaries.push(markerSafe ? `TC: skill(${rawName})` : "TC: skill"); + continue; + } + // Prefer explicit description (bash tool always has one) const description = (input && typeof input.description === "string" && input.description) || diff --git a/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts new file mode 100644 index 00000000..3e231f53 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts @@ -0,0 +1,212 @@ +import { describe, expect, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { recallSkillMemoryBlock } from "../../features/magic-context/skill-memory/recall"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { maybeInjectSkillMemory } from "../magic-context/hook-handlers"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("recallSkillMemoryBlock (shared recall core)", () => { + test("returns non-empty string containing { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h-recall-1", + createdAt: Date.now(), + }); + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + expect(block).toContain(" { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h-recall-2", + createdAt: Date.now(), + }); + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: null, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); + + test("returns empty string when no notes exist (cold-start)", async () => { + const db = makeDb(); + try { + const block = await recallSkillMemoryBlock(db, { + skill: "nonexistent-skill", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); +}); + +describe("maybeInjectSkillMemory", () => { + test("appends skill-memory block to output.output when notes exist", async () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h1", + createdAt: Date.now(), + }); + + const output = { output: "# TDD Skill\nContent here." }; + // Pass enabled frontmatterConfig — null triggers the early-return guard + // (`if (!frontmatterConfig?.enabled) return;`) and the block is never injected. + await maybeInjectSkillMemory( + db, + "tdd", + "global", + "git:abc", + { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000, dedup_threshold: 0.92 }, + output, + ); + + expect(output.output).toContain(" { + const db = makeDb(); + try { + const output = { output: "# TDD Skill\nContent here." }; + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + expect(output.output).not.toContain(" { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h2", + createdAt: Date.now(), + }); + const output = { output: "# TDD Skill\nContent here." }; + // null frontmatterConfig = skill-memory not enabled for this skill + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + expect(output.output).not.toContain(" { + // maybeInjectSkillMemory APPENDS to output.output — it does NOT prepend. + // So if a sentinel is already in the output, the skill-memory block lands AFTER it. + // This test verifies the append contract: skillMemoryPos > channel1Pos. + // + // The ordering contract between maybeInjectSkillMemory and maybeInjectChannel1Nudge + // (skill-memory before Channel-1 meta-reminder) is enforced at the + // createToolExecuteAfterHook level, not at the single-function level. + // TODO (U11): add a createToolExecuteAfterHook integration test with stubs to verify + // that maybeInjectSkillMemory fires before maybeInjectChannel1Nudge. + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h3", + createdAt: Date.now(), + }); + const output = { output: "# TDD Skill\nContent here.\n" }; + await maybeInjectSkillMemory( + db, + "tdd", + "global", + "git:abc", + { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000, dedup_threshold: 0.92 }, + output, + ); + const skillMemoryPos = output.output.indexOf(" { + test("stashIntent stores intent keyed by callId", () => { + const map = createIntentByCallIdMap(); + stashIntent(map, "call-1", "fix the bug in auth"); + expect(getAndDeleteIntent(map, "call-1")).toBe("fix the bug in auth"); + }); + + test("getAndDeleteIntent removes the entry (finally-delete semantics)", () => { + const map = createIntentByCallIdMap(); + stashIntent(map, "call-2", "some intent"); + getAndDeleteIntent(map, "call-2"); + expect(getAndDeleteIntent(map, "call-2")).toBeNull(); + }); + + test("stashIntent evicts entries older than 60s (TTL backstop)", () => { + const map = createIntentByCallIdMap(); + // Manually insert a stale entry (65s > 60s TTL, clearly expired regardless of boundary strictness) + map.set("stale-call", { intent: "old intent", ts: Date.now() - 65_000 }); + stashIntent(map, "new-call", "new intent"); // triggers sweep + expect(map.has("stale-call")).toBe(false); + expect(map.has("new-call")).toBe(true); + }); + + test("stashIntent hard-caps map at 256 entries (evicts oldest)", () => { + const map = createIntentByCallIdMap(); + // Fill to 256 + for (let i = 0; i < 256; i++) { + map.set(`call-${i}`, { intent: `intent-${i}`, ts: Date.now() - i }); + } + // Adding one more should evict the oldest + stashIntent(map, "call-overflow", "overflow intent"); + expect(map.size).toBeLessThanOrEqual(256); + expect(map.has("call-overflow")).toBe(true); + }); + + test("pruneIntentsForSession removes ONLY the deleted session's entries", () => { + // Regression (P1): a bare-callID key + .clear() on session delete wiped + // EVERY concurrent session's in-flight intents. Keying by + // `${sessionId}:${callId}` + prefix-prune isolates the delete. + const map = createIntentByCallIdMap(); + stashIntent(map, intentKey("ses-A", "call-1"), "A intent"); + stashIntent(map, intentKey("ses-B", "call-1"), "B intent"); + stashIntent(map, intentKey("ses-B", "call-2"), "B intent 2"); + + pruneIntentsForSession(map, "ses-A"); + + // ses-A's entry is gone; ses-B's two survive (callID "call-1" collides + // across sessions but the composite key keeps them distinct). + expect(getAndDeleteIntent(map, intentKey("ses-A", "call-1"))).toBeNull(); + expect(getAndDeleteIntent(map, intentKey("ses-B", "call-1"))).toBe("B intent"); + expect(getAndDeleteIntent(map, intentKey("ses-B", "call-2"))).toBe("B intent 2"); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts b/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts new file mode 100644 index 00000000..9c4e8090 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, test } from "bun:test"; + +// Test the intent injection logic in isolation (pure function). +// Import from the dedicated module, not from index.ts (which is the plugin entry point). +import { injectSkillIntentParam } from "./skill-tool-definition"; + +type OutputShape = Parameters[1]; + +describe("skill tool definition intent injection", () => { + test("assigns output.jsonSchema for the skill tool (current opencode: jsonSchema starts undefined)", () => { + const output: OutputShape = { + description: "Load a specialized skill", + parameters: {}, // Effect Schema — opencode's tool.definition hook passes this; we leave it alone + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + // New contract: opencode advertises the model-facing schema from + // `output.jsonSchema ?? fromSchema(output.parameters)`. The skill + // tool's `jsonSchema` is currently undefined, so we MUST assign one. + expect(output.jsonSchema).toBeDefined(); + const js = output.jsonSchema as NonNullable; + expect(js.type).toBe("object"); + expect(js.properties).toBeDefined(); + // Mirrors the skill tool's real param: a single required `name` string. + expect(js.properties.name).toBeDefined(); + expect((js.properties.name as { type?: string }).type).toBe("string"); + // The injected `intent` is present and is a string. + expect(js.properties.intent).toBeDefined(); + expect((js.properties.intent as { type?: string }).type).toBe("string"); + // `name` is required; `intent` is optional. + expect(js.required).toEqual(["name"]); + expect(js.required).not.toContain("intent"); + }); + + test("does not touch output.jsonSchema for non-skill tool ids", () => { + const output: OutputShape = { + description: "Read a file", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("read", output); + expect(output.jsonSchema).toBeUndefined(); + }); + + test("extends an existing output.jsonSchema in place (forward-compat)", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: { + type: "object", + properties: { + name: { + type: "string", + description: "The name of the skill from available_skills", + }, + }, + required: ["name"], + }, + }; + injectSkillIntentParam("skill", output); + const js = output.jsonSchema as NonNullable; + // Preserves the existing properties (does not clobber `name`). + expect(js.properties.name).toBeDefined(); + expect((js.properties.name as { type?: string }).type).toBe("string"); + // Adds `intent` alongside. + expect(js.properties.intent).toBeDefined(); + expect((js.properties.intent as { type?: string }).type).toBe("string"); + // Required list untouched. + expect(js.required).toEqual(["name"]); + }); + + test("is idempotent — calling twice does not double-add intent", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + const first = JSON.stringify(output.jsonSchema); + injectSkillIntentParam("skill", output); + const second = JSON.stringify(output.jsonSchema); + expect(second).toBe(first); + const js = output.jsonSchema as NonNullable; + const intentKeys = Object.keys(js.properties).filter((k) => k === "intent"); + expect(intentKeys.length).toBe(1); + }); + + test("intent description mentions skill-memory recall (drives the recall surface)", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + const js = output.jsonSchema as NonNullable; + const desc = (js.properties.intent as { description?: string }).description ?? ""; + expect(desc).toContain("skill-memory recall"); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts b/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts new file mode 100644 index 00000000..9d78dc48 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts @@ -0,0 +1,81 @@ +/** + * Injects an optional `intent` parameter into the `skill` tool's schema. + * Called from the `tool.definition` hook. + * + * opencode advertises the model-facing schema for a tool via + * fromTool = tool.jsonSchema ?? fromSchema(tool.parameters) + * The `output` of the `tool.definition` hook has shape + * { description, parameters: , jsonSchema: }. + * + * For the `skill` tool: + * - `output.parameters` is an Effect Schema (a Decoder object) — not a plain JSON Schema. + * Mutating `.properties.intent` on it is a no-op for the model-facing schema. + * - `output.jsonSchema` is currently `undefined` (the skill tool never sets it and + * `Tool.define` does not synthesize one), so opencode derives the model-facing + * schema from the Effect `parameters` — which only has `name`. Our mutation is + * invisible to the model. The model sees a `skill` tool with ONLY `name`. + * + * The fix: ASSIGN `output.jsonSchema` with a JSON Schema object that mirrors the + * skill tool's real params (a single required `name` string) plus the optional + * `intent`. This makes `output.jsonSchema !== tool.jsonSchema` (the new reference + * satisfies opencode's registry gate `output.parameters === tool.parameters || + * output.jsonSchema !== tool.jsonSchema`), so the new schema is what opencode + * advertises to the model. We deliberately leave `output.parameters` untouched + * — the Effect schema still governs execute-time validation, which strips + * `intent` via `onExcessProperty: "ignore"` so the existing `tool.execute.before` + * capture path keeps working unchanged. + * + * If a future opencode version starts precomputing a `jsonSchema` for the skill + * tool, we extend it in place rather than clobbering it. + * + * Lives here (not in index.ts) to avoid leaking an internal helper through the + * plugin entry point. index.ts imports and calls it directly. + */ +export function injectSkillIntentParam( + toolID: string, + output: { + parameters?: unknown; + jsonSchema?: { + type?: string; + properties?: Record; + required?: string[]; + additionalProperties?: boolean; + }; + }, +): void { + if (toolID !== "skill") return; + const INTENT_PROP = { + type: "string", + description: + "Optional: describe what you are trying to accomplish with this skill (used for skill-memory recall). E.g. 'fix a flaky test in the auth module'.", + }; + // Forward-compat + idempotency: if opencode (a future version) already provides a + // jsonSchema object, extend it in place rather than clobbering it. + const existing = output.jsonSchema; + if ( + existing && + typeof existing === "object" && + existing.properties && + typeof existing.properties === "object" + ) { + if (!("intent" in existing.properties)) { + existing.properties.intent = INTENT_PROP; + } + return; + } + // Current opencode: the skill tool has no precomputed jsonSchema (undefined), so opencode + // would derive the model-facing schema from the Effect `parameters` (name only). Construct + // a jsonSchema mirroring the skill tool's real params (name, required) PLUS the optional intent. + output.jsonSchema = { + type: "object", + properties: { + name: { + type: "string", + description: "The name of the skill from available_skills", + }, + intent: INTENT_PROP, + }, + required: ["name"], + additionalProperties: false, + }; +} diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index c246d06e..fe6fbf5a 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -28,6 +28,7 @@ import { HISTORIAN_EDITOR_SYSTEM_PROMPT, } from "./hooks/magic-context/compartment-prompt"; import { createLiveSessionState } from "./hooks/magic-context/live-session-state"; +import { injectSkillIntentParam } from "./hooks/magic-context/skill-tool-definition"; import { cleanupConflictWarnings, sendConflictWarning } from "./plugin/conflict-warning-hook"; import { startDreamScheduleTimer } from "./plugin/dream-timer"; import { ensureProjectRegisteredFromOpenCodeDirectory } from "./plugin/embedding-bootstrap"; @@ -148,9 +149,34 @@ const server: Plugin = async (ctx) => { liveSessionState, }); + // Fail-loud guard: skillLoadRegistry is required for ctx_skill_note to + // verify the skill was loaded this session. If the after-hook wiring + // is broken, ctx_skill_note would silently read an empty Map and + // every note would return "No recent skill load found" — the exact + // opposite of "fail loud". Catch a wiring regression at startup, not + // at the first ctx_skill_note call from an agent. + // + // ONLY when the plugin is enabled: when disabled by config (`enabled: false`) + // or by a detected conflict (sets enabled=false above), createSessionHooks + // returns `{ magicContext: null }` by design and no tools are exposed, so a + // missing registry is expected — throwing here would crash plugin init on + // the disabled path (an entry-module throw — the exact load-crash class this + // plugin must avoid). + if (pluginConfig.enabled && !hooks.magicContext?.skillLoadRegistry) { + throw new Error( + "[magic-context] ctx_skill_note registration failed: " + + "hooks.magicContext.skillLoadRegistry is missing. " + + "Ensure createMagicContextHook() returns skillLoadRegistry in its return object.", + ); + } const tools = createToolRegistry({ ctx, pluginConfig, + // Disabled path: magicContext is null and createToolRegistry early-returns + // {} without using the registry. Pass a throwaway Map so the argument + // expression never dereferences null (createToolRegistry never reads it + // when disabled). + skillLoadRegistry: hooks.magicContext?.skillLoadRegistry ?? new Map(), }); // v22 deferred legacy-memory identity backfill. createSessionHooks() opens @@ -450,15 +476,33 @@ const server: Plugin = async (ctx) => { await hooks.magicContext?.["chat.message"]?.(input); }, "tool.definition": async (input, output) => { + const typedInput = input as { toolID?: string }; + const typedOutput = output as { + description?: unknown; + parameters?: unknown; + jsonSchema?: { + type?: string; + properties?: Record; + required?: string[]; + additionalProperties?: boolean; + }; + }; + if (!typedInput.toolID) return; + // Inject optional intent param for skill-memory recall FIRST — it only + // mutates the skill tool's advertised JSON schema and does NOT need + // chat context, so it must run even on a tool.definition flight that + // fires before any chat.message (otherwise the model never sees the + // `intent` param that flight and skill-memory recall silently degrades). + injectSkillIntentParam( + typedInput.toolID, + typedOutput as Parameters[1], + ); // Attribute tool schema tokens to the most recent chat-message context. // If no chat.message has fired yet in this process (e.g. a subagent // flight that reuses a historian/dreamer/sidekick agent whose - // chat.message preceded plugin init), skip — the measurement will - // land correctly on the next flight. + // chat.message preceded plugin init), skip the attribution — the + // measurement will land correctly on the next flight. if (!lastChatContext) return; - const typedInput = input as { toolID?: string }; - const typedOutput = output as { description?: unknown; parameters?: unknown }; - if (!typedInput.toolID) return; recordToolDefinition( lastChatContext.providerID, lastChatContext.modelID, @@ -471,6 +515,9 @@ const server: Plugin = async (ctx) => { "tool.execute.after": async (input, output) => { await hooks.magicContext?.["tool.execute.after"]?.(input, output); }, + "tool.execute.before": async (input, output) => { + await hooks.magicContext?.["tool.execute.before"]?.(input, output); + }, "experimental.text.complete": async (input, output) => { await hooks.magicContext?.["experimental.text.complete"]?.(input, output); }, diff --git a/packages/plugin/src/plugin/rpc-handlers.test.ts b/packages/plugin/src/plugin/rpc-handlers.test.ts index 09825cec..dd219c09 100644 --- a/packages/plugin/src/plugin/rpc-handlers.test.ts +++ b/packages/plugin/src/plugin/rpc-handlers.test.ts @@ -5,6 +5,7 @@ import { replaceAllCompartmentState } from "../features/magic-context/compartmen import { insertMemory } from "../features/magic-context/memory"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; import { runMigrations } from "../features/magic-context/migrations"; +import { insertSkillMemoryNote } from "../features/magic-context/skill-memory/storage"; import { initializeDatabase } from "../features/magic-context/storage-db"; import { createLiveSessionState } from "../hooks/magic-context/live-session-state"; import { estimateTokens } from "../hooks/magic-context/read-session-formatting"; @@ -238,3 +239,121 @@ describe("buildStatusDetail — history token reuse (council audit bg_51106601 # } }); }); + +describe("buildStatusDetail — skill memory section", () => { + test("seeds skill_memory rows → detail.skillMemory reflects totals/skills/pinned scoped to the project", async () => { + const db = createTestDb(); + try { + const sessionId = "ses-status-skillmem-pop"; + const directory = process.cwd(); + const projectIdentity = resolveProjectIdentity(directory); + + db.prepare( + "INSERT INTO session_meta (session_id, last_input_tokens, last_context_percentage) VALUES (?, 0, 0)", + ).run(sessionId); + + // 3 notes for "tdd", 2 of them pinned + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i1", + kind: "gotcha", + delta: "n1", + normalizedHash: "sm-pop-h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i2", + kind: "fix", + delta: "n2", + normalizedHash: "sm-pop-h2", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i3", + kind: "workflow", + delta: "n3", + normalizedHash: "sm-pop-h3", + createdAt: Date.now(), + }); + db.prepare("UPDATE skill_memory SET pinned = 1 WHERE normalized_hash IN (?, ?)").run( + "sm-pop-h1", + "sm-pop-h2", + ); + + // 1 note for a different skill, not pinned + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i4", + kind: "discovery", + delta: "n4", + normalizedHash: "sm-pop-h4", + createdAt: Date.now(), + }); + + const detail = await buildStatusDetail(db, sessionId, directory); + expect(detail.skillMemory).not.toBeNull(); + expect(detail.skillMemory?.totalNotes).toBe(4); + expect(detail.skillMemory?.skillsWithNotes).toBe(2); + expect(detail.skillMemory?.pinnedNotes).toBe(2); + } finally { + closeQuietly(db); + } + }); + + test("project-tier note under a different project identity does not count toward this project's stats (scoping isolation)", async () => { + const db = createTestDb(); + try { + const sessionId = "ses-status-skillmem-noproj"; + // Scoping check: a PROJECT-tier note under a different project identity + // must NOT count toward the resolved project's stats. (Global-tier notes + // are intentionally cross-project — stored under the '*' sentinel and + // counted everywhere per the F5 design — so a global fixture here would + // legitimately count; global counting is covered in storage.test.ts. + // Project-tier still partitions by real identity, which is what isolates.) + const directory = process.cwd(); + + db.prepare( + "INSERT INTO session_meta (session_id, last_input_tokens, last_context_percentage) VALUES (?, 0, 0)", + ).run(sessionId); + + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "project", + skillSource: "opencode-project", + projectIdentity: "git:some-other-project", + intent: "i", + kind: "gotcha", + delta: "isolated", + normalizedHash: "sm-iso-h1", + createdAt: Date.now(), + }); + + const detail = await buildStatusDetail(db, sessionId, directory); + expect(detail.skillMemory).not.toBeNull(); + expect(detail.skillMemory?.totalNotes).toBe(0); + expect(detail.skillMemory?.skillsWithNotes).toBe(0); + expect(detail.skillMemory?.pinnedNotes).toBe(0); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/plugin/rpc-handlers.ts b/packages/plugin/src/plugin/rpc-handlers.ts index 5894c927..aa6e9c57 100644 --- a/packages/plugin/src/plugin/rpc-handlers.ts +++ b/packages/plugin/src/plugin/rpc-handlers.ts @@ -5,6 +5,7 @@ import type { MagicContextConfig } from "../config/schema/magic-context"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; import { getEmbeddingCoverageStatus } from "../features/magic-context/project-embedding-registry"; +import { getSkillMemoryStats } from "../features/magic-context/skill-memory/storage"; import { type ContextDatabase as Database, openDatabase, @@ -536,8 +537,18 @@ export function buildStatusDetail( compressionBudget: null, compressionUsage: null, toastDurationMs: 5000, + skillMemory: null, }; + // Skill-memory stats — scoped to the session's project identity (the + // skill_memory table is partitioned on project_identity). base.projectIdentity + // is resolved by buildSidebarSnapshot; we re-use it here to avoid a second + // resolveProjectIdentity call. Null identity → null stats (status dialog + // hides the section). Single SQL aggregate, sync, safe to call every poll. + detail.skillMemory = base.projectIdentity + ? getSkillMemoryStats(db, base.projectIdentity) + : null; + try { const meta = db .prepare<[string], Record>( diff --git a/packages/plugin/src/plugin/tool-registry.ts b/packages/plugin/src/plugin/tool-registry.ts index 14bfd1e7..36a45480 100644 --- a/packages/plugin/src/plugin/tool-registry.ts +++ b/packages/plugin/src/plugin/tool-registry.ts @@ -3,6 +3,7 @@ import type { MagicContextPluginConfig } from "../config"; import { isDreamerRunnable } from "../config/agent-disable"; import { DEFAULT_PROTECTED_TAGS } from "../features/magic-context/defaults"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; +import type { SkillLoadRegistry } from "../features/magic-context/skill-memory/provenance"; import { getDatabasePersistenceError, isDatabasePersisted, @@ -14,6 +15,8 @@ import { CTX_MEMORY_ACTIONS, createCtxMemoryTools } from "../tools/ctx-memory"; import { createCtxNoteTools } from "../tools/ctx-note"; import { createCtxReduceTools } from "../tools/ctx-reduce"; import { createCtxSearchTools } from "../tools/ctx-search"; +import { CTX_SKILL_NOTE_TOOL_NAME, createCtxSkillNoteTool } from "../tools/ctx-skill-note"; +import { CTX_SKILL_RECALL_TOOL_NAME, createCtxSkillRecallTool } from "../tools/ctx-skill-recall"; import { ensureProjectRegisteredFromOpenCodeDirectory } from "./embedding-bootstrap"; import { normalizeToolArgSchemas } from "./normalize-tool-arg-schemas"; import type { PluginContext } from "./types"; @@ -21,6 +24,7 @@ import type { PluginContext } from "./types"; export function createToolRegistry(args: { ctx: PluginContext; pluginConfig: MagicContextPluginConfig; + skillLoadRegistry: SkillLoadRegistry; }): Record { const { ctx, pluginConfig } = args; @@ -101,6 +105,28 @@ export function createToolRegistry(args: { allowedActions: [...CTX_MEMORY_ACTIONS], }) : {}), + // ctx_skill_note: skill-specific memory tool. Reads the session-scoped + // skillLoadRegistry to verify the skill was actually loaded this session + // before allowing a note to be written. The fail-loud guard for a missing + // registry lives in index.ts (the only place hooks.magicContext is in + // scope) so a wiring regression is caught at startup, not at runtime. + // NOT gated on memoryEnabled — skill-memory is an independent store. + [CTX_SKILL_NOTE_TOOL_NAME]: createCtxSkillNoteTool({ + db, + skillLoadRegistry: args.skillLoadRegistry, + }), + // ctx_skill_recall: explicit agent-callable recall tool. Registry-first + // (reuses the already-parsed frontmatterConfig from the transparent path) + // with disk-fallback for cold-start sessions where the skill hasn't been + // loaded yet. projectDirectory is a fallback only — execute() prefers + // toolContext.directory (the session's actual working dir) so `opencode -s` + // from outside the project resolves correctly. No fail-loud guard needed: + // skillLoadRegistry is optional for ctx_skill_recall. + [CTX_SKILL_RECALL_TOOL_NAME]: createCtxSkillRecallTool({ + db, + projectDirectory: ctx.directory, + skillLoadRegistry: args.skillLoadRegistry, + }), }; // Patch arg schemas so property-level .describe() text survives JSON Schema serialization. diff --git a/packages/plugin/src/shared/rpc-types.ts b/packages/plugin/src/shared/rpc-types.ts index 66f4d4e5..6c7e837c 100644 --- a/packages/plugin/src/shared/rpc-types.ts +++ b/packages/plugin/src/shared/rpc-types.ts @@ -123,6 +123,16 @@ export interface StatusDetail extends SidebarSnapshot { compressionUsage: string | null; /** Effective configured toast duration in ms after config resolution. */ toastDurationMs: number; + /** + * Per-skill cross-session recall store (skill_memory table) stats scoped + * to the session's project identity. Null when no project identity is + * available (status dialog will hide the section). + */ + skillMemory?: { + totalNotes: number; + skillsWithNotes: number; + pinnedNotes: number; + } | null; } /** Embedding coverage for `/ctx-embed` status (mirrors getEmbeddingCoverageStatus). */ diff --git a/packages/plugin/src/tools/ctx-skill-note/index.ts b/packages/plugin/src/tools/ctx-skill-note/index.ts new file mode 100644 index 00000000..e8623189 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/index.ts @@ -0,0 +1,3 @@ +export { CTX_SKILL_NOTE_TOOL_NAME, createCtxSkillNoteTool } from "./tools"; +export type { CtxSkillNoteArgs, CtxSkillNoteToolDeps, SkillNoteKind } from "./types"; +export { VALID_KINDS } from "./types"; diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts new file mode 100644 index 00000000..9c13047b --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts @@ -0,0 +1,267 @@ +import { describe, expect, mock, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { + createSkillLoadRegistry, + registryKey, + type SkillLoadRegistry, +} from "../../features/magic-context/skill-memory/provenance"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { createCtxSkillNoteTool } from "./tools"; + +mock.module("../../features/magic-context/memory/embedding", () => ({ + embedTextForProject: async (_p: string, text: string) => ({ + vector: text.includes("mock Date.now") + ? new Float32Array([1, 0]) + : new Float32Array([0, 1]), + modelId: "test-model", + generation: 1, + }), +})); + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +const toolContext = (sessionID = "ses_test", agent = "general") => + ({ sessionID, agent, directory: "/tmp/test" }) as never; + +describe("ctx_skill_note tool", () => { + test("rejects kind='general' with actionable error (hard gate)", async () => { + const db = makeDb(); + const registry = createSkillLoadRegistry(); + try { + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { skill: "tdd", intent: "fix test", kind: "general" as never, delta: "some note" }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("ctx_memory"); + } finally { + closeQuietly(db); + } + }); + + test("returns actionable error when skill not in registry", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { + skill: "nonexistent-skill", + intent: "fix test", + kind: "gotcha", + delta: "some note", + }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("No recent skill load found"); + expect(result).toContain("nonexistent-skill"); + } finally { + closeQuietly(db); + } + }); + + test("inserts note when skill is in registry", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + // Pre-populate registry + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { + skill: "tdd", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock", + }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("saved"); + } finally { + closeQuietly(db); + } + }); + + test("global-tier note is written under '*' with origin_project = real repo", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "council"), { + resolvedPath: "/home/user/.config/opencode/skills/council/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "council", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { skill: "council", intent: "x", kind: "gotcha", delta: "global lesson xyz" }, + toolContext(), + ); + const row = db + .prepare( + "SELECT project_identity, origin_project FROM skill_memory WHERE delta='global lesson xyz'", + ) + .get() as { project_identity: string; origin_project: string }; + expect(row.project_identity).toBe("*"); + expect(row.origin_project).not.toBe("*"); + expect(row.origin_project.length).toBeGreaterThan(0); + } finally { + closeQuietly(db); + } + }); + + test("deduplicates: bumps hit_count on exact duplicate delta", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { skill: "tdd", intent: "fix test", kind: "gotcha", delta: "Exact duplicate note" }, + toolContext(), + ); + const result = await t.execute( + { skill: "tdd", intent: "fix test", kind: "gotcha", delta: "Exact duplicate note" }, + toolContext(), + ); + expect(result).toContain("already recorded"); + } finally { + closeQuietly(db); + } + }); + + test("near-duplicate delta under a different intent bumps hit_count (one row)", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now in auth tests", + }, + toolContext(), + ); + await t.execute( + { + skill: "tdd", + intent: "stabilize login spec", + kind: "fix", + delta: "also mock Date.now in auth tests", + }, + toolContext(), + ); + const rows = db + .prepare("SELECT hit_count FROM skill_memory WHERE skill_id='tdd'") + .all() as Array<{ hit_count: number }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("same-intent different-delta inserts a second row", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now in auth tests", + }, + toolContext(), + ); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "use a fixed seed for the rng", + }, + toolContext(), + ); + const rows = db.prepare("SELECT id FROM skill_memory WHERE skill_id='tdd'").all(); + expect(rows.length).toBe(2); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.ts b/packages/plugin/src/tools/ctx-skill-note/tools.ts new file mode 100644 index 00000000..6de52e8b --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/tools.ts @@ -0,0 +1,177 @@ +import { type ToolContext, type ToolDefinition, tool } from "@opencode-ai/plugin"; +import { cosineSimilarity } from "../../features/magic-context/memory/cosine-similarity"; +import { embedTextForProject } from "../../features/magic-context/memory/embedding"; +import { computeNormalizedHash } from "../../features/magic-context/memory/normalize-hash"; +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; +import { + float32ArrayToBlob, + toFloat32Array, +} from "../../features/magic-context/memory/storage-memory-embeddings"; +import { getSkillLoad } from "../../features/magic-context/skill-memory/provenance"; +import { + bumpHitCount, + bumpHitCountById, + findExistingNote, + getDedupCandidates, + insertSkillMemoryNote, + partitionKey, +} from "../../features/magic-context/skill-memory/storage"; +import { + CTX_SKILL_NOTE_TOOL_NAME, + type CtxSkillNoteArgs, + type CtxSkillNoteToolDeps, + VALID_KINDS, +} from "./types"; + +// NOTE on tool() API: the real @opencode-ai/plugin tool() takes: +// { description, args: ZodRawShape, execute(args, context: ToolContext) } +// `name` is registry-level (passed when registering, not inside tool body). +// `args` uses tool.schema.* (Zod-like) for field definitions, NOT a JSON Schema object. +// See packages/plugin/src/tools/ctx-memory/tools.ts for the canonical pattern. +// +// The tool body below uses the correct API shape. The `name` field is intentionally +// absent from the tool() call — it is provided at registration time in the tool registry. + +export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefinition { + return tool({ + description: + "Record a skill-specific note (gotcha, discovery, fix, or workflow step) for future recall. " + + "Call after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. " + + "Skip routine successes. Notes are recalled automatically on the next load of the same skill.", + args: { + skill: tool.schema.string().describe("The skill name (e.g. 'test-driven-development')"), + intent: tool.schema + .string() + .describe("The task/intent context when this note was learned"), + kind: tool.schema + .enum(VALID_KINDS) + .describe( + "Note type: 'gotcha' (non-obvious trap), 'discovery' (better approach found), " + + "'fix' (error→solution), 'workflow' (step that must not be skipped). " + + "Do NOT use 'general' — general observations belong in ctx_memory.", + ), + delta: tool.schema + .string() + .describe("The note content — concise, actionable, specific to this skill"), + tags: tool.schema + .array(tool.schema.string()) + .optional() + .describe("Optional tags for future filtering"), + }, + execute: async (args: CtxSkillNoteArgs, toolContext: ToolContext) => { + // Hard gate: reject kind='general' + if ((args.kind as string) === "general") { + return ( + "'kind: general' is not a valid skill-memory note type. " + + "General observations belong in `ctx_memory` with an appropriate category " + + "(e.g. PROJECT_RULES, CONSTRAINTS, ARCHITECTURE). " + + "Use ctx_skill_note only for gotchas, discoveries, fixes, or workflow steps specific to this skill." + ); + } + + if (!VALID_KINDS.includes(args.kind)) { + return `Invalid kind '${args.kind}'. Must be one of: ${VALID_KINDS.join(", ")}.`; + } + + const sessionId = toolContext.sessionID; + if (!sessionId) return "Error: no session ID available."; + + // Resolve skill from session-scoped registry + const registryEntry = getSkillLoad(deps.skillLoadRegistry, sessionId, args.skill); + if (!registryEntry) { + return ( + `No recent skill load found for '${args.skill}' in this session — load it first with the skill tool. ` + + `If you just loaded it, this may indicate a provenance parse failure (check that the skill output contains a 'Base directory for this skill:' line).` + ); + } + + // Use toolContext.directory (the session's working directory) rather than + // a launch dir. This matches ctx_memory's pattern and correctly handles + // `opencode -s` launched outside the project root. + const projectIdentity = resolveProjectIdentity(toolContext.directory); + const part = partitionKey(registryEntry.tier, projectIdentity); + const normalizedHash = computeNormalizedHash(args.delta); + + // Check for exact duplicate + const existing = findExistingNote( + deps.db, + args.skill, + registryEntry.tier, + part, + normalizedHash, + ); + if (existing) { + bumpHitCount(deps.db, args.skill, registryEntry.tier, part, normalizedHash); + return ( + `Note already recorded (hit_count now ${existing.hit_count + 1}). ` + + `Exact duplicate detected — hit count bumped to reinforce recall priority.` + ); + } + + // Embed both fields (best-effort; null on provider-off/unseeded — note still inserts). + let intentEmb = await embedTextForProject(projectIdentity, args.intent); + const deltaEmb = await embedTextForProject(projectIdentity, args.delta); + // Guard mixed vector spaces: if a re-registration happened between the two embeds, discard intent to keep one space. + if (intentEmb && deltaEmb && intentEmb.modelId !== deltaEmb.modelId) { + intentEmb = null; + } + const modelVersion = deltaEmb?.modelId ?? intentEmb?.modelId ?? null; + + // Delta-only semantic dedup (bounded top-200, model-matched). + if (deltaEmb) { + const cands = getDedupCandidates( + deps.db, + args.skill, + registryEntry.tier, + part, + 200, + ); + const threshold = registryEntry.frontmatterConfig?.dedup_threshold ?? 0.92; + for (const c of cands) { + if (!c.delta_embedding || c.embedding_model_version !== deltaEmb.modelId) + continue; + if ( + cosineSimilarity(deltaEmb.vector, toFloat32Array(c.delta_embedding)) >= + threshold + ) { + bumpHitCountById(deps.db, c.id); + return "Note already recorded (semantic duplicate — hit_count bumped)."; + } + } + } + + // Insert new note + const id = insertSkillMemoryNote(deps.db, { + skillId: args.skill, + resolvedPath: registryEntry.resolvedPath, + tier: registryEntry.tier, + skillSource: registryEntry.skillSource, + projectIdentity: part, + originProject: projectIdentity, + intent: args.intent, + kind: args.kind, + delta: args.delta, + tags: args.tags, + normalizedHash, + intentEmbedding: intentEmb ? float32ArrayToBlob(intentEmb.vector) : null, + deltaEmbedding: deltaEmb ? float32ArrayToBlob(deltaEmb.vector) : null, + embeddingModelVersion: modelVersion, + createdAt: Date.now(), + }); + + if (id === null) { + // Race condition: another process inserted the same hash + bumpHitCount(deps.db, args.skill, registryEntry.tier, part, normalizedHash); + return "Note already recorded (concurrent insert detected — hit count bumped)."; + } + + return ( + `Skill note saved (id=${id}, skill=${args.skill}, kind=${args.kind}, tier=${registryEntry.tier}). ` + + `It will be recalled on the next load of '${args.skill}' in this project.` + ); + }, + }); +} + +// Re-export the tool name for the registration site (lives in plugin/tool-registry.ts). +export { CTX_SKILL_NOTE_TOOL_NAME }; diff --git a/packages/plugin/src/tools/ctx-skill-note/types.ts b/packages/plugin/src/tools/ctx-skill-note/types.ts new file mode 100644 index 00000000..c50bbe99 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/types.ts @@ -0,0 +1,23 @@ +import type { SkillLoadRegistry } from "../../features/magic-context/skill-memory/provenance"; +import type { Database } from "../../shared/sqlite"; + +export const CTX_SKILL_NOTE_TOOL_NAME = "ctx_skill_note"; + +export const VALID_KINDS = ["gotcha", "discovery", "fix", "workflow"] as const; +export type SkillNoteKind = (typeof VALID_KINDS)[number]; + +export interface CtxSkillNoteArgs { + skill: string; + intent: string; + kind: SkillNoteKind; + delta: string; + tags?: string[]; +} + +export interface CtxSkillNoteToolDeps { + db: Database; + skillLoadRegistry: SkillLoadRegistry; + // NOTE: projectDirectory is intentionally absent — execute uses toolContext.directory + // (the session's working directory) to match ctx_memory's pattern and correctly handle + // `opencode -s` launched outside the project root. See Task 8 Step 3 for rationale. +} diff --git a/packages/plugin/src/tools/ctx-skill-recall/index.ts b/packages/plugin/src/tools/ctx-skill-recall/index.ts new file mode 100644 index 00000000..c89e7c34 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/index.ts @@ -0,0 +1,2 @@ +export { CTX_SKILL_RECALL_TOOL_NAME, createCtxSkillRecallTool } from "./tools"; +export type { CtxSkillRecallArgs, CtxSkillRecallToolDeps } from "./types"; diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts new file mode 100644 index 00000000..8a17f625 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { parseFrontmatterConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { createCtxSkillRecallTool } from "./tools"; +import type { CtxSkillRecallToolTestDeps } from "./types"; + +// DI-based tests: inject _testFrontmatterConfig + _testProjectIdentity via deps +// to bypass SKILL.md disk resolution and resolveProjectIdentity() entirely. +// This avoids: +// 1. Missing SKILL.md → null frontmatterConfig → early return → no block +// 2. resolveProjectIdentity("/tmp/test") mismatch with test-inserted projectIdentity + +const TEST_PROJECT_IDENTITY = "git:abc"; +const ENABLED_FRONTMATTER = parseFrontmatterConfig( + "---\nskill-memory:\n enabled: true\n max_tokens: 1500\n max_pinned_tokens: 4000\n dedup_threshold: 0.92\n---\n", +); + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("ctx_skill_recall tool", () => { + test("returns block string when notes exist for skill (DI injection)", async () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: TEST_PROJECT_IDENTITY, + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h-recall-tool-1", + createdAt: Date.now(), + }); + + // Inject frontmatterConfig + projectIdentity via DI — no SKILL.md needed + const testDeps: CtxSkillRecallToolTestDeps = { + db, + projectDirectory: "/tmp/test", + _testFrontmatterConfig: ENABLED_FRONTMATTER, + _testProjectIdentity: TEST_PROJECT_IDENTITY, + }; + const tool = createCtxSkillRecallTool(testDeps); + const result = await tool.execute({ skill: "tdd", intent: "fix flaky test" }, { + sessionID: "ses_test", + agent: "general", + directory: "/tmp/test", + } as never); + expect(typeof result).toBe("string"); + expect(result).toContain(" { + const db = makeDb(); + try { + // Inject enabled frontmatter + matching projectIdentity, but no notes inserted + const testDeps: CtxSkillRecallToolTestDeps = { + db, + projectDirectory: "/tmp/test", + _testFrontmatterConfig: ENABLED_FRONTMATTER, + _testProjectIdentity: TEST_PROJECT_IDENTITY, + }; + const tool = createCtxSkillRecallTool(testDeps); + const result = await tool.execute({ skill: "nonexistent-skill" }, { + sessionID: "ses_test", + agent: "general", + directory: "/tmp/test", + } as never); + expect(typeof result).toBe("string"); + expect(result).toContain("No skill-memory found"); + expect(result).toContain("nonexistent-skill"); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.ts new file mode 100644 index 00000000..2a932074 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.ts @@ -0,0 +1,197 @@ +import { readFileSync } from "node:fs"; +import { dirname } from "node:path"; +import { type ToolContext, type ToolDefinition, tool } from "@opencode-ai/plugin"; +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; +import { parseFrontmatterConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import { + deriveSkillTier, + getSkillLoad, + type SkillProvenance, +} from "../../features/magic-context/skill-memory/provenance"; +import { recallSkillMemoryBlock } from "../../features/magic-context/skill-memory/recall"; +import { + CTX_SKILL_RECALL_TOOL_NAME, + type CtxSkillRecallArgs, + type CtxSkillRecallToolDeps, + type CtxSkillRecallToolTestDeps, +} from "./types"; + +// NOTE on tool() API: same pattern as ctx_skill_note (Task 8). +// `name` is registry-level (not in tool body). `args` uses tool.schema.* Zod-like shape. +// See packages/plugin/src/tools/ctx-memory/tools.ts for the canonical pattern. + +type SkillLoadEntry = ReturnType; + +export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefinition { + return tool({ + description: + "Explicitly recall skill-memory notes for a named skill without re-loading the skill. " + + "Use when you want to query accumulated gotchas/discoveries for a skill you have already loaded " + + "this session, or when you need to recall notes without triggering a full skill load. " + + "Returns the block as a string, or a 'No skill-memory found' message when empty.", + args: { + skill: tool.schema + .string() + .describe("The skill name to recall notes for (e.g. 'test-driven-development')"), + intent: tool.schema + .string() + .optional() + .describe( + "Optional: your current task intent — used for intent-scoped recall (P2). Omit for flat recall.", + ), + max_tokens: tool.schema + .number() + .optional() + .describe( + "Optional token budget override. Defaults to the skill's frontmatter max_tokens (or 1500 if absent).", + ), + }, + execute: async (args: CtxSkillRecallArgs, toolContext: ToolContext) => { + // Test-only DI overrides (bypass all resolution). Read via an internal + // cast to the test-deps type so the seams stay OUT of the public + // CtxSkillRecallToolDeps contract (production callers can't pass them). + const testDeps = deps as CtxSkillRecallToolTestDeps; + if ( + testDeps._testFrontmatterConfig !== undefined || + testDeps._testProjectIdentity !== undefined + ) { + const projectIdentity = + testDeps._testProjectIdentity ?? + resolveProjectIdentity(toolContext.directory ?? deps.projectDirectory); + const frontmatterConfig = testDeps._testFrontmatterConfig ?? null; + const tier: "project" | "global" = "global"; // default for test injection + const block = await recallSkillMemoryBlock(deps.db, { + skill: args.skill, + intent: args.intent, + scope: tier, + projectIdentity, + frontmatterConfig, + maxTokens: args.max_tokens, + }); + if (!block) { + return `No skill-memory found for '${args.skill}' in this session.`; + } + return block; + } + + // Resolve project identity from the session's working directory + const projectDirectory = toolContext.directory ?? deps.projectDirectory; + const projectIdentity = resolveProjectIdentity(projectDirectory); + + // ── RESOLUTION: REGISTRY-FIRST + disk-fallback ────────────────────────── + // + // 1. Registry-first (common case): if the skill was loaded this session, + // the transparent path already populated SkillLoadRegistry with the + // exact resolvedPath + frontmatterConfig. Use it — no disk I/O needed. + // + // 2. Disk-fallback (cold-start): only when NOT in registry, search disk. + // Search order matches opencode's real discoverSkills() from + // packages/opencode/src/skill/index.ts:173-233: + // - Project dirs FIRST (finding U3: project shadows global) + // - Global dirs second + // Verified paths (see discoverSkills() source): + // - Global external: ~/.claude/skills/, ~/.agents/skills/ + // (pattern: skills/**/SKILL.md, via CLAUDE_EXTERNAL_DIR + AGENTS_EXTERNAL_DIR) + // - Walk-up project external: .claude/skills/, .agents/skills/ + // (same pattern, ancestor walk from project root) + // - Config dirs (pattern: {skill,skills}/**/SKILL.md): + // ~/.config/opencode/ (= Global.Path.config = xdgConfig/opencode) + // .opencode/ (walk-up from project root) + // NOTE: ~/.config/opencode/skills/ IS discovered via this path — + // config.directories() returns Global.Path.config and the pattern + // {skill,skills}/**/SKILL.md matches skills//SKILL.md under it. + // - Custom paths: cfg.skills?.paths (user-configured in opencode.jsonc) + // Singular .opencode/skill/ is also valid (OPENCODE_SKILL_PATTERN covers both). + + const sessionId = toolContext.sessionID; + const registryEntry: SkillLoadEntry = + sessionId && deps.skillLoadRegistry + ? getSkillLoad(deps.skillLoadRegistry, sessionId, args.skill) + : undefined; + + let resolvedPath: string | null = null; + let frontmatterConfig = registryEntry?.frontmatterConfig ?? null; + let tier: SkillProvenance["tier"] = "global"; + + if (registryEntry) { + // Registry hit: exact resolution, reuse already-parsed frontmatterConfig + resolvedPath = registryEntry.resolvedPath; + tier = registryEntry.tier; + } else { + // Cold-start disk fallback: search in opencode discovery order + // Project dirs first (shadows global), then global dirs + const home = process.env.HOME ?? process.env.USERPROFILE ?? ""; + const candidateDirs = [ + // Project-local dirs first (project shadows global — finding U3) + `${projectDirectory}/.opencode/skill/${args.skill}`, + `${projectDirectory}/.opencode/skills/${args.skill}`, + `${projectDirectory}/.agents/skills/${args.skill}`, + `${projectDirectory}/.claude/skills/${args.skill}`, + // Global dirs second + `${home}/.config/opencode/skills/${args.skill}`, // via Global.Path.config + {skill,skills}/**/SKILL.md + `${home}/.config/opencode/skill/${args.skill}`, // singular — OPENCODE_SKILL_PATTERN covers both + `${home}/.agents/skills/${args.skill}`, // via AGENTS_EXTERNAL_DIR + `${home}/.claude/skills/${args.skill}`, // via CLAUDE_EXTERNAL_DIR + ]; + + let rawSkillContent: string | null = null; + for (const dir of candidateDirs) { + const candidate = `${dir}/SKILL.md`; + try { + rawSkillContent = readFileSync(candidate, "utf-8"); + resolvedPath = candidate; + break; + } catch { + // Not found in this dir — try next + } + } + + if (!resolvedPath) { + // SKILL.md not found anywhere — distinct message from "no notes" cold-start + return ( + `SKILL.md not found for '${args.skill}' in any known skill directory. ` + + `Load the skill first with the skill tool, or verify the skill name is correct. ` + + `Searched: project .opencode/skill/, .opencode/skills/, .agents/skills/, .claude/skills/; ` + + `global ~/.config/opencode/skill/, ~/.config/opencode/skills/, ~/.agents/skills/, ~/.claude/skills/.` + ); + } + + // Parse frontmatter from on-disk SKILL.md + frontmatterConfig = rawSkillContent + ? parseFrontmatterConfig(rawSkillContent) + : null; + // Derive tier from the skill's directory (dirname, not a fragile + // string replace that would mis-handle a path containing "SKILL.md"). + tier = deriveSkillTier(dirname(resolvedPath)); + } + + if (!frontmatterConfig?.enabled) { + // Distinct message: skill-memory disabled in frontmatter (not "no notes") + return ( + `skill-memory is not enabled for '${args.skill}'. ` + + `To enable it, add \`skill-memory: { enabled: true }\` to the skill's SKILL.md frontmatter.` + ); + } + + // Delegate to shared recall core (feature layer — same as transparent path) + const block = await recallSkillMemoryBlock(deps.db, { + skill: args.skill, + intent: args.intent, + scope: tier, + projectIdentity, + frontmatterConfig, + maxTokens: args.max_tokens, + }); + + if (!block) { + // Cold-start: skill-memory enabled but no notes recorded yet + return `No skill-memory found for '${args.skill}' — no notes have been recorded yet. Use ctx_skill_note to record gotchas, discoveries, fixes, or workflow steps after using this skill.`; + } + + return block; + }, + }); +} + +// Re-export the tool name for the registration site (lives in plugin/tool-registry.ts). +export { CTX_SKILL_RECALL_TOOL_NAME }; diff --git a/packages/plugin/src/tools/ctx-skill-recall/types.ts b/packages/plugin/src/tools/ctx-skill-recall/types.ts new file mode 100644 index 00000000..b53994e9 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/types.ts @@ -0,0 +1,33 @@ +import type { SkillMemoryConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import type { SkillLoadRegistry } from "../../features/magic-context/skill-memory/provenance"; +import type { Database } from "../../shared/sqlite"; + +export const CTX_SKILL_RECALL_TOOL_NAME = "ctx_skill_recall"; + +export interface CtxSkillRecallArgs { + skill: string; + intent?: string; + max_tokens?: number; +} + +export interface CtxSkillRecallToolDeps { + db: Database; + projectDirectory: string; + // Optional: session-scoped skill-load registry (populated by transparent path). + // When provided, resolution is registry-first (exact, no disk I/O). + // In production, pass hooks.magicContext.skillLoadRegistry. + // In tests, inject directly to avoid SKILL.md fixture files. + skillLoadRegistry?: SkillLoadRegistry; +} + +/** + * Test-only DI overrides — kept OUT of the public production contract so a + * production call site cannot accidentally pass them to bypass disk resolution, + * frontmatter loading, and project-identity resolution. Tests pass this wider + * type (structurally assignable to CtxSkillRecallToolDeps); the tool impl reads + * the overrides via an internal cast. + */ +export interface CtxSkillRecallToolTestDeps extends CtxSkillRecallToolDeps { + _testFrontmatterConfig?: SkillMemoryConfig | null; + _testProjectIdentity?: string; +} diff --git a/packages/plugin/src/tools/index.ts b/packages/plugin/src/tools/index.ts index ab712beb..47965a65 100644 --- a/packages/plugin/src/tools/index.ts +++ b/packages/plugin/src/tools/index.ts @@ -3,3 +3,5 @@ export * from "./ctx-memory"; export * from "./ctx-note"; export * from "./ctx-reduce"; export * from "./ctx-search"; +export * from "./ctx-skill-note"; +export * from "./ctx-skill-recall"; diff --git a/packages/plugin/src/tui/index.tsx b/packages/plugin/src/tui/index.tsx index eca06f0c..daafce9a 100644 --- a/packages/plugin/src/tui/index.tsx +++ b/packages/plugin/src/tui/index.tsx @@ -463,6 +463,26 @@ const StatusDialog = (props: { api: TuiPluginApi; s: StatusDetail }) => { {s().lastDreamerRunAt && ( )} + {/* Skill-memory store (per-skill cross-session recall) — + only when the status detail carries the aggregate + (requires a resolved project identity). Mirrors the + text-mode executeStatus "### Skill memory" section. */} + {s().skillMemory && (() => { + const sm = s().skillMemory! + return ( + + + Skill Memory + + + + + ) + })()}