From 6c7c5cbca406b71c7e70e452a9a47fee4c174a49 Mon Sep 17 00:00:00 2001 From: Tehan Date: Tue, 16 Jun 2026 09:58:37 +0200 Subject: [PATCH 01/10] feat(skill-memory): transparent per-skill cross-session recall (P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-skill "motor memory": when a skill's SKILL.md declares `skill-memory: { enabled: true }`, accumulated gotchas/discoveries/fixes/ workflow-steps surface in a block appended to the skill tool's RESULT on every load (cache-safe — rides the tool-result tail). Agents write back via ctx_skill_note; ctx_skill_recall is the explicit companion to the transparent after-hook. - migration: skill_memory table (per-skill; tier project/global; UNIQUE on skill_id/tier/project_identity/normalized_hash) + lookup indexes. - three-hook augmentation: tool.definition advertises an `intent` param; tool.execute.before stashes intent (bounded TTL); after-hook parses the skill's Base directory, reads SKILL.md frontmatter, formats the block. - flat recency×hit recall + storage layer; ctx_skill_note / ctx_skill_recall. - opt-in distill-skill-memory dreamer task; agent-prompt guidance; TUI/ctx-status stats. - docs: ARCHITECTURE / STRUCTURE / CONFIGURATION / README. 
--- ARCHITECTURE.md | 255 +++++++++++++++++- CONFIGURATION.md | 34 +++ README.md | 3 + STRUCTURE.md | 20 +- packages/pi-plugin/src/commands/ctx-status.ts | 2 +- .../plugin/src/agents/magic-context-prompt.ts | 19 +- .../src/agents/skill-memory-guidance.test.ts | 16 ++ .../schema/distill-skill-memory-enum.test.ts | 22 ++ .../magic-context/dreamer/task-prompts.ts | 48 ++++ .../magic-context/dreamer/task-registry.ts | 5 +- .../magic-context/migrations-v38.test.ts | 1 + .../src/features/magic-context/migrations.ts | 41 +++ .../skill-memory/frontmatter.test.ts | 47 ++++ .../magic-context/skill-memory/frontmatter.ts | 95 +++++++ .../skill-memory/provenance.test.ts | 54 ++++ .../magic-context/skill-memory/provenance.ts | 93 +++++++ .../magic-context/skill-memory/recall.test.ts | 98 +++++++ .../magic-context/skill-memory/recall.ts | 146 ++++++++++ .../skill-memory/storage.test.ts | 228 ++++++++++++++++ .../magic-context/skill-memory/storage.ts | 186 +++++++++++++ .../src/features/magic-context/storage-db.ts | 2 +- .../hooks/magic-context/command-handler.ts | 8 +- .../src/hooks/magic-context/execute-status.ts | 33 ++- .../src/hooks/magic-context/hook-handlers.ts | 191 +++++++++++++ .../plugin/src/hooks/magic-context/hook.ts | 44 +++ .../skill-memory-injection.test.ts | 212 +++++++++++++++ .../skill-memory-intent-stash.test.ts | 46 ++++ .../skill-tool-definition.test.ts | 99 +++++++ .../magic-context/skill-tool-definition.ts | 81 ++++++ packages/plugin/src/index.ts | 34 ++- .../plugin/src/plugin/rpc-handlers.test.ts | 119 ++++++++ packages/plugin/src/plugin/rpc-handlers.ts | 11 + packages/plugin/src/plugin/tool-registry.ts | 26 ++ packages/plugin/src/shared/rpc-types.ts | 10 + .../plugin/src/tools/ctx-skill-note/index.ts | 3 + .../src/tools/ctx-skill-note/tools.test.ts | 130 +++++++++ .../plugin/src/tools/ctx-skill-note/tools.ts | 143 ++++++++++ .../plugin/src/tools/ctx-skill-note/types.ts | 23 ++ .../src/tools/ctx-skill-recall/index.ts | 2 + 
.../src/tools/ctx-skill-recall/tools.test.ts | 87 ++++++ .../src/tools/ctx-skill-recall/tools.ts | 190 +++++++++++++ .../src/tools/ctx-skill-recall/types.ts | 24 ++ packages/plugin/src/tools/index.ts | 2 + packages/plugin/src/tui/index.tsx | 20 ++ 44 files changed, 2929 insertions(+), 24 deletions(-) create mode 100644 packages/plugin/src/agents/skill-memory-guidance.test.ts create mode 100644 packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/provenance.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/recall.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/recall.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/storage.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/storage.ts create mode 100644 packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts create mode 100644 packages/plugin/src/hooks/magic-context/skill-memory-intent-stash.test.ts create mode 100644 packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts create mode 100644 packages/plugin/src/hooks/magic-context/skill-tool-definition.ts create mode 100644 packages/plugin/src/tools/ctx-skill-note/index.ts create mode 100644 packages/plugin/src/tools/ctx-skill-note/tools.test.ts create mode 100644 packages/plugin/src/tools/ctx-skill-note/tools.ts create mode 100644 packages/plugin/src/tools/ctx-skill-note/types.ts create mode 100644 packages/plugin/src/tools/ctx-skill-recall/index.ts create mode 100644 packages/plugin/src/tools/ctx-skill-recall/tools.test.ts create mode 100644 
packages/plugin/src/tools/ctx-skill-recall/tools.ts create mode 100644 packages/plugin/src/tools/ctx-skill-recall/types.ts diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 2c18f1a7..fb374102 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -39,6 +39,13 @@ This is the heart of the system and the part most easily gotten wrong. A "transf 6. Compartment phase: inject the `` (m[0]/m[1]) into `message[0]`. 7. **Postprocess** (`transform-postprocess-phase.ts`): the mutation gates — pending-op drain, heuristic cleanup, nudges, synthetic-todowrite, auto-search. +**Tool surface:** +- Purpose: Expose agent tools with validated schemas and storage-backed execution. +- Location: `src/tools/ctx-reduce/`, `src/tools/ctx-expand/`, `src/tools/ctx-note/`, `src/tools/ctx-memory/`, `src/tools/ctx-search/`, `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/` +- Contains: Tool definitions, argument schemas, action gating (incl. dreamer-only actions in `ctx_memory`), user-facing result formatting. The two `ctx_skill_*` tools share a `recallSkillMemoryBlock` core with the transparent after-hook path (see Skill-memory in Key Abstractions) so write-back and explicit recall both go through the same recall+format pipeline. +- Depends on: `src/features/magic-context/`, `src/features/magic-context/skill-memory/`, and `src/hooks/magic-context/read-session-chunk.ts`. +- Used by: `src/plugin/tool-registry.ts`. + ### Pass taxonomy (every pass is exactly one) - **SOFT+ (defer / `cache_hit`):** nothing new. m[0] AND m[1] replay byte-identical; the entire `system + m[0] + m[1]` prefix stays cached. Only the conversation tail moves (where `ctx_reduce`/age drops land, themselves replayed deterministically). The steady state — most passes are this. - **SOFT (cache-busting):** m[1] re-renders (new compartments / memories / user-profile surface as deltas) while m[0] stays byte-identical. `system + m[0]` stays cached; the cache busts at the m[1] breakpoint. 
Driven by an execute pass, `/ctx-flush`, or a deferred-history drain. @@ -102,6 +109,20 @@ The long-history pipeline. Tiered compartments + deterministic decay renderer (r `protected-tail-boundary.ts` decides, per pass, which prefix of the raw tail is eligible for the historian and which suffix stays protected — from true-raw token sizes (not user-turn counts), so sparse-user-turn sessions can't deadlock the historian (#132). Boundary anchors at `lastCompartmentEnd + 1`; token target `N` capped at `0.40 × usable` (ABS_CAP 96k); a live-prompt floor keeps it from crossing the newest meaningful user message on routine (<80%) passes. **Open tool arcs** (a tool invocation with no result in the window) only hold the boundary back when **recent** (≥ the size-walk start = the live window); a stale/interrupted open arc older than that is compactable — otherwise one dead `running` tool call at the eligible-head edge would freeze the historian indefinitely. The trigger/runner share a content-stable range fingerprint for cross-view staleness validation. +**Skill-memory flow (per-skill cross-session recall):** transparent augmentation of opencode's built-in `skill` tool — when a loaded skill declares `skill-memory: { enabled: true }` in its frontmatter, accumulated gotchas/discoveries surface in a `` block appended to the skill tool's RESULT on every load. Three opencode hooks plus two agent-callable tools implement the loop. +1. **Definition (`tool.definition` in `src/hooks/magic-context/skill-tool-definition.ts`)** — augments the `skill` tool's JSON Schema with an optional `intent` string parameter. Effect-Schema strips unknown keys (`onExcessProperty: "ignore"`) before the skill tool runs, so `intent` never reaches the skill itself; the before-hook captures it pre-validation. Idempotent (re-adds are guarded). +2. 
**Before (`tool.execute.before` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteBeforeHook`)** — stashes the raw `intent` by `callID` in a bounded closure-state `Map` (60s TTL sweep + 256-entry cap + full clear on session delete, so unpaired before-hooks never leak). The stash is the only place `intent` is observable; it's deleted in the after-hook's `finally`. +3. **After (`tool.execute.after` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteAfterHook`)** — runs only for `input.tool === "skill"`. Parses the `Base directory for this skill: file:///...` line from `output.output` via `parseSkillProvenance()` (`fileURLToPath`-based, cross-platform) to recover the resolved `SKILL.md` path + tier (project/global) + `skill_source`. Then re-reads `SKILL.md` from disk (opencode's skill loader strips the `skill-memory:` block from the model-facing output, so the frontmatter is unreadable from `output.output`). Populates a session-scoped `SkillLoadRegistry` keyed by `${sessionId}:${skillId}` (NOT persisted, cleaned in `onSessionDeleted`). When frontmatter has `enabled: true` and notes exist, delegates to `recallSkillMemoryBlock` (feature layer) and appends the block to `output.output` BEFORE the Channel-1 ctx_reduce nudge runs. +4. **Cache safety (keystone).** The append lands in the skill tool RESULT = conversation tail, NOT the cached m[0]/m[1] prefix. This is why the feature cannot regress the prompt-cache hit rate. Channel-1 already appends to tool output strings the same way (precedent in `maybeInjectChannel1Nudge`) — this is proven production behavior. +5. **Write-back (`ctx_skill_note`)** — `kind` is a hard gate rejecting `'general'` at the tool level; duplicates dedup on `normalized_hash` and bump `hit_count` (`computeNormalizedHash` from `memory/normalize-hash.ts`). 
Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry` (so the agent must load the skill first — actionable error otherwise). Inserts into the `skill_memory` table (migration v39). The injected block footer reinforces: "After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes." +6. **Explicit recall (`ctx_skill_recall`)** — companion tool to the transparent path; reuses `recallSkillMemoryBlock` so P2 embeddings upgrade both at once. Registry-first resolution (exact, free, no disk I/O when the skill was loaded this session) with a cold-start disk fallback that walks opencode's real `discoverSkills()` order (project dirs first — they shadow global — then global external + config dirs). +7. **Dreamer distill (`distill-skill-memory` task — opt-in, NOT a default)** — `DREAMER_TASKS` enum carries it (line 25 of `src/config/schema/magic-context.ts`); `DEFAULT_DREAMER_TASKS` does NOT (mirroring the `maintain-docs` precedent). The task prompt lives in `src/features/magic-context/dreamer/task-prompts.ts` and runs the merge/prune/promote maintenance cycle documented in CONFIGURATION.md. + +**Git-commit indexing:** +- `src/features/magic-context/git-commits/indexer.ts` reads HEAD-only non-merge commits via `git log` (NUL-byte-free format separator `\x1f`), bounded by `experimental.git_commit_indexing.{since_days, max_commits}`. +- Embeddings are generated through the same embedding provider chain as memories. +- Indexing fires from the dream-timer startup tick and periodic interval; manual `/ctx-dream` does NOT trigger commit indexing. + ## Memory, search & embeddings - **Memories** (`memory/storage-memory.ts`): project-scoped durable knowledge in the 5-category taxonomy (PROJECT_RULES / ARCHITECTURE / CONSTRAINTS / CONFIG_VALUES / NAMING), with FTS + vector side tables. 
`ctx_memory` exposes write/archive/update/merge/list; `list` is dreamer-only; primary agents may only mutate their own project's memories (workspace-shared categories aside). @@ -137,17 +158,178 @@ Background maintenance (V2: per-task cron scheduling). A process-wide 15-min tim Three effective modes; the heavier features (historian, nudges, adjunct injection) are gated, while tag/drop plumbing stays on everywhere. -| Feature | Primary + `ctx_reduce_enabled: true` | Primary + `ctx_reduce_enabled: false` | Subagents | +| Feature | Primary + `ctx_reduce_enabled: true` | Primary + `ctx_reduce_enabled: false` | Subagents (any `ctx_reduce_enabled`) | |---|---|---|---| | Tag DB records | ✓ | ✓ | ✓ | -| `§N§` prefix injection + `ctx_reduce` tool | ✓ | ✗ | ✓ (if `ctx_reduce` available) | -| Historian / compartments / decay / m[0]m[1] | ✓ | ✓ | ✗ | -| Channel 1 nudge | ✓ | ✗ | ✓ | -| Channel 2 nudge | ✓ | ✗ | ✓ | -| Synthetic-todowrite / auto-search | ✓ | ✓ | ✗ | -| Heuristic tool drops at execute | ✓ once/turn | ✓ once/turn | ✓ every execute pass | -| 85% force-materialize / 95% block | ✓ | ✓ | ✗ (overflow path only) | -| Caveman text compression | ✗ | opt-in | ✗ | +| `§N§` tag prefix injection in message text | ✓ | ✗ | ✗ | +| `ctx_reduce` tool | ✓ | ✗ | ✗ | +| Historian / compartments / decay rendering | ✓ | ✓ | ✗ | +| Compartment injection (``) | ✓ | ✓ | ✗ | +| ``, ``, `` system-prompt blocks | ✓ | ✓ | ✗ | +| Channel 1 ctx_reduce nudge (tool-output ``) | ✓ | ✗ | ✓ | +| Channel 2 ceiling nudge (synthetic-user, one-shot) | ✓ | ✗ | ✗ | +| Deferred-note nudges | ✓ | ✗ | ✗ | +| Synthetic-todowrite injection | ✓ | ✓ | ✗ | +| Skill-memory `` recall append (transparent after-hook) | ✓ | ✓ | ✓ | +| Auto-search hint | ✓ | ✓ | ✗ | +| Heuristic tool drops at execute threshold | ✓ (once per user turn) | ✓ (once per user turn) | ✓ (every execute pass — no once-per-turn guard) | +| Heuristic reasoning clearing | ✓ | ✓ | ✓ | +| 85 % force-materialization | ✓ | ✓ | ✗ | +| 95 % block + 
emergency recovery | ✓ | ✓ | ✗ (overflow handled via `overflow-detection.ts` only; no recovery flag persisted) | +| Experimental age-tier caveman text compression | ✗ | opt-in via `experimental.caveman_text_compression.enabled` | ✗ | + + +**Decay rendering (replaces the LLM compressor):** +- Purpose: Deterministically choose a render tier per compartment from age, importance, and live history-budget pressure — self-tuning as the model's context window changes, with zero LLM cost. +- Location: `src/hooks/magic-context/decay-curve.ts` (validated formula + tier boundaries), `src/hooks/magic-context/decay-render.ts` (shared OpenCode + Pi renderer). +- Pattern: Exponential half-life `H = H50·2^((I−50)/D)/max(p,0.10)` (`H50=24`, `D=25`); log-cost tier thresholds `[0.201,0.729,1.322,2.587]`; budget pressure computed once per pass; oldest-first demotion; archive/self-close past the last boundary. Council-validated invariants (monotonicity, finite demotion, O(budget) cost) locked by `decay-curve.test.ts`. + +**Compartment events (v2, stored-not-rendered):** +- Purpose: Persist historian-extracted `causal_incident` / `trajectory_correction` events as a corpus for future dreamer aggregation; never rendered into the prompt in v2.0. +- Location: `compartment_events` table (migration v23); `insertCompartmentEvents` / `getCompartmentEvents`. +- Pattern: Anchored to durable compartment ids (`at_compartment` → id at publish); discarded-tail events filtered; cleared on session deletion. + +**Message-history index:** +- Purpose: FTS-backed raw user/assistant message search outside the transform hot path. +- Location: `src/features/magic-context/message-index.ts`, `src/features/magic-context/message-index-async.ts` +- Pattern: Async reconciliation + live event indexing + pure-query reads. + +**Git-commit index:** +- Purpose: Per-project HEAD-only commit corpus for `ctx_search` integration. 
+- Location: `src/features/magic-context/git-commits/` +- Pattern: NUL-free git log reader + FTS index + embedding side table; populated by dream timer. + +**Dream queue and lease:** +- Purpose: Run at most one dream worker at a time and survive restarts. +- Location: `src/features/magic-context/dreamer/queue.ts`, `src/features/magic-context/dreamer/lease.ts`, `src/features/magic-context/dreamer/storage-dream-state.ts`, `src/features/magic-context/dreamer/storage-dream-runs.ts` +- Pattern: SQLite-backed queue plus cooperative lease lock plus durable run-history table. + +**Key-files pinning:** +- Purpose: Inject up to N project files into the system prompt as `` content for the active session. +- Location: `src/features/magic-context/key-files/identify-key-files.ts`, `src/features/magic-context/key-files/read-stats.ts`, `src/features/magic-context/key-files/storage-key-files.ts` +- Pattern: Per-session selection by Dreamer; budget-bound rendering; symlink-safe realpath check. + +**User memory pipeline:** +- Purpose: Extract user behavioral observations from historian output (the v2 `` block), collect candidates, and promote recurring patterns to stable global user memories. +- Location: `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/magic-context/user-memory/review-user-memories.ts` +- Pattern: Historian extracts candidates **only when `dreamer.user_memories.enabled`** (privacy gate, enforced post-commit best-effort on both harnesses); dreamer reviews with a multi-session recurrence gate and promotes; the baseline set renders into m[0] `` (new promotions into m[1]). user_memories are globally scoped (no `project_path`). + +**Skill-memory (motor memory for skills):** +- Purpose: Per-skill cross-session recall — when a skill declares `skill-memory: { enabled: true }` in its frontmatter, accumulated gotchas/discoveries/fixes/workflow steps surface in a `` block appended to the skill tool's RESULT on every load. 
Agents write back via `ctx_skill_note`; explicit recall (without re-loading) is `ctx_skill_recall`. The transparent after-hook is the primary path; the two tools are companions. +- Location: `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`; `src/hooks/magic-context/skill-tool-definition.ts` + the `skill-memory` branches in `src/hooks/magic-context/hook-handlers.ts`; `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/`. Table created in migration v39 (`skill_memory`). +- Pattern: Three-hook transparent augmentation (definition → before → after). The before-hook stashes a per-callID `intent` (bounded 60s TTL + 256-cap + session-delete clear). The after-hook parses the `Base directory for this skill: file:///...` line (cross-platform via `fileURLToPath`), reads the skill's `SKILL.md` from disk to recover its `skill-memory:` frontmatter (opencode strips it from the model-facing output), populates a session-scoped `SkillLoadRegistry` (NOT persisted), and calls `recallSkillMemoryBlock` (feature layer — shared core also used by the `ctx_skill_recall` tool) to format the injected block. Append lands in the tool RESULT (conversation tail) — cache-safe by construction. P1 retrieval is flat: recency × hit_count, no embeddings (P2 rungs are designed and marked TODO in `recall.ts`). Per-skill opt-in via SKILL.md frontmatter (`enabled: true` required; `max_tokens` 1500 / `max_pinned_tokens` 4000 / `dedup_threshold` 0.92 are tunable). Optional dreamer `distill-skill-memory` task (opt-in, NOT a default) handles merge/prune/promote maintenance. + +**TUI ↔ server RPC:** +- Purpose: Localhost RPC for sidebar data, status/recomp dialogs, and TUI-action consumption. 
+- Location: `src/shared/rpc-server.ts`, `src/shared/rpc-client.ts`, `src/shared/rpc-utils.ts`, `src/shared/rpc-types.ts`, `src/shared/rpc-notifications.ts`, `src/plugin/rpc-handlers.ts` +- Pattern: Server publishes ephemeral port; TUI plugin polls for state and pushes notifications via the message queue. + +**Plugin message bus (legacy):** +- Purpose: Historical SQLite-backed TUI ↔ server bus, retained for migration compatibility. +- Location: `src/features/magic-context/plugin-messages.ts` +- Pattern: Vestigial — superseded by RPC. Module remains for forward-compat with older TUI plugin versions that may still poll it; no active runtime callers in current code. + +**Compaction markers (deferred drain, plan v6):** +- Purpose: Inject OpenCode-compatible compaction boundaries into the message table so `filterCompacted` stops at historian's last compartment boundary, shrinking the transform-input array. Marker movement is deferred from historian publish into the next materializing transform pass so a single cache-bust cycle covers both the `` rebuild AND the marker boundary advance. +- Location: `src/features/magic-context/compaction-marker.ts`, `src/hooks/magic-context/compaction-marker-manager.ts`, `src/features/magic-context/storage-meta-persisted.ts` (pending blob helpers). +- Pattern: Historian incremental runner writes the prospective new boundary (`{ordinal, endMessageId, publishedAt}`) into `session_meta.pending_compaction_marker_state` in the same transaction that publishes new compartments. The next consuming transform pass that drains `deferredHistoryRefreshSessions` calls `applyDeferredCompactionMarker(...)`, which validates the pending target against the latest stored compartment via `getCompartmentsByEndMessageId(...)` plus an OpenCode-message existence check via `getOpenCodeMessageById(...)`, then sequences `removeCompactionMarker` → `injectCompactionMarker`. 
Returns a tagged `MarkerUpdateOutcome` (`applied` | `already-current` | `stale-skip` | `retryable-failure`); only `retryable-failure` preserves the deferred-history signal so the next pass retries. CAS-clear (`clearPendingCompactionMarkerStateIf`) on success guards against publish/drain races within and across processes. Eager paths (`/ctx-flush`, `/ctx-recomp`) call the marker manager directly and CAS-clear any stale pending blob. Restart-safe: hook init calls `getSessionsWithPendingMarker(...)` to rehydrate deferred sets so the next pass after restart still drains. `event-handler` CAS-clears pending state on `session.compacted` (provider already advanced the boundary) and on `session.deleted` via cascade. Raw-history readers strip `summary=true` / `finish="stop"` rows to preserve original ordinals. Stable feature, default `compaction_markers: true` since v0.16.x; deferred drain since v0.19 (plan v6). + +**Auto-update checker:** +- Purpose: Self-update the cached `@latest` plugin install once per plugin process — OpenCode's plugin cache no longer auto-updates. +- Location: `src/hooks/auto-update-checker/checker.ts`, `src/hooks/auto-update-checker/cache.ts`, `src/hooks/auto-update-checker/constants.ts` +- Pattern: Fires from plugin init with on-disk cross-process dedup; rewrites the install-directory dependency entry + `bun.lock` (or runs `npm install` under OpenCode's npm-managed cache). + +**Agent prompt pack:** +- Purpose: Keep hidden-agent identities and prompt text isolated from runtime wiring. +- Location: `src/agents/dreamer.ts`, `src/agents/historian.ts` (declares `HISTORIAN_AGENT` and `HISTORIAN_EDITOR_AGENT`), `src/agents/sidekick.ts`, `src/agents/magic-context-prompt.ts` +- Pattern: Constants plus prompt builders. 
+ +**Content stripping and replay:** +- Purpose: Strip reasoning, inline thinking, placeholder shells, structural noise, processed images, merged-assistant reasoning, system-injected stripping, and caveman compression from messages, and replay those operations on every transform pass to maintain stable message content across OpenCode's message rebuilds. +- Location: `src/hooks/magic-context/strip-content.ts`, `src/hooks/magic-context/caveman.ts`, `src/hooks/magic-context/caveman-cleanup.ts`, `src/hooks/magic-context/sentinel.ts` +- Pattern: Stateless strip functions plus deterministic in-place sentinel replacement (preserves message-part array shape across passes); paired with persisted watermarks (`cleared_reasoning_through_tag`, `stripped_placeholder_ids`, `tags.caveman_depth`) read from `session_meta` and `tags`. Several strips are provider-aware: `stripReasoningFromMergedAssistants` runs only for `anthropic`; whole-message empty-sentinel writes a `[dropped]` placeholder for non-Anthropic providers so openai-compatible providers don't see empty assistant messages. + +**Protected-tail boundary (v3):** +- Purpose: Decide, per pass, which prefix of the raw tail is eligible for the historian and which suffix stays protected — from true-raw token sizes instead of user-turn counts, so sparse-user-turn sessions can't deadlock the historian (issue #132). +- Location: `src/hooks/magic-context/protected-tail-boundary.ts` (resolver), `src/hooks/magic-context/read-session-true-raw-tokens.ts` (ordinal-keyed token index, fed by cached per-tag token counts with live-tokenize fallback), `src/hooks/magic-context/compartment-trigger.ts` (trigger consumption). +- Pattern: Boundary offset anchors at `lastCompartmentEnd + 1`; token target `N` capped at `0.40 × usable`; pure function of (messages, usage, budget) — no persisted high-watermark (backward relaxation is the #132 fix). 
The trigger runs in the transform off the in-memory `args.messages` tail (zero opencode.db reads steady-state; the resolved snapshot is handed to the runner so the historian sees exactly what the fire decision saw), with content-stable range fingerprints for cross-view staleness validation. + +**ctx_reduce nudges (Channel 1 / Channel 2):** +- Purpose: Keep the agent reducing its own context without cache-busting mutations — Channel 1 appends a `` to tool outputs in `tool.execute.after` (persisted to OpenCode's DB, so the bytes are durable and replay for free); Channel 2 delivers a one-shot synthetic-user ceiling nudge near the execute threshold via the live-server client on step-boundary `message.updated` events (mid-turn "tool-calls" and final "stop") — the queued message lands at the next step so the agent is warned while the pile is still growing. +- Location: `src/hooks/magic-context/ctx-reduce-nudge.ts` (shared math: severity, `reclaimable ≥ usable/3` trigger), `src/hooks/magic-context/hook-handlers.ts` (Channel 1 injection), `src/hooks/magic-context/channel2-delivery.ts` (Channel 2 lease + delivery), `packages/pi-plugin/src/ctx-reduce-nudge-pi.ts` (Pi mirror: `tool_result` mutation + `agent_end` followUp). +- Pattern: Channel-1 baselines (per-session, in-memory) carry the measurement (`tailToolTokens`, `usableTokens`) the triggers evaluate; Channel 2 uses a cross-process CAS lease (`channel2_nudge_state`: pending → claimed → delivered) with full-predicate revalidation at delivery — unknown baseline never delivers, stale predicate cancels to re-armable, only confirmed sends consume the one-per-session cap. + +**Tiered emergency drop (≥85%):** +- Purpose: Replace need-blind routine tool drops with a target-headroom eviction at force-materialize pressure — reclaim down to `fixedFloor + 0.30 × (ceiling − fixedFloor)`, evicting tool outputs oldest-first across tiers (T3 misc → T2 edit/search → T1 navigation), with newest-20% recency reserves on T1/T2. 
+- Location: `src/hooks/magic-context/emergency-drop.ts` (pure planner), applied from `heuristic-cleanup.ts` / `heuristic-cleanup-pi.ts`. +- Pattern: Split tag sets — `floorTags` (FULL active live-window set, floor accounting) vs `tags` (tool-only `canDrop()` eviction candidates); `last_emergency_input_sample` is the idempotence latch (no re-drop until a fresh provider usage reading arrives). + +**Caveman text compression (experimental):** +- Purpose: Apply oldest-first age-tier text compression to user/assistant text outside the protected tail when `ctx_reduce_enabled=false`. +- Location: `src/hooks/magic-context/caveman.ts` +- Pattern: Four tiers (ultra/full/lite/untouched) keyed by raw-ordinal age within the non-protected region. Persisted per-tag `caveman_depth` enables byte-identical replay; depth escalation always recomputes from `source_contents` to avoid lossy double compression. + +**Synthetic todowrite injection:** +- Purpose: Inject a deterministic `tool_use`/`tool_result` pair so the agent sees current todo state through its native todowrite mental model, even when real todowrite tool calls have been dropped from the prefix. +- Location: `src/hooks/magic-context/todo-view.ts` (renderer + hash), `src/hooks/magic-context/transform-postprocess-phase.ts` (B7 logic), `src/features/magic-context/storage-meta-persisted.ts` (state persistence) +- Pattern: Capture-path is pure DB write; cache-busting-pass injects fresh and persists `(call_id, anchor_message_id, state_json)`; defer-pass replays from persisted state_json for byte-identical wire bytes. + +**Persisted session meta:** +- Purpose: Store per-session scalars and JSON blobs that must survive across transform passes and OpenCode restarts. 
+- Location: `src/features/magic-context/storage-meta-shared.ts`, `src/features/magic-context/storage-meta-persisted.ts`, `src/features/magic-context/storage-meta-session.ts`, `src/features/magic-context/storage-meta.ts` +- Pattern: `session_meta` SQLite table with `ensureColumn()` and versioned migrations; typed row interfaces with runtime guards; NULL coercion in `isSessionMetaRow()` so legacy rows don't trigger fallback-to-defaults on every read. + +**Cache-busting signals (plan v6):** +- Purpose: Surface durable per-pass facts the postprocess phase uses to decide whether the v12 deferred-history drain, the deferred-marker drain, and the deferred-materialization drain are eligible to fire — without re-reading transform state. +- Location: `src/hooks/magic-context/cache-busting-signals.ts`, threaded into `RunPostTransformPhaseArgs` (`historyRebuiltThisPass`, `historyRefreshExplicitBeforePrepare`, `compartmentInjectionRebuiltFromDb`, `canConsumeDeferredLate`, `phaseJustAwaitedPublication`, etc.). +- Pattern: Captured at well-defined points in `transform.ts` (e.g. `historyRefreshExplicitBeforePrepare` is read immediately before `prepareCompartmentInjection`, not later) so concurrent transform passes don't clobber each other's signals. The drain decision (`historyWasConsumedThisPass`) combines `historyRebuiltThisPass && (canConsumeDeferredLate || phaseJustAwaitedPublication || explicitRebuildHappened) && materializationSatisfied`. Degraded-cache state (null-boundary rebuild) is tracked by `degradedCacheCountBySession` in postprocess; entry logs in `inject-compartments.ts` and a warning at `DEGRADE_CACHE_WARNING_THRESHOLD=10` consecutive degraded rebuilds. + +## Entry Points + +**CLI entry:** +- Location: `packages/cli/src/index.ts` (separate `@cortexkit/magic-context` package). +- Triggers: Executed as the unified `magic-context` bin target via `npx @cortexkit/magic-context@latest <command>`. 
+- Responsibilities: Detect installed harnesses (OpenCode, Pi) and dispatch `setup` / `doctor` / `migrate` flows; print usage on unknown commands. + +**Plugin entry:** +- Location: `src/index.ts` +- Triggers: OpenCode loads the package entry declared in `package.json`. +- Responsibilities: Load config; surface config-warning toasts/ignored-messages; disable the plugin when conflicting plugins are detected (DCP, OMO context-management, OpenCode auto-compaction); register hidden agents (`historian`, `historian-editor`, `dreamer`, `sidekick`); start RPC server; start auto-update checker; start dream-schedule timer; wire hooks, commands, and tools. + +**TUI plugin entry:** +- Location: `src/tui/index.tsx` (separate `./tui` export from `package.json`). +- Triggers: OpenCode TUI loads the entry declared in `tui.json`. +- Responsibilities: Register Magic Context command-palette entries (with dual-path fallback for `api.keymap.registerLayer` vs legacy `api.command.register`); register sidebar slot; mount RPC-backed data layer. + +**Message transform entry:** +- Location: `src/plugin/messages-transform.ts` +- Triggers: `experimental.chat.messages.transform` +- Responsibilities: Defensive wrapper around the magic-context hook's transform — catches transient `SQLITE_BUSY`/`SQLITE_LOCKED` errors and other failures, persists summary to `session_meta.last_transform_error`, and falls back to unmodified messages so OpenCode's prompt loop always proceeds. + +**System-prompt transform entry:** +- Location: `src/hooks/magic-context/system-prompt-hash.ts` +- Triggers: `experimental.chat.system.transform` +- Responsibilities: Inject ``, ``, `` adjunct blocks and Magic Context guidance text; persist `system_prompt_hash` for cache-stability decisions; skip injection for OpenCode's internal `title`/`summary`/`compaction` agents and any agents matched by user-configured `system_prompt_injection.skip_signatures`. 
+ +**Event entry:** +- Location: `src/plugin/event.ts` +- Triggers: OpenCode session and message lifecycle events. +- Responsibilities: Forward lifecycle events to the runtime event handler — `message.updated` (usage tracking, model drift detection, message-index live updates, Channel-2 ceiling-nudge delivery on step boundaries), `message.removed` (tag/index cleanup, anchor cleanup), `session.deleted` (full-session cleanup). The historian trigger decision no longer runs here — it lives in the transform, fed by the in-memory message tail (the event handler has no message array and the old per-streaming-delta DB read froze the event loop on large sessions). + +**Tool entry:** +- Location: `src/plugin/tool-registry.ts` +- Triggers: Plugin initialization. +- Responsibilities: Open storage, normalize arg schemas, and expose the supported tool set. + +**Tool definition entry:** +- Location: `src/index.ts` (`tool.definition` hook calls `recordToolDefinition`) +- Triggers: OpenCode `tool.definition` hook (per tool per flight). +- Responsibilities: Record tool description and parameter token counts per `(provider, model, agent, tool_id)` for sidebar token attribution, with content-fingerprint short-circuit to avoid re-measuring stable definitions. + +**RPC server entry:** +- Location: `src/shared/rpc-server.ts` (started from `src/index.ts`) +- Triggers: Plugin initialization. +- Responsibilities: Bind localhost RPC server on ephemeral port; publish port via `session_meta` for TUI discovery; serve sidebar/status/recomp/notification endpoints registered by `src/plugin/rpc-handlers.ts`. Subagents run heuristic drops on every execute pass (no once-per-turn guard) because a long subagent run is effectively one parent turn and would otherwise starve; they have no provider-cache reuse to protect. 
@@ -155,6 +337,61 @@ Subagents run heuristic drops on every execute pass (no once-per-turn guard) bec Fail **closed** when storage is unavailable (better to disable than silently overflow the prompt). Fail **open** in per-turn handlers (log and skip). Wrap the outer transform so transient `SQLITE_BUSY`/`SQLITE_LOCKED` never crash the prompt loop (#23). `overflow-detection.ts` parses provider context-overflow errors (Anthropic / OpenAI / Copilot) and persists the detected limit so later passes use the lower value. Subagent model fallback (`model-suggestion-retry.ts`) iterates the chain on retryable failures; abort/timeout/context-overflow short-circuit. Hidden agents carry a `steps`/`maxSteps` cap and are aborted via `session.abort` on timeout so a weak local model can't loop forever (#154). +**Subagent rationale:** subagents are driven by a parent agent, have bounded lifetimes, and often run in parallel (council, historian, sidekick, dreamer child sessions). They still benefit from automatic heuristic drops on their own context at execute passes (running on EVERY execute pass, not once-per-turn — long-running subagents are effectively one parent turn, and they'd starve under the parent's once-per-turn gate), but turning on historian, nudges, or prompt-adjunct injections in each subagent would create redundant work and per-agent cache churn. Subagents that run into overflow fall back to the existing `overflow-detection.ts` path; the detected limit is recorded so future passes use the lower value, but no emergency-recovery flag is persisted because subagents don't consume that path. 
The skill-memory recall-block append is the one exception that IS active for subagents: it rides the same ungated `tool.execute.after` path as the Channel-1 nudge (it is a tool-result append, NOT a prompt-adjunct injection), so an implementer subagent that loads a skill benefits from that skill's accumulated gotchas — the append lands in the subagent's own tool result (cache-safe in its context) at the cost of a single DB read. + ## Tag identity Each `tags` row is one taggable source-content unit (`message`, `file`, or `tool`). `message`/`file` tags key on `(session_id, message_id)` (synthetic content id). **`tool` tags key on a COMPOSITE `(session_id, callID, tool_owner_message_id)`** — because OpenCode reuses a `callID` counter per assistant turn, so the same `read:32` recurs across turns; including the owning assistant message id gives each invocation its own row (migration v10). Owner derivation: invocation parts own themselves; result parts pop a FIFO of unpaired invocations; a result whose invocation was compacted away falls back to the nearest prior persisted owner. The same composite keying mirrors in the drop queue and heuristic cleanup so dropped keys match what the tagger persisted. Per-tag token counts (`token_count` / `input_token_count` / `reasoning_token_count`) are computed once on tag insert and summed for sidebar / boundary / nudge math (off the hot path). + +**Provider error parsing:** `src/features/magic-context/overflow-detection.ts` parses provider-specific context-overflow errors (Anthropic, OpenAI, GitHub Copilot) and persists the detected limit to `session_meta.detected_context_limit` so subsequent passes use the lower value. `needs_emergency_recovery` is set for primary sessions; subagents skip emergency-recovery state because they don't consume that path. 
+ +**Subagent model fallback:** `promptSyncWithModelSuggestionRetry` in `src/shared/model-suggestion-retry.ts` iterates the resolved fallback chain (user-configured `fallback_models` or builtin chain) on retryable failures. Abort, timeout, and context-overflow errors short-circuit the chain — those won't succeed on a different model and the caller's emergency-recovery path handles them. Suggestion retry ("did you mean X?") runs inside each attempt. + +## Cross-Cutting Concerns + +**Logging:** Use buffered file logging from `src/shared/logger.ts` and write to the temp-file path returned by `getLogFilePath()`. Per-session logs use `sessionLog(sessionId, message)`; module-level logs use `log(message)`. Heavy logging batches to disk to avoid blocking the transform path. + +**Caching:** Use deferred reductions, cached memory-block injection, per-session TTL tracking, per-tag cached token counts (computed once on tag insert), persisted reminder-replay state, per-session live injection cache, persisted system-prompt hash, and persisted todo-snapshot replay state — all coordinated through `src/hooks/magic-context/` and `src/features/magic-context/storage-meta-*.ts`. + +**Storage:** Use the SQLite database created by `src/features/magic-context/storage-db.ts` under the cortexkit data directory resolved by `src/shared/data-path.ts` (`~/.local/share/cortexkit/magic-context/context.db` on Linux/macOS, XDG-equivalent on Windows). Legacy OpenCode-plugin-folder DBs are migrated forward on first boot. The same DB is shared cross-harness between OpenCode and Pi; session-scoped tables include a `harness` discriminator (`'opencode'` / `'pi'`) while project-scoped tables (memories, git commits) are shared. 
+ +**Schema migrations:** `src/features/magic-context/migrations.ts` declares versioned migrations v1–v39 (`LATEST_SUPPORTED_VERSION = 39` in `storage-db.ts` is the schema-fence ceiling and MUST be bumped with every new migration; a unit test — `schema-version-fence.test.ts` — asserts `LATEST_SUPPORTED_VERSION === LATEST_MIGRATION_VERSION` so the two can't drift). Notable: v10 `tool_owner_message_id` (composite tool-tag identity); v11 `todo_synthetic_*` (synthetic-todowrite); v12 orphan `memory_embeddings` cleanup; v13 `pending_compaction_marker_state` (deferred-marker drain); v14 project-scoped key files + version counter; v15 `deferred_execute_state` (boundary execution); v16 context-limit cache sentinels; v17 multi-anchor note-nudge/auto-search JSON storage; v18 `pending_pi_compaction_marker_state`; v19 compartment-state lease table; v20 subagent invocation token accounting; v21 session lifetime work metrics; **v22 the v2.0 cache-architecture foundation (m[0]/m[1] split tables, `project_state` epoch counter, plus per-compartment `p1`–`p4` tier columns, `importance`, `episode_type`, `p1_embedding`, and `legacy` flag); v23 `compartment_events` (historian-extracted causal_incident / trajectory_correction, stored-not-rendered in v2.0); v24 `historian_runs` telemetry (per-run chunk range, compartment/fact/event counts, importance min/max/avg, status + failure reason, FK to `subagent_invocations`); v25 `pi_stable_id_scheme` (Pi stable-id cutover watermark); v26 `memory_mutation_log` + `cached_m1_bytes` (memory supersede-delta — non-additive in-session memory mutations render as an m[1] `` delta instead of bumping the project epoch, plus the frozen-m[1]-bytes cache column); v27 `tags.entry_fingerprint` (Pi fallback-tag adoption); v28 `git_sweep_coordinator` (lease/cooldown for cross-process git-commit sweeps); v29 `notes.anchor_ordinal` (note→conversation-tail traceback); v30 `cached_m0_system_hash` / `cached_m0_tool_set_hash` / `cached_m0_model_key` (HARD-bust m[0] 
markers — provider-side cache-eviction detection for the materialization taxonomy; the migration clears the m[0]/m[1] cache once so pre-v30 rows re-materialize cleanly); v31 ctx_reduce-nudge state (`last_nudge_undropped`, `channel2_nudge_state`, `last_emergency_input_sample` + startup heal zeroing legacy sticky/anchor nudge state); v32 protected-tail v3 boundary state + per-tag cached token counts (`tags.token_count` / `input_token_count` / `reasoning_token_count` — computed once on tag insert, summed for sidebar/boundary/nudge math); v33 `compartment_chunk_embeddings` table for cross-session semantic search across compartment windows; v34 `workspaces` / `workspace_members` tables plus `cached_m0_workspace_fingerprint` m[0] marker (with a one-shot m[0]/m[1] cache reset so pre-v34 rows re-materialize cleanly); v35 `workspaces.share_categories` default + epoch refresh for existing members; v36 `session_projects` ownership map + seed for pre-v36 embedded sessions; v37 emergency drain catch-up latch + historian drain failure backoff; v38 `transform_decisions` table for durable cache-event cause attribution; **v39 `skill_memory` table for per-skill cross-session recall (the `skill-memory` feature — see "Skill-memory" in Key Abstractions) with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE, plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes for the flat-recall path.** Migration runner uses `schema_migrations` table with version-ordered execution and sibling-startup race protection (duplicate-insert is tolerated). + +**Harness-aware behavior:** `src/shared/harness.ts` exposes `setHarness()`/`getHarness()` for the runtime to identify itself; production INSERTs into session-scoped tables tag rows with the current harness. Pi-specific session-resolution paths are skipped on OpenCode and vice versa. + +## Tag Identity (v3.3.1+) + +**Tag types:** `message`, `file`, `tool`. 
Each row in the `tags` table represents one source-content unit that can be tagged with `§N§` and dropped/truncated/replayed by the runtime. + +**Identity composition by type:** + +- **`message` and `file` tags:** identified by `(session_id, message_id)`. The `message_id` for these is a synthetic content id (`:p` for text, `:fileN` for files). These ids are globally unique within a session. + +- **`tool` tags:** identified by `(session_id, message_id, tool_owner_message_id)` — a *composite* identity. For tool tags, `message_id` is the OpenCode-generated callID (e.g. `read:32`). Pre-v3.3.1 the runtime keyed tool tags by callID alone, but OpenCode reuses a callID counter per assistant turn — so two assistant turns that each invoke `read:32` produced the SAME callID for different invocations. The fix: include the *owning assistant message id* in the key so each invocation gets its own row. + +**Schema enforcement:** schema migration v10 (`src/features/magic-context/migrations.ts`) adds `tool_owner_message_id` (`TEXT NULL`), a partial UNIQUE index `idx_tags_tool_composite` on `(session_id, message_id, tool_owner_message_id) WHERE type='tool' AND tool_owner_message_id IS NOT NULL`, and a partial lookup index `idx_tags_tool_null_owner` on `(session_id, message_id) WHERE type='tool' AND tool_owner_message_id IS NULL` to back lazy adoption. + +**Helper API surface (`src/features/magic-context/storage-tags.ts`):** + +- `getToolTagNumberByOwner(db, sessionId, callId, ownerMsgId)`: composite-identity lookup. +- `getNullOwnerToolTag(db, sessionId, callId)`: find a legacy NULL-owner orphan to lazily adopt. +- `adoptNullOwnerToolTag(db, tagId, ownerMsgId)`: attempt to claim a NULL-owner row (NULL guard ensures first claim wins). +- `getPersistedToolOwnerNearestPrior(db, sessionId, callId, beforeMessageId)`: derive the most recent prior owner for a tool result whose invocation isn't in the visible window. 
+- `deleteToolTagsByOwner(db, sessionId, ownerMsgId)`: cascade delete on `message.removed`. + +**Owner derivation (`src/hooks/magic-context/tag-messages.ts`):** + +For each tool observation in a transform pass: + +1. **Invocation parts** (`tool-invocation` / `tool_use`): owner = the message hosting the part. +2. **Result parts** (`tool` with output / `tool_result`): pop the FIFO queue of unpaired invocations for that callId; owner = the popped invocation's message id. +3. **Result-only window** (invocation compacted away): fall back to `getPersistedToolOwnerNearestPrior` for the most recent prior persisted owner; if none found, last-resort owner = the result's own message id. + +The same logic mirrors in `src/hooks/magic-context/read-session-chunk.ts: getRawSessionTagKeysThrough` so the drop queue produces composite keys that match what the tagger persisted. + +**Cleanup paths:** + +- `deleteTagsByMessageId(db, sessionId, messageId)` (called from `event-handler.ts` on `message.removed`) deletes BOTH content-id-scoped tags (text/file on the removed message) AND owner-scoped tool tags (`tool_owner_message_id == messageId`). +- `applyHeuristicCleanup` keys both the tag-side index and fingerprint-side map by composite `\x00`. The fingerprint VALUE includes ownerMsgId too, so cross-owner pairs with same `(toolName, args)` produce DISTINCT fingerprints and are NOT merged. + +**Legacy NULL-owner handling:** rows written by pre-v3.3.1 plugin versions have `tool_owner_message_id = NULL`. The Layer B backfill (`src/features/magic-context/tool-owner-backfill.ts`) populates those rows from OpenCode's session DB on plugin upgrade (lease-based concurrency, batched commits). When backfill is skipped (no OpenCode DB attached) lazy adoption converts orphans to non-NULL on the next observation. Drop queue and heuristic cleanup gracefully fall back to bare-callId match for unbackfilled NULL-owner rows. 
diff --git a/CONFIGURATION.md b/CONFIGURATION.md index 2b1bd5de..81fa3543 100644 --- a/CONFIGURATION.md +++ b/CONFIGURATION.md @@ -387,6 +387,7 @@ To disable the dreamer entirely, set `dreamer.disable: true`. To disable a singl | `refresh-primers` | `0 3 * * *` | Re-investigate stale primers against current code and refresh their answers. | | `evaluate-smart-notes` | `0 3 * * *` | Surface smart notes whose `ctx_note` conditions have come true. | | `review-user-memories` | `0 3 * * *` | Promote recurring behavioral observations into the `` block (privacy-sensitive). | +| `distill-skill-memory` | `""` (off) | **Opt-in** — add a schedule to enable. Merge near-duplicate skill notes, prune stale low-hit notes, promote recurring gotchas to `pinned=1`, enforce per-skill note caps. Requires the `skill_memory` table (migration v50) — auto-created on upgrade. | ### Retrospective privacy @@ -625,6 +626,39 @@ Tier boundaries are hardcoded to keep behavior predictable and prevent cache-bus **When to enable.** Turn it on if you run very long, edit-heavy sessions and want to reclaim more context without losing the agent's record of what it did. The default stays off while cache stability is being validated in the wild. Requires a restart to take effect. +## Skill-Memory (per-skill frontmatter) + +Skill-memory is the "motor memory" for skills — per-skill, cross-session recall of gotchas, discoveries, fixes, and workflow steps. The plugin transparently augments opencode's built-in `skill` tool: when a skill declares `skill-memory: { enabled: true }` in its YAML frontmatter, accumulated notes for that skill surface in a `` block appended to the skill tool's RESULT on every load. Agents write back via `ctx_skill_note`; explicit recall (without re-loading) is `ctx_skill_recall`. + +Unlike every other setting in this file, **skill-memory is configured per-skill in each `SKILL.md`'s frontmatter, not in `magic-context.jsonc`**. Absent or malformed block = inert. 
A bad config in one skill cannot break other skills. + +```yaml +--- +name: test-driven-development +description: ... +skill-memory: + enabled: true # required: true to activate + max_tokens: 1500 # default 1500 — token budget for unpinned notes + max_pinned_tokens: 4000 # default 4000 — separate cap for pinned notes + dedup_threshold: 0.92 # default 0.92 — P2 cosine near-dedup threshold +--- +``` + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | `boolean` | (required `true`) | Master switch per skill. When absent or `false`, the transparent after-hook skips this skill entirely and `ctx_skill_recall` returns "skill-memory is not enabled for ''". | +| `max_tokens` | `number` | `1500` | Hard cap on tokens for unpinned notes in the injected block. Greedy fill by composite score (P1: recency × hit_count). | +| `max_pinned_tokens` | `number` | `4000` | Separate cap for pinned notes. Pinned notes are always included first; on cap overflow, least-used pinned notes are truncated in ascending `hit_count` order with an "N pinned notes omitted" marker. | +| `dedup_threshold` | `number` | `0.92` | P2 cosine near-dedup threshold. P1 ships without embeddings, so this is reserved for the P2 rollout. Tune per-skill in the `0.85`–`0.95` range. | + +**Cache safety.** The injected block lands in the tool RESULT = conversation tail, never the cached m[0]/m[1] prefix. This is the same pattern as Channel-1 (`maybeInjectChannel1Nudge`) and is why skill-memory cannot regress the prompt-cache hit rate. + +**Write-back (`ctx_skill_note`).** The injected block's footer prompts: *"After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes."* The `kind` parameter is a hard gate: `kind: "general"` is rejected at the tool level — general observations belong in `ctx_memory` with an appropriate category. 
+ +**Dreamer integration.** Add `"distill-skill-memory"` to your `dreamer.tasks` list to opt in to overnight maintenance (merges near-duplicates, prunes stale zero-hit notes older than 30 days, promotes recurring gotchas to pinned). It is **not** a default task — the feature is opt-in like `maintain-docs`. + +**P1 vs P2.** P1 (shipped) is flat recall (recency × hit_count, no embeddings). P2 (planned) adds intent-aware ranking via the project's existing embedding provider. The per-skill `dedup_threshold` field is reserved for P2 cosine near-dedup and has no effect on P1. + ## Commands | Command | Description | diff --git a/README.md b/README.md index b57508e8..d805e26d 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,7 @@ Because it runs during idle time, the dreamer pairs well with local models, even - **`ctx_expand`**: pull a compressed history range back to the original `U:`/`A:` transcript when the agent needs the exact details. - **`ctx_note`**: a scratchpad for deferred intentions. Notes resurface at natural boundaries (after commits, after historian runs, when todos finish). **Smart notes** carry an open-ended condition the dreamer watches for. +- **Skill-memory (motor memory for skills)**: a per-skill skill-memory block is appended to the skill tool's RESULT on every load, surfacing accumulated gotchas, discoveries, fixes, and workflow steps the agent has recorded for that skill. Per-skill opt-in via the skill's `SKILL.md` frontmatter (`skill-memory: { enabled: true }`); write back with **`ctx_skill_note`**, recall without re-loading with **`ctx_skill_recall`**. The block lands in the tool result, not the cached prompt prefix, so it never thrashes the cache. Recall works **across sessions** (a new session inherits everything) and **across harnesses** (write a memory in OpenCode, retrieve it in Pi). 
@@ -223,6 +224,8 @@ Recall works **across sessions** (a new session inherits everything) and **acros | `ctx_search` | Recall | Search memories, conversation history, and git commits | | `ctx_expand` | Recall | Decompress a history range back to the transcript | | `ctx_note` | Recall | Deferred intentions and dreamer-evaluated smart notes | +| `ctx_skill_note` | Recall | Write back a per-skill note (gotcha/discovery/fix/workflow) for future loads | +| `ctx_skill_recall` | Recall | Explicitly recall skill-memory notes without re-loading the skill | --- diff --git a/STRUCTURE.md b/STRUCTURE.md index 2f09201f..bacc8d8a 100644 --- a/STRUCTURE.md +++ b/STRUCTURE.md @@ -57,14 +57,14 @@ **`src/features/`:** - Purpose: Group reusable subsystem logic by feature. -- Contains: Magic-context services (storage, scheduler, tagger, search, message-index, overflow detection, compaction markers), dreamer runtime, sidekick support, memory system, user-memory pipeline, git-commit indexer, tool-definition token measurement, schema migrations, built-in commands. 
-- Key subdirs: `src/features/magic-context/dreamer/`, `src/features/magic-context/memory/`, `src/features/magic-context/sidekick/`, `src/features/magic-context/user-memory/`, `src/features/magic-context/git-commits/`, `src/features/builtin-commands/` -- Key files: `src/features/magic-context/storage-db.ts`, `src/features/magic-context/storage.ts` (barrel), `src/features/magic-context/migrations.ts`, `src/features/magic-context/message-index.ts`, `src/features/magic-context/search.ts`, `src/features/magic-context/overflow-detection.ts`, `src/features/magic-context/dreamer/runner.ts`, `src/features/magic-context/memory/storage-memory.ts`, `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/builtin-commands/commands.ts` +- Contains: Magic-context services (storage, scheduler, tagger, search, message-index, overflow detection, compaction markers), dreamer runtime, sidekick support, memory system, user-memory pipeline, key-files pinning, git-commit indexer, tool-definition token measurement, schema migrations, built-in commands, **skill-memory (per-skill motor memory)**. 
+- Key subdirs: `src/features/magic-context/dreamer/`, `src/features/magic-context/memory/`, `src/features/magic-context/sidekick/`, `src/features/magic-context/user-memory/`, `src/features/magic-context/key-files/`, `src/features/magic-context/git-commits/`, `src/features/magic-context/skill-memory/`, `src/features/builtin-commands/` +- Key files: `src/features/magic-context/storage-db.ts`, `src/features/magic-context/storage.ts` (barrel), `src/features/magic-context/migrations.ts`, `src/features/magic-context/message-index.ts`, `src/features/magic-context/search.ts`, `src/features/magic-context/overflow-detection.ts`, `src/features/magic-context/dreamer/runner.ts`, `src/features/magic-context/memory/storage-memory.ts`, `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`, `src/features/magic-context/user-memory/storage-user-memory.ts`, `src/features/builtin-commands/commands.ts` **`src/tools/`:** - Purpose: Define the agent-facing tool surface. -- Contains: One directory per tool with constants, types, implementation, and tests. Five tools: `ctx-reduce`, `ctx-expand`, `ctx-note`, `ctx-memory`, `ctx-search`. -- Key files: `src/tools/ctx-reduce/tools.ts`, `src/tools/ctx-expand/tools.ts`, `src/tools/ctx-note/tools.ts`, `src/tools/ctx-memory/tools.ts`, `src/tools/ctx-search/tools.ts` +- Contains: One directory per tool with constants, types, implementation, and tests. Seven tools: `ctx-reduce`, `ctx-expand`, `ctx-note`, `ctx-memory`, `ctx-search`, `ctx-skill-note`, `ctx-skill-recall`. The two `ctx_skill_*` tools share the `recallSkillMemoryBlock` core with the transparent after-hook path. +- Key files: `src/tools/ctx-reduce/tools.ts`, `src/tools/ctx-expand/tools.ts`, `src/tools/ctx-note/tools.ts`, `src/tools/ctx-memory/tools.ts`, `src/tools/ctx-search/tools.ts`, `src/tools/ctx-skill-note/tools.ts`, `src/tools/ctx-skill-recall/tools.ts` **`src/shared/`:** - Purpose: Keep cross-feature utilities small and dependency-light. 
@@ -101,16 +101,24 @@ - `src/hooks/magic-context/strip-content.ts`: Strip and replay reasoning, inline thinking, structural noise, dropped placeholders, merged-assistant reasoning, processed images, and system-injected messages. - `src/hooks/magic-context/caveman.ts`: Experimental age-tier text compression for primary sessions with `ctx_reduce_enabled=false`. - `src/hooks/magic-context/todo-view.ts`: Build the deterministic synthetic todowrite tool part and compute its hash-based `call_id`. +- `src/hooks/magic-context/skill-tool-definition.ts`: `injectSkillIntentParam` — adds an optional `intent` parameter to the `skill` tool's schema via the `tool.definition` hook (Effect-Schema strips it before the skill runs; the before-hook captures it pre-validation). - `src/hooks/magic-context/inject-compartments.ts`: m[0]/m[1] history layout — `renderM0`/`renderM1`/`materializeM0`/`mustMaterialize` (mirrored in Pi's `inject-compartments-pi.ts`). - `src/hooks/magic-context/decay-curve.ts`: Council-validated deterministic tier-decay math (half-life, log-cost tier boundaries, budget pressure). - `src/hooks/magic-context/decay-render.ts`: Shared OpenCode+Pi compartment renderer built on the decay curve (replaces the removed LLM compressor). - `src/hooks/magic-context/compartment-runner-incremental.ts`: v2 historian publish path — bounded reference blocks, tiered/scored compartments, faithful per-chunk facts, discard-last, events + `p1_embedding` on publish. - `src/hooks/magic-context/reference-retrieval.ts` (+ `reference-seeds.generated.ts`): 4 rotating seed compartments + last-6 recency references for the historian prompt. - `src/hooks/magic-context/historian-prompt.generated.ts`: Generated v8.7.3 historian system prompt (source: `.alfonso/.../historian-prompt-v8.7.3.md`; re-exported via `compartment-prompt.ts`). 
+- `src/hooks/magic-context/hook-handlers.ts` (skill-memory branches): `createToolExecuteBeforeHook` (stashes per-callID `intent` in a bounded closure map), `maybeInjectSkillMemory` (appends the skill-memory block to `output.output` BEFORE the Channel-1 nudge), and the `createToolExecuteAfterHook` branch that parses the `Base directory` line + reads `SKILL.md` from disk to populate the session-scoped `SkillLoadRegistry`. - `src/features/magic-context/memory/memory-migration.ts`: `/ctx-session-upgrade` 9-cat→5-cat memory re-eval (active-only, permanent-safe, epoch-bumping). +- `src/features/magic-context/skill-memory/frontmatter.ts`: Minimal YAML frontmatter parser for the per-skill `skill-memory:` block — returns `null` (inert) when absent or malformed; a bad config in one skill cannot break other skills. +- `src/features/magic-context/skill-memory/provenance.ts`: `parseSkillProvenance` (cross-platform `fileURLToPath` parser for the `Base directory for this skill: file:///...` line), `deriveSkillTier` / `deriveSkillSource` (path-based classification), and the session-scoped `SkillLoadRegistry` (`Map` — NOT persisted, cleaned in `onSessionDeleted`). +- `src/features/magic-context/skill-memory/storage.ts`: `skill_memory` table CRUD — `insertSkillMemoryNote` (UNIQUE-violation on duplicate returns null so callers can `bumpHitCount`), `getSkillMemoryNotes` (window-function flat ranking for P1), `findExistingNote`, `bumpHitCount`. The `UNIQUE(skill_id, tier, project_identity, normalized_hash)` constraint plus the `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes live in migration v50. +- `src/features/magic-context/skill-memory/recall.ts`: `recallSkillMemoryBlock` — the shared recall+format core (used by BOTH the transparent after-hook AND `ctx_skill_recall`). `flatRecall` does P1's recency × hit_count greedy-fill; `buildSkillMemoryBlock` formats the skill-memory XML block with the `ctx_skill_note` write-back footer. 
Lives in the feature layer to avoid a tools→hooks layering violation. +- `src/tools/ctx-skill-note/tools.ts`: `ctx_skill_note` — write-back tool. Hard gate rejects `kind: "general"` (general observations belong in `ctx_memory`). Exact-dedup on `normalized_hash` (reuses `computeNormalizedHash` from `memory/normalize-hash.ts`) bumps `hit_count`. Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry`. +- `src/tools/ctx-skill-recall/tools.ts`: `ctx_skill_recall` — explicit recall companion. Registry-first resolution (exact, free, no disk I/O when the skill was loaded this session) with a cold-start disk fallback walking opencode's real `discoverSkills()` order (project dirs first — they shadow global). Returns distinct messages for SKILL.md-not-found vs disabled vs cold-start-no-notes. - `src/features/magic-context/storage-db.ts`: Create durable storage; run versioned migrations; resolve runtime SQLite backend. - `src/features/magic-context/storage-meta-persisted.ts`: Read and write per-session persisted scalars and JSON blobs. -- `src/features/magic-context/migrations.ts`: Versioned schema migrations v1–v44 (`LATEST_SUPPORTED_VERSION` in `storage-db.ts` must track the highest; `schema-version-fence.test.ts` asserts they stay in lockstep). +- `src/features/magic-context/migrations.ts`: Versioned schema migrations v1–v50 (`LATEST_SUPPORTED_VERSION` in `storage-db.ts` must track the highest; `schema-version-fence.test.ts` asserts they stay in lockstep). v50 adds the `skill_memory` table with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes. - `src/features/magic-context/message-index.ts`: FTS-backed raw-message index for `ctx_search`. - `src/features/magic-context/search.ts`: Unified retrieval over memories, raw messages, and git commits. 
diff --git a/packages/pi-plugin/src/commands/ctx-status.ts b/packages/pi-plugin/src/commands/ctx-status.ts index ef762eba..f9b5bd77 100644 --- a/packages/pi-plugin/src/commands/ctx-status.ts +++ b/packages/pi-plugin/src/commands/ctx-status.ts @@ -90,7 +90,7 @@ export function registerCtxStatusCommand( const modelKey = ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : undefined; - const statusText = executeStatus( + const statusText = await executeStatus( currentDeps.db, sessionId, currentDeps.protectedTags ?? 20, diff --git a/packages/plugin/src/agents/magic-context-prompt.ts b/packages/plugin/src/agents/magic-context-prompt.ts index 4db441e4..39e37851 100644 --- a/packages/plugin/src/agents/magic-context-prompt.ts +++ b/packages/plugin/src/agents/magic-context-prompt.ts @@ -65,6 +65,19 @@ function memoryGuidanceBlock(memoryEnabled: boolean): string { return memoryEnabled ? `${MEMORY_GUIDANCE}\n` : ""; } +// Skill-memory write-back guidance: `ctx_skill_note` captures non-obvious gotchas, +// discoveries, fixes, and workflow steps during skill use (durable across sessions); +// `ctx_skill_recall` rehydrates accumulated notes for a skill without re-loading it. +// Distinct from `ctx_memory`, which captures general project knowledge (not tied +// to a specific skill). NOT gated on memory.enabled — skill-memory is an +// independent store (its own table + tool-result-tail injection). +const CTX_SKILL_MEMORY_GUIDANCE = `Use \`ctx_skill_note\` after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. Skip routine successes — only record gotchas, discoveries, fixes, and workflow steps that would save time on the next use. 
+Example: \`ctx_skill_note({skill: 'trilium', intent: 'bulk-retag a subtree', kind: 'gotcha', delta: 'ETAPI note PUT needs Content-Type: text/plain even for HTML content'})\` +Example: \`ctx_skill_note({skill: 'test-driven-development', intent: 'fix flaky auth test', kind: 'fix', delta: 'Always mock Date.now() in auth tests — real timers cause intermittent failures'})\` +Do NOT use \`ctx_skill_note\` for general project observations — those belong in \`ctx_memory\`. + +Use \`ctx_skill_recall\` to explicitly query accumulated notes for a skill without re-loading it. Call it when you want to recall gotchas/discoveries for a skill you have already loaded this session, or when you need notes without triggering a full skill load. Returns the \`\` block directly as a tool result. Example: \`ctx_skill_recall({skill: 'trilium', intent: 'bulk-retag a subtree'})\`.`; + const BASE_INTRO = ( protectedTags: number, memoryEnabled: boolean, @@ -72,7 +85,8 @@ const BASE_INTRO = ( Use \`ctx_reduce\` to mark spent tagged content as discardable and reclaim space. Marking is NOT an immediate delete — it queues the content, which stays fully visible until space is actually needed (as soon as the next turn if you're already under pressure, much later if not), so mark a tool output as soon as you're done with it rather than hoarding the call for the end of the turn. The last ${protectedTags} tags are protected (marking one just queues it until it ages out). Syntax: "3-5", "1,2,9", or "1-5,8,12-15". Do not announce or narrate \`ctx_reduce\` drops — just call the tool silently. Saying "I'll drop these outputs" wastes tokens the user does not care about. ${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. 
+${memoryGuidanceBlock(memoryEnabled)}${CTX_SKILL_MEMORY_GUIDANCE} +Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. Examples: - Can't remember where a related codebase or dependency lives → \`ctx_search(query="opencode source code path")\` @@ -93,7 +107,8 @@ Before your turn finishes, consider using \`ctx_reduce\` to drop large tool outp * a tagging system they can't observe just wastes tokens and (empirically) primes * some models to emit malformed `§N">§` tokens at the start of their own text. */ const BASE_INTRO_NO_REDUCE = (memoryEnabled: boolean): string => `${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. +${memoryGuidanceBlock(memoryEnabled)}${CTX_SKILL_MEMORY_GUIDANCE} +Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. 
Examples: - Can't remember where a related codebase or dependency lives → \`ctx_search(query="opencode source code path")\` diff --git a/packages/plugin/src/agents/skill-memory-guidance.test.ts b/packages/plugin/src/agents/skill-memory-guidance.test.ts new file mode 100644 index 00000000..d449ce03 --- /dev/null +++ b/packages/plugin/src/agents/skill-memory-guidance.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "bun:test"; +import { buildMagicContextSection } from "./magic-context-prompt"; + +describe("magic-context-prompt skill-memory guidance", () => { + it("prompt includes ctx_skill_note guidance", () => { + // Real export is buildMagicContextSection (positional args: _agent, protectedTags, ctxReduceEnabled, ...) + const prompt = buildMagicContextSection(null, 20, true, false, false, false, false); + expect(prompt).toContain("ctx_skill_note"); + }); + + it("ctx_skill_note guidance includes worked example", () => { + const prompt = buildMagicContextSection(null, 20, true, false, false, false, false); + expect(prompt).toContain("kind:"); + expect(prompt).toContain("gotcha"); + }); +}); diff --git a/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts new file mode 100644 index 00000000..a80ddff3 --- /dev/null +++ b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, test } from "bun:test"; +import { DEFAULT_DREAMER_TASKS, DREAMER_TASKS, DreamingTaskSchema } from "./magic-context"; + +describe("distill-skill-memory dreamer task", () => { + test("distill-skill-memory is in DREAMER_TASKS enum", () => { + expect(DREAMER_TASKS).toContain("distill-skill-memory"); + }); + + test("distill-skill-memory is NOT in DEFAULT_DREAMER_TASKS (opt-in)", () => { + expect(DEFAULT_DREAMER_TASKS).not.toContain("distill-skill-memory"); + }); + + test("DreamingTaskSchema accepts distill-skill-memory", () => { + expect(() => 
/**
 * Build the dreamer prompt for the opt-in `distill-skill-memory` task.
 *
 * The prompt is a static markdown document; the only interpolated value is
 * `projectPath`, shown in the `**Project:**` header. It instructs the agent to
 * merge, prune and promote rows of the `skill_memory` table.
 *
 * NOTE(review): the SQL in step 1 filters on
 * `(SELECT project_identity FROM skill_memory LIMIT 1)`, i.e. whichever row the
 * engine returns first — it is not derived from `projectPath`. Confirm this is
 * intended; otherwise the identity of the current project needs to be passed in.
 * NOTE(review): step 4 mentions `last_distilled_at` and step 2e mentions
 * archiving; neither appears in the skill_memory schema added by this change —
 * verify they exist elsewhere before relying on them.
 * NOTE(review): `action="distill"` on ctx_skill_note is assumed to be implemented
 * by the tool; it is not visible from here.
 *
 * @param projectPath Path of the project being dreamed about (display only).
 * @returns The full task prompt as markdown.
 */
function buildDistillSkillMemoryPrompt(projectPath: string): string {
    return `## Task: Distill Skill Memory

**Project:** ${projectPath}

### Goal
Maintain the skill_memory table: merge near-duplicate notes, prune stale low-hit notes,
promote recurring gotchas to pinned, enforce per-skill note caps.

### Process
1. Query skill_memory for skills with note_count > 20 (the distill threshold):
   \`\`\`sql
   SELECT skill_id, tier, project_identity, COUNT(*) as note_count
   FROM skill_memory
   WHERE project_identity = (SELECT project_identity FROM skill_memory LIMIT 1)
   GROUP BY skill_id, tier, project_identity
   HAVING note_count > 20
   ORDER BY note_count DESC
   LIMIT 5;
   \`\`\`
2. For each qualifying skill:
   a. List notes ordered by hit_count DESC, created_at DESC.
   b. Merge near-duplicate notes (same kind, similar delta — use judgment).
      Use ctx_skill_note with action="distill" and merge: [id, id].
   c. Prune notes with hit_count=0 AND created_at < now-30d (stale, never recalled).
      Use ctx_skill_note with action="distill" and prune: id.
   d. Promote notes with hit_count >= 5 to pinned=1 if not already pinned.
      Use ctx_skill_note with action="distill" and promote: id.
   e. If note count > 100 after pruning, archive oldest low-hit unpinned notes.
3. Log a quality alert if >30% of kind='gotcha' notes appear to be general observations
   (not skill-specific). Use ctx_memory to record the alert.
4. Process at most 5 skill groups per run (rotating by last_distilled_at).

### Tools available
- ctx_skill_note (with action="distill" — dreamer-only action for merge/prune/promote)
- Read, bash (for verification queries)

### Success criteria
- No skill has >100 notes in the project tier.
- Pinned notes reflect genuinely recurring gotchas (hit_count >= 5).
- Stale zero-hit notes older than 30 days are pruned.
- Quality alert logged if >30% of gotcha notes are general-quality.`;
}
    {
        // Skill-memory P1: per-skill cross-session recall. Originally v38, then
        // v39/v42 across earlier rebases; renumbered to v50 after upstream v0.27
        // shipped its own v42–v49 (dreamer scheduling, memory verification,
        // classification scope, per-model embedding coexistence, etc.).
        //
        // One row per note. The UNIQUE constraint makes a note's identity
        // (skill_id, tier, project_identity, normalized_hash), so re-recording the
        // same normalized content for the same skill/tier/project is rejected
        // rather than duplicated. Every statement uses IF NOT EXISTS, so running
        // the migration against a database that already has the table is a no-op.
        version: 50,
        description: "Add skill_memory table for per-skill cross-session recall",
        up: (db: Database) => {
            // Both indexes lead with (skill_id, tier, project_identity): the first
            // adds last_used_at DESC for recency-ordered reads, the second adds
            // kind. NOTE(review): the "fts_prep" name suggests a later full-text
            // step; nothing in this migration creates an FTS table.
            db.exec(`
                CREATE TABLE IF NOT EXISTS skill_memory (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    skill_id TEXT NOT NULL,
                    resolved_path TEXT NOT NULL,
                    tier TEXT NOT NULL CHECK(tier IN ('project', 'global')),
                    skill_source TEXT CHECK(skill_source IN (
                        'opencode-project', 'opencode-global',
                        'claude-skills', 'agents-skills'
                    )),
                    project_identity TEXT NOT NULL,
                    intent TEXT NOT NULL,
                    intent_embedding BLOB,
                    embedding_model_version TEXT,
                    kind TEXT NOT NULL CHECK(kind IN ('gotcha', 'discovery', 'fix', 'workflow')),
                    delta TEXT NOT NULL,
                    tags TEXT,
                    hit_count INTEGER NOT NULL DEFAULT 0,
                    pinned INTEGER NOT NULL DEFAULT 0 CHECK(pinned IN (0, 1)),
                    normalized_hash TEXT NOT NULL,
                    created_at INTEGER NOT NULL,
                    last_used_at INTEGER,
                    UNIQUE(skill_id, tier, project_identity, normalized_hash)
                );

                CREATE INDEX IF NOT EXISTS idx_skill_memory_lookup
                    ON skill_memory(skill_id, tier, project_identity, last_used_at DESC);

                CREATE INDEX IF NOT EXISTS idx_skill_memory_fts_prep
                    ON skill_memory(skill_id, tier, project_identity, kind);
            `);
        },
    },
/**
 * Minimal YAML frontmatter parser for the `skill-memory:` block.
 * Does NOT depend on a full YAML library — parses only the specific
 * `skill-memory:` sub-block using line-by-line key:value extraction.
 * Malformed or absent blocks return null (inert). A bad config in one
 * skill cannot break other skills.
 */

/** Effective skill-memory settings for a skill that has opted in. */
export interface SkillMemoryConfig {
    /** Always `true`: a disabled or absent block is represented by `null` instead. */
    enabled: true;
    /** Token budget for the recalled block (default 1500). */
    max_tokens: number;
    /** Token budget reserved for pinned notes (default 4000). */
    max_pinned_tokens: number;
    /** Similarity threshold used for de-duplication (default 0.92). */
    dedup_threshold: number;
}

// Frontmatter is only frontmatter when it is the first thing in the file, so the
// pattern is anchored to the start of the string (no `m` flag). With `m`, a `---`
// horizontal rule further down the markdown body would open a fake frontmatter
// section. An optional BOM and trailing blanks on the fence lines are tolerated,
// and the closing fence must be a whole line.
const FRONTMATTER_REGEX = /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/;

/**
 * Read the `skill-memory:` settings from a SKILL.md document.
 *
 * @param content Full text of the SKILL.md file.
 * @returns The config with defaults applied when the block exists and
 *          `enabled` is `true` (or the string "true"); `null` in every other
 *          case — no frontmatter, no block, disabled, or unparseable.
 *          Never throws.
 */
export function parseFrontmatterConfig(content: string): SkillMemoryConfig | null {
    try {
        const fmMatch = FRONTMATTER_REGEX.exec(content);
        if (!fmMatch) return null;

        const skillMemoryBlock = extractSkillMemoryBlock(fmMatch[1] ?? "");
        if (!skillMemoryBlock) return null;

        const enabled = skillMemoryBlock.enabled;
        if (enabled !== true && enabled !== "true") return null;

        return {
            enabled: true,
            max_tokens: toNumber(skillMemoryBlock.max_tokens, 1500),
            max_pinned_tokens: toNumber(skillMemoryBlock.max_pinned_tokens, 4000),
            dedup_threshold: toNumber(skillMemoryBlock.dedup_threshold, 0.92),
        };
    } catch {
        // Non-choke: malformed config = inert
        return null;
    }
}

/** Coerce a parsed scalar to a finite number, falling back to `defaultValue`. */
function toNumber(value: unknown, defaultValue: number): number {
    if (typeof value === "number" && Number.isFinite(value)) return value;
    if (typeof value === "string") {
        const parsed = Number(value);
        if (Number.isFinite(parsed)) return parsed;
    }
    return defaultValue;
}

/**
 * Extract the `skill-memory:` sub-block from YAML frontmatter text.
 * Returns a flat key→value map of the block's immediate children.
 * Returns null if the block is absent or not an object.
 */
function extractSkillMemoryBlock(fmText: string): Record<string, unknown> | null {
    const result: Record<string, unknown> = {};
    let inSkillMemory = false;

    for (const line of fmText.split(/\r?\n/)) {
        if (!inSkillMemory) {
            // The key must open a mapping: nothing but an optional comment may follow.
            // `skill-memory: true` (a scalar) therefore never starts a block.
            if (/^skill-memory:\s*(?:#.*)?$/.test(line)) inSkillMemory = true;
            continue;
        }
        // End of skill-memory block: a line that starts without indentation
        if (/^\S/.test(line)) break;
        // Any indentation marks a child; YAML does not require two spaces.
        const kvMatch = /^\s+(\w+):\s*(.*)$/.exec(line);
        if (!kvMatch) continue; // blank lines, comment lines, unsupported syntax
        const key = kvMatch[1] ?? "";
        const rawVal = stripInlineComment((kvMatch[2] ?? "").trim());
        result[key] = parseYamlScalar(rawVal);
    }

    return inSkillMemory ? result : null;
}

/**
 * Drop a trailing YAML comment from a scalar. A `#` starts a comment only when it
 * is preceded by whitespace (or begins the value); inside a quoted scalar it is
 * literal text, so a quoted value is cut after its closing quote instead.
 */
function stripInlineComment(raw: string): string {
    const first = raw.charAt(0);
    if (first === '"' || first === "'") {
        const close = raw.indexOf(first, 1);
        return close === -1 ? raw : raw.slice(0, close + 1);
    }
    if (first === "#") return "";
    const hashAt = raw.search(/\s#/);
    return hashAt === -1 ? raw : raw.slice(0, hashAt).trimEnd();
}

/** Convert one YAML scalar token to boolean, null, number or (unquoted) string. */
function parseYamlScalar(raw: string): unknown {
    if (raw === "true") return true;
    if (raw === "false") return false;
    if (raw === "null" || raw === "~") return null;
    const num = Number(raw);
    if (raw !== "" && Number.isFinite(num)) return num;
    // Strip surrounding quotes
    if ((raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'"))) {
        return raw.slice(1, -1);
    }
    return raw;
}
"/home/user"; + +describe("parseSkillProvenance", () => { + test("parses Base directory line from skill output (global skill)", () => { + const output = `# Skill Content\nSome content here.\nBase directory for this skill: file://${HOME}/.config/opencode/skills/trilium`; + const result = parseSkillProvenance(output, "trilium"); + expect(result).not.toBeNull(); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/trilium/SKILL.md`); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); + + test("parses Base directory line from skill output (project skill)", () => { + const output = `# Skill Content\nBase directory for this skill: file://${HOME}/projects/magic-context/.agents/skills/find-docs`; + const result = parseSkillProvenance(output, "find-docs"); + expect(result).not.toBeNull(); + expect(result!.resolvedPath).toBe( + `${HOME}/projects/magic-context/.agents/skills/find-docs/SKILL.md`, + ); + expect(result!.tier).toBe("project"); + expect(result!.skillSource).toBe("agents-skills"); + }); + + test("returns null when Base directory line is absent", () => { + const output = "# Skill Content\nNo base directory line here."; + expect(parseSkillProvenance(output, "some-skill")).toBeNull(); + }); + + test("handles ~/.claude/skills/ as global tier", () => { + const output = `Base directory for this skill: file://${HOME}/.claude/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("claude-skills"); + }); + + test("handles ~/.agents/skills/ as global tier", () => { + const output = `Base directory for this skill: file://${HOME}/.agents/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("agents-skills"); + }); + + test("handles .opencode/skills/ as project tier", () => { + const output = `Base directory for this skill: 
/** Where a loaded skill came from, derived from the skill tool's output. */
export interface SkillProvenance {
    /** Path of the skill's SKILL.md (base directory + "/SKILL.md"). */
    resolvedPath: string;
    tier: "project" | "global";
    skillSource: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills";
    skillId: string;
    /** `Date.now()` at the moment the output was parsed. */
    loadedAt: number;
}

// Matches: "Base directory for this skill: file:///abs/path/to/skill/dir"
// Uses fileURLToPath (not naive regex capture) for cross-platform correctness.
const BASE_DIR_REGEX = /Base directory for this skill: (file:\/\/\/[^\n\r]+)/m;

// opencode's config directory accepts both `skills/` and `skill/`
// (OPENCODE_SKILL_PATTERN = "{skill,skills}/**/SKILL.md"), so both are global roots.
const GLOBAL_OPENCODE_ROOTS = [".config/opencode/skills", ".config/opencode/skill"] as const;
const GLOBAL_EXTERNAL_ROOTS = [".agents/skills", ".claude/skills"] as const;

/** Normalise separators so prefix checks also work on paths from Windows. */
function toPosixPath(p: string): string {
    return p.replace(/\\/g, "/");
}

/**
 * The user's home directory in forward-slash form without a trailing slash,
 * or null when neither HOME nor USERPROFILE is set. Returning null (rather
 * than "") prevents "/.config/opencode/..." from being used as a prefix.
 */
function homeDirPrefix(): string | null {
    const raw = process.env.HOME ?? process.env.USERPROFILE ?? "";
    const normalized = toPosixPath(raw).replace(/\/+$/, "");
    return normalized === "" ? null : normalized;
}

/** True when `dir` lies strictly inside `${home}/${root}/` for any listed root. */
function isUnderHomeRoot(dir: string, home: string, roots: readonly string[]): boolean {
    return roots.some((root) => dir.startsWith(`${home}/${root}/`));
}

/**
 * Parse the "Base directory for this skill: file:///…" line of a skill tool
 * result into provenance metadata.
 *
 * @param output  Raw text returned by the skill tool.
 * @param skillId Identifier of the skill that was loaded; copied into the result.
 * @returns Provenance, or null when the line is missing or its URL is invalid.
 */
export function parseSkillProvenance(output: string, skillId: string): SkillProvenance | null {
    const match = BASE_DIR_REGEX.exec(output);
    if (!match) return null;

    let absDir: string;
    try {
        absDir = fileURLToPath(new URL((match[1] ?? "").trim()));
    } catch {
        return null;
    }

    // A URL written with a trailing slash would otherwise yield ".../dir//SKILL.md".
    const baseDir = absDir.replace(/[\\/]+$/, "");

    return {
        resolvedPath: `${baseDir}/SKILL.md`,
        tier: deriveSkillTier(baseDir),
        skillSource: deriveSkillSource(baseDir),
        skillId,
        loadedAt: Date.now(),
    };
}

/**
 * Classify a skill directory as global (under one of the per-user skill roots)
 * or project-local (everything else, including when the home directory is unknown).
 */
export function deriveSkillTier(absDir: string): "project" | "global" {
    // Global dirs (discovered via Global.Path.config or EXTERNAL_DIR constants):
    //   ~/.config/opencode/{skill,skills}/ — via config.directories()
    //   ~/.agents/skills/                  — via AGENTS_EXTERNAL_DIR
    //   ~/.claude/skills/                  — via CLAUDE_EXTERNAL_DIR
    const home = homeDirPrefix();
    if (home === null) return "project";
    const dir = toPosixPath(absDir);
    const isGlobal =
        isUnderHomeRoot(dir, home, GLOBAL_OPENCODE_ROOTS) ||
        isUnderHomeRoot(dir, home, GLOBAL_EXTERNAL_ROOTS);
    return isGlobal ? "global" : "project";
}

/**
 * Identify which discovery convention a skill directory belongs to.
 * `.claude/skills/` and `.agents/skills/` are matched anywhere in the path so
 * that project-level and home-level directories get the same source label;
 * the tier (see deriveSkillTier) is what distinguishes them.
 */
export function deriveSkillSource(
    absDir: string,
): "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" {
    const dir = toPosixPath(absDir);
    const home = homeDirPrefix();
    if (home !== null && isUnderHomeRoot(dir, home, GLOBAL_OPENCODE_ROOTS)) {
        return "opencode-global";
    }
    if (dir.includes("/.claude/skills/")) return "claude-skills";
    if (dir.includes("/.agents/skills/")) return "agents-skills";
    // Covers .opencode/skill/, .opencode/skills/ and any unknown project-local path.
    return "opencode-project";
}
/**
 * In-memory map of skills loaded in a session. Each value is the parsed
 * provenance of the load plus the skill's frontmatter config (null when the
 * skill has no usable `skill-memory:` block). Keys come from `registryKey`.
 */
export type SkillLoadRegistry = Map<
    string,
    SkillProvenance & { frontmatterConfig: import("./frontmatter").SkillMemoryConfig | null }
>;

/** Create an empty registry. */
export function createSkillLoadRegistry(): SkillLoadRegistry {
    const registry: SkillLoadRegistry = new Map();
    return registry;
}

/** Compose the registry key for one (session, skill) pair: "sessionId:skillId". */
export function registryKey(sessionId: string, skillId: string): string {
    return [sessionId, skillId].join(":");
}

/**
 * Look up what was recorded when `skillId` was loaded in `sessionId`.
 *
 * @returns The stored entry, or undefined when that skill has not been
 *          registered for that session.
 */
export function getSkillLoad(
    registry: SkillLoadRegistry,
    sessionId: string,
    skillId: string,
): ReturnType<SkillLoadRegistry["get"]> {
    const key = registryKey(sessionId, skillId);
    return registry.get(key);
}
resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "intent", + kind: "gotcha", + delta: `note ${i} — ${"x".repeat(40)}`, + normalizedHash: `h${i}`, + createdAt: now - i * 1000, + }); + } + // Token-budget truncation test (arithmetic verified): + // delta = "note N — " (9 chars) + 40 "x"s = 49 chars → Math.ceil(49/4) = 13 tokens each. + // maxTokens: 30 → first note fits (13 ≤ 30), second note fits (13+13=26 ≤ 30), + // third note would exceed (26+13=39 > 30) → exactly 2 notes fit. + const notes = flatRecall(db, "tdd", "global", "git:abc", { + maxTokens: 30, // 2 notes × 13 tokens = 26 ≤ 30; 3rd note would push to 39 > 30 + maxPinnedTokens: 4000, + }); + expect(notes.length).toBe(2); + } finally { + closeQuietly(db); + } + }); +}); + +describe("buildSkillMemoryBlock", () => { + test("returns empty string when notes array is empty (cold-start)", () => { + expect(buildSkillMemoryBlock("tdd", "no-intent", [], 0)).toBe(""); + }); + + test("builds correct XML block with notes", () => { + const notes = [ + { + id: 1, + skill_id: "tdd", + kind: "gotcha" as const, + delta: "Always mock the clock", + intent: "fix flaky test", + hit_count: 3, + pinned: 0, + normalized_hash: "h1", + created_at: Date.now(), + last_used_at: Date.now(), + resolved_path: "/p", + tier: "global" as const, + skill_source: "opencode-global" as const, + project_identity: "git:abc", + tags: null, + intent_embedding: null, + embedding_model_version: null, + }, + ]; + const block = buildSkillMemoryBlock("tdd", "no-intent", notes, 0); + expect(block).toContain(' n.pinned === 1); + const unpinned = candidates.filter((n) => n.pinned === 0); + + const result: SkillMemoryNote[] = []; + let pinnedTokens = 0; + let totalTokens = 0; + + // Always include pinned notes (up to maxPinnedTokens) + for (const note of pinned) { + const tokens = estimateTokens(note.delta); + if (pinnedTokens + tokens > options.maxPinnedTokens) break; + result.push(note); + 
/**
 * Escape text for use inside XML element content and double-quoted attribute
 * values. `&` is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param str Raw text (note delta, intent, skill id, …).
 * @returns The text with `&`, `<`, `>` and `"` replaced by their entities.
 */
function escapeXml(str: string): string {
    return str
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
}
/**
 * Shared recall entry point: load a skill's notes, apply the token budgets, and
 * render them as the block string.
 *
 * Callers:
 *   - maybeInjectSkillMemory (transparent after-hook) appends the result to output.output
 *   - the ctx_skill_recall tool returns the result as its tool result
 *
 * Kept in the feature layer (not hook-handlers.ts) so tools do not import from hooks.
 *
 * @returns The rendered block, or "" when skill-memory is not enabled for the
 *          skill, no notes were recalled, or recall threw.
 */
export function recallSkillMemoryBlock(
    db: Database,
    opts: {
        skill: string;
        intent?: string;
        scope: "project" | "global";
        projectIdentity: string;
        frontmatterConfig: SkillMemoryConfig | null;
        maxTokens?: number;
    },
): string {
    const config = opts.frontmatterConfig;
    // Guard: skill-memory must be enabled for this skill
    if (!config?.enabled) return "";

    try {
        // An explicit per-call budget wins over the skill's configured one.
        const budget = {
            maxTokens: opts.maxTokens ?? config.max_tokens,
            maxPinnedTokens: config.max_pinned_tokens,
        };
        const recalled = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, budget);
        if (recalled.length === 0) return ""; // cold-start: no block

        let pinnedTotal = 0;
        for (const note of recalled) {
            if (note.pinned === 1) pinnedTotal += 1;
        }

        // P1: always "no-intent" flat recall. P2 will add intent-aware ranking (fts5-fallback rung).
        // TODO (P2): const mode: "no-intent" | "flat-fts" = opts.intent ? "flat-fts" : "no-intent";
        const mode: "no-intent" | "flat-fts" = "no-intent";
        return buildSkillMemoryBlock(opts.skill, mode, recalled, pinnedTotal);
    } catch {
        // Non-fatal: recall failure must never block the tool result
        return "";
    }
}
return db; +} + +describe("skill_memory storage", () => { + test("insertSkillMemoryNote inserts a new row", () => { + const db = makeDb(); + try { + const args: InsertSkillMemoryNoteArgs = { + skillId: "test-driven-development", + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc123", + intent: "fix a flaky test in auth", + kind: "gotcha", + delta: "Always mock the clock in auth tests — real timers cause flakiness", + normalizedHash: "hash-001", + createdAt: Date.now(), + }; + const id = insertSkillMemoryNote(db, args); + expect(typeof id).toBe("number"); + expect(id).toBeGreaterThan(0); + } finally { + closeQuietly(db); + } + }); + + test("insertSkillMemoryNote returns null on duplicate normalized_hash (UNIQUE constraint)", () => { + const db = makeDb(); + try { + const args: InsertSkillMemoryNoteArgs = { + skillId: "tdd", + resolvedPath: "/path/SKILL.md", + tier: "project", + skillSource: "opencode-project", + projectIdentity: "git:abc123", + intent: "intent", + kind: "fix", + delta: "delta content", + normalizedHash: "dup-hash", + createdAt: Date.now(), + }; + insertSkillMemoryNote(db, args); + const result = insertSkillMemoryNote(db, args); // duplicate + expect(result).toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryNotes returns notes ordered by recency × hit_count", () => { + const db = makeDb(); + try { + const now = Date.now(); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "note A (high hit_count)", + normalizedHash: "h1", + createdAt: now - 10000, + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "discovery", + delta: "note B (recent)", + normalizedHash: 
"h2", + createdAt: now, + }); + // Bump hit_count on note A + bumpHitCount(db, "tdd", "global", "git:abc", "h1"); + bumpHitCount(db, "tdd", "global", "git:abc", "h1"); + + const notes = getSkillMemoryNotes(db, "tdd", "global", "git:abc", 10); + expect(notes.length).toBe(2); + // Both notes should be returned; order is recency × hit_count + expect(notes.map((n) => n.delta)).toContain("note A (high hit_count)"); + expect(notes.map((n) => n.delta)).toContain("note B (recent)"); + } finally { + closeQuietly(db); + } + }); + + test("bumpHitCount increments hit_count and updates last_used_at", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "workflow", + delta: "workflow note", + normalizedHash: "h-bump", + createdAt: Date.now(), + }); + bumpHitCount(db, "tdd", "global", "git:abc", "h-bump"); + bumpHitCount(db, "tdd", "global", "git:abc", "h-bump"); + const notes = getSkillMemoryNotes(db, "tdd", "global", "git:abc", 10); + expect(notes[0].hit_count).toBe(2); + expect(notes[0].last_used_at).not.toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats returns totals scoped to project_identity", () => { + const db = makeDb(); + try { + // Seed 3 notes for skill "tdd" under project "git:abc", 1 of them pinned. 
+ insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i1", + kind: "gotcha", + delta: "n1", + normalizedHash: "stats-h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i2", + kind: "fix", + delta: "n2", + normalizedHash: "stats-h2", + createdAt: Date.now(), + }); + // pin the second one directly via SQL — there's no pin API in storage yet + db.prepare("UPDATE skill_memory SET pinned = 1 WHERE normalized_hash = ?").run( + "stats-h2", + ); + + // Seed 2 notes for a different skill "debugging" under the same project. + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i3", + kind: "discovery", + delta: "n3", + normalizedHash: "stats-h3", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i4", + kind: "workflow", + delta: "n4", + normalizedHash: "stats-h4", + createdAt: Date.now(), + }); + + // Seed 1 note under a DIFFERENT project — must NOT be counted. 
+ insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:other", + intent: "i5", + kind: "gotcha", + delta: "n5", + normalizedHash: "stats-h5", + createdAt: Date.now(), + }); + + const stats = getSkillMemoryStats(db, "git:abc"); + expect(stats.totalNotes).toBe(4); + expect(stats.skillsWithNotes).toBe(2); + expect(stats.pinnedNotes).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats returns all-zeros when no notes exist for the project", () => { + const db = makeDb(); + try { + const stats = getSkillMemoryStats(db, "git:empty"); + expect(stats.totalNotes).toBe(0); + expect(stats.skillsWithNotes).toBe(0); + expect(stats.pinnedNotes).toBe(0); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts new file mode 100644 index 00000000..4eb57513 --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -0,0 +1,186 @@ +import type { Database } from "../../../shared/sqlite"; + +export interface SkillMemoryNote { + id: number; + skill_id: string; + resolved_path: string; + tier: "project" | "global"; + skill_source: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; + project_identity: string; + intent: string; + intent_embedding: Buffer | null; + embedding_model_version: string | null; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + delta: string; + tags: string | null; + hit_count: number; + pinned: number; + normalized_hash: string; + created_at: number; + last_used_at: number | null; +} + +export interface InsertSkillMemoryNoteArgs { + skillId: string; + resolvedPath: string; + tier: "project" | "global"; + skillSource: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; + projectIdentity: string; + intent: 
string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + delta: string; + tags?: string[]; + normalizedHash: string; + createdAt: number; +} + +/** + * Insert a new skill_memory note. Returns the new row id, or null if a + * duplicate normalized_hash already exists for this (skill_id, tier, project_identity). + * On duplicate, callers should call bumpHitCount instead. Any other database error is rethrown. + */ +export function insertSkillMemoryNote( + db: Database, + args: InsertSkillMemoryNoteArgs, +): number | null { + try { + const result = db + .prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, skill_source, project_identity, + intent, kind, delta, tags, hit_count, pinned, normalized_hash, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, + ) + .run( + args.skillId, + args.resolvedPath, + args.tier, + args.skillSource ?? null, + args.projectIdentity, + args.intent, + args.kind, + args.delta, + args.tags ? JSON.stringify(args.tags) : null, + args.normalizedHash, + args.createdAt, + ); + return Number(result.lastInsertRowid); + } catch (err: unknown) { + // UNIQUE constraint violation = duplicate + if (err instanceof Error && err.message.includes("UNIQUE constraint failed")) { + return null; + } + throw err; + } +} + +/** + * Retrieve notes for flat recall (rungs 2 + 4). + * Ordered by normalized additive recency + hit_count score (pinned notes first). + * + * Scoring: recency_norm + hit_norm where (ts = COALESCE(last_used_at, created_at)): + * recency_norm = COALESCE((ts - min_ts) / NULLIF(max_ts - min_ts, 0), 0) — 0..1 range + * hit_norm = COALESCE(hit_count / NULLIF(MAX(hit_count) OVER (), 0), 0) — 0..1 range + * Additive (not multiplicative) so hit_count is not swamped by timestamp scale. A zero denominator (all rows share one timestamp, or nothing has been hit yet) makes that term 0 instead of NULL, so it cannot blank out the other term. + * + * NOTE: The window-function form requires SQLite ≥ 3.25 (2018). Bun ships SQLite ≥ 3.39. + * If the window form causes issues, fall back to the simpler: + * (COALESCE(last_used_at, created_at) / 1000000.0) + (hit_count * 0.1) DESC + * which is less precise but avoids the window function. 
+ * + * TODO: add an ordering test that inserts notes with known recency/hit_count values + * and asserts the returned order matches the expected ranking. + */ +export function getSkillMemoryNotes( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): SkillMemoryNote[] { + return db + .prepare( + `SELECT * + FROM skill_memory + WHERE skill_id = ? AND tier = ? AND project_identity = ? + ORDER BY + pinned DESC, + ( + COALESCE((COALESCE(last_used_at, created_at) - MIN(COALESCE(last_used_at, created_at)) OVER ()) * 1.0 + / NULLIF(MAX(COALESCE(last_used_at, created_at)) OVER () - MIN(COALESCE(last_used_at, created_at)) OVER (), 0), 0) + + COALESCE(hit_count * 1.0 / NULLIF(MAX(hit_count) OVER (), 0), 0) + ) DESC, + created_at DESC + LIMIT ?`, + ) + .all(skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; +} + +/** + * Bump hit_count and update last_used_at for a note identified by its + * normalized_hash within a (skill_id, tier, project_identity) scope. + */ +export function bumpHitCount( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + normalizedHash: string, +): void { + db.prepare( + `UPDATE skill_memory + SET hit_count = hit_count + 1, last_used_at = ? + WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, + ).run(Date.now(), skillId, tier, projectIdentity, normalizedHash); +} + +/** + * Check if a note with the given normalized_hash already exists. + * Returns the existing note's id and hit_count, or null. + */ +export function findExistingNote( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + normalizedHash: string, +): { id: number; hit_count: number } | null { + return ( + (db + .prepare( + `SELECT id, hit_count FROM skill_memory + WHERE skill_id = ? AND tier = ? AND project_identity = ? 
AND normalized_hash = ?`, + ) + .get(skillId, tier, projectIdentity, normalizedHash) as { + id: number; + hit_count: number; + } | null) ?? null + ); +} + +/** + * Aggregate stats for the skill_memory table scoped to a project identity. + * Used by the ctx-status / TUI status dialog (mirrors the external-memory + * status surface). Sync, single query; safe to call on every status poll. + */ +export function getSkillMemoryStats( + db: Database, + projectIdentity: string, +): { totalNotes: number; skillsWithNotes: number; pinnedNotes: number } { + const row = db + .prepare( + `SELECT + COUNT(*) AS total, + COUNT(DISTINCT skill_id) AS skills, + COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned + FROM skill_memory + WHERE project_identity = ?`, + ) + .get(projectIdentity) as { total: number; skills: number; pinned: number } | undefined; + return { + totalNotes: Number(row?.total ?? 0), + skillsWithNotes: Number(row?.skills ?? 0), + pinnedNotes: Number(row?.pinned ?? 0), + }; +} diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index d1fcc9ec..eb3749bf 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -38,7 +38,7 @@ export function getSchemaFenceRejection(): { return lastSchemaFenceRejection; } -export const LATEST_SUPPORTED_VERSION = 49; +export const LATEST_SUPPORTED_VERSION = 50; // chmod is meaningless on Windows (POSIX modes are not honored), so all // permission tightening is skipped there. mkdir's `mode` is likewise ignored. 
diff --git a/packages/plugin/src/hooks/magic-context/command-handler.ts b/packages/plugin/src/hooks/magic-context/command-handler.ts index 89d16f54..d0874a87 100644 --- a/packages/plugin/src/hooks/magic-context/command-handler.ts +++ b/packages/plugin/src/hooks/magic-context/command-handler.ts @@ -552,7 +552,12 @@ export function createMagicContextCommandHandler(deps: { } const liveModelKey = deps.getLiveModelKey?.(sessionId); const liveContextLimit = deps.getContextLimit?.(sessionId); - const statusOutput = executeStatus( + // Use dreamer's directory when available (== project's working + // directory for dreamer-aware sessions); fall back to cwd so + // the new "Skill memory" section can resolve a project identity + // for sessions that don't have dreamer configured. + const statusDirectory = deps.dreamer?.directory ?? process.cwd(); + const statusOutput = await executeStatus( deps.db, sessionId, deps.protectedTags, @@ -562,6 +567,7 @@ export function createMagicContextCommandHandler(deps: { deps.commitClusterTrigger, deps.executeThresholdTokens, liveContextLimit, + statusDirectory, ); result += result ? 
`\n\n${statusOutput}` : statusOutput; } diff --git a/packages/plugin/src/hooks/magic-context/execute-status.ts b/packages/plugin/src/hooks/magic-context/execute-status.ts index cfe95ffb..f363f068 100644 --- a/packages/plugin/src/hooks/magic-context/execute-status.ts +++ b/packages/plugin/src/hooks/magic-context/execute-status.ts @@ -1,6 +1,8 @@ import { DEFAULT_EXECUTE_THRESHOLD_PERCENTAGE } from "../../config/schema/magic-context"; import { getCompartments } from "../../features/magic-context/compartment-storage"; +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { parseCacheTtl } from "../../features/magic-context/scheduler"; +import { getSkillMemoryStats } from "../../features/magic-context/skill-memory/storage"; import { getPendingOps } from "../../features/magic-context/storage"; import { getOrCreateSessionMeta } from "../../features/magic-context/storage-meta"; import { getTagsBySession } from "../../features/magic-context/storage-tags"; @@ -31,7 +33,7 @@ function formatExecuteThreshold( return `${thresholdPercentage}%`; } -export function executeStatus( +export async function executeStatus( db: Database, sessionId: string, protectedTags: number, @@ -43,7 +45,8 @@ export function executeStatus( commitClusterTrigger?: { enabled: boolean; min_clusters: number }, executeThresholdTokens?: { default?: number; [modelKey: string]: number | undefined }, contextLimit?: number, -): string { + directory?: string, +): Promise<string> { // Single source of truth — resolver tells us both the effective percentage AND // which config source won (tokens vs percentage). Previously /ctx-status // reimplemented the token-match check here and missed progressive base-model @@ -185,6 +188,32 @@ export function executeStatus( } } + // Skill-memory stats — only when a directory is available to resolve + // the project identity (skill_memory is partitioned on + // project_identity). 
Mirrors the external-memory section's pattern: + // surface counts only when there is something to show, skip otherwise. + // Wrapped in try/catch so a missing skill_memory table (e.g. pre-v37 + // migration in tests) doesn't fail the whole status output — same + // defensive pattern the tags / pending_ops queries use. + if (directory) { + try { + const projectIdentity = resolveProjectIdentity(directory); + if (projectIdentity) { + const skillStats = getSkillMemoryStats(db, projectIdentity); + if (skillStats.totalNotes > 0) { + lines.push( + "", + "### Skill memory", + `- notes: ${skillStats.totalNotes} (across ${skillStats.skillsWithNotes} ${skillStats.skillsWithNotes === 1 ? "skill" : "skills"})`, + `- pinned: ${skillStats.pinnedNotes}`, + ); + } + } + } catch { + // skill_memory may not exist (pre-v37 schema) — skip silently + } + } + return lines.join("\n"); } catch (error) { sessionLog(sessionId, "ctx-status failed:", error); diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.ts index ee449937..ee5f5582 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.ts @@ -1,8 +1,11 @@ +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { clearSessionTracking, scheduleIncrementalIndex, scheduleReconciliation, } from "../../features/magic-context/message-index-async"; +import type { SkillMemoryConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import { recallSkillMemoryBlock } from "../../features/magic-context/skill-memory/recall"; import { clearPersistedReasoningWatermark } from "../../features/magic-context/storage"; import { getOrCreateSessionMeta, @@ -22,6 +25,7 @@ import { import { clearSidebarSnapshotCache } from "../../plugin/sidebar-snapshot-cache"; import type { PluginContext } from "../../plugin/types"; import { sessionLog } from 
"../../shared/logger"; +import type { Database } from "../../shared/sqlite"; import { clearAutoSearchForSession } from "./auto-search-runner"; import { buildChannel1Reminder, @@ -467,9 +471,109 @@ function maybeInjectChannel1Nudge( ); } +// ── intentByCallId stash map ──────────────────────────────────────────────── +// Keyed by callID (= options.toolCallId, identical before↔after). +// Bounded: 60s TTL + 256-entry hard cap, both enforced inside stashIntent. A consumer +// takes an entry with getAndDeleteIntent (read-once); the TTL sweep and the cap are the +// backstop for callIDs that are never consumed (crash, swallowed exception, tool error). +// Spike C (Task 0a) confirmed: tool.execute.before fires PRE-validation on +// raw output.args, so intent is present before Effect-Schema strips it. + +export type IntentByCallIdMap = Map<string, { intent: string; ts: number }>; + +export function createIntentByCallIdMap(): IntentByCallIdMap { + return new Map(); +} + +const INTENT_TTL_MS = 60_000; +const INTENT_MAP_CAP = 256; + +export function stashIntent(map: IntentByCallIdMap, callId: string, intent: string): void { + // Sweep stale entries (TTL backstop): anything stashed more than INTENT_TTL_MS ago is dropped + const now = Date.now(); + for (const [key, entry] of map) { + if (now - entry.ts > INTENT_TTL_MS) { + map.delete(key); + } + } + // Hard cap: evict the oldest entry, but only when a NEW key would grow the map (overwriting callId does not) + if (!map.has(callId) && map.size >= INTENT_MAP_CAP) { + let oldestKey: string | undefined; + let oldestTs = Infinity; + for (const [key, entry] of map) { + if (entry.ts < oldestTs) { + oldestTs = entry.ts; + oldestKey = key; + } + } + if (oldestKey !== undefined) map.delete(oldestKey); + } + map.set(callId, { intent, ts: now }); +} + +export function getAndDeleteIntent(map: IntentByCallIdMap, callId: string): string | null { + const entry = map.get(callId); + if (!entry) return null; + map.delete(callId); + return entry.intent; +} + +// ── createToolExecuteBeforeHook ───────────────────────────────────────────── + +/** + * Append a block to output.output when: + * 1. frontmatterConfig is non-null (skill has skill-memory: enabled: true) + * 2. 
Notes exist for this skill in the DB + * 3. output.output is a non-empty string + * + * Delegates to recallSkillMemoryBlock (feature layer) for the shared recall+format core. + * Append ordering: this runs BEFORE maybeInjectChannel1Nudge (skill-memory + * content before Channel-1 meta-reminder). See design §2.6. + */ +export function maybeInjectSkillMemory( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + frontmatterConfig: SkillMemoryConfig | null, + output: { output?: unknown }, +): void { + if (typeof output.output !== "string" || output.output.length === 0) return; + + // Delegate to shared recall core (also used by ctx_skill_recall tool) + const block = recallSkillMemoryBlock(db, { + skill: skillId, + scope: tier, + projectIdentity, + frontmatterConfig, + }); + if (block) { + output.output = `${output.output}\n\n${block}`; + } +} + +export function createToolExecuteBeforeHook(args: { intentByCallId: IntentByCallIdMap }) { + return async (input: unknown, output?: unknown) => { + const typedInput = input as { tool?: string; callID?: string }; + const typedOutput = output as { args?: Record } | undefined; + if (typedInput.tool !== "skill") return; + if (!typedInput.callID) return; + const intent = typedOutput?.args?.intent; + if (typeof intent !== "string") return; + stashIntent(args.intentByCallId, typedInput.callID, intent); + }; +} + export function createToolExecuteAfterHook(args: { db: Parameters[0]; channel1StateBySession: Map; + skillLoadRegistry: import("../../features/magic-context/skill-memory/provenance").SkillLoadRegistry; + /** Resolved session.directory values, used to compute projectIdentity for + * the skill-memory recall. The hook's transform pass populates this on + * every message turn; on the first skill call before the map is seeded, + * we fall back to `defaultDirectory` (deps.directory). 
*/ + sessionDirectoryBySession: Map; + defaultDirectory: string; }) { return async (input: unknown, output?: unknown) => { const typedInput = input as { tool?: string; sessionID?: string; args?: unknown }; @@ -477,6 +581,93 @@ export function createToolExecuteAfterHook(args: { return; } + // Skill-memory: populate registry when skill tool completes. + // Frontmatter MUST be read from DISK (proven in Task 0b: opencode's + // skill loader strips the skill-memory: block from the model-facing + // output). Reading output.output would always yield null. We re-read + // SKILL.md from provenance.resolvedPath (which IS present in the + // output's "Base directory for this skill:" line). + if (typedInput.tool === "skill") { + const typedOutput = output as { output?: unknown } | undefined; + if (typeof typedOutput?.output === "string") { + const skillArgs = typedInput.args as { name?: unknown } | undefined; + const skillId = typeof skillArgs?.name === "string" ? skillArgs.name : null; + if (skillId) { + try { + const { parseSkillProvenance, registryKey } = await import( + "../../features/magic-context/skill-memory/provenance" + ); + const { parseFrontmatterConfig } = await import( + "../../features/magic-context/skill-memory/frontmatter" + ); + const provenance = parseSkillProvenance(typedOutput.output, skillId); + if (provenance) { + let frontmatterConfig: + | import("../../features/magic-context/skill-memory/frontmatter").SkillMemoryConfig + | null = null; + try { + const { readFileSync } = await import("node:fs"); + const rawSkillContent = readFileSync( + provenance.resolvedPath, + "utf-8", + ); + frontmatterConfig = parseFrontmatterConfig(rawSkillContent); + } catch { + // Non-fatal: SKILL.md unreadable → frontmatterConfig stays null + // (skill-memory disabled for this skill load) + } + args.skillLoadRegistry.set(registryKey(typedInput.sessionID, skillId), { + ...provenance, + frontmatterConfig, + }); + } + } catch { + // Non-fatal: registry miss means ctx_skill_note 
will surface an actionable error + } + + // Skill-memory injection (BEFORE Channel-1 nudge — design §2.6). + // Re-read skillId/args from typedInput; resolve sessionDir to + // projectIdentity; delegate to maybeInjectSkillMemory which + // appends the block to output.output. + // Non-fatal: recall failure must never block the tool result. + try { + const { registryKey: rKey } = await import( + "../../features/magic-context/skill-memory/provenance" + ); + const registryEntry = args.skillLoadRegistry.get( + rKey(typedInput.sessionID, skillId), + ); + if (registryEntry) { + // First-turn fallback: if the map has no entry yet + // (skill tool fires before sessionDirectoryBySession + // is populated), fall back to args.defaultDirectory. + // Intentional: multi-project / Desktop-launched sessions + // may misattribute on the very first skill call; + // subsequent calls resolve correctly. + const sessionDir = + args.sessionDirectoryBySession.get(typedInput.sessionID) ?? + args.defaultDirectory; + const projectIdentity = resolveProjectIdentity(sessionDir); + maybeInjectSkillMemory( + args.db, + skillId, + registryEntry.tier, + projectIdentity, + registryEntry.frontmatterConfig, + output as { output?: unknown }, + ); + } + } catch (error) { + sessionLog( + typedInput.sessionID, + "skill-memory injection failed (ignored):", + error, + ); + } + } + } + } + if (typedInput.tool === "ctx_reduce") { // Mark the Channel 1 baseline dirty so the next nudge re-measures the // (now smaller) reclaimable tail instead of replaying a stale band. 
diff --git a/packages/plugin/src/hooks/magic-context/hook.ts b/packages/plugin/src/hooks/magic-context/hook.ts index bd823c60..9ba3385d 100644 --- a/packages/plugin/src/hooks/magic-context/hook.ts +++ b/packages/plugin/src/hooks/magic-context/hook.ts @@ -27,6 +27,10 @@ import { getEmbeddingCoverageStatus, } from "../../features/magic-context/project-embedding-registry"; import type { Scheduler } from "../../features/magic-context/scheduler"; +import { + createSkillLoadRegistry, + type SkillLoadRegistry, +} from "../../features/magic-context/skill-memory/provenance"; import { getDatabasePersistenceError, getSessionsWithPendingMarker, @@ -73,7 +77,9 @@ import { createChatMessageHook, createCommandExecuteBeforeHook, createEventHook, + createIntentByCallIdMap, createToolExecuteAfterHook, + createToolExecuteBeforeHook, getLiveNotificationParams, } from "./hook-handlers"; import type { LiveSessionState } from "./live-session-state"; @@ -282,6 +288,17 @@ export function createMagicContextHook(deps: MagicContextDeps) { // Written at the end of each transform pass (post-drop), read in // tool.execute.after. Only populated for primary sessions. const channel1StateBySession = new Map(); + // intentByCallId: stash for skill tool intent captured pre-validation in + // tool.execute.before. Bounded: 60s TTL + 256-entry hard cap. NOTE(review): only the + // before-hook is handed this map in this file — confirm where entries are consumed. Cleared wholesale in onSessionDeleted. + const intentByCallId = createIntentByCallIdMap(); + // skillLoadRegistry: session-scoped registry of (skillId → SkillProvenance + + // frontmatterConfig), populated in tool.execute.after for the skill tool. + // Per-session cleanup in onSessionDeleted (keyed as `${sessionId}:${skillId}`). + // Exposed on the hook's return value so the same instance flows to + // createCtxSkillNoteTool (index.ts, Task 8) — otherwise the tool sees a + // disconnected empty Map and recall is dead on arrival. 
+ const skillLoadRegistry: SkillLoadRegistry = createSkillLoadRegistry(); /** * Return the live provider/model for a session. @@ -664,6 +681,21 @@ export function createMagicContextHook(deps: MagicContextDeps) { internalChildSessions.delete(sessionId); channel1StateBySession.delete(sessionId); clearEmbedSessionState(sessionId); + // NOTE: intentByCallId is keyed by callID (not sessionID:callID), so .clear() removes + // entries from ALL concurrent sessions, not just the deleted one. This is an accepted + // design trade-off: the 60s TTL + 256-entry hard cap are the real leak guards; the + // .clear() here is a belt-and-braces backstop for long-lived sessions. Cross-session + // clearing degrades quality (lost intents for concurrent sessions) but is not fatal. + // If concurrent multi-session use becomes common, key entries as `${sessionID}:${callID}` + // and filter on delete. For P1, document-as-intentional is the chosen fix. + intentByCallId.clear(); // clear all entries on session delete (bounded map; cross-session clear is intentional — see note above) + // skillLoadRegistry is keyed as `${sessionId}:${skillId}` so we can prune + // per-session entries without cross-session bleed. Without this, deleted + // sessions' skill loads would persist in the registry for the plugin's + // lifetime (potentially days/weeks), slowly leaking memory. + for (const key of skillLoadRegistry.keys()) { + if (key.startsWith(`${sessionId}:`)) skillLoadRegistry.delete(key); + } }, }); @@ -927,6 +959,18 @@ export function createMagicContextHook(deps: MagicContextDeps) { "tool.execute.after": createToolExecuteAfterHook({ db, channel1StateBySession, + skillLoadRegistry, + // Resolve session-specific directory from the map populated by the + // transform pass; fall back to the hook's own directory (deps.directory) + // for the first-turn case where the map isn't seeded yet. 
+ sessionDirectoryBySession, + defaultDirectory: deps.directory, }), + "tool.execute.before": createToolExecuteBeforeHook({ intentByCallId }), + // Exposed so index.ts can pass the SAME instance to createCtxSkillNoteTool. + // The after-hook populates this registry; the tool reads from it. Without + // this, the tool would receive a fresh empty Map and ctx_skill_note would + // always return "No recent skill load found". + skillLoadRegistry, }; } diff --git a/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts new file mode 100644 index 00000000..8e66cf4e --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts @@ -0,0 +1,212 @@ +import { describe, expect, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { recallSkillMemoryBlock } from "../../features/magic-context/skill-memory/recall"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { maybeInjectSkillMemory } from "../magic-context/hook-handlers"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("recallSkillMemoryBlock (shared recall core)", () => { + test("returns non-empty string containing { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h-recall-1", + createdAt: Date.now(), + }); + const block = recallSkillMemoryBlock(db, { + skill: "tdd", + 
scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + expect(block).toContain(" { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h-recall-2", + createdAt: Date.now(), + }); + const block = recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: null, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); + + test("returns empty string when no notes exist (cold-start)", () => { + const db = makeDb(); + try { + const block = recallSkillMemoryBlock(db, { + skill: "nonexistent-skill", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); +}); + +describe("maybeInjectSkillMemory", () => { + test("appends skill-memory block to output.output when notes exist", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h1", + createdAt: Date.now(), + }); + + const output = { output: "# TDD Skill\nContent here." }; + // Pass enabled frontmatterConfig — null triggers the early-return guard + // (`if (!frontmatterConfig?.enabled) return;`) and the block is never injected. 
+ maybeInjectSkillMemory( + db, + "tdd", + "global", + "git:abc", + { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000, dedup_threshold: 0.92 }, + output, + ); + + expect(output.output).toContain(" { + const db = makeDb(); + try { + const output = { output: "# TDD Skill\nContent here." }; + maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + expect(output.output).not.toContain(" { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h2", + createdAt: Date.now(), + }); + const output = { output: "# TDD Skill\nContent here." }; + // null frontmatterConfig = skill-memory not enabled for this skill + maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + expect(output.output).not.toContain(" { + // maybeInjectSkillMemory APPENDS to output.output — it does NOT prepend. + // So if a sentinel is already in the output, the skill-memory block lands AFTER it. + // This test verifies the append contract: skillMemoryPos > channel1Pos. + // + // The ordering contract between maybeInjectSkillMemory and maybeInjectChannel1Nudge + // (skill-memory before Channel-1 meta-reminder) is enforced at the + // createToolExecuteAfterHook level, not at the single-function level. + // TODO (U11): add a createToolExecuteAfterHook integration test with stubs to verify + // that maybeInjectSkillMemory fires before maybeInjectChannel1Nudge. 
+ const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "gotcha", + delta: "some note", + normalizedHash: "h3", + createdAt: Date.now(), + }); + const output = { output: "# TDD Skill\nContent here.\n" }; + maybeInjectSkillMemory( + db, + "tdd", + "global", + "git:abc", + { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000, dedup_threshold: 0.92 }, + output, + ); + const skillMemoryPos = output.output.indexOf(" { + test("stashIntent stores intent keyed by callId", () => { + const map = createIntentByCallIdMap(); + stashIntent(map, "call-1", "fix the bug in auth"); + expect(getAndDeleteIntent(map, "call-1")).toBe("fix the bug in auth"); + }); + + test("getAndDeleteIntent removes the entry (finally-delete semantics)", () => { + const map = createIntentByCallIdMap(); + stashIntent(map, "call-2", "some intent"); + getAndDeleteIntent(map, "call-2"); + expect(getAndDeleteIntent(map, "call-2")).toBeNull(); + }); + + test("stashIntent evicts entries older than 60s (TTL backstop)", () => { + const map = createIntentByCallIdMap(); + // Manually insert a stale entry (65s > 60s TTL, clearly expired regardless of boundary strictness) + map.set("stale-call", { intent: "old intent", ts: Date.now() - 65_000 }); + stashIntent(map, "new-call", "new intent"); // triggers sweep + expect(map.has("stale-call")).toBe(false); + expect(map.has("new-call")).toBe(true); + }); + + test("stashIntent hard-caps map at 256 entries (evicts oldest)", () => { + const map = createIntentByCallIdMap(); + // Fill to 256 + for (let i = 0; i < 256; i++) { + map.set(`call-${i}`, { intent: `intent-${i}`, ts: Date.now() - i }); + } + // Adding one more should evict the oldest + stashIntent(map, "call-overflow", "overflow intent"); + expect(map.size).toBeLessThanOrEqual(256); + expect(map.has("call-overflow")).toBe(true); + }); + + 
test("clearIntentMap removes all entries (onSessionDeleted)", () => { + const map = createIntentByCallIdMap(); + stashIntent(map, "call-a", "intent a"); + stashIntent(map, "call-b", "intent b"); + map.clear(); + expect(map.size).toBe(0); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts b/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts new file mode 100644 index 00000000..9c4e8090 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-tool-definition.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, test } from "bun:test"; + +// Test the intent injection logic in isolation (pure function). +// Import from the dedicated module, not from index.ts (which is the plugin entry point). +import { injectSkillIntentParam } from "./skill-tool-definition"; + +type OutputShape = Parameters[1]; + +describe("skill tool definition intent injection", () => { + test("assigns output.jsonSchema for the skill tool (current opencode: jsonSchema starts undefined)", () => { + const output: OutputShape = { + description: "Load a specialized skill", + parameters: {}, // Effect Schema — opencode's tool.definition hook passes this; we leave it alone + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + // New contract: opencode advertises the model-facing schema from + // `output.jsonSchema ?? fromSchema(output.parameters)`. The skill + // tool's `jsonSchema` is currently undefined, so we MUST assign one. + expect(output.jsonSchema).toBeDefined(); + const js = output.jsonSchema as NonNullable; + expect(js.type).toBe("object"); + expect(js.properties).toBeDefined(); + // Mirrors the skill tool's real param: a single required `name` string. + expect(js.properties.name).toBeDefined(); + expect((js.properties.name as { type?: string }).type).toBe("string"); + // The injected `intent` is present and is a string. 
+ expect(js.properties.intent).toBeDefined(); + expect((js.properties.intent as { type?: string }).type).toBe("string"); + // `name` is required; `intent` is optional. + expect(js.required).toEqual(["name"]); + expect(js.required).not.toContain("intent"); + }); + + test("does not touch output.jsonSchema for non-skill tool ids", () => { + const output: OutputShape = { + description: "Read a file", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("read", output); + expect(output.jsonSchema).toBeUndefined(); + }); + + test("extends an existing output.jsonSchema in place (forward-compat)", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: { + type: "object", + properties: { + name: { + type: "string", + description: "The name of the skill from available_skills", + }, + }, + required: ["name"], + }, + }; + injectSkillIntentParam("skill", output); + const js = output.jsonSchema as NonNullable; + // Preserves the existing properties (does not clobber `name`). + expect(js.properties.name).toBeDefined(); + expect((js.properties.name as { type?: string }).type).toBe("string"); + // Adds `intent` alongside. + expect(js.properties.intent).toBeDefined(); + expect((js.properties.intent as { type?: string }).type).toBe("string"); + // Required list untouched. 
+ expect(js.required).toEqual(["name"]); + }); + + test("is idempotent — calling twice does not double-add intent", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + const first = JSON.stringify(output.jsonSchema); + injectSkillIntentParam("skill", output); + const second = JSON.stringify(output.jsonSchema); + expect(second).toBe(first); + const js = output.jsonSchema as NonNullable; + const intentKeys = Object.keys(js.properties).filter((k) => k === "intent"); + expect(intentKeys.length).toBe(1); + }); + + test("intent description mentions skill-memory recall (drives the recall surface)", () => { + const output: OutputShape = { + description: "Skill", + parameters: {}, + jsonSchema: undefined, + }; + injectSkillIntentParam("skill", output); + const js = output.jsonSchema as NonNullable; + const desc = (js.properties.intent as { description?: string }).description ?? ""; + expect(desc).toContain("skill-memory recall"); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts b/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts new file mode 100644 index 00000000..9d78dc48 --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/skill-tool-definition.ts @@ -0,0 +1,81 @@ +/** + * Injects an optional `intent` parameter into the `skill` tool's schema. + * Called from the `tool.definition` hook. + * + * opencode advertises the model-facing schema for a tool via + * fromTool = tool.jsonSchema ?? fromSchema(tool.parameters) + * The `output` of the `tool.definition` hook has shape + * { description, parameters: , jsonSchema: }. + * + * For the `skill` tool: + * - `output.parameters` is an Effect Schema (a Decoder object) — not a plain JSON Schema. + * Mutating `.properties.intent` on it is a no-op for the model-facing schema. 
+ * - `output.jsonSchema` is currently `undefined` (the skill tool never sets it and + * `Tool.define` does not synthesize one), so opencode derives the model-facing + * schema from the Effect `parameters` — which only has `name`. Our mutation is + * invisible to the model. The model sees a `skill` tool with ONLY `name`. + * + * The fix: ASSIGN `output.jsonSchema` with a JSON Schema object that mirrors the + * skill tool's real params (a single required `name` string) plus the optional + * `intent`. This makes `output.jsonSchema !== tool.jsonSchema` (the new reference + * satisfies opencode's registry gate `output.parameters === tool.parameters || + * output.jsonSchema !== tool.jsonSchema`), so the new schema is what opencode + * advertises to the model. We deliberately leave `output.parameters` untouched + * — the Effect schema still governs execute-time validation, which strips + * `intent` via `onExcessProperty: "ignore"` so the existing `tool.execute.before` + * capture path keeps working unchanged. + * + * If a future opencode version starts precomputing a `jsonSchema` for the skill + * tool, we extend it in place rather than clobbering it. + * + * Lives here (not in index.ts) to avoid leaking an internal helper through the + * plugin entry point. index.ts imports and calls it directly. + */ +export function injectSkillIntentParam( + toolID: string, + output: { + parameters?: unknown; + jsonSchema?: { + type?: string; + properties?: Record; + required?: string[]; + additionalProperties?: boolean; + }; + }, +): void { + if (toolID !== "skill") return; + const INTENT_PROP = { + type: "string", + description: + "Optional: describe what you are trying to accomplish with this skill (used for skill-memory recall). E.g. 'fix a flaky test in the auth module'.", + }; + // Forward-compat + idempotency: if opencode (a future version) already provides a + // jsonSchema object, extend it in place rather than clobbering it. 
+ const existing = output.jsonSchema; + if ( + existing && + typeof existing === "object" && + existing.properties && + typeof existing.properties === "object" + ) { + if (!("intent" in existing.properties)) { + existing.properties.intent = INTENT_PROP; + } + return; + } + // Current opencode: the skill tool has no precomputed jsonSchema (undefined), so opencode + // would derive the model-facing schema from the Effect `parameters` (name only). Construct + // a jsonSchema mirroring the skill tool's real params (name, required) PLUS the optional intent. + output.jsonSchema = { + type: "object", + properties: { + name: { + type: "string", + description: "The name of the skill from available_skills", + }, + intent: INTENT_PROP, + }, + required: ["name"], + additionalProperties: false, + }; +} diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index c246d06e..d1428342 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -28,6 +28,7 @@ import { HISTORIAN_EDITOR_SYSTEM_PROMPT, } from "./hooks/magic-context/compartment-prompt"; import { createLiveSessionState } from "./hooks/magic-context/live-session-state"; +import { injectSkillIntentParam } from "./hooks/magic-context/skill-tool-definition"; import { cleanupConflictWarnings, sendConflictWarning } from "./plugin/conflict-warning-hook"; import { startDreamScheduleTimer } from "./plugin/dream-timer"; import { ensureProjectRegisteredFromOpenCodeDirectory } from "./plugin/embedding-bootstrap"; @@ -148,9 +149,23 @@ const server: Plugin = async (ctx) => { liveSessionState, }); + // Fail-loud guard: skillLoadRegistry is required for ctx_skill_note to + // verify the skill was loaded this session. If the after-hook wiring + // is broken, ctx_skill_note would silently read an empty Map and + // every note would return "No recent skill load found" — the exact + // opposite of "fail loud". Catch a wiring regression at startup, not + // at the first ctx_skill_note call from an agent. 
+ if (!hooks.magicContext?.skillLoadRegistry) { + throw new Error( + "[magic-context] ctx_skill_note registration failed: " + + "hooks.magicContext.skillLoadRegistry is missing. " + + "Ensure createMagicContextHook() returns skillLoadRegistry in its return object.", + ); + } const tools = createToolRegistry({ ctx, pluginConfig, + skillLoadRegistry: hooks.magicContext.skillLoadRegistry, }); // v22 deferred legacy-memory identity backfill. createSessionHooks() opens @@ -457,7 +472,16 @@ const server: Plugin = async (ctx) => { // land correctly on the next flight. if (!lastChatContext) return; const typedInput = input as { toolID?: string }; - const typedOutput = output as { description?: unknown; parameters?: unknown }; + const typedOutput = output as { + description?: unknown; + parameters?: unknown; + jsonSchema?: { + type?: string; + properties?: Record; + required?: string[]; + additionalProperties?: boolean; + }; + }; if (!typedInput.toolID) return; recordToolDefinition( lastChatContext.providerID, @@ -467,10 +491,18 @@ const server: Plugin = async (ctx) => { typeof typedOutput.description === "string" ? 
typedOutput.description : "", typedOutput.parameters, ); + // Inject optional intent param for skill-memory recall + injectSkillIntentParam( + typedInput.toolID, + typedOutput as Parameters[1], + ); }, "tool.execute.after": async (input, output) => { await hooks.magicContext?.["tool.execute.after"]?.(input, output); }, + "tool.execute.before": async (input, output) => { + await hooks.magicContext?.["tool.execute.before"]?.(input, output); + }, "experimental.text.complete": async (input, output) => { await hooks.magicContext?.["experimental.text.complete"]?.(input, output); }, diff --git a/packages/plugin/src/plugin/rpc-handlers.test.ts b/packages/plugin/src/plugin/rpc-handlers.test.ts index 09825cec..52c5fe50 100644 --- a/packages/plugin/src/plugin/rpc-handlers.test.ts +++ b/packages/plugin/src/plugin/rpc-handlers.test.ts @@ -5,6 +5,7 @@ import { replaceAllCompartmentState } from "../features/magic-context/compartmen import { insertMemory } from "../features/magic-context/memory"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; import { runMigrations } from "../features/magic-context/migrations"; +import { insertSkillMemoryNote } from "../features/magic-context/skill-memory/storage"; import { initializeDatabase } from "../features/magic-context/storage-db"; import { createLiveSessionState } from "../hooks/magic-context/live-session-state"; import { estimateTokens } from "../hooks/magic-context/read-session-formatting"; @@ -238,3 +239,121 @@ describe("buildStatusDetail — history token reuse (council audit bg_51106601 # } }); }); + +describe("buildStatusDetail — skill memory section", () => { + test("seeds skill_memory rows → detail.skillMemory reflects totals/skills/pinned scoped to the project", async () => { + const db = createTestDb(); + try { + const sessionId = "ses-status-skillmem-pop"; + const directory = process.cwd(); + const projectIdentity = resolveProjectIdentity(directory); + + db.prepare( + "INSERT INTO 
session_meta (session_id, last_input_tokens, last_context_percentage) VALUES (?, 0, 0)", + ).run(sessionId); + + // 3 notes for "tdd", 2 of them pinned + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i1", + kind: "gotcha", + delta: "n1", + normalizedHash: "sm-pop-h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i2", + kind: "fix", + delta: "n2", + normalizedHash: "sm-pop-h2", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i3", + kind: "workflow", + delta: "n3", + normalizedHash: "sm-pop-h3", + createdAt: Date.now(), + }); + db.prepare("UPDATE skill_memory SET pinned = 1 WHERE normalized_hash IN (?, ?)").run( + "sm-pop-h1", + "sm-pop-h2", + ); + + // 1 note for a different skill, not pinned + insertSkillMemoryNote(db, { + skillId: "debugging", + resolvedPath: "/p2", + tier: "global", + skillSource: "opencode-global", + projectIdentity, + intent: "i4", + kind: "discovery", + delta: "n4", + normalizedHash: "sm-pop-h4", + createdAt: Date.now(), + }); + + const detail = await buildStatusDetail(db, sessionId, directory); + expect(detail.skillMemory).not.toBeNull(); + expect(detail.skillMemory?.totalNotes).toBe(4); + expect(detail.skillMemory?.skillsWithNotes).toBe(2); + expect(detail.skillMemory?.pinnedNotes).toBe(2); + } finally { + closeQuietly(db); + } + }); + + test("no project identity (directory is not a git repo / fallback fails) → skillMemory is null", async () => { + const db = createTestDb(); + try { + const sessionId = "ses-status-skillmem-noproj"; + // Use a non-existent directory to force resolveProjectIdentity to + // either throw or land on the dir: fallback. 
Either way we should + // get a deterministic identity — but to specifically exercise the + // "no identity" path we'd need to stub resolveProjectIdentity. + // Simpler check: insert a row under a project that does NOT match + // the resolved one, and assert stats are 0 (proves scoping works). + const directory = process.cwd(); + + db.prepare( + "INSERT INTO session_meta (session_id, last_input_tokens, last_context_percentage) VALUES (?, 0, 0)", + ).run(sessionId); + + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:some-other-project", + intent: "i", + kind: "gotcha", + delta: "isolated", + normalizedHash: "sm-iso-h1", + createdAt: Date.now(), + }); + + const detail = await buildStatusDetail(db, sessionId, directory); + expect(detail.skillMemory).not.toBeNull(); + expect(detail.skillMemory?.totalNotes).toBe(0); + expect(detail.skillMemory?.skillsWithNotes).toBe(0); + expect(detail.skillMemory?.pinnedNotes).toBe(0); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/plugin/rpc-handlers.ts b/packages/plugin/src/plugin/rpc-handlers.ts index 5894c927..aa6e9c57 100644 --- a/packages/plugin/src/plugin/rpc-handlers.ts +++ b/packages/plugin/src/plugin/rpc-handlers.ts @@ -5,6 +5,7 @@ import type { MagicContextConfig } from "../config/schema/magic-context"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; import { getEmbeddingCoverageStatus } from "../features/magic-context/project-embedding-registry"; +import { getSkillMemoryStats } from "../features/magic-context/skill-memory/storage"; import { type ContextDatabase as Database, openDatabase, @@ -536,8 +537,18 @@ export function buildStatusDetail( compressionBudget: null, compressionUsage: null, toastDurationMs: 5000, + skillMemory: null, }; + // Skill-memory stats — scoped to the session's project identity (the + // skill_memory table is partitioned on 
project_identity). base.projectIdentity + // is resolved by buildSidebarSnapshot; we re-use it here to avoid a second + // resolveProjectIdentity call. Null identity → null stats (status dialog + // hides the section). Single SQL aggregate, sync, safe to call every poll. + detail.skillMemory = base.projectIdentity + ? getSkillMemoryStats(db, base.projectIdentity) + : null; + try { const meta = db .prepare<[string], Record>( diff --git a/packages/plugin/src/plugin/tool-registry.ts b/packages/plugin/src/plugin/tool-registry.ts index 14bfd1e7..36a45480 100644 --- a/packages/plugin/src/plugin/tool-registry.ts +++ b/packages/plugin/src/plugin/tool-registry.ts @@ -3,6 +3,7 @@ import type { MagicContextPluginConfig } from "../config"; import { isDreamerRunnable } from "../config/agent-disable"; import { DEFAULT_PROTECTED_TAGS } from "../features/magic-context/defaults"; import { resolveProjectIdentity } from "../features/magic-context/memory/project-identity"; +import type { SkillLoadRegistry } from "../features/magic-context/skill-memory/provenance"; import { getDatabasePersistenceError, isDatabasePersisted, @@ -14,6 +15,8 @@ import { CTX_MEMORY_ACTIONS, createCtxMemoryTools } from "../tools/ctx-memory"; import { createCtxNoteTools } from "../tools/ctx-note"; import { createCtxReduceTools } from "../tools/ctx-reduce"; import { createCtxSearchTools } from "../tools/ctx-search"; +import { CTX_SKILL_NOTE_TOOL_NAME, createCtxSkillNoteTool } from "../tools/ctx-skill-note"; +import { CTX_SKILL_RECALL_TOOL_NAME, createCtxSkillRecallTool } from "../tools/ctx-skill-recall"; import { ensureProjectRegisteredFromOpenCodeDirectory } from "./embedding-bootstrap"; import { normalizeToolArgSchemas } from "./normalize-tool-arg-schemas"; import type { PluginContext } from "./types"; @@ -21,6 +24,7 @@ import type { PluginContext } from "./types"; export function createToolRegistry(args: { ctx: PluginContext; pluginConfig: MagicContextPluginConfig; + skillLoadRegistry: SkillLoadRegistry; 
}): Record { const { ctx, pluginConfig } = args; @@ -101,6 +105,28 @@ export function createToolRegistry(args: { allowedActions: [...CTX_MEMORY_ACTIONS], }) : {}), + // ctx_skill_note: skill-specific memory tool. Reads the session-scoped + // skillLoadRegistry to verify the skill was actually loaded this session + // before allowing a note to be written. The fail-loud guard for a missing + // registry lives in index.ts (the only place hooks.magicContext is in + // scope) so a wiring regression is caught at startup, not at runtime. + // NOT gated on memoryEnabled — skill-memory is an independent store. + [CTX_SKILL_NOTE_TOOL_NAME]: createCtxSkillNoteTool({ + db, + skillLoadRegistry: args.skillLoadRegistry, + }), + // ctx_skill_recall: explicit agent-callable recall tool. Registry-first + // (reuses the already-parsed frontmatterConfig from the transparent path) + // with disk-fallback for cold-start sessions where the skill hasn't been + // loaded yet. projectDirectory is a fallback only — execute() prefers + // toolContext.directory (the session's actual working dir) so `opencode -s` + // from outside the project resolves correctly. No fail-loud guard needed: + // skillLoadRegistry is optional for ctx_skill_recall. + [CTX_SKILL_RECALL_TOOL_NAME]: createCtxSkillRecallTool({ + db, + projectDirectory: ctx.directory, + skillLoadRegistry: args.skillLoadRegistry, + }), }; // Patch arg schemas so property-level .describe() text survives JSON Schema serialization. diff --git a/packages/plugin/src/shared/rpc-types.ts b/packages/plugin/src/shared/rpc-types.ts index 66f4d4e5..6c7e837c 100644 --- a/packages/plugin/src/shared/rpc-types.ts +++ b/packages/plugin/src/shared/rpc-types.ts @@ -123,6 +123,16 @@ export interface StatusDetail extends SidebarSnapshot { compressionUsage: string | null; /** Effective configured toast duration in ms after config resolution. 
*/ toastDurationMs: number; + /** + * Per-skill cross-session recall store (skill_memory table) stats scoped + * to the session's project identity. Null when no project identity is + * available (status dialog will hide the section). + */ + skillMemory?: { + totalNotes: number; + skillsWithNotes: number; + pinnedNotes: number; + } | null; } /** Embedding coverage for `/ctx-embed` status (mirrors getEmbeddingCoverageStatus). */ diff --git a/packages/plugin/src/tools/ctx-skill-note/index.ts b/packages/plugin/src/tools/ctx-skill-note/index.ts new file mode 100644 index 00000000..e8623189 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/index.ts @@ -0,0 +1,3 @@ +export { CTX_SKILL_NOTE_TOOL_NAME, createCtxSkillNoteTool } from "./tools"; +export type { CtxSkillNoteArgs, CtxSkillNoteToolDeps, SkillNoteKind } from "./types"; +export { VALID_KINDS } from "./types"; diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts new file mode 100644 index 00000000..6bad3790 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { + createSkillLoadRegistry, + registryKey, + type SkillLoadRegistry, +} from "../../features/magic-context/skill-memory/provenance"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { createCtxSkillNoteTool } from "./tools"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +const toolContext = (sessionID = "ses_test", agent = "general") => + ({ sessionID, agent, directory: "/tmp/test" }) as never; + +describe("ctx_skill_note tool", () => { + test("rejects kind='general' with 
actionable error (hard gate)", async () => { + const db = makeDb(); + const registry = createSkillLoadRegistry(); + try { + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { skill: "tdd", intent: "fix test", kind: "general" as never, delta: "some note" }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("ctx_memory"); + } finally { + closeQuietly(db); + } + }); + + test("returns actionable error when skill not in registry", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { + skill: "nonexistent-skill", + intent: "fix test", + kind: "gotcha", + delta: "some note", + }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("No recent skill load found"); + expect(result).toContain("nonexistent-skill"); + } finally { + closeQuietly(db); + } + }); + + test("inserts note when skill is in registry", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + // Pre-populate registry + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + const result = await t.execute( + { + skill: "tdd", + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock", + }, + toolContext(), + ); + expect(typeof result).toBe("string"); + expect(result).toContain("saved"); + } finally { + closeQuietly(db); + } + }); + + test("deduplicates: bumps hit_count on exact duplicate 
delta", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { skill: "tdd", intent: "fix test", kind: "gotcha", delta: "Exact duplicate note" }, + toolContext(), + ); + const result = await t.execute( + { skill: "tdd", intent: "fix test", kind: "gotcha", delta: "Exact duplicate note" }, + toolContext(), + ); + expect(result).toContain("already recorded"); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.ts b/packages/plugin/src/tools/ctx-skill-note/tools.ts new file mode 100644 index 00000000..1c304b04 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/tools.ts @@ -0,0 +1,143 @@ +import { type ToolContext, type ToolDefinition, tool } from "@opencode-ai/plugin"; +import { computeNormalizedHash } from "../../features/magic-context/memory/normalize-hash"; +import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; +import { getSkillLoad } from "../../features/magic-context/skill-memory/provenance"; +import { + bumpHitCount, + findExistingNote, + insertSkillMemoryNote, +} from "../../features/magic-context/skill-memory/storage"; +import { + CTX_SKILL_NOTE_TOOL_NAME, + type CtxSkillNoteArgs, + type CtxSkillNoteToolDeps, + VALID_KINDS, +} from "./types"; + +// NOTE on tool() API: the real @opencode-ai/plugin tool() takes: +// { description, args: ZodRawShape, execute(args, context: ToolContext) } +// `name` is registry-level (passed when registering, not inside tool body). 
+// `args` uses tool.schema.* (Zod-like) for field definitions, NOT a JSON Schema object. +// See packages/plugin/src/tools/ctx-memory/tools.ts for the canonical pattern. +// +// The tool body below uses the correct API shape. The `name` field is intentionally +// absent from the tool() call — it is provided at registration time in the tool registry. + +export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefinition { + return tool({ + description: + "Record a skill-specific note (gotcha, discovery, fix, or workflow step) for future recall. " + + "Call after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. " + + "Skip routine successes. Notes are recalled automatically on the next load of the same skill.", + args: { + skill: tool.schema.string().describe("The skill name (e.g. 'test-driven-development')"), + intent: tool.schema + .string() + .describe("The task/intent context when this note was learned"), + kind: tool.schema + .enum(VALID_KINDS) + .describe( + "Note type: 'gotcha' (non-obvious trap), 'discovery' (better approach found), " + + "'fix' (error→solution), 'workflow' (step that must not be skipped). " + + "Do NOT use 'general' — general observations belong in ctx_memory.", + ), + delta: tool.schema + .string() + .describe("The note content — concise, actionable, specific to this skill"), + tags: tool.schema + .array(tool.schema.string()) + .optional() + .describe("Optional tags for future filtering"), + }, + execute: async (args: CtxSkillNoteArgs, toolContext: ToolContext) => { + // Hard gate: reject kind='general' + if ((args.kind as string) === "general") { + return ( + "'kind: general' is not a valid skill-memory note type. " + + "General observations belong in `ctx_memory` with an appropriate category " + + "(e.g. PROJECT_RULES, CONSTRAINTS, ARCHITECTURE). " + + "Use ctx_skill_note only for gotchas, discoveries, fixes, or workflow steps specific to this skill." 
+ ); + } + + if (!VALID_KINDS.includes(args.kind)) { + return `Invalid kind '${args.kind}'. Must be one of: ${VALID_KINDS.join(", ")}.`; + } + + const sessionId = toolContext.sessionID; + if (!sessionId) return "Error: no session ID available."; + + // Resolve skill from session-scoped registry + const registryEntry = getSkillLoad(deps.skillLoadRegistry, sessionId, args.skill); + if (!registryEntry) { + return ( + `No recent skill load found for '${args.skill}' in this session — load it first with the skill tool. ` + + `If you just loaded it, this may indicate a provenance parse failure (check that the skill output contains a 'Base directory for this skill:' line).` + ); + } + + // Use toolContext.directory (the session's working directory) rather than + // a launch dir. This matches ctx_memory's pattern and correctly handles + // `opencode -s` launched outside the project root. + const projectIdentity = resolveProjectIdentity(toolContext.directory); + const normalizedHash = computeNormalizedHash(args.delta); + + // Check for exact duplicate + const existing = findExistingNote( + deps.db, + args.skill, + registryEntry.tier, + projectIdentity, + normalizedHash, + ); + if (existing) { + bumpHitCount( + deps.db, + args.skill, + registryEntry.tier, + projectIdentity, + normalizedHash, + ); + return ( + `Note already recorded (hit_count now ${existing.hit_count + 1}). 
` + + `Exact duplicate detected — hit count bumped to reinforce recall priority.` + ); + } + + // Insert new note + const id = insertSkillMemoryNote(deps.db, { + skillId: args.skill, + resolvedPath: registryEntry.resolvedPath, + tier: registryEntry.tier, + skillSource: registryEntry.skillSource, + projectIdentity, + intent: args.intent, + kind: args.kind, + delta: args.delta, + tags: args.tags, + normalizedHash, + createdAt: Date.now(), + }); + + if (id === null) { + // Race condition: another process inserted the same hash + bumpHitCount( + deps.db, + args.skill, + registryEntry.tier, + projectIdentity, + normalizedHash, + ); + return "Note already recorded (concurrent insert detected — hit count bumped)."; + } + + return ( + `Skill note saved (id=${id}, skill=${args.skill}, kind=${args.kind}, tier=${registryEntry.tier}). ` + + `It will be recalled on the next load of '${args.skill}' in this project.` + ); + }, + }); +} + +// Re-export the tool name for the registration site (lives in plugin/tool-registry.ts). 
+export { CTX_SKILL_NOTE_TOOL_NAME }; diff --git a/packages/plugin/src/tools/ctx-skill-note/types.ts b/packages/plugin/src/tools/ctx-skill-note/types.ts new file mode 100644 index 00000000..c50bbe99 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-note/types.ts @@ -0,0 +1,23 @@ +import type { SkillLoadRegistry } from "../../features/magic-context/skill-memory/provenance"; +import type { Database } from "../../shared/sqlite"; + +export const CTX_SKILL_NOTE_TOOL_NAME = "ctx_skill_note"; + +export const VALID_KINDS = ["gotcha", "discovery", "fix", "workflow"] as const; +export type SkillNoteKind = (typeof VALID_KINDS)[number]; + +export interface CtxSkillNoteArgs { + skill: string; + intent: string; + kind: SkillNoteKind; + delta: string; + tags?: string[]; +} + +export interface CtxSkillNoteToolDeps { + db: Database; + skillLoadRegistry: SkillLoadRegistry; + // NOTE: projectDirectory is intentionally absent — execute uses toolContext.directory + // (the session's working directory) to match ctx_memory's pattern and correctly handle + // `opencode -s` launched outside the project root. See Task 8 Step 3 for rationale. 
+} diff --git a/packages/plugin/src/tools/ctx-skill-recall/index.ts b/packages/plugin/src/tools/ctx-skill-recall/index.ts new file mode 100644 index 00000000..c89e7c34 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/index.ts @@ -0,0 +1,2 @@ +export { CTX_SKILL_RECALL_TOOL_NAME, createCtxSkillRecallTool } from "./tools"; +export type { CtxSkillRecallArgs, CtxSkillRecallToolDeps } from "./types"; diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts new file mode 100644 index 00000000..dc742db8 --- /dev/null +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, test } from "bun:test"; +import { runMigrations } from "../../features/magic-context/migrations"; +import { parseFrontmatterConfig } from "../../features/magic-context/skill-memory/frontmatter"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; +import { initializeDatabase } from "../../features/magic-context/storage-db"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { createCtxSkillRecallTool } from "./tools"; + +// DI-based tests: inject _testFrontmatterConfig + _testProjectIdentity via deps +// to bypass SKILL.md disk resolution and resolveProjectIdentity() entirely. +// This avoids: +// 1. Missing SKILL.md → null frontmatterConfig → early return → no block +// 2. 
resolveProjectIdentity("/tmp/test") mismatch with test-inserted projectIdentity + +const TEST_PROJECT_IDENTITY = "git:abc"; +const ENABLED_FRONTMATTER = parseFrontmatterConfig( + "---\nskill-memory:\n enabled: true\n max_tokens: 1500\n max_pinned_tokens: 4000\n dedup_threshold: 0.92\n---\n", +); + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("ctx_skill_recall tool", () => { + test("returns block string when notes exist for skill (DI injection)", async () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: TEST_PROJECT_IDENTITY, + intent: "fix flaky test", + kind: "gotcha", + delta: "Always mock the clock in auth tests", + normalizedHash: "h-recall-tool-1", + createdAt: Date.now(), + }); + + // Inject frontmatterConfig + projectIdentity via DI — no SKILL.md needed + const tool = createCtxSkillRecallTool({ + db, + projectDirectory: "/tmp/test", + _testFrontmatterConfig: ENABLED_FRONTMATTER, + _testProjectIdentity: TEST_PROJECT_IDENTITY, + }); + const result = await tool.execute({ skill: "tdd", intent: "fix flaky test" }, { + sessionID: "ses_test", + agent: "general", + directory: "/tmp/test", + } as never); + expect(typeof result).toBe("string"); + expect(result).toContain(" { + const db = makeDb(); + try { + // Inject enabled frontmatter + matching projectIdentity, but no notes inserted + const tool = createCtxSkillRecallTool({ + db, + projectDirectory: "/tmp/test", + _testFrontmatterConfig: ENABLED_FRONTMATTER, + _testProjectIdentity: TEST_PROJECT_IDENTITY, + }); + const result = await tool.execute({ skill: "nonexistent-skill" }, { + sessionID: "ses_test", + agent: "general", + directory: "/tmp/test", + } as never); + expect(typeof result).toBe("string"); + expect(result).toContain("No skill-memory found"); + 
// NOTE on tool() API: same pattern as ctx_skill_note.
// `name` is registry-level (not in tool body). `args` uses tool.schema.* Zod-like shape.
// See packages/plugin/src/tools/ctx-memory/tools.ts for the canonical pattern.

/** Whatever `getSkillLoad` yields for a session/skill pair (entry or "not loaded"). */
type SkillLoadEntry = ReturnType<typeof getSkillLoad>;

/** File name looked up inside each candidate skill directory. */
const SKILL_FILE_NAME = "SKILL.md";

/**
 * True when `skill` must not be interpolated into a filesystem path:
 * blank names, or names containing a `..` path segment (either separator),
 * which would let the lookup escape the skill directories.
 */
function isUnsafeSkillName(skill: string): boolean {
    if (skill.trim().length === 0) return true;
    return skill.split(/[\\/]/).includes("..");
}

/**
 * Candidate skill directories in lookup order: project-local first (project
 * shadows global), then global. Global directories are omitted entirely when
 * no home directory is known, instead of degrading to root-anchored paths.
 */
function listCandidateSkillDirs(projectDirectory: string, home: string, skill: string): string[] {
    const projectDirs = [
        `${projectDirectory}/.opencode/skill/${skill}`,
        `${projectDirectory}/.opencode/skills/${skill}`,
        `${projectDirectory}/.agents/skills/${skill}`,
        `${projectDirectory}/.claude/skills/${skill}`,
    ];
    if (!home) return projectDirs;
    return [
        ...projectDirs,
        `${home}/.config/opencode/skills/${skill}`,
        `${home}/.agents/skills/${skill}`,
        `${home}/.claude/skills/${skill}`,
    ];
}

/**
 * Reads SKILL.md from the first directory in `dirs` where the read succeeds.
 * Returns the matched directory and file content, or null when every read fails.
 */
function readFirstSkillFile(dirs: readonly string[]): { dir: string; content: string } | null {
    for (const dir of dirs) {
        try {
            return { dir, content: readFileSync(`${dir}/${SKILL_FILE_NAME}`, "utf-8") };
        } catch {
            // Best-effort probe: missing or unreadable in this dir — try the next one.
        }
    }
    return null;
}

/**
 * Builds the `ctx_skill_recall` tool: an explicit, on-demand query of the
 * skill-memory notes stored for a named skill.
 *
 * Resolution is registry-first: when the skill was already loaded in this
 * session, the registry entry supplies the frontmatter config and tier with no
 * disk I/O. Otherwise SKILL.md is searched on disk (project dirs, then global
 * dirs) and its frontmatter parsed.
 *
 * The tool always resolves to a string: the recalled block, or one of several
 * distinct explanatory messages (invalid name, SKILL.md not found, skill-memory
 * not enabled, no notes yet).
 *
 * @param deps Database handle, fallback project directory, optional skill-load
 *   registry, and test-only overrides that bypass resolution entirely.
 */
export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefinition {
    return tool({
        description:
            "Explicitly recall skill-memory notes for a named skill without re-loading the skill. " +
            "Use when you want to query accumulated gotchas/discoveries for a skill you have already loaded " +
            "this session, or when you need to recall notes without triggering a full skill load. " +
            "Returns the block as a string, or a 'No skill-memory found' message when empty.",
        args: {
            skill: tool.schema
                .string()
                .describe("The skill name to recall notes for (e.g. 'test-driven-development')"),
            intent: tool.schema
                .string()
                .optional()
                .describe(
                    "Optional: your current task intent — used for intent-scoped recall (P2). Omit for flat recall.",
                ),
            max_tokens: tool.schema
                .number()
                .optional()
                .describe(
                    "Optional token budget override. Defaults to the skill's frontmatter max_tokens (or 1500 if absent).",
                ),
        },
        execute: async (args: CtxSkillRecallArgs, toolContext: ToolContext) => {
            // Test-only DI overrides (bypass all resolution)
            if (
                deps._testFrontmatterConfig !== undefined ||
                deps._testProjectIdentity !== undefined
            ) {
                const projectIdentity =
                    deps._testProjectIdentity ??
                    resolveProjectIdentity(toolContext.directory ?? deps.projectDirectory);
                const frontmatterConfig = deps._testFrontmatterConfig ?? null;
                const tier: "project" | "global" = "global"; // default for test injection
                const block = recallSkillMemoryBlock(deps.db, {
                    skill: args.skill,
                    intent: args.intent,
                    scope: tier,
                    projectIdentity,
                    frontmatterConfig,
                    maxTokens: args.max_tokens,
                });
                if (!block) {
                    return `No skill-memory found for '${args.skill}' in this session.`;
                }
                return block;
            }

            // Resolve project identity from the session's working directory
            const projectDirectory = toolContext.directory ?? deps.projectDirectory;
            const projectIdentity = resolveProjectIdentity(projectDirectory);

            // ── RESOLUTION: REGISTRY-FIRST + disk-fallback ──────────────────────────
            //
            // 1. Registry-first (common case): if the skill was loaded this session,
            //    the transparent path already populated SkillLoadRegistry with the
            //    frontmatterConfig and tier. Use it — no disk I/O needed.
            //
            // 2. Disk-fallback (cold-start): only when NOT in registry, search disk,
            //    project dirs first (project shadows global), then global dirs:
            //      project: .opencode/skill/, .opencode/skills/, .agents/skills/, .claude/skills/
            //      global:  ~/.config/opencode/skills/, ~/.agents/skills/, ~/.claude/skills/
            //    Custom paths configured by the user are not searched here.
            const sessionId = toolContext.sessionID;
            const registryEntry: SkillLoadEntry =
                sessionId && deps.skillLoadRegistry
                    ? getSkillLoad(deps.skillLoadRegistry, sessionId, args.skill)
                    : undefined;

            let frontmatterConfig = registryEntry?.frontmatterConfig ?? null;
            let tier: SkillProvenance["tier"] = "global";

            if (registryEntry) {
                // Registry hit: exact resolution, reuse already-parsed frontmatterConfig
                tier = registryEntry.tier;
            } else {
                // The skill name comes from the model; never let it walk out of the
                // skill directories when it is turned into a path.
                if (isUnsafeSkillName(args.skill)) {
                    return (
                        `Invalid skill name '${args.skill}': a skill name must be non-empty ` +
                        `and must not contain '..' path segments.`
                    );
                }

                // `||` (not `??`) so an empty HOME falls through to USERPROFILE.
                const home = process.env.HOME || process.env.USERPROFILE || "";
                const found = readFirstSkillFile(
                    listCandidateSkillDirs(projectDirectory, home, args.skill),
                );

                if (!found) {
                    // SKILL.md not found anywhere — distinct message from "no notes" cold-start
                    return (
                        `SKILL.md not found for '${args.skill}' in any known skill directory. ` +
                        `Load the skill first with the skill tool, or verify the skill name is correct. ` +
                        `Searched: project .opencode/skill/, .opencode/skills/, .agents/skills/, .claude/skills/; ` +
                        `global ~/.config/opencode/skills/, ~/.agents/skills/, ~/.claude/skills/.`
                    );
                }

                // Parse frontmatter from on-disk SKILL.md (an empty file carries no config)
                frontmatterConfig = found.content ? parseFrontmatterConfig(found.content) : null;
                // Derive tier from the directory that actually contained SKILL.md
                tier = deriveSkillTier(found.dir);
            }

            if (!frontmatterConfig?.enabled) {
                // Distinct message: skill-memory disabled in frontmatter (not "no notes")
                return (
                    `skill-memory is not enabled for '${args.skill}'. ` +
                    `To enable it, add \`skill-memory: { enabled: true }\` to the skill's SKILL.md frontmatter.`
                );
            }

            // Delegate to shared recall core (feature layer — same as transparent path)
            const block = recallSkillMemoryBlock(deps.db, {
                skill: args.skill,
                intent: args.intent,
                scope: tier,
                projectIdentity,
                frontmatterConfig,
                maxTokens: args.max_tokens,
            });

            if (!block) {
                // Cold-start: skill-memory enabled but no notes recorded yet
                return `No skill-memory found for '${args.skill}' — no notes have been recorded yet. Use ctx_skill_note to record gotchas, discoveries, fixes, or workflow steps after using this skill.`;
            }

            return block;
        },
    });
}

// Re-export the tool name for the registration site (lives in plugin/tool-registry.ts).
export { CTX_SKILL_RECALL_TOOL_NAME };
+ skillLoadRegistry?: SkillLoadRegistry; + // Test-only DI overrides (bypass disk resolution entirely): + _testFrontmatterConfig?: SkillMemoryConfig | null; + _testProjectIdentity?: string; +} diff --git a/packages/plugin/src/tools/index.ts b/packages/plugin/src/tools/index.ts index ab712beb..47965a65 100644 --- a/packages/plugin/src/tools/index.ts +++ b/packages/plugin/src/tools/index.ts @@ -3,3 +3,5 @@ export * from "./ctx-memory"; export * from "./ctx-note"; export * from "./ctx-reduce"; export * from "./ctx-search"; +export * from "./ctx-skill-note"; +export * from "./ctx-skill-recall"; diff --git a/packages/plugin/src/tui/index.tsx b/packages/plugin/src/tui/index.tsx index eca06f0c..daafce9a 100644 --- a/packages/plugin/src/tui/index.tsx +++ b/packages/plugin/src/tui/index.tsx @@ -463,6 +463,26 @@ const StatusDialog = (props: { api: TuiPluginApi; s: StatusDetail }) => { {s().lastDreamerRunAt && ( )} + {/* Skill-memory store (per-skill cross-session recall) — + only when the status detail carries the aggregate + (requires a resolved project identity). Mirrors the + text-mode executeStatus "### Skill memory" section. */} + {s().skillMemory && (() => { + const sm = s().skillMemory! + return ( + + + Skill Memory + + + + + ) + })()} From 01053949cb2fe073c8a4920d1facddd123d39388 Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 18 Jun 2026 00:28:53 +0200 Subject: [PATCH 02/10] feat(skill-memory): embeddings + intent-scoped recall (P2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgrade recall from flat recency×hit to a multi-rung cascade: intent + model-matched embeddings → cosine blend across intent_embedding + delta_embedding (relevance/recency/hit weights tunable per skill via ranking_* frontmatter); intent + no model match → FTS5 fallback over the content-linked skill_memory_fts vtable; empty → flat fallback. - migration: delta_embedding + recall_count columns + skill_memory_fts FTS5 vtable. 
- embed-on-write in insertSkillMemoryNote; delta-only semantic dedup. - programmatic, no-LLM reembed pre-step for the distill-skill-memory dreamer task. - read-side recall_count (distinct from write-side hit_count). - canonical vector serde + dedup/ranking/FTS query helpers. --- .../features/magic-context/dreamer/runner.ts | 1138 +++++++++++++++++ .../magic-context/dreamer/task-prompts.ts | 45 +- .../memory/storage-memory-embeddings.test.ts | 11 + .../memory/storage-memory-embeddings.ts | 7 +- .../src/features/magic-context/migrations.ts | 59 +- .../skill-memory/frontmatter.test.ts | 25 + .../magic-context/skill-memory/frontmatter.ts | 15 + .../magic-context/skill-memory/recall.test.ts | 371 +++++- .../magic-context/skill-memory/recall.ts | 224 +++- .../skill-memory/reembed.test.ts | 45 + .../magic-context/skill-memory/reembed.ts | 43 + .../skill-memory/storage.test.ts | 252 ++++ .../magic-context/skill-memory/storage.ts | 129 +- .../src/features/magic-context/storage-db.ts | 2 +- .../hooks/magic-context/hook-handlers.test.ts | 46 +- .../src/hooks/magic-context/hook-handlers.ts | 23 +- .../plugin/src/hooks/magic-context/hook.ts | 1 + .../skill-memory-injection.test.ts | 28 +- .../src/tools/ctx-skill-note/tools.test.ts | 103 +- .../plugin/src/tools/ctx-skill-note/tools.ts | 43 + .../src/tools/ctx-skill-recall/tools.ts | 4 +- 21 files changed, 2510 insertions(+), 104 deletions(-) create mode 100644 packages/plugin/src/features/magic-context/dreamer/runner.ts create mode 100644 packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/reembed.ts diff --git a/packages/plugin/src/features/magic-context/dreamer/runner.ts b/packages/plugin/src/features/magic-context/dreamer/runner.ts new file mode 100644 index 00000000..dd0b2c45 --- /dev/null +++ 
// Keyed by project identity (a "git:"-prefixed key), not filesystem path. Two
// worktrees/clones of the same repo collapse to the same identity, so in a
// single process this map's entry for that identity is "last-registered wins"
// — it can point at a different checkout than the one draining the queue. This
// map is only a FALLBACK now: production drain callers pass their own
// `sessionDirectoryOverride` (the directory of the project THIS process owns),
// so the dequeued entry always runs in a live checkout the draining process
// actually registered, never a stale sibling-checkout path. See
// processDreamQueue's sessionDirectoryOverride.
const dreamProjectDirectories = new Map<string, string>();

/** Number of consecutive same-signature task failures that aborts the remaining dream tasks. */
const CIRCUIT_BREAKER_THRESHOLD = 3;

/** Settings for the experimental key-file pinning phase. */
interface ExperimentalPinKeyFilesConfig {
    enabled: boolean;
    token_budget: number;
    min_reads: number;
}

/**
 * Records the filesystem directory to use for a project identity.
 * A later registration for the same identity replaces the earlier one.
 */
export function registerDreamProjectDirectory(projectIdentity: string, directory: string): void {
    dreamProjectDirectories.set(projectIdentity, directory);
}

/**
 * Looks up the registered directory for a project identity.
 * When nothing was registered, the identity string itself is returned.
 */
function resolveDreamSessionDirectory(projectIdentity: string): string {
    return dreamProjectDirectories.get(projectIdentity) ?? projectIdentity;
}
/** Outcome of one dream run: timing, lease holder, smart-note counters and per-task records. */
export interface DreamRunResult {
    startedAt: number;
    finishedAt: number;
    holderId: string;
    smartNotesSurfaced: number;
    smartNotesPending: number;
    tasks: {
        name: string;
        durationMs: number;
        result: unknown;
        error?: string;
    }[];
}

/**
 * Counts the entries of `afterIds` that do not occur in `beforeIds`.
 * Duplicates in `afterIds` are counted once per occurrence.
 */
function countNewIds(beforeIds: number[], afterIds: number[]): number {
    const beforeSet = new Set(beforeIds);
    let count = 0;
    for (const id of afterIds) {
        if (!beforeSet.has(id)) {
            count += 1;
        }
    }
    return count;
}

/**
 * Derives a short signature used to decide whether consecutive task failures
 * are "the same" error.
 *
 * Preference order: a specific error name (anything other than the generic
 * "Error"), then the first whitespace-delimited word of the text before the
 * first ":" in `brief`, then `brief` itself, then "unknown".
 */
function getCircuitBreakerSignature(error: unknown, brief: string): string {
    if (error instanceof Error && error.name && error.name !== "Error") {
        return error.name;
    }

    // Duck-typed fallback for error-like objects that are not Error instances.
    const namedError = error as { name?: unknown } | null;
    if (
        namedError &&
        typeof namedError === "object" &&
        typeof namedError.name === "string" &&
        namedError.name.length > 0 &&
        namedError.name !== "Error"
    ) {
        return namedError.name;
    }

    return brief.split(":")[0]?.trim().split(/\s+/)[0] || brief || "unknown";
}

/**
 * Matches the word "lease" in lower-cased text, but only when it is not the
 * tail of a longer word: "dream lease lost" and "leaselosterror" match,
 * "please" and "release" do not.
 */
const LEASE_ERROR_PATTERN = /(?<![a-z])lease/;

/**
 * True for failures that must not count toward the circuit breaker: aborts
 * (error name "AbortError") and lease-related errors.
 *
 * The lease check looks at the lower-cased "<name> <brief>" text. It requires
 * "lease" not to be preceded by a letter, so that unrelated messages such as
 * "please try again" or "release" no longer reset the breaker.
 */
function shouldSkipCircuitBreaker(error: unknown, brief: string): boolean {
    const namedError = error as { name?: unknown } | null;
    const name =
        error instanceof Error
            ? error.name
            : namedError && typeof namedError === "object" && typeof namedError.name === "string"
              ? namedError.name
              : "";
    if (name === "AbortError") return true;
    const combined = `${name} ${brief}`.toLowerCase();
    return LEASE_ERROR_PATTERN.test(combined);
}
/** Logs `message`, attaching `stackHead` as structured detail only when one is present. */
function logWithStackHead(message: string, stackHead?: string): void {
    log(message, stackHead ? { stackHead } : undefined);
}

/** Path of the OpenCode database file under the shared data directory. */
function getOpenCodeDbPath(): string {
    return join(getDataDir(), "opencode", "opencode.db");
}

/**
 * Opens the OpenCode database read-only with a 5s busy timeout.
 *
 * Returns null (after logging) when the file does not exist or cannot be
 * opened/configured. If the handle was opened but configuring it failed, the
 * handle is closed before returning so it does not leak.
 * The caller owns the returned handle and is responsible for closing it.
 */
function openOpenCodeDb(): Database | null {
    const dbPath = getOpenCodeDbPath();
    if (!existsSync(dbPath)) {
        log(`[key-files] OpenCode DB not found at ${dbPath} — skipping`);
        return null;
    }

    let db: Database | null = null;
    try {
        db = new Database(dbPath, { readonly: true });
        db.exec("PRAGMA busy_timeout = 5000");
        return db;
    } catch (error) {
        // The constructor may have succeeded before the PRAGMA threw.
        if (db) {
            closeQuietly(db);
        }
        log(`[key-files] failed to open OpenCode DB at ${dbPath}: ${getErrorMessage(error)}`);
        return null;
    }
}
+ */ + fallbackModels?: readonly string[]; +}): Promise { + const holderId = crypto.randomUUID(); + const startedAt = Date.now(); + const result: DreamRunResult = { + startedAt, + finishedAt: startedAt, + holderId, + smartNotesSurfaced: 0, + smartNotesPending: 0, + tasks: [], + }; + const memoryCountsBefore = getMemoryCountsByStatus(args.db, args.projectIdentity); + + log( + `[dreamer] starting dream run: ${args.tasks.length} tasks, timeout=${args.taskTimeoutMinutes}m, maxRuntime=${args.maxRuntimeMinutes}m, project=${args.projectIdentity}`, + ); + + if (!acquireLease(args.db, holderId)) { + const currentHolder = getLeaseHolder(args.db) ?? "another holder"; + log(`[dreamer] lease acquisition failed — already held by ${currentHolder}`); + result.tasks.push({ + name: "lease", + durationMs: 0, + result: null, + error: `Dream lease is already held by ${currentHolder}`, + }); + result.finishedAt = Date.now(); + return result; + } + log(`[dreamer] lease acquired: ${holderId}`); + + // Resolve a parent session ID so child sessions are hidden from the UI session list. + // /ctx-dream passes the active session; scheduled runs resolve from the API. + let parentSessionId = args.parentSessionId; + if (!parentSessionId) { + try { + const sessionDir = args.sessionDirectory ?? 
args.projectIdentity; + const listResponse = await args.client.session.list({ + query: { directory: sessionDir }, + }); + const sessions = shared.normalizeSDKResponse(listResponse, [] as { id?: string }[], { + preferResponseOnMissingData: true, + }); + // Intentional: any existing session works — we just need parentID so child sessions don't appear in the UI + parentSessionId = sessions?.find((s) => typeof s?.id === "string")?.id; + if (parentSessionId) { + log(`[dreamer] resolved parent session: ${parentSessionId}`); + } + } catch { + log( + "[dreamer] could not resolve parent session — child sessions will be visible in UI", + ); + } + } + + const deadline = startedAt + args.maxRuntimeMinutes * 60 * 1000; + // Strictly per-project (no global-key fallback — it cross-contaminated the + // maintain-docs cutoff across projects). + const lastDreamAt = getDreamState(args.db, `last_dream_at:${args.projectIdentity}`); + log(`[dreamer] last dream at: ${lastDreamAt ?? "never"} (project=${args.projectIdentity})`); + + let lastErrorSignature: string | null = null; + let consecutiveSameErrorFailures = 0; + let circuitBreakerTripped = false; + let lostLease = false; + let lostLeaseReason: string | null = null; + let lostLeaseRecorded = false; + + const markLeaseLost = (phase: string, error?: unknown): void => { + const detail = error ? `: ${getErrorMessage(error)}` : ""; + lostLeaseReason = `Dream lease lost during ${phase}${detail}`; + if (!lostLease) { + log(`[dreamer] FATAL: ${lostLeaseReason}; aborting all remaining dream work`); + } else { + log(`[dreamer] FATAL: ${lostLeaseReason}; dream work is already aborting`); + } + lostLease = true; + }; + + const recordLeaseLostTask = (phase: string): void => { + if (lostLeaseRecorded) return; + lostLeaseRecorded = true; + result.tasks.push({ + name: "lease-lost", + durationMs: 0, + result: "", + error: lostLeaseReason ?? 
`Dream lease lost during ${phase}; aborted remaining work`, + }); + }; + + const verifyLeaseStillHeld = (phase: string): boolean => { + if (lostLease) return false; + try { + if (!renewLease(args.db, holderId)) { + markLeaseLost(phase); + return false; + } + return true; + } catch (error) { + markLeaseLost(phase, error); + return false; + } + }; + + try { + for (const taskName of args.tasks) { + if (!verifyLeaseStillHeld(`before task ${taskName}`)) { + recordLeaseLostTask(`before task ${taskName}`); + break; + } + if (Date.now() > deadline) { + log(`[dreamer] deadline reached, stopping after ${result.tasks.length} tasks`); + break; + } + + log(`[dreamer] starting task: ${taskName}`); + const taskStartedAt = Date.now(); + let agentSessionId: string | null = null; + // Keep FAILED dreamer child sessions for debugging (the task's model + // output + error stay inspectable); delete only on success. + let taskFailed = false; + const invocationStartedAt = Date.now(); + let invocationRecorded = false; + const recordInvocation = (params: { + status: "completed" | "failed" | "aborted"; + messages?: unknown[]; + error?: unknown; + }) => { + if (!parentSessionId || invocationRecorded) return; + invocationRecorded = true; + recordChildInvocation({ + db: args.db, + parentSessionId, + harness: "opencode", + subagent: "dreamer", + task: taskName, + startedAt: invocationStartedAt, + status: params.status, + messages: params.messages, + error: params.error, + }); + }; + // AbortController lets us cancel the in-flight LLM prompt immediately when lease is lost + const taskAbortController = new AbortController(); + // Renew lease periodically while the LLM task runs (can take 5+ min on slow models) + const leaseRenewalInterval = setInterval(() => { + try { + if (!renewLease(args.db, holderId)) { + log(`[dreamer] task ${taskName}: lease renewal failed — aborting LLM call`); + markLeaseLost(`task ${taskName} lease renewal`); + taskAbortController.abort(); + } + } catch (err) { + log( + 
`[dreamer] task ${taskName}: lease renewal threw — aborting LLM call: ${err}`, + ); + markLeaseLost(`task ${taskName} lease renewal`, err); + taskAbortController.abort(); + } + }, 60_000); + + try { + // Use sessionDirectory (filesystem path) for file checks, not projectPath (identity like "git:") + const docsDir = args.sessionDirectory ?? args.projectIdentity; + const maintainDocsSnapshot = + taskName === "maintain-docs" ? snapshotMaintainDocsFiles(docsDir) : undefined; + const existingDocs = + taskName === "maintain-docs" + ? { + architecture: existsSync(join(docsDir, "ARCHITECTURE.md")), + structure: existsSync(join(docsDir, "STRUCTURE.md")), + } + : undefined; + + // Load user memories for archive-stale dedup context + const userMemories = + taskName === "archive-stale" + ? getActiveUserMemories(args.db).map((um) => ({ + id: um.id, + content: um.content, + })) + : undefined; + + if (taskName === "distill-skill-memory") { + await reembedStaleSkillNotes(args.db, args.projectIdentity); + } + + const taskPrompt = buildDreamTaskPrompt(taskName, { + projectPath: args.projectIdentity, + lastDreamAt, + existingDocs, + userMemories, + }); + + const createResponse = await args.client.session.create({ + body: { + ...(parentSessionId ? { parentID: parentSessionId } : {}), + title: `magic-context-dream-${taskName}`, + }, + query: { directory: args.sessionDirectory ?? args.projectIdentity }, + }); + + const createdSession = shared.normalizeSDKResponse( + createResponse, + null as { id?: string } | null, + { preferResponseOnMissingData: true }, + ); + agentSessionId = typeof createdSession?.id === "string" ? 
createdSession.id : null; + if (!agentSessionId) { + const error = new Error("Dreamer could not create its child session."); + recordInvocation({ status: "failed", error }); + throw error; + } + log(`[dreamer] task ${taskName}: child session created ${agentSessionId}`); + const childSessionId = agentSessionId; + + const dreamTaskRun = await shared.promptSyncWithValidatedOutputRetry( + args.client, + { + path: { id: childSessionId }, + query: { directory: args.sessionDirectory ?? args.projectIdentity }, + body: { + agent: DREAMER_AGENT, + system: DREAMER_SYSTEM_PROMPT, + // synthetic: true hides the dreamer task prompt from the TUI + // subagent pane while still delivering it to the model. See issue #50. + parts: [{ type: "text", text: taskPrompt, synthetic: true }], + }, + }, + { + timeoutMs: args.taskTimeoutMinutes * 60 * 1000, + signal: taskAbortController.signal, + fallbackModels: args.fallbackModels, + callContext: `dreamer:${taskName}`, + fetchOutput: async () => { + const messagesResponse = await args.client.session.messages({ + path: { id: childSessionId }, + query: { + directory: args.sessionDirectory ?? args.projectIdentity, + limit: 50, + }, + }); + return shared.normalizeSDKResponse(messagesResponse, [] as unknown[], { + preferResponseOnMissingData: true, + }); + }, + validateOutput: (messages) => { + const taskResult = extractLatestAssistantText(messages); + if (!taskResult) { + throw new Error("Dreamer returned no assistant output."); + } + return taskResult; + }, + }, + ); + if (lostLease) { + throw new Error(lostLeaseReason ?? 
`Dream lease lost during ${taskName}`); + } + + const taskResult = dreamTaskRun.validated; + recordInvocation({ status: "completed", messages: dreamTaskRun.output }); + + if ( + taskName === "maintain-docs" && + maintainDocsSnapshot && + maintainDocsSnapshot.size > 0 + ) { + try { + enforceMaintainDocsProtectedRegions({ + docsDir, + snapshot: maintainDocsSnapshot, + }); + } catch (error) { + log( + `[dreamer] maintain-docs protected-region enforcement failed: ${error}`, + ); + } + } + + const durationMs = Date.now() - taskStartedAt; + log( + `[dreamer] task ${taskName}: completed in ${(durationMs / 1000).toFixed(1)}s (result: ${String(taskResult).length} chars)`, + ); + result.tasks.push({ + name: taskName, + durationMs, + result: taskResult, + }); + lastErrorSignature = null; + consecutiveSameErrorFailures = 0; + } catch (error) { + taskFailed = true; + recordInvocation({ status: lostLease ? "aborted" : "failed", error }); + const durationMs = Date.now() - taskStartedAt; + const errorDescription = describeError(error); + logWithStackHead( + `[dreamer] task ${taskName}: failed after ${(durationMs / 1000).toFixed(1)}s — ${errorDescription.brief}`, + errorDescription.stackHead, + ); + result.tasks.push({ + name: taskName, + durationMs, + result: null, + error: errorDescription.brief, + }); + + if (lostLease) { + lastErrorSignature = null; + consecutiveSameErrorFailures = 0; + } else if (shouldSkipCircuitBreaker(error, errorDescription.brief)) { + lastErrorSignature = null; + consecutiveSameErrorFailures = 0; + } else { + const signature = getCircuitBreakerSignature(error, errorDescription.brief); + if (signature === lastErrorSignature) { + consecutiveSameErrorFailures += 1; + } else { + lastErrorSignature = signature; + consecutiveSameErrorFailures = 1; + } + + if (consecutiveSameErrorFailures >= CIRCUIT_BREAKER_THRESHOLD) { + circuitBreakerTripped = true; + log( + `[dreamer] circuit breaker: ${consecutiveSameErrorFailures} consecutive ${signature} failures — 
aborting remaining tasks`, + ); + result.tasks.push({ + name: "circuit-breaker", + durationMs: 0, + result: "", + error: `Aborted remaining tasks: ${consecutiveSameErrorFailures} consecutive ${signature} failures. Configure dreamer model/fallback_models in magic-context.jsonc.`, + }); + } + } + } finally { + clearInterval(leaseRenewalInterval); + // Delete the child session only on SUCCESS. Keep failed sessions so + // the task's prompt / model output / error can be inspected (the + // failure is already recorded in subagent_invocations). + // keep_subagents debug flag retains successful ones too. + if (agentSessionId && !taskFailed && !shouldKeepSubagents()) { + await args.client.session + .delete({ + path: { id: agentSessionId }, + }) + .catch((error: unknown) => { + log("[dreamer] failed to delete child session:", error); + }); + } else if (agentSessionId && (taskFailed || shouldKeepSubagents())) { + log( + `[dreamer] KEEPING child session ${agentSessionId} for task ${taskName} (${taskFailed ? "failed" : "keep_subagents"})`, + ); + } + } + + if (lostLease) { + recordLeaseLostTask(`task ${taskName}`); + break; + } + + if (circuitBreakerTripped) { + break; + } + } + + if (lostLease) { + log("[dreamer] lease lost: skipping all post-task phases"); + recordLeaseLostTask("post-task phases"); + } else if (circuitBreakerTripped) { + log("[dreamer] circuit breaker: skipping post-task phases"); + result.tasks.push({ + name: "post-task-phases", + durationMs: 0, + result: "", + error: "Skipped post-task phases after circuit breaker tripped; configure dreamer model/fallback_models in magic-context.jsonc.", + }); + } + // ── User memory review phase ── + // Runs after regular dream tasks, reviews user memory candidates for promotion. 
+ if ( + !circuitBreakerTripped && + !lostLease && + args.experimentalUserMemories?.enabled && + Date.now() <= deadline + ) { + const umStart = Date.now(); + try { + if (!verifyLeaseStillHeld("before user-memory review")) { + throw new Error( + lostLeaseReason ?? "Dream lease lost before user-memory review", + ); + } + const reviewResult = await reviewUserMemories({ + db: args.db, + client: args.client, + parentSessionId, + sessionDirectory: args.sessionDirectory, + holderId, + deadline, + promotionThreshold: args.experimentalUserMemories.promotionThreshold, + fallbackModels: args.fallbackModels, + }); + if (!verifyLeaseStillHeld("after user-memory review")) { + throw new Error(lostLeaseReason ?? "Dream lease lost after user-memory review"); + } + const umOutput = `promoted=${reviewResult.promoted} merged=${reviewResult.merged} dismissed=${reviewResult.dismissed} consumed=${reviewResult.candidatesConsumed}`; + if ( + reviewResult.promoted > 0 || + reviewResult.merged > 0 || + reviewResult.dismissed > 0 + ) { + log(`[dreamer] user-memories: ${umOutput}`); + } + result.tasks.push({ + name: "user memories", + durationMs: Date.now() - umStart, + result: umOutput, + }); + } catch (error) { + const errorDescription = describeError(error); + logWithStackHead( + `[dreamer] user-memory review failed: ${errorDescription.brief}`, + errorDescription.stackHead, + ); + result.tasks.push({ + name: "user memories", + durationMs: Date.now() - umStart, + result: "", + error: errorDescription.brief, + }); + } + if (lostLease) recordLeaseLostTask("user-memory review"); + } + // ── Smart note evaluation phase ── + // Runs after regular dream tasks, evaluates pending smart note conditions. + // Not a user-configurable task — always runs when dreamer has pending smart notes. + if (!circuitBreakerTripped && !lostLease && Date.now() <= deadline) { + try { + if (!verifyLeaseStillHeld("before smart-note evaluation")) { + throw new Error( + lostLeaseReason ?? 
"Dream lease lost before smart-note evaluation", + ); + } + await evaluateSmartNotes({ + db: args.db, + client: args.client, + projectIdentity: args.projectIdentity, + parentSessionId, + sessionDirectory: args.sessionDirectory, + holderId, + deadline, + result, + fallbackModels: args.fallbackModels, + onLeaseLost: markLeaseLost, + isLeaseLost: () => lostLease, + }); + if (!verifyLeaseStillHeld("after smart-note evaluation")) { + throw new Error( + lostLeaseReason ?? "Dream lease lost after smart-note evaluation", + ); + } + } catch (error) { + const errorDescription = describeError(error); + logWithStackHead( + `[dreamer] smart note evaluation failed: ${errorDescription.brief}`, + errorDescription.stackHead, + ); + } + if (lostLease) recordLeaseLostTask("smart-note evaluation"); + } + if ( + !circuitBreakerTripped && + !lostLease && + args.experimentalPinKeyFiles?.enabled && + Date.now() <= deadline + ) { + const kfStart = Date.now(); + try { + if (!verifyLeaseStillHeld("before key-file identification")) { + throw new Error( + lostLeaseReason ?? "Dream lease lost before key-file identification", + ); + } + const openCodeDb = openOpenCodeDb(); + if (openCodeDb) { + try { + await runKeyFilesTask({ + db: args.db, + openCodeDb, + client: args.client, + projectPath: args.sessionDirectory ?? args.projectIdentity, + holderId, + deadline, + parentSessionId, + config: args.experimentalPinKeyFiles, + fallbackModels: args.fallbackModels, + }); + } finally { + closeQuietly(openCodeDb); + } + } + if (!verifyLeaseStillHeld("after key-file identification")) { + throw new Error( + lostLeaseReason ?? 
"Dream lease lost after key-file identification", + ); + } + result.tasks.push({ + name: "key files", + durationMs: Date.now() - kfStart, + result: "completed", + }); + } catch (error) { + const errorDescription = describeError(error); + logWithStackHead( + `[key-files] identification phase failed: ${errorDescription.brief}`, + errorDescription.stackHead, + ); + result.tasks.push({ + name: "key files", + durationMs: Date.now() - kfStart, + result: "", + error: errorDescription.brief, + }); + } + if (lostLease) recordLeaseLostTask("key-file identification"); + } + } finally { + releaseLease(args.db, holderId); + log(`[dreamer] lease released: ${holderId}`); + } + + result.finishedAt = Date.now(); + const memoryCountsAfter = getMemoryCountsByStatus(args.db, args.projectIdentity); + const merged = countNewIds(memoryCountsBefore.mergedIds, memoryCountsAfter.mergedIds); + const memoryChanges = { + written: countNewIds(memoryCountsBefore.ids, memoryCountsAfter.ids), + deleted: countNewIds(memoryCountsAfter.ids, memoryCountsBefore.ids), + // archivedIds already EXCLUDES merged/superseded rows — getMemoryCountsByStatus + // routes a memory with superseded_by_memory_id into mergedIds and never into + // archivedIds (the two sets are disjoint). So the archived delta is already + // merge-free; subtracting `merged` again double-counted and under-reported + // archived (often to zero). + archived: countNewIds(memoryCountsBefore.archivedIds, memoryCountsAfter.archivedIds), + merged, + }; + const persistedMemoryChanges = Object.values(memoryChanges).some((value) => value > 0) + ? memoryChanges + : null; + insertDreamRun(args.db, { + projectPath: args.projectIdentity, + startedAt: result.startedAt, + finishedAt: result.finishedAt, + holderId: result.holderId, + tasks: result.tasks.map((task) => ({ + name: task.name, + durationMs: task.durationMs, + resultChars: typeof task.result === "string" ? task.result.length : 0, + ...(task.error ? 
{ error: task.error } : {}), + })), + tasksSucceeded: result.tasks.filter((task) => !task.error).length, + tasksFailed: result.tasks.filter((task) => Boolean(task.error)).length, + smartNotesSurfaced: result.smartNotesSurfaced, + smartNotesPending: result.smartNotesPending, + memoryChanges: persistedMemoryChanges, + }); + // Only update dream timestamps when at least one task succeeded — failed runs + // should not block re-scheduling for the project. + // + // Only count configured dream tasks (consolidate / verify / archive-stale / + // improve / maintain-docs) for success. Post-task phases (smart-notes, + // user memories, key files) run unconditionally after the main task loop + // and must NOT mask failures of the configured tasks — otherwise a + // successful key-file evaluation would suppress re-scheduling a project + // whose consolidate/verify/archive tasks all failed. + const POST_TASK_NAMES = new Set([ + "smart-notes", + "user memories", + "key files", + "post-task-phases", + "circuit-breaker", + ]); + const hasSuccessfulTask = result.tasks.some((t) => !t.error && !POST_TASK_NAMES.has(t.name)); + if (hasSuccessfulTask && !lostLease) { + // Per-project only. Do NOT also write the legacy global "last_dream_at" + // key — that write is what let one project's run suppress another's. 
+ setDreamState(args.db, `last_dream_at:${args.projectIdentity}`, String(result.finishedAt)); + } + const totalDuration = ((result.finishedAt - startedAt) / 1000).toFixed(1); + const succeeded = result.tasks.filter((t) => !t.error).length; + const failed = result.tasks.filter((t) => t.error).length; + log( + `[dreamer] dream run finished in ${totalDuration}s: ${succeeded} succeeded, ${failed} failed`, + ); + return result; +} + +async function evaluateSmartNotes(args: { + db: Database; + client: PluginContext["client"]; + projectIdentity: string; + parentSessionId: string | undefined; + sessionDirectory: string | undefined; + holderId: string; + deadline: number; + result: DreamRunResult; + /** Resolved dreamer fallback chain. */ + fallbackModels?: readonly string[]; + onLeaseLost?: (phase: string, error?: unknown) => void; + isLeaseLost?: () => boolean; +}): Promise { + const pendingNotes = getPendingSmartNotes(args.db, args.projectIdentity); + if (pendingNotes.length === 0) { + log("[dreamer] smart notes: no pending notes to evaluate"); + return; + } + + log(`[dreamer] smart notes: evaluating ${pendingNotes.length} pending note(s)`); + + // Build a single evaluation prompt for all pending notes. + // The dreamer checks each condition and returns structured results. + const noteDescriptions = pendingNotes + .map((n) => `- Note #${n.id}: "${n.content}"\n Condition: ${n.surfaceCondition}`) + .join("\n"); + + const evaluationPrompt = `You are evaluating smart note conditions for the magic-context system. + +For each note below, determine whether its surface condition has been met. +You have access to tools like GitHub CLI (gh), web search, and the local codebase to verify conditions. 
+ +You DO NOT have access to: +- Any conversation between the user and the original agent that wrote the note +- The state of any active session, including whether messages have been sent +- The current task, mood, or intent of the human user + +If a condition references conversation context the user is having ("When the user mentions X", "When they ask to do Y", "When we revisit Z", "When relevant to current discussion", etc.), it is UNEVALUATABLE — skip it (do not include in results) so the note stays pending. These are misuse cases that should never have been written as smart notes; leaving them pending is the correct outcome, the dreamer's archive-stale task will eventually retire them. + +## Pending Smart Notes + +${noteDescriptions} + +## Instructions + +1. Check each condition using the tools available to you. +2. Be conservative — only mark a condition as met when you have clear evidence. +3. Skip conditions that depend on session/conversation context you cannot observe — do not invent a "false" verdict for them, just omit them from your response. +4. Respond with a JSON array of results: + +\`\`\`json +[ + { "id": , "met": true/false, "reason": "brief explanation" } +] +\`\`\` + +Only include notes whose conditions you could definitively evaluate against external signals. Skip notes where you cannot determine the status (they will be re-evaluated next run, or eventually archived as stale).`; + + const taskStartedAt = Date.now(); + let agentSessionId: string | null = null; + // Retain the child session on failure so its prompt/output/error can be + // inspected — mirrors the main-task cleanup rule. Optional phases used to + // delete unconditionally, losing the evidence for exactly the runs worth + // debugging. 
+ let phaseFailed = false; + const startedAt = Date.now(); + let invocationRecorded = false; + const recordInvocation = (params: { + status: "completed" | "failed" | "aborted"; + messages?: unknown[]; + error?: unknown; + }) => { + if (!args.parentSessionId || invocationRecorded) return; + invocationRecorded = true; + recordChildInvocation({ + db: args.db, + parentSessionId: args.parentSessionId, + harness: "opencode", + subagent: "dreamer", + task: "smart-notes", + startedAt, + status: params.status, + messages: params.messages, + error: params.error, + }); + }; + const abortController = new AbortController(); + const leaseInterval = setInterval(() => { + try { + if (!renewLease(args.db, args.holderId)) { + log("[dreamer] smart notes: lease renewal failed — aborting"); + args.onLeaseLost?.("smart notes"); + abortController.abort(); + } + } catch (error) { + args.onLeaseLost?.("smart notes", error); + abortController.abort(); + } + }, 60_000); + + try { + const createResponse = await args.client.session.create({ + body: { + ...(args.parentSessionId ? { parentID: args.parentSessionId } : {}), + title: "magic-context-dream-smart-notes", + }, + query: { directory: args.sessionDirectory ?? args.projectIdentity }, + }); + const created = shared.normalizeSDKResponse( + createResponse, + null as { id?: string } | null, + { preferResponseOnMissingData: true }, + ); + agentSessionId = typeof created?.id === "string" ? created.id : null; + if (!agentSessionId) { + const error = new Error("Could not create smart note evaluation session."); + recordInvocation({ status: "failed", error }); + throw error; + } + + log(`[dreamer] smart notes: child session created ${agentSessionId}`); + const childSessionId = agentSessionId; + + const remainingMs = Math.max(0, args.deadline - Date.now()); + const smartNoteRun = await shared.promptSyncWithValidatedOutputRetry( + args.client, + { + path: { id: childSessionId }, + query: { directory: args.sessionDirectory ?? 
args.projectIdentity }, + body: { + agent: DREAMER_AGENT, + system: DREAMER_SYSTEM_PROMPT, + // synthetic: true hides the dreamer evaluation prompt from the TUI + // subagent pane while still delivering it to the model. See issue #50. + parts: [{ type: "text", text: evaluationPrompt, synthetic: true }], + }, + }, + { + timeoutMs: Math.min(remainingMs, 5 * 60 * 1000), + signal: abortController.signal, + fallbackModels: args.fallbackModels, + callContext: "dreamer:smart-notes", + fetchOutput: async () => { + const messagesResponse = await args.client.session.messages({ + path: { id: childSessionId }, + query: { + directory: args.sessionDirectory ?? args.projectIdentity, + limit: 50, + }, + }); + return shared.normalizeSDKResponse(messagesResponse, [] as unknown[], { + preferResponseOnMissingData: true, + }); + }, + validateOutput: (messages) => { + const output = extractLatestAssistantText(messages); + if (!output) throw new Error("Smart note evaluation returned no output."); + + // Parse the JSON results from the LLM response — use greedy match to handle + // `]` chars inside JSON string values (e.g., reasons containing brackets). 
+ const jsonMatch = output.match(/\[[\s\S]*\]/); + if (!jsonMatch) { + throw new Error("Smart note evaluation returned no JSON array."); + } + + try { + return JSON.parse(jsonMatch[0]) as Array<{ + id: number; + met: boolean; + reason?: string; + }>; + } catch { + throw new Error("Smart note evaluation returned invalid JSON."); + } + }, + }, + ); + + recordInvocation({ status: "completed", messages: smartNoteRun.output }); + const evaluations = smartNoteRun.validated; + let surfaced = 0; + for (const evaluation of evaluations) { + if (typeof evaluation.id !== "number") continue; + const note = pendingNotes.find((n) => n.id === evaluation.id); + if (!note) continue; + + if (evaluation.met) { + markNoteReady(args.db, note.id, evaluation.reason); + surfaced++; + log( + `[dreamer] smart notes: #${note.id} condition MET — "${evaluation.reason ?? "condition satisfied"}"`, + ); + } else { + markNoteChecked(args.db, note.id); + } + } + + // Mark any notes not in the evaluation as checked (LLM skipped them) + for (const note of pendingNotes) { + if (!evaluations.some((e) => e.id === note.id)) { + markNoteChecked(args.db, note.id); + } + } + + const durationMs = Date.now() - taskStartedAt; + const pending = Math.max(0, pendingNotes.length - surfaced); + args.result.smartNotesSurfaced = surfaced; + args.result.smartNotesPending = pending; + log( + `[dreamer] smart notes: evaluated ${pendingNotes.length} notes in ${(durationMs / 1000).toFixed(1)}s — ${surfaced} surfaced, ${pending} still pending`, + ); + args.result.tasks.push({ + name: "smart-notes", + durationMs, + result: `${surfaced} surfaced, ${pending} still pending`, + }); + } catch (error) { + phaseFailed = true; + if ( + error instanceof Error && + error.message === "Smart note evaluation returned no JSON array." 
+ ) { + log("[dreamer] smart notes: no JSON array found in output, skipping"); + for (const note of pendingNotes) markNoteChecked(args.db, note.id); + } else if ( + error instanceof Error && + error.message === "Smart note evaluation returned invalid JSON." + ) { + log(`[dreamer] smart notes: failed to parse JSON from LLM output, marking all checked`); + for (const note of pendingNotes) markNoteChecked(args.db, note.id); + } + const durationMs = Date.now() - taskStartedAt; + const errorDescription = describeError(error); + args.result.smartNotesSurfaced = 0; + args.result.smartNotesPending = pendingNotes.length; + logWithStackHead( + `[dreamer] smart notes: failed after ${(durationMs / 1000).toFixed(1)}s — ${errorDescription.brief}`, + errorDescription.stackHead, + ); + args.result.tasks.push({ + name: "smart-notes", + durationMs, + result: null, + error: errorDescription.brief, + }); + } finally { + clearInterval(leaseInterval); + // Keep the child session on failure (debugging) or under keep_subagents. + if (agentSessionId && !phaseFailed && !shouldKeepSubagents()) { + await args.client.session + .delete({ + path: { id: agentSessionId }, + }) + .catch(() => {}); + } + } +} + +const MAX_LEASE_RETRIES = 3; + +export async function processDreamQueue(args: { + db: Database; + client: PluginContext["client"]; + tasks: DreamingTask[]; + taskTimeoutMinutes: number; + maxRuntimeMinutes: number; + experimentalUserMemories?: { enabled: boolean; promotionThreshold: number }; + experimentalPinKeyFiles?: ExperimentalPinKeyFilesConfig; + /** + * Optional project identity filter — when provided, only entries belonging + * to this project are dequeued. Each running OpenCode/Pi process registers + * exactly one project, and the host's dreamer client (and `pi` runner, in + * Pi's case) is project-specific. 
Without this filter, a Pi process running + * for project A would dequeue queue entries for project B and try to + * `posix_spawn 'pi'` in B's `git:` identity string as a directory, + * failing with ENOENT every cycle. + * + * Callers should pass this whenever they own a single project — both the + * scheduled timer tick (`sweepProject`) and the `/ctx-dream` command + * handler. Tests pass `undefined` to keep the legacy "dequeue any" semantics. + */ + projectIdentity?: string; + /** + * Filesystem directory of the project THIS draining process owns. Because + * project identity collapses worktrees/clones to one `git:`, resolving + * the execution directory from the shared in-memory map can pick a stale + * sibling checkout ("last-registered wins"). The drain caller always knows + * its own live directory, so passing it here guarantees the dream runs in a + * checkout this process actually registered. Paired with projectIdentity + * (the queue filter), so the dequeued entry is guaranteed to be this + * project's. Falls back to the map (then the identity string) when omitted. + */ + sessionDirectoryOverride?: string; + /** + * Resolved Dreamer fallback chain. See `runDream` for semantics. Callers + * compute via `resolveFallbackChain(DREAMER_AGENT, pluginConfig.dreamer?.fallback_models)`. + */ + fallbackModels?: readonly string[]; +}): Promise { + // Use configured max runtime + 30min buffer for stale threshold instead of hardcoded 2h. + // Only reap when no live lease exists — a healthy long-running dream renews its lease and + // would otherwise have its own queue row deleted mid-run. Scope to this project so the + // cross-process shared queue doesn't reap another host's still-running rows. + const maxRuntimeMs = args.maxRuntimeMinutes * 60 * 1000; + // A live lease means another dream (this project or a sibling on the shared + // queue) is actively running. 
Don't dequeue underneath it: runDream would just + // fail lease acquisition, increment this entry's retry count, and after + // MAX_LEASE_RETRIES DELETE the queue row — silently dropping a project's + // pending dream that never got a fair chance to run. Skip this tick; the entry + // stays queued for when the lease frees. + if (hasActiveDreamLease(args.db)) { + return null; + } + clearStaleEntries(args.db, maxRuntimeMs + 30 * 60 * 1000, args.projectIdentity); + const entry = dequeueNext(args.db, args.projectIdentity); + if (!entry) { + return null; + } + + // Prefer the draining caller's own directory (the project THIS process + // owns). The dequeue filter (projectIdentity) guarantees entry belongs to + // this project, so the override is the correct live checkout — not a stale + // sibling-worktree path the shared identity map might resolve to. + const projectDirectory = + args.sessionDirectoryOverride ?? resolveDreamSessionDirectory(entry.projectIdentity); + // Log the project identity only — never the resolved directory. The + // absolute path carries the username + project name (privacy), and the + // git:/dir: identity uniquely correlates the run for debugging. + log(`[dreamer] dequeued project ${entry.projectIdentity}, starting dream run`); + + let result: DreamRunResult; + try { + result = await runDream({ + db: args.db, + client: args.client, + // entry.projectIdentity is the project identity (e.g. "git:") — used for dream state keys. + // projectDirectory is the filesystem path — used for session creation and file access. 
+ projectIdentity: entry.projectIdentity, + tasks: args.tasks, + taskTimeoutMinutes: args.taskTimeoutMinutes, + maxRuntimeMinutes: args.maxRuntimeMinutes, + sessionDirectory: projectDirectory, + experimentalUserMemories: args.experimentalUserMemories, + experimentalPinKeyFiles: args.experimentalPinKeyFiles, + fallbackModels: args.fallbackModels, + }); + } catch (error) { + log(`[dreamer] runDream threw for ${entry.projectIdentity}: ${getErrorMessage(error)}`); + // Remove the entry so it doesn't stay stuck in "started" state until the stale-entry sweep (max runtime + 30 min) reaps it + removeDreamEntry(args.db, entry.id); + return null; + } + + // Only remove queue entry if the dream actually ran (lease acquired). + // If lease acquisition failed, the entry stays so it can be retried (up to MAX_LEASE_RETRIES). + const leaseError = result.tasks.find((t) => t.name === "lease" && t.error); + if (leaseError) { + const retryCount = getEntryRetryCount(args.db, entry.id); + if (retryCount >= MAX_LEASE_RETRIES) { + log( + `[dreamer] lease acquisition failed ${retryCount + 1} times for ${entry.projectIdentity} — removing queue entry`, + ); + removeDreamEntry(args.db, entry.id); + } else { + log( + `[dreamer] lease acquisition failed for ${entry.projectIdentity} (attempt ${retryCount + 1}/${MAX_LEASE_RETRIES}) — keeping for retry`, + ); + resetDreamEntry(args.db, entry.id); + } + } else { + removeDreamEntry(args.db, entry.id); + } + + return result; +} diff --git a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts index f6eb1b35..ffc4aa8d 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts @@ -377,47 +377,32 @@ const STRUCTURE_TEMPLATE = ` // ── Distill Skill Memory ─────────────────────────────────────────────────── function buildDistillSkillMemoryPrompt(projectPath: string): string { - return `## Task: Distill Skill Memory + return `## 
Task: Distill Skill Memory (P2 — read-only health report) **Project:** ${projectPath} -### Goal -Maintain the skill_memory table: merge near-duplicate notes, prune stale low-hit notes, -promote recurring gotchas to pinned, enforce per-skill note caps. +### Important context +- Embedding refresh for NULL/stale vectors already ran programmatically BEFORE this prompt — no need to re-embed. +- Merge (action="distill" + merge), prune, and promote are P3 / NOT YET IMPLEMENTED. Do NOT call ctx_skill_note with action="distill". -### Process -1. Query skill_memory for skills with note_count > 20 (the distill threshold): +### Your task: produce a short read-only summary of skill-memory corpus health +1. Query aggregate counts and flag obvious issues: \`\`\`sql - SELECT skill_id, tier, project_identity, COUNT(*) as note_count + SELECT skill_id, tier, COUNT(*) as note_count, + SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END) as pinned_count, + SUM(CASE WHEN intent_embedding IS NULL OR delta_embedding IS NULL THEN 1 ELSE 0 END) as missing_embedding_count FROM skill_memory WHERE project_identity = (SELECT project_identity FROM skill_memory LIMIT 1) - GROUP BY skill_id, tier, project_identity - HAVING note_count > 20 + GROUP BY skill_id, tier ORDER BY note_count DESC - LIMIT 5; + LIMIT 20; \`\`\` -2. For each qualifying skill: - a. List notes ordered by hit_count DESC, created_at DESC. - b. Merge near-duplicate notes (same kind, similar delta — use judgment). - Use ctx_skill_note with action="distill" and merge: [id, id]. - c. Prune notes with hit_count=0 AND created_at < now-30d (stale, never recalled). - Use ctx_skill_note with action="distill" and prune: id. - d. Promote notes with hit_count >= 5 to pinned=1 if not already pinned. - Use ctx_skill_note with action="distill" and promote: id. - e. If note count > 100 after pruning, archive oldest low-hit unpinned notes. -3. Log a quality alert if >30% of kind='gotcha' notes appear to be general observations - (not skill-specific). 
Use ctx_memory to record the alert. -4. Process at most 5 skill groups per run (rotating by last_distilled_at). - -### Tools available -- ctx_skill_note (with action="distill" — dreamer-only action for merge/prune/promote) -- Read, bash (for verification queries) +2. Note any skills with >100 notes, >30% gotcha-kind notes, or obvious near-duplicates (same skill + kind + very similar delta text). +3. Report findings as a short summary — no tool calls beyond read-only SQL queries. ### Success criteria -- No skill has >100 notes in the project tier. -- Pinned notes reflect genuinely recurring gotchas (hit_count >= 5). -- Stale zero-hit notes older than 30 days are pruned. -- Quality alert logged if >30% of gotcha notes are general-quality.`; +- A concise health summary is logged (via ctx_memory) for the project maintainer to review. +- No tool calls to unimplemented actions.`; } // ── Dispatcher ───────────────────────────────────────────────────────────── diff --git a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts new file mode 100644 index 00000000..3ab2b9dc --- /dev/null +++ b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, test } from "bun:test"; +import { float32ArrayToBlob, toFloat32Array } from "./storage-memory-embeddings"; + +describe("vector serde round-trip", () => { + test("Float32Array → blob → Float32Array preserves values", () => { + const vec = new Float32Array([0.1, -0.5, 0.99, 0.0]); + const blob = float32ArrayToBlob(vec); + const back = toFloat32Array(blob); + expect(Array.from(back)).toEqual(Array.from(vec)); + }); +}); diff --git a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts index f10e623a..1fa57ae1 100644 --- 
a/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts +++ b/packages/plugin/src/features/magic-context/memory/storage-memory-embeddings.ts @@ -37,7 +37,7 @@ function isEmbeddingRow(row: unknown): row is EmbeddingRow { ); } -function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { +export function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { if (blob instanceof Uint8Array) { const buffer = blob.buffer.slice(blob.byteOffset, blob.byteOffset + blob.byteLength); return new Float32Array(buffer); @@ -46,6 +46,11 @@ function toFloat32Array(blob: Uint8Array | ArrayBuffer): Float32Array { return new Float32Array(blob.slice(0)); } +/** Serialize a Float32Array to a SQLite BLOB (Buffer). Canonical — reuse, do not duplicate. */ +export function float32ArrayToBlob(vector: Float32Array): Buffer { + return Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength); +} + function getSaveEmbeddingStatement(db: Database): PreparedStatement { let stmt = saveEmbeddingStatements.get(db); if (!stmt) { diff --git a/packages/plugin/src/features/magic-context/migrations.ts b/packages/plugin/src/features/magic-context/migrations.ts index f5d661b5..c4a65750 100644 --- a/packages/plugin/src/features/magic-context/migrations.ts +++ b/packages/plugin/src/features/magic-context/migrations.ts @@ -47,7 +47,12 @@ function assertForeignKeyIntegrity(db: Database, table?: string): void { } } -const MIGRATIONS: Migration[] = [ +function columnExists(db: Database, table: string, column: string): boolean { + const rows = db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name?: string }>; + return rows.some((row) => row.name === column); +} + +export const MIGRATIONS: Migration[] = [ { version: 1, description: "Merge session_notes + smart_notes into unified notes table", @@ -1918,6 +1923,58 @@ const MIGRATIONS: Migration[] = [ `); }, }, + + { + // Skill-memory P2: was v39/v43 across earlier rebases; renumbered to v51 + // after 
upstream v0.27 took v42–v49 (skill-P1 is now v50). + version: 51, + description: + "Skill-memory P2: delta_embedding + recall_count columns + skill_memory_fts FTS5 vtable", + up: (db: Database) => { + // skill_memory is migration-only (created by v50); ALTER is safe here. + if (!columnExists(db, "skill_memory", "delta_embedding")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN delta_embedding BLOB;`); + } + + // recall_count: read-side usage counter, bumped each time a note is surfaced + // in a recall block (distinct from hit_count, which is write-side re-record salience). + // Answers "which notes are recalled most". NOT_NULL+DEFAULT is valid in ALTER ADD COLUMN. + if (!columnExists(db, "skill_memory", "recall_count")) { + db.exec( + `ALTER TABLE skill_memory ADD COLUMN recall_count INTEGER NOT NULL DEFAULT 0;`, + ); + } + + // FTS5 over (intent, delta), content-linked to skill_memory — mirrors memories_fts. + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS skill_memory_fts USING fts5( + intent, + delta, + content='skill_memory', + content_rowid='id', + tokenize='porter unicode61' + ); + + CREATE TRIGGER IF NOT EXISTS skill_memory_ai AFTER INSERT ON skill_memory BEGIN + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_ad AFTER DELETE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_au AFTER UPDATE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + `); + + // Backfill the FTS index for any existing skill_memory rows. 
External-content FTS5 tables + // expose content rowids immediately, so a `NOT IN (SELECT rowid FROM …_fts)` guard is a no-op; + // the 'rebuild' command is the correct way to (re)populate an external-content index. + db.exec(`INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild');`); + }, + }, ]; /** diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts index 363ca2fb..31fe1424 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts @@ -44,4 +44,29 @@ describe("parseFrontmatterConfig", () => { const content = `---\nskill-memory: true\n---\n# Skill`; expect(parseFrontmatterConfig(content)).toBeNull(); }); + + test("parses flat ranking_* keys as numbers", () => { + const md = `--- +skill-memory: + enabled: true + ranking_relevance: 0.7 + ranking_recency: 0.2 + ranking_hit: 0.1 +--- +body`; + const cfg = parseFrontmatterConfig(md); + expect(cfg?.ranking_relevance).toBe(0.7); + expect(cfg?.ranking_recency).toBe(0.2); + expect(cfg?.ranking_hit).toBe(0.1); + }); + + test("ranking_* default to undefined when omitted (recall applies defaults)", () => { + const md = `--- +skill-memory: + enabled: true +--- +body`; + const cfg = parseFrontmatterConfig(md); + expect(cfg?.ranking_relevance).toBeUndefined(); + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts index 8bb5d20d..8321a46a 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts @@ -11,6 +11,9 @@ export interface SkillMemoryConfig { max_tokens: number; max_pinned_tokens: number; dedup_threshold: number; + ranking_relevance?: number; + ranking_recency?: number; + 
ranking_hit?: number; } const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---/m; @@ -32,6 +35,9 @@ export function parseFrontmatterConfig(content: string): SkillMemoryConfig | nul max_tokens: toNumber(skillMemoryBlock.max_tokens, 1500), max_pinned_tokens: toNumber(skillMemoryBlock.max_pinned_tokens, 4000), dedup_threshold: toNumber(skillMemoryBlock.dedup_threshold, 0.92), + ranking_relevance: toOptionalNumber(skillMemoryBlock.ranking_relevance), + ranking_recency: toOptionalNumber(skillMemoryBlock.ranking_recency), + ranking_hit: toOptionalNumber(skillMemoryBlock.ranking_hit), }; } catch { // Non-choke: malformed config = inert @@ -48,6 +54,15 @@ function toNumber(value: unknown, defaultValue: number): number { return defaultValue; } +function toOptionalNumber(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "string") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return undefined; +} + /** * Extract the `skill-memory:` sub-block from YAML frontmatter text. * Returns a flat key→value map of the block's immediate children. 
diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts index 4c5af78d..db6983e8 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -1,9 +1,16 @@ -import { describe, expect, test } from "bun:test"; +import { describe, expect, mock, test } from "bun:test"; import { Database } from "../../../shared/sqlite"; import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; import { runMigrations } from "../migrations"; import { initializeDatabase } from "../storage-db"; -import { buildSkillMemoryBlock, flatRecall } from "./recall"; +import { + buildSkillMemoryBlock, + flatRecall, + rankRung1, + recallSkillMemoryBlock, + sanitizeSkillIntentForFts, +} from "./recall"; import { insertSkillMemoryNote } from "./storage"; function makeDb(): Database { @@ -60,6 +67,305 @@ describe("flatRecall", () => { }); }); +describe("sanitizeSkillIntentForFts", () => { + test("quotes tokens and neutralizes FTS operators", () => { + expect(sanitizeSkillIntentForFts("debug AND fix (urgent)")).toBe( + '"debug" OR "and" OR "fix" OR "urgent"', + ); + expect(sanitizeSkillIntentForFts("!!!")).toBe(""); + expect(sanitizeSkillIntentForFts('say "hi"')).toBe('"say" OR "hi"'); + }); +}); + +describe("rankRung1", () => { + test("clamps negative cosine and guards div-by-zero", () => { + const q = new Float32Array([1, 0]); + const notes = [ + { + id: 1, + intentVec: new Float32Array([0, 1]), + deltaVec: new Float32Array([0, 1]), + ts: 5, + hit: 0, + }, + ]; + const ranked = rankRung1(q, notes, { relevance: 0.6, recency: 0.25, hit: 0.15 }); + expect(ranked.length).toBe(1); + expect(Number.isNaN(ranked[0].score)).toBe(false); + }); + + test("orders by weighted blend (relevance leads)", () => { + const q = new 
Float32Array([1, 0]); + const notes = [ + { + id: 1, + intentVec: new Float32Array([1, 0]), + deltaVec: new Float32Array([1, 0]), + ts: 1, + hit: 0, + }, + { + id: 2, + intentVec: new Float32Array([0, 1]), + deltaVec: new Float32Array([0, 1]), + ts: 100, + hit: 50, + }, + ]; + const ranked = rankRung1(q, notes, { relevance: 0.6, recency: 0.25, hit: 0.15 }); + expect(ranked[0].id).toBe(1); + }); +}); + +let EMBED_UP = true; +mock.module("../memory/embedding", () => ({ + embedTextForProject: async (_p: string, text: string) => + EMBED_UP + ? { + vector: text.includes("auth") + ? new Float32Array([1, 0]) + : new Float32Array([0, 1]), + modelId: "m1", + generation: 1, + } + : null, +})); + +function modeOf(block: string): string | null { + return block.match(/]*\bmode="([^"]+)"/)?.[1] ?? null; +} +const cfg = { + enabled: true as const, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, +}; + +describe("recallSkillMemoryBlock (intent-scoped rungs)", () => { + test("rung 1 full: provider up + intent + a model-matched embedded note", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix auth", + kind: "fix", + delta: "auth note", + normalizedHash: "h1", + createdAt: 1, + intentEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + deltaEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "m1", + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth bug", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("full"); + }); + + test("rung 2 no-intent: provider up, no intent → flat", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + 
kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("no-intent"); + }); + + test("rung 3 fts5-fallback: provider down + intent → FTS", async () => { + const db = makeDb(); + EMBED_UP = false; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix auth flake", + kind: "fix", + delta: "mock timers", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("fts5-fallback"); + }); + + test("rung 4 flat-fts: provider down + UNINDEXABLE intent (sanitize→empty) → flat", async () => { + const db = makeDb(); + EMBED_UP = false; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "!!! 
???", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("flat-fts"); + }); + + test("rung 5 cold: no notes → empty block", async () => { + const db = makeDb(); + EMBED_UP = true; + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "x", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(block).toBe(""); + }); + + test("zero model-matched → falls to rung 3, never empty full block", async () => { + const db = makeDb(); + EMBED_UP = true; + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "auth fix", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + intentEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "OLD-model", + }); + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "auth", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(modeOf(block)).toBe("fts5-fallback"); + }); + + test("intent-scoped recall matches a note sharing SOME (not all) intent tokens (OR semantics)", async () => { + const db = makeDb(); + EMBED_UP = false; // force rung 3 FTS path + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: null, + projectIdentity: "git:abc", + intent: "fix the flaky auth login test", + kind: "fix", + delta: "mock the system clock in auth specs", + normalizedHash: "h1", + createdAt: Date.now(), + }); + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: null, + projectIdentity: "git:abc", + intent: "speed up the docker build cache", + kind: "discovery", + delta: "layer ordering matters", + normalizedHash: "h2", + createdAt: Date.now(), + }); + // A multi-token NL intent that shares SOME tokens with note 1 (auth, test) but NOT all 
— + // under AND-join this matches ZERO notes (the bug); under OR-join + bm25 it returns note 1. + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + intent: "auth test timing clock stabilization", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + expect(block).not.toBe(""); // RED with AND (empty), GREEN with OR + expect(block).toContain('mode="fts5-fallback"'); // proves rung-3 path + expect(block).toContain("mock the system clock"); // note 1's delta — the relevant note surfaced + expect(block).not.toContain("layer ordering"); // note 2 (docker) shares no tokens → not matched + } finally { + closeQuietly(db); + } + }); + + test("pinned notes appear even when intent doesn't match them (M2)", async () => { + const db = makeDb(); + EMBED_UP = true; + db.prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:x','old auth fix','fix','rotate token','h1',0,1,1)`, + ).run(); + for (let i = 0; i < 10; i++) { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: `note ${i}`, + kind: "fix", + delta: `delta ${i}`, + normalizedHash: `h${i + 2}`, + createdAt: 1000 + i, + intentEmbedding: float32ArrayToBlob(new Float32Array([0, 1])), + deltaEmbedding: float32ArrayToBlob(new Float32Array([0, 1])), + embeddingModelVersion: "m1", + }); + } + const block = await recallSkillMemoryBlock(db, { + skill: "s", + intent: "frontend css", + scope: "global", + projectIdentity: "git:x", + frontmatterConfig: cfg, + }); + expect(block).toContain("rotate token"); + expect(modeOf(block)).toBe("full"); + }); +}); + describe("buildSkillMemoryBlock", () => { test("returns empty string when notes array is empty (cold-start)", () => { expect(buildSkillMemoryBlock("tdd", "no-intent", [], 0)).toBe(""); @@ -74,6 +380,7 @@ 
describe("buildSkillMemoryBlock", () => { delta: "Always mock the clock", intent: "fix flaky test", hit_count: 3, + recall_count: 0, pinned: 0, normalized_hash: "h1", created_at: Date.now(), @@ -96,3 +403,63 @@ describe("buildSkillMemoryBlock", () => { expect(block).toContain("ctx_skill_note"); }); }); + +describe("recallSkillMemoryBlock bumps recall_count for surfaced notes", () => { + test("a surfaced note's recall_count increments per recall (no-intent rung)", async () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "fix a flaky test", + kind: "fix", + delta: "mock the clock", + normalizedHash: "rc1", + createdAt: Date.now(), + }); + const cfg = { enabled: true, max_tokens: 1500, max_pinned_tokens: 4000 }; + // Two recalls (no intent → rung 2, which surfaces the note both times). + const b1 = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + const b2 = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: cfg, + }); + expect(b1).toContain("mock the clock"); + expect(b2).toContain("mock the clock"); + const row = db + .prepare( + "SELECT recall_count, last_used_at FROM skill_memory WHERE normalized_hash='rc1'", + ) + .get() as { recall_count: number; last_used_at: number | null }; + expect(row.recall_count).toBe(2); // bumped once per recall + expect(row.last_used_at).toBeNull(); // recall must NOT touch recency + } finally { + closeQuietly(db); + } + }); + + test("a cold-start recall (no notes) bumps nothing and returns empty", async () => { + const db = makeDb(); + try { + const block = await recallSkillMemoryBlock(db, { + skill: "ghost", + scope: "global", + projectIdentity: "git:abc", + frontmatterConfig: { enabled: true, max_tokens: 1500, 
max_pinned_tokens: 4000 }, + }); + expect(block).toBe(""); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts index d76db421..4d8327ca 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -1,6 +1,16 @@ import type { Database } from "../../../shared/sqlite"; +import { cosineSimilarity } from "../memory/cosine-similarity"; +import { embedTextForProject } from "../memory/embedding"; +import { toFloat32Array } from "../memory/storage-memory-embeddings"; import type { SkillMemoryConfig } from "./frontmatter"; -import { getSkillMemoryNotes, type SkillMemoryNote } from "./storage"; +import { + bumpRecallCountByIds, + getPinnedNotes, + getRankingCandidates, + getSkillMemoryNotes, + type SkillMemoryNote, + searchSkillMemoryFts, +} from "./storage"; export interface FlatRecallOptions { maxTokens: number; @@ -24,31 +34,93 @@ export function flatRecall( projectIdentity: string, options: FlatRecallOptions, ): SkillMemoryNote[] { - // Fetch a generous candidate set (2× budget as a heuristic) const candidates = getSkillMemoryNotes(db, skillId, tier, projectIdentity, 50); if (candidates.length === 0) return []; + return budgetFill(candidates, options.maxTokens, options.maxPinnedTokens); +} + +/** + * Build the XML block to append to the skill tool result. + * Returns empty string for cold-start (no notes) — no empty stub injected. + */ +/** Escape a natural-language intent into a safe FTS5 MATCH query: quote each alphanumeric token. */ +export function sanitizeSkillIntentForFts(intent: string): string { + const tokens = intent.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? 
[]; + return tokens.map((t) => `"${t.replace(/"/g, '""')}"`).join(" OR "); +} - const pinned = candidates.filter((n) => n.pinned === 1); - const unpinned = candidates.filter((n) => n.pinned === 0); +export interface Rung1Note { + id: number; + intentVec: Float32Array | null; + deltaVec: Float32Array | null; + ts: number; + hit: number; +} +export interface Rung1Weights { + relevance: number; + recency: number; + hit: number; +} +export function rankRung1( + q: Float32Array, + notes: Rung1Note[], + w: Rung1Weights, +): Array<{ id: number; score: number }> { + if (notes.length === 0) return []; + const tsVals = notes.map((n) => n.ts); + const minTs = Math.min(...tsVals), + maxTs = Math.max(...tsVals); + const maxHit = Math.max(...notes.map((n) => n.hit)); + const denR = maxTs - minTs; + return notes + .map((n) => { + const ic = n.intentVec ? cosineSimilarity(q, n.intentVec) : -1; + const dc = n.deltaVec ? cosineSimilarity(q, n.deltaVec) : -1; + const relevance = Math.max(0, Math.max(ic, dc)); + const recency = denR === 0 ? 1.0 : (n.ts - minTs) / denR; + const hitN = maxHit === 0 ? 0.0 : n.hit / maxHit; + return { + id: n.id, + score: w.relevance * relevance + w.recency * recency + w.hit * hitN, + }; + }) + .sort((a, b) => b.score - a.score); +} + +/** Normalize the 3 weights to sum 1; all-zero (or omitted) → defaults. */ +function resolveWeights(cfg: SkillMemoryConfig): Rung1Weights { + const r = cfg.ranking_relevance ?? 0.6, + c = cfg.ranking_recency ?? 0.25, + h = cfg.ranking_hit ?? 0.15; + const sum = r + c + h; + if (sum === 0) return { relevance: 0.6, recency: 0.25, hit: 0.15 }; + return { relevance: r / sum, recency: c / sum, hit: h / sum }; +} + +/** Prepend pinned notes (already pinned-first ordered by getPinnedNotes) ahead of `rest`, deduped by id. 
*/ +function unionPinnedFirst(pinned: SkillMemoryNote[], rest: SkillMemoryNote[]): SkillMemoryNote[] { + const seen = new Set(pinned.map((n) => n.id)); + return [...pinned, ...rest.filter((n) => !seen.has(n.id))]; +} + +/** Greedy fill pinned-first up to token budget; pinned up to maxPinnedTokens, total up to maxTokens. */ +function budgetFill( + notes: SkillMemoryNote[], + maxTokens: number, + maxPinnedTokens: number, +): SkillMemoryNote[] { const result: SkillMemoryNote[] = []; let pinnedTokens = 0; let totalTokens = 0; - // Always include pinned notes (up to maxPinnedTokens) - for (const note of pinned) { + for (const note of notes) { const tokens = estimateTokens(note.delta); - if (pinnedTokens + tokens > options.maxPinnedTokens) break; - result.push(note); - pinnedTokens += tokens; - totalTokens += tokens; - } - - // Fill remaining budget with unpinned notes - for (const note of unpinned) { - if (totalTokens >= options.maxTokens) break; - const tokens = estimateTokens(note.delta); - if (totalTokens + tokens > options.maxTokens) break; + if (note.pinned === 1) { + if (pinnedTokens + tokens > maxPinnedTokens) continue; + pinnedTokens += tokens; + } + if (totalTokens + tokens > maxTokens) continue; result.push(note); totalTokens += tokens; } @@ -56,13 +128,9 @@ export function flatRecall( return result; } -/** - * Build the XML block to append to the skill tool result. - * Returns empty string for cold-start (no notes) — no empty stub injected. - */ export function buildSkillMemoryBlock( skillId: string, - mode: "no-intent" | "flat-fts", + mode: "no-intent" | "flat-fts" | "full" | "fts5-fallback", notes: SkillMemoryNote[], pinnedCount: number, ): string { @@ -112,7 +180,7 @@ function escapeXml(str: string): string { * Lives in the feature layer (not hook-handlers.ts) to avoid tools→hooks layering. * P2 embeddings benefit both paths automatically when this function is upgraded. 
*/ -export function recallSkillMemoryBlock( +export async function recallSkillMemoryBlock( db: Database, opts: { skill: string; @@ -122,25 +190,105 @@ export function recallSkillMemoryBlock( frontmatterConfig: SkillMemoryConfig | null; maxTokens?: number; }, -): string { - // Guard: skill-memory must be enabled for this skill +): Promise<string> { if (!opts.frontmatterConfig?.enabled) return ""; - try { const maxTokens = opts.maxTokens ?? opts.frontmatterConfig.max_tokens; - const notes = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, { - maxTokens, - maxPinnedTokens: opts.frontmatterConfig.max_pinned_tokens, - }); - if (notes.length === 0) return ""; // cold-start: no block - - const pinnedCount = notes.filter((n) => n.pinned === 1).length; - // P1: always "no-intent" flat recall. P2 will add intent-aware ranking (fts5-fallback rung). - // TODO (P2): const mode: "no-intent" | "flat-fts" = opts.intent ? "flat-fts" : "no-intent"; - const mode: "no-intent" | "flat-fts" = "no-intent"; - return buildSkillMemoryBlock(opts.skill, mode, notes, pinnedCount); + const maxPinned = opts.frontmatterConfig.max_pinned_tokens; + const intent = opts.intent?.trim(); + + // Single chokepoint for every rung: bump read-side recall_count for the notes + // actually surfaced, then format the block. Empty selection → empty string (no bump). + const finalize = ( + mode: "no-intent" | "flat-fts" | "full" | "fts5-fallback", + notes: SkillMemoryNote[], + ): string => { + if (notes.length === 0) return ""; + bumpRecallCountByIds( + db, + notes.map((n) => n.id), + ); + return buildSkillMemoryBlock( + opts.skill, + mode, + notes, + notes.filter((n) => n.pinned === 1).length, + ); + }; + + // Rung 2: no intent → flat recency×hit (nothing to embed/FTS-match → always "no-intent"). 
+ if (!intent) { + const notes = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, { + maxTokens, + maxPinnedTokens: maxPinned, + }); + return finalize("no-intent", notes); + } + + const q = await embedTextForProject(opts.projectIdentity, intent); + if (q) { + const candidates = getRankingCandidates( + db, + opts.skill, + opts.scope, + opts.projectIdentity, + 200, + ); + const matched = candidates.filter( + (n) => + n.embedding_model_version === q.modelId && + (n.intent_embedding || n.delta_embedding), + ); + if (matched.length > 0) { + const weights = resolveWeights(opts.frontmatterConfig); + const ranked = rankRung1( + q.vector, + matched.map((n) => ({ + id: n.id, + intentVec: n.intent_embedding ? toFloat32Array(n.intent_embedding) : null, + deltaVec: n.delta_embedding ? toFloat32Array(n.delta_embedding) : null, + ts: n.last_used_at ?? n.created_at, + hit: n.hit_count, + })), + weights, + ); + const byId = new Map(candidates.map((n) => [n.id, n])); + const rankedNotes = ranked + .map((r) => byId.get(r.id)) + .filter((n): n is SkillMemoryNote => n != null); + const ordered = unionPinnedFirst( + getPinnedNotes(db, opts.skill, opts.scope, opts.projectIdentity), + rankedNotes, + ); + const selected = budgetFill(ordered, maxTokens, maxPinned); + return finalize("full", selected); + } + // zero model-matched → fall to FTS rung 3. 
+ } + + const match = sanitizeSkillIntentForFts(intent); + if (match === "") { + const notes = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, { + maxTokens, + maxPinnedTokens: maxPinned, + }); + return finalize("flat-fts", notes); + } + const ftsNotes = searchSkillMemoryFts( + db, + opts.skill, + opts.scope, + opts.projectIdentity, + match, + 50, + ); + const ordered = unionPinnedFirst( + getPinnedNotes(db, opts.skill, opts.scope, opts.projectIdentity), + ftsNotes, + ); + const selected = budgetFill(ordered, maxTokens, maxPinned); + return finalize("fts5-fallback", selected); } catch { - // Non-fatal: recall failure must never block the tool result return ""; } } diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts new file mode 100644 index 00000000..ccd75b9b --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts @@ -0,0 +1,45 @@ +import { expect, mock, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; + +// Provider "up" with a deterministic model — MUST precede the reembed.ts import. 
+mock.module("../memory/embedding", () => ({ + embedTextForProject: async (_p: string, _text: string) => ({ + vector: new Float32Array([0.1, 0.2, 0.3]), + modelId: "test-model", + generation: 1, + }), +})); +const { reembedStaleSkillNotes } = await import("./reembed"); + +test("reembedStaleSkillNotes fills NULL embeddings (bounded, idempotent)", async () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + db.prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:abc','fix auth','fix','mock Date','h1',0,0,1)`, + ).run(); + const result1 = await reembedStaleSkillNotes(db, "git:abc"); + expect(result1.reembedded).toBe(1); + const row = db + .prepare( + "SELECT intent_embedding, delta_embedding, embedding_model_version FROM skill_memory WHERE normalized_hash='h1'", + ) + .get() as { + intent_embedding: unknown; + delta_embedding: unknown; + embedding_model_version: string; + }; + expect(row.intent_embedding).not.toBeNull(); + expect(row.delta_embedding).not.toBeNull(); + expect(row.embedding_model_version).toBe("test-model"); + const result2 = await reembedStaleSkillNotes(db, "git:abc"); + expect(result2.reembedded).toBe(0); + } finally { + closeQuietly(db); + } +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts new file mode 100644 index 00000000..040a540b --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts @@ -0,0 +1,43 @@ +import type { Database } from "../../../shared/sqlite"; +import { embedTextForProject } from "../memory/embedding"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; + +const REEMBED_CAP = 200; + +/** Programmatic (no-LLM) backfill: re-embed notes with NULL/stale vectors. Idempotent; skips when provider off. 
*/ +export async function reembedStaleSkillNotes( + db: Database, + projectIdentity: string, +): Promise<{ reembedded: number }> { + // Probe the current model version once (also tells us if the provider is up). + const probe = await embedTextForProject(projectIdentity, "probe"); + if (!probe) return { reembedded: 0 }; // provider off — nothing to do + const currentModel = probe.modelId; + + // Stale = NULL embeddings OR embedded under a DIFFERENT model version. + // `embedding_model_version IS NOT ?` is NULL-safe SQL, so it also matches rows that were never embedded; the explicit + // IS NULL clauses additionally catch partially embedded rows, since insertSkillMemoryNote accepts each embedding column independently. + const stale = db + .prepare( + `SELECT id, intent, delta FROM skill_memory + WHERE project_identity = ? + AND (intent_embedding IS NULL OR delta_embedding IS NULL OR embedding_model_version IS NOT ?) + LIMIT ?`, + ) + .all(projectIdentity, currentModel, REEMBED_CAP) as Array<{ + id: number; + intent: string; + delta: string; + }>; + let n = 0; + for (const row of stale) { + const iv = await embedTextForProject(projectIdentity, row.intent); + const dv = await embedTextForProject(projectIdentity, row.delta); + if (!iv || !dv) break; // provider went down mid-batch — stop + db.prepare( + `UPDATE skill_memory SET intent_embedding=?, delta_embedding=?, embedding_model_version=? 
WHERE id=?`, + ).run(float32ArrayToBlob(iv.vector), float32ArrayToBlob(dv.vector), dv.modelId, row.id); + n++; + } + return { reembedded: n }; +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts index a380e1d6..3be0adc6 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts @@ -1,14 +1,21 @@ import { describe, expect, test } from "bun:test"; import { Database } from "../../../shared/sqlite"; import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; import { runMigrations } from "../migrations"; import { initializeDatabase } from "../storage-db"; import { bumpHitCount, + bumpHitCountById, + bumpRecallCountByIds, + getDedupCandidates, + getPinnedNotes, + getRankingCandidates, getSkillMemoryNotes, getSkillMemoryStats, type InsertSkillMemoryNoteArgs, insertSkillMemoryNote, + searchSkillMemoryFts, } from "./storage"; function makeDb(): Database { @@ -65,6 +72,35 @@ describe("skill_memory storage", () => { } }); + test("insertSkillMemoryNote stores intent_embedding/delta_embedding/embedding_model_version", () => { + const db = makeDb(); + try { + const iv = float32ArrayToBlob(new Float32Array([1, 0, 0])); + const dv = float32ArrayToBlob(new Float32Array([0, 1, 0])); + const id = insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "emb-hash", + createdAt: 1, + intentEmbedding: iv, + deltaEmbedding: dv, + embeddingModelVersion: "m1", + }); + const row = db + .prepare("SELECT embedding_model_version FROM skill_memory WHERE id=?") + .get(id) as { embedding_model_version: string }; + expect(row.embedding_model_version).toBe("m1"); + } finally 
{ + closeQuietly(db); + } + }); + test("getSkillMemoryNotes returns notes ordered by recency × hit_count", () => { const db = makeDb(); try { @@ -107,6 +143,29 @@ describe("skill_memory storage", () => { } }); + test("bumpHitCountById increments by id", () => { + const db = makeDb(); + try { + const id = Number( + ( + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:x','i','fix','d','h',0,0,1) RETURNING id`, + ) + .get() as { id: number } + ).id, + ); + bumpHitCountById(db, id); + const row = db.prepare("SELECT hit_count FROM skill_memory WHERE id=?").get(id) as { + hit_count: number; + }; + expect(row.hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + test("bumpHitCount increments hit_count and updates last_used_at", () => { const db = makeDb(); try { @@ -132,6 +191,63 @@ describe("skill_memory storage", () => { } }); + test("bumpRecallCountByIds increments recall_count without touching last_used_at or hit_count", () => { + const db = makeDb(); + try { + const mkId = (hash: string): number => + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "global", + skillSource: "opencode-global", + projectIdentity: "git:abc", + intent: "i", + kind: "fix", + delta: `d-${hash}`, + normalizedHash: hash, + createdAt: Date.now(), + }) as number; + const id1 = mkId("r1"); + const id2 = mkId("r2"); + const id3 = mkId("r3"); + + // Surface only id1 + id2 twice; id3 never recalled. 
+ bumpRecallCountByIds(db, [id1, id2]); + bumpRecallCountByIds(db, [id1, id2]); + + const rows = db + .prepare( + "SELECT id, recall_count, hit_count, last_used_at FROM skill_memory ORDER BY id", + ) + .all() as Array<{ + id: number; + recall_count: number; + hit_count: number; + last_used_at: number | null; + }>; + const byId = new Map(rows.map((r) => [r.id, r])); + expect(byId.get(id1)?.recall_count).toBe(2); + expect(byId.get(id2)?.recall_count).toBe(2); + expect(byId.get(id3)?.recall_count).toBe(0); + // read-counter must NOT pollute write-side salience or recency + expect(byId.get(id1)?.hit_count).toBe(0); + expect(byId.get(id1)?.last_used_at).toBeNull(); + } finally { + closeQuietly(db); + } + }); + + test("bumpRecallCountByIds is a no-op on empty ids", () => { + const db = makeDb(); + try { + // must not throw + bumpRecallCountByIds(db, []); + expect(true).toBe(true); + } finally { + closeQuietly(db); + } + }); + test("getSkillMemoryStats returns totals scoped to project_identity", () => { const db = makeDb(); try { @@ -214,6 +330,26 @@ describe("skill_memory storage", () => { } }); + test("getSkillMemoryNotes: equal timestamps don't break ordering (NULLIF guard)", () => { + const db = makeDb(); + try { + const ts = 1_000_000; + const ins = (hash: string, hits: number) => + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) + VALUES ('s','/p','global','git:x','i','fix','d',?,?,0,?,?)`, + ) + .run(hash, hits, ts, ts); + ins("a", 1); + ins("b", 5); + const notes = getSkillMemoryNotes(db, "s", "global", "git:x", 10); + expect(notes[0].normalized_hash).toBe("b"); // higher hit_count first when timestamps equal + } finally { + closeQuietly(db); + } + }); + test("getSkillMemoryStats returns all-zeros when no notes exist for the project", () => { const db = makeDb(); try { @@ -225,4 +361,120 @@ describe("skill_memory storage", () => { closeQuietly(db); 
} }); + + test("getDedupCandidates returns top-N same-scope rows with delta_embedding + model version", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d1", + normalizedHash: "dedup-h1", + createdAt: 1, + deltaEmbedding: float32ArrayToBlob(new Float32Array([1, 0])), + embeddingModelVersion: "m1", + }); + const cands = getDedupCandidates(db, "s", "global", "git:x", 200); + expect(cands.length).toBe(1); + expect(cands[0].delta_embedding).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + test("getRankingCandidates returns scope-filtered rows ordered by recency", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "rank-h1", + createdAt: 1, + }); + const cands = getRankingCandidates(db, "s", "global", "git:x", 10); + expect(cands.length).toBe(1); + expect(cands[0].skill_id).toBe("s"); + } finally { + closeQuietly(db); + } + }); + + test("searchSkillMemoryFts returns scope-filtered BM25 matches", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now", + normalizedHash: "fts-h1", + createdAt: 1, + }); + insertSkillMemoryNote(db, { + skillId: "OTHER", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "auth", + kind: "fix", + delta: "x", + normalizedHash: "fts-h2", + createdAt: 1, + }); + const hits = searchSkillMemoryFts(db, "s", "global", "git:x", '"auth"', 10); + expect(hits.every((h) => h.skill_id === "s")).toBe(true); + expect(hits.length).toBe(1); + } finally { + 
closeQuietly(db); + } + }); + + test("getPinnedNotes returns only pinned same-scope rows", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "global", + skillSource: null, + projectIdentity: "git:x", + intent: "i", + kind: "fix", + delta: "unpinned", + normalizedHash: "pin-h1", + createdAt: 1, + }); + const pid = Number( + ( + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','git:x','i','fix','pinned','pin-h2',0,1,2) RETURNING id`, + ) + .get() as { id: number } + ).id, + ); + const pinned = getPinnedNotes(db, "s", "global", "git:x"); + expect(pinned.length).toBe(1); + expect(pinned[0].id).toBe(pid); + } finally { + closeQuietly(db); + } + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts index 4eb57513..c3a3d432 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -9,11 +9,13 @@ export interface SkillMemoryNote { project_identity: string; intent: string; intent_embedding: Buffer | null; + delta_embedding: Buffer | null; embedding_model_version: string | null; kind: "gotcha" | "discovery" | "fix" | "workflow"; delta: string; tags: string | null; hit_count: number; + recall_count: number; pinned: number; normalized_hash: string; created_at: number; @@ -30,6 +32,9 @@ export interface InsertSkillMemoryNoteArgs { kind: "gotcha" | "discovery" | "fix" | "workflow"; delta: string; tags?: string[]; + intentEmbedding?: Buffer | null; + deltaEmbedding?: Buffer | null; + embeddingModelVersion?: string | null; normalizedHash: string; createdAt: number; } @@ -48,8 +53,9 @@ export function insertSkillMemoryNote( .prepare( `INSERT INTO skill_memory (skill_id, resolved_path, tier, 
skill_source, project_identity, - intent, kind, delta, tags, hit_count, pinned, normalized_hash, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, + intent, kind, delta, tags, intent_embedding, delta_embedding, embedding_model_version, + hit_count, pinned, normalized_hash, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, ) .run( args.skillId, @@ -61,6 +67,9 @@ export function insertSkillMemoryNote( args.kind, args.delta, args.tags ? JSON.stringify(args.tags) : null, + args.intentEmbedding ?? null, + args.deltaEmbedding ?? null, + args.embeddingModelVersion ?? null, args.normalizedHash, args.createdAt, ); @@ -79,8 +88,8 @@ export function insertSkillMemoryNote( * Ordered by normalized additive recency + hit_count score (pinned notes first). * * Scoring: recency_norm + hit_norm where: - * recency_norm = (ts - min_ts) / NULLIF(max_ts - min_ts, 1) — 0..1 range - * hit_norm = hit_count / NULLIF(MAX(hit_count) OVER (), 1) — 0..1 range + * recency_norm = (ts - min_ts) / NULLIF(max_ts - min_ts, 0) — 0..1 range (0 when all timestamps equal) + * hit_norm = hit_count / NULLIF(MAX(hit_count) OVER (), 0) — 0..1 range (0 when all hit_counts 0) * Additive (not multiplicative) so hit_count is not swamped by timestamp scale. * * NOTE: The window-function form requires SQLite ≥ 3.25 (2018). Bun ships SQLite ≥ 3.39. @@ -104,12 +113,18 @@ export function getSkillMemoryNotes( FROM skill_memory WHERE skill_id = ? AND tier = ? AND project_identity = ? 
ORDER BY - pinned DESC, + pinned DESC, ( - (COALESCE(last_used_at, created_at) - MIN(COALESCE(last_used_at, created_at)) OVER ()) * 1.0 - / NULLIF(MAX(COALESCE(last_used_at, created_at)) OVER () - MIN(COALESCE(last_used_at, created_at)) OVER (), 1) + COALESCE( + (COALESCE(last_used_at, created_at) - MIN(COALESCE(last_used_at, created_at)) OVER ()) * 1.0 + / NULLIF(MAX(COALESCE(last_used_at, created_at)) OVER () - MIN(COALESCE(last_used_at, created_at)) OVER (), 0), + 0.0 + ) + - hit_count * 1.0 / NULLIF(MAX(hit_count) OVER (), 1) + COALESCE( + hit_count * 1.0 / NULLIF(MAX(hit_count) OVER (), 0), + 0.0 + ) ) DESC, created_at DESC LIMIT ?`, @@ -135,6 +150,35 @@ export function bumpHitCount( ).run(Date.now(), skillId, tier, projectIdentity, normalizedHash); } +/** + * Bump hit_count + last_used_at for a note identified by id (used by cosine dedup, which has no hash). + */ +export function bumpHitCountById(db: Database, id: number): void { + db.prepare( + `UPDATE skill_memory SET hit_count = hit_count + 1, last_used_at = ? WHERE id = ?`, + ).run(Date.now(), id); +} + +/** + * Bump recall_count for notes actually surfaced in a recall block (read-side usage). + * Distinct from hit_count (write-side re-record salience): recall_count answers + * "which notes are recalled most". Deliberately does NOT touch last_used_at — the + * ranking's recency term must reflect when a lesson was learned/re-recorded, not when + * it was surfaced, else surfaced notes would always win recency and starve new notes. + * Best-effort, no-throw: a counter write must never break recall. 
+ */ +export function bumpRecallCountByIds(db: Database, ids: number[]): void { + if (ids.length === 0) return; + try { + const placeholders = ids.map(() => "?").join(","); + db.prepare( + `UPDATE skill_memory SET recall_count = recall_count + 1 WHERE id IN (${placeholders})`, + ).run(...ids); + } catch { + // never let usage-tracking break a recall + } +} + /** * Check if a note with the given normalized_hash already exists. * Returns the existing note's id and hit_count, or null. @@ -164,6 +208,75 @@ export function findExistingNote( * Used by the ctx-status / TUI status dialog (mirrors the external-memory * status surface). Sync, single query; safe to call on every status poll. */ +export function getDedupCandidates( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): Array> { + return db + .prepare( + `SELECT id, delta_embedding, embedding_model_version FROM skill_memory + WHERE skill_id=? AND tier=? AND project_identity=? + ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, + ) + .all(skillId, tier, projectIdentity, limit) as Array< + Pick + >; +} + +export function getRankingCandidates( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + limit: number, +): SkillMemoryNote[] { + return db + .prepare( + `SELECT * FROM skill_memory + WHERE skill_id=? AND tier=? AND project_identity=? + ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, + ) + .all(skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; +} + +export function searchSkillMemoryFts( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, + matchQuery: string, + limit: number, +): SkillMemoryNote[] { + return db + .prepare( + `SELECT m.* FROM skill_memory_fts f + JOIN skill_memory m ON m.id = f.rowid + WHERE skill_memory_fts MATCH ? + AND m.skill_id=? AND m.tier=? AND m.project_identity=? 
+ ORDER BY bm25(skill_memory_fts) ASC, COALESCE(m.last_used_at, m.created_at) DESC + LIMIT ?`, + ) + .all(matchQuery, skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; +} + +export function getPinnedNotes( + db: Database, + skillId: string, + tier: "project" | "global", + projectIdentity: string, +): SkillMemoryNote[] { + return db + .prepare( + `SELECT * FROM skill_memory + WHERE skill_id=? AND tier=? AND project_identity=? AND pinned=1 + ORDER BY COALESCE(last_used_at, created_at) DESC`, + ) + .all(skillId, tier, projectIdentity) as SkillMemoryNote[]; +} + export function getSkillMemoryStats( db: Database, projectIdentity: string, diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index eb3749bf..6a53f3ab 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -38,7 +38,7 @@ export function getSchemaFenceRejection(): { return lastSchemaFenceRejection; } -export const LATEST_SUPPORTED_VERSION = 50; +export const LATEST_SUPPORTED_VERSION = 51; // chmod is meaningless on Windows (POSIX modes are not honored), so all // permission tightening is skipped there. mkdir's `mode` is likewise ignored. 
diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts index 8d2c0a46..f9bab309 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.test.ts @@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test"; import { runMigrations } from "../../features/magic-context/migrations"; +import { insertSkillMemoryNote } from "../../features/magic-context/skill-memory/storage"; import { initializeDatabase } from "../../features/magic-context/storage-db"; import { getOrCreateSessionMeta, @@ -13,7 +14,11 @@ import { } from "../../features/magic-context/storage-meta-persisted"; import { Database } from "../../shared/sqlite"; import { closeQuietly } from "../../shared/sqlite-helpers"; -import { createEventHook, createToolExecuteAfterHook } from "./hook-handlers"; +import { + createEventHook, + createToolExecuteAfterHook, + maybeInjectSkillMemory, +} from "./hook-handlers"; function createTestDb(): Database { const db = new Database(":memory:"); @@ -22,10 +27,21 @@ function createTestDb(): Database { return db; } +const CFG = { + enabled: true as const, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, +}; + function createTestHook(db: Database): ReturnType { return createToolExecuteAfterHook({ db, channel1StateBySession: new Map(), + skillLoadRegistry: new Map(), + sessionDirectoryBySession: new Map(), + defaultDirectory: "/tmp/test", + intentByCallId: new Map(), }); } @@ -235,3 +251,31 @@ describe("createEventHook mid-session model switch clears overflow state", () => } }); }); + +describe("maybeInjectSkillMemory intent threading", () => { + test("maybeInjectSkillMemory threads intent → FTS rung (vs no-intent without it)", async () => { + const db = createTestDb(); + try { + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p/SKILL.md", + tier: "global", + skillSource: 
"opencode-global", + projectIdentity: "git:abc", + intent: "fix the auth flake", + kind: "fix", + delta: "stub the clock", + normalizedHash: "h1", + createdAt: Date.now(), + }); + const withIntent = { output: "# tool result" }; + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", CFG, withIntent, "auth"); + expect(withIntent.output).toContain('mode="fts5-fallback"'); + const noIntent = { output: "# tool result" }; + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", CFG, noIntent, undefined); + expect(noIntent.output).toContain('mode="no-intent"'); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.ts index ee5f5582..2abf14dc 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.ts @@ -530,19 +530,21 @@ export function getAndDeleteIntent(map: IntentByCallIdMap, callId: string): stri * Append ordering: this runs BEFORE maybeInjectChannel1Nudge (skill-memory * content before Channel-1 meta-reminder). See design §2.6. */ -export function maybeInjectSkillMemory( +export async function maybeInjectSkillMemory( db: Database, skillId: string, tier: "project" | "global", projectIdentity: string, frontmatterConfig: SkillMemoryConfig | null, output: { output?: unknown }, -): void { + intent?: string, +): Promise { if (typeof output.output !== "string" || output.output.length === 0) return; // Delegate to shared recall core (also used by ctx_skill_recall tool) - const block = recallSkillMemoryBlock(db, { + const block = await recallSkillMemoryBlock(db, { skill: skillId, + intent, scope: tier, projectIdentity, frontmatterConfig, @@ -574,9 +576,15 @@ export function createToolExecuteAfterHook(args: { * we fall back to `defaultDirectory` (deps.directory). 
*/ sessionDirectoryBySession: Map; defaultDirectory: string; + intentByCallId: IntentByCallIdMap; }) { return async (input: unknown, output?: unknown) => { - const typedInput = input as { tool?: string; sessionID?: string; args?: unknown }; + const typedInput = input as { + tool?: string; + sessionID?: string; + callID?: string; + args?: unknown; + }; if (!typedInput.sessionID || !typedInput.tool) { return; } @@ -648,13 +656,18 @@ export function createToolExecuteAfterHook(args: { args.sessionDirectoryBySession.get(typedInput.sessionID) ?? args.defaultDirectory; const projectIdentity = resolveProjectIdentity(sessionDir); - maybeInjectSkillMemory( + const stashed = typedInput.callID + ? (getAndDeleteIntent(args.intentByCallId, typedInput.callID) ?? + undefined) + : undefined; + await maybeInjectSkillMemory( args.db, skillId, registryEntry.tier, projectIdentity, registryEntry.frontmatterConfig, output as { output?: unknown }, + stashed, ); } } catch (error) { diff --git a/packages/plugin/src/hooks/magic-context/hook.ts b/packages/plugin/src/hooks/magic-context/hook.ts index 9ba3385d..62b0c571 100644 --- a/packages/plugin/src/hooks/magic-context/hook.ts +++ b/packages/plugin/src/hooks/magic-context/hook.ts @@ -965,6 +965,7 @@ export function createMagicContextHook(deps: MagicContextDeps) { // for the first-turn case where the map isn't seeded yet. sessionDirectoryBySession, defaultDirectory: deps.directory, + intentByCallId, }), "tool.execute.before": createToolExecuteBeforeHook({ intentByCallId }), // Exposed so index.ts can pass the SAME instance to createCtxSkillNoteTool. 
diff --git a/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts index 8e66cf4e..3e231f53 100644 --- a/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts +++ b/packages/plugin/src/hooks/magic-context/skill-memory-injection.test.ts @@ -15,7 +15,7 @@ function makeDb(): Database { } describe("recallSkillMemoryBlock (shared recall core)", () => { - test("returns non-empty string containing { + test("returns non-empty string containing { const db = makeDb(); try { insertSkillMemoryNote(db, { @@ -30,7 +30,7 @@ describe("recallSkillMemoryBlock (shared recall core)", () => { normalizedHash: "h-recall-1", createdAt: Date.now(), }); - const block = recallSkillMemoryBlock(db, { + const block = await recallSkillMemoryBlock(db, { skill: "tdd", scope: "global", projectIdentity: "git:abc", @@ -48,7 +48,7 @@ describe("recallSkillMemoryBlock (shared recall core)", () => { } }); - test("returns empty string when frontmatterConfig is null (not opted in)", () => { + test("returns empty string when frontmatterConfig is null (not opted in)", async () => { const db = makeDb(); try { insertSkillMemoryNote(db, { @@ -63,7 +63,7 @@ describe("recallSkillMemoryBlock (shared recall core)", () => { normalizedHash: "h-recall-2", createdAt: Date.now(), }); - const block = recallSkillMemoryBlock(db, { + const block = await recallSkillMemoryBlock(db, { skill: "tdd", scope: "global", projectIdentity: "git:abc", @@ -75,10 +75,10 @@ describe("recallSkillMemoryBlock (shared recall core)", () => { } }); - test("returns empty string when no notes exist (cold-start)", () => { + test("returns empty string when no notes exist (cold-start)", async () => { const db = makeDb(); try { - const block = recallSkillMemoryBlock(db, { + const block = await recallSkillMemoryBlock(db, { skill: "nonexistent-skill", scope: "global", projectIdentity: "git:abc", @@ -97,7 +97,7 @@ describe("recallSkillMemoryBlock 
(shared recall core)", () => { }); describe("maybeInjectSkillMemory", () => { - test("appends skill-memory block to output.output when notes exist", () => { + test("appends skill-memory block to output.output when notes exist", async () => { const db = makeDb(); try { insertSkillMemoryNote(db, { @@ -116,7 +116,7 @@ describe("maybeInjectSkillMemory", () => { const output = { output: "# TDD Skill\nContent here." }; // Pass enabled frontmatterConfig — null triggers the early-return guard // (`if (!frontmatterConfig?.enabled) return;`) and the block is never injected. - maybeInjectSkillMemory( + await maybeInjectSkillMemory( db, "tdd", "global", @@ -133,18 +133,18 @@ describe("maybeInjectSkillMemory", () => { } }); - test("does NOT append when no notes exist (cold-start)", () => { + test("does NOT append when no notes exist (cold-start)", async () => { const db = makeDb(); try { const output = { output: "# TDD Skill\nContent here." }; - maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); expect(output.output).not.toContain(" { + test("does NOT append when frontmatterConfig is null (skill-memory not opted in)", async () => { const db = makeDb(); try { insertSkillMemoryNote(db, { @@ -161,14 +161,14 @@ describe("maybeInjectSkillMemory", () => { }); const output = { output: "# TDD Skill\nContent here." }; // null frontmatterConfig = skill-memory not enabled for this skill - maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); + await maybeInjectSkillMemory(db, "tdd", "global", "git:abc", null, output); expect(output.output).not.toContain(" { + test("skill-memory block appears AFTER existing output content (append semantics)", async () => { // maybeInjectSkillMemory APPENDS to output.output — it does NOT prepend. // So if a sentinel is already in the output, the skill-memory block lands AFTER it. 
// This test verifies the append contract: skillMemoryPos > channel1Pos. @@ -193,7 +193,7 @@ describe("maybeInjectSkillMemory", () => { createdAt: Date.now(), }); const output = { output: "# TDD Skill\nContent here.\n" }; - maybeInjectSkillMemory( + await maybeInjectSkillMemory( db, "tdd", "global", diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts index 6bad3790..5b45b927 100644 --- a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts +++ b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, test } from "bun:test"; +import { describe, expect, mock, test } from "bun:test"; import { runMigrations } from "../../features/magic-context/migrations"; import { createSkillLoadRegistry, @@ -10,6 +10,16 @@ import { Database } from "../../shared/sqlite"; import { closeQuietly } from "../../shared/sqlite-helpers"; import { createCtxSkillNoteTool } from "./tools"; +mock.module("../../features/magic-context/memory/embedding", () => ({ + embedTextForProject: async (_p: string, text: string) => ({ + vector: text.includes("mock Date.now") + ? 
new Float32Array([1, 0]) + : new Float32Array([0, 1]), + modelId: "test-model", + generation: 1, + }), +})); + function makeDb(): Database { const db = new Database(":memory:"); initializeDatabase(db); @@ -127,4 +137,95 @@ describe("ctx_skill_note tool", () => { closeQuietly(db); } }); + + test("near-duplicate delta under a different intent bumps hit_count (one row)", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now in auth tests", + }, + toolContext(), + ); + await t.execute( + { + skill: "tdd", + intent: "stabilize login spec", + kind: "fix", + delta: "also mock Date.now in auth tests", + }, + toolContext(), + ); + const rows = db + .prepare("SELECT hit_count FROM skill_memory WHERE skill_id='tdd'") + .all() as Array<{ hit_count: number }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("same-intent different-delta inserts a second row", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "tdd"), { + resolvedPath: "/home/user/.config/opencode/skills/tdd/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "tdd", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, 
skillLoadRegistry: registry }); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "mock Date.now in auth tests", + }, + toolContext(), + ); + await t.execute( + { + skill: "tdd", + intent: "fix flaky auth test", + kind: "fix", + delta: "use a fixed seed for the rng", + }, + toolContext(), + ); + const rows = db.prepare("SELECT id FROM skill_memory WHERE skill_id='tdd'").all(); + expect(rows.length).toBe(2); + } finally { + closeQuietly(db); + } + }); }); diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.ts b/packages/plugin/src/tools/ctx-skill-note/tools.ts index 1c304b04..08413e0b 100644 --- a/packages/plugin/src/tools/ctx-skill-note/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-note/tools.ts @@ -1,10 +1,18 @@ import { type ToolContext, type ToolDefinition, tool } from "@opencode-ai/plugin"; +import { cosineSimilarity } from "../../features/magic-context/memory/cosine-similarity"; +import { embedTextForProject } from "../../features/magic-context/memory/embedding"; import { computeNormalizedHash } from "../../features/magic-context/memory/normalize-hash"; import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; +import { + float32ArrayToBlob, + toFloat32Array, +} from "../../features/magic-context/memory/storage-memory-embeddings"; import { getSkillLoad } from "../../features/magic-context/skill-memory/provenance"; import { bumpHitCount, + bumpHitCountById, findExistingNote, + getDedupCandidates, insertSkillMemoryNote, } from "../../features/magic-context/skill-memory/storage"; import { @@ -104,6 +112,38 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti ); } + // Embed both fields (best-effort; null on provider-off/unseeded — note still inserts). 
+ let intentEmb = await embedTextForProject(projectIdentity, args.intent); + const deltaEmb = await embedTextForProject(projectIdentity, args.delta); + // Guard mixed vector spaces: if a re-registration happened between the two embeds, discard intent to keep one space. + if (intentEmb && deltaEmb && intentEmb.modelId !== deltaEmb.modelId) { + intentEmb = null; + } + const modelVersion = deltaEmb?.modelId ?? intentEmb?.modelId ?? null; + + // Delta-only semantic dedup (bounded top-200, model-matched). + if (deltaEmb) { + const cands = getDedupCandidates( + deps.db, + args.skill, + registryEntry.tier, + projectIdentity, + 200, + ); + const threshold = registryEntry.frontmatterConfig?.dedup_threshold ?? 0.92; + for (const c of cands) { + if (!c.delta_embedding || c.embedding_model_version !== deltaEmb.modelId) + continue; + if ( + cosineSimilarity(deltaEmb.vector, toFloat32Array(c.delta_embedding)) >= + threshold + ) { + bumpHitCountById(deps.db, c.id); + return "Note already recorded (semantic duplicate — hit_count bumped)."; + } + } + } + // Insert new note const id = insertSkillMemoryNote(deps.db, { skillId: args.skill, @@ -116,6 +156,9 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti delta: args.delta, tags: args.tags, normalizedHash, + intentEmbedding: intentEmb ? float32ArrayToBlob(intentEmb.vector) : null, + deltaEmbedding: deltaEmb ? float32ArrayToBlob(deltaEmb.vector) : null, + embeddingModelVersion: modelVersion, createdAt: Date.now(), }); diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.ts index b491df01..6db13f36 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.ts @@ -55,7 +55,7 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi resolveProjectIdentity(toolContext.directory ?? 
deps.projectDirectory); const frontmatterConfig = deps._testFrontmatterConfig ?? null; const tier: "project" | "global" = "global"; // default for test injection - const block = recallSkillMemoryBlock(deps.db, { + const block = await recallSkillMemoryBlock(deps.db, { skill: args.skill, intent: args.intent, scope: tier, @@ -167,7 +167,7 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi } // Delegate to shared recall core (feature layer — same as transparent path) - const block = recallSkillMemoryBlock(deps.db, { + const block = await recallSkillMemoryBlock(deps.db, { skill: args.skill, intent: args.intent, scope: tier, From 740d8e8c11987f7d06def1b056b581def0e8a958 Mon Sep 17 00:00:00 2001 From: Tehan Date: Fri, 19 Jun 2026 00:22:26 +0200 Subject: [PATCH 03/10] =?UTF-8?q?feat(skill-memory):=20historian-extractio?= =?UTF-8?q?n=20foundation=20=E2=80=94=20global=20'*'=20unification=20(P3a)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for the historian to auto-capture skill notes cross-project. - surface the skill name in the historian chunk as a `TC: skill()` marker (the keystone — the tool input name was previously dropped). - migration: origin_project + source_type columns; unify global-tier notes under project_identity='*' (collision-merge) so a global note is one row recallable from any repo. - partitionKey helper routes global write/recall/reembed/stats through '*'; recall reads global-tier from '*' (cross-project); reembed sweeps '*'. 
--- .../magic-context/migrations-v41.test.ts | 185 ++++++++++++++++++ .../src/features/magic-context/migrations.ts | 72 +++++++ .../skill-memory/promote.test.ts | 61 ++++++ .../magic-context/skill-memory/promote.ts | 60 ++++++ .../magic-context/skill-memory/recall.test.ts | 27 ++- .../magic-context/skill-memory/recall.ts | 14 +- .../skill-memory/reembed.test.ts | 34 +++- .../magic-context/skill-memory/reembed.ts | 4 +- .../skill-memory/storage.test.ts | 123 +++++++++--- .../magic-context/skill-memory/storage.ts | 66 ++++--- .../src/features/magic-context/storage-db.ts | 2 +- .../read-session-formatting.test.ts | 27 +++ .../magic-context/read-session-formatting.ts | 8 + .../src/tools/ctx-skill-note/tools.test.ts | 36 ++++ .../plugin/src/tools/ctx-skill-note/tools.ts | 13 +- 15 files changed, 665 insertions(+), 67 deletions(-) create mode 100644 packages/plugin/src/features/magic-context/migrations-v41.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/promote.test.ts create mode 100644 packages/plugin/src/features/magic-context/skill-memory/promote.ts create mode 100644 packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts diff --git a/packages/plugin/src/features/magic-context/migrations-v41.test.ts b/packages/plugin/src/features/magic-context/migrations-v41.test.ts new file mode 100644 index 00000000..f3d06fdd --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v41.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, MIGRATIONS, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; + +function migratedDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +function insertGlobal( + db: Database, + 
skillId: string, + projectIdentity: string, + hash: string, + opts: { hit?: number; recall?: number; lastUsed?: number | null; createdAt?: number } = {}, +): void { + db.prepare( + `INSERT INTO skill_memory (skill_id, resolved_path, tier, project_identity, intent, kind, delta, hit_count, recall_count, pinned, normalized_hash, created_at, last_used_at) + VALUES (?, '/p', 'global', ?, 'i', 'fix', 'd-' || ?, ?, ?, 0, ?, ?, ?)`, + ).run( + skillId, + projectIdentity, + hash, + opts.hit ?? 0, + opts.recall ?? 0, + hash, + opts.createdAt ?? Date.now(), + opts.lastUsed ?? null, + ); +} + +describe("migration v41 — origin_project + source_type + global '*' unification", () => { + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v41", () => { + expect(LATEST_SUPPORTED_VERSION).toBe(41); + expect(LATEST_MIGRATION_VERSION).toBe(41); + }); + + test("fresh DB has origin_project + source_type columns", () => { + const db = migratedDb(); + try { + const cols = ( + db.prepare("PRAGMA table_info(skill_memory)").all() as Array<{ name: string }> + ).map((r) => r.name); + expect(cols).toContain("origin_project"); + expect(cols).toContain("source_type"); + } finally { + closeQuietly(db); + } + }); + + test("singleton global note rewritten to '*' with origin_project preserved", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "h1"); + + const v41 = MIGRATIONS.find((m) => m.version === 41); + expect(v41).toBeDefined(); + v41?.up(db); + + const row = db + .prepare( + "SELECT project_identity, origin_project FROM skill_memory WHERE normalized_hash='h1'", + ) + .get() as { project_identity: string; origin_project: string }; + expect(row.project_identity).toBe("*"); + expect(row.origin_project).toBe("git:repoA"); + } finally { + closeQuietly(db); + } + }); + + test("collision-merge: same lesson in 2 repos → one '*' row, summed 
counters, MAX(last_used_at)", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "dup", { + hit: 2, + recall: 3, + lastUsed: 1000, + createdAt: 500, + }); + insertGlobal(db, "council", "git:repoB", "dup", { + hit: 5, + recall: 1, + lastUsed: 9000, + createdAt: 800, + }); + + MIGRATIONS.find((m) => m.version === 41)?.up(db); + + const rows = db + .prepare( + "SELECT project_identity, hit_count, recall_count, last_used_at, created_at FROM skill_memory WHERE normalized_hash='dup'", + ) + .all() as Array<{ + project_identity: string; + hit_count: number; + recall_count: number; + last_used_at: number; + created_at: number; + }>; + expect(rows.length).toBe(1); + expect(rows[0].project_identity).toBe("*"); + expect(rows[0].hit_count).toBe(7); + expect(rows[0].recall_count).toBe(4); + expect(rows[0].last_used_at).toBe(9000); + expect(rows[0].created_at).toBe(500); + } finally { + closeQuietly(db); + } + }); + + test("idempotent: re-running v41 up() does not double-process '*' rows", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "h1", { hit: 1 }); + + const v41 = MIGRATIONS.find((m) => m.version === 41); + v41?.up(db); + v41?.up(db); + + const rows = db + .prepare("SELECT hit_count FROM skill_memory WHERE normalized_hash='h1'") + .all() as Array<{ hit_count: number }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("FTS index consistent after collision-merge (no orphans)", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + insertGlobal(db, "council", "git:repoA", "dup"); + insertGlobal(db, "council", 
"git:repoB", "dup"); + + MIGRATIONS.find((m) => m.version === 41)?.up(db); + + const ftsCount = db.prepare("SELECT COUNT(*) AS n FROM skill_memory_fts").get() as { + n: number; + }; + const rowCount = db.prepare("SELECT COUNT(*) AS n FROM skill_memory").get() as { + n: number; + }; + // Prove the merge actually happened (2 dup rows → 1) so the parity + // assertion below isn't trivially true on a no-op merge. + expect(rowCount.n).toBe(1); + expect(ftsCount.n).toBe(rowCount.n); + } finally { + closeQuietly(db); + } + }); + + test("project-tier rows untouched", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); + db.prepare( + `INSERT INTO skill_memory (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, created_at) VALUES ('s', '/p', 'project', 'git:repoA', 'i', 'fix', 'd', 'ph', 1)`, + ).run(); + + MIGRATIONS.find((m) => m.version === 41)?.up(db); + + const row = db + .prepare("SELECT project_identity FROM skill_memory WHERE normalized_hash='ph'") + .get() as { project_identity: string }; + expect(row.project_identity).toBe("git:repoA"); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations.ts b/packages/plugin/src/features/magic-context/migrations.ts index c4a65750..24309ed7 100644 --- a/packages/plugin/src/features/magic-context/migrations.ts +++ b/packages/plugin/src/features/magic-context/migrations.ts @@ -1975,6 +1975,78 @@ export const MIGRATIONS: Migration[] = [ db.exec(`INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild');`); }, }, + { + // Skill-memory historian extraction: was v41/v44 across earlier rebases; + // renumbered to v52 after upstream v0.27 took v42–v49 (skill is now v50/51). 
+ version: 52, + description: + "Skill-memory historian extraction: origin_project + source_type columns; unify global-tier notes under project_identity='*' (collision-merge)", + up: (db: Database) => { + db.transaction(() => { + if (!columnExists(db, "skill_memory", "origin_project")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN origin_project TEXT;`); + } + if (!columnExists(db, "skill_memory", "source_type")) { + db.exec(`ALTER TABLE skill_memory ADD COLUMN source_type TEXT;`); + } + + // resolved_path stays TEXT NOT NULL; historian writes the '' sentinel + // (handled in storage layer, not here). + const groups = db + .prepare( + `SELECT skill_id, normalized_hash, COUNT(*) AS n, MIN(created_at) AS min_created, + SUM(hit_count) AS sum_hit, SUM(recall_count) AS sum_recall, MAX(last_used_at) AS max_used + FROM skill_memory + WHERE tier='global' AND project_identity != '*' + GROUP BY skill_id, normalized_hash HAVING COUNT(*) > 1`, + ) + .all() as Array<{ + skill_id: string; + normalized_hash: string; + n: number; + min_created: number; + sum_hit: number; + sum_recall: number; + max_used: number | null; + }>; + for (const g of groups) { + const survivor = db + .prepare( + `SELECT id, project_identity FROM skill_memory + WHERE skill_id=? AND normalized_hash=? AND tier='global' AND project_identity != '*' + ORDER BY created_at ASC, id ASC LIMIT 1`, + ) + .get(g.skill_id, g.normalized_hash) as { + id: number; + project_identity: string; + }; + db.prepare( + `DELETE FROM skill_memory WHERE skill_id=? AND normalized_hash=? AND tier='global' AND project_identity != '*' AND id != ?`, + ).run(g.skill_id, g.normalized_hash, survivor.id); + db.prepare( + `UPDATE skill_memory SET hit_count=?, recall_count=?, last_used_at=?, origin_project=?, project_identity='*' WHERE id=?`, + ).run(g.sum_hit, g.sum_recall, g.max_used, survivor.project_identity, survivor.id); + } + + // Defensive (S4): drop any pre-'*' row whose (skill_id, normalized_hash) + // already has a '*' sibling. 
Dead code in normal flow — v52 (this migration) is the only + // writer of '*' rows and runs atomically, so a pre-'*' row can't coexist + // with a '*' sibling after a clean run. It only fires if a prior v52 run + // was interrupted after creating some '*' rows but before finishing; in + // that case the '*' row is canonical and the leftover pre-'*' row is + // dropped rather than colliding on the singleton UPDATE below. + db.prepare( + `DELETE FROM skill_memory AS s + WHERE s.tier='global' AND s.project_identity != '*' + AND EXISTS (SELECT 1 FROM skill_memory g WHERE g.tier='global' AND g.project_identity='*' AND g.skill_id=s.skill_id AND g.normalized_hash=s.normalized_hash)`, + ).run(); + + db.prepare( + `UPDATE skill_memory SET origin_project = project_identity, project_identity = '*' WHERE tier='global' AND project_identity != '*'`, + ).run(); + })(); + }, + }, ]; /** diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts new file mode 100644 index 00000000..4f30faef --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../../shared/sqlite"; +import { closeQuietly } from "../../../shared/sqlite-helpers"; +import { runMigrations } from "../migrations"; +import { initializeDatabase } from "../storage-db"; +import { promoteSkillObservations } from "./promote"; + +function makeDb(): Database { + const db = new Database(":memory:"); + initializeDatabase(db); + runMigrations(db); + return db; +} + +describe("promoteSkillObservations", () => { + test("direct-writes a global '*' note with historian provenance", () => { + const db = makeDb(); + try { + const n = promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "gotcha", lesson: "aggregator needs a fast model" }, + ]); + expect(n).toBe(1); + const row = db.prepare("SELECT 
tier, project_identity, origin_project, source_type, resolved_path, kind FROM skill_memory").get() as Record< + string, + string + >; + expect(row.tier).toBe("global"); + expect(row.project_identity).toBe("*"); + expect(row.origin_project).toBe("git:repoA"); + expect(row.source_type).toBe("historian"); + expect(row.resolved_path).toBe(""); + expect(row.kind).toBe("gotcha"); + } finally { + closeQuietly(db); + } + }); + + test("exact-hash duplicate bumps hit_count instead of inserting", () => { + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "fix", lesson: "same lesson" }]); + const n = promoteSkillObservations(db, "git:repoB", [{ skillId: "council", kind: "fix", lesson: "same lesson" }]); + expect(n).toBe(0); + const rows = db.prepare("SELECT hit_count FROM skill_memory").all() as Array<{ hit_count: number }>; + expect(rows.length).toBe(1); + expect(rows[0].hit_count).toBe(1); + } finally { + closeQuietly(db); + } + }); + + test("rejects kind='general'", () => { + const db = makeDb(); + try { + const n = promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "general" as never, lesson: "x" }]); + expect(n).toBe(0); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.ts new file mode 100644 index 00000000..986308da --- /dev/null +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.ts @@ -0,0 +1,60 @@ +import type { Database } from "../../../shared/sqlite"; +import { computeNormalizedHash } from "../memory/normalize-hash"; +import { bumpHitCount, findExistingNote, insertSkillMemoryNote, partitionKey } from "./storage"; + +const VALID_KINDS = new Set(["gotcha", "discovery", "fix", "workflow"]); + +export interface SkillObservation { + skillId: string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + lesson: string; +} + +/** + * 
Direct-write historian-extracted skill observations as GLOBAL-tier notes under + * the '*' partition (source_type='historian', resolved_path='' sentinel). Hash-dedup: + * an exact-hash match bumps hit_count instead of inserting. Returns the number of + * NEW notes written (dups excluded). Best-effort per item: never throws. + */ +export function promoteSkillObservations( + db: Database, + originProject: string, + observations: SkillObservation[], +): number { + let written = 0; + const tier = "global" as const; + const part = partitionKey(tier, originProject); + + for (const obs of observations) { + if (!obs.skillId || !obs.lesson || !VALID_KINDS.has(obs.kind)) continue; + + try { + const normalizedHash = computeNormalizedHash(obs.lesson); + const existing = findExistingNote(db, obs.skillId, tier, part, normalizedHash); + if (existing) { + bumpHitCount(db, obs.skillId, tier, part, normalizedHash); + continue; + } + + const id = insertSkillMemoryNote(db, { + skillId: obs.skillId, + resolvedPath: "", + tier, + skillSource: null, + projectIdentity: part, + originProject, + sourceType: "historian", + intent: obs.lesson, + kind: obs.kind, + delta: obs.lesson, + normalizedHash, + createdAt: Date.now(), + }); + if (id !== null) written++; + } catch { + // Best-effort: one bad observation must not block the publish. 
+ } + } + + return written; +} diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts index db6983e8..7d7944b8 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -11,6 +11,7 @@ import { recallSkillMemoryBlock, sanitizeSkillIntentForFts, } from "./recall"; +import { promoteSkillObservations } from "./promote"; import { insertSkillMemoryNote } from "./storage"; function makeDb(): Database { @@ -335,7 +336,7 @@ describe("recallSkillMemoryBlock (intent-scoped rungs)", () => { EMBED_UP = true; db.prepare( `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) - VALUES ('s','/p','global','git:x','old auth fix','fix','rotate token','h1',0,1,1)`, + VALUES ('s','/p','global','*','old auth fix','fix','rotate token','h1',0,1,1)`, ).run(); for (let i = 0; i < 10; i++) { insertSkillMemoryNote(db, { @@ -404,6 +405,30 @@ describe("buildSkillMemoryBlock", () => { }); }); +describe("cross-project global recall", () => { + test("a global note learned in repo A surfaces when recalled from repo B", async () => { + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [ + { + skillId: "council", + kind: "gotcha", + lesson: "aggregator needs a fast model", + }, + ]); + const block = await recallSkillMemoryBlock(db, { + skill: "council", + scope: "global", + projectIdentity: "git:repoB", + frontmatterConfig: cfg, + }); + expect(block).toContain("aggregator needs a fast model"); + } finally { + closeQuietly(db); + } + }); +}); + describe("recallSkillMemoryBlock bumps recall_count for surfaced notes", () => { test("a surfaced note's recall_count increments per recall (no-intent rung)", async () => { const db = makeDb(); diff --git 
a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts index 4d8327ca..51709d68 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -8,6 +8,7 @@ import { getPinnedNotes, getRankingCandidates, getSkillMemoryNotes, + partitionKey, type SkillMemoryNote, searchSkillMemoryFts, } from "./storage"; @@ -193,6 +194,7 @@ export async function recallSkillMemoryBlock( ): Promise { if (!opts.frontmatterConfig?.enabled) return ""; try { + const part = partitionKey(opts.scope, opts.projectIdentity); const maxTokens = opts.maxTokens ?? opts.frontmatterConfig.max_tokens; const maxPinned = opts.frontmatterConfig.max_pinned_tokens; const intent = opts.intent?.trim(); @@ -218,7 +220,7 @@ export async function recallSkillMemoryBlock( // Rung 2: no intent → flat recency×hit (nothing to embed/FTS-match → always "no-intent"). if (!intent) { - const notes = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, { + const notes = flatRecall(db, opts.skill, opts.scope, part, { maxTokens, maxPinnedTokens: maxPinned, }); @@ -231,7 +233,7 @@ export async function recallSkillMemoryBlock( db, opts.skill, opts.scope, - opts.projectIdentity, + part, 200, ); const matched = candidates.filter( @@ -257,7 +259,7 @@ export async function recallSkillMemoryBlock( .map((r) => byId.get(r.id)) .filter((n): n is SkillMemoryNote => n != null); const ordered = unionPinnedFirst( - getPinnedNotes(db, opts.skill, opts.scope, opts.projectIdentity), + getPinnedNotes(db, opts.skill, opts.scope, part), rankedNotes, ); const selected = budgetFill(ordered, maxTokens, maxPinned); @@ -268,7 +270,7 @@ export async function recallSkillMemoryBlock( const match = sanitizeSkillIntentForFts(intent); if (match === "") { - const notes = flatRecall(db, opts.skill, opts.scope, opts.projectIdentity, { + const notes = flatRecall(db, opts.skill, 
opts.scope, part, { maxTokens, maxPinnedTokens: maxPinned, }); @@ -278,12 +280,12 @@ export async function recallSkillMemoryBlock( db, opts.skill, opts.scope, - opts.projectIdentity, + part, match, 50, ); const ordered = unionPinnedFirst( - getPinnedNotes(db, opts.skill, opts.scope, opts.projectIdentity), + getPinnedNotes(db, opts.skill, opts.scope, part), ftsNotes, ); const selected = budgetFill(ordered, maxTokens, maxPinned); diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts index ccd75b9b..84c5f067 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts @@ -4,13 +4,18 @@ import { closeQuietly } from "../../../shared/sqlite-helpers"; import { runMigrations } from "../migrations"; import { initializeDatabase } from "../storage-db"; +const embedCalls: string[] = []; + // Provider "up" with a deterministic model — MUST precede the reembed.ts import. 
mock.module("../memory/embedding", () => ({ - embedTextForProject: async (_p: string, _text: string) => ({ - vector: new Float32Array([0.1, 0.2, 0.3]), - modelId: "test-model", - generation: 1, - }), + embedTextForProject: async (projectIdentity: string, _text: string) => { + embedCalls.push(projectIdentity); + return { + vector: new Float32Array([0.1, 0.2, 0.3]), + modelId: "test-model", + generation: 1, + }; + }, })); const { reembedStaleSkillNotes } = await import("./reembed"); @@ -43,3 +48,22 @@ test("reembedStaleSkillNotes fills NULL embeddings (bounded, idempotent)", async closeQuietly(db); } }); + +test("reembed selects global '*' notes and embeds them under the real identity", async () => { + const { promoteSkillObservations } = await import("./promote"); + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "fix", lesson: "L7" }]); + embedCalls.length = 0; + + const res = await reembedStaleSkillNotes(db, "git:repoA"); + + expect(res.reembedded).toBe(1); + expect(embedCalls.length).toBeGreaterThan(0); + expect(embedCalls.every((c) => c === "git:repoA")).toBe(true); + } finally { + closeQuietly(db); + } +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts index 040a540b..7b058d17 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/reembed.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/reembed.ts @@ -20,8 +20,8 @@ export async function reembedStaleSkillNotes( const stale = db .prepare( `SELECT id, intent, delta FROM skill_memory - WHERE project_identity = ? - AND (intent_embedding IS NULL OR delta_embedding IS NULL OR embedding_model_version IS NOT ?) + WHERE (project_identity = ? OR project_identity = '*') + AND (intent_embedding IS NULL OR delta_embedding IS NULL OR embedding_model_version IS NOT ?) 
LIMIT ?`, ) .all(projectIdentity, currentModel, REEMBED_CAP) as Array<{ diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts index 3be0adc6..38d97406 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts @@ -11,11 +11,12 @@ import { getDedupCandidates, getPinnedNotes, getRankingCandidates, - getSkillMemoryNotes, - getSkillMemoryStats, - type InsertSkillMemoryNoteArgs, - insertSkillMemoryNote, - searchSkillMemoryFts, + getSkillMemoryNotes, + getSkillMemoryStats, + type InsertSkillMemoryNoteArgs, + insertSkillMemoryNote, + partitionKey, + searchSkillMemoryFts, } from "./storage"; function makeDb(): Database { @@ -307,13 +308,13 @@ describe("skill_memory storage", () => { createdAt: Date.now(), }); - // Seed 1 note under a DIFFERENT project — must NOT be counted. - insertSkillMemoryNote(db, { - skillId: "tdd", - resolvedPath: "/p", - tier: "global", - skillSource: "opencode-global", - projectIdentity: "git:other", + // Seed 1 project-tier note under a DIFFERENT project — must NOT be counted. 
+ insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "project", + skillSource: "opencode-global", + projectIdentity: "git:other", intent: "i5", kind: "gotcha", delta: "n5", @@ -334,12 +335,12 @@ describe("skill_memory storage", () => { const db = makeDb(); try { const ts = 1_000_000; - const ins = (hash: string, hits: number) => - db - .prepare( - `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) - VALUES ('s','/p','global','git:x','i','fix','d',?,?,0,?,?)`, - ) + const ins = (hash: string, hits: number) => + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) + VALUES ('s','/p','global','*','i','fix','d',?,?,0,?,?)`, + ) .run(hash, hits, ts, ts); ins("a", 1); ins("b", 5); @@ -463,10 +464,10 @@ describe("skill_memory storage", () => { const pid = Number( ( db - .prepare( - `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) - VALUES ('s','/p','global','git:x','i','fix','pinned','pin-h2',0,1,2) RETURNING id`, - ) + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + VALUES ('s','/p','global','*','i','fix','pinned','pin-h2',0,1,2) RETURNING id`, + ) .get() as { id: number } ).id, ); @@ -478,3 +479,81 @@ describe("skill_memory storage", () => { } }); }); + +describe("partitionKey", () => { + test("global tier maps to '*' sentinel", () => { + expect(partitionKey("global", "git:repoA")).toBe("*"); + }); + test("project tier passes through the real identity", () => { + expect(partitionKey("project", "git:repoA")).toBe("git:repoA"); + }); +}); + +describe("storage v41 fields", () => { + test("insert stamps origin_project + source_type; null resolvedPath -> ''", () 
=> { + const db = makeDb(); + try { + const id = insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: null, + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoA", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + expect(id).not.toBeNull(); + const row = db + .prepare( + "SELECT resolved_path, origin_project, source_type FROM skill_memory WHERE id=?", + ) + .get(id) as { resolved_path: string; origin_project: string; source_type: string }; + expect(row.resolved_path).toBe(""); + expect(row.origin_project).toBe("git:repoA"); + expect(row.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); + + test("getSkillMemoryStats counts global ('*') notes alongside the project's own", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "project", + skillSource: null, + projectIdentity: "git:repoA", + intent: "i", + kind: "fix", + delta: "d1", + normalizedHash: "p1", + createdAt: 1, + }); + insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: "", + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoB", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d2", + normalizedHash: "g1", + createdAt: 2, + }); + const stats = getSkillMemoryStats(db, "git:repoA"); + expect(stats.totalNotes).toBe(2); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts index c3a3d432..ad74c3ab 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -1,5 +1,15 @@ import type { Database } from "../../../shared/sqlite"; +/** + * The DB partition key for a note's tier. 
Global-tier notes are stored under the + * '*' sentinel so a lesson learned in any repo is recallable everywhere; project-tier + * notes keep their real project identity. ALL global-tier reads/writes/dedup MUST + * route through this so no call site is missed. + */ +export function partitionKey(tier: "project" | "global", projectIdentity: string): string { + return tier === "global" ? "*" : projectIdentity; +} + export interface SkillMemoryNote { id: number; skill_id: string; @@ -7,6 +17,8 @@ export interface SkillMemoryNote { tier: "project" | "global"; skill_source: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; project_identity: string; + origin_project: string | null; + source_type: string | null; intent: string; intent_embedding: Buffer | null; delta_embedding: Buffer | null; @@ -24,10 +36,12 @@ export interface SkillMemoryNote { export interface InsertSkillMemoryNoteArgs { skillId: string; - resolvedPath: string; + resolvedPath: string | null; tier: "project" | "global"; skillSource: "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" | null; projectIdentity: string; + originProject?: string | null; + sourceType?: string | null; intent: string; kind: "gotcha" | "discovery" | "fix" | "workflow"; delta: string; @@ -52,17 +66,19 @@ export function insertSkillMemoryNote( const result = db .prepare( `INSERT INTO skill_memory - (skill_id, resolved_path, tier, skill_source, project_identity, - intent, kind, delta, tags, intent_embedding, delta_embedding, embedding_model_version, - hit_count, pinned, normalized_hash, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, + (skill_id, resolved_path, tier, skill_source, project_identity, origin_project, source_type, + intent, kind, delta, tags, intent_embedding, delta_embedding, embedding_model_version, + hit_count, pinned, normalized_hash, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)`, ) .run( args.skillId, - 
args.resolvedPath, + args.resolvedPath ?? "", args.tier, args.skillSource ?? null, - args.projectIdentity, + partitionKey(args.tier, args.projectIdentity), + args.originProject ?? null, + args.sourceType ?? null, args.intent, args.kind, args.delta, @@ -129,7 +145,7 @@ export function getSkillMemoryNotes( created_at DESC LIMIT ?`, ) - .all(skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; } /** @@ -144,10 +160,10 @@ export function bumpHitCount( normalizedHash: string, ): void { db.prepare( - `UPDATE skill_memory - SET hit_count = hit_count + 1, last_used_at = ? - WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, - ).run(Date.now(), skillId, tier, projectIdentity, normalizedHash); + `UPDATE skill_memory + SET hit_count = hit_count + 1, last_used_at = ? + WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, + ).run(Date.now(), skillId, tier, partitionKey(tier, projectIdentity), normalizedHash); } /** @@ -196,7 +212,7 @@ export function findExistingNote( `SELECT id, hit_count FROM skill_memory WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, ) - .get(skillId, tier, projectIdentity, normalizedHash) as { + .get(skillId, tier, partitionKey(tier, projectIdentity), normalizedHash) as { id: number; hit_count: number; } | null) ?? null @@ -221,7 +237,7 @@ export function getDedupCandidates( WHERE skill_id=? AND tier=? AND project_identity=? ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, ) - .all(skillId, tier, projectIdentity, limit) as Array< + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as Array< Pick >; } @@ -239,7 +255,7 @@ export function getRankingCandidates( WHERE skill_id=? AND tier=? AND project_identity=? 
ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, ) - .all(skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; } export function searchSkillMemoryFts( @@ -259,7 +275,7 @@ export function searchSkillMemoryFts( ORDER BY bm25(skill_memory_fts) ASC, COALESCE(m.last_used_at, m.created_at) DESC LIMIT ?`, ) - .all(matchQuery, skillId, tier, projectIdentity, limit) as SkillMemoryNote[]; + .all(matchQuery, skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; } export function getPinnedNotes( @@ -274,7 +290,7 @@ export function getPinnedNotes( WHERE skill_id=? AND tier=? AND project_identity=? AND pinned=1 ORDER BY COALESCE(last_used_at, created_at) DESC`, ) - .all(skillId, tier, projectIdentity) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity)) as SkillMemoryNote[]; } export function getSkillMemoryStats( @@ -282,14 +298,14 @@ export function getSkillMemoryStats( projectIdentity: string, ): { totalNotes: number; skillsWithNotes: number; pinnedNotes: number } { const row = db - .prepare( - `SELECT - COUNT(*) AS total, - COUNT(DISTINCT skill_id) AS skills, - COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned - FROM skill_memory - WHERE project_identity = ?`, - ) + .prepare( + `SELECT + COUNT(*) AS total, + COUNT(DISTINCT skill_id) AS skills, + COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned + FROM skill_memory + WHERE project_identity = ? OR project_identity = '*'`, + ) .get(projectIdentity) as { total: number; skills: number; pinned: number } | undefined; return { totalNotes: Number(row?.total ?? 
0), diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index 6a53f3ab..7c31f3ca 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -38,7 +38,7 @@ export function getSchemaFenceRejection(): { return lastSchemaFenceRejection; } -export const LATEST_SUPPORTED_VERSION = 51; +export const LATEST_SUPPORTED_VERSION = 52; // chmod is meaningless on Windows (POSIX modes are not honored), so all // permission tightening is skipped there. mkdir's `mode` is likewise ignored. diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts new file mode 100644 index 00000000..18a5d1fc --- /dev/null +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, test } from "bun:test"; + +import { extractToolCallSummaries } from "./read-session-formatting"; + +describe("extractToolCallSummaries — skill tool", () => { + test("surfaces the skill name as TC: skill()", () => { + const parts = [ + { type: "tool", tool: "skill", state: { input: { name: "test-driven-development" }, metadata: {} } }, + ]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(test-driven-development)"]); + }); + + test("skill branch wins even if metadata.description is present (regression-proof)", () => { + const parts = [ + { type: "tool", tool: "skill", state: { input: { name: "council" }, metadata: { description: "Load skill" } } }, + ]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(council)"]); + }); + + test("skill with no name falls through to bare TC: skill", () => { + const parts = [{ type: "tool", tool: "skill", state: { input: {}, metadata: {} } }]; + + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill"]); + }); +}); diff --git 
a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts index 2e6f75db..7468c9d1 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts @@ -70,6 +70,14 @@ export function extractToolCallSummaries(parts: unknown[]): string[] { const input = state.input as Record | null; const metadata = state.metadata as Record | null; + // Skill tool: surface the skill name (input.name) before the description + // fallback, which would otherwise mask it if metadata.description exists. + if (p.tool === "skill") { + const name = input && typeof input.name === "string" ? input.name : ""; + summaries.push(name ? `TC: skill(${truncateArg(name)})` : "TC: skill"); + continue; + } + // Prefer explicit description (bash tool always has one) const description = (input && typeof input.description === "string" && input.description) || diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts index 5b45b927..9c13047b 100644 --- a/packages/plugin/src/tools/ctx-skill-note/tools.test.ts +++ b/packages/plugin/src/tools/ctx-skill-note/tools.test.ts @@ -105,6 +105,42 @@ describe("ctx_skill_note tool", () => { } }); + test("global-tier note is written under '*' with origin_project = real repo", async () => { + const db = makeDb(); + const registry: SkillLoadRegistry = createSkillLoadRegistry(); + try { + registry.set(registryKey("ses_test", "council"), { + resolvedPath: "/home/user/.config/opencode/skills/council/SKILL.md", + tier: "global", + skillSource: "opencode-global", + skillId: "council", + loadedAt: Date.now(), + frontmatterConfig: { + enabled: true, + max_tokens: 1500, + max_pinned_tokens: 4000, + dedup_threshold: 0.92, + }, + }); + + const t = createCtxSkillNoteTool({ db, skillLoadRegistry: registry }); + await t.execute( + { skill: "council", 
intent: "x", kind: "gotcha", delta: "global lesson xyz" }, + toolContext(), + ); + const row = db + .prepare( + "SELECT project_identity, origin_project FROM skill_memory WHERE delta='global lesson xyz'", + ) + .get() as { project_identity: string; origin_project: string }; + expect(row.project_identity).toBe("*"); + expect(row.origin_project).not.toBe("*"); + expect(row.origin_project.length).toBeGreaterThan(0); + } finally { + closeQuietly(db); + } + }); + test("deduplicates: bumps hit_count on exact duplicate delta", async () => { const db = makeDb(); const registry: SkillLoadRegistry = createSkillLoadRegistry(); diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.ts b/packages/plugin/src/tools/ctx-skill-note/tools.ts index 08413e0b..7a1e8e33 100644 --- a/packages/plugin/src/tools/ctx-skill-note/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-note/tools.ts @@ -14,6 +14,7 @@ import { findExistingNote, getDedupCandidates, insertSkillMemoryNote, + partitionKey, } from "../../features/magic-context/skill-memory/storage"; import { CTX_SKILL_NOTE_TOOL_NAME, @@ -88,6 +89,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti // a launch dir. This matches ctx_memory's pattern and correctly handles // `opencode -s` launched outside the project root. 
const projectIdentity = resolveProjectIdentity(toolContext.directory); + const part = partitionKey(registryEntry.tier, projectIdentity); const normalizedHash = computeNormalizedHash(args.delta); // Check for exact duplicate @@ -95,7 +97,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti deps.db, args.skill, registryEntry.tier, - projectIdentity, + part, normalizedHash, ); if (existing) { @@ -103,7 +105,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti deps.db, args.skill, registryEntry.tier, - projectIdentity, + part, normalizedHash, ); return ( @@ -127,7 +129,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti deps.db, args.skill, registryEntry.tier, - projectIdentity, + part, 200, ); const threshold = registryEntry.frontmatterConfig?.dedup_threshold ?? 0.92; @@ -150,7 +152,8 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti resolvedPath: registryEntry.resolvedPath, tier: registryEntry.tier, skillSource: registryEntry.skillSource, - projectIdentity, + projectIdentity: part, + originProject: projectIdentity, intent: args.intent, kind: args.kind, delta: args.delta, @@ -168,7 +171,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti deps.db, args.skill, registryEntry.tier, - projectIdentity, + part, normalizedHash, ); return "Note already recorded (concurrent insert detected — hit count bumped)."; From 0e5d79d6d3a891af4310943d5138db63fe060d4a Mon Sep 17 00:00:00 2001 From: Tehan Date: Fri, 19 Jun 2026 00:59:59 +0200 Subject: [PATCH 04/10] feat(skill-memory): historian auto-extraction pipeline (P3b) Close the loop so the historian writes skill notes during compaction without an agent volunteering ctx_skill_note. - historian prompt emits a block; parser extracts it; threaded through the validated historian result. 
- both runners (OpenCode + Pi) promote skill observations post-commit via the shared promoteSkillObservations helper, gated by promotionActive && !discardedLast, writing global '*' notes with source_type='historian'. - self-heal net: initializeDatabase re-creates skill_memory + ensureColumn so an upgraded DB recovers even if a migration row is lost. --- ARCHITECTURE.md | 13 +- assets/magic-context.schema.json | 54 + packages/cli/src/lib/dreamer-setup.test.ts | 4 +- packages/cli/src/lib/dreamer-setup.ts | 2 + packages/dashboard/src-tauri/src/config.rs | 4 +- .../ConfigEditor/DreamerTasksField.tsx | 6 + .../content/docs/reference/configuration.md | 5 + .../pi-plugin/src/pi-historian-runner.test.ts | 19 + packages/pi-plugin/src/pi-historian-runner.ts | 31 + .../schema/distill-skill-memory-enum.test.ts | 30 +- .../plugin/src/config/schema/magic-context.ts | 5 + .../features/magic-context/dreamer/runner.ts | 1138 ----------------- .../magic-context/dreamer/task-executor.ts | 14 +- .../magic-context/dreamer/task-gates.ts | 7 + .../magic-context/migrations-v42.test.ts | 126 ++ .../magic-context/migrations-v49.test.ts | 4 +- .../magic-context/migrations-v51.test.ts | 113 ++ ...ons-v41.test.ts => migrations-v52.test.ts} | 28 +- .../src/features/magic-context/migrations.ts | 8 +- .../skill-memory/promote.test.ts | 25 +- .../magic-context/skill-memory/recall.test.ts | 2 +- .../magic-context/skill-memory/recall.ts | 17 +- .../skill-memory/reembed.test.ts | 20 +- .../skill-memory/storage.test.ts | 184 +-- .../magic-context/skill-memory/storage.ts | 28 +- .../storage-db-skill-memory-net.test.ts | 87 ++ .../src/features/magic-context/storage-db.ts | 59 + .../hooks/magic-context/command-handler.ts | 2 +- .../magic-context/compartment-parser.test.ts | 40 + .../hooks/magic-context/compartment-parser.ts | 37 +- .../compartment-runner-incremental.ts | 23 + .../magic-context/compartment-runner-types.ts | 3 +- .../compartment-runner-validation.test.ts | 21 + 
.../compartment-runner-validation.ts | 2 + .../magic-context/compartment-runner.test.ts | 53 + .../historian-prompt.generated.ts | 25 +- .../magic-context/historian-prompt.source.md | 25 +- .../read-session-formatting.test.ts | 12 +- .../plugin/src/plugin/rpc-handlers.test.ts | 18 +- .../plugin/src/tools/ctx-skill-note/tools.ts | 16 +- 40 files changed, 972 insertions(+), 1338 deletions(-) delete mode 100644 packages/plugin/src/features/magic-context/dreamer/runner.ts create mode 100644 packages/plugin/src/features/magic-context/migrations-v42.test.ts create mode 100644 packages/plugin/src/features/magic-context/migrations-v51.test.ts rename packages/plugin/src/features/magic-context/{migrations-v41.test.ts => migrations-v52.test.ts} (90%) create mode 100644 packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index fb374102..600e695e 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -19,7 +19,7 @@ Magic Context is an `@opencode-ai/plugin` (entry `src/index.ts`) that rewrites t - **Adapters** (`src/plugin/`): hook wrappers, tool registry, RPC handlers, dream-timer lifecycle, per-session hook construction. - **Runtime** (`src/hooks/magic-context/`): the transform pipeline, postprocess phase, event/command handlers, system-prompt injection, compartment runners, decay rendering, strip-and-replay, nudges, m[0]/m[1] injection. - **Feature services** (`src/features/magic-context/`): storage, scheduler, tagger, memory, dreamer, sidekick, git-commit + message FTS indexes, unified search, overflow detection, migrations. -- **Tools** (`src/tools/`): `ctx_reduce`, `ctx_expand`, `ctx_note`, `ctx_memory`, `ctx_search`. +- **Tools** (`src/tools/`): `ctx_reduce`, `ctx_expand`, `ctx_note`, `ctx_memory`, `ctx_search`, `ctx_skill_note`, `ctx_skill_recall`. 
- **Config + shared** (`src/config/`, `src/shared/`): Zod config (deep-merge raw JSONC before validation; invalid leaves fall back to defaults with warnings, never disable the plugin), logger, data paths, SQLite selector, harness id, RPC transport, conflict detector, tag-transcript primitive (shared with Pi). - **TUI** (`src/tui/`): sidebar + `/ctx-status` / `/ctx-recomp` dialogs, RPC-backed; shipped as raw TS via the `./tui` export (not bundled into `dist/index.js`). - **CLI** (`packages/cli/`, separate `@cortexkit/magic-context` package): `npx` setup / doctor / migrate wizard. @@ -114,9 +114,12 @@ The long-history pipeline. Tiered compartments + deterministic decay renderer (r 2. **Before (`tool.execute.before` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteBeforeHook`)** — stashes the raw `intent` by `callID` in a bounded closure-state `Map` (60s TTL sweep + 256-entry cap + full clear on session delete, so unpaired before-hooks never leak). The stash is the only place `intent` is observable; it's deleted in the after-hook's `finally`. 3. **After (`tool.execute.after` in `src/hooks/magic-context/hook-handlers.ts` — `createToolExecuteAfterHook`)** — runs only for `input.tool === "skill"`. Parses the `Base directory for this skill: file:///...` line from `output.output` via `parseSkillProvenance()` (`fileURLToPath`-based, cross-platform) to recover the resolved `SKILL.md` path + tier (project/global) + `skill_source`. Then re-reads `SKILL.md` from disk (opencode's skill loader strips the `skill-memory:` block from the model-facing output, so the frontmatter is unreadable from `output.output`). Populates a session-scoped `SkillLoadRegistry` keyed by `${sessionId}:${skillId}` (NOT persisted, cleaned in `onSessionDeleted`). When frontmatter has `enabled: true` and notes exist, delegates to `recallSkillMemoryBlock` (feature layer) and appends the block to `output.output` BEFORE the Channel-1 ctx_reduce nudge runs. 4. 
**Cache safety (keystone).** The append lands in the skill tool RESULT = conversation tail, NOT the cached m[0]/m[1] prefix. This is why the feature cannot regress the prompt-cache hit rate. Channel-1 already appends to tool output strings the same way (precedent in `maybeInjectChannel1Nudge`) — this is proven production behavior. -5. **Write-back (`ctx_skill_note`)** — `kind` is a hard gate rejecting `'general'` at the tool level; duplicates dedup on `normalized_hash` and bump `hit_count` (`computeNormalizedHash` from `memory/normalize-hash.ts`). Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry` (so the agent must load the skill first — actionable error otherwise). Inserts into the `skill_memory` table (migration v39). The injected block footer reinforces: "After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes." +5. **Write-back (`ctx_skill_note`)** — `kind` is a hard gate rejecting `'general'` at the tool level; duplicates dedup on `normalized_hash` and bump `hit_count` (`computeNormalizedHash` from `memory/normalize-hash.ts`). Resolves `(skill_id, tier, project_identity, resolved_path)` from the session-scoped `SkillLoadRegistry` (so the agent must load the skill first — actionable error otherwise). Inserts into the `skill_memory` table (migration v50). The injected block footer reinforces: "After using this skill, call `ctx_skill_note` — record only gotchas, novel discoveries, or error→fix; skip routine successes." 6. **Explicit recall (`ctx_skill_recall`)** — companion tool to the transparent path; reuses `recallSkillMemoryBlock` so P2 embeddings upgrade both at once. Registry-first resolution (exact, free, no disk I/O when the skill was loaded this session) with a cold-start disk fallback that walks opencode's real `discoverSkills()` order (project dirs first — they shadow global — then global external + config dirs). -7. 
**Dreamer distill (`distill-skill-memory` task — opt-in, NOT a default)** — `DREAMER_TASKS` enum carries it (line 25 of `src/config/schema/magic-context.ts`); `DEFAULT_DREAMER_TASKS` does NOT (mirroring the `maintain-docs` precedent). The task prompt lives in `src/features/magic-context/dreamer/task-prompts.ts` and runs the merge/prune/promote maintenance cycle documented in CONFIGURATION.md. +7. **Multi-rung recall cascade (P2)** — `recallSkillMemoryBlock` is a four-rung selector: no intent → flat recency×hit (`mode="no-intent"`); intent + model-matched embeddings → cosine blend across `intent_embedding` + `delta_embedding` with tunable `ranking_relevance`/`ranking_recency`/`ranking_hit` weights (`mode="full"`); intent + no model match → FTS5 over the content-linked `skill_memory_fts` vtable (`mode="fts5-fallback"`); intent tokenized to empty → flat fallback (`mode="flat-fts"`). Embeddings live on `skill_memory` (BLOBs scanned via `Float32Array` cosine, mirroring the memories-embed path); a programmatic no-LLM `reembedStaleSkillNotes` keeps them fresh during the `distill-skill-memory` task. Read-side `recall_count` (migration v51) is bumped on every surfaced note — distinct from write-side `hit_count` so the recency term cannot be poisoned by which notes are queried most. +8. **Global-tier unification (P3a)** — global-tier notes collapse to `project_identity = '*'` (collision-merge at migration v52). The `partitionKey(tier, projectIdentity)` helper is the single chokepoint for every global write/recall/reembed/stats call site. A historian-extracted lesson learned in repoA is recallable from repoB without per-repo replication; the originating repo is preserved in `origin_project` (`source_type='historian'` for P3b writes, `'agent'` for tool writes). +9. 
**Historian auto-extraction (P3b)** — the historian prompt emits an optional skill-observations block when the chunk shows the agent USING a skill (surfaced via the `TC: skill(<name>)` marker emitted by `extractToolCallSummaries` in `read-session-formatting.ts`) and learning a UNIVERSAL, reusable lesson. Both OpenCode (`compartment-runner-incremental.ts`) and Pi (`pi-historian-runner.ts`) runners promote the validated observations post-commit via the shared `promoteSkillObservations` helper, gated by `promotionActive && !discardedLast`. Writes go to the global `'*'` partition with `source_type='historian'` and `resolved_path=''` sentinel; hash-dedup bumps `hit_count` on collisions. +10. **Dreamer distill (`distill-skill-memory` task — opt-in, NOT a default)** — `DREAMER_TASKS` enum carries it (line 25 of `src/config/schema/magic-context.ts`); `DEFAULT_DREAMER_TASKS` does NOT (mirroring the `maintain-docs` precedent). The task prompt lives in `src/features/magic-context/dreamer/task-prompts.ts` and runs the merge/prune/promote maintenance cycle documented in CONFIGURATION.md. **Git-commit indexing:** - `src/features/magic-context/git-commits/indexer.ts` reads HEAD-only non-merge commits via `git log` (NUL-byte-free format separator `\x1f`), bounded by `experimental.git_commit_indexing.{since_days, max_commits}`. @@ -152,7 +155,7 @@ Background maintenance (V2: per-task cron scheduling). A process-wide 15-min tim ## Storage & migrations -`storage-db.ts` creates the schema and runs versioned migrations (`migrations.ts`, currently v1–v44). `LATEST_SUPPORTED_VERSION` is a schema fence — it MUST be bumped with every new migration (a unit test asserts it equals the highest migration), and a stale value makes the DB refuse to open after the migration applies. `ensureColumn()` + `healAllNullColumns()` backfill upgraded DBs even if a migration row is lost. New session-scoped tables must be added to `clearSession()`.
A bulletproof `MAGIC_CONTEXT_TEST_DATA_DIR` guard keeps the test suite off the live DB (running `bun test` once migrated a live DB and fail-closed running binaries). SQLite binds must use SPREAD positional args, never the array form (`bun:sqlite` binds a lone array positionally; `node:sqlite` reads it as named params and throws). +`storage-db.ts` creates the schema and runs versioned migrations (`migrations.ts`, currently v1–v52). `LATEST_SUPPORTED_VERSION` is a schema fence — it MUST be bumped with every new migration (a unit test asserts it equals the highest migration), and a stale value makes the DB refuse to open after the migration applies. `ensureColumn()` + `healAllNullColumns()` backfill upgraded DBs even if a migration row is lost. New session-scoped tables must be added to `clearSession()`. A bulletproof `MAGIC_CONTEXT_TEST_DATA_DIR` guard keeps the test suite off the live DB (running `bun test` once migrated a live DB and fail-closed running binaries). SQLite binds must use SPREAD positional args, never the array form (`bun:sqlite` binds a lone array positionally; `node:sqlite` reads it as named params and throws). ## Session modes @@ -355,7 +358,7 @@ Each `tags` row is one taggable source-content unit (`message`, `file`, or `tool **Storage:** Use the SQLite database created by `src/features/magic-context/storage-db.ts` under the cortexkit data directory resolved by `src/shared/data-path.ts` (`~/.local/share/cortexkit/magic-context/context.db` on Linux/macOS, XDG-equivalent on Windows). Legacy OpenCode-plugin-folder DBs are migrated forward on first boot. The same DB is shared cross-harness between OpenCode and Pi; session-scoped tables include a `harness` discriminator (`'opencode'` / `'pi'`) while project-scoped tables (memories, git commits) are shared. 
-**Schema migrations:** `src/features/magic-context/migrations.ts` declares versioned migrations v1–v39 (`LATEST_SUPPORTED_VERSION = 39` in `storage-db.ts` is the schema-fence ceiling and MUST be bumped with every new migration; a unit test — `schema-version-fence.test.ts` — asserts `LATEST_SUPPORTED_VERSION === LATEST_MIGRATION_VERSION` so the two can't drift). Notable: v10 `tool_owner_message_id` (composite tool-tag identity); v11 `todo_synthetic_*` (synthetic-todowrite); v12 orphan `memory_embeddings` cleanup; v13 `pending_compaction_marker_state` (deferred-marker drain); v14 project-scoped key files + version counter; v15 `deferred_execute_state` (boundary execution); v16 context-limit cache sentinels; v17 multi-anchor note-nudge/auto-search JSON storage; v18 `pending_pi_compaction_marker_state`; v19 compartment-state lease table; v20 subagent invocation token accounting; v21 session lifetime work metrics; **v22 the v2.0 cache-architecture foundation (m[0]/m[1] split tables, `project_state` epoch counter, plus per-compartment `p1`–`p4` tier columns, `importance`, `episode_type`, `p1_embedding`, and `legacy` flag); v23 `compartment_events` (historian-extracted causal_incident / trajectory_correction, stored-not-rendered in v2.0); v24 `historian_runs` telemetry (per-run chunk range, compartment/fact/event counts, importance min/max/avg, status + failure reason, FK to `subagent_invocations`); v25 `pi_stable_id_scheme` (Pi stable-id cutover watermark); v26 `memory_mutation_log` + `cached_m1_bytes` (memory supersede-delta — non-additive in-session memory mutations render as an m[1] `` delta instead of bumping the project epoch, plus the frozen-m[1]-bytes cache column); v27 `tags.entry_fingerprint` (Pi fallback-tag adoption); v28 `git_sweep_coordinator` (lease/cooldown for cross-process git-commit sweeps); v29 `notes.anchor_ordinal` (note→conversation-tail traceback); v30 `cached_m0_system_hash` / `cached_m0_tool_set_hash` / `cached_m0_model_key` (HARD-bust m[0] 
markers — provider-side cache-eviction detection for the materialization taxonomy; the migration clears the m[0]/m[1] cache once so pre-v30 rows re-materialize cleanly); v31 ctx_reduce-nudge state (`last_nudge_undropped`, `channel2_nudge_state`, `last_emergency_input_sample` + startup heal zeroing legacy sticky/anchor nudge state); v32 protected-tail v3 boundary state + per-tag cached token counts (`tags.token_count` / `input_token_count` / `reasoning_token_count` — computed once on tag insert, summed for sidebar/boundary/nudge math); v33 `compartment_chunk_embeddings` table for cross-session semantic search across compartment windows; v34 `workspaces` / `workspace_members` tables plus `cached_m0_workspace_fingerprint` m[0] marker (with a one-shot m[0]/m[1] cache reset so pre-v34 rows re-materialize cleanly); v35 `workspaces.share_categories` default + epoch refresh for existing members; v36 `session_projects` ownership map + seed for pre-v36 embedded sessions; v37 emergency drain catch-up latch + historian drain failure backoff; v38 `transform_decisions` table for durable cache-event cause attribution; **v39 `skill_memory` table for per-skill cross-session recall (the `skill-memory` feature — see "Skill-memory" in Key Abstractions) with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE, plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes for the flat-recall path.** Migration runner uses `schema_migrations` table with version-ordered execution and sibling-startup race protection (duplicate-insert is tolerated). +**Schema migrations:** `src/features/magic-context/migrations.ts` declares versioned migrations v1–v52 (`LATEST_SUPPORTED_VERSION = 52` in `storage-db.ts` is the schema-fence ceiling and MUST be bumped with every new migration; a unit test — `schema-version-fence.test.ts` — asserts `LATEST_SUPPORTED_VERSION === LATEST_MIGRATION_VERSION` so the two can't drift). 
Notable: v10 `tool_owner_message_id` (composite tool-tag identity); v11 `todo_synthetic_*` (synthetic-todowrite); v12 orphan `memory_embeddings` cleanup; v13 `pending_compaction_marker_state` (deferred-marker drain); v14 project-scoped key files + version counter; v15 `deferred_execute_state` (boundary execution); v16 context-limit cache sentinels; v17 multi-anchor note-nudge/auto-search JSON storage; v18 `pending_pi_compaction_marker_state`; v19 compartment-state lease table; v20 subagent invocation token accounting; v21 session lifetime work metrics; **v22 the v2.0 cache-architecture foundation (m[0]/m[1] split tables, `project_state` epoch counter, plus per-compartment `p1`–`p4` tier columns, `importance`, `episode_type`, `p1_embedding`, and `legacy` flag); v23 `compartment_events` (historian-extracted causal_incident / trajectory_correction, stored-not-rendered in v2.0); v24 `historian_runs` telemetry (per-run chunk range, compartment/fact/event counts, importance min/max/avg, status + failure reason, FK to `subagent_invocations`); v25 `pi_stable_id_scheme` (Pi stable-id cutover watermark); v26 `memory_mutation_log` + `cached_m1_bytes` (memory supersede-delta — non-additive in-session memory mutations render as an m[1] `` delta instead of bumping the project epoch, plus the frozen-m[1]-bytes cache column); v27 `tags.entry_fingerprint` (Pi fallback-tag adoption); v28 `git_sweep_coordinator` (lease/cooldown for cross-process git-commit sweeps); v29 `notes.anchor_ordinal` (note→conversation-tail traceback); v30 `cached_m0_system_hash` / `cached_m0_tool_set_hash` / `cached_m0_model_key` (HARD-bust m[0] markers — provider-side cache-eviction detection for the materialization taxonomy; the migration clears the m[0]/m[1] cache once so pre-v30 rows re-materialize cleanly); v31 ctx_reduce-nudge state (`last_nudge_undropped`, `channel2_nudge_state`, `last_emergency_input_sample` + startup heal zeroing legacy sticky/anchor nudge state); v32 protected-tail v3 boundary 
state + per-tag cached token counts (`tags.token_count` / `input_token_count` / `reasoning_token_count` — computed once on tag insert, summed for sidebar/boundary/nudge math); v33 `compartment_chunk_embeddings` table for cross-session semantic search across compartment windows; v34 `workspaces` / `workspace_members` tables plus `cached_m0_workspace_fingerprint` m[0] marker (with a one-shot m[0]/m[1] cache reset so pre-v34 rows re-materialize cleanly); v35 `workspaces.share_categories` default + epoch refresh for existing members; v36 `session_projects` ownership map + seed for pre-v36 embedded sessions; v37 emergency drain catch-up latch + historian drain failure backoff; v38 `transform_decisions` table for durable cache-event cause attribution; v40 index Pi fallback tool owners for stable-id cutover; v41 key detected context limits by model; v42 per-task dreamer scheduling state (Dreamer v2 A+B); v43 memory verification side table and verify watermarks; v44 memory classification scope and shareability columns; v45 retrospective content watermark and processed-window idempotence; v46 Primers v1 candidate and promoted primer storage; v47 compiled smart-note checks and runtime policy state; v48 DreamerV2 rework: memory→file mapping vs verification split, classify marker; v49 per-model embedding coexistence and active identity tracking; **v50 `skill_memory` table for per-skill cross-session recall (P1 — see "Skill-memory" in Key Abstractions) with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE, plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes for the flat-recall path; v51 skill-memory P2 — `delta_embedding` + `recall_count` columns + content-linked `skill_memory_fts` FTS5 vtable (intent+delta, porter+unicode61 tokenizer, INSERT/UPDATE/DELETE triggers, post-migration rebuild) for the multi-rung recall cascade; v52 skill-memory historian extraction — `origin_project` + `source_type` columns and global-tier `'*'` collision-merge (one 
row per global lesson, recallable from any repo, with `origin_project` preserved).** Migration runner uses `schema_migrations` table with version-ordered execution and sibling-startup race protection (duplicate-insert is tolerated). **Harness-aware behavior:** `src/shared/harness.ts` exposes `setHarness()`/`getHarness()` for the runtime to identify itself; production INSERTs into session-scoped tables tag rows with the current harness. Pi-specific session-resolution paths are skipped on OpenCode and vice versa. diff --git a/assets/magic-context.schema.json b/assets/magic-context.schema.json index 52b0e02c..f6399635 100644 --- a/assets/magic-context.schema.json +++ b/assets/magic-context.schema.json @@ -396,6 +396,10 @@ "refresh-primers": { "schedule": "0 3 * * *", "timeout_minutes": 20 + }, + "distill-skill-memory": { + "schedule": "", + "timeout_minutes": 20 } }, "type": "object", @@ -963,6 +967,56 @@ "minimum": 5 } } + }, + "distill-skill-memory": { + "default": { + "schedule": "", + "timeout_minutes": 20 + }, + "type": "object", + "properties": { + "schedule": { + "default": "", + "type": "string", + "description": "5-field cron schedule (e.g. \"0 3 * * *\"), or \"\" to disable this task." + }, + "model": { + "description": "Per-task model override (inherits dreamer.model)", + "type": "string" + }, + "fallback_models": { + "description": "Per-task fallback chain (inherits dreamer.fallback_models)", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "thinking_level": { + "description": "Pi only: per-task thinking level", + "type": "string", + "enum": [ + "off", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + }, + "timeout_minutes": { + "default": 20, + "description": "Minutes allowed for this task before it is aborted", + "type": "number", + "minimum": 5 + } + } } }, "description": "Per-task scheduling + model config. 
Each task has its own cron schedule and may override the dreamer-level model." diff --git a/packages/cli/src/lib/dreamer-setup.test.ts b/packages/cli/src/lib/dreamer-setup.test.ts index 44e52db7..e043ea19 100644 --- a/packages/cli/src/lib/dreamer-setup.test.ts +++ b/packages/cli/src/lib/dreamer-setup.test.ts @@ -77,11 +77,11 @@ describe("runDreamerSetup", () => { const prompts = new MockPrompts({ confirms: [false], autos: ["x/y"], - selects: Array(11).fill("cron:0 3 * * *"), + selects: Array(12).fill("cron:0 3 * * *"), }); const result = await runDreamerSetup(prompts, ["x/y"]); expect(result.tasks).toBeDefined(); - expect(Object.keys(result.tasks ?? {}).length).toBe(11); + expect(Object.keys(result.tasks ?? {}).length).toBe(12); expect(result.tasks?.verify.schedule).toBe("0 3 * * *"); expect(result.tasks?.curate.schedule).toBe("0 3 * * *"); expect(result.tasks?.["classify-memories"].schedule).toBe("0 3 * * *"); diff --git a/packages/cli/src/lib/dreamer-setup.ts b/packages/cli/src/lib/dreamer-setup.ts index fd2de294..7d76812b 100644 --- a/packages/cli/src/lib/dreamer-setup.ts +++ b/packages/cli/src/lib/dreamer-setup.ts @@ -35,6 +35,7 @@ const TASK_DESCRIPTIONS: Record = { "review-user-memories": "Promote recurring behaviors into your user profile", "promote-primers": "Promote recurring project questions into Primers", "refresh-primers": "Refresh answers for active project Primers", + "distill-skill-memory": "Opt-in: distills per-skill memory (merge/prune/promote)", }; /** v1-behavior-preserving default schedules (must match the Zod schema defaults). 
*/ @@ -50,6 +51,7 @@ const DEFAULT_TASK_SCHEDULES: Record = { "review-user-memories": "0 3 * * *", "promote-primers": "0 3 * * *", "refresh-primers": "0 3 * * *", + "distill-skill-memory": "", }; const PRESET_CUSTOM = "__custom__"; diff --git a/packages/dashboard/src-tauri/src/config.rs b/packages/dashboard/src-tauri/src/config.rs index c4db92bd..66b83216 100644 --- a/packages/dashboard/src-tauri/src/config.rs +++ b/packages/dashboard/src-tauri/src/config.rs @@ -28,7 +28,7 @@ pub fn resolve_project_config_path(project_path: &str) -> PathBuf { /// frontend DreamerTasksField list). The dashboard renders this fixed set so /// every project shows the same tasks regardless of its (possibly stale) per- /// project scheduler snapshot in task_schedule_state. -pub const CANONICAL_DREAM_TASKS: [&str; 11] = [ +pub const CANONICAL_DREAM_TASKS: [&str; 12] = [ "map-memories", "verify", "verify-broad", @@ -40,6 +40,7 @@ pub const CANONICAL_DREAM_TASKS: [&str; 11] = [ "review-user-memories", "promote-primers", "refresh-primers", + "distill-skill-memory", ]; /// Default cron per task (mirrors DEFAULT_TASK_SCHEDULES in the plugin schema and @@ -58,6 +59,7 @@ pub fn default_task_schedule(task: &str) -> &'static str { "review-user-memories" => "0 3 * * *", "promote-primers" => "0 3 * * *", "refresh-primers" => "0 3 * * *", + "distill-skill-memory" => "", _ => "", } } diff --git a/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx b/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx index 6dd609c3..4451f7fd 100644 --- a/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx +++ b/packages/dashboard/src/components/ConfigEditor/DreamerTasksField.tsx @@ -100,6 +100,12 @@ export const TASKS: TaskMeta[] = [ description: "Refresh answers for active project Primers", defaultSchedule: "0 3 * * *", }, + { + name: "distill-skill-memory", + label: "Distill skill memory", + description: "Opt-in: distills per-skill memory (merge/prune/promote)", + 
defaultSchedule: "", + }, ]; const PRESETS: { label: string; cron: string }[] = [ diff --git a/packages/docs/src/content/docs/reference/configuration.md b/packages/docs/src/content/docs/reference/configuration.md index 13968405..a37025df 100644 --- a/packages/docs/src/content/docs/reference/configuration.md +++ b/packages/docs/src/content/docs/reference/configuration.md @@ -199,6 +199,11 @@ Off-hours maintenance (Dreamer) and on-demand prompt augmentation (Sidekick). | `dreamer.tasks.refresh-primers.fallback_models` | string \\| string[] | — | Per-task fallback chain (inherits dreamer.fallback_models) | | `dreamer.tasks.refresh-primers.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: per-task thinking level | | `dreamer.tasks.refresh-primers.timeout_minutes` | number (5–) | `20` | Minutes allowed for this task before it is aborted | +| `dreamer.tasks.distill-skill-memory.schedule` | string | `""` | 5-field cron schedule (e.g. "0 3 * * *"), or "" to disable this task. | +| `dreamer.tasks.distill-skill-memory.model` | string | — | Per-task model override (inherits dreamer.model) | +| `dreamer.tasks.distill-skill-memory.fallback_models` | string \\| string[] | — | Per-task fallback chain (inherits dreamer.fallback_models) | +| `dreamer.tasks.distill-skill-memory.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: per-task thinking level | +| `dreamer.tasks.distill-skill-memory.timeout_minutes` | number (5–) | `20` | Minutes allowed for this task before it is aborted | | `dreamer.inject_docs` | boolean | `true` | Inject ARCHITECTURE.md and STRUCTURE.md into system prompt | | `dreamer.thinking_level` | `"off"` \\| `"minimal"` \\| `"low"` \\| `"medium"` \\| `"high"` \\| `"xhigh"` | — | Pi only: default thinking level for dreamer subagent invocations. See historian.thinking_level. 
| | `sidekick` | object | — | Optional sidekick agent configuration for session-start memory retrieval | diff --git a/packages/pi-plugin/src/pi-historian-runner.test.ts b/packages/pi-plugin/src/pi-historian-runner.test.ts index 005c1db1..0299faac 100644 --- a/packages/pi-plugin/src/pi-historian-runner.test.ts +++ b/packages/pi-plugin/src/pi-historian-runner.test.ts @@ -393,6 +393,25 @@ describe("runPiHistorian", () => { closeQuietly(db); } }); + it("promotes skillObservations as global '*' notes", async () => { + const xml = `${successXml()}\n\n* council | gotcha | fast aggregator\n`; + const { db } = await runHistorianWith({ + outputs: [xml], + memoryEnabled: true, + autoPromote: true, + }); + try { + const row = db + .prepare( + "SELECT project_identity, source_type FROM skill_memory WHERE skill_id='council'", + ) + .get() as { project_identity: string; source_type: string } | undefined; + expect(row?.project_identity).toBe("*"); + expect(row?.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); it("runs the Pi subagent, parses output, and publishes compartments and facts", async () => { const { db, runner } = await runHistorianWith({ outputs: [successXml()] }); try { diff --git a/packages/pi-plugin/src/pi-historian-runner.ts b/packages/pi-plugin/src/pi-historian-runner.ts index c0d5c86e..0cd99ad1 100644 --- a/packages/pi-plugin/src/pi-historian-runner.ts +++ b/packages/pi-plugin/src/pi-historian-runner.ts @@ -52,6 +52,7 @@ import { } from "@magic-context/core/features/magic-context/memory"; import { resolveProjectIdentity } from "@magic-context/core/features/magic-context/memory/project-identity"; import { getMemoriesByProject } from "@magic-context/core/features/magic-context/memory/storage-memory"; +import { promoteSkillObservations } from "@magic-context/core/features/magic-context/skill-memory/promote"; import { clearEmergencyDrainLatch, clearEmergencyRecovery, @@ -1114,6 +1115,28 @@ export async function runPiHistorian(deps: 
PiHistorianDeps): Promise { } } + // Skill-memory historian extraction (Pi mirror of OpenCode): promote + // into per-skill notes. Same promotionActive + + // !discardedLast gate as facts/primers so a provisional tail does not + // double-emit. + if ( + promotionActive && + !discardedLast && + validatedPass.skillObservations && + validatedPass.skillObservations.length > 0 + ) { + try { + const written = promoteSkillObservations( + db, + projectPath, + validatedPass.skillObservations, + ); + sessionLog(sessionId, `promoted ${written} skill observation(s)`); + } catch (error) { + sessionLog(sessionId, "failed to promote skill observations:", error); + } + } + // Raw chunk embeddings: the ctx_search semantic substrate over session // history. Fire-and-forget, best-effort, memory-gated. if (embeddingActive) { @@ -1275,6 +1298,13 @@ type ValidationOutcome = ? P : never : never; + skillObservations?: ReturnType< + typeof validateHistorianOutput + > extends infer T + ? T extends { ok: true; skillObservations?: infer S } + ? S + : never + : never; events?: ReturnType extends infer T ? T extends { ok: true; events?: infer E } ? 
E @@ -1317,6 +1347,7 @@ async function validateHistorianResult( facts: validation.facts, userObservations: validation.userObservations, primerCandidates: validation.primerCandidates, + skillObservations: validation.skillObservations, events: validation.events, }; } diff --git a/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts index a80ddff3..a5cfe446 100644 --- a/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts +++ b/packages/plugin/src/config/schema/distill-skill-memory-enum.test.ts @@ -1,22 +1,34 @@ import { describe, expect, test } from "bun:test"; -import { DEFAULT_DREAMER_TASKS, DREAMER_TASKS, DreamingTaskSchema } from "./magic-context"; +import { + AGENTIC_DREAM_TASKS, + CANONICAL_DREAM_TASKS, +} from "../../features/magic-context/dreamer/task-registry"; +import { DREAMER_TASKS, DreamingTaskSchema, DreamTasksSchema } from "./magic-context"; describe("distill-skill-memory dreamer task", () => { - test("distill-skill-memory is in DREAMER_TASKS enum", () => { - expect(DREAMER_TASKS).toContain("distill-skill-memory"); + test("distill-skill-memory is a canonical dream task", () => { + expect(CANONICAL_DREAM_TASKS).toContain("distill-skill-memory"); }); - test("distill-skill-memory is NOT in DEFAULT_DREAMER_TASKS (opt-in)", () => { - expect(DEFAULT_DREAMER_TASKS).not.toContain("distill-skill-memory"); + test("distill-skill-memory is agentic (prompt-driven)", () => { + expect(AGENTIC_DREAM_TASKS).toContain("distill-skill-memory"); + // DREAMER_TASKS (the schema enum) re-exports AGENTIC_DREAM_TASKS. 
+ expect(DREAMER_TASKS).toContain("distill-skill-memory"); }); test("DreamingTaskSchema accepts distill-skill-memory", () => { expect(() => DreamingTaskSchema.parse("distill-skill-memory")).not.toThrow(); }); - test("maintain-docs is also not in DEFAULT_DREAMER_TASKS (precedent)", () => { - // Verify the existing asymmetry pattern we're following - expect(DEFAULT_DREAMER_TASKS).not.toContain("maintain-docs"); - expect(DREAMER_TASKS).toContain("maintain-docs"); + test("distill-skill-memory defaults to OFF (empty schedule = opt-in)", () => { + // v2 model: a task is opt-in when its default schedule is "" (disabled). + // Parse an explicit tasks object so the per-key defaults fire. + const parsed = DreamTasksSchema.parse({}); + expect(parsed["distill-skill-memory"].schedule).toBe(""); + }); + + test("maintain-docs is also opt-in (empty default schedule) — precedent", () => { + const parsed = DreamTasksSchema.parse({}); + expect(parsed["maintain-docs"].schedule).toBe(""); }); }); diff --git a/packages/plugin/src/config/schema/magic-context.ts b/packages/plugin/src/config/schema/magic-context.ts index fc3e0e8c..30637109 100644 --- a/packages/plugin/src/config/schema/magic-context.ts +++ b/packages/plugin/src/config/schema/magic-context.ts @@ -110,6 +110,8 @@ const DEFAULT_TASK_SCHEDULES: Record = { "review-user-memories": "0 3 * * *", "promote-primers": "0 3 * * *", "refresh-primers": "0 3 * * *", + // Opt-in (NOT a default): distills per-skill memory. Off until scheduled. + "distill-skill-memory": "", }; function defaultTaskConfig(task: DreamTaskName): z.input { @@ -159,6 +161,9 @@ export const DreamTasksSchema = z "refresh-primers": DreamTaskBaseConfigSchema.default(() => DreamTaskBaseConfigSchema.parse(defaultTaskConfig("refresh-primers")), ), + "distill-skill-memory": DreamTaskBaseConfigSchema.default(() => + DreamTaskBaseConfigSchema.parse(defaultTaskConfig("distill-skill-memory")), + ), }) .describe( "Per-task scheduling + model config. 
Each task has its own cron schedule and may override the dreamer-level model.", diff --git a/packages/plugin/src/features/magic-context/dreamer/runner.ts b/packages/plugin/src/features/magic-context/dreamer/runner.ts deleted file mode 100644 index dd0b2c45..00000000 --- a/packages/plugin/src/features/magic-context/dreamer/runner.ts +++ /dev/null @@ -1,1138 +0,0 @@ -import { existsSync } from "node:fs"; -import { join } from "node:path"; -import { DREAMER_AGENT } from "../../../agents/dreamer"; -import type { DreamingTask } from "../../../config/schema/magic-context"; -import type { PluginContext } from "../../../plugin/types"; -import * as shared from "../../../shared"; -import { extractLatestAssistantText } from "../../../shared/assistant-message-extractor"; -import { getDataDir } from "../../../shared/data-path"; -import { describeError, getErrorMessage } from "../../../shared/error-message"; -import { shouldKeepSubagents } from "../../../shared/keep-subagents"; -import { log } from "../../../shared/logger"; -import { Database } from "../../../shared/sqlite"; -import { closeQuietly } from "../../../shared/sqlite-helpers"; -import { runKeyFilesTask } from "../key-files/identify-key-files"; -import { getMemoryCountsByStatus } from "../memory/storage-memory"; -import { reembedStaleSkillNotes } from "../skill-memory/reembed"; -import { getPendingSmartNotes, markNoteChecked, markNoteReady } from "../storage-notes"; -import { recordChildInvocation } from "../subagent-token-capture"; -import { reviewUserMemories } from "../user-memory/review-user-memories"; -import { getActiveUserMemories } from "../user-memory/storage-user-memory"; -import { acquireLease, getLeaseHolder, releaseLease, renewLease } from "./lease"; -import { - enforceMaintainDocsProtectedRegions, - snapshotMaintainDocsFiles, -} from "./maintain-docs-protected-enforcement"; -import { - clearStaleEntries, - dequeueNext, - getEntryRetryCount, - hasActiveDreamLease, - removeDreamEntry, - resetDreamEntry, -} 
from "./queue"; -import { insertDreamRun } from "./storage-dream-runs"; -import { getDreamState, setDreamState } from "./storage-dream-state"; -import { buildDreamTaskPrompt, DREAMER_SYSTEM_PROMPT } from "./task-prompts"; - -// Keyed by project identity (e.g. "git:"), not filesystem path. Two -// worktrees/clones of the same repo collapse to the same identity, so in a -// single process this map's entry for that identity is "last-registered wins" -// — it can point at a different checkout than the one draining the queue. This -// map is only a FALLBACK now: production drain callers pass their own -// `sessionDirectoryOverride` (the directory of the project THIS process owns), -// so the dequeued entry always runs in a live checkout the draining process -// actually registered, never a stale sibling-checkout path. See -// processDreamQueue's sessionDirectoryOverride. -const dreamProjectDirectories = new Map(); -const CIRCUIT_BREAKER_THRESHOLD = 3; - -interface ExperimentalPinKeyFilesConfig { - enabled: boolean; - token_budget: number; - min_reads: number; -} - -export function registerDreamProjectDirectory(projectIdentity: string, directory: string): void { - dreamProjectDirectories.set(projectIdentity, directory); -} - -function resolveDreamSessionDirectory(projectIdentity: string): string { - return dreamProjectDirectories.get(projectIdentity) ?? 
projectIdentity; -} - -export interface DreamRunResult { - startedAt: number; - finishedAt: number; - holderId: string; - smartNotesSurfaced: number; - smartNotesPending: number; - tasks: { - name: string; - durationMs: number; - result: unknown; - error?: string; - }[]; -} - -function countNewIds(beforeIds: number[], afterIds: number[]): number { - const beforeSet = new Set(beforeIds); - let count = 0; - for (const id of afterIds) { - if (!beforeSet.has(id)) { - count += 1; - } - } - return count; -} - -function getCircuitBreakerSignature(error: unknown, brief: string): string { - if (error instanceof Error && error.name && error.name !== "Error") { - return error.name; - } - - const namedError = error as { name?: unknown } | null; - if ( - namedError && - typeof namedError === "object" && - typeof namedError.name === "string" && - namedError.name.length > 0 && - namedError.name !== "Error" - ) { - return namedError.name; - } - - return brief.split(":")[0]?.trim().split(/\s+/)[0] || brief || "unknown"; -} - -function shouldSkipCircuitBreaker(error: unknown, brief: string): boolean { - const namedError = error as { name?: unknown } | null; - const name = - error instanceof Error - ? error.name - : namedError && typeof namedError === "object" && typeof namedError.name === "string" - ? namedError.name - : ""; - const combined = `${name} ${brief}`.toLowerCase(); - return name === "AbortError" || combined.includes("lease"); -} - -function logWithStackHead(message: string, stackHead?: string): void { - log(message, stackHead ? 
{ stackHead } : undefined); -} - -function getOpenCodeDbPath(): string { - return join(getDataDir(), "opencode", "opencode.db"); -} - -function openOpenCodeDb(): Database | null { - const dbPath = getOpenCodeDbPath(); - if (!existsSync(dbPath)) { - log(`[key-files] OpenCode DB not found at ${dbPath} — skipping`); - return null; - } - - try { - const db = new Database(dbPath, { readonly: true }); - db.exec("PRAGMA busy_timeout = 5000"); - return db; - } catch (error) { - log(`[key-files] failed to open OpenCode DB at ${dbPath}: ${getErrorMessage(error)}`); - return null; - } -} - -export async function runDream(args: { - db: Database; - client: PluginContext["client"]; - /** Project identity (e.g. "git:"), NOT a filesystem path. Used for dream state keys. */ - projectIdentity: string; - tasks: DreamingTask[]; - taskTimeoutMinutes: number; - maxRuntimeMinutes: number; - parentSessionId?: string; - sessionDirectory?: string; - experimentalUserMemories?: { enabled: boolean; promotionThreshold: number }; - experimentalPinKeyFiles?: ExperimentalPinKeyFilesConfig; - /** - * Resolved fallback chain for dreamer subagent calls. When the primary - * `dreamer.model` fails (auth, model-not-found, rate limit, transient - * network), each entry is tried in order before giving up. Empty/undefined - * disables fallback iteration (legacy single-suggestion-retry only). - * - * Caller (`processDreamQueue` / direct caller) resolves this via - * `resolveFallbackChain(DREAMER_AGENT, config.dreamer.fallback_models)`. 
- */ - fallbackModels?: readonly string[]; -}): Promise { - const holderId = crypto.randomUUID(); - const startedAt = Date.now(); - const result: DreamRunResult = { - startedAt, - finishedAt: startedAt, - holderId, - smartNotesSurfaced: 0, - smartNotesPending: 0, - tasks: [], - }; - const memoryCountsBefore = getMemoryCountsByStatus(args.db, args.projectIdentity); - - log( - `[dreamer] starting dream run: ${args.tasks.length} tasks, timeout=${args.taskTimeoutMinutes}m, maxRuntime=${args.maxRuntimeMinutes}m, project=${args.projectIdentity}`, - ); - - if (!acquireLease(args.db, holderId)) { - const currentHolder = getLeaseHolder(args.db) ?? "another holder"; - log(`[dreamer] lease acquisition failed — already held by ${currentHolder}`); - result.tasks.push({ - name: "lease", - durationMs: 0, - result: null, - error: `Dream lease is already held by ${currentHolder}`, - }); - result.finishedAt = Date.now(); - return result; - } - log(`[dreamer] lease acquired: ${holderId}`); - - // Resolve a parent session ID so child sessions are hidden from the UI session list. - // /ctx-dream passes the active session; scheduled runs resolve from the API. - let parentSessionId = args.parentSessionId; - if (!parentSessionId) { - try { - const sessionDir = args.sessionDirectory ?? 
args.projectIdentity; - const listResponse = await args.client.session.list({ - query: { directory: sessionDir }, - }); - const sessions = shared.normalizeSDKResponse(listResponse, [] as { id?: string }[], { - preferResponseOnMissingData: true, - }); - // Intentional: any existing session works — we just need parentID so child sessions don't appear in the UI - parentSessionId = sessions?.find((s) => typeof s?.id === "string")?.id; - if (parentSessionId) { - log(`[dreamer] resolved parent session: ${parentSessionId}`); - } - } catch { - log( - "[dreamer] could not resolve parent session — child sessions will be visible in UI", - ); - } - } - - const deadline = startedAt + args.maxRuntimeMinutes * 60 * 1000; - // Strictly per-project (no global-key fallback — it cross-contaminated the - // maintain-docs cutoff across projects). - const lastDreamAt = getDreamState(args.db, `last_dream_at:${args.projectIdentity}`); - log(`[dreamer] last dream at: ${lastDreamAt ?? "never"} (project=${args.projectIdentity})`); - - let lastErrorSignature: string | null = null; - let consecutiveSameErrorFailures = 0; - let circuitBreakerTripped = false; - let lostLease = false; - let lostLeaseReason: string | null = null; - let lostLeaseRecorded = false; - - const markLeaseLost = (phase: string, error?: unknown): void => { - const detail = error ? `: ${getErrorMessage(error)}` : ""; - lostLeaseReason = `Dream lease lost during ${phase}${detail}`; - if (!lostLease) { - log(`[dreamer] FATAL: ${lostLeaseReason}; aborting all remaining dream work`); - } else { - log(`[dreamer] FATAL: ${lostLeaseReason}; dream work is already aborting`); - } - lostLease = true; - }; - - const recordLeaseLostTask = (phase: string): void => { - if (lostLeaseRecorded) return; - lostLeaseRecorded = true; - result.tasks.push({ - name: "lease-lost", - durationMs: 0, - result: "", - error: lostLeaseReason ?? 
`Dream lease lost during ${phase}; aborted remaining work`, - }); - }; - - const verifyLeaseStillHeld = (phase: string): boolean => { - if (lostLease) return false; - try { - if (!renewLease(args.db, holderId)) { - markLeaseLost(phase); - return false; - } - return true; - } catch (error) { - markLeaseLost(phase, error); - return false; - } - }; - - try { - for (const taskName of args.tasks) { - if (!verifyLeaseStillHeld(`before task ${taskName}`)) { - recordLeaseLostTask(`before task ${taskName}`); - break; - } - if (Date.now() > deadline) { - log(`[dreamer] deadline reached, stopping after ${result.tasks.length} tasks`); - break; - } - - log(`[dreamer] starting task: ${taskName}`); - const taskStartedAt = Date.now(); - let agentSessionId: string | null = null; - // Keep FAILED dreamer child sessions for debugging (the task's model - // output + error stay inspectable); delete only on success. - let taskFailed = false; - const invocationStartedAt = Date.now(); - let invocationRecorded = false; - const recordInvocation = (params: { - status: "completed" | "failed" | "aborted"; - messages?: unknown[]; - error?: unknown; - }) => { - if (!parentSessionId || invocationRecorded) return; - invocationRecorded = true; - recordChildInvocation({ - db: args.db, - parentSessionId, - harness: "opencode", - subagent: "dreamer", - task: taskName, - startedAt: invocationStartedAt, - status: params.status, - messages: params.messages, - error: params.error, - }); - }; - // AbortController lets us cancel the in-flight LLM prompt immediately when lease is lost - const taskAbortController = new AbortController(); - // Renew lease periodically while the LLM task runs (can take 5+ min on slow models) - const leaseRenewalInterval = setInterval(() => { - try { - if (!renewLease(args.db, holderId)) { - log(`[dreamer] task ${taskName}: lease renewal failed — aborting LLM call`); - markLeaseLost(`task ${taskName} lease renewal`); - taskAbortController.abort(); - } - } catch (err) { - log( - 
`[dreamer] task ${taskName}: lease renewal threw — aborting LLM call: ${err}`, - ); - markLeaseLost(`task ${taskName} lease renewal`, err); - taskAbortController.abort(); - } - }, 60_000); - - try { - // Use sessionDirectory (filesystem path) for file checks, not projectPath (identity like "git:") - const docsDir = args.sessionDirectory ?? args.projectIdentity; - const maintainDocsSnapshot = - taskName === "maintain-docs" ? snapshotMaintainDocsFiles(docsDir) : undefined; - const existingDocs = - taskName === "maintain-docs" - ? { - architecture: existsSync(join(docsDir, "ARCHITECTURE.md")), - structure: existsSync(join(docsDir, "STRUCTURE.md")), - } - : undefined; - - // Load user memories for archive-stale dedup context - const userMemories = - taskName === "archive-stale" - ? getActiveUserMemories(args.db).map((um) => ({ - id: um.id, - content: um.content, - })) - : undefined; - - if (taskName === "distill-skill-memory") { - await reembedStaleSkillNotes(args.db, args.projectIdentity); - } - - const taskPrompt = buildDreamTaskPrompt(taskName, { - projectPath: args.projectIdentity, - lastDreamAt, - existingDocs, - userMemories, - }); - - const createResponse = await args.client.session.create({ - body: { - ...(parentSessionId ? { parentID: parentSessionId } : {}), - title: `magic-context-dream-${taskName}`, - }, - query: { directory: args.sessionDirectory ?? args.projectIdentity }, - }); - - const createdSession = shared.normalizeSDKResponse( - createResponse, - null as { id?: string } | null, - { preferResponseOnMissingData: true }, - ); - agentSessionId = typeof createdSession?.id === "string" ? 
createdSession.id : null; - if (!agentSessionId) { - const error = new Error("Dreamer could not create its child session."); - recordInvocation({ status: "failed", error }); - throw error; - } - log(`[dreamer] task ${taskName}: child session created ${agentSessionId}`); - const childSessionId = agentSessionId; - - const dreamTaskRun = await shared.promptSyncWithValidatedOutputRetry( - args.client, - { - path: { id: childSessionId }, - query: { directory: args.sessionDirectory ?? args.projectIdentity }, - body: { - agent: DREAMER_AGENT, - system: DREAMER_SYSTEM_PROMPT, - // synthetic: true hides the dreamer task prompt from the TUI - // subagent pane while still delivering it to the model. See issue #50. - parts: [{ type: "text", text: taskPrompt, synthetic: true }], - }, - }, - { - timeoutMs: args.taskTimeoutMinutes * 60 * 1000, - signal: taskAbortController.signal, - fallbackModels: args.fallbackModels, - callContext: `dreamer:${taskName}`, - fetchOutput: async () => { - const messagesResponse = await args.client.session.messages({ - path: { id: childSessionId }, - query: { - directory: args.sessionDirectory ?? args.projectIdentity, - limit: 50, - }, - }); - return shared.normalizeSDKResponse(messagesResponse, [] as unknown[], { - preferResponseOnMissingData: true, - }); - }, - validateOutput: (messages) => { - const taskResult = extractLatestAssistantText(messages); - if (!taskResult) { - throw new Error("Dreamer returned no assistant output."); - } - return taskResult; - }, - }, - ); - if (lostLease) { - throw new Error(lostLeaseReason ?? 
`Dream lease lost during ${taskName}`); - } - - const taskResult = dreamTaskRun.validated; - recordInvocation({ status: "completed", messages: dreamTaskRun.output }); - - if ( - taskName === "maintain-docs" && - maintainDocsSnapshot && - maintainDocsSnapshot.size > 0 - ) { - try { - enforceMaintainDocsProtectedRegions({ - docsDir, - snapshot: maintainDocsSnapshot, - }); - } catch (error) { - log( - `[dreamer] maintain-docs protected-region enforcement failed: ${error}`, - ); - } - } - - const durationMs = Date.now() - taskStartedAt; - log( - `[dreamer] task ${taskName}: completed in ${(durationMs / 1000).toFixed(1)}s (result: ${String(taskResult).length} chars)`, - ); - result.tasks.push({ - name: taskName, - durationMs, - result: taskResult, - }); - lastErrorSignature = null; - consecutiveSameErrorFailures = 0; - } catch (error) { - taskFailed = true; - recordInvocation({ status: lostLease ? "aborted" : "failed", error }); - const durationMs = Date.now() - taskStartedAt; - const errorDescription = describeError(error); - logWithStackHead( - `[dreamer] task ${taskName}: failed after ${(durationMs / 1000).toFixed(1)}s — ${errorDescription.brief}`, - errorDescription.stackHead, - ); - result.tasks.push({ - name: taskName, - durationMs, - result: null, - error: errorDescription.brief, - }); - - if (lostLease) { - lastErrorSignature = null; - consecutiveSameErrorFailures = 0; - } else if (shouldSkipCircuitBreaker(error, errorDescription.brief)) { - lastErrorSignature = null; - consecutiveSameErrorFailures = 0; - } else { - const signature = getCircuitBreakerSignature(error, errorDescription.brief); - if (signature === lastErrorSignature) { - consecutiveSameErrorFailures += 1; - } else { - lastErrorSignature = signature; - consecutiveSameErrorFailures = 1; - } - - if (consecutiveSameErrorFailures >= CIRCUIT_BREAKER_THRESHOLD) { - circuitBreakerTripped = true; - log( - `[dreamer] circuit breaker: ${consecutiveSameErrorFailures} consecutive ${signature} failures — 
aborting remaining tasks`, - ); - result.tasks.push({ - name: "circuit-breaker", - durationMs: 0, - result: "", - error: `Aborted remaining tasks: ${consecutiveSameErrorFailures} consecutive ${signature} failures. Configure dreamer model/fallback_models in magic-context.jsonc.`, - }); - } - } - } finally { - clearInterval(leaseRenewalInterval); - // Delete the child session only on SUCCESS. Keep failed sessions so - // the task's prompt / model output / error can be inspected (the - // failure is already recorded in subagent_invocations). - // keep_subagents debug flag retains successful ones too. - if (agentSessionId && !taskFailed && !shouldKeepSubagents()) { - await args.client.session - .delete({ - path: { id: agentSessionId }, - }) - .catch((error: unknown) => { - log("[dreamer] failed to delete child session:", error); - }); - } else if (agentSessionId && (taskFailed || shouldKeepSubagents())) { - log( - `[dreamer] KEEPING child session ${agentSessionId} for task ${taskName} (${taskFailed ? "failed" : "keep_subagents"})`, - ); - } - } - - if (lostLease) { - recordLeaseLostTask(`task ${taskName}`); - break; - } - - if (circuitBreakerTripped) { - break; - } - } - - if (lostLease) { - log("[dreamer] lease lost: skipping all post-task phases"); - recordLeaseLostTask("post-task phases"); - } else if (circuitBreakerTripped) { - log("[dreamer] circuit breaker: skipping post-task phases"); - result.tasks.push({ - name: "post-task-phases", - durationMs: 0, - result: "", - error: "Skipped post-task phases after circuit breaker tripped; configure dreamer model/fallback_models in magic-context.jsonc.", - }); - } - // ── User memory review phase ── - // Runs after regular dream tasks, reviews user memory candidates for promotion. 
- if ( - !circuitBreakerTripped && - !lostLease && - args.experimentalUserMemories?.enabled && - Date.now() <= deadline - ) { - const umStart = Date.now(); - try { - if (!verifyLeaseStillHeld("before user-memory review")) { - throw new Error( - lostLeaseReason ?? "Dream lease lost before user-memory review", - ); - } - const reviewResult = await reviewUserMemories({ - db: args.db, - client: args.client, - parentSessionId, - sessionDirectory: args.sessionDirectory, - holderId, - deadline, - promotionThreshold: args.experimentalUserMemories.promotionThreshold, - fallbackModels: args.fallbackModels, - }); - if (!verifyLeaseStillHeld("after user-memory review")) { - throw new Error(lostLeaseReason ?? "Dream lease lost after user-memory review"); - } - const umOutput = `promoted=${reviewResult.promoted} merged=${reviewResult.merged} dismissed=${reviewResult.dismissed} consumed=${reviewResult.candidatesConsumed}`; - if ( - reviewResult.promoted > 0 || - reviewResult.merged > 0 || - reviewResult.dismissed > 0 - ) { - log(`[dreamer] user-memories: ${umOutput}`); - } - result.tasks.push({ - name: "user memories", - durationMs: Date.now() - umStart, - result: umOutput, - }); - } catch (error) { - const errorDescription = describeError(error); - logWithStackHead( - `[dreamer] user-memory review failed: ${errorDescription.brief}`, - errorDescription.stackHead, - ); - result.tasks.push({ - name: "user memories", - durationMs: Date.now() - umStart, - result: "", - error: errorDescription.brief, - }); - } - if (lostLease) recordLeaseLostTask("user-memory review"); - } - // ── Smart note evaluation phase ── - // Runs after regular dream tasks, evaluates pending smart note conditions. - // Not a user-configurable task — always runs when dreamer has pending smart notes. - if (!circuitBreakerTripped && !lostLease && Date.now() <= deadline) { - try { - if (!verifyLeaseStillHeld("before smart-note evaluation")) { - throw new Error( - lostLeaseReason ?? 
"Dream lease lost before smart-note evaluation", - ); - } - await evaluateSmartNotes({ - db: args.db, - client: args.client, - projectIdentity: args.projectIdentity, - parentSessionId, - sessionDirectory: args.sessionDirectory, - holderId, - deadline, - result, - fallbackModels: args.fallbackModels, - onLeaseLost: markLeaseLost, - isLeaseLost: () => lostLease, - }); - if (!verifyLeaseStillHeld("after smart-note evaluation")) { - throw new Error( - lostLeaseReason ?? "Dream lease lost after smart-note evaluation", - ); - } - } catch (error) { - const errorDescription = describeError(error); - logWithStackHead( - `[dreamer] smart note evaluation failed: ${errorDescription.brief}`, - errorDescription.stackHead, - ); - } - if (lostLease) recordLeaseLostTask("smart-note evaluation"); - } - if ( - !circuitBreakerTripped && - !lostLease && - args.experimentalPinKeyFiles?.enabled && - Date.now() <= deadline - ) { - const kfStart = Date.now(); - try { - if (!verifyLeaseStillHeld("before key-file identification")) { - throw new Error( - lostLeaseReason ?? "Dream lease lost before key-file identification", - ); - } - const openCodeDb = openOpenCodeDb(); - if (openCodeDb) { - try { - await runKeyFilesTask({ - db: args.db, - openCodeDb, - client: args.client, - projectPath: args.sessionDirectory ?? args.projectIdentity, - holderId, - deadline, - parentSessionId, - config: args.experimentalPinKeyFiles, - fallbackModels: args.fallbackModels, - }); - } finally { - closeQuietly(openCodeDb); - } - } - if (!verifyLeaseStillHeld("after key-file identification")) { - throw new Error( - lostLeaseReason ?? 
"Dream lease lost after key-file identification", - ); - } - result.tasks.push({ - name: "key files", - durationMs: Date.now() - kfStart, - result: "completed", - }); - } catch (error) { - const errorDescription = describeError(error); - logWithStackHead( - `[key-files] identification phase failed: ${errorDescription.brief}`, - errorDescription.stackHead, - ); - result.tasks.push({ - name: "key files", - durationMs: Date.now() - kfStart, - result: "", - error: errorDescription.brief, - }); - } - if (lostLease) recordLeaseLostTask("key-file identification"); - } - } finally { - releaseLease(args.db, holderId); - log(`[dreamer] lease released: ${holderId}`); - } - - result.finishedAt = Date.now(); - const memoryCountsAfter = getMemoryCountsByStatus(args.db, args.projectIdentity); - const merged = countNewIds(memoryCountsBefore.mergedIds, memoryCountsAfter.mergedIds); - const memoryChanges = { - written: countNewIds(memoryCountsBefore.ids, memoryCountsAfter.ids), - deleted: countNewIds(memoryCountsAfter.ids, memoryCountsBefore.ids), - // archivedIds already EXCLUDES merged/superseded rows — getMemoryCountsByStatus - // routes a memory with superseded_by_memory_id into mergedIds and never into - // archivedIds (the two sets are disjoint). So the archived delta is already - // merge-free; subtracting `merged` again double-counted and under-reported - // archived (often to zero). - archived: countNewIds(memoryCountsBefore.archivedIds, memoryCountsAfter.archivedIds), - merged, - }; - const persistedMemoryChanges = Object.values(memoryChanges).some((value) => value > 0) - ? memoryChanges - : null; - insertDreamRun(args.db, { - projectPath: args.projectIdentity, - startedAt: result.startedAt, - finishedAt: result.finishedAt, - holderId: result.holderId, - tasks: result.tasks.map((task) => ({ - name: task.name, - durationMs: task.durationMs, - resultChars: typeof task.result === "string" ? task.result.length : 0, - ...(task.error ? 
{ error: task.error } : {}), - })), - tasksSucceeded: result.tasks.filter((task) => !task.error).length, - tasksFailed: result.tasks.filter((task) => Boolean(task.error)).length, - smartNotesSurfaced: result.smartNotesSurfaced, - smartNotesPending: result.smartNotesPending, - memoryChanges: persistedMemoryChanges, - }); - // Only update dream timestamps when at least one task succeeded — failed runs - // should not block re-scheduling for the project. - // - // Only count configured dream tasks (consolidate / verify / archive-stale / - // improve / maintain-docs) for success. Post-task phases (smart-notes, - // user memories, key files) run unconditionally after the main task loop - // and must NOT mask failures of the configured tasks — otherwise a - // successful key-file evaluation would suppress re-scheduling a project - // whose consolidate/verify/archive tasks all failed. - const POST_TASK_NAMES = new Set([ - "smart-notes", - "user memories", - "key files", - "post-task-phases", - "circuit-breaker", - ]); - const hasSuccessfulTask = result.tasks.some((t) => !t.error && !POST_TASK_NAMES.has(t.name)); - if (hasSuccessfulTask && !lostLease) { - // Per-project only. Do NOT also write the legacy global "last_dream_at" - // key — that write is what let one project's run suppress another's. 
- setDreamState(args.db, `last_dream_at:${args.projectIdentity}`, String(result.finishedAt)); - } - const totalDuration = ((result.finishedAt - startedAt) / 1000).toFixed(1); - const succeeded = result.tasks.filter((t) => !t.error).length; - const failed = result.tasks.filter((t) => t.error).length; - log( - `[dreamer] dream run finished in ${totalDuration}s: ${succeeded} succeeded, ${failed} failed`, - ); - return result; -} - -async function evaluateSmartNotes(args: { - db: Database; - client: PluginContext["client"]; - projectIdentity: string; - parentSessionId: string | undefined; - sessionDirectory: string | undefined; - holderId: string; - deadline: number; - result: DreamRunResult; - /** Resolved dreamer fallback chain. */ - fallbackModels?: readonly string[]; - onLeaseLost?: (phase: string, error?: unknown) => void; - isLeaseLost?: () => boolean; -}): Promise { - const pendingNotes = getPendingSmartNotes(args.db, args.projectIdentity); - if (pendingNotes.length === 0) { - log("[dreamer] smart notes: no pending notes to evaluate"); - return; - } - - log(`[dreamer] smart notes: evaluating ${pendingNotes.length} pending note(s)`); - - // Build a single evaluation prompt for all pending notes. - // The dreamer checks each condition and returns structured results. - const noteDescriptions = pendingNotes - .map((n) => `- Note #${n.id}: "${n.content}"\n Condition: ${n.surfaceCondition}`) - .join("\n"); - - const evaluationPrompt = `You are evaluating smart note conditions for the magic-context system. - -For each note below, determine whether its surface condition has been met. -You have access to tools like GitHub CLI (gh), web search, and the local codebase to verify conditions. 
- -You DO NOT have access to: -- Any conversation between the user and the original agent that wrote the note -- The state of any active session, including whether messages have been sent -- The current task, mood, or intent of the human user - -If a condition references conversation context the user is having ("When the user mentions X", "When they ask to do Y", "When we revisit Z", "When relevant to current discussion", etc.), it is UNEVALUATABLE — skip it (do not include in results) so the note stays pending. These are misuse cases that should never have been written as smart notes; leaving them pending is the correct outcome, the dreamer's archive-stale task will eventually retire them. - -## Pending Smart Notes - -${noteDescriptions} - -## Instructions - -1. Check each condition using the tools available to you. -2. Be conservative — only mark a condition as met when you have clear evidence. -3. Skip conditions that depend on session/conversation context you cannot observe — do not invent a "false" verdict for them, just omit them from your response. -4. Respond with a JSON array of results: - -\`\`\`json -[ - { "id": , "met": true/false, "reason": "brief explanation" } -] -\`\`\` - -Only include notes whose conditions you could definitively evaluate against external signals. Skip notes where you cannot determine the status (they will be re-evaluated next run, or eventually archived as stale).`; - - const taskStartedAt = Date.now(); - let agentSessionId: string | null = null; - // Retain the child session on failure so its prompt/output/error can be - // inspected — mirrors the main-task cleanup rule. Optional phases used to - // delete unconditionally, losing the evidence for exactly the runs worth - // debugging. 
- let phaseFailed = false; - const startedAt = Date.now(); - let invocationRecorded = false; - const recordInvocation = (params: { - status: "completed" | "failed" | "aborted"; - messages?: unknown[]; - error?: unknown; - }) => { - if (!args.parentSessionId || invocationRecorded) return; - invocationRecorded = true; - recordChildInvocation({ - db: args.db, - parentSessionId: args.parentSessionId, - harness: "opencode", - subagent: "dreamer", - task: "smart-notes", - startedAt, - status: params.status, - messages: params.messages, - error: params.error, - }); - }; - const abortController = new AbortController(); - const leaseInterval = setInterval(() => { - try { - if (!renewLease(args.db, args.holderId)) { - log("[dreamer] smart notes: lease renewal failed — aborting"); - args.onLeaseLost?.("smart notes"); - abortController.abort(); - } - } catch (error) { - args.onLeaseLost?.("smart notes", error); - abortController.abort(); - } - }, 60_000); - - try { - const createResponse = await args.client.session.create({ - body: { - ...(args.parentSessionId ? { parentID: args.parentSessionId } : {}), - title: "magic-context-dream-smart-notes", - }, - query: { directory: args.sessionDirectory ?? args.projectIdentity }, - }); - const created = shared.normalizeSDKResponse( - createResponse, - null as { id?: string } | null, - { preferResponseOnMissingData: true }, - ); - agentSessionId = typeof created?.id === "string" ? created.id : null; - if (!agentSessionId) { - const error = new Error("Could not create smart note evaluation session."); - recordInvocation({ status: "failed", error }); - throw error; - } - - log(`[dreamer] smart notes: child session created ${agentSessionId}`); - const childSessionId = agentSessionId; - - const remainingMs = Math.max(0, args.deadline - Date.now()); - const smartNoteRun = await shared.promptSyncWithValidatedOutputRetry( - args.client, - { - path: { id: childSessionId }, - query: { directory: args.sessionDirectory ?? 
args.projectIdentity }, - body: { - agent: DREAMER_AGENT, - system: DREAMER_SYSTEM_PROMPT, - // synthetic: true hides the dreamer evaluation prompt from the TUI - // subagent pane while still delivering it to the model. See issue #50. - parts: [{ type: "text", text: evaluationPrompt, synthetic: true }], - }, - }, - { - timeoutMs: Math.min(remainingMs, 5 * 60 * 1000), - signal: abortController.signal, - fallbackModels: args.fallbackModels, - callContext: "dreamer:smart-notes", - fetchOutput: async () => { - const messagesResponse = await args.client.session.messages({ - path: { id: childSessionId }, - query: { - directory: args.sessionDirectory ?? args.projectIdentity, - limit: 50, - }, - }); - return shared.normalizeSDKResponse(messagesResponse, [] as unknown[], { - preferResponseOnMissingData: true, - }); - }, - validateOutput: (messages) => { - const output = extractLatestAssistantText(messages); - if (!output) throw new Error("Smart note evaluation returned no output."); - - // Parse the JSON results from the LLM response — use greedy match to handle - // `]` chars inside JSON string values (e.g., reasons containing brackets). 
- const jsonMatch = output.match(/\[[\s\S]*\]/); - if (!jsonMatch) { - throw new Error("Smart note evaluation returned no JSON array."); - } - - try { - return JSON.parse(jsonMatch[0]) as Array<{ - id: number; - met: boolean; - reason?: string; - }>; - } catch { - throw new Error("Smart note evaluation returned invalid JSON."); - } - }, - }, - ); - - recordInvocation({ status: "completed", messages: smartNoteRun.output }); - const evaluations = smartNoteRun.validated; - let surfaced = 0; - for (const evaluation of evaluations) { - if (typeof evaluation.id !== "number") continue; - const note = pendingNotes.find((n) => n.id === evaluation.id); - if (!note) continue; - - if (evaluation.met) { - markNoteReady(args.db, note.id, evaluation.reason); - surfaced++; - log( - `[dreamer] smart notes: #${note.id} condition MET — "${evaluation.reason ?? "condition satisfied"}"`, - ); - } else { - markNoteChecked(args.db, note.id); - } - } - - // Mark any notes not in the evaluation as checked (LLM skipped them) - for (const note of pendingNotes) { - if (!evaluations.some((e) => e.id === note.id)) { - markNoteChecked(args.db, note.id); - } - } - - const durationMs = Date.now() - taskStartedAt; - const pending = Math.max(0, pendingNotes.length - surfaced); - args.result.smartNotesSurfaced = surfaced; - args.result.smartNotesPending = pending; - log( - `[dreamer] smart notes: evaluated ${pendingNotes.length} notes in ${(durationMs / 1000).toFixed(1)}s — ${surfaced} surfaced, ${pending} still pending`, - ); - args.result.tasks.push({ - name: "smart-notes", - durationMs, - result: `${surfaced} surfaced, ${pending} still pending`, - }); - } catch (error) { - phaseFailed = true; - if ( - error instanceof Error && - error.message === "Smart note evaluation returned no JSON array." 
- ) { - log("[dreamer] smart notes: no JSON array found in output, skipping"); - for (const note of pendingNotes) markNoteChecked(args.db, note.id); - } else if ( - error instanceof Error && - error.message === "Smart note evaluation returned invalid JSON." - ) { - log(`[dreamer] smart notes: failed to parse JSON from LLM output, marking all checked`); - for (const note of pendingNotes) markNoteChecked(args.db, note.id); - } - const durationMs = Date.now() - taskStartedAt; - const errorDescription = describeError(error); - args.result.smartNotesSurfaced = 0; - args.result.smartNotesPending = pendingNotes.length; - logWithStackHead( - `[dreamer] smart notes: failed after ${(durationMs / 1000).toFixed(1)}s — ${errorDescription.brief}`, - errorDescription.stackHead, - ); - args.result.tasks.push({ - name: "smart-notes", - durationMs, - result: null, - error: errorDescription.brief, - }); - } finally { - clearInterval(leaseInterval); - // Keep the child session on failure (debugging) or under keep_subagents. - if (agentSessionId && !phaseFailed && !shouldKeepSubagents()) { - await args.client.session - .delete({ - path: { id: agentSessionId }, - }) - .catch(() => {}); - } - } -} - -const MAX_LEASE_RETRIES = 3; - -export async function processDreamQueue(args: { - db: Database; - client: PluginContext["client"]; - tasks: DreamingTask[]; - taskTimeoutMinutes: number; - maxRuntimeMinutes: number; - experimentalUserMemories?: { enabled: boolean; promotionThreshold: number }; - experimentalPinKeyFiles?: ExperimentalPinKeyFilesConfig; - /** - * Optional project identity filter — when provided, only entries belonging - * to this project are dequeued. Each running OpenCode/Pi process registers - * exactly one project, and the host's dreamer client (and `pi` runner, in - * Pi's case) is project-specific. 
Without this filter, a Pi process running - * for project A would dequeue queue entries for project B and try to - * `posix_spawn 'pi'` in B's `git:` identity string as a directory, - * failing with ENOENT every cycle. - * - * Callers should pass this whenever they own a single project — both the - * scheduled timer tick (`sweepProject`) and the `/ctx-dream` command - * handler. Tests pass `undefined` to keep the legacy "dequeue any" semantics. - */ - projectIdentity?: string; - /** - * Filesystem directory of the project THIS draining process owns. Because - * project identity collapses worktrees/clones to one `git:`, resolving - * the execution directory from the shared in-memory map can pick a stale - * sibling checkout ("last-registered wins"). The drain caller always knows - * its own live directory, so passing it here guarantees the dream runs in a - * checkout this process actually registered. Paired with projectIdentity - * (the queue filter), so the dequeued entry is guaranteed to be this - * project's. Falls back to the map (then the identity string) when omitted. - */ - sessionDirectoryOverride?: string; - /** - * Resolved Dreamer fallback chain. See `runDream` for semantics. Callers - * compute via `resolveFallbackChain(DREAMER_AGENT, pluginConfig.dreamer?.fallback_models)`. - */ - fallbackModels?: readonly string[]; -}): Promise { - // Use configured max runtime + 30min buffer for stale threshold instead of hardcoded 2h. - // Only reap when no live lease exists — a healthy long-running dream renews its lease and - // would otherwise have its own queue row deleted mid-run. Scope to this project so the - // cross-process shared queue doesn't reap another host's still-running rows. - const maxRuntimeMs = args.maxRuntimeMinutes * 60 * 1000; - // A live lease means another dream (this project or a sibling on the shared - // queue) is actively running. 
Don't dequeue underneath it: runDream would just - // fail lease acquisition, increment this entry's retry count, and after - // MAX_LEASE_RETRIES DELETE the queue row — silently dropping a project's - // pending dream that never got a fair chance to run. Skip this tick; the entry - // stays queued for when the lease frees. - if (hasActiveDreamLease(args.db)) { - return null; - } - clearStaleEntries(args.db, maxRuntimeMs + 30 * 60 * 1000, args.projectIdentity); - const entry = dequeueNext(args.db, args.projectIdentity); - if (!entry) { - return null; - } - - // Prefer the draining caller's own directory (the project THIS process - // owns). The dequeue filter (projectIdentity) guarantees entry belongs to - // this project, so the override is the correct live checkout — not a stale - // sibling-worktree path the shared identity map might resolve to. - const projectDirectory = - args.sessionDirectoryOverride ?? resolveDreamSessionDirectory(entry.projectIdentity); - // Log the project identity only — never the resolved directory. The - // absolute path carries the username + project name (privacy), and the - // git:/dir: identity uniquely correlates the run for debugging. - log(`[dreamer] dequeued project ${entry.projectIdentity}, starting dream run`); - - let result: DreamRunResult; - try { - result = await runDream({ - db: args.db, - client: args.client, - // entry.projectIdentity is the project identity (e.g. "git:") — used for dream state keys. - // projectDirectory is the filesystem path — used for session creation and file access. 
- projectIdentity: entry.projectIdentity, - tasks: args.tasks, - taskTimeoutMinutes: args.taskTimeoutMinutes, - maxRuntimeMinutes: args.maxRuntimeMinutes, - sessionDirectory: projectDirectory, - experimentalUserMemories: args.experimentalUserMemories, - experimentalPinKeyFiles: args.experimentalPinKeyFiles, - fallbackModels: args.fallbackModels, - }); - } catch (error) { - log(`[dreamer] runDream threw for ${entry.projectIdentity}: ${getErrorMessage(error)}`); - // Remove the entry so it doesn't stay stuck in "started" state for 2 hours - removeDreamEntry(args.db, entry.id); - return null; - } - - // Only remove queue entry if the dream actually ran (lease acquired). - // If lease acquisition failed, the entry stays so it can be retried (up to MAX_LEASE_RETRIES). - const leaseError = result.tasks.find((t) => t.name === "lease" && t.error); - if (leaseError) { - const retryCount = getEntryRetryCount(args.db, entry.id); - if (retryCount >= MAX_LEASE_RETRIES) { - log( - `[dreamer] lease acquisition failed ${retryCount + 1} times for ${entry.projectIdentity} — removing queue entry`, - ); - removeDreamEntry(args.db, entry.id); - } else { - log( - `[dreamer] lease acquisition failed for ${entry.projectIdentity} (attempt ${retryCount + 1}/${MAX_LEASE_RETRIES}) — keeping for retry`, - ); - resetDreamEntry(args.db, entry.id); - } - } else { - removeDreamEntry(args.db, entry.id); - } - - return result; -} diff --git a/packages/plugin/src/features/magic-context/dreamer/task-executor.ts b/packages/plugin/src/features/magic-context/dreamer/task-executor.ts index 8866a605..b1d4ac67 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-executor.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-executor.ts @@ -23,6 +23,7 @@ import { getMemoryVerifications, type Memory, } from "../memory"; +import { reembedStaleSkillNotes } from "../skill-memory/reembed"; import { recordChildInvocation } from "../subagent-token-capture"; import { reviewUserMemories } 
from "../user-memory/review-user-memories"; import { getActiveUserMemories } from "../user-memory/storage-user-memory"; @@ -417,7 +418,18 @@ export function createDreamTaskExecutor(deps: DreamTaskExecutorDeps): TaskExecut }; } - // Agentic tasks: verify / curate / maintain-docs. + // Skill-memory P2: re-embed stale/NULL skill-note vectors BEFORE the + // distill agentic pass so the prompt sees a fully-embedded corpus and + // P1-era notes promote from the FTS rung to the cosine rung. + if (config.task === "distill-skill-memory") { + try { + await reembedStaleSkillNotes(db, projectIdentity); + } catch (e) { + log(`[dreamer] distill-skill-memory reembed pre-step failed: ${e}`); + } + } + + // Agentic tasks: curate / maintain-docs / distill-skill-memory. return await runAgenticTask(config, ctx, { deps, deadline, diff --git a/packages/plugin/src/features/magic-context/dreamer/task-gates.ts b/packages/plugin/src/features/magic-context/dreamer/task-gates.ts index 368473e4..7ff15a6d 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-gates.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-gates.ts @@ -156,6 +156,13 @@ export function evaluateTaskGate(task: DreamTaskName, ctx: TaskGateContext): boo (primer.lastObservedAt ?? 0) > primer.answerRefreshedAt, ); + case "distill-skill-memory": + // Agentic opt-in task: when scheduled, always eligible. The reembed + // pre-step + distill prompt no-op gracefully on an empty corpus, so a + // cheap always-true gate avoids coupling the scheduler to skill_memory + // table internals. 
+ return true; + default: { const _exhaustive: never = task; return Boolean(_exhaustive); diff --git a/packages/plugin/src/features/magic-context/migrations-v42.test.ts b/packages/plugin/src/features/magic-context/migrations-v42.test.ts new file mode 100644 index 00000000..5e7e0799 --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v42.test.ts @@ -0,0 +1,126 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; // ESM import (not require) — matches codebase pattern + +function columnNames(db: Database, table: string): string[] { + return (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (c) => c.name, + ); +} + +function tableExists(db: Database, name: string): boolean { + return Boolean( + db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?").get(name), + ); +} + +describe("migration v42 — skill_memory table", () => { + test("creates skill_memory table with correct columns on fresh DB, idempotently", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + runMigrations(db); // idempotency check + + expect(tableExists(db, "skill_memory")).toBe(true); + + const cols = columnNames(db, "skill_memory"); + expect(cols).toContain("id"); + expect(cols).toContain("skill_id"); + expect(cols).toContain("resolved_path"); + expect(cols).toContain("tier"); + expect(cols).toContain("skill_source"); + expect(cols).toContain("project_identity"); + expect(cols).toContain("intent"); + expect(cols).toContain("intent_embedding"); + expect(cols).toContain("embedding_model_version"); + expect(cols).toContain("kind"); + expect(cols).toContain("delta"); + expect(cols).toContain("tags"); + 
expect(cols).toContain("hit_count"); + expect(cols).toContain("pinned"); + expect(cols).toContain("normalized_hash"); + expect(cols).toContain("created_at"); + expect(cols).toContain("last_used_at"); + + expect( + db + .prepare("SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1") + .get(), + ).toEqual({ version: LATEST_MIGRATION_VERSION }); + } finally { + closeQuietly(db); + } + }); + + test("skill_memory CHECK constraints reject invalid tier and kind values", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + + const insert = db.prepare(` + INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?) + `); + + // Valid row + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "project", + "git:abc123", + "test intent", + "gotcha", + "test delta", + "hash1", + Date.now(), + ), + ).not.toThrow(); + + // Invalid tier + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "invalid-tier", + "git:abc123", + "test intent", + "gotcha", + "test delta", + "hash2", + Date.now(), + ), + ).toThrow(); + + // Invalid kind + expect(() => + insert.run( + "test-skill", + "/path/SKILL.md", + "project", + "git:abc123", + "test intent", + "general", + "test delta", + "hash3", + Date.now(), + ), + ).toThrow(); + } finally { + closeQuietly(db); + } + }); + + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v42", () => { + // This test will fail until LATEST_SUPPORTED_VERSION in storage-db.ts is bumped to match LATEST_MIGRATION_VERSION. + // Belt-and-braces: mirrors schema-version-fence.test.ts but is co-located with the migration. + // If this feels redundant, keep it with this comment — co-location aids discoverability. + // NOTE: use ESM import at the top of the file (not require()) to match codebase pattern.
+ expect(LATEST_SUPPORTED_VERSION).toBe(LATEST_MIGRATION_VERSION); + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations-v49.test.ts b/packages/plugin/src/features/magic-context/migrations-v49.test.ts index c42a325d..ad7b80ec 100644 --- a/packages/plugin/src/features/magic-context/migrations-v49.test.ts +++ b/packages/plugin/src/features/magic-context/migrations-v49.test.ts @@ -158,7 +158,9 @@ describe("migration v49 — per-model embedding coexistence", () => { ) VALUES (999999, 'ses-orphan', 'git:orphan', 'opencode', 0, 0, 1, 'h', 'm', 4, x'01020304', 1)`, ).run(); db.exec("PRAGMA foreign_keys=ON"); - db.prepare("DELETE FROM schema_migrations WHERE version = 49").run(); + // Delete v49 AND everything above it (our skill migrations are v50-52) + // so getCurrentVersion drops below 49 and the v49 body actually re-runs. + db.prepare("DELETE FROM schema_migrations WHERE version >= 49").run(); expect(() => runMigrations(db)).not.toThrow(); expect(countRows(db, "compartment_chunk_embeddings")).toBe(0); diff --git a/packages/plugin/src/features/magic-context/migrations-v51.test.ts b/packages/plugin/src/features/magic-context/migrations-v51.test.ts new file mode 100644 index 00000000..38c2b28c --- /dev/null +++ b/packages/plugin/src/features/magic-context/migrations-v51.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, test } from "bun:test"; +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { LATEST_MIGRATION_VERSION, MIGRATIONS, runMigrations } from "./migrations"; +import { initializeDatabase, LATEST_SUPPORTED_VERSION } from "./storage-db"; + +function columnNames(db: Database, table: string): string[] { + return (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (c) => c.name, + ); +} +function tableExists(db: Database, name: string): boolean { + return Boolean( + db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = 
?").get(name), + ); +} + +describe("migration v51 — skill_memory embeddings + FTS", () => { + test("fresh DB: delta_embedding column and skill_memory_fts exist, no throw", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + runMigrations(db); // idempotency + + expect(columnNames(db, "skill_memory")).toContain("delta_embedding"); + expect(tableExists(db, "skill_memory_fts")).toBe(true); + } finally { + closeQuietly(db); + } + }); + + test("FTS triggers keep skill_memory_fts in sync with skill_memory", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + runMigrations(db); + db.prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?,?,?,?,?,?,?,?,0,0,?)`, + ).run( + "s1", + "/p/SKILL.md", + "global", + "git:abc", + "fix a flaky auth test", + "fix", + "mock Date.now in auth tests", + "h1", + Date.now(), + ); + + const hit = db + .prepare( + `SELECT m.id FROM skill_memory_fts f JOIN skill_memory m ON m.id = f.rowid + WHERE skill_memory_fts MATCH ?`, + ) + .get('"auth"'); + expect(hit).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + // requires `import { MIGRATIONS } from "./migrations";` (added above) + test("v51 migration backfills FTS for rows that pre-existed v40", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); + // Build a PRE-v51 schema: apply every migration BELOW v51 directly via up() (runMigrations has no + // target-version param). skill_memory is created by an earlier migration; the FTS table + delta_embedding do NOT exist yet. + for (const m of MIGRATIONS.filter((x) => x.version < 51).sort( + (a, b) => a.version - b.version, + )) { + m.up(db); + } + // Insert a row under the pre-v51 schema — no FTS table yet, so no AFTER-INSERT trigger indexes it.
+ db.prepare( + `INSERT INTO skill_memory + (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, hit_count, pinned, created_at) + VALUES (?,?,?,?,?,?,?,?,0,0,?)`, + ).run( + "s2", + "/p/SKILL.md", + "global", + "git:abc", + "handle oauth refresh", + "fix", + "rotate the token early", + "h2", + Date.now(), + ); + // Apply ONLY v51's up() — its body must ALTER + create the FTS table + BACKFILL the pre-existing row. + const v51 = MIGRATIONS.find((m) => m.version === 51); + if (!v51) throw new Error("v40 migration not found"); + v51.up(db); + const hit = db + .prepare( + `SELECT m.id FROM skill_memory_fts f JOIN skill_memory m ON m.id = f.rowid WHERE skill_memory_fts MATCH ?`, + ) + .get('"oauth"'); + expect(hit).toBeTruthy(); + } finally { + closeQuietly(db); + } + }); + + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v51", () => { + expect(LATEST_SUPPORTED_VERSION).toBe(LATEST_MIGRATION_VERSION); + expect(LATEST_SUPPORTED_VERSION).toBe(52); + }); +}); diff --git a/packages/plugin/src/features/magic-context/migrations-v41.test.ts b/packages/plugin/src/features/magic-context/migrations-v52.test.ts similarity index 90% rename from packages/plugin/src/features/magic-context/migrations-v41.test.ts rename to packages/plugin/src/features/magic-context/migrations-v52.test.ts index f3d06fdd..d92b6400 100644 --- a/packages/plugin/src/features/magic-context/migrations-v41.test.ts +++ b/packages/plugin/src/features/magic-context/migrations-v52.test.ts @@ -33,10 +33,10 @@ function insertGlobal( ); } -describe("migration v41 — origin_project + source_type + global '*' unification", () => { - test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after v41", () => { - expect(LATEST_SUPPORTED_VERSION).toBe(41); - expect(LATEST_MIGRATION_VERSION).toBe(41); +describe("migration v52 — origin_project + source_type + global '*' unification", () => { + test("LATEST_SUPPORTED_VERSION equals LATEST_MIGRATION_VERSION after
v52", () => { + expect(LATEST_SUPPORTED_VERSION).toBe(52); + expect(LATEST_MIGRATION_VERSION).toBe(52); }); test("fresh DB has origin_project + source_type columns", () => { @@ -59,9 +59,9 @@ describe("migration v41 — origin_project + source_type + global '*' unificatio for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); insertGlobal(db, "council", "git:repoA", "h1"); - const v41 = MIGRATIONS.find((m) => m.version === 41); - expect(v41).toBeDefined(); - v41?.up(db); + const v52 = MIGRATIONS.find((m) => m.version === 52); + expect(v52).toBeDefined(); + v52?.up(db); const row = db .prepare( @@ -93,7 +93,7 @@ describe("migration v41 — origin_project + source_type + global '*' unificatio createdAt: 800, }); - MIGRATIONS.find((m) => m.version === 41)?.up(db); + MIGRATIONS.find((m) => m.version === 52)?.up(db); const rows = db .prepare( @@ -117,16 +117,16 @@ describe("migration v41 — origin_project + source_type + global '*' unificatio } }); - test("idempotent: re-running v41 up() does not double-process '*' rows", () => { + test("idempotent: re-running v52 up() does not double-process '*' rows", () => { const db = new Database(":memory:"); try { initializeDatabase(db); for (const m of MIGRATIONS.filter((m) => m.version <= 40)) m.up(db); insertGlobal(db, "council", "git:repoA", "h1", { hit: 1 }); - const v41 = MIGRATIONS.find((m) => m.version === 41); - v41?.up(db); - v41?.up(db); + const v52 = MIGRATIONS.find((m) => m.version === 52); + v52?.up(db); + v52?.up(db); const rows = db .prepare("SELECT hit_count FROM skill_memory WHERE normalized_hash='h1'") @@ -146,7 +146,7 @@ describe("migration v41 — origin_project + source_type + global '*' unificatio insertGlobal(db, "council", "git:repoA", "dup"); insertGlobal(db, "council", "git:repoB", "dup"); - MIGRATIONS.find((m) => m.version === 41)?.up(db); + MIGRATIONS.find((m) => m.version === 52)?.up(db); const ftsCount = db.prepare("SELECT COUNT(*) AS n FROM skill_memory_fts").get() as { n: number; @@ -172,7 
+172,7 @@ describe("migration v41 — origin_project + source_type + global '*' unificatio `INSERT INTO skill_memory (skill_id, resolved_path, tier, project_identity, intent, kind, delta, normalized_hash, created_at) VALUES ('s', '/p', 'project', 'git:repoA', 'i', 'fix', 'd', 'ph', 1)`, ).run(); - MIGRATIONS.find((m) => m.version === 41)?.up(db); + MIGRATIONS.find((m) => m.version === 52)?.up(db); const row = db .prepare("SELECT project_identity FROM skill_memory WHERE normalized_hash='ph'") diff --git a/packages/plugin/src/features/magic-context/migrations.ts b/packages/plugin/src/features/magic-context/migrations.ts index 24309ed7..4ab16f55 100644 --- a/packages/plugin/src/features/magic-context/migrations.ts +++ b/packages/plugin/src/features/magic-context/migrations.ts @@ -2025,7 +2025,13 @@ export const MIGRATIONS: Migration[] = [ ).run(g.skill_id, g.normalized_hash, survivor.id); db.prepare( `UPDATE skill_memory SET hit_count=?, recall_count=?, last_used_at=?, origin_project=?, project_identity='*' WHERE id=?`, - ).run(g.sum_hit, g.sum_recall, g.max_used, survivor.project_identity, survivor.id); + ).run( + g.sum_hit, + g.sum_recall, + g.max_used, + survivor.project_identity, + survivor.id, + ); } // Defensive (S4): drop any pre-'*' row whose (skill_id, normalized_hash) diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts index 4f30faef..914a064e 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.test.ts @@ -20,10 +20,11 @@ describe("promoteSkillObservations", () => { { skillId: "council", kind: "gotcha", lesson: "aggregator needs a fast model" }, ]); expect(n).toBe(1); - const row = db.prepare("SELECT tier, project_identity, origin_project, source_type, resolved_path, kind FROM skill_memory").get() as Record< - string, - string - >; + const row = db + 
.prepare( + "SELECT tier, project_identity, origin_project, source_type, resolved_path, kind FROM skill_memory", + ) + .get() as Record; expect(row.tier).toBe("global"); expect(row.project_identity).toBe("*"); expect(row.origin_project).toBe("git:repoA"); @@ -38,10 +39,16 @@ describe("promoteSkillObservations", () => { test("exact-hash duplicate bumps hit_count instead of inserting", () => { const db = makeDb(); try { - promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "fix", lesson: "same lesson" }]); - const n = promoteSkillObservations(db, "git:repoB", [{ skillId: "council", kind: "fix", lesson: "same lesson" }]); + promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "fix", lesson: "same lesson" }, + ]); + const n = promoteSkillObservations(db, "git:repoB", [ + { skillId: "council", kind: "fix", lesson: "same lesson" }, + ]); expect(n).toBe(0); - const rows = db.prepare("SELECT hit_count FROM skill_memory").all() as Array<{ hit_count: number }>; + const rows = db.prepare("SELECT hit_count FROM skill_memory").all() as Array<{ + hit_count: number; + }>; expect(rows.length).toBe(1); expect(rows[0].hit_count).toBe(1); } finally { @@ -52,7 +59,9 @@ describe("promoteSkillObservations", () => { test("rejects kind='general'", () => { const db = makeDb(); try { - const n = promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "general" as never, lesson: "x" }]); + const n = promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "general" as never, lesson: "x" }, + ]); expect(n).toBe(0); } finally { closeQuietly(db); diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts index 7d7944b8..f616a74d 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -4,6 +4,7 @@ import { closeQuietly } 
from "../../../shared/sqlite-helpers"; import { float32ArrayToBlob } from "../memory/storage-memory-embeddings"; import { runMigrations } from "../migrations"; import { initializeDatabase } from "../storage-db"; +import { promoteSkillObservations } from "./promote"; import { buildSkillMemoryBlock, flatRecall, @@ -11,7 +12,6 @@ import { recallSkillMemoryBlock, sanitizeSkillIntentForFts, } from "./recall"; -import { promoteSkillObservations } from "./promote"; import { insertSkillMemoryNote } from "./storage"; function makeDb(): Database { diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts index 51709d68..4ac37842 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -229,13 +229,7 @@ export async function recallSkillMemoryBlock( const q = await embedTextForProject(opts.projectIdentity, intent); if (q) { - const candidates = getRankingCandidates( - db, - opts.skill, - opts.scope, - part, - 200, - ); + const candidates = getRankingCandidates(db, opts.skill, opts.scope, part, 200); const matched = candidates.filter( (n) => n.embedding_model_version === q.modelId && @@ -276,14 +270,7 @@ export async function recallSkillMemoryBlock( }); return finalize("flat-fts", notes); } - const ftsNotes = searchSkillMemoryFts( - db, - opts.skill, - opts.scope, - part, - match, - 50, - ); + const ftsNotes = searchSkillMemoryFts(db, opts.skill, opts.scope, part, match, 50); const ordered = unionPinnedFirst( getPinnedNotes(db, opts.skill, opts.scope, part), ftsNotes, diff --git a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts index 84c5f067..f67a8e07 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts +++ 
b/packages/plugin/src/features/magic-context/skill-memory/reembed.test.ts @@ -8,14 +8,14 @@ const embedCalls: string[] = []; // Provider "up" with a deterministic model — MUST precede the reembed.ts import. mock.module("../memory/embedding", () => ({ - embedTextForProject: async (projectIdentity: string, _text: string) => { - embedCalls.push(projectIdentity); - return { - vector: new Float32Array([0.1, 0.2, 0.3]), - modelId: "test-model", - generation: 1, - }; - }, + embedTextForProject: async (projectIdentity: string, _text: string) => { + embedCalls.push(projectIdentity); + return { + vector: new Float32Array([0.1, 0.2, 0.3]), + modelId: "test-model", + generation: 1, + }; + }, })); const { reembedStaleSkillNotes } = await import("./reembed"); @@ -55,7 +55,9 @@ test("reembed selects global '*' notes and embeds them under the real identity", try { initializeDatabase(db); runMigrations(db); - promoteSkillObservations(db, "git:repoA", [{ skillId: "council", kind: "fix", lesson: "L7" }]); + promoteSkillObservations(db, "git:repoA", [ + { skillId: "council", kind: "fix", lesson: "L7" }, + ]); embedCalls.length = 0; const res = await reembedStaleSkillNotes(db, "git:repoA"); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts index 38d97406..9e36c2c9 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.test.ts @@ -11,12 +11,12 @@ import { getDedupCandidates, getPinnedNotes, getRankingCandidates, - getSkillMemoryNotes, - getSkillMemoryStats, - type InsertSkillMemoryNoteArgs, - insertSkillMemoryNote, - partitionKey, - searchSkillMemoryFts, + getSkillMemoryNotes, + getSkillMemoryStats, + type InsertSkillMemoryNoteArgs, + insertSkillMemoryNote, + partitionKey, + searchSkillMemoryFts, } from "./storage"; function makeDb(): Database { @@ -308,13 +308,13 @@ 
describe("skill_memory storage", () => { createdAt: Date.now(), }); - // Seed 1 project-tier note under a DIFFERENT project — must NOT be counted. - insertSkillMemoryNote(db, { - skillId: "tdd", - resolvedPath: "/p", - tier: "project", - skillSource: "opencode-global", - projectIdentity: "git:other", + // Seed 1 project-tier note under a DIFFERENT project — must NOT be counted. + insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/p", + tier: "project", + skillSource: "opencode-global", + projectIdentity: "git:other", intent: "i5", kind: "gotcha", delta: "n5", @@ -335,12 +335,12 @@ describe("skill_memory storage", () => { const db = makeDb(); try { const ts = 1_000_000; - const ins = (hash: string, hits: number) => - db - .prepare( - `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) + const ins = (hash: string, hits: number) => + db + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at,last_used_at) VALUES ('s','/p','global','*','i','fix','d',?,?,0,?,?)`, - ) + ) .run(hash, hits, ts, ts); ins("a", 1); ins("b", 5); @@ -464,10 +464,10 @@ describe("skill_memory storage", () => { const pid = Number( ( db - .prepare( - `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) + .prepare( + `INSERT INTO skill_memory (skill_id,resolved_path,tier,project_identity,intent,kind,delta,normalized_hash,hit_count,pinned,created_at) VALUES ('s','/p','global','*','i','fix','pinned','pin-h2',0,1,2) RETURNING id`, - ) + ) .get() as { id: number } ).id, ); @@ -481,79 +481,79 @@ describe("skill_memory storage", () => { }); describe("partitionKey", () => { - test("global tier maps to '*' sentinel", () => { - expect(partitionKey("global", "git:repoA")).toBe("*"); - }); - test("project tier passes through the 
real identity", () => { - expect(partitionKey("project", "git:repoA")).toBe("git:repoA"); - }); + test("global tier maps to '*' sentinel", () => { + expect(partitionKey("global", "git:repoA")).toBe("*"); + }); + test("project tier passes through the real identity", () => { + expect(partitionKey("project", "git:repoA")).toBe("git:repoA"); + }); }); describe("storage v41 fields", () => { - test("insert stamps origin_project + source_type; null resolvedPath -> ''", () => { - const db = makeDb(); - try { - const id = insertSkillMemoryNote(db, { - skillId: "council", - resolvedPath: null, - tier: "global", - skillSource: null, - projectIdentity: "*", - originProject: "git:repoA", - sourceType: "historian", - intent: "i", - kind: "fix", - delta: "d", - normalizedHash: "h1", - createdAt: 1, - }); - expect(id).not.toBeNull(); - const row = db - .prepare( - "SELECT resolved_path, origin_project, source_type FROM skill_memory WHERE id=?", - ) - .get(id) as { resolved_path: string; origin_project: string; source_type: string }; - expect(row.resolved_path).toBe(""); - expect(row.origin_project).toBe("git:repoA"); - expect(row.source_type).toBe("historian"); - } finally { - closeQuietly(db); - } - }); + test("insert stamps origin_project + source_type; null resolvedPath -> ''", () => { + const db = makeDb(); + try { + const id = insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: null, + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoA", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d", + normalizedHash: "h1", + createdAt: 1, + }); + expect(id).not.toBeNull(); + const row = db + .prepare( + "SELECT resolved_path, origin_project, source_type FROM skill_memory WHERE id=?", + ) + .get(id) as { resolved_path: string; origin_project: string; source_type: string }; + expect(row.resolved_path).toBe(""); + expect(row.origin_project).toBe("git:repoA"); + expect(row.source_type).toBe("historian"); + } finally { + 
closeQuietly(db); + } + }); - test("getSkillMemoryStats counts global ('*') notes alongside the project's own", () => { - const db = makeDb(); - try { - insertSkillMemoryNote(db, { - skillId: "s", - resolvedPath: "/p", - tier: "project", - skillSource: null, - projectIdentity: "git:repoA", - intent: "i", - kind: "fix", - delta: "d1", - normalizedHash: "p1", - createdAt: 1, - }); - insertSkillMemoryNote(db, { - skillId: "council", - resolvedPath: "", - tier: "global", - skillSource: null, - projectIdentity: "*", - originProject: "git:repoB", - sourceType: "historian", - intent: "i", - kind: "fix", - delta: "d2", - normalizedHash: "g1", - createdAt: 2, - }); - const stats = getSkillMemoryStats(db, "git:repoA"); - expect(stats.totalNotes).toBe(2); - } finally { - closeQuietly(db); - } - }); + test("getSkillMemoryStats counts global ('*') notes alongside the project's own", () => { + const db = makeDb(); + try { + insertSkillMemoryNote(db, { + skillId: "s", + resolvedPath: "/p", + tier: "project", + skillSource: null, + projectIdentity: "git:repoA", + intent: "i", + kind: "fix", + delta: "d1", + normalizedHash: "p1", + createdAt: 1, + }); + insertSkillMemoryNote(db, { + skillId: "council", + resolvedPath: "", + tier: "global", + skillSource: null, + projectIdentity: "*", + originProject: "git:repoB", + sourceType: "historian", + intent: "i", + kind: "fix", + delta: "d2", + normalizedHash: "g1", + createdAt: 2, + }); + const stats = getSkillMemoryStats(db, "git:repoA"); + expect(stats.totalNotes).toBe(2); + } finally { + closeQuietly(db); + } + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts index ad74c3ab..5e8d6239 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -145,7 +145,7 @@ export function getSkillMemoryNotes( created_at DESC LIMIT ?`, ) - 
.all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; } /** @@ -160,10 +160,10 @@ export function bumpHitCount( normalizedHash: string, ): void { db.prepare( - `UPDATE skill_memory + `UPDATE skill_memory SET hit_count = hit_count + 1, last_used_at = ? WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, - ).run(Date.now(), skillId, tier, partitionKey(tier, projectIdentity), normalizedHash); + ).run(Date.now(), skillId, tier, partitionKey(tier, projectIdentity), normalizedHash); } /** @@ -212,7 +212,7 @@ export function findExistingNote( `SELECT id, hit_count FROM skill_memory WHERE skill_id = ? AND tier = ? AND project_identity = ? AND normalized_hash = ?`, ) - .get(skillId, tier, partitionKey(tier, projectIdentity), normalizedHash) as { + .get(skillId, tier, partitionKey(tier, projectIdentity), normalizedHash) as { id: number; hit_count: number; } | null) ?? null @@ -237,7 +237,7 @@ export function getDedupCandidates( WHERE skill_id=? AND tier=? AND project_identity=? ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as Array< + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as Array< Pick >; } @@ -255,7 +255,7 @@ export function getRankingCandidates( WHERE skill_id=? AND tier=? AND project_identity=? 
ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; } export function searchSkillMemoryFts( @@ -275,7 +275,13 @@ export function searchSkillMemoryFts( ORDER BY bm25(skill_memory_fts) ASC, COALESCE(m.last_used_at, m.created_at) DESC LIMIT ?`, ) - .all(matchQuery, skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; + .all( + matchQuery, + skillId, + tier, + partitionKey(tier, projectIdentity), + limit, + ) as SkillMemoryNote[]; } export function getPinnedNotes( @@ -290,7 +296,7 @@ export function getPinnedNotes( WHERE skill_id=? AND tier=? AND project_identity=? AND pinned=1 ORDER BY COALESCE(last_used_at, created_at) DESC`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity)) as SkillMemoryNote[]; + .all(skillId, tier, partitionKey(tier, projectIdentity)) as SkillMemoryNote[]; } export function getSkillMemoryStats( @@ -298,14 +304,14 @@ export function getSkillMemoryStats( projectIdentity: string, ): { totalNotes: number; skillsWithNotes: number; pinnedNotes: number } { const row = db - .prepare( - `SELECT + .prepare( + `SELECT COUNT(*) AS total, COUNT(DISTINCT skill_id) AS skills, COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned FROM skill_memory WHERE project_identity = ? OR project_identity = '*'`, - ) + ) .get(projectIdentity) as { total: number; skills: number; pinned: number } | undefined; return { totalNotes: Number(row?.total ?? 
0), diff --git a/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts b/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts new file mode 100644 index 00000000..f7a998b7 --- /dev/null +++ b/packages/plugin/src/features/magic-context/storage-db-skill-memory-net.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, test } from "bun:test"; + +import { Database } from "../../shared/sqlite"; +import { closeQuietly } from "../../shared/sqlite-helpers"; +import { runMigrations } from "./migrations"; +import { initializeDatabase } from "./storage-db"; + +function cols(db: Database, table: string): Set { + return new Set( + (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>).map( + (r) => r.name, + ), + ); +} + +function objExists(db: Database, name: string): boolean { + return !!db.prepare("SELECT 1 FROM sqlite_master WHERE name=?").get(name); +} + +describe("skill_memory init-time self-heal net", () => { + test("initializeDatabase ALONE creates skill_memory with the full final column set + FTS vtable + triggers", () => { + const db = new Database(":memory:"); + try { + initializeDatabase(db); // NO runMigrations + const c = cols(db, "skill_memory"); + for (const need of [ + "origin_project", + "source_type", + "delta_embedding", + "recall_count", + "skill_id", + "tier", + "project_identity", + "normalized_hash", + "intent_embedding", + ]) { + expect(c.has(need)).toBe(true); + } + expect(objExists(db, "skill_memory_fts")).toBe(true); + expect(objExists(db, "skill_memory_ai")).toBe(true); + expect(objExists(db, "skill_memory_ad")).toBe(true); + expect(objExists(db, "skill_memory_au")).toBe(true); + } finally { + closeQuietly(db); + } + }); + + test("init-only schema MATCHES fully-migrated schema (no fresh-vs-migrated divergence)", () => { + const a = new Database(":memory:"); + const b = new Database(":memory:"); + try { + initializeDatabase(a); + initializeDatabase(b); + runMigrations(b); + 
expect([...cols(a, "skill_memory")].sort()).toEqual( + [...cols(b, "skill_memory")].sort(), + ); + expect(objExists(a, "skill_memory_fts")).toBe(objExists(b, "skill_memory_fts")); + } finally { + closeQuietly(a); + closeQuietly(b); + } + }); + + test("heals a renumber-skip: existing skill_memory missing origin_project/source_type gets them on init", () => { + const db = new Database(":memory:"); + try { + // Simulate a DB created at the v39/v40 shape (no origin_project/source_type), + // as if v41 was skipped by a renumber collision. + db.exec(`CREATE TABLE skill_memory ( + id INTEGER PRIMARY KEY AUTOINCREMENT, skill_id TEXT NOT NULL, resolved_path TEXT NOT NULL, + tier TEXT NOT NULL, skill_source TEXT, project_identity TEXT NOT NULL, intent TEXT, + intent_embedding BLOB, embedding_model_version TEXT, kind TEXT NOT NULL, delta TEXT NOT NULL, + tags TEXT, hit_count INTEGER NOT NULL DEFAULT 0, pinned INTEGER NOT NULL DEFAULT 0, + normalized_hash TEXT NOT NULL, created_at INTEGER NOT NULL, last_used_at INTEGER, + delta_embedding BLOB, recall_count INTEGER NOT NULL DEFAULT 0, + UNIQUE(skill_id, tier, project_identity, normalized_hash) + );`); + expect(cols(db, "skill_memory").has("origin_project")).toBe(false); + initializeDatabase(db); // the net must ADD the missing columns + expect(cols(db, "skill_memory").has("origin_project")).toBe(true); + expect(cols(db, "skill_memory").has("source_type")).toBe(true); + } finally { + closeQuietly(db); + } + }); +}); diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index 7c31f3ca..6d9d7b74 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -632,6 +632,40 @@ export function initializeDatabase(db: Database): void { CREATE INDEX IF NOT EXISTS idx_memory_mutation_log_project ON memory_mutation_log(project_path, id); + CREATE TABLE IF NOT EXISTS skill_memory ( + id INTEGER 
PRIMARY KEY AUTOINCREMENT, + skill_id TEXT NOT NULL, + resolved_path TEXT NOT NULL, + tier TEXT NOT NULL CHECK(tier IN ('project', 'global')), + skill_source TEXT CHECK(skill_source IN ( + 'opencode-project', 'opencode-global', + 'claude-skills', 'agents-skills' + )), + project_identity TEXT NOT NULL, + intent TEXT NOT NULL, + intent_embedding BLOB, + embedding_model_version TEXT, + kind TEXT NOT NULL CHECK(kind IN ('gotcha', 'discovery', 'fix', 'workflow')), + delta TEXT NOT NULL, + tags TEXT, + hit_count INTEGER NOT NULL DEFAULT 0, + pinned INTEGER NOT NULL DEFAULT 0 CHECK(pinned IN (0, 1)), + normalized_hash TEXT NOT NULL, + created_at INTEGER NOT NULL, + last_used_at INTEGER, + delta_embedding BLOB, + recall_count INTEGER NOT NULL DEFAULT 0, + origin_project TEXT, + source_type TEXT, + UNIQUE(skill_id, tier, project_identity, normalized_hash) + ); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_lookup + ON skill_memory(skill_id, tier, project_identity, last_used_at DESC); + + CREATE INDEX IF NOT EXISTS idx_skill_memory_fts_prep + ON skill_memory(skill_id, tier, project_identity, kind); + CREATE TABLE IF NOT EXISTS dream_state ( key TEXT PRIMARY KEY, value TEXT NOT NULL @@ -787,6 +821,14 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en tokenize='porter unicode61' ); + CREATE VIRTUAL TABLE IF NOT EXISTS skill_memory_fts USING fts5( + intent, + delta, + content='skill_memory', + content_rowid='id', + tokenize='porter unicode61' + ); + CREATE VIRTUAL TABLE IF NOT EXISTS message_history_fts USING fts5( session_id UNINDEXED, message_ordinal UNINDEXED, @@ -816,6 +858,19 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en INSERT INTO memories_fts(rowid, content, category) VALUES (new.id, new.content, new.category); END; + CREATE TRIGGER IF NOT EXISTS skill_memory_ai AFTER INSERT ON skill_memory BEGIN + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + + 
CREATE TRIGGER IF NOT EXISTS skill_memory_ad AFTER DELETE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + END; + + CREATE TRIGGER IF NOT EXISTS skill_memory_au AFTER UPDATE ON skill_memory BEGIN + INSERT INTO skill_memory_fts(skill_memory_fts, rowid, intent, delta) VALUES ('delete', old.id, old.intent, old.delta); + INSERT INTO skill_memory_fts(rowid, intent, delta) VALUES (new.id, new.intent, new.delta); + END; + CREATE TABLE IF NOT EXISTS session_meta ( session_id TEXT PRIMARY KEY, harness TEXT NOT NULL DEFAULT 'opencode', @@ -1116,6 +1171,10 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en ensureColumn(db, "compartments", "start_message_id", "TEXT DEFAULT ''"); ensureColumn(db, "compartments", "end_message_id", "TEXT DEFAULT ''"); ensureColumn(db, "memory_embeddings", "model_id", "TEXT"); + ensureColumn(db, "skill_memory", "delta_embedding", "BLOB"); + ensureColumn(db, "skill_memory", "recall_count", "INTEGER NOT NULL DEFAULT 0"); + ensureColumn(db, "skill_memory", "origin_project", "TEXT"); + ensureColumn(db, "skill_memory", "source_type", "TEXT"); ensureColumn(db, "session_meta", "memory_block_cache", "TEXT DEFAULT ''"); ensureColumn(db, "session_meta", "memory_block_count", "INTEGER DEFAULT 0"); ensureColumn(db, "session_meta", "pi_stable_id_scheme", "INTEGER"); diff --git a/packages/plugin/src/hooks/magic-context/command-handler.ts b/packages/plugin/src/hooks/magic-context/command-handler.ts index d0874a87..4382c75b 100644 --- a/packages/plugin/src/hooks/magic-context/command-handler.ts +++ b/packages/plugin/src/hooks/magic-context/command-handler.ts @@ -556,7 +556,7 @@ export function createMagicContextCommandHandler(deps: { // directory for dreamer-aware sessions); fall back to cwd so // the new "Skill memory" section can resolve a project identity // for sessions that don't have dreamer configured. 
- const statusDirectory = deps.dreamer?.directory ?? process.cwd(); + const statusDirectory = deps.dreamer?.projectPath ?? process.cwd(); const statusOutput = await executeStatus( deps.db, sessionId, diff --git a/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts b/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts index 6ea61acf..776e984d 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-parser.test.ts @@ -362,3 +362,43 @@ describe("parseCompartmentOutput — fact scoping (audit Fix 6)", () => { expect(parsed.facts.some((f) => f.category === "NAMING")).toBe(false); }); }); + +describe("parseCompartmentOutput — skill_observations", () => { + it("parses skill observations into {skillId, kind, lesson}", () => { + const xml = ` + + +* council | gotcha | aggregator needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests + +`; + const parsed = parseCompartmentOutput(xml); + expect(parsed.skillObservations).toEqual([ + { + skillId: "council", + kind: "gotcha", + lesson: "aggregator needs a fast non-deficit model", + }, + { + skillId: "test-driven-development", + kind: "fix", + lesson: "mock the clock in auth tests", + }, + ]); + }); + + it("absent block -> empty array", () => { + expect(parseCompartmentOutput("").skillObservations).toEqual([]); + }); + + it("malformed lines (bad kind, missing fields) are skipped, non-choke", () => { + const xml = ` +* council | general | invalid kind dropped +* onlyskill | fix +* good | discovery | kept +`; + expect(parseCompartmentOutput(xml).skillObservations).toEqual([ + { skillId: "good", kind: "discovery", lesson: "kept" }, + ]); + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/compartment-parser.ts b/packages/plugin/src/hooks/magic-context/compartment-parser.ts index 55ce7551..721f7616 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-parser.ts +++ 
b/packages/plugin/src/hooks/magic-context/compartment-parser.ts @@ -44,6 +44,12 @@ export interface ParsedPrimerCandidate { originCompartmentIndex?: number; } +export interface ParsedSkillObservation { + skillId: string; + kind: "gotcha" | "discovery" | "fix" | "workflow"; + lesson: string; +} + export interface ParsedCompartmentOutput { compartments: ParsedCompartment[]; facts: ParsedFact[]; @@ -51,6 +57,7 @@ export interface ParsedCompartmentOutput { unprocessedFrom: number | null; userObservations: string[]; primerCandidates: ParsedPrimerCandidate[]; + skillObservations: ParsedSkillObservation[]; } // Open tag captured separately from body so attributes (start/end/title/ @@ -84,6 +91,9 @@ const PRIMER_CANDIDATES_REGEX = /<primer_candidates>(.*?)<\/primer_candidates>/s // (*/-/1.) is still accepted and falls back to the chunk span at emission. const PRIMER_ELEMENT_REGEX = /(.*?)<\/primer>/gs; const PRIMER_ITEM_REGEX = /^\s*(?:\*|-|\d+\.)\s*(.+)$/gm; +const SKILL_OBSERVATIONS_REGEX = /<skill_observations>(.*?)<\/skill_observations>/s; +const SKILL_OBS_ITEM_REGEX = /^\s*\*\s*([^|\n]+)\|([^|\n]+)\|([^\n]+)$/gm; +const SKILL_OBS_KINDS = new Set(["gotcha", "discovery", "fix", "workflow"]); // Events: scan the block (if any) for event elements.
Kinds // are parsed kind-agnostically — any element with an `at_compartment` attr is an @@ -235,11 +245,36 @@ export function parseCompartmentOutput(text: string): ParsedCompartmentOutput { } } + const skillObservations: ParsedSkillObservation[] = []; + const skillObsMatch = text.match(SKILL_OBSERVATIONS_REGEX); + if (skillObsMatch) { + for (const itemMatch of skillObsMatch[1].matchAll(SKILL_OBS_ITEM_REGEX)) { + const skillId = unescapeXml(itemMatch[1].trim()); + const kind = itemMatch[2].trim(); + const lesson = unescapeXml(itemMatch[3].trim()); + if (skillId && lesson && SKILL_OBS_KINDS.has(kind)) { + skillObservations.push({ + skillId, + kind: kind as ParsedSkillObservation["kind"], + lesson, + }); + } + } + } + const events = parseEvents(text); compartments.sort((a, b) => a.startMessage - b.startMessage); - return { compartments, facts, events, unprocessedFrom, userObservations, primerCandidates }; + return { + compartments, + facts, + events, + unprocessedFrom, + userObservations, + primerCandidates, + skillObservations, + }; } /** diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts index d4127813..e1f81bff 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-incremental.ts @@ -23,6 +23,7 @@ import { } from "../../features/magic-context/memory"; import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { getMemoriesByProject } from "../../features/magic-context/memory/storage-memory"; +import { promoteSkillObservations } from "../../features/magic-context/skill-memory/promote"; import { clearEmergencyDrainLatch, clearEmergencyRecovery, @@ -874,6 +875,28 @@ export async function runCompartmentAgent(deps: CompartmentRunnerDeps): Promise< sessionLog(sessionId, "failed to store primer candidates:", error); } } + + 
// Skill-memory historian extraction: promote the + // historian emitted into per-skill notes. Same promotionActive + + // !discardedLast gate as facts/primers so a provisional tail does not + // double-emit. + if ( + promotionActive && + !discardedLast && + validatedPass.skillObservations && + validatedPass.skillObservations.length > 0 + ) { + try { + const written = promoteSkillObservations( + db, + promotionProjectIdentity, + validatedPass.skillObservations, + ); + sessionLog(sessionId, `promoted ${written} skill observation(s)`); + } catch (error) { + sessionLog(sessionId, "failed to promote skill observations:", error); + } + } } catch (error: unknown) { // Historian runs are fail-closed because they update durable compartment state. const desc = describeError(error); diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts index 146dcfbd..2cd326fe 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-types.ts @@ -1,6 +1,6 @@ import type { PluginContext } from "../../plugin/types"; import type { Database } from "../../shared/sqlite"; -import type { ParsedEvent } from "./compartment-parser"; +import type { ParsedEvent, ParsedSkillObservation } from "./compartment-parser"; import type { ProtectedTailBoundarySnapshot } from "./protected-tail-boundary"; import type { NotificationParams } from "./send-session-notification"; @@ -171,6 +171,7 @@ export type ValidatedHistorianPassResult = * emitted compartments (same convention as `` at_compartment); * undefined → emission falls back to the chunk span. */ primerCandidates?: Array<{ question: string; originCompartmentIndex?: number }>; + skillObservations?: ParsedSkillObservation[]; /** v2: historian-extracted events (stored, not rendered). 
*/ events?: ParsedEvent[]; /** diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts index 4c25ebdf..11a6a1f8 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.test.ts @@ -253,3 +253,24 @@ describe("validateHistorianOutput primer candidate contract", () => { } }); }); + +describe("validateHistorianOutput — skillObservations", () => { + test("validated pass surfaces skillObservations when present", () => { + const xml = ` + +summary + + +* council | gotcha | use a fast aggregator + +1-23 +`; + const result = validateHistorianOutput(xml, "ses-test", buildChunk(1, 2), [], 0); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.skillObservations).toEqual([ + { skillId: "council", kind: "gotcha", lesson: "use a fast aggregator" }, + ]); + } + }); +}); diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts index b97f0a1e..ac6b2a28 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner-validation.ts @@ -109,6 +109,8 @@ export function validateHistorianOutput( userObservations: parsed.userObservations.length > 0 ? parsed.userObservations : undefined, primerCandidates: parsed.primerCandidates.length > 0 ? parsed.primerCandidates.slice(0, 1) : undefined, + skillObservations: + parsed.skillObservations.length > 0 ? parsed.skillObservations : undefined, // v2: surface events so the runner can persist them (stored, not rendered). events: parsed.events.length > 0 ? 
parsed.events : undefined, }; diff --git a/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts b/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts index 3fbe4d4e..bf623168 100644 --- a/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts +++ b/packages/plugin/src/hooks/magic-context/compartment-runner.test.ts @@ -1041,6 +1041,59 @@ describe("runCompartmentAgent", () => { expect(promptSession.mock.calls[0]?.[0]?.body.agent).toBe("historian"); }); + it("promotes skillObservations as global '*' notes (gated, non-discard-last)", async () => { + useTempDataHome("compartment-runner-skillobs-"); + createOpenCodeDb("ses-skillobs", [ + { id: "m-1", role: "user", text: "First" }, + { id: "m-2", role: "assistant", text: "Second" }, + { id: "m-3", role: "user", text: "protected 1" }, + { id: "m-4", role: "user", text: "protected 2" }, + { id: "m-5", role: "user", text: "protected 3" }, + { id: "m-6", role: "user", text: "protected 4" }, + { id: "m-7", role: "user", text: "protected 5" }, + ]); + const db = openDatabase(); + const client = { + session: { + get: mock(async () => ({ data: { directory: "/tmp/skillobs" } })), + create: mock(async () => ({ data: { id: "ses-agent" } })), + prompt: mock(async () => ({})), + messages: mock(async () => ({ + data: [ + { + info: { role: "assistant", time: { created: 1 } }, + parts: [ + { + type: "text", + text: `Summary\n\n* council | gotcha | fast aggregator\n`, + }, + ], + }, + ], + })), + delete: mock(async () => ({})), + }, + } as unknown as PluginContext["client"]; + try { + await runCompartmentAgentWithLease({ + client, + db, + sessionId: "ses-skillobs", + historianChunkTokens: 10_000, + directory: "/tmp", + }); + const row = db + .prepare( + "SELECT project_identity, source_type FROM skill_memory WHERE skill_id='council'", + ) + .get() as { project_identity: string; source_type: string } | undefined; + expect(row?.project_identity).toBe("*"); + 
expect(row?.source_type).toBe("historian"); + } finally { + closeQuietly(db); + } + }); + it("keeps a committed publish succeeded and signaled when post-commit project registration throws", async () => { useTempDataHome("compartment-runner-post-commit-registration-throw-"); createOpenCodeDb("ses-post-commit-registration", [ diff --git a/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts b/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts index f7357b8c..c30cdd95 100644 --- a/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts +++ b/packages/plugin/src/hooks/magic-context/historian-prompt.generated.ts @@ -29,13 +29,14 @@ The primary agent retains two tools — \`ctx_search\` (find a compartment by co ## What you produce -For each pass, you emit five things: +For each pass, you emit up to six things: 1. **Compartments** — completed logical work units from the raw history you just received. Each compartment is stored at four progressive verbosity tiers (\`\`/\`\`/\`\`/\`\`) and carries an \`importance\` score. The decay system renders a different tier depending on how the compartment has aged and how important it is. 2. **Facts** — durable cross-cutting **world knowledge** that survives past any single compartment: stable rules, defaults, constraints, naming choices. 3. **Events** *(optional)* — specific anchor moments worth extracting from compartment narrative: causal incidents (something broke, was investigated, got resolved) and trajectory corrections (a strategy was abandoned for another). 4. **User observations** *(optional)* — universal behavioral patterns about the human user, fed to a separate dreamer review pipeline that promotes recurring patterns into stable user-profile memories. 5. **Primer candidates** *(optional)* — durable standing questions about how the project works that this chunk helps answer, fed to a separate dreamer review pipeline that promotes recurring project primers. +6. 
**Skill observations** *(optional)* — universal reusable lessons about skills the agent actually used, fed to the skill-memory pipeline. You also receive two reference blocks — \`\` for calibration and \`\` for continuity with your prior work in this session. Read both before producing your output. @@ -697,6 +698,24 @@ After outputting compartments, facts, events, and user observations, also output \`\`\` If no candidates, omit the \`\` section entirely. +## Skill observations (optional, experimental) + +After outputting compartments, facts, events, and user observations, also output a \`\` section IF the chunk shows the agent USING a skill (a \`TC: skill()\` marker) and learning a UNIVERSAL, reusable lesson about that skill's behavior. + +- Each line: \`* | | \` where \`\` is exactly one of \`gotcha\`, \`discovery\`, \`fix\`, \`workflow\`. +- \`\` is the name from the \`TC: skill()\` marker in the chunk. +- Good: \`council | gotcha | the aggregator step needs a fast non-deficit model or it stalls\` — a universal fact about the skill's behavior. +- Bad (DO NOT emit): project-specific usage, one-off task detail, or anything not tied to a skill actually loaded in this chunk. +- Only emit with strong evidence the agent learned something reusable. Zero observations is the normal case. +- Do not re-emit a lesson already visible in the chunk's prior skill notes. +- The output shape gains an additional section: +\`\`\` + +* council | gotcha | the aggregator step needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests to kill flakiness + +\`\`\` +If there are no skill observations, omit the \`\` section entirely. --- @@ -762,6 +781,9 @@ Output valid XML only in this shape: How does subsystem X work? + +* skill-id | kind | lesson text + FIRST-LAST INDEX @@ -774,6 +796,7 @@ Rules: - Omit \`\` section entirely if no events were extracted (this is the normal case for most compartments). 
- Omit \`\` section entirely if no observations were extracted. - Omit \`\` section entirely if no primer candidates were extracted. +- Omit \`\` section entirely if no observations were extracted. - Compartments must be ordered, contiguous for the ranges they cover, and non-overlapping. - All four \`\`/\`\`/\`\`/\`\` elements must appear in every compartment, in that order. P4 may be self-closed, an anchor-only fragment, or one sentence depending on what makes the compartment recognizable (see P4 section). - \`episode_type\` may be a single activity or a comma-separated list of activities the compartment spans (e.g. \`episode_type="design,feature,release"\`). Multiple activities do not split a compartment — they describe one arc that touched multiple activity types. diff --git a/packages/plugin/src/hooks/magic-context/historian-prompt.source.md b/packages/plugin/src/hooks/magic-context/historian-prompt.source.md index ad32b7e9..c0f06eeb 100644 --- a/packages/plugin/src/hooks/magic-context/historian-prompt.source.md +++ b/packages/plugin/src/hooks/magic-context/historian-prompt.source.md @@ -22,13 +22,14 @@ The primary agent retains two tools — `ctx_search` (find a compartment by cont ## What you produce -For each pass, you emit five things: +For each pass, you emit up to six things: 1. **Compartments** — completed logical work units from the raw history you just received. Each compartment is stored at four progressive verbosity tiers (``/``/``/``) and carries an `importance` score. The decay system renders a different tier depending on how the compartment has aged and how important it is. 2. **Facts** — durable cross-cutting **world knowledge** that survives past any single compartment: stable rules, defaults, constraints, naming choices. 3. 
**Events** *(optional)* — specific anchor moments worth extracting from compartment narrative: causal incidents (something broke, was investigated, got resolved) and trajectory corrections (a strategy was abandoned for another). 4. **User observations** *(optional)* — universal behavioral patterns about the human user, fed to a separate dreamer review pipeline that promotes recurring patterns into stable user-profile memories. 5. **Primer candidates** *(optional)* — durable standing questions about how the project works that this chunk helps answer, fed to a separate dreamer review pipeline that promotes recurring project primers. +6. **Skill observations** *(optional)* — universal reusable lessons about skills the agent actually used, fed to the skill-memory pipeline. You also receive two reference blocks — `` for calibration and `` for continuity with your prior work in this session. Read both before producing your output. @@ -690,6 +691,24 @@ After outputting compartments, facts, events, and user observations, also output ``` If no candidates, omit the `` section entirely. +## Skill observations (optional, experimental) + +After outputting compartments, facts, events, and user observations, also output a `` section IF the chunk shows the agent USING a skill (a `TC: skill()` marker) and learning a UNIVERSAL, reusable lesson about that skill's behavior. + +- Each line: `* | | ` where `` is exactly one of `gotcha`, `discovery`, `fix`, `workflow`. +- `` is the name from the `TC: skill()` marker in the chunk. +- Good: `council | gotcha | the aggregator step needs a fast non-deficit model or it stalls` — a universal fact about the skill's behavior. +- Bad (DO NOT emit): project-specific usage, one-off task detail, or anything not tied to a skill actually loaded in this chunk. +- Only emit with strong evidence the agent learned something reusable. Zero observations is the normal case. +- Do not re-emit a lesson already visible in the chunk's prior skill notes. 
+- The output shape gains an additional section: +``` + +* council | gotcha | the aggregator step needs a fast non-deficit model +* test-driven-development | fix | mock the clock in auth tests to kill flakiness + +``` +If there are no skill observations, omit the `` section entirely. --- @@ -755,6 +774,9 @@ Output valid XML only in this shape: How does subsystem X work? + +* skill-id | kind | lesson text + FIRST-LAST INDEX @@ -767,6 +789,7 @@ Rules: - Omit `` section entirely if no events were extracted (this is the normal case for most compartments). - Omit `` section entirely if no observations were extracted. - Omit `` section entirely if no primer candidates were extracted. +- Omit `` section entirely if no observations were extracted. - Compartments must be ordered, contiguous for the ranges they cover, and non-overlapping. - All four ``/``/``/`` elements must appear in every compartment, in that order. P4 may be self-closed, an anchor-only fragment, or one sentence depending on what makes the compartment recognizable (see P4 section). - `episode_type` may be a single activity or a comma-separated list of activities the compartment spans (e.g. `episode_type="design,feature,release"`). Multiple activities do not split a compartment — they describe one arc that touched multiple activity types. 
diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts index 18a5d1fc..39d40272 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts @@ -5,7 +5,11 @@ import { extractToolCallSummaries } from "./read-session-formatting"; describe("extractToolCallSummaries — skill tool", () => { test("surfaces the skill name as TC: skill()", () => { const parts = [ - { type: "tool", tool: "skill", state: { input: { name: "test-driven-development" }, metadata: {} } }, + { + type: "tool", + tool: "skill", + state: { input: { name: "test-driven-development" }, metadata: {} }, + }, ]; expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(test-driven-development)"]); @@ -13,7 +17,11 @@ describe("extractToolCallSummaries — skill tool", () => { test("skill branch wins even if metadata.description is present (regression-proof)", () => { const parts = [ - { type: "tool", tool: "skill", state: { input: { name: "council" }, metadata: { description: "Load skill" } } }, + { + type: "tool", + tool: "skill", + state: { input: { name: "council" }, metadata: { description: "Load skill" } }, + }, ]; expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(council)"]); diff --git a/packages/plugin/src/plugin/rpc-handlers.test.ts b/packages/plugin/src/plugin/rpc-handlers.test.ts index 52c5fe50..dd219c09 100644 --- a/packages/plugin/src/plugin/rpc-handlers.test.ts +++ b/packages/plugin/src/plugin/rpc-handlers.test.ts @@ -318,16 +318,16 @@ describe("buildStatusDetail — skill memory section", () => { } }); - test("no project identity (directory is not a git repo / fallback fails) → skillMemory is null", async () => { + test("project-tier note under a different project identity does not count toward this project's stats (scoping isolation)", async () => { const db = createTestDb(); try { 
const sessionId = "ses-status-skillmem-noproj"; - // Use a non-existent directory to force resolveProjectIdentity to - // either throw or land on the dir: fallback. Either way we should - // get a deterministic identity — but to specifically exercise the - // "no identity" path we'd need to stub resolveProjectIdentity. - // Simpler check: insert a row under a project that does NOT match - // the resolved one, and assert stats are 0 (proves scoping works). + // Scoping check: a PROJECT-tier note under a different project identity + // must NOT count toward the resolved project's stats. (Global-tier notes + // are intentionally cross-project — stored under the '*' sentinel and + // counted everywhere per the F5 design — so a global fixture here would + // legitimately count; global counting is covered in storage.test.ts. + // Project-tier still partitions by real identity, which is what isolates.) const directory = process.cwd(); db.prepare( @@ -337,8 +337,8 @@ describe("buildStatusDetail — skill memory section", () => { insertSkillMemoryNote(db, { skillId: "tdd", resolvedPath: "/p", - tier: "global", - skillSource: "opencode-global", + tier: "project", + skillSource: "opencode-project", projectIdentity: "git:some-other-project", intent: "i", kind: "gotcha", diff --git a/packages/plugin/src/tools/ctx-skill-note/tools.ts b/packages/plugin/src/tools/ctx-skill-note/tools.ts index 7a1e8e33..6de52e8b 100644 --- a/packages/plugin/src/tools/ctx-skill-note/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-note/tools.ts @@ -101,13 +101,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti normalizedHash, ); if (existing) { - bumpHitCount( - deps.db, - args.skill, - registryEntry.tier, - part, - normalizedHash, - ); + bumpHitCount(deps.db, args.skill, registryEntry.tier, part, normalizedHash); return ( `Note already recorded (hit_count now ${existing.hit_count + 1}). 
` + `Exact duplicate detected — hit count bumped to reinforce recall priority.` @@ -167,13 +161,7 @@ export function createCtxSkillNoteTool(deps: CtxSkillNoteToolDeps): ToolDefiniti if (id === null) { // Race condition: another process inserted the same hash - bumpHitCount( - deps.db, - args.skill, - registryEntry.tier, - part, - normalizedHash, - ); + bumpHitCount(deps.db, args.skill, registryEntry.tier, part, normalizedHash); return "Note already recorded (concurrent insert detected — hit count bumped)."; } From 30399d1ba87a9746170f7b101816d2e0ed053290 Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 25 Jun 2026 16:45:58 +0200 Subject: [PATCH 05/10] fix(skill-memory): address PR review findings (cubic + greptile) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove committed <<<<<<< HEAD conflict marker in CONFIGURATION.md (P1) - Move injectSkillIntentParam before the lastChatContext guard so the intent param is advertised even on tool.definition flights before first chat.message - Key intentByCallId by sessionID:callID + prefix-prune on session delete so a concurrent session's delete can't evict another session's in-flight intents - Log silent catch in promoteSkillObservations (observability for dropped writes) - Anchor frontmatter regex to start-of-file (drop m flag) so a later --- rule can't be misparsed; strip inline # comments from unquoted YAML scalars + block header - Scope distill report SQL to ('','*') instead of non-deterministic LIMIT 1 - Don't truncate skill name in TC: skill() marker (identity key); sanitize newlines/control chars - Normalize backslash->slash after fileURLToPath for Windows provenance checks - FTS self-heal rebuild in initializeDatabase when skill_memory_fts is empty but skill_memory has rows - Move ctx_skill_recall _test* DI fields to a separate test-only deps type - Hoist the shared registryKey dynamic import (one import, both blocks) Pushback: reembed pre-step errors are already logged 
(task-executor.ts) — the non-blocking try/catch is by design (failure leaves notes on the FTS rung). --- .../magic-context/dreamer/task-prompts.ts | 5 +- .../skill-memory/frontmatter.test.ts | 28 +++++++++++ .../magic-context/skill-memory/frontmatter.ts | 21 +++++++- .../magic-context/skill-memory/promote.ts | 10 +++- .../magic-context/skill-memory/provenance.ts | 10 ++-- .../src/features/magic-context/storage-db.ts | 20 ++++++++ .../src/hooks/magic-context/hook-handlers.ts | 49 ++++++++++++++----- .../plugin/src/hooks/magic-context/hook.ts | 15 +++--- .../magic-context/read-session-formatting.ts | 9 +++- .../skill-memory-intent-stash.test.ts | 28 ++++++++--- packages/plugin/src/index.ts | 26 +++++----- .../src/tools/ctx-skill-recall/tools.test.ts | 11 +++-- .../src/tools/ctx-skill-recall/tools.ts | 14 ++++-- .../src/tools/ctx-skill-recall/types.ts | 11 ++++- 14 files changed, 199 insertions(+), 58 deletions(-) diff --git a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts index ffc4aa8d..8d7c8904 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts @@ -386,13 +386,14 @@ function buildDistillSkillMemoryPrompt(projectPath: string): string { - Merge (action="distill" + merge), prune, and promote are P3 / NOT YET IMPLEMENTED. Do NOT call ctx_skill_note with action="distill". ### Your task: produce a short read-only summary of skill-memory corpus health -1. Query aggregate counts and flag obvious issues: +1. 
Query aggregate counts and flag obvious issues (scoped to THIS project's + own notes plus the cross-project global '*' partition): \`\`\`sql SELECT skill_id, tier, COUNT(*) as note_count, SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END) as pinned_count, SUM(CASE WHEN intent_embedding IS NULL OR delta_embedding IS NULL THEN 1 ELSE 0 END) as missing_embedding_count FROM skill_memory - WHERE project_identity = (SELECT project_identity FROM skill_memory LIMIT 1) + WHERE project_identity IN ('${projectPath}', '*') GROUP BY skill_id, tier ORDER BY note_count DESC LIMIT 20; diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts index 31fe1424..32db5b16 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts @@ -69,4 +69,32 @@ body`; const cfg = parseFrontmatterConfig(md); expect(cfg?.ranking_relevance).toBeUndefined(); }); + + test("does NOT misparse a later --- horizontal rule as frontmatter (start-anchored)", () => { + // No real frontmatter; a horizontal-rule pair appears mid-document. With + // an `m`-flagged regex `^` would match the rule's line start and capture + // the block between the rules as config. Must return null. 
+ const md = `# Skill\n\nSome prose.\n\n---\nskill-memory:\n enabled: true\n---\n\nMore prose.`; + expect(parseFrontmatterConfig(md)).toBeNull(); + }); + + test("honors enabled: true with a trailing inline comment", () => { + const md = `---\nskill-memory:\n enabled: true # motor memory on\n max_tokens: 2000 # bump it\n---\nbody`; + const cfg = parseFrontmatterConfig(md); + expect(cfg).not.toBeNull(); + expect(cfg!.enabled).toBe(true); + expect(cfg!.max_tokens).toBe(2000); + }); + + test("honors an inline comment on the skill-memory block header", () => { + const md = `---\nskill-memory: # procedural recall\n enabled: true\n---\nbody`; + expect(parseFrontmatterConfig(md)?.enabled).toBe(true); + }); + + test("does not strip a '#' inside a quoted scalar", () => { + const md = `---\nskill-memory:\n enabled: "true"\n---\nbody`; + // "true" (quoted) still enables; the quote-strip path runs after the + // unquoted-only comment strip, so quoted values are untouched. + expect(parseFrontmatterConfig(md)?.enabled).toBe(true); + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts index 8321a46a..408c1285 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts @@ -16,7 +16,11 @@ export interface SkillMemoryConfig { ranking_hit?: number; } -const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---/m; +// Anchored to the very start of the file (NO `m` flag): frontmatter is only +// valid as the first bytes of the document. With `m`, `^` matches any line +// start, so a later `--- ... ---` block (e.g. a markdown horizontal rule) could +// be misparsed as config. 
+const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---/; export function parseFrontmatterConfig(content: string): SkillMemoryConfig | null { try { @@ -76,7 +80,9 @@ function extractSkillMemoryBlock(fmText: string): Record | null for (const line of lines) { if (!inSkillMemory) { - if (/^skill-memory:\s*$/.test(line)) { + // Tolerate a trailing inline comment after the block header + // (`skill-memory: # motor memory`), which is valid YAML. + if (/^skill-memory:\s*(#.*)?$/.test(line)) { inSkillMemory = true; found = true; } @@ -97,6 +103,17 @@ function extractSkillMemoryBlock(fmText: string): Record | null } function parseYamlScalar(raw: string): unknown { + // Strip an inline `# comment` for UNQUOTED scalars (YAML requires whitespace + // before the `#`). Quoted values keep their content verbatim so a literal + // "#" inside quotes survives. Without this, `enabled: true # on` would parse + // as the string "true # on" and silently fail the strict true/false check. + const isQuoted = + (raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'")); + if (!isQuoted) { + const commentIdx = raw.search(/\s#/); + if (commentIdx >= 0) raw = raw.slice(0, commentIdx).trim(); + } + if (raw === "true") return true; if (raw === "false") return false; if (raw === "null" || raw === "~") return null; diff --git a/packages/plugin/src/features/magic-context/skill-memory/promote.ts b/packages/plugin/src/features/magic-context/skill-memory/promote.ts index 986308da..c94abc9e 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/promote.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/promote.ts @@ -1,3 +1,4 @@ +import { log } from "../../../shared/logger"; import type { Database } from "../../../shared/sqlite"; import { computeNormalizedHash } from "../memory/normalize-hash"; import { bumpHitCount, findExistingNote, insertSkillMemoryNote, partitionKey } from "./storage"; @@ -51,8 +52,13 @@ export function 
promoteSkillObservations( createdAt: Date.now(), }); if (id !== null) written++; - } catch { - // Best-effort: one bad observation must not block the publish. + } catch (err) { + // Best-effort: one bad observation must not block the publish, but + // log it so silent persistence failures (schema drift, DB lock, + // constraint violation) remain observable. + log( + `[skill-memory] promoteSkillObservations: skipped observation for skill "${obs.skillId}" — ${err instanceof Error ? err.message : String(err)}`, + ); } } diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts index d1a59441..a5c5fba2 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts @@ -19,7 +19,11 @@ export function parseSkillProvenance(output: string, skillId: string): SkillProv const fileUrl = match[1].trim(); let absDir: string; try { - absDir = fileURLToPath(new URL(fileUrl)); + // Normalize OS-native separators to forward slashes: on Windows + // fileURLToPath yields backslash paths, which would fail the + // forward-slash startsWith/includes tier/source checks below and + // misclassify global skills as project-local. + absDir = fileURLToPath(new URL(fileUrl)).replace(/\\/g, "/"); } catch { return null; } @@ -36,7 +40,7 @@ export function deriveSkillTier(absDir: string): "project" | "global" { // ~/.config/opencode/skills/ — via config.directories() + {skill,skills}/**/SKILL.md // ~/.agents/skills/ — via AGENTS_EXTERNAL_DIR + skills/**/SKILL.md // ~/.claude/skills/ — via CLAUDE_EXTERNAL_DIR + skills/**/SKILL.md - const home = process.env.HOME ?? process.env.USERPROFILE ?? ""; + const home = (process.env.HOME ?? process.env.USERPROFILE ?? 
"").replace(/\\/g, "/"); if ( absDir.startsWith(`${home}/.config/opencode/skills/`) || absDir.startsWith(`${home}/.agents/skills/`) || @@ -50,7 +54,7 @@ export function deriveSkillTier(absDir: string): "project" | "global" { export function deriveSkillSource( absDir: string, ): "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" { - const home = process.env.HOME ?? process.env.USERPROFILE ?? ""; + const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").replace(/\\/g, "/"); if (absDir.startsWith(`${home}/.config/opencode/skills/`)) return "opencode-global"; if (absDir.startsWith(`${home}/.claude/skills/`)) return "claude-skills"; if (absDir.includes("/.agents/skills/")) return "agents-skills"; diff --git a/packages/plugin/src/features/magic-context/storage-db.ts b/packages/plugin/src/features/magic-context/storage-db.ts index 6d9d7b74..3d8e364d 100644 --- a/packages/plugin/src/features/magic-context/storage-db.ts +++ b/packages/plugin/src/features/magic-context/storage-db.ts @@ -1082,6 +1082,26 @@ CREATE INDEX IF NOT EXISTS idx_dream_queue_pending ON dream_queue(started_at, en CREATE INDEX IF NOT EXISTS idx_message_history_index_updated_at ON message_history_index(updated_at); `); + // Self-heal: backfill skill_memory_fts if it's empty while skill_memory has + // rows. The CREATE TABLE/TRIGGER block above only indexes FUTURE writes; rows + // that predate the FTS table (e.g. a DB where v50 ran but v51 hadn't, or a + // lost migration row) would be invisible to FTS rung-3 recall until re-saved. + // Guarded so this fires once (on the gap), not on every boot. Mirrors v51's + // INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild'). 
+ try { + const ftsCount = ( + db.prepare("SELECT COUNT(*) AS n FROM skill_memory_fts").get() as { n: number } + ).n; + const rowCount = ( + db.prepare("SELECT COUNT(*) AS n FROM skill_memory").get() as { n: number } + ).n; + if (ftsCount === 0 && rowCount > 0) { + db.exec("INSERT INTO skill_memory_fts(skill_memory_fts) VALUES('rebuild');"); + } + } catch { + // Non-fatal: FTS rung-3 degrades gracefully (embedding + flat recall unaffected). + } + ensureColumn(db, "primer_candidates", "harness", "TEXT NOT NULL DEFAULT 'opencode'"); ensureColumn(db, "primer_candidates", "source_start_message_id", "TEXT NOT NULL DEFAULT ''"); ensureColumn(db, "primer_candidates", "source_end_message_id", "TEXT NOT NULL DEFAULT ''"); diff --git a/packages/plugin/src/hooks/magic-context/hook-handlers.ts b/packages/plugin/src/hooks/magic-context/hook-handlers.ts index 2abf14dc..affe0ccb 100644 --- a/packages/plugin/src/hooks/magic-context/hook-handlers.ts +++ b/packages/plugin/src/hooks/magic-context/hook-handlers.ts @@ -485,6 +485,23 @@ export function createIntentByCallIdMap(): IntentByCallIdMap { return new Map(); } +/** + * Composite key for the intent stash: `${sessionId}:${callId}`. Keying by + * session (not bare callID) lets onSessionDeleted prune one session's entries + * by prefix without evicting concurrent sessions' in-flight intents. + */ +export function intentKey(sessionId: string, callId: string): string { + return `${sessionId}:${callId}`; +} + +/** Delete all stash entries belonging to one session (prefix prune on delete). 
*/ +export function pruneIntentsForSession(map: IntentByCallIdMap, sessionId: string): void { + const prefix = `${sessionId}:`; + for (const key of map.keys()) { + if (key.startsWith(prefix)) map.delete(key); + } +} + const INTENT_TTL_MS = 60_000; const INTENT_MAP_CAP = 256; @@ -556,13 +573,19 @@ export async function maybeInjectSkillMemory( export function createToolExecuteBeforeHook(args: { intentByCallId: IntentByCallIdMap }) { return async (input: unknown, output?: unknown) => { - const typedInput = input as { tool?: string; callID?: string }; + const typedInput = input as { tool?: string; callID?: string; sessionID?: string }; const typedOutput = output as { args?: Record } | undefined; if (typedInput.tool !== "skill") return; - if (!typedInput.callID) return; + if (!typedInput.callID || !typedInput.sessionID) return; const intent = typedOutput?.args?.intent; if (typeof intent !== "string") return; - stashIntent(args.intentByCallId, typedInput.callID, intent); + // Key by sessionID:callID so a concurrent session's delete (which prunes + // by prefix) can't evict this session's in-flight intents. + stashIntent( + args.intentByCallId, + intentKey(typedInput.sessionID, typedInput.callID), + intent, + ); }; } @@ -601,10 +624,13 @@ export function createToolExecuteAfterHook(args: { const skillArgs = typedInput.args as { name?: unknown } | undefined; const skillId = typeof skillArgs?.name === "string" ? skillArgs.name : null; if (skillId) { + // One dynamic import of the provenance module shared by both + // the registry-populate and the injection blocks below + // (lazy-loaded only when the skill tool actually fires). 
+ const { parseSkillProvenance, registryKey } = await import( + "../../features/magic-context/skill-memory/provenance" + ); try { - const { parseSkillProvenance, registryKey } = await import( - "../../features/magic-context/skill-memory/provenance" - ); const { parseFrontmatterConfig } = await import( "../../features/magic-context/skill-memory/frontmatter" ); @@ -639,11 +665,8 @@ export function createToolExecuteAfterHook(args: { // appends the block to output.output. // Non-fatal: recall failure must never block the tool result. try { - const { registryKey: rKey } = await import( - "../../features/magic-context/skill-memory/provenance" - ); const registryEntry = args.skillLoadRegistry.get( - rKey(typedInput.sessionID, skillId), + registryKey(typedInput.sessionID, skillId), ); if (registryEntry) { // First-turn fallback: if the map has no entry yet @@ -657,8 +680,10 @@ export function createToolExecuteAfterHook(args: { args.defaultDirectory; const projectIdentity = resolveProjectIdentity(sessionDir); const stashed = typedInput.callID - ? (getAndDeleteIntent(args.intentByCallId, typedInput.callID) ?? - undefined) + ? (getAndDeleteIntent( + args.intentByCallId, + intentKey(typedInput.sessionID, typedInput.callID), + ) ?? 
undefined) : undefined; await maybeInjectSkillMemory( args.db, diff --git a/packages/plugin/src/hooks/magic-context/hook.ts b/packages/plugin/src/hooks/magic-context/hook.ts index 62b0c571..d6802695 100644 --- a/packages/plugin/src/hooks/magic-context/hook.ts +++ b/packages/plugin/src/hooks/magic-context/hook.ts @@ -81,6 +81,7 @@ import { createToolExecuteAfterHook, createToolExecuteBeforeHook, getLiveNotificationParams, + pruneIntentsForSession, } from "./hook-handlers"; import type { LiveSessionState } from "./live-session-state"; import { sendIgnoredMessage } from "./send-session-notification"; @@ -681,14 +682,12 @@ export function createMagicContextHook(deps: MagicContextDeps) { internalChildSessions.delete(sessionId); channel1StateBySession.delete(sessionId); clearEmbedSessionState(sessionId); - // NOTE: intentByCallId is keyed by callID (not sessionID:callID), so .clear() removes - // entries from ALL concurrent sessions, not just the deleted one. This is an accepted - // design trade-off: the 60s TTL + 256-entry hard cap are the real leak guards; the - // .clear() here is a belt-and-braces backstop for long-lived sessions. Cross-session - // clearing degrades quality (lost intents for concurrent sessions) but is not fatal. - // If concurrent multi-session use becomes common, key entries as `${sessionID}:${callID}` - // and filter on delete. For P1, document-as-intentional is the chosen fix. - intentByCallId.clear(); // clear all entries on session delete (bounded map; cross-session clear is intentional — see note above) + // intentByCallId is keyed `${sessionID}:${callID}` — prune only THIS + // session's entries by prefix so a concurrent session's delete can't + // evict another session's in-flight intents (which would silently + // degrade its skill-memory recall to the flat rung). The 60s TTL + + // 256-entry cap remain the leak backstops. 
+ pruneIntentsForSession(intentByCallId, sessionId); // skillLoadRegistry is keyed as `${sessionId}:${skillId}` so we can prune // per-session entries without cross-session bleed. Without this, deleted // sessions' skill loads would persist in the registry for the plugin's diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts index 7468c9d1..1915a9f4 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts @@ -72,9 +72,14 @@ export function extractToolCallSummaries(parts: unknown[]): string[] { // Skill tool: surface the skill name (input.name) before the description // fallback, which would otherwise mask it if metadata.description exists. + // The name is an IDENTITY key (the historian extracts skill-id from this + // marker), so do NOT truncate it. Replace CR, LF, tab and any ")" with a + // space so the single-line `TC: skill()` marker can't be + // corrupted — skill names are normally slugs, this is defensive only. if (p.tool === "skill") { - const name = input && typeof input.name === "string" ? input.name : ""; - summaries.push(name ? `TC: skill(${truncateArg(name)})` : "TC: skill"); + const rawName = input && typeof input.name === "string" ? input.name : ""; + const name = rawName.replace(/[\r\n\t)]/g, " ").trim(); + summaries.push(name ? 
`TC: skill(${name})` : "TC: skill"); continue; } diff --git a/packages/plugin/src/hooks/magic-context/skill-memory-intent-stash.test.ts b/packages/plugin/src/hooks/magic-context/skill-memory-intent-stash.test.ts index 23a2db06..8495f9f9 100644 --- a/packages/plugin/src/hooks/magic-context/skill-memory-intent-stash.test.ts +++ b/packages/plugin/src/hooks/magic-context/skill-memory-intent-stash.test.ts @@ -1,5 +1,11 @@ import { describe, expect, test } from "bun:test"; -import { createIntentByCallIdMap, getAndDeleteIntent, stashIntent } from "./hook-handlers"; +import { + createIntentByCallIdMap, + getAndDeleteIntent, + intentKey, + pruneIntentsForSession, + stashIntent, +} from "./hook-handlers"; describe("intentByCallId stash map", () => { test("stashIntent stores intent keyed by callId", () => { @@ -36,11 +42,21 @@ describe("intentByCallId stash map", () => { expect(map.has("call-overflow")).toBe(true); }); - test("clearIntentMap removes all entries (onSessionDeleted)", () => { + test("pruneIntentsForSession removes ONLY the deleted session's entries", () => { + // Regression (P1): a bare-callID key + .clear() on session delete wiped + // EVERY concurrent session's in-flight intents. Keying by + // `${sessionId}:${callId}` + prefix-prune isolates the delete. const map = createIntentByCallIdMap(); - stashIntent(map, "call-a", "intent a"); - stashIntent(map, "call-b", "intent b"); - map.clear(); - expect(map.size).toBe(0); + stashIntent(map, intentKey("ses-A", "call-1"), "A intent"); + stashIntent(map, intentKey("ses-B", "call-1"), "B intent"); + stashIntent(map, intentKey("ses-B", "call-2"), "B intent 2"); + + pruneIntentsForSession(map, "ses-A"); + + // ses-A's entry is gone; ses-B's two survive (callID "call-1" collides + // across sessions but the composite key keeps them distinct). 
+ expect(getAndDeleteIntent(map, intentKey("ses-A", "call-1"))).toBeNull(); + expect(getAndDeleteIntent(map, intentKey("ses-B", "call-1"))).toBe("B intent"); + expect(getAndDeleteIntent(map, intentKey("ses-B", "call-2"))).toBe("B intent 2"); }); }); diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index d1428342..75bbc022 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -465,12 +465,6 @@ const server: Plugin = async (ctx) => { await hooks.magicContext?.["chat.message"]?.(input); }, "tool.definition": async (input, output) => { - // Attribute tool schema tokens to the most recent chat-message context. - // If no chat.message has fired yet in this process (e.g. a subagent - // flight that reuses a historian/dreamer/sidekick agent whose - // chat.message preceded plugin init), skip — the measurement will - // land correctly on the next flight. - if (!lastChatContext) return; const typedInput = input as { toolID?: string }; const typedOutput = output as { description?: unknown; @@ -483,6 +477,21 @@ const server: Plugin = async (ctx) => { }; }; if (!typedInput.toolID) return; + // Inject optional intent param for skill-memory recall FIRST — it only + // mutates the skill tool's advertised JSON schema and does NOT need + // chat context, so it must run even on a tool.definition flight that + // fires before any chat.message (otherwise the model never sees the + // `intent` param that flight and skill-memory recall silently degrades). + injectSkillIntentParam( + typedInput.toolID, + typedOutput as Parameters<typeof injectSkillIntentParam>[1], + ); + // Attribute tool schema tokens to the most recent chat-message context. + // If no chat.message has fired yet in this process (e.g. a subagent + // flight that reuses a historian/dreamer/sidekick agent whose + // chat.message preceded plugin init), skip the attribution — the + // measurement will land correctly on the next flight.
+ if (!lastChatContext) return; recordToolDefinition( lastChatContext.providerID, lastChatContext.modelID, @@ -491,11 +500,6 @@ const server: Plugin = async (ctx) => { typeof typedOutput.description === "string" ? typedOutput.description : "", typedOutput.parameters, ); - // Inject optional intent param for skill-memory recall - injectSkillIntentParam( - typedInput.toolID, - typedOutput as Parameters<typeof injectSkillIntentParam>[1], - ); }, "tool.execute.after": async (input, output) => { await hooks.magicContext?.["tool.execute.after"]?.(input, output); diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts index dc742db8..8a17f625 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.test.ts @@ -6,6 +6,7 @@ import { initializeDatabase } from "../../features/magic-context/storage-db"; import { Database } from "../../shared/sqlite"; import { closeQuietly } from "../../shared/sqlite-helpers"; import { createCtxSkillRecallTool } from "./tools"; +import type { CtxSkillRecallToolTestDeps } from "./types"; // DI-based tests: inject _testFrontmatterConfig + _testProjectIdentity via deps // to bypass SKILL.md disk resolution and resolveProjectIdentity() entirely.
@@ -43,12 +44,13 @@ describe("ctx_skill_recall tool", () => { }); // Inject frontmatterConfig + projectIdentity via DI — no SKILL.md needed - const tool = createCtxSkillRecallTool({ + const testDeps: CtxSkillRecallToolTestDeps = { db, projectDirectory: "/tmp/test", _testFrontmatterConfig: ENABLED_FRONTMATTER, _testProjectIdentity: TEST_PROJECT_IDENTITY, - }); + }; + const tool = createCtxSkillRecallTool(testDeps); const result = await tool.execute({ skill: "tdd", intent: "fix flaky test" }, { sessionID: "ses_test", agent: "general", @@ -66,12 +68,13 @@ describe("ctx_skill_recall tool", () => { const db = makeDb(); try { // Inject enabled frontmatter + matching projectIdentity, but no notes inserted - const tool = createCtxSkillRecallTool({ + const testDeps: CtxSkillRecallToolTestDeps = { db, projectDirectory: "/tmp/test", _testFrontmatterConfig: ENABLED_FRONTMATTER, _testProjectIdentity: TEST_PROJECT_IDENTITY, - }); + }; + const tool = createCtxSkillRecallTool(testDeps); const result = await tool.execute({ skill: "nonexistent-skill" }, { sessionID: "ses_test", agent: "general", diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.ts index 6db13f36..958ca975 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.ts @@ -12,6 +12,7 @@ import { CTX_SKILL_RECALL_TOOL_NAME, type CtxSkillRecallArgs, type CtxSkillRecallToolDeps, + type CtxSkillRecallToolTestDeps, } from "./types"; // NOTE on tool() API: same pattern as ctx_skill_note (Task 8). @@ -45,15 +46,18 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi ), }, execute: async (args: CtxSkillRecallArgs, toolContext: ToolContext) => { - // Test-only DI overrides (bypass all resolution) + // Test-only DI overrides (bypass all resolution). 
Read via an internal + // cast to the test-deps type so the seams stay OUT of the public + // CtxSkillRecallToolDeps contract (production callers can't pass them). + const testDeps = deps as CtxSkillRecallToolTestDeps; if ( - deps._testFrontmatterConfig !== undefined || - deps._testProjectIdentity !== undefined + testDeps._testFrontmatterConfig !== undefined || + testDeps._testProjectIdentity !== undefined ) { const projectIdentity = - deps._testProjectIdentity ?? + testDeps._testProjectIdentity ?? resolveProjectIdentity(toolContext.directory ?? deps.projectDirectory); - const frontmatterConfig = deps._testFrontmatterConfig ?? null; + const frontmatterConfig = testDeps._testFrontmatterConfig ?? null; const tier: "project" | "global" = "global"; // default for test injection const block = await recallSkillMemoryBlock(deps.db, { skill: args.skill, diff --git a/packages/plugin/src/tools/ctx-skill-recall/types.ts b/packages/plugin/src/tools/ctx-skill-recall/types.ts index acae7075..b53994e9 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/types.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/types.ts @@ -18,7 +18,16 @@ export interface CtxSkillRecallToolDeps { // In production, pass hooks.magicContext.skillLoadRegistry. // In tests, inject directly to avoid SKILL.md fixture files. skillLoadRegistry?: SkillLoadRegistry; - // Test-only DI overrides (bypass disk resolution entirely): +} + +/** + * Test-only DI overrides — kept OUT of the public production contract so a + * production call site cannot accidentally pass them to bypass disk resolution, + * frontmatter loading, and project-identity resolution. Tests pass this wider + * type (structurally assignable to CtxSkillRecallToolDeps); the tool impl reads + * the overrides via an internal cast. 
+ */ +export interface CtxSkillRecallToolTestDeps extends CtxSkillRecallToolDeps { _testFrontmatterConfig?: SkillMemoryConfig | null; _testProjectIdentity?: string; } From 782b11b58bfa4ff1f6edd7d578e5847f87bb7d53 Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 25 Jun 2026 17:08:08 +0200 Subject: [PATCH 06/10] fix(skill-memory): address second review round (greptile P1 + cubic P2/P3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - P1: recall now unions the skill's own partition with the global '*' partition (recallPartitionPredicate helper) so a PROJECT-LOCAL skill surfaces historian-written global notes — previously orphaned (tier='project' query never matched tier='global'/'*'). Write/dedup paths stay exact-partition. - Escape apostrophes in projectPath before SQL string interpolation in the distill prompt template. - Frontmatter regex tolerates a leading UTF-8 BOM / whitespace (still start-anchored). - TC: skill() marker emits the name VERBATIM when marker-safe, else drops it — never mutates the identity key (recall keys on raw input.name). - Fix misleading frontmatter test: now actually exercises a '#' inside a quoted scalar (preserved) vs unquoted (comment-stripped). Regression tests: project-local skill recalls a global historian note; agent project note + historian global note both surface for the same skill. 
--- .../magic-context/dreamer/task-prompts.ts | 2 +- .../skill-memory/frontmatter.test.ts | 16 ++++- .../magic-context/skill-memory/frontmatter.ts | 7 ++- .../magic-context/skill-memory/recall.test.ts | 60 +++++++++++++++++++ .../magic-context/skill-memory/storage.ts | 51 +++++++++++++--- .../magic-context/read-session-formatting.ts | 15 +++-- 6 files changed, 130 insertions(+), 21 deletions(-) diff --git a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts index 8d7c8904..a222bf32 100644 --- a/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts +++ b/packages/plugin/src/features/magic-context/dreamer/task-prompts.ts @@ -393,7 +393,7 @@ function buildDistillSkillMemoryPrompt(projectPath: string): string { SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END) as pinned_count, SUM(CASE WHEN intent_embedding IS NULL OR delta_embedding IS NULL THEN 1 ELSE 0 END) as missing_embedding_count FROM skill_memory - WHERE project_identity IN ('${projectPath}', '*') + WHERE project_identity IN ('${projectPath.replace(/'/g, "''")}', '*') GROUP BY skill_id, tier ORDER BY note_count DESC LIMIT 20; diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts index 32db5b16..74f1ea4d 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts @@ -91,10 +91,20 @@ body`; expect(parseFrontmatterConfig(md)?.enabled).toBe(true); }); - test("does not strip a '#' inside a quoted scalar", () => { + test("a plain quoted scalar still enables", () => { const md = `---\nskill-memory:\n enabled: "true"\n---\nbody`; - // "true" (quoted) still enables; the quote-strip path runs after the - // unquoted-only comment strip, so quoted values are untouched. 
expect(parseFrontmatterConfig(md)?.enabled).toBe(true); }); + + test("does NOT strip a '#' inside a quoted scalar (comment-strip is unquoted-only)", () => { + // Quoted value containing a '#': the inline-comment strip must NOT fire, + // so the value stays the literal "true # x" (≠ "true") and the config is + // inert. If the '#' WERE wrongly stripped, it would collapse to "true" + // and enable — so a passing assertion proves the '#' was preserved. + const quoted = `---\nskill-memory:\n enabled: "true # x"\n---\nbody`; + expect(parseFrontmatterConfig(quoted)).toBeNull(); + // Contrast: the SAME text UNquoted is a real inline comment → stripped → enables. + const unquoted = `---\nskill-memory:\n enabled: true # x\n---\nbody`; + expect(parseFrontmatterConfig(unquoted)?.enabled).toBe(true); + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts index 408c1285..ad97e814 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts @@ -19,8 +19,11 @@ export interface SkillMemoryConfig { // Anchored to the very start of the file (NO `m` flag): frontmatter is only // valid as the first bytes of the document. With `m`, `^` matches any line // start, so a later `--- ... ---` block (e.g. a markdown horizontal rule) could -// be misparsed as config. -const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---/; +// be misparsed as config. A leading UTF-8 BOM and leading whitespace/blank +// lines are tolerated (`\uFEFF?\s*`) so an editor-saved SKILL.md with a BOM or a +// stray blank first line still parses; this stays start-anchored because `\s*` +// only spans leading whitespace before the first `---`, never a mid-document rule. 
+const FRONTMATTER_REGEX = /^\uFEFF?\s*---\r?\n([\s\S]*?)\r?\n---/; export function parseFrontmatterConfig(content: string): SkillMemoryConfig | null { try { diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts index f616a74d..694699e7 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -427,6 +427,66 @@ describe("cross-project global recall", () => { closeQuietly(db); } }); + + test("a PROJECT-LOCAL skill surfaces a historian-written global note (P1 regression)", async () => { + // Regression: promoteSkillObservations always writes tier='global'/'*', + // but recall for a project-local skill uses scope='project'. Before the + // union-on-recall fix, the project-tier query never matched the global + // '*' rows, so historian notes were orphaned for project-local skills. + const db = makeDb(); + try { + promoteSkillObservations(db, "git:repoA", [ + { + skillId: "tdd", + kind: "discovery", + lesson: "spike the parser before writing the plan", + }, + ]); + // Recall as a PROJECT-tier skill (the failing case pre-fix). + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "project", + projectIdentity: "git:repoA", + frontmatterConfig: cfg, + }); + expect(block).toContain("spike the parser before writing the plan"); + } finally { + closeQuietly(db); + } + }); + + test("a project-local AGENT note and a global HISTORIAN note both surface for the same skill", async () => { + const db = makeDb(); + try { + // Agent-written, project-tier note. 
+ insertSkillMemoryNote(db, { + skillId: "tdd", + resolvedPath: "/repo/.opencode/skills/tdd/SKILL.md", + tier: "project", + skillSource: "opencode-project", + projectIdentity: "git:repoLocal", + intent: "local lesson", + kind: "gotcha", + delta: "project-local agent note", + normalizedHash: "pl1", + createdAt: Date.now(), + }); + // Historian-written, global note for the same skill. + promoteSkillObservations(db, "git:repoLocal", [ + { skillId: "tdd", kind: "discovery", lesson: "global historian note" }, + ]); + const block = await recallSkillMemoryBlock(db, { + skill: "tdd", + scope: "project", + projectIdentity: "git:repoLocal", + frontmatterConfig: cfg, + }); + expect(block).toContain("project-local agent note"); + expect(block).toContain("global historian note"); + } finally { + closeQuietly(db); + } + }); }); describe("recallSkillMemoryBlock bumps recall_count for surfaced notes", () => { diff --git a/packages/plugin/src/features/magic-context/skill-memory/storage.ts b/packages/plugin/src/features/magic-context/skill-memory/storage.ts index 5e8d6239..7137afdf 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/storage.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/storage.ts @@ -10,6 +10,37 @@ export function partitionKey(tier: "project" | "global", projectIdentity: string return tier === "global" ? "*" : projectIdentity; } +/** + * RECALL-path partition predicate: a note is recallable for (tier, projectIdentity) + * when it is either in the skill's OWN partition OR in the cross-project global + * '*' partition (where historian-extracted lessons live). This is why a + * project-local skill still surfaces global historian notes — without the global + * branch, recall for a project-tier skill (`tier='project'`) would never match the + * `tier='global'/project_identity='*'` rows that promoteSkillObservations writes. 
+ * + * For a global-tier skill the two branches are identical (both `tier='global' AND + * project_identity='*'`), so the OR is a harmless no-op — a row still matches once + * (it's a row filter, not a join, so no duplication). + * + * WRITE/dedup paths (insert, findExistingNote, bumpHitCount, getDedupCandidates) + * deliberately do NOT use this — they must target one exact partition. + * + * Returns `{ clause, binds }`; callers splice `clause` into the WHERE and spread + * `binds` (tier, ownPartition) at the matching `?` positions. + */ +function recallPartitionPredicate( + tier: "project" | "global", + projectIdentity: string, + columnPrefix = "", +): { clause: string; binds: [string, string] } { + const t = `${columnPrefix}tier`; + const p = `${columnPrefix}project_identity`; + return { + clause: `((${t} = ? AND ${p} = ?) OR (${t} = 'global' AND ${p} = '*'))`, + binds: [tier, partitionKey(tier, projectIdentity)], + }; +} + export interface SkillMemoryNote { id: number; skill_id: string; @@ -123,11 +154,12 @@ export function getSkillMemoryNotes( projectIdentity: string, limit: number, ): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); return db .prepare( `SELECT * FROM skill_memory - WHERE skill_id = ? AND tier = ? AND project_identity = ? + WHERE skill_id = ? AND ${clause} ORDER BY pinned DESC, ( @@ -145,7 +177,7 @@ export function getSkillMemoryNotes( created_at DESC LIMIT ?`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; + .all(skillId, ...binds, limit) as SkillMemoryNote[]; } /** @@ -249,13 +281,14 @@ export function getRankingCandidates( projectIdentity: string, limit: number, ): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); return db .prepare( `SELECT * FROM skill_memory - WHERE skill_id=? AND tier=? AND project_identity=? + WHERE skill_id=? 
AND ${clause} ORDER BY COALESCE(last_used_at, created_at) DESC LIMIT ?`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity), limit) as SkillMemoryNote[]; + .all(skillId, ...binds, limit) as SkillMemoryNote[]; } export function searchSkillMemoryFts( @@ -271,15 +304,14 @@ export function searchSkillMemoryFts( `SELECT m.* FROM skill_memory_fts f JOIN skill_memory m ON m.id = f.rowid WHERE skill_memory_fts MATCH ? - AND m.skill_id=? AND m.tier=? AND m.project_identity=? + AND m.skill_id=? AND ${recallPartitionPredicate(tier, projectIdentity, "m.").clause} ORDER BY bm25(skill_memory_fts) ASC, COALESCE(m.last_used_at, m.created_at) DESC LIMIT ?`, ) .all( matchQuery, skillId, - tier, - partitionKey(tier, projectIdentity), + ...recallPartitionPredicate(tier, projectIdentity, "m.").binds, limit, ) as SkillMemoryNote[]; } @@ -290,13 +322,14 @@ export function getPinnedNotes( tier: "project" | "global", projectIdentity: string, ): SkillMemoryNote[] { + const { clause, binds } = recallPartitionPredicate(tier, projectIdentity); return db .prepare( `SELECT * FROM skill_memory - WHERE skill_id=? AND tier=? AND project_identity=? AND pinned=1 + WHERE skill_id=? AND ${clause} AND pinned=1 ORDER BY COALESCE(last_used_at, created_at) DESC`, ) - .all(skillId, tier, partitionKey(tier, projectIdentity)) as SkillMemoryNote[]; + .all(skillId, ...binds) as SkillMemoryNote[]; } export function getSkillMemoryStats( diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts index 1915a9f4..ce031c4f 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts @@ -72,14 +72,17 @@ export function extractToolCallSummaries(parts: unknown[]): string[] { // Skill tool: surface the skill name (input.name) before the description // fallback, which would otherwise mask it if metadata.description exists. 
- // The name is an IDENTITY key (the historian extracts skill-id from this - // marker), so do NOT truncate it. Sanitize newlines/control chars and a - // stray ")" so the single-line `TC: skill()` marker can't be - // corrupted — skill names are normally slugs, this is defensive only. + // The name is an IDENTITY key — the historian extracts the skill-id from + // this marker and recall keys on the raw input.name, so the marker name + // MUST equal the raw name (no truncation, no mutation) or the stored id + // won't match recall. Emit it VERBATIM when marker-safe; if it contains a + // marker-breaking char (CR/LF/tab/")") — which a real skill directory name + // never does — drop the name (`TC: skill`) rather than emit a corrupted or + // mutated identity. if (p.tool === "skill") { const rawName = input && typeof input.name === "string" ? input.name : ""; - const name = rawName.replace(/[\r\n\t)]/g, " ").trim(); - summaries.push(name ? `TC: skill(${name})` : "TC: skill"); + const markerSafe = rawName !== "" && !/[\r\n\t)]/.test(rawName); + summaries.push(markerSafe ? `TC: skill(${rawName})` : "TC: skill"); continue; } From 7b084abcb94e0dba5a13a2da3e9cb2e979947f67 Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 25 Jun 2026 17:25:33 +0200 Subject: [PATCH 07/10] fix(skill-memory): address council findings (disabled-path crash + 2 routing/parser gaps) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Council review (deepseek/sonnet/gpt-5.5) of PR #181: - Must (consensus rev-2+rev-3): the ctx_skill_note fail-loud guard threw during plugin init when the plugin is disabled (enabled:false OR conflict-disabled) — createSessionHooks returns {magicContext:null} by design, so the unconditional guard crashed the entry module on the disabled path. Gate it on pluginConfig.enabled; pass a throwaway Map to createToolRegistry (which early-returns {} when disabled and never reads it). 
- Must (rev-3): singular ~/.config/opencode/skill/ global path was misclassified as project tier (opencode's pattern is {skill,skills}/**/SKILL.md) — fixed in deriveSkillTier/deriveSkillSource + the ctx_skill_recall cold-start search list. - Must (rev-3): the frontmatter parser rejected the inline flow-mapping form 'skill-memory: { enabled: true }' — the EXACT form the ctx_skill_recall remediation message and ARCHITECTURE/CONFIGURATION/README advertise. Added inline-mapping parsing so guidance and parser agree. - Should (consensus rev-1+rev-3): recallSkillMemoryBlock swallowed all errors silently — added a log() so FTS/blob corruption is diagnosable (still no-throw). Regression tests: inline frontmatter form (3 cases), singular skill/ global path (2 cases). --- .../skill-memory/frontmatter.test.ts | 24 +++++++++ .../magic-context/skill-memory/frontmatter.ts | 50 +++++++++++++++++++ .../skill-memory/provenance.test.ts | 18 +++++++ .../magic-context/skill-memory/provenance.ts | 9 +++- .../magic-context/skill-memory/recall.ts | 10 +++- packages/plugin/src/index.ts | 15 +++++- .../src/tools/ctx-skill-recall/tools.ts | 1 + 7 files changed, 123 insertions(+), 4 deletions(-) diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts index 74f1ea4d..58d70b85 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.test.ts @@ -91,6 +91,30 @@ body`; expect(parseFrontmatterConfig(md)?.enabled).toBe(true); }); + test("parses the inline flow-mapping form the docs/remediation advertise", () => { + // ctx_skill_recall remediation + ARCHITECTURE/CONFIGURATION/README all tell + // users to add `skill-memory: { enabled: true }`. The parser MUST accept it + // or the guidance is dead-on-arrival. 
+ const cfg = parseFrontmatterConfig("---\nskill-memory: { enabled: true }\n---\n# Skill"); + expect(cfg).not.toBeNull(); + expect(cfg!.enabled).toBe(true); + }); + + test("parses inline flow-mapping with multiple keys", () => { + const cfg = parseFrontmatterConfig( + "---\nskill-memory: { enabled: true, max_tokens: 2000, dedup_threshold: 0.8 }\n---\nbody", + ); + expect(cfg!.enabled).toBe(true); + expect(cfg!.max_tokens).toBe(2000); + expect(cfg!.dedup_threshold).toBe(0.8); + }); + + test("inline form with enabled: false stays inert", () => { + expect( + parseFrontmatterConfig("---\nskill-memory: { enabled: false }\n---\nbody"), + ).toBeNull(); + }); + test("a plain quoted scalar still enables", () => { const md = `---\nskill-memory:\n enabled: "true"\n---\nbody`; expect(parseFrontmatterConfig(md)?.enabled).toBe(true); diff --git a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts index ad97e814..c235c52d 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/frontmatter.ts @@ -83,6 +83,25 @@ function extractSkillMemoryBlock(fmText: string): Record<string, unknown> | null for (const line of lines) { if (!inSkillMemory) { + // Inline flow-mapping form on the header line: + // `skill-memory: { enabled: true, max_tokens: 2000 }` + // This is the form the ctx_skill_recall remediation message and the + // root docs advertise, so it MUST parse — otherwise a user following + // the guidance silently gets an inert config. Parse the {...} body + // into the same flat map the block form produces, then stop (a flow + // mapping is self-contained on one line).
+ const inlineMatch = line.match(/^skill-memory:\s*\{(.*)\}\s*(#.*)?$/); + if (inlineMatch) { + found = true; + for (const pair of splitFlowEntries(inlineMatch[1])) { + const sep = pair.indexOf(":"); + if (sep < 0) continue; + const key = pair.slice(0, sep).trim(); + if (!/^\w+$/.test(key)) continue; + result[key] = parseYamlScalar(pair.slice(sep + 1).trim()); + } + break; + } // Tolerate a trailing inline comment after the block header // (`skill-memory: # motor memory`), which is valid YAML. if (/^skill-memory:\s*(#.*)?$/.test(line)) { @@ -105,6 +124,37 @@ function extractSkillMemoryBlock(fmText: string): Record<string, unknown> | null return found ? result : null; } +/** + * Split a YAML flow-mapping body (the text inside `{...}`) on top-level commas, + * leaving quoted segments intact. Minimal — the skill-memory config is a flat + * map of scalar values, so we don't need nested {}/[] handling. + */ +function splitFlowEntries(body: string): string[] { + const entries: string[] = []; + let current = ""; + let quote: '"' | "'" | null = null; + for (const ch of body) { + if (quote) { + current += ch; + if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'") { + quote = ch; + current += ch; + continue; + } + if (ch === ",") { + entries.push(current); + current = ""; + continue; + } + current += ch; + } + if (current.trim()) entries.push(current); + return entries; +} + function parseYamlScalar(raw: string): unknown { // Strip an inline `# comment` for UNQUOTED scalars (YAML requires whitespace // before the `#`).
Quoted values keep their content verbatim so a literal diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts index eadbe637..098d7c2c 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts @@ -51,4 +51,22 @@ describe("parseSkillProvenance", () => { expect(result!.tier).toBe("project"); expect(result!.skillSource).toBe("opencode-project"); }); + + test("handles the SINGULAR ~/.config/opencode/skill/ global path (OPENCODE_SKILL_PATTERN covers both)", () => { + // Regression: opencode's pattern is {skill,skills}/**/SKILL.md, so the + // global config dir resolves under singular `skill/` too. Before the fix, + // deriveSkillTier classified it as project → notes written to the wrong + // partition + cold recall couldn't find SKILL.md. + const output = `Base directory for this skill: file://${HOME}/.config/opencode/skill/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); + + test("handles the plural ~/.config/opencode/skills/ global path", () => { + const output = `Base directory for this skill: file://${HOME}/.config/opencode/skills/my-skill`; + const result = parseSkillProvenance(output, "my-skill"); + expect(result!.tier).toBe("global"); + expect(result!.skillSource).toBe("opencode-global"); + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts index a5c5fba2..350e39be 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts @@ -42,7 +42,10 @@ export function deriveSkillTier(absDir: string): "project" | "global" { // 
~/.claude/skills/ — via CLAUDE_EXTERNAL_DIR + skills/**/SKILL.md const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").replace(/\\/g, "/"); if ( + // opencode's OPENCODE_SKILL_PATTERN is `{skill,skills}/**/SKILL.md`, so + // the global config dir resolves under BOTH singular and plural. absDir.startsWith(`${home}/.config/opencode/skills/`) || + absDir.startsWith(`${home}/.config/opencode/skill/`) || absDir.startsWith(`${home}/.agents/skills/`) || absDir.startsWith(`${home}/.claude/skills/`) ) { @@ -55,7 +58,11 @@ export function deriveSkillSource( absDir: string, ): "opencode-project" | "opencode-global" | "claude-skills" | "agents-skills" { const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").replace(/\\/g, "/"); - if (absDir.startsWith(`${home}/.config/opencode/skills/`)) return "opencode-global"; + if ( + absDir.startsWith(`${home}/.config/opencode/skills/`) || + absDir.startsWith(`${home}/.config/opencode/skill/`) + ) + return "opencode-global"; if (absDir.startsWith(`${home}/.claude/skills/`)) return "claude-skills"; if (absDir.includes("/.agents/skills/")) return "agents-skills"; // Both singular .opencode/skill/ and plural .opencode/skills/ are valid — diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts index 4ac37842..36779f24 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -1,3 +1,4 @@ +import { log } from "../../../shared/logger"; import type { Database } from "../../../shared/sqlite"; import { cosineSimilarity } from "../memory/cosine-similarity"; import { embedTextForProject } from "../memory/embedding"; @@ -277,7 +278,14 @@ export async function recallSkillMemoryBlock( ); const selected = budgetFill(ordered, maxTokens, maxPinned); return finalize("fts5-fallback", selected); - } catch { + } catch (err) { + // Cache-safe + 
non-choking: never throw from recall (a thrown error here + // would surface in the skill tool result). But log it — a broken FTS + // table or bad vector blob would otherwise look identical to "no notes", + // making durable skill-memory data loss undiagnosable. + log( + `[skill-memory] recallSkillMemoryBlock failed for skill "${opts.skill}": ${err instanceof Error ? err.message : String(err)}`, + ); return ""; } } diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index 75bbc022..fe6fbf5a 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -155,7 +155,14 @@ const server: Plugin = async (ctx) => { // every note would return "No recent skill load found" — the exact // opposite of "fail loud". Catch a wiring regression at startup, not // at the first ctx_skill_note call from an agent. - if (!hooks.magicContext?.skillLoadRegistry) { + // + // ONLY when the plugin is enabled: when disabled by config (`enabled: false`) + // or by a detected conflict (sets enabled=false above), createSessionHooks + // returns `{ magicContext: null }` by design and no tools are exposed, so a + // missing registry is expected — throwing here would crash plugin init on + // the disabled path (an entry-module throw — the exact load-crash class this + // plugin must avoid). + if (pluginConfig.enabled && !hooks.magicContext?.skillLoadRegistry) { throw new Error( "[magic-context] ctx_skill_note registration failed: " + "hooks.magicContext.skillLoadRegistry is missing. " + @@ -165,7 +172,11 @@ const server: Plugin = async (ctx) => { const tools = createToolRegistry({ ctx, pluginConfig, - skillLoadRegistry: hooks.magicContext.skillLoadRegistry, + // Disabled path: magicContext is null and createToolRegistry early-returns + // {} without using the registry. Pass a throwaway Map so the argument + // expression never dereferences null (createToolRegistry never reads it + // when disabled). 
+ skillLoadRegistry: hooks.magicContext?.skillLoadRegistry ?? new Map(), }); // v22 deferred legacy-memory identity backfill. createSessionHooks() opens diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.ts index 958ca975..6bd73d64 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.ts @@ -128,6 +128,7 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi `${projectDirectory}/.claude/skills/${args.skill}`, // Global dirs second `${home}/.config/opencode/skills/${args.skill}`, // via Global.Path.config + {skill,skills}/**/SKILL.md + `${home}/.config/opencode/skill/${args.skill}`, // singular — OPENCODE_SKILL_PATTERN covers both `${home}/.agents/skills/${args.skill}`, // via AGENTS_EXTERNAL_DIR `${home}/.claude/skills/${args.skill}`, // via CLAUDE_EXTERNAL_DIR ]; From 693fd1078e0440de3cd66dc833472848542faa61 Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 25 Jun 2026 17:32:25 +0200 Subject: [PATCH 08/10] fix(skill-memory): address council should-level findings - budgetFill now counts per-note XML framing (~20 tokens) so the rendered block stays within max_tokens instead of ~13% overshoot (rev-1). - clamp effective pinned budget to min(max_pinned_tokens, max_tokens) so the default 4000>1500 can't imply pinned gets more room than the whole block (rev-2). - ctx_skill_recall: derive tier via dirname(resolvedPath) instead of a fragile .replace('/SKILL.md','') (rev-2). Updated the budget-truncation test for the framing-inclusive math. 
--- .../magic-context/skill-memory/recall.test.ts | 11 ++++++----- .../features/magic-context/skill-memory/recall.ts | 14 ++++++++++++-- .../plugin/src/tools/ctx-skill-recall/tools.ts | 8 +++++--- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts index 694699e7..01608e0f 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.test.ts @@ -53,12 +53,13 @@ describe("flatRecall", () => { createdAt: now - i * 1000, }); } - // Token-budget truncation test (arithmetic verified): - // delta = "note N — " (9 chars) + 40 "x"s = 49 chars → Math.ceil(49/4) = 13 tokens each. - // maxTokens: 30 → first note fits (13 ≤ 30), second note fits (13+13=26 ≤ 30), - // third note would exceed (26+13=39 > 30) → exactly 2 notes fit. + // Token-budget truncation test (arithmetic verified, framing-inclusive): + // delta = "note N — " (9 chars) + 40 "x"s = 49 chars → Math.ceil(49/4) = 13 tokens, + // plus NOTE_FRAMING_TOKENS (20) for the wrapper = 33 tokens each. + // maxTokens: 70 → 1st fits (33 ≤ 70), 2nd fits (33+33=66 ≤ 70), + // 3rd would exceed (66+33=99 > 70) → exactly 2 notes fit. 
const notes = flatRecall(db, "tdd", "global", "git:abc", { - maxTokens: 30, // 2 notes × 13 tokens = 26 ≤ 30; 3rd note would push to 39 > 30 + maxTokens: 70, // 2 notes × 33 tokens = 66 ≤ 70; 3rd would push to 99 > 70 maxPinnedTokens: 4000, }); expect(notes.length).toBe(2); diff --git a/packages/plugin/src/features/magic-context/skill-memory/recall.ts b/packages/plugin/src/features/magic-context/skill-memory/recall.ts index 36779f24..7d4c7ef7 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/recall.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/recall.ts @@ -19,6 +19,12 @@ export interface FlatRecallOptions { maxPinnedTokens: number; } +// Per-note +// framing that budgetFill's delta-only estimate would otherwise ignore (~80 +// chars ≈ 20 tokens/note). Counting it keeps the rendered block within max_tokens +// instead of overshooting ~13% on a 10-note block (rev-1 S1). +const NOTE_FRAMING_TOKENS = 20; + // Rough token estimate: 1 token ≈ 4 chars (conservative for XML overhead) function estimateTokens(text: string): number { return Math.ceil(text.length / 4); @@ -115,11 +121,15 @@ function budgetFill( const result: SkillMemoryNote[] = []; let pinnedTokens = 0; let totalTokens = 0; + // The total budget is the hard ceiling, so the pinned sub-budget can never + // exceed it — clamp so the default max_pinned_tokens (4000) > max_tokens + // (1500) can't imply pinned notes get more room than the whole block (rev-2). 
+ const effectiveMaxPinned = Math.min(maxPinnedTokens, maxTokens); for (const note of notes) { - const tokens = estimateTokens(note.delta); + const tokens = estimateTokens(note.delta) + NOTE_FRAMING_TOKENS; if (note.pinned === 1) { - if (pinnedTokens + tokens > maxPinnedTokens) continue; + if (pinnedTokens + tokens > effectiveMaxPinned) continue; pinnedTokens += tokens; } if (totalTokens + tokens > maxTokens) continue; diff --git a/packages/plugin/src/tools/ctx-skill-recall/tools.ts b/packages/plugin/src/tools/ctx-skill-recall/tools.ts index 6bd73d64..2a932074 100644 --- a/packages/plugin/src/tools/ctx-skill-recall/tools.ts +++ b/packages/plugin/src/tools/ctx-skill-recall/tools.ts @@ -1,4 +1,5 @@ import { readFileSync } from "node:fs"; +import { dirname } from "node:path"; import { type ToolContext, type ToolDefinition, tool } from "@opencode-ai/plugin"; import { resolveProjectIdentity } from "../../features/magic-context/memory/project-identity"; import { parseFrontmatterConfig } from "../../features/magic-context/skill-memory/frontmatter"; @@ -151,7 +152,7 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi `SKILL.md not found for '${args.skill}' in any known skill directory. ` + `Load the skill first with the skill tool, or verify the skill name is correct. ` + `Searched: project .opencode/skill/, .opencode/skills/, .agents/skills/, .claude/skills/; ` + - `global ~/.config/opencode/skills/, ~/.agents/skills/, ~/.claude/skills/.` + `global ~/.config/opencode/skill/, ~/.config/opencode/skills/, ~/.agents/skills/, ~/.claude/skills/.` ); } @@ -159,8 +160,9 @@ export function createCtxSkillRecallTool(deps: CtxSkillRecallToolDeps): ToolDefi frontmatterConfig = rawSkillContent ? 
parseFrontmatterConfig(rawSkillContent) : null; - // Derive tier from resolved path - tier = deriveSkillTier(resolvedPath.replace("/SKILL.md", "")); + // Derive tier from the skill's directory (dirname, not a fragile + // string replace that would mis-handle a path containing "SKILL.md"). + tier = deriveSkillTier(dirname(resolvedPath)); } if (!frontmatterConfig?.enabled) { From d325e72d0ba1d1d070381f141d880c9c7e91f89f Mon Sep 17 00:00:00 2001 From: Tehan Date: Thu, 25 Jun 2026 19:16:15 +0200 Subject: [PATCH 09/10] fix(skill-memory): address third review round (cubic run 17d21f5f) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - provenance.ts: anchor the Base-directory regex to line-start (^…/gm) and take the LAST match — opencode appends the provenance line at the END of tool output, so a skill whose CONTENT echoes 'Base directory for this skill:' (e.g. a skill documenting skill-memory) would otherwise shadow the real line and misdirect recall to a bogus identity. - read-session-formatting.ts: narrow the marker-safe exclusion to CR/LF/tab only — a ')' does not break the single-line TC: skill() marker and the historian reads it as natural language, so a ')'-containing name is preserved verbatim (identity key) instead of dropped. - ARCHITECTURE.md: update the 'Skill-memory (motor memory)' Key Abstraction to the shipped reality (v50/51/52, multi-rung embedding+FTS recall, global-'*' union) — was stale (v37, 'P2 TODO'). Remove the PR-added duplicate 'Tag Identity (v3.3.1+)' section (upstream owns the lean '## Tag identity'; Tag Identity is unrelated to skill-memory — rebase scope-creep). Regression tests: provenance last-match + mid-line rejection; ')' name preserved + CR/LF/tab still dropped. 
--- ARCHITECTURE.md | 41 +------------------ .../skill-memory/provenance.test.ts | 22 ++++++++++ .../magic-context/skill-memory/provenance.ts | 15 +++++-- .../read-session-formatting.test.ts | 20 +++++++++ .../magic-context/read-session-formatting.ts | 9 +++- 5 files changed, 63 insertions(+), 44 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 600e695e..12f04554 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -219,8 +219,8 @@ Three effective modes; the heavier features (historian, nudges, adjunct injectio **Skill-memory (motor memory for skills):** - Purpose: Per-skill cross-session recall — when a skill declares `skill-memory: { enabled: true }` in its frontmatter, accumulated gotchas/discoveries/fixes/workflow steps surface in a `` block appended to the skill tool's RESULT on every load. Agents write back via `ctx_skill_note`; explicit recall (without re-loading) is `ctx_skill_recall`. The transparent after-hook is the primary path; the two tools are companions. -- Location: `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`; `src/hooks/magic-context/skill-tool-definition.ts` + the `skill-memory` branches in `src/hooks/magic-context/hook-handlers.ts`; `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/`. Table created in migration v37 (`skill_memory`). -- Pattern: Three-hook transparent augmentation (definition → before → after). The before-hook stashes a per-callID `intent` (bounded 60s TTL + 256-cap + session-delete clear). The after-hook parses the `Base directory for this skill: file:///...` line (cross-platform via `fileURLToPath`), reads the skill's `SKILL.md` from disk to recover its `skill-memory:` frontmatter (opencode strips it from the model-facing output), populates a session-scoped `SkillLoadRegistry` (NOT persisted), and calls `recallSkillMemoryBlock` (feature layer — shared core used by the tool too) to format the injected block. 
Append lands in the tool RESULT (conversation tail) — cache-safe by construction. P1 retrieval is flat: recency × hit_count, no embeddings (P2 rungs are designed and marked TODO in `recall.ts`). Per-skill opt-in via SKILL.md frontmatter (`enabled: true` required; `max_tokens` 1500 / `max_pinned_tokens` 4000 / `dedup_threshold` 0.92 are tunable). Optional dreamer `distill-skill-memory` task (opt-in, NOT a default) handles merge/prune/promote maintenance. +- Location: `src/features/magic-context/skill-memory/{frontmatter,provenance,storage,recall}.ts`; `src/hooks/magic-context/skill-tool-definition.ts` + the `skill-memory` branches in `src/hooks/magic-context/hook-handlers.ts`; `src/tools/ctx-skill-note/`, `src/tools/ctx-skill-recall/`. Tables: `skill_memory` (migration v50), embedding columns + content-linked `skill_memory_fts` vtable (v51), `origin_project`/`source_type` + global `'*'` collision-merge (v52). See the fuller "Skill-memory flow" subsection above for the per-phase wiring. +- Pattern: Three-hook transparent augmentation (definition → before → after). The before-hook stashes an `intent` keyed by `${sessionId}:${callID}` (bounded 60s TTL + 256-cap + per-session prefix-prune on delete). The after-hook parses the trailing `Base directory for this skill: file:///...` line (line-anchored, last-match, cross-platform via `fileURLToPath`), reads the skill's `SKILL.md` from disk to recover its `skill-memory:` frontmatter (opencode strips it from the model-facing output), populates a session-scoped `SkillLoadRegistry` (NOT persisted), and calls `recallSkillMemoryBlock` (feature layer — shared core used by the tool too) to format the injected block. Append lands in the tool RESULT (conversation tail) — cache-safe by construction. 
Retrieval is a multi-rung recall cascade (intent embeddings → FTS5 → flat recency×hit), unioning the skill's own partition with the global `'*'` partition; the dreamer `distill-skill-memory` task (opt-in, NOT a default) re-embeds stale vectors and runs merge/prune/promote maintenance. Per-skill opt-in via SKILL.md frontmatter (`enabled: true` required; `max_tokens` 1500 / `max_pinned_tokens` 4000 / `dedup_threshold` 0.92 + `ranking_*` weights are tunable). **TUI ↔ server RPC:** - Purpose: Localhost RPC for sidebar data, status/recomp dialogs, and TUI-action consumption. @@ -361,40 +361,3 @@ Each `tags` row is one taggable source-content unit (`message`, `file`, or `tool **Schema migrations:** `src/features/magic-context/migrations.ts` declares versioned migrations v1–v52 (`LATEST_SUPPORTED_VERSION = 52` in `storage-db.ts` is the schema-fence ceiling and MUST be bumped with every new migration; a unit test — `schema-version-fence.test.ts` — asserts `LATEST_SUPPORTED_VERSION === LATEST_MIGRATION_VERSION` so the two can't drift). 
Notable: v10 `tool_owner_message_id` (composite tool-tag identity); v11 `todo_synthetic_*` (synthetic-todowrite); v12 orphan `memory_embeddings` cleanup; v13 `pending_compaction_marker_state` (deferred-marker drain); v14 project-scoped key files + version counter; v15 `deferred_execute_state` (boundary execution); v16 context-limit cache sentinels; v17 multi-anchor note-nudge/auto-search JSON storage; v18 `pending_pi_compaction_marker_state`; v19 compartment-state lease table; v20 subagent invocation token accounting; v21 session lifetime work metrics; **v22 the v2.0 cache-architecture foundation (m[0]/m[1] split tables, `project_state` epoch counter, plus per-compartment `p1`–`p4` tier columns, `importance`, `episode_type`, `p1_embedding`, and `legacy` flag); v23 `compartment_events` (historian-extracted causal_incident / trajectory_correction, stored-not-rendered in v2.0); v24 `historian_runs` telemetry (per-run chunk range, compartment/fact/event counts, importance min/max/avg, status + failure reason, FK to `subagent_invocations`); v25 `pi_stable_id_scheme` (Pi stable-id cutover watermark); v26 `memory_mutation_log` + `cached_m1_bytes` (memory supersede-delta — non-additive in-session memory mutations render as an m[1] `` delta instead of bumping the project epoch, plus the frozen-m[1]-bytes cache column); v27 `tags.entry_fingerprint` (Pi fallback-tag adoption); v28 `git_sweep_coordinator` (lease/cooldown for cross-process git-commit sweeps); v29 `notes.anchor_ordinal` (note→conversation-tail traceback); v30 `cached_m0_system_hash` / `cached_m0_tool_set_hash` / `cached_m0_model_key` (HARD-bust m[0] markers — provider-side cache-eviction detection for the materialization taxonomy; the migration clears the m[0]/m[1] cache once so pre-v30 rows re-materialize cleanly); v31 ctx_reduce-nudge state (`last_nudge_undropped`, `channel2_nudge_state`, `last_emergency_input_sample` + startup heal zeroing legacy sticky/anchor nudge state); v32 protected-tail v3 boundary 
state + per-tag cached token counts (`tags.token_count` / `input_token_count` / `reasoning_token_count` — computed once on tag insert, summed for sidebar/boundary/nudge math); v33 `compartment_chunk_embeddings` table for cross-session semantic search across compartment windows; v34 `workspaces` / `workspace_members` tables plus `cached_m0_workspace_fingerprint` m[0] marker (with a one-shot m[0]/m[1] cache reset so pre-v34 rows re-materialize cleanly); v35 `workspaces.share_categories` default + epoch refresh for existing members; v36 `session_projects` ownership map + seed for pre-v36 embedded sessions; v37 emergency drain catch-up latch + historian drain failure backoff; v38 `transform_decisions` table for durable cache-event cause attribution; v40 index Pi fallback tool owners for stable-id cutover; v41 key detected context limits by model; v42 per-task dreamer scheduling state (Dreamer v2 A+B); v43 memory verification side table and verify watermarks; v44 memory classification scope and shareability columns; v45 retrospective content watermark and processed-window idempotence; v46 Primers v1 candidate and promoted primer storage; v47 compiled smart-note checks and runtime policy state; v48 DreamerV2 rework: memory→file mapping vs verification split, classify marker; v49 per-model embedding coexistence and active identity tracking; **v50 `skill_memory` table for per-skill cross-session recall (P1 — see "Skill-memory" in Key Abstractions) with `(skill_id, tier, project_identity, normalized_hash)` UNIQUE, plus `idx_skill_memory_lookup` and `idx_skill_memory_fts_prep` indexes for the flat-recall path; v51 skill-memory P2 — `delta_embedding` + `recall_count` columns + content-linked `skill_memory_fts` FTS5 vtable (intent+delta, porter+unicode61 tokenizer, INSERT/UPDATE/DELETE triggers, post-migration rebuild) for the multi-rung recall cascade; v52 skill-memory historian extraction — `origin_project` + `source_type` columns and global-tier `'*'` collision-merge (one 
row per global lesson, recallable from any repo, with `origin_project` preserved).** Migration runner uses `schema_migrations` table with version-ordered execution and sibling-startup race protection (duplicate-insert is tolerated). **Harness-aware behavior:** `src/shared/harness.ts` exposes `setHarness()`/`getHarness()` for the runtime to identify itself; production INSERTs into session-scoped tables tag rows with the current harness. Pi-specific session-resolution paths are skipped on OpenCode and vice versa. - -## Tag Identity (v3.3.1+) - -**Tag types:** `message`, `file`, `tool`. Each row in the `tags` table represents one source-content unit that can be tagged with `§N§` and dropped/truncated/replayed by the runtime. - -**Identity composition by type:** - -- **`message` and `file` tags:** identified by `(session_id, message_id)`. The `message_id` for these is a synthetic content id (`:p` for text, `:fileN` for files). These ids are globally unique within a session. - -- **`tool` tags:** identified by `(session_id, message_id, tool_owner_message_id)` — a *composite* identity. For tool tags, `message_id` is the OpenCode-generated callID (e.g. `read:32`). Pre-v3.3.1 the runtime keyed tool tags by callID alone, but OpenCode reuses a callID counter per assistant turn — so two assistant turns that each invoke `read:32` produced the SAME callID for different invocations. The fix: include the *owning assistant message id* in the key so each invocation gets its own row. - -**Schema enforcement:** schema migration v10 (`src/features/magic-context/migrations.ts`) adds `tool_owner_message_id` (`TEXT NULL`), a partial UNIQUE index `idx_tags_tool_composite` on `(session_id, message_id, tool_owner_message_id) WHERE type='tool' AND tool_owner_message_id IS NOT NULL`, and a partial lookup index `idx_tags_tool_null_owner` on `(session_id, message_id) WHERE type='tool' AND tool_owner_message_id IS NULL` to back lazy adoption. 
- -**Helper API surface (`src/features/magic-context/storage-tags.ts`):** - -- `getToolTagNumberByOwner(db, sessionId, callId, ownerMsgId)`: composite-identity lookup. -- `getNullOwnerToolTag(db, sessionId, callId)`: find a legacy NULL-owner orphan to lazily adopt. -- `adoptNullOwnerToolTag(db, tagId, ownerMsgId)`: attempt to claim a NULL-owner row (NULL guard ensures first claim wins). -- `getPersistedToolOwnerNearestPrior(db, sessionId, callId, beforeMessageId)`: derive the most recent prior owner for a tool result whose invocation isn't in the visible window. -- `deleteToolTagsByOwner(db, sessionId, ownerMsgId)`: cascade delete on `message.removed`. - -**Owner derivation (`src/hooks/magic-context/tag-messages.ts`):** - -For each tool observation in a transform pass: - -1. **Invocation parts** (`tool-invocation` / `tool_use`): owner = the message hosting the part. -2. **Result parts** (`tool` with output / `tool_result`): pop the FIFO queue of unpaired invocations for that callId; owner = the popped invocation's message id. -3. **Result-only window** (invocation compacted away): fall back to `getPersistedToolOwnerNearestPrior` for the most recent prior persisted owner; if none found, last-resort owner = the result's own message id. - -The same logic mirrors in `src/hooks/magic-context/read-session-chunk.ts: getRawSessionTagKeysThrough` so the drop queue produces composite keys that match what the tagger persisted. - -**Cleanup paths:** - -- `deleteTagsByMessageId(db, sessionId, messageId)` (called from `event-handler.ts` on `message.removed`) deletes BOTH content-id-scoped tags (text/file on the removed message) AND owner-scoped tool tags (`tool_owner_message_id == messageId`). -- `applyHeuristicCleanup` keys both the tag-side index and fingerprint-side map by composite `\x00`. The fingerprint VALUE includes ownerMsgId too, so cross-owner pairs with same `(toolName, args)` produce DISTINCT fingerprints and are NOT merged. 
- -**Legacy NULL-owner handling:** rows written by pre-v3.3.1 plugin versions have `tool_owner_message_id = NULL`. The Layer B backfill (`src/features/magic-context/tool-owner-backfill.ts`) populates those rows from OpenCode's session DB on plugin upgrade (lease-based concurrency, batched commits). When backfill is skipped (no OpenCode DB attached) lazy adoption converts orphans to non-NULL on the next observation. Drop queue and heuristic cleanup gracefully fall back to bare-callId match for unbackfilled NULL-owner rows. diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts index 098d7c2c..48ba3797 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.test.ts @@ -69,4 +69,26 @@ describe("parseSkillProvenance", () => { expect(result!.tier).toBe("global"); expect(result!.skillSource).toBe("opencode-global"); }); + + test("takes the LAST line-anchored match when skill CONTENT echoes the marker phrase", () => { + // A skill that documents skill-memory itself could contain the marker + // phrase in its body. opencode appends the REAL provenance line last, so + // last-match must win — a first-match parse would resolve the bogus URL. + const output = + `# Skill: skill-memory internals\n` + + `Example: Base directory for this skill: file:///decoy/path/evil-skill\n` + + `more prose\n` + + `Base directory for this skill: file://${HOME}/.config/opencode/skills/real-skill`; + const result = parseSkillProvenance(output, "real-skill"); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/real-skill/SKILL.md`); + expect(result!.tier).toBe("global"); + }); + + test("ignores a mid-line (non-line-anchored) marker mention", () => { + // "see the Base directory for this skill: file:///x" embedded mid-sentence + // (not at column 0) must NOT be captured. 
+ const output = `Note: see the Base directory for this skill: file:///wrong/x for details.\nBase directory for this skill: file://${HOME}/.config/opencode/skills/right`; + const result = parseSkillProvenance(output, "right"); + expect(result!.resolvedPath).toBe(`${HOME}/.config/opencode/skills/right/SKILL.md`); + }); }); diff --git a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts index 350e39be..3cf00000 100644 --- a/packages/plugin/src/features/magic-context/skill-memory/provenance.ts +++ b/packages/plugin/src/features/magic-context/skill-memory/provenance.ts @@ -10,13 +10,20 @@ export interface SkillProvenance { // Matches: "Base directory for this skill: file:///abs/path/to/skill/dir" // Uses fileURLToPath (not naive regex capture) for cross-platform correctness. -const BASE_DIR_REGEX = /Base directory for this skill: (file:\/\/\/[^\n\r]+)/m; +// Anchored to line-start (`^…/gm`) AND we take the LAST match: opencode appends +// this provenance line at the END of the tool output, so if the skill's own +// CONTENT contains the same phrase (e.g. a skill documenting skill-memory +// provenance), a first-match/unanchored parse would capture the wrong URL and +// misdirect recall to a bogus skill identity. Line-anchoring rejects mid-prose +// mentions; last-match ensures the real trailing provenance line wins even if an +// example block reproduces it at column 0. 
+const BASE_DIR_REGEX = /^Base directory for this skill: (file:\/\/\/[^\n\r]+)/gm; export function parseSkillProvenance(output: string, skillId: string): SkillProvenance | null { - const match = output.match(BASE_DIR_REGEX); - if (!match) return null; + const matches = [...output.matchAll(BASE_DIR_REGEX)]; + if (matches.length === 0) return null; - const fileUrl = match[1].trim(); + const fileUrl = matches[matches.length - 1][1].trim(); let absDir: string; try { // Normalize OS-native separators to forward slashes: on Windows diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts index 39d40272..ec3accc6 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.test.ts @@ -32,4 +32,24 @@ describe("extractToolCallSummaries — skill tool", () => { expect(extractToolCallSummaries(parts)).toEqual(["TC: skill"]); }); + + test("preserves a skill name containing ')' verbatim (does not drop the marker)", () => { + // A ")" doesn't break the single-line marker and the historian reads it as + // natural language — preserve the name (identity key) rather than drop it. 
+ const parts = [ + { + type: "tool", + tool: "skill", + state: { input: { name: "weird(name)" }, metadata: {} }, + }, + ]; + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill(weird(name))"]); + }); + + test("drops the name only when it contains a real line-breaker (CR/LF/tab)", () => { + const parts = [ + { type: "tool", tool: "skill", state: { input: { name: "bad\nname" }, metadata: {} } }, + ]; + expect(extractToolCallSummaries(parts)).toEqual(["TC: skill"]); + }); }); diff --git a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts index ce031c4f..338f210c 100644 --- a/packages/plugin/src/hooks/magic-context/read-session-formatting.ts +++ b/packages/plugin/src/hooks/magic-context/read-session-formatting.ts @@ -81,7 +81,14 @@ export function extractToolCallSummaries(parts: unknown[]): string[] { // mutated identity. if (p.tool === "skill") { const rawName = input && typeof input.name === "string" ? input.name : ""; - const markerSafe = rawName !== "" && !/[\r\n\t)]/.test(rawName); + // Only CR/LF/tab genuinely corrupt the single-line `TC: skill()` + // marker; a ")" does NOT break the line and the historian reads the + // marker as natural language (not a strict paren-matched parse), so a + // ")"-containing name is preserved VERBATIM rather than dropped — + // dropping the name loses historian attribution + recall keying, which + // is worse than a cosmetically-ambiguous paren. (Real skill directory + // names are slugs; this is defensive.) + const markerSafe = rawName !== "" && !/[\r\n\t]/.test(rawName); summaries.push(markerSafe ? `TC: skill(${rawName})` : "TC: skill"); continue; } From 9c6203914b3ee7ebcd53dad211cda1bae9e6cd9e Mon Sep 17 00:00:00 2001 From: Tehan Date: Fri, 26 Jun 2026 17:44:18 +0200 Subject: [PATCH 10/10] fix(skill-memory): gate ctx_memory cross-ref in skill guidance when memory off Rebase-onto-v0.29.0 resolution completion. 
Upstream ab4f01c1 added a memory.enabled gate that drops ALL ctx_memory mentions from the system prompt when memory is off (ctx_memory is then unregistered). The skill-memory guidance carried a 'those belong in ctx_memory' cross-reference that violated the new contract (buildMagicContextSection memory-gating tests). Parameterized ctxSkillMemoryGuidance(memoryEnabled) so the cross-ref drops when memory is off; skill-memory itself stays ungated (independent store). --- .../plugin/src/agents/magic-context-prompt.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/packages/plugin/src/agents/magic-context-prompt.ts b/packages/plugin/src/agents/magic-context-prompt.ts index 39e37851..f79e400e 100644 --- a/packages/plugin/src/agents/magic-context-prompt.ts +++ b/packages/plugin/src/agents/magic-context-prompt.ts @@ -70,13 +70,20 @@ function memoryGuidanceBlock(memoryEnabled: boolean): string { // `ctx_skill_recall` rehydrates accumulated notes for a skill without re-loading it. // Distinct from `ctx_memory`, which captures general project knowledge (not tied // to a specific skill). NOT gated on memory.enabled — skill-memory is an -// independent store (its own table + tool-result-tail injection). -const CTX_SKILL_MEMORY_GUIDANCE = `Use \`ctx_skill_note\` after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. Skip routine successes — only record gotchas, discoveries, fixes, and workflow steps that would save time on the next use. +// independent store (its own table + tool-result-tail injection) — BUT the +// `ctx_memory` cross-reference is dropped when memory is off, since ctx_memory is +// then unregistered and pointing at it would be misleading (mirrors MEMORY_GUIDANCE). +function ctxSkillMemoryGuidance(memoryEnabled: boolean): string { + const generalObservationsLine = memoryEnabled + ? 
"Do NOT use `ctx_skill_note` for general project observations — those belong in `ctx_memory`." + : "Do NOT use `ctx_skill_note` for general project observations."; + return `Use \`ctx_skill_note\` after using a skill when you hit a non-obvious issue, found a better approach, or fixed a skill-specific error. Skip routine successes — only record gotchas, discoveries, fixes, and workflow steps that would save time on the next use. Example: \`ctx_skill_note({skill: 'trilium', intent: 'bulk-retag a subtree', kind: 'gotcha', delta: 'ETAPI note PUT needs Content-Type: text/plain even for HTML content'})\` Example: \`ctx_skill_note({skill: 'test-driven-development', intent: 'fix flaky auth test', kind: 'fix', delta: 'Always mock Date.now() in auth tests — real timers cause intermittent failures'})\` -Do NOT use \`ctx_skill_note\` for general project observations — those belong in \`ctx_memory\`. +${generalObservationsLine} Use \`ctx_skill_recall\` to explicitly query accumulated notes for a skill without re-loading it. Call it when you want to recall gotchas/discoveries for a skill you have already loaded this session, or when you need notes without triggering a full skill load. Returns the \`\` block directly as a tool result. Example: \`ctx_skill_recall({skill: 'trilium', intent: 'bulk-retag a subtree'})\`.`; +} const BASE_INTRO = ( protectedTags: number, @@ -85,7 +92,7 @@ const BASE_INTRO = ( Use \`ctx_reduce\` to mark spent tagged content as discardable and reclaim space. Marking is NOT an immediate delete — it queues the content, which stays fully visible until space is actually needed (as soon as the next turn if you're already under pressure, much later if not), so mark a tool output as soon as you're done with it rather than hoarding the call for the end of the turn. The last ${protectedTags} tags are protected (marking one just queues it until it ages out). Syntax: "3-5", "1,2,9", or "1-5,8,12-15". 
Do not announce or narrate \`ctx_reduce\` drops — just call the tool silently. Saying "I'll drop these outputs" wastes tokens the user does not care about. ${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}${CTX_SKILL_MEMORY_GUIDANCE} +${memoryGuidanceBlock(memoryEnabled)}${ctxSkillMemoryGuidance(memoryEnabled)} Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. Examples: @@ -107,7 +114,7 @@ Before your turn finishes, consider using \`ctx_reduce\` to drop large tool outp * a tagging system they can't observe just wastes tokens and (empirically) primes * some models to emit malformed `§N">§` tokens at the start of their own text. */ const BASE_INTRO_NO_REDUCE = (memoryEnabled: boolean): string => `${CTX_NOTE_GUIDANCE} -${memoryGuidanceBlock(memoryEnabled)}${CTX_SKILL_MEMORY_GUIDANCE} +${memoryGuidanceBlock(memoryEnabled)}${ctxSkillMemoryGuidance(memoryEnabled)} Use \`ctx_search\` to search across project memories, indexed git commits, and this session's full conversation history (including compacted parts) from one query. Use \`ctx_expand\` to recover the raw conversation behind a \`\` summary in \`\` — pass its \`start\`/\`end\` attributes when the summary is not enough (exact wording, values, error text). **Search before asking the user**: If you can't remember or don't know something that might have been discussed before or stored in project memory, use \`ctx_search\` before asking the user. Examples: