diff --git a/.claude/agents/backend-dev.md b/.claude/agents/backend-dev.md new file mode 100644 index 000000000..8c289117e --- /dev/null +++ b/.claude/agents/backend-dev.md @@ -0,0 +1,40 @@ +--- +name: backend-dev +description: MemOS backend / library implementation sub-agent. Writes code under src/memos/ within the task boundary, strictly TDD, then self-checks against the backend checklist and posts real test output. +tools: Read, Edit, Write, Bash, Grep, Glob +--- + +Project facts: see `AGENTS.md`. + +## Responsibilities + +- Implement backend / library code under `src/memos//`; do not range outside the current task. +- Strict TDD: write a failing test in `tests//test_*.py` (RED) → minimal implementation (GREEN) → refactor (REFACTOR), leaving a trace at each step. +- Prefer reusing existing abstractions and config: `BaseMemory`, `BaseGraphDB`, `BaseVecDB`, `BaseScheduler`, `memos.configs.*`, `memos.dependency`. + +## Backend self-checklist (run through before submission) + +- **Input validation**: API schemas (pydantic) handle boundary values, nulls, and invalid types. +- **Error handling**: raise semantic exceptions from `memos.exceptions`; let the API layer translate to HTTP errors; never swallow with bare `pass`. +- **Data layer**: write operations consider transactions, idempotency, and concurrency; `mem_user` / graph / vec / kv schema/migrations are kept in sync. +- **Compatibility**: do not break the contract of top-level `memos.*` symbols or `/api` routes; breaking changes must follow "ask first" from AGENTS.md. +- **Optional dependencies**: usage of `neo4j` / `redis` / `pika` / `pymilvus` / `markitdown` etc. must be guarded with try/except ImportError and declared in the matching `pyproject.toml` extras. +- **Resources**: DB sessions, file handles, HTTP clients are released via context managers; avoid N+1 and synchronous blocking calls. 
+- **Logging**: use `memos.log.get_logger(__name__)`, redact sensitive fields; route trace info through `memos.context.context`. +- **Formatting**: always run `make format` before submission. + +## Output requirements + +Paste the real output of the real commands (do not just say "passed"): + +- `poetry run pytest tests/<module>/ -q` +- `make test` for full runs when needed +- `make format` (or `make pre_commit`) +- A list of changed files mapped to the originating requirement. + +## Do not + +- Touch `apps/`, `docker/`, `scripts/`, `pyproject.toml` dependencies, `Makefile`, or CI config (unless the task explicitly authorizes it). +- Review your own code (code-reviewer's job). +- Claim completion without test output. +- Skip `pre-commit` or commit with `--no-verify`. diff --git a/.claude/agents/code-reviewer.md b/.claude/agents/code-reviewer.md new file mode 100644 index 000000000..6e9b218cd --- /dev/null +++ b/.claude/agents/code-reviewer.md @@ -0,0 +1,40 @@ +--- +name: code-reviewer +description: Code-review sub-agent. Reviews MemOS diffs for contract consistency, Ruff / typing / optional-dependency handling, and test evidence; returns APPROVE or CHANGES_REQUESTED. +tools: Read, Bash, Grep, Glob +--- + +Project facts: see `AGENTS.md`. + +## Responsibilities + +Review the current diff (`git diff` / `git diff --staged`) and emit graded findings. + +## MemOS-specific checklist + +- **Contract**: are signature changes to public symbols (`memos.api.*`, top-level `memos.*`) backward compatible; if breaking, did it follow AGENTS.md "ask first". +- **Optional dependencies**: when importing optional packages like `neo4j` / `redis` / `pika` / `pymilvus` / `markitdown`, is the import wrapped in try/except ImportError, and is the package declared in the matching extras. +- **Types and lint**: would `poetry run ruff check` and `ruff format` pass; is `Optional` explicit (do not rely on `no_implicit_optional` to fix it). 
+- **Exceptions**: are semantic exceptions from `memos.exceptions` raised, not bare `Exception` / `RuntimeError`. +- **Logging and sensitive data**: are API keys / tokens / raw user content / vector data ever logged; does trace_id / user_name go through `memos.context.context` instead of `print`. +- **Test evidence**: are new/updated `tests//test_*.py` present; is real pytest output included. +- **Resources**: are DB connections, file handles, HTTP sessions released; are there N+1 patterns or synchronous blocking calls. + +## Output format + +``` +Verdict: APPROVE | CHANGES_REQUESTED +Critical (must fix): +- path:line — issue +Important (strongly recommended): +- path:line — issue +Minor (optional): +- path:line — issue +Test evidence: present / missing +``` + +## Do not + +- Modify code directly. +- Substitute for a human final approver. +- Grant APPROVE when pytest output is missing. diff --git a/.claude/agents/design-reviewer.md b/.claude/agents/design-reviewer.md new file mode 100644 index 000000000..e747b424c --- /dev/null +++ b/.claude/agents/design-reviewer.md @@ -0,0 +1,35 @@ +--- +name: design-reviewer +description: Design-review sub-agent. Reviews design docs across the four dimensions of architecture, interface, performance, and security, covering MemOS's multi-memory / multi-storage backend constraints. +tools: Read, Grep, Glob +--- + +Project facts: see `AGENTS.md`. + +## Responsibilities + +- Review the task's design materials (proposal / spec / design / tasks / test-cases, in whatever form they are kept). +- Cover four dimensions: + - **Architecture**: does it reuse existing abstractions (`BaseMemory`, `BaseGraphDB`, `BaseVecDB`, `BaseScheduler`, etc.), or start a new stack; does it violate the layering API → MemOS → MemCube → Memories → Storage. 
+ - **Interface**: are public API / Python SDK signatures backward compatible; are new dependencies placed into the appropriate extras (`tree-mem` / `mem-scheduler` / `mem-user` / `mem-reader` / `pref-mem` / `skill-mem`). + - **Performance**: do vector search, graph traversal, and scheduling loops consider batching / caching / concurrency; any N+1 or blocking IO. + - **Security**: is user isolation (`mem_user`) handled; do we avoid writing into `.env` / credentials / private paths. +- Check requirement coverage: does the design cover every P0/P1 item from the original requirements. +- Call out blockers (must fix) vs. suggestions (optional). + +## Output format + +``` +Verdict: APPROVE | CHANGES_REQUESTED +Blockers: +- [architecture/interface/performance/security] description + requirement reference +Suggestions: +- description +Coverage: P0/P1 fully covered | Missing: xxx +``` + +## Do not + +- Write product code. +- Review the code implementation (that is code-reviewer's job). +- Substitute for a human final approver. diff --git a/.claude/agents/explorer.md b/.claude/agents/explorer.md new file mode 100644 index 000000000..dd61be986 --- /dev/null +++ b/.claude/agents/explorer.md @@ -0,0 +1,35 @@ +--- +name: explorer +description: Read-only code exploration sub-agent. Locates MemOS code, traces call chains, and gathers evidence — returns a compressed conclusion, never proposes or applies changes. +tools: Read, Grep, Glob, Bash +--- + +Project facts: see `AGENTS.md`. + +## Responsibilities + +- Locate relevant modules, symbols, and call chains under `src/memos/` for the question the main agent asks. +- Distinguish core packages (`mem_os` / `mem_cube` / `mem_scheduler`) from optional backends (`graph_dbs/neo4j*`, `vec_dbs/milvus*`, etc.) and call out any extras dependencies. +- Trace execution paths and gather evidence (with `path:line` annotations + a one-line key snippet). +- Return a compressed conclusion only; do not echo raw bulk output. 
+ +## Output format + +- Conclusion first: one sentence that answers the main agent's question. +- Evidence list: `src/memos//.py:LINE` + a one-line note. +- Call chain (if applicable): `A.f -> B.g -> C.h`, annotating each hop with its file location. +- Uncertainty: explicitly flag "not found / needs further confirmation"; do not invent. + +## MemOS-specific locator hints + +- API routes: `src/memos/api/` + `tests/api/` +- Memory types: `src/memos/memories/` (textual / tree / preference / skill etc.) +- Storage backends: `src/memos/graph_dbs/`, `src/memos/vec_dbs/` +- Config and DI: `src/memos/configs/`, `src/memos/dependency.py` +- Plugin entry points: `pyproject.toml [project.entry-points."memos.plugins"]` + `extensions/` + +## Do not + +- Modify any file (read-only). +- Propose an implementation plan — return facts and locations only. +- Substitute for the judgment of design-reviewer / code-reviewer. diff --git a/.claude/agents/integration-tester.md b/.claude/agents/integration-tester.md new file mode 100644 index 000000000..49eea3bcd --- /dev/null +++ b/.claude/agents/integration-tester.md @@ -0,0 +1,39 @@ +--- +name: integration-tester +description: MemOS integration-testing sub-agent. Authors and executes pytest cases under tests/ based on the task's requirements and design, and emits real test reports. +tools: Read, Edit, Write, Bash, Grep, Glob +--- + +Project facts: see `AGENTS.md`. + +## Responsibilities + +- Based on the task's requirements and design docs, write pytest cases under `tests//`. +- Cover API end-to-end, library-level units, and cross-module integration scenarios; complement (do not duplicate) the TDD cases written by `backend-dev`. +- Run the tests and produce a real report. + +## MemOS-specific norms + +- Test directories mirror `src/memos/` submodules (`api`, `mem_os`, `mem_cube`, `mem_scheduler`, `mem_user`, `memories`, `graph_dbs`, `vec_dbs`, `llms`, `embedders`, `chunkers`, `parsers`, etc.). 
+- Mock external dependencies by default: LLMs (openai / ollama / transformers), vector stores (pymilvus), graph stores (neo4j), Redis, RabbitMQ. +- Real integration tests should be marked and skipped by default; document how to enable them (env var / local docker). +- Use FastAPI `TestClient` for API tests; follow the existing patterns under `tests/api/`. +- Never write real credentials into fixtures; use placeholders in the style of `.env.example`. + +## Output format + +``` +Test file: tests//test_.py +Coverage map: +- Requirement 1.1 → test_xxx +Command: poetry run pytest tests//test_.py -q +Output: + +Result: N passed, M failed +``` + +## Do not + +- Modify product code under `src/memos/` (backend-dev's job). +- Substitute for code-reviewer. +- Claim completion without real pytest output. diff --git a/.codex/agents/backend-dev.toml b/.codex/agents/backend-dev.toml new file mode 100644 index 000000000..510de8a0e --- /dev/null +++ b/.codex/agents/backend-dev.toml @@ -0,0 +1,33 @@ +name = "backend-dev" +description = "MemOS backend / library implementation sub-agent. Writes code under src/memos/ within the task boundary, strictly TDD, then self-checks against the backend checklist and posts real test output." +sandbox_mode = "workspace-write" +developer_instructions = """ +Project facts: see AGENTS.md. + +Responsibilities: +- Implement backend / library code under src/memos//; do not range outside the current task. +- Strict TDD: write a failing test in tests//test_*.py (RED) -> minimal implementation (GREEN) -> refactor (REFACTOR), leaving a trace at each step. +- Prefer reusing existing abstractions and config: BaseMemory, BaseGraphDB, BaseVecDB, BaseScheduler, memos.configs.*, memos.dependency. + +Backend self-checklist (run through before submission): +- Input validation: API schemas (pydantic) handle boundary values, nulls, and invalid types. 
+- Error handling: raise semantic exceptions from memos.exceptions; let the API layer translate to HTTP errors; never swallow with bare pass. +- Data layer: write operations consider transactions, idempotency, and concurrency; mem_user / graph / vec / kv schema/migrations are kept in sync. +- Compatibility: do not break the contract of top-level memos.* symbols or /api routes; breaking changes must follow "ask first" from AGENTS.md. +- Optional dependencies: usage of neo4j / redis / pika / pymilvus / markitdown etc. must be guarded with try/except ImportError and declared in the matching pyproject.toml extras. +- Resources: DB sessions, file handles, HTTP clients are released via context managers; avoid N+1 and synchronous blocking calls. +- Logging: use memos.log.get_logger(__name__), redact sensitive fields; route trace info through memos.context.context. +- Formatting: always run make format before submission. + +Output requirements (paste the real output of the real commands): +- poetry run pytest tests/<module>/ -q +- make test for full runs when needed +- make format (or make pre_commit) +- A list of changed files mapped to the originating requirement. + +Do not: +- Touch apps/, docker/, scripts/, pyproject.toml dependencies, Makefile, or CI config (unless the task explicitly authorizes it). +- Review your own code (code-reviewer's job). +- Claim completion without test output. +- Skip pre-commit or commit with --no-verify. +""" diff --git a/.codex/agents/code-reviewer.toml b/.codex/agents/code-reviewer.toml new file mode 100644 index 000000000..8a713b4e9 --- /dev/null +++ b/.codex/agents/code-reviewer.toml @@ -0,0 +1,29 @@ +name = "code-reviewer" +description = "Code-review sub-agent. Reviews MemOS diffs for contract consistency, Ruff / typing / optional-dependency handling, and test evidence; returns APPROVE or CHANGES_REQUESTED." +sandbox_mode = "read-only" +developer_instructions = """ +Project facts: see AGENTS.md. 
+ +Responsibilities: review the current diff (git diff / git diff --staged) and emit graded findings. + +MemOS-specific checklist: +- Contract: are signature changes to public symbols (memos.api.*, top-level memos.*) backward compatible; if breaking, did it follow AGENTS.md "ask first". +- Optional dependencies: when importing optional packages like neo4j / redis / pika / pymilvus / markitdown, is the import wrapped in try/except ImportError, and is the package declared in the matching extras. +- Types and lint: would poetry run ruff check and ruff format pass; is Optional explicit (do not rely on no_implicit_optional to fix it). +- Exceptions: are semantic exceptions from memos.exceptions raised, not bare Exception / RuntimeError. +- Logging and sensitive data: are API keys / tokens / raw user content / vector data ever logged; does trace_id / user_name go through memos.context.context instead of print. +- Test evidence: are new/updated tests//test_*.py present; is real pytest output included. +- Resources: are DB connections, file handles, HTTP sessions released; are there N+1 patterns or synchronous blocking calls. + +Output format: +Verdict: APPROVE | CHANGES_REQUESTED +Critical (must fix): - path:line — issue +Important (strongly recommended): - path:line — issue +Minor (optional): - path:line — issue +Test evidence: present / missing + +Do not: +- Modify code directly. +- Substitute for a human final approver. +- Grant APPROVE when pytest output is missing. +""" diff --git a/.codex/agents/design-reviewer.toml b/.codex/agents/design-reviewer.toml new file mode 100644 index 000000000..49c9b7be7 --- /dev/null +++ b/.codex/agents/design-reviewer.toml @@ -0,0 +1,27 @@ +name = "design-reviewer" +description = "Design-review sub-agent. Reviews design docs across the four dimensions of architecture, interface, performance, and security, covering MemOS's multi-memory / multi-storage backend constraints." 
+sandbox_mode = "read-only" +developer_instructions = """ +Project facts: see AGENTS.md. + +Responsibilities: +- Review the task's design materials (proposal / spec / design / tasks / test-cases, in whatever form they are kept). +- Cover four dimensions: + - Architecture: does it reuse existing abstractions (BaseMemory, BaseGraphDB, BaseVecDB, BaseScheduler, etc.), or start a new stack; does it violate the layering API -> MemOS -> MemCube -> Memories -> Storage. + - Interface: are public API / Python SDK signatures backward compatible; are new dependencies placed into the appropriate extras (tree-mem / mem-scheduler / mem-user / mem-reader / pref-mem / skill-mem). + - Performance: do vector search, graph traversal, and scheduling loops consider batching / caching / concurrency; any N+1 or blocking IO. + - Security: is user isolation (mem_user) handled; do we avoid writing into .env / credentials / private paths. +- Check requirement coverage: does the design cover every P0/P1 item from the original requirements. +- Call out blockers (must fix) vs. suggestions (optional). + +Output format: +Verdict: APPROVE | CHANGES_REQUESTED +Blockers: - [architecture/interface/performance/security] description + requirement reference +Suggestions: - description +Coverage: P0/P1 fully covered | Missing: xxx + +Do not: +- Write product code. +- Review the code implementation (that is code-reviewer's job). +- Substitute for a human final approver. +""" diff --git a/.codex/agents/explorer.toml b/.codex/agents/explorer.toml new file mode 100644 index 000000000..b8a94a3b1 --- /dev/null +++ b/.codex/agents/explorer.toml @@ -0,0 +1,30 @@ +name = "explorer" +description = "Read-only code exploration sub-agent. Locates MemOS code, traces call chains, gathers evidence, and returns a compressed conclusion — never proposes or applies changes." +sandbox_mode = "read-only" +developer_instructions = """ +Project facts: see AGENTS.md. 
+ +Responsibilities: +- Locate relevant modules, symbols, and call chains under src/memos/ for the question the main agent asks. +- Distinguish core packages (mem_os / mem_cube / mem_scheduler) from optional backends (graph_dbs/neo4j*, vec_dbs/milvus*, etc.) and call out any extras dependencies. +- Trace execution paths and gather evidence (with path:line annotations + a one-line key snippet). +- Return a compressed conclusion only; do not echo raw bulk output. + +Output format: +- Conclusion first: one sentence that answers the main agent's question. +- Evidence list: src/memos//.py:LINE + a one-line note. +- Call chain (if applicable): A.f -> B.g -> C.h, annotating each hop with its file location. +- Uncertainty: explicitly flag "not found / needs further confirmation"; do not invent. + +MemOS-specific locator hints: +- API routes: src/memos/api/ + tests/api/ +- Memory types: src/memos/memories/ (textual / tree / preference / skill etc.) +- Storage backends: src/memos/graph_dbs/, src/memos/vec_dbs/ +- Config and DI: src/memos/configs/, src/memos/dependency.py +- Plugin entry points: pyproject.toml [project.entry-points."memos.plugins"] + extensions/ + +Do not: +- Modify any file (read-only). +- Propose an implementation plan — return facts and locations only. +- Substitute for the judgment of design-reviewer / code-reviewer. +""" diff --git a/.codex/agents/integration-tester.toml b/.codex/agents/integration-tester.toml new file mode 100644 index 000000000..5baa4621c --- /dev/null +++ b/.codex/agents/integration-tester.toml @@ -0,0 +1,30 @@ +name = "integration-tester" +description = "MemOS integration-testing sub-agent. Authors and executes pytest cases under tests/ based on the task's requirements and design, and emits real test reports." +sandbox_mode = "workspace-write" +developer_instructions = """ +Project facts: see AGENTS.md. + +Responsibilities: +- Based on the task's requirements and design docs, write pytest cases under tests//. 
+- Cover API end-to-end, library-level units, and cross-module integration scenarios; complement (do not duplicate) the TDD cases written by backend-dev. +- Run the tests and produce a real report. + +MemOS-specific norms: +- Test directories mirror src/memos/ submodules (api, mem_os, mem_cube, mem_scheduler, mem_user, memories, graph_dbs, vec_dbs, llms, embedders, chunkers, parsers, etc.). +- Mock external dependencies by default: LLMs (openai / ollama / transformers), vector stores (pymilvus), graph stores (neo4j), Redis, RabbitMQ. +- Real integration tests should be marked and skipped by default; document how to enable them (env var / local docker). +- Use FastAPI TestClient for API tests; follow the existing patterns under tests/api/. +- Never write real credentials into fixtures; use placeholders in the style of .env.example. + +Output format: +Test file: tests//test_.py +Coverage map: Requirement 1.1 -> test_xxx +Command: poetry run pytest tests//test_.py -q +Output: +Result: N passed, M failed + +Do not: +- Modify product code under src/memos/ (backend-dev's job). +- Substitute for code-reviewer. +- Claim completion without real pytest output. +""" diff --git a/.gitignore b/.gitignore index 51e2f7ab4..9beb3c165 100644 --- a/.gitignore +++ b/.gitignore @@ -239,3 +239,7 @@ outputs evaluation/data/ test_add_pipeline.py test_file_pipeline.py + +# spec +.ai-tasks/ +openspecs/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..cd885b3c4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,155 @@ +# AGENTS.md + +> Single source of truth for the project across AI runtimes. Claude Code, Codex, Cursor, Copilot, etc. all defer to this file. +> Runtime-specific adaptation belongs in each runtime's own file (Claude reads `CLAUDE.md`); do not mix it in here. + +## Project Overview + +**MemOS / MemoryOS**: a memory operating system for LLM agents. 
Python library plus a FastAPI service, providing multiple memory types (textual / tree / preference / skill / KV cache / LoRA parametric) plus scheduling, version management, and vector & graph storage. + +- **Repository**: https://github.com/MemTensor/MemOS +- **Documentation**: https://memos-docs.openmem.net/home/overview/ +- **PyPI**: https://pypi.org/project/MemoryOS/ +- **License**: Apache-2.0 +- **Top-level package**: `src/memos/`. Distribution name `MemoryOS`; import name `memos`. +- **CLI**: `memos` (entry `memos.cli:main`) +- **API service**: `memos.api.start_api:app` + +## Repository Layout + +| Path | Purpose | +|------|---------| +| `src/memos/mem_os/` | `MOS` / `MOSCore` — top-level Memory OS entry | +| `src/memos/mem_cube/` | `GeneralMemCube` — memory container aggregating multiple memory types | +| `src/memos/memories/` | Memory implementations: `textual/`, `activation/`, `parametric/` | +| `src/memos/mem_scheduler/` | Memory scheduler + monitors + ORM + task scheduling | +| `src/memos/mem_user/` | User / multi-tenant management (MySQL / Redis backends) | +| `src/memos/mem_chat/` `mem_reader/` `mem_agent/` `mem_feedback/` `multi_mem_cube/` | Chat sessions, ingest pipeline, agent integration, feedback channel, multi-cube routing | +| `src/memos/llms/` `embedders/` `vec_dbs/` `graph_dbs/` `chunkers/` `parsers/` `reranker/` | Provider implementations (`base.py` + `factory.py` + each backend) | +| `src/memos/api/` | FastAPI service (routers / handlers / middleware / MCP server) | +| `src/memos/configs/` | All pydantic configuration classes (one-to-one with the modules above) | +| `src/memos/context/` | Cross-thread context (trace_id / user / env) | +| `tests/` | pytest cases, subdirectories mirror `src/memos/` | +| `apps/` | Independent sub-projects, each with its own README; not part of the main Harness flow | +| `extensions/` | Official plugin examples | +| `docker/` `docs/` `evaluation/` `scripts/` | Deployment, documentation, evaluation, helper 
scripts | +| `.claude/agents/`, `.codex/agents/` | Project-recommended AI sub-agent definitions | + +## Command Cheatsheet + +- Install: `make install` (= `poetry install --extras all --with dev --with test` + pre-commit + push hook) +- Start API: `make serve` +- Export OpenAPI: `make openapi` (writes to `docs/openapi.json`) +- Run full tests: `make test` +- Run a single test: `poetry run pytest tests//test_xxx.py -q` +- Lint + format: `make format` +- Full pre-commit: `make pre_commit` +- Build: `poetry build` (publishing is automated by `python-release.yml` on GitHub release) + +## Core API + +### Python top-level entries (`from memos import ...`) + +| Symbol | Purpose | Source | +|--------|---------|--------| +| `MOS` | Memory OS top-level entry (inherits `MOSCore`) | `memos.mem_os.main` | +| `GeneralMemCube` | General memory container | `memos.mem_cube.general` | +| `MOSConfig` / `GeneralMemCubeConfig` | Primary configs | `memos.configs.mem_os` / `memos.configs.mem_cube` | +| `GeneralScheduler` / `SchedulerFactory` / `SchedulerConfigFactory` | Scheduler and factories | `memos.mem_scheduler.*` | + +Common `MOS` methods: `MOS.simple()` (auto-configure from env), `register_mem_cube(cube)`, `add(...)`, `search(...)`, `chat(...)`, `create_user(...)` / `list_users()`. + +### API entry + +- ASGI app: `memos.api.start_api:app` +- Routers: `src/memos/api/routers/` (`admin_router`, `product_router`, `server_router`) +- OpenAPI contract: `docs/openapi.json` (must run `make openapi` after touching the API) + +## Import Patterns + +| Use | Import | +|-----|--------| +| Top-level entries | `from memos import MOS, GeneralMemCube, MOSConfig` | +| Config classes | `from memos.configs. 
<module> import <ConfigClass>` | +| Any provider factory | `from memos.<category>.factory import <Category>Factory` | +| Logger | `from memos.log import get_logger`; `logger = get_logger(__name__)` | +| Context (trace) | `from memos.context.context import get_current_trace_id, get_current_user_name` | +| Exceptions | `from memos.exceptions import <ExceptionClass>` | + +## Provider Matrix + +Every provider follows the same three-piece pattern: `base.py` abstract class + `factory.py` registry + `configs/<category>.py` config. The authoritative list of registered backends is the factory's `backend_to_class`; the snapshot below is provided for quick reference: + +| Category | Base class | Factory | Registered backends | +|----------|-----------|---------|---------------------| +| LLM | `BaseLLM` | `LLMFactory` | `openai` / `openai_new` / `azure` / `ollama` / `huggingface` / `huggingface_singleton` / `vllm` / `qwen` / `deepseek` | +| Embedder | `BaseEmbedder` | `EmbedderFactory` | `ollama` / `sentence_transformer` / `ark` / `universal_api` | +| Vector DB | `BaseVecDB` | `VecDBFactory` | `qdrant` / `milvus` | +| Graph DB | `BaseGraphDB` | `GraphStoreFactory` | `neo4j` / `neo4j_community` / `nebular` / `polardb` / `postgres` | +| Chunker | `BaseChunker` | `ChunkerFactory` | `sentence` / `markdown` / `simple` / `charactertext` | +| Parser | `BaseParser` | `ParserFactory` | `markitdown` | +| Reranker | `BaseReranker` | `RerankerFactory` | `cosine_local` / `http_bge` / `http_bge_strategy` / `concat` / `noop` | +| Memory | `BaseMemory` (+ `BaseTextMemory` / `BaseActMemory` / `BaseParaMemory`) | `MemoryFactory` | `naive_text` / `general_text` / `tree_text` / `simple_tree_text` / `pref_text` / `simple_pref_text` / `kv_cache` / `vllm_kv_cache` / `lora` | +| Scheduler | `BaseScheduler` | `SchedulerFactory` | `general` / `optimized` | + +## Adding a New Provider + +Mirror any existing provider in the same category: + +1. Implement `src/memos/<category>/<backend>.py`, inheriting the `base.py` abstract class and matching the signatures of existing providers. +2. 
Add a pydantic config in `src/memos/configs/<category>.py` and register it in `<Category>ConfigFactory.backend_to_class`. +3. Register the implementation in `<Category>Factory.backend_to_class` in `src/memos/<category>/factory.py`. +4. Third-party dependencies **must** go into an optional extras group in `pyproject.toml` (`tree-mem` / `mem-scheduler` / `mem-user` / `mem-reader` / `pref-mem` / `skill-mem`) and be added to `all`; guard the import with try/except ImportError and raise a clear "install extras X" message on failure. +5. Add tests under `tests/<category>/test_<backend>.py`; external HTTP / model loading must be mocked. + +## Behavior Boundaries + +### Always do + +- Write a failing test first (TDD), placed under `tests/<module>/test_*.py`. +- Before claiming a task is done, run verification commands and paste the real output (at minimum `make format` plus the relevant pytest run). +- Keep changes within the directories the current task authorizes; cross-module edits need to be called out and approved first. +- Use `memos.log.get_logger(__name__)` for logging; route trace info through `memos.context.context` — do not `print`. +- Optional third-party dependencies (neo4j / redis / pika / pymilvus / markitdown, etc.) must be guarded with try/except ImportError and declared in the matching extras group. +- After touching `src/memos/api/`, run `make openapi` to refresh `docs/openapi.json`. + +### Ask first + +- Modifying `pyproject.toml` dependencies or the Python version constraint. +- Touching public routes, request/response models, or the OpenAPI contract under `src/memos/api/`. +- Changing DB schema, migrations, `mem_user` tables, or `graph_dbs` graph models. +- Deleting files or doing wide-scope renames of public APIs (`memos.*` top-level symbols). +- Editing `Makefile`, `.pre-commit-config.yaml`, `pyproject.toml [tool.*]`, or `.github/workflows/`. + +### Never do (IMPORTANT) + +- **Never** commit `.env`, `private/`, `.private-paths`, `tmp/`, `*.log`, secrets, tokens, or model credentials. 
+- Do not log real API keys, raw user data, or vector contents, and do not include them in tests/fixtures. +- Do not skip `pre-commit` or push with `--no-verify` (the `scripts/check-public-push.sh` pre-push hook is enforced). +- Do not claim tests pass without real pytest output as evidence. +- Do not add third-party dependencies to core `dependencies` — they must go into optional extras. +- Do not run wide-scope `rm -rf` outside `src/`; do not `git push --force` or `git reset --hard origin/*`. + +## Code Style + +- Format and lint with Ruff (configured in `pyproject.toml [tool.ruff]`); `make format` must pass before commit. +- Type annotations are required on public functions, API schemas, and config classes; implicit `Optional` is not allowed (enforced via pre-commit). +- All configs and API schemas use Pydantic v2. +- Logging: `logger.info("... %s", x)` form — do not pre-format with f-strings before passing to the logger. +- Exceptions: library code raises semantic exceptions from `memos.exceptions`, never bare `Exception` / `RuntimeError`; the API layer translates them to HTTP errors in `memos.api.exceptions`. +- File naming: source `snake_case.py`, tests `test_<name>.py`. + +## Change → Test Mapping + +- Edit `src/memos/<module>/`: at minimum run `pytest tests/<module>/ -q`; run `make test` once more before merging. +- Edit `src/memos/api/`: run `tests/api/` and `make openapi` to confirm the OpenAPI spec did not change unexpectedly. +- Edit `pyproject.toml` dependencies: `poetry lock --no-update`, then `make test`. +- Edit `Makefile` / pre-commit / Ruff config: run `make pre_commit` locally over the whole tree. + +## Git Conventions + +- Commits: Conventional Commits (`feat:` / `fix:` / `chore:` / `refactor:` / `docs:`), subject line ≤ 72 chars. +- Branches: `feat/<topic>` / `fix/<topic>` / `dev-YYYYMMDD-v<version>`. +- `main` is protected — all changes go through PRs; never force-push to `main`; do not skip git hooks. +- Do not commit paths listed in `.private-paths`. 
+- The PR template lives at `.github/PULL_REQUEST_TEMPLATE.md` — its checklist must be fully ticked. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..c2402f7c7 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,23 @@ +# CLAUDE.md + +## Claude Code Entry + +Project facts live in `AGENTS.md`. This file only covers Claude Code runtime adaptation. + +## Sub-agents + +Five project-recommended sub-agents live under `.claude/agents/*.md`. Claude Code loads them automatically; the main agent should dispatch by task boundary: + +| Agent | Permissions | When to use | +|-------|-------------|-------------| +| `explorer` | Read-only | Locate code, trace call chains, gather evidence | +| `design-reviewer` | Read-only | Review design docs (architecture / interface / performance / security / requirement coverage) | +| `code-reviewer` | Read-only | Review diffs and return APPROVE or CHANGES_REQUESTED | +| `backend-dev` | Read-write | Implement backend / library code under `src/memos/` (TDD) | +| `integration-tester` | Read-write | Author and run integration / end-to-end cases under `tests/` | + +The main repo has no frontend stack, so no `frontend-dev` is provided; TypeScript sub-projects under `apps/` use their own AI configuration. + +## Project knowledge + +Before starting a task, run `ls docs/`. `docs/openapi.json` is the source of truth for the API contract; after touching `src/memos/api/`, run `make openapi` to regenerate it. diff --git a/README.md b/README.md index a1e2e6aa1..c162a7ac8 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,8 @@ Your lobsters and Hermes Agents now have **the best** memory system — choose * | 🧠 [**memos-local-plugin 2.0**](https://github.com/hijzy/MemOS/tree/main/apps/memos-local-plugin) |
  • One local-first memory core for **Hermes Agent** and **OpenClaw**.
  • Self-evolving memory: L1 trace, L2 policy, L3 world model,
    and crystallized Skills driven by feedback.
| 🌐 [Website](https://memos-claw.openmem.net/) · 📖 [Docs](https://memos-docs.openmem.net/cn/openclaw/local_plugin) · 🐙 [GitHub](https://github.com/hijzy/MemOS/tree/main/apps/memos-local-plugin) · 📦 [NPM](https://www.npmjs.com/package/@memtensor/memos-local-plugin) | | ☁️ [**OpenClaw Cloud Plugin**](https://x.com/MemOS_dev/status/2019254160919769171?s=20) |
  • [Reduces token usage by 72%.](https://x.com/MemOS_dev/status/2020854044583924111)
  • [Multi-agent memory sharing by `user_id`.](https://x.com/MemOS_dev/status/2020538135487062094)
| 🖥️ [MemOS Dashboard](https://memos-dashboard.openmem.net/login/) · 📖 [Full Tutorial](https://memos-docs.openmem.net/openclaw/guide#_4-update-plugin) | +> **🐳 Docker Deployment Note**: When running memos-local-plugin in Docker containers, you must specify the config location using `MEMOS_HOME` environment variable or `--home` CLI flag. See [Docker Configuration Guide](./apps/memos-local-plugin/README.md#docker-deployment) for details. +
diff --git a/apps/memos-local-openclaw/src/skill/__tests__/evolver.test.ts b/apps/memos-local-openclaw/src/skill/__tests__/evolver.test.ts new file mode 100644 index 000000000..31dd7abef --- /dev/null +++ b/apps/memos-local-openclaw/src/skill/__tests__/evolver.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { SkillEvolver } from "../evolver"; +import type { SqliteStore } from "../../storage/sqlite"; +import type { RecallEngine } from "../../recall/engine"; +import type { PluginContext, Skill } from "../../types"; + +describe("SkillEvolver - autoInstall configuration", () => { + let mockStore: SqliteStore; + let mockEngine: RecallEngine; + let mockContext: PluginContext; + let evolver: SkillEvolver; + + beforeEach(() => { + mockStore = { + getSkill: vi.fn(), + updateSkill: vi.fn(), + setTaskSkillMeta: vi.fn(), + getTasksBySkillStatus: vi.fn(() => []), + getChunksByTask: vi.fn(() => []), + setChunkSkillId: vi.fn(), + } as any; + + mockEngine = {} as RecallEngine; + + mockContext = { + workspaceDir: "/tmp/test-workspace", + config: {}, + log: { + info: vi.fn(), + debug: vi.fn(), + error: vi.fn(), + }, + } as any; + + evolver = new SkillEvolver(mockStore, mockEngine, mockContext); + }); + + it("should NOT auto-install when autoInstall is false, even for install_recommended skills", () => { + // Setup: autoInstall explicitly disabled + mockContext.config.skillEvolution = { + enabled: true, + autoInstall: false, + }; + + // Create a skill that would trigger install_recommended + // (≥3 scripts, >20KB total size) + const skill: Skill = { + id: "test-skill-1", + name: "test-skill", + status: "active", + version: 1, + dirPath: "/tmp/skills/test-skill", + installed: 0, + description: "Test skill with many companion files", + chunks: 10, + createdAt: Date.now(), + updatedAt: Date.now(), + }; + + // Mock the installer's install method + const installSpy = vi.fn(); + (evolver as any).installer = { + install: installSpy, + }; + + 
// Call autoInstallIfNeeded + (evolver as any).autoInstallIfNeeded(skill); + + // Assert: install should NOT be called when autoInstall is false + expect(installSpy).not.toHaveBeenCalled(); + }); + + it("should auto-install when autoInstall is true", () => { + // Setup: autoInstall enabled + mockContext.config.skillEvolution = { + enabled: true, + autoInstall: true, + }; + + const skill: Skill = { + id: "test-skill-2", + name: "test-skill-2", + status: "active", + version: 1, + dirPath: "/tmp/skills/test-skill-2", + installed: 0, + description: "Test skill", + chunks: 5, + createdAt: Date.now(), + updatedAt: Date.now(), + }; + + const installSpy = vi.fn(); + (evolver as any).installer = { + install: installSpy, + }; + + // Call autoInstallIfNeeded + (evolver as any).autoInstallIfNeeded(skill); + + // Assert: install should be called when autoInstall is true + expect(installSpy).toHaveBeenCalledWith("test-skill-2"); + }); + + it("should NOT auto-install when skill status is not active", () => { + mockContext.config.skillEvolution = { + enabled: true, + autoInstall: true, + }; + + const skill: Skill = { + id: "test-skill-3", + name: "test-skill-3", + status: "draft", + version: 1, + dirPath: "/tmp/skills/test-skill-3", + installed: 0, + description: "Draft skill", + chunks: 5, + createdAt: Date.now(), + updatedAt: Date.now(), + }; + + const installSpy = vi.fn(); + (evolver as any).installer = { + install: installSpy, + }; + + (evolver as any).autoInstallIfNeeded(skill); + + // Assert: install should NOT be called for non-active skills + expect(installSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/memos-local-openclaw/src/skill/evolver.ts b/apps/memos-local-openclaw/src/skill/evolver.ts index 42516e8b0..fc8d319cf 100644 --- a/apps/memos-local-openclaw/src/skill/evolver.ts +++ b/apps/memos-local-openclaw/src/skill/evolver.ts @@ -370,17 +370,14 @@ Use selectedIndex 0 when none is highly relevant.`; if (skill.status !== "active") return; const 
explicitAutoInstall = this.ctx.config.skillEvolution?.autoInstall ?? DEFAULTS.skillAutoInstall; - if (explicitAutoInstall) { - this.installer.install(skill.id); - this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (explicit autoInstall=true)`); + if (!explicitAutoInstall) { + this.ctx.log.debug(`SkillEvolver: skipping auto-install for "${skill.name}" (autoInstall=false)`); return; } + this.installer.install(skill.id); const manifest = SkillInstaller.buildManifest(skill.dirPath, !!skill.installed, skill.name); - if (manifest.installMode === "install_recommended") { - this.installer.install(skill.id); - this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (install_recommended: ${manifest.scriptsCount} scripts, ${Math.round(manifest.totalSize / 1024)}KB)`); - } + this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (autoInstall=true, mode=${manifest.installMode}, ${manifest.scriptsCount} scripts, ${Math.round(manifest.totalSize / 1024)}KB)`); } private readSkillContent(skill: Skill): string | null { diff --git a/apps/memos-local-openclaw/src/viewer/html.ts b/apps/memos-local-openclaw/src/viewer/html.ts index 5e4456e71..8c48f7017 100644 --- a/apps/memos-local-openclaw/src/viewer/html.ts +++ b/apps/memos-local-openclaw/src/viewer/html.ts @@ -7237,7 +7237,7 @@ var _providerDefaults={ gemini:{endpoint:'',embModel:'text-embedding-004',chatModel:'gemini-2.0-flash'}, zhipu:{endpoint:'https://open.bigmodel.cn/api/paas/v4',embModel:'embedding-3',chatModel:'glm-4-flash'}, deepseek:{endpoint:'https://api.deepseek.com/v1',chatModel:'deepseek-chat'}, - bailian:{endpoint:'https://dashscope.aliyuncs.com/compatible-mode/v1',embModel:'text-embedding-v3',chatModel:'qwen-max'}, + bailian:{endpoint:'https://coding.dashscope.aliyuncs.com/v1',embModel:'text-embedding-v3',chatModel:'qwen-max'}, moonshot:{endpoint:'https://api.moonshot.cn/v1',chatModel:'moonshot-v1-8k'} }; function onProviderChange(section){ diff --git 
a/apps/memos-local-openclaw/tests/skill-auto-install.test.ts b/apps/memos-local-openclaw/tests/skill-auto-install.test.ts new file mode 100644 index 000000000..6fdbb19f5 --- /dev/null +++ b/apps/memos-local-openclaw/tests/skill-auto-install.test.ts @@ -0,0 +1,231 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import * as fs from "fs"; +import * as os from "os"; +import * as path from "path"; +import { SqliteStore } from "../src/storage/sqlite"; +import { SkillEvolver } from "../src/skill/evolver"; +import { RecallEngine } from "../src/recall/engine"; +import type { Logger, PluginContext, MemosLocalConfig, Task } from "../src/types"; + +const noopLog: Logger = { + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, +}; + +let tmpDir: string; +let store: SqliteStore; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memos-autoinstall-")); + const dbPath = path.join(tmpDir, "memos.db"); + store = new SqliteStore(dbPath, noopLog); +}); + +afterEach(() => { + store.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe("SkillEvolver autoInstall behavior", () => { + it("should NOT auto-install install_recommended skills when autoInstall=false", async () => { + const ctx: PluginContext = { + stateDir: tmpDir, + workspaceDir: tmpDir, + config: { + skillEvolution: { + enabled: true, + autoInstall: false, + autoEvaluate: false, + }, + } as MemosLocalConfig, + log: noopLog, + }; + + // Create a skill with install_recommended characteristics (3+ scripts) + const skillDir = path.join(tmpDir, "skills-repo", "deploy-automation"); + const scriptsDir = path.join(skillDir, "scripts"); + fs.mkdirSync(scriptsDir, { recursive: true }); + + fs.writeFileSync(path.join(skillDir, "SKILL.md"), `--- +name: "deploy-automation" +description: "Automated deployment scripts" +version: 1 +--- + +## Steps +1. 
Run deploy scripts +`, "utf-8"); + + // Create 3 scripts to trigger install_recommended + fs.writeFileSync(path.join(scriptsDir, "deploy.sh"), "#!/bin/bash\necho deploy", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "rollback.sh"), "#!/bin/bash\necho rollback", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "health-check.sh"), "#!/bin/bash\necho check", "utf-8"); + + const skillId = "deploy-automation-001"; + store.insertSkill({ + id: skillId, + name: "deploy-automation", + description: "Automated deployment", + version: 1, + status: "active", + tags: "", + sourceType: "task", + dirPath: skillDir, + installed: 0, + owner: "agent:main", + visibility: "private", + qualityScore: 8, + createdAt: Date.now(), + updatedAt: Date.now(), + }); + + const engine = new RecallEngine(store, ctx); + const evolver = new SkillEvolver(store, engine, ctx); + + // Trigger the private autoInstallIfNeeded through reflection + const skill = store.getSkill(skillId); + expect(skill).not.toBeNull(); + + // Use type assertion to access private method for testing + (evolver as any).autoInstallIfNeeded(skill); + + // Verify the skill was NOT installed + const updatedSkill = store.getSkill(skillId); + expect(updatedSkill?.installed).toBe(0); + + const workspaceSkillDir = path.join(tmpDir, "skills", "deploy-automation"); + expect(fs.existsSync(workspaceSkillDir)).toBe(false); + }); + + it("should auto-install install_recommended skills when autoInstall=true", async () => { + const ctx: PluginContext = { + stateDir: tmpDir, + workspaceDir: tmpDir, + config: { + skillEvolution: { + enabled: true, + autoInstall: true, + autoEvaluate: false, + }, + } as MemosLocalConfig, + log: noopLog, + }; + + // Create a skill with install_recommended characteristics + const skillDir = path.join(tmpDir, "skills-repo", "build-tools"); + const scriptsDir = path.join(skillDir, "scripts"); + fs.mkdirSync(scriptsDir, { recursive: true }); + + fs.writeFileSync(path.join(skillDir, "SKILL.md"), `--- +name: 
"build-tools" +description: "Build automation tools" +version: 1 +--- + +## Steps +1. Run build scripts +`, "utf-8"); + + // Create 3 scripts to trigger install_recommended + fs.writeFileSync(path.join(scriptsDir, "build.sh"), "#!/bin/bash\necho build", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "test.sh"), "#!/bin/bash\necho test", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "package.sh"), "#!/bin/bash\necho package", "utf-8"); + + const skillId = "build-tools-001"; + store.insertSkill({ + id: skillId, + name: "build-tools", + description: "Build automation", + version: 1, + status: "active", + tags: "", + sourceType: "task", + dirPath: skillDir, + installed: 0, + owner: "agent:main", + visibility: "private", + qualityScore: 8, + createdAt: Date.now(), + updatedAt: Date.now(), + }); + + const engine = new RecallEngine(store, ctx); + const evolver = new SkillEvolver(store, engine, ctx); + + const skill = store.getSkill(skillId); + expect(skill).not.toBeNull(); + + // Use type assertion to access private method for testing + (evolver as any).autoInstallIfNeeded(skill); + + // Verify the skill WAS installed + const updatedSkill = store.getSkill(skillId); + expect(updatedSkill?.installed).toBe(1); + + const workspaceSkillDir = path.join(tmpDir, "skills", "build-tools"); + expect(fs.existsSync(workspaceSkillDir)).toBe(true); + expect(fs.existsSync(path.join(workspaceSkillDir, "scripts", "build.sh"))).toBe(true); + }); + + it("should respect default autoInstall=true when config is not specified", async () => { + const ctx: PluginContext = { + stateDir: tmpDir, + workspaceDir: tmpDir, + config: { + skillEvolution: { + enabled: true, + // autoInstall not specified, should default to true + }, + } as MemosLocalConfig, + log: noopLog, + }; + + const skillDir = path.join(tmpDir, "skills-repo", "default-test"); + const scriptsDir = path.join(skillDir, "scripts"); + fs.mkdirSync(scriptsDir, { recursive: true }); + + fs.writeFileSync(path.join(skillDir, 
"SKILL.md"), `--- +name: "default-test" +description: "Test default behavior" +version: 1 +--- + +## Steps +1. Test +`, "utf-8"); + + fs.writeFileSync(path.join(scriptsDir, "script1.sh"), "#!/bin/bash\necho 1", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "script2.sh"), "#!/bin/bash\necho 2", "utf-8"); + fs.writeFileSync(path.join(scriptsDir, "script3.sh"), "#!/bin/bash\necho 3", "utf-8"); + + const skillId = "default-test-001"; + store.insertSkill({ + id: skillId, + name: "default-test", + description: "Default test", + version: 1, + status: "active", + tags: "", + sourceType: "task", + dirPath: skillDir, + installed: 0, + owner: "agent:main", + visibility: "private", + qualityScore: 8, + createdAt: Date.now(), + updatedAt: Date.now(), + }); + + const engine = new RecallEngine(store, ctx); + const evolver = new SkillEvolver(store, engine, ctx); + + const skill = store.getSkill(skillId); + (evolver as any).autoInstallIfNeeded(skill); + + // Should be installed by default + const updatedSkill = store.getSkill(skillId); + expect(updatedSkill?.installed).toBe(1); + }); +}); diff --git a/apps/memos-local-plugin/README.md b/apps/memos-local-plugin/README.md index d4871a34c..6f896abbe 100644 --- a/apps/memos-local-plugin/README.md +++ b/apps/memos-local-plugin/README.md @@ -97,3 +97,75 @@ npm pack bash install.sh --version ./memtensor-memos-local-plugin-1.0.0-beta.1.tgz ``` +## Configuration + +The plugin reads its configuration from `config.yaml` in the runtime directory. The location is resolved in the following priority order: + +1. **`MEMOS_HOME` environment variable** — points to the runtime root directory (e.g., `/opt/data/.hermes/memos-plugin`) +2. **`MEMOS_CONFIG_FILE` environment variable** — points directly to the config file (e.g., `/opt/data/.hermes/memos-plugin/config.yaml`) +3. **`--home` CLI flag** (bridge.cts only) — specifies the runtime root directory +4. 
**Default path** — `~/.hermes/memos-plugin/` or `~/.openclaw/memos-plugin/` based on the agent + +### Docker Deployment + +When running the daemon in a Docker container, you must explicitly specify the config location if it differs from the default path. There are three ways to do this: + +#### Option 1: Environment Variable (Recommended) + +Set `MEMOS_HOME` to point to the runtime directory: + +```dockerfile +ENV MEMOS_HOME=/opt/data/home/.hermes/memos-plugin +CMD ["node", "bridge.cts", "--agent=hermes", "--daemon"] +``` + +#### Option 2: CLI Flag + +Pass `--home` directly to the bridge command: + +```dockerfile +CMD ["node", "bridge.cts", "--agent=hermes", "--daemon", "--home=/opt/data/home/.hermes/memos-plugin"] +``` + +#### Option 3: Config File Path + +Set `MEMOS_CONFIG_FILE` to point directly to the config file: + +```dockerfile +ENV MEMOS_CONFIG_FILE=/opt/data/home/.hermes/memos-plugin/config.yaml +CMD ["node", "bridge.cts", "--agent=hermes", "--daemon"] +``` + +### Example Docker Deployment + +For the Hermes Agent Docker image: + +```dockerfile +FROM nousresearch/hermes-agent:latest + +# Install memos-local-plugin +RUN bash -c "$(curl -fsSL https://raw.githubusercontent.com/MemTensor/MemOS/main/apps/memos-local-plugin/install.sh)" + +# Set the config location +ENV MEMOS_HOME=/opt/data/.hermes/memos-plugin + +# Start daemon in background, then run Hermes +CMD node /opt/data/.hermes/plugins/memos-local-plugin/bridge.cts --agent=hermes --daemon && hermes chat +``` + +### Troubleshooting + +If you see warnings like: + +``` +config file not found at /opt/data/.hermes/memos-plugin/config.yaml; using defaults +``` + +This means the bridge process is looking in the wrong location. Check: + +1. Verify your `config.yaml` exists: `ls -la ~/.hermes/memos-plugin/config.yaml` +2. Set `MEMOS_HOME` or use `--home` to point to the correct directory +3. 
Ensure the path matches the location where `install.sh` created the config + +When config is missing, the plugin falls back to defaults (local embedding, no LLM provider), which will break summarization and reflection features. + diff --git a/apps/memos-local-plugin/adapters/openclaw/index.ts b/apps/memos-local-plugin/adapters/openclaw/index.ts index ba56848cb..9c318889a 100644 --- a/apps/memos-local-plugin/adapters/openclaw/index.ts +++ b/apps/memos-local-plugin/adapters/openclaw/index.ts @@ -29,6 +29,11 @@ import path from "node:path"; import { fileURLToPath } from "node:url"; import { createOpenClawBridge, type BridgeHandle } from "./bridge.js"; +import { + acquireOpenClawRuntimeLock, + DuplicateOpenClawRuntimeError, + type OpenClawRuntimeLockHandle, +} from "./runtime-lock.js"; import { registerOpenClawTools } from "./tools.js"; import type { DefinedPluginEntry, @@ -37,6 +42,7 @@ import type { } from "./openclaw-api.js"; import { bootstrapMemoryCoreFull } from "../../core/pipeline/index.js"; +import { resolveHome } from "../../core/config/index.js"; import { rootLogger, memoryBuffer } from "../../core/logger/index.js"; import type { MemoryCore } from "../../agent-contract/memory-core.js"; import { startHttpServer } from "../../server/http.js"; @@ -75,10 +81,9 @@ interface PluginRuntime { core: MemoryCore; bridge: BridgeHandle; /** - * The viewer HTTP server. May be `null` if the configured port was - * already in use at boot — in that case OpenClaw runs headless - * (memory still works, just no UI). We don't retry: the user can - * free the port and restart the gateway. + * The viewer HTTP server. OpenClaw must own this port; if binding + * fails we abort bootstrap instead of running a second headless + * runtime that would still register hooks and write memory. 
*/ viewer: ServerHandle | null; shutdown: () => Promise; @@ -125,119 +130,172 @@ function resolveViewerStaticRoot(): string | undefined { } } -async function createRuntime(api: OpenClawPluginApi): Promise { +const OPENCLAW_VIEWER_PORT = 18799; + +async function createRuntime( + api: OpenClawPluginApi, + runtimeLock: OpenClawRuntimeLockHandle, +): Promise { const log = rootLogger.child({ channel: "adapters.openclaw" }); log.info("plugin.bootstrap", { version: PLUGIN_VERSION }); - // Bootstrap core — returns `{ core, home, config }` so we know which - // viewer port to bind. - const { core, config, home } = await bootstrapMemoryCoreFull({ - agent: "openclaw", - namespace: { agentKind: "openclaw", profileId: "main" }, - pkgVersion: PLUGIN_VERSION, - }); - await core.init(); - - // Anonymous ARMS telemetry. Mirrors `bridge.cts`'s setup so OpenClaw - // emits the same `plugin_started` / `daily_active` / `memos_search` - // / `memory_ingested` / `feedback_submitted` / `viewer_opened` - // events under the same `memos_local_hermes_v2` group as Hermes. - // Without this every OpenClaw user was invisible in ARMS — only the - // hermes-side `bridge.cts` was emitting events. - // - // Order matters: - // 1. `new Telemetry` reads `config.telemetry` and the credentials - // file under the plugin source root. - // 2. `bindTelemetry` must run before any turn so that - // `memory-core.ts`'s `if (telemetry)` guards see a non-null - // instance on the very first `onTurnStart`. - // 3. `trackPluginStarted` immediately after also fires - // `daily_active` (with persistent dedup; see sender.ts). - // `core.shutdown()` flushes telemetry as part of its `finally` - // block, so we don't need to await `telemetry.shutdown()` here. - const telemetry = new Telemetry( - config.telemetry ?? 
{}, - home.root, - PLUGIN_VERSION, - rootLogger.child({ channel: "core.telemetry" }), - resolvePluginRoot(), - ); - ( - core as { bindTelemetry?: (t: InstanceType) => void } - ).bindTelemetry?.(telemetry); - telemetry.trackPluginStarted("openclaw"); - - const bridge = createOpenClawBridge({ - agent: "openclaw", - core, - log: api.logger, - }); - - // OpenClaw's viewer port is fixed at :18799 (hermes uses :18800). - // We ignore `config.viewer.port` for the same reason `bridge.cts` - // does: old config.yaml files baked in the legacy single-port - // :18799 used by both agents, and we don't want hermes to collide - // with us because of stale YAML. - const OPENCLAW_VIEWER_PORT = 18799; + let core: MemoryCore | null = null; let viewer: ServerHandle | null = null; + try { - viewer = await startHttpServer( - { - core, - home, - logTail: () => memoryBuffer().tail({ limit: 200 }), - telemetry, - }, - { - port: OPENCLAW_VIEWER_PORT, - host: config.viewer.bindHost, - staticRoot: resolveViewerStaticRoot(), - agent: "openclaw", - }, + // Bootstrap core — returns `{ core, home, config }` so we know which + // viewer port to bind. + const boot = await bootstrapMemoryCoreFull({ + agent: "openclaw", + namespace: { agentKind: "openclaw", profileId: "main" }, + pkgVersion: PLUGIN_VERSION, + }); + core = boot.core; + const { config, home } = boot; + await core.init(); + + // Anonymous ARMS telemetry. Mirrors `bridge.cts`'s setup so OpenClaw + // emits the same `plugin_started` / `daily_active` / `memos_search` + // / `memory_ingested` / `feedback_submitted` / `viewer_opened` + // events under the same `memos_local_hermes_v2` group as Hermes. + // Without this every OpenClaw user was invisible in ARMS — only the + // hermes-side `bridge.cts` was emitting events. + // + // Order matters: + // 1. `new Telemetry` reads `config.telemetry` and the credentials + // file under the plugin source root. + // 2. 
`bindTelemetry` must run before any turn so that + // `memory-core.ts`'s `if (telemetry)` guards see a non-null + // instance on the very first `onTurnStart`. + // 3. `trackPluginStarted` immediately after also fires + // `daily_active` (with persistent dedup; see sender.ts). + // `core.shutdown()` flushes telemetry as part of its `finally` + // block, so we don't need to await `telemetry.shutdown()` here. + const telemetry = new Telemetry( + config.telemetry ?? {}, + home.root, + PLUGIN_VERSION, + rootLogger.child({ channel: "core.telemetry" }), + resolvePluginRoot(), ); - api.logger.info(`memos-local: viewer live at ${viewer.url}`); - } catch (err) { - const e = err as NodeJS.ErrnoException; - if (e?.code === "EADDRINUSE") { - api.logger.warn( - `memos-local: viewer port :${OPENCLAW_VIEWER_PORT} is already in use — ` + - `running headless. Free the port and restart the gateway to expose it.`, + ( + core as { bindTelemetry?: (t: InstanceType) => void } + ).bindTelemetry?.(telemetry); + telemetry.trackPluginStarted("openclaw"); + + const bridge = createOpenClawBridge({ + agent: "openclaw", + core, + log: api.logger, + }); + + // OpenClaw's viewer port is fixed at :18799 (hermes uses :18800). + // We ignore `config.viewer.port` for the same reason `bridge.cts` + // does: old config.yaml files baked in the legacy single-port + // :18799 used by both agents, and we don't want hermes to collide + // with us because of stale YAML. + try { + viewer = await startHttpServer( + { + core, + home, + logTail: () => memoryBuffer().tail({ limit: 200 }), + telemetry, + }, + { + port: OPENCLAW_VIEWER_PORT, + host: config.viewer.bindHost, + staticRoot: resolveViewerStaticRoot(), + agent: "openclaw", + }, ); - } else { - api.logger.error("memos-local: viewer failed to start", { - err: e?.message ?? 
String(err), - }); + api.logger.info(`memos-local: viewer live at ${viewer.url}`); + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e?.code === "EADDRINUSE") { + api.logger.error( + `memos-local: viewer port :${OPENCLAW_VIEWER_PORT} is already in use — ` + + `refusing duplicate/headless OpenClaw runtime.`, + ); + } else { + api.logger.error("memos-local: viewer failed to start", { + err: e?.message ?? String(err), + }); + } + throw err; } - } - return { - core, - bridge, - viewer, - async shutdown() { - if (viewer) { + const runtimeCore = core; + const runtimeViewer = viewer; + return { + core: runtimeCore, + bridge, + viewer: runtimeViewer, + async shutdown() { + if (runtimeViewer) { + try { + await runtimeViewer.close(); + } catch (err) { + api.logger.warn("memos-local: viewer close error", { + err: err instanceof Error ? err.message : String(err), + }); + } + } try { - await viewer.close(); + await runtimeCore.shutdown(); } catch (err) { - api.logger.warn("memos-local: viewer close error", { + api.logger.warn("memos-local: shutdown error", { err: err instanceof Error ? err.message : String(err), }); } - } + runtimeLock.release(); + }, + }; + } catch (err) { + await closeViewerAfterFailedBootstrap(viewer); + if (core) { try { await core.shutdown(); - } catch (err) { - api.logger.warn("memos-local: shutdown error", { - err: err instanceof Error ? 
err.message : String(err), - }); + } catch { + /* best-effort cleanup after failed bootstrap */ } - }, - }; + } + runtimeLock.release(); + throw err; + } +} + +async function closeViewerAfterFailedBootstrap( + viewer: ServerHandle | null, +): Promise { + if (!viewer) return; + try { + await viewer.close(); + } catch { + /* best-effort cleanup after failed bootstrap */ + } } // ─── Registration ────────────────────────────────────────────────────────── function register(api: OpenClawPluginApi): void { + let runtimeLock: OpenClawRuntimeLockHandle; + try { + runtimeLock = acquireOpenClawRuntimeLock({ + home: resolveHome("openclaw"), + pluginId: PLUGIN_ID, + version: PLUGIN_VERSION, + viewerPort: OPENCLAW_VIEWER_PORT, + }); + } catch (err) { + const duplicate = err instanceof DuplicateOpenClawRuntimeError; + api.logger.error("memos-local: duplicate OpenClaw runtime blocked", { + err: err instanceof Error ? err.message : String(err), + code: duplicate ? err.code : (err as { code?: unknown }).code, + }); + throw err; + } + // 1. Memory capability (prompt prelude) — register synchronously so the // host immediately knows who owns the memory slot, even if bootstrap // fails later. @@ -295,15 +353,17 @@ function register(api: OpenClawPluginApi): void { // tools register a shell now and wait for runtime inside execute(). let runtime: PluginRuntime | null = null; let bootstrapError: Error | null = null; - const bootstrapPromise = createRuntime(api) + const bootstrapPromise = createRuntime(api, runtimeLock) .then((r) => { runtime = r; api.logger.info("memos-local: plugin ready"); }) .catch((err) => { bootstrapError = err instanceof Error ? err : new Error(String(err)); + const duplicate = err instanceof DuplicateOpenClawRuntimeError; api.logger.error("memos-local: bootstrap failed", { err: bootstrapError.message, + code: duplicate ? 
err.code : (err as { code?: unknown }).code, }); }); diff --git a/apps/memos-local-plugin/adapters/openclaw/runtime-lock.ts b/apps/memos-local-plugin/adapters/openclaw/runtime-lock.ts new file mode 100644 index 000000000..55d2f6e43 --- /dev/null +++ b/apps/memos-local-plugin/adapters/openclaw/runtime-lock.ts @@ -0,0 +1,165 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; + +import type { ResolvedHome } from "../../core/config/index.js"; + +const LOCK_DIRNAME = "openclaw-runtime.lock"; +const OWNER_FILENAME = "owner.json"; +const UNWRITTEN_OWNER_STALE_MS = 30_000; + +export interface OpenClawRuntimeLockOwner { + pluginId: string; + version: string; + pid: number; + token: string; + startedAt: number; + dbFile: string; + viewerPort: number; +} + +export interface OpenClawRuntimeLockHandle { + lockDir: string; + owner: OpenClawRuntimeLockOwner; + release(): void; +} + +export interface AcquireOpenClawRuntimeLockOptions { + home: ResolvedHome; + pluginId: string; + version: string; + viewerPort: number; + pid?: number; + now?: () => number; + unwrittenOwnerStaleMs?: number; +} + +export class DuplicateOpenClawRuntimeError extends Error { + readonly code = "duplicate_instance"; + readonly lockDir: string; + readonly owner: OpenClawRuntimeLockOwner | null; + + constructor(lockDir: string, owner: OpenClawRuntimeLockOwner | null) { + const detail = owner + ? 
`pid=${owner.pid} startedAt=${new Date(owner.startedAt).toISOString()}` + : "owner=unknown"; + super(`memos-local OpenClaw runtime is already active (${detail})`); + this.name = "DuplicateOpenClawRuntimeError"; + this.lockDir = lockDir; + this.owner = owner; + } +} + +export function openClawRuntimeLockDir(home: ResolvedHome): string { + return path.join(home.daemonDir, LOCK_DIRNAME); +} + +export function acquireOpenClawRuntimeLock( + options: AcquireOpenClawRuntimeLockOptions, +): OpenClawRuntimeLockHandle { + const lockDir = openClawRuntimeLockDir(options.home); + const ownerFile = path.join(lockDir, OWNER_FILENAME); + const now = options.now ?? Date.now; + const pid = options.pid ?? process.pid; + const unwrittenOwnerStaleMs = + options.unwrittenOwnerStaleMs ?? UNWRITTEN_OWNER_STALE_MS; + + fs.mkdirSync(options.home.daemonDir, { recursive: true }); + + for (;;) { + try { + fs.mkdirSync(lockDir); + break; + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e.code !== "EEXIST") throw err; + + const owner = readOwner(ownerFile); + if (owner && pidIsAlive(owner.pid)) { + throw new DuplicateOpenClawRuntimeError(lockDir, owner); + } + if (!owner && !lockLooksStale(lockDir, now(), unwrittenOwnerStaleMs)) { + throw new DuplicateOpenClawRuntimeError(lockDir, null); + } + + fs.rmSync(lockDir, { recursive: true, force: true }); + } + } + + const owner: OpenClawRuntimeLockOwner = { + pluginId: options.pluginId, + version: options.version, + pid, + token: randomUUID(), + startedAt: now(), + dbFile: options.home.dbFile, + viewerPort: options.viewerPort, + }; + + try { + fs.writeFileSync(ownerFile, JSON.stringify(owner, null, 2), "utf8"); + } catch (err) { + fs.rmSync(lockDir, { recursive: true, force: true }); + throw err; + } + + let released = false; + const releaseSync = () => { + if (released) return; + released = true; + const current = readOwner(ownerFile); + if (current?.token !== owner.token) return; + fs.rmSync(lockDir, { recursive: true, force: true 
}); + }; + const onExit = () => releaseSync(); + process.once("exit", onExit); + + return { + lockDir, + owner, + release() { + releaseSync(); + process.off("exit", onExit); + }, + }; +} + +function readOwner(ownerFile: string): OpenClawRuntimeLockOwner | null { + try { + const parsed = JSON.parse(fs.readFileSync(ownerFile, "utf8")) as Partial; + if ( + typeof parsed.pluginId !== "string" || + typeof parsed.version !== "string" || + typeof parsed.pid !== "number" || + typeof parsed.token !== "string" || + typeof parsed.startedAt !== "number" || + typeof parsed.dbFile !== "string" || + typeof parsed.viewerPort !== "number" + ) { + return null; + } + return parsed as OpenClawRuntimeLockOwner; + } catch { + return null; + } +} + +function pidIsAlive(pid: number): boolean { + if (!Number.isInteger(pid) || pid <= 0) return false; + try { + process.kill(pid, 0); + return true; + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + return code === "EPERM"; + } +} + +function lockLooksStale(lockDir: string, now: number, staleMs: number): boolean { + try { + const stat = fs.statSync(lockDir); + return now - stat.mtimeMs >= staleMs; + } catch { + return true; + } +} diff --git a/apps/memos-local-plugin/bridge.cts b/apps/memos-local-plugin/bridge.cts index 4e7ea1ad4..81848acf7 100644 --- a/apps/memos-local-plugin/bridge.cts +++ b/apps/memos-local-plugin/bridge.cts @@ -42,6 +42,7 @@ interface BridgeArgs { noViewer: boolean; tcpPort?: number; agent: "openclaw" | "hermes"; + home?: string; } type BridgeStatus = "connected" | "reconnecting" | "disconnected" | "unknown"; @@ -61,6 +62,7 @@ function parseArgs(argv: readonly string[]): BridgeArgs { else if (raw.startsWith("--tcp=")) args.tcpPort = Number(raw.slice(6)); else if (raw === "--agent=hermes") args.agent = "hermes"; else if (raw === "--agent=openclaw") args.agent = "openclaw"; + else if (raw.startsWith("--home=")) args.home = raw.slice(7); } return args; } @@ -235,11 +237,21 @@ async function main(): 
Promise { runtimeModule("core/telemetry/index.ts", "dist/core/telemetry/index.js") )) as typeof import("./core/telemetry/index.js"); + // Resolve home early so we can use resolveHome with explicit defaultHome + const { resolveHome } = (await importEsm( + runtimeModule("core/config/paths.ts", "dist/core/config/paths.js") + )) as typeof import("./core/config/paths.js"); + + const resolvedHome = args.home + ? resolveHome(args.agent, args.home) + : undefined; + const { core, config, home } = await bootstrapMemoryCoreFull({ agent: args.agent, namespace: { agentKind: args.agent, profileId: "default" }, pkgVersion, hostLlmBridge: args.daemon ? null : lazyHostLlmBridge, + home: resolvedHome, }); const telemetry = new Telemetry( diff --git a/apps/memos-local-plugin/core/config/index.ts b/apps/memos-local-plugin/core/config/index.ts index a06aec2bb..1466c27ef 100644 --- a/apps/memos-local-plugin/core/config/index.ts +++ b/apps/memos-local-plugin/core/config/index.ts @@ -48,7 +48,11 @@ export async function loadConfig(home: ResolvedHome): Promise } catch (err) { const e = err as NodeJS.ErrnoException; if (e.code === "ENOENT") { - warnings.push(`config file not found at ${home.configFile}; using defaults`); + warnings.push( + `config file not found at ${home.configFile}; using defaults. ` + + `To fix: set MEMOS_HOME or MEMOS_CONFIG_FILE env var, or use --home CLI flag. 
` + + `See: https://github.com/MemTensor/MemOS/tree/main/apps/memos-local-plugin#configuration` + ); } else if (MemosError.is(err)) { throw err; } else { diff --git a/apps/memos-local-plugin/core/experience/feedback-builder.ts b/apps/memos-local-plugin/core/experience/feedback-builder.ts index 5de5dad3e..f5a948d40 100644 --- a/apps/memos-local-plugin/core/experience/feedback-builder.ts +++ b/apps/memos-local-plugin/core/experience/feedback-builder.ts @@ -60,6 +60,9 @@ const MIN_SIGNIFICANCE = 0.5; const MERGE_SIMILARITY = 0.72; const MAX_TITLE_CHARS = 120; const MAX_LINE_CHARS = 360; +// Strict scenarios: only full credit counts as a pass (covers {-1,+1} and 0..1 +// reward scales — anything short of 1 means the task was not fully solved). +const FULL_PASS_REWARD = 1; export async function runFeedbackExperience( input: FeedbackExperienceInput, @@ -157,8 +160,15 @@ async function buildDraft(args: { const text = cleanLine(args.text, MAX_LINE_CHARS); const lower = args.text.toLowerCase(); const verifier = extractVerifierMeta(args.feedback.raw, lower); - const pass = isPositiveSignal(args.feedback, lower, args.classified.shape, verifier); - const fail = isNegativeSignal(args.feedback, lower, args.classified.shape, verifier); + // Authoritative success/failure from the verifier payload or episode reward. + // Strict scenarios (coding/math/verifier): ONLY a full pass is positive — a + // partial pass such as 3/4 (or reward 0) is a failure, never a positive exemplar. + const outcome = objectiveOutcome(args.feedback.raw, args.episode?.rTask); + const lexicalPass = isPositiveSignal(args.feedback, lower, args.classified.shape); + const lexicalFail = isNegativeSignal(args.feedback, lower, args.classified.shape); + // Objective outcome dominates; lexical signals only decide when it is unknown. 
+ const pass = outcome === "pass" || (outcome === "unknown" && lexicalPass && !lexicalFail); + const fail = outcome === "fail" || (outcome === "unknown" && lexicalFail); const hasAvoid = /\b(avoid|do not|don't|never|stop|wrong|incorrect|failed|fail)\b/i.test(args.text) || /不要|别|不能|错误|失败|反例/.test(args.text); @@ -169,21 +179,22 @@ async function buildDraft(args: { type = "success_pattern"; polarity = "positive"; skillEligible = true; - } else if (fail && hasAvoid) { - type = "failure_avoidance"; + } else if (fail) { + // Objective failure: never a positive exemplar, never skill-eligible. + type = hasAvoid ? "failure_avoidance" : verifier ? "verifier_feedback" : "repair_instruction"; polarity = "negative"; } else if (args.classified.shape === "preference") { type = "preference"; - polarity = fail ? "negative" : "neutral"; + polarity = "neutral"; } else if (hasAvoid) { type = "failure_avoidance"; polarity = "negative"; - } else if (args.classified.shape === "correction" || args.classified.shape === "constraint" || fail) { + } else if (args.classified.shape === "correction" || args.classified.shape === "constraint") { type = "repair_instruction"; - polarity = fail ? "negative" : "neutral"; + polarity = "neutral"; } else if (verifier) { type = "verifier_feedback"; - polarity = pass ? "positive" : fail ? "negative" : "neutral"; + polarity = "neutral"; } else { type = "repair_instruction"; polarity = "neutral"; @@ -437,27 +448,25 @@ function isPositiveSignal( feedback: FeedbackRow, lower: string, shape: string, - verifier: Record | null, ): boolean { if (feedback.polarity === "positive") return true; if (shape === "positive") return true; - if (verifier && lower.includes("pass")) return true; - return /\b(success|succeeded|passed|task succeeded|works well|correct)\b/.test(lower) - || /成功|通过|正确|太好了|写得很好/.test(lower); + // No substring "pass"/"通过" match here: "passed 3/4" is a partial failure, not + // a positive signal. A genuine full pass is decided by objectiveOutcome(). 
+ return /\b(success|succeeded|works well|looks good|lgtm|correct)\b/.test(lower) + || /成功|正确|太好了|写得很好/.test(lower); } function isNegativeSignal( feedback: FeedbackRow, lower: string, shape: string, - verifier: Record | null, ): boolean { if (feedback.polarity === "negative") return true; if (shape === "negative") return true; if (shape === "correction") return true; - if (verifier && /\b(fail|failed|counterexample)\b/.test(lower)) return true; - return /\b(fail|failed|wrong|incorrect|counterexample|not acceptable)\b/.test(lower) - || /失败|错误|不对|反例/.test(lower); + return /\b(fail|failed|wrong|incorrect|counterexample|not acceptable|timeout|time limit exceeded)\b/.test(lower) + || /失败|错误|不对|反例|超时/.test(lower); } function collectTraceIds(input: FeedbackExperienceInput): TraceId[] { @@ -510,26 +519,88 @@ function extractVerifierMeta(raw: unknown, lower: string): Record = { source: "feedback" }; if (looksVerifier) meta.verifier = true; - if (typeof raw === "object" && raw != null) { - const obj = raw as Record; - for (const key of ["verdict", "score", "reward", "passed", "taskId", "family", "reason"]) { - if (obj[key] !== undefined) meta[key] = obj[key]; + if (src) { + // Read from the verifier payload (top-level or nested under `raw.verifier`) + // so the discriminative fields (reward/passed/total) are preserved. + for (const key of ["verdict", "score", "reward", "passed", "total", "taskId", "family", "reason"]) { + if (src[key] !== undefined) meta[key] = src[key]; } } return Object.keys(meta).length > 1 || looksVerifier ? meta : null; } -function verifierScore(raw: unknown): number { - if (typeof raw !== "object" || raw == null) return 0; - const obj = raw as Record; - for (const key of ["score", "reward", "r", "rating"]) { - const n = Number(obj[key]); - if (Number.isFinite(n)) return Math.min(1, Math.abs(n)); +/** + * Return the object that actually holds verifier fields. 
Benchmark gateways nest + * them under `raw.verifier`; older/manual feedback puts them at the top level. + */ +function verifierContainer(raw: unknown): Record | null { + let obj: unknown = raw; + if (typeof obj === "string") { + try { + obj = JSON.parse(obj); + } catch { + return null; + } } - return 0; + if (typeof obj !== "object" || obj == null) return null; + const rec = obj as Record; + if (rec.verifier && typeof rec.verifier === "object") { + return rec.verifier as Record; + } + return rec; +} + +interface VerifierStats { + reward: number | null; + passed: number | null; + total: number | null; +} + +function verifierStats(raw: unknown): VerifierStats { + const src = verifierContainer(raw); + const num = (v: unknown): number | null => { + const n = Number(v); + return Number.isFinite(n) ? n : null; + }; + if (!src) return { reward: null, passed: null, total: null }; + return { + reward: num(src.reward ?? src.score ?? src.r ?? src.rating), + passed: num(src.passed), + total: num(src.total), + }; +} + +type ObjectiveOutcome = "pass" | "fail" | "unknown"; + +/** + * Authoritative success/failure from the verifier payload, falling back to the + * episode reward. Strict scenarios (coding/math/verifier) treat ONLY a full pass + * as positive: a partial pass (passed < total) or reward below full credit is a + * failure, never a positive exemplar. + */ +function objectiveOutcome(raw: unknown, rTask: number | null | undefined): ObjectiveOutcome { + const { reward, passed, total } = verifierStats(raw); + if (passed != null && total != null && total > 0) { + return passed >= total ? "pass" : "fail"; + } + if (reward != null) { + // Epsilon guards against a float full-pass (e.g. 0.9999998) being misread as fail. + return reward >= FULL_PASS_REWARD - 1e-9 ? 
"pass" : "fail"; + } + if (typeof rTask === "number") { + if (rTask > 0) return "pass"; + if (rTask < 0) return "fail"; + } + return "unknown"; +} + +function verifierScore(raw: unknown): number { + const { reward } = verifierStats(raw); + return reward == null ? 0 : Math.min(1, Math.abs(reward)); } function traceHint(trace: TraceRow): string { diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 4974ee16d..cfb37f64e 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -177,14 +177,23 @@ export async function bootstrapMemoryCoreFull( options: BootstrapOptions, ): Promise { const home = options.home ?? resolveHome(options.agent); - const config = - options.config ?? - (await loadConfig(home)).config; + const configResult = options.config + ? { config: options.config, fromDisk: true, warnings: [], source: home.configFile } + : await loadConfig(home); + const config = configResult.config; const log = rootLogger.child({ channel: "core.pipeline.bootstrap", ctx: { agent: options.agent }, }); + + // Log configuration warnings (e.g., missing config file) + if (configResult.warnings.length > 0) { + for (const warning of configResult.warnings) { + log.warn("config.warning", { message: warning }); + } + } + const namespace = normalizeNamespace(options.namespace, options.agent); // 1. Storage. @@ -702,13 +711,6 @@ export function createMemoryCore( config: input.config, }, ); - if (input.config.lightweightMemory && !llmFilterOutcomeSucceeded(filtered.outcome)) { - filtered = { - ...filtered, - kept: [], - dropped: [...filtered.dropped, ...filtered.kept], - }; - } const kept = new Set(filtered.kept); const dropped = new Set(filtered.dropped); return { @@ -810,10 +812,6 @@ export function createMemoryCore( return text.split(/\n+/).map((line) => line.trim()).find(Boolean)?.slice(0, 240) ?? 
""; } - function llmFilterOutcomeSucceeded(outcome: string): boolean { - return outcome === "llm_kept_all" || outcome === "llm_filtered"; - } - function logCandidatesFromHits(hits: readonly RetrievalHitDTO[]): Array<{ tier: number; refKind: string; @@ -1725,7 +1723,7 @@ export function createMemoryCore( : localDropped; const stats = packet ? handle.consumeRetrievalStats(packet.packetId) : null; handle.repos.apiLogs.insert({ - toolName: handle.algorithm.lightweightMemory.enabled ? "memory_search" : "memos_search", + toolName: "memos_search", input: { type: "turn_start", agent: turn.agent, @@ -2456,7 +2454,7 @@ export function createMemoryCore( } finally { try { handle.repos.apiLogs.insert({ - toolName: handle.algorithm.lightweightMemory.enabled ? "memory_search" : "memos_search", + toolName: "memos_search", input: { type: "tool_call", agent: query.agent, @@ -2872,7 +2870,7 @@ export function createMemoryCore( offset: input.offset ?? 0, }); return rows - .filter((r: EpisodeRow) => visibleToCurrent(r) && !isLightweightEpisode(r)) + .filter((r: EpisodeRow) => visibleToCurrent(r)) .map((r: EpisodeRow) => r.id as EpisodeId); } @@ -2885,8 +2883,7 @@ export function createMemoryCore( ensureLive(); return handle.repos.episodes.list({ sessionId: input?.sessionId, limit: 100_000 }).filter((r) => (input?.includeAllNamespaces || visibleToCurrent(r)) && - matchesNamespaceFilter(r, input) && - !isLightweightEpisode(r) + matchesNamespaceFilter(r, input) ).length; } @@ -2912,8 +2909,7 @@ export function createMemoryCore( offset: input?.ownerAgentKind || input?.ownerProfileId ? 0 : input?.offset ?? 0, }).filter((r) => (input?.includeAllNamespaces || visibleToCurrent(r)) && - matchesNamespaceFilter(r, input) && - !isLightweightEpisode(r) + matchesNamespaceFilter(r, input) ); const pagedRows = input?.ownerAgentKind || input?.ownerProfileId ? rows.slice(input?.offset ?? 0, (input?.offset ?? 0) + (input?.limit ?? 
50)) @@ -4080,12 +4076,36 @@ export function createMemoryCore( return bestDim; } + function shouldTraceHaveEmbeddings(row: TraceRow): boolean { + // Skip traces where both user and agent text are very short + const userLen = row.userText.trim().length; + const agentLen = row.agentText.trim().length; + + // If both are under 10 chars, definitely skip + if (userLen < 10 && agentLen < 10) { + return false; + } + + // If total combined length is under 20 chars, skip + // (covers cases like "ok" / "Got it, processing..." which aren't meaningful memories) + if (userLen + agentLen < 20) { + return false; + } + + return true; + } + function collectEmbeddingSlots(): EmbeddingSlot[] { const slots: EmbeddingSlot[] = []; const pageSize = 500; for (let offset = 0;; offset += pageSize) { const rows = handle.repos.traces.list({ limit: pageSize, offset, newestFirst: false }); for (const row of rows) { + // Skip traces that shouldn't have embeddings + if (!shouldTraceHaveEmbeddings(row)) { + continue; + } + slots.push({ kind: "trace", id: row.id, diff --git a/apps/memos-local-plugin/core/pipeline/orchestrator.ts b/apps/memos-local-plugin/core/pipeline/orchestrator.ts index 8d4f51d20..75dc7e244 100644 --- a/apps/memos-local-plugin/core/pipeline/orchestrator.ts +++ b/apps/memos-local-plugin/core/pipeline/orchestrator.ts @@ -1156,13 +1156,22 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle { result.sessionId, result.contextHints, ); - const episodeId = openEpisodeBySession.get(sessionId) ?? result.episodeId; + const explicitEpisode = result.episodeId + ? session.sessionManager.getEpisode(result.episodeId) + : null; + const episodeId = explicitEpisode + ? result.episodeId + : openEpisodeBySession.get(sessionId) ?? result.episodeId; if (!episodeId) { throw new Error( "pipeline.onTurnEnd: no open episode for session " + sessionId, ); } - const episode = session.sessionManager.getEpisode(episodeId); + let episode = explicitEpisode ?? 
session.sessionManager.getEpisode(episodeId); + const wasClosedBeforeTurnEnd = episode?.status === "closed"; + if (wasClosedBeforeTurnEnd) { + episode = session.sessionManager.reopenEpisode(episodeId, "follow_up"); + } if (!episode || episode.status !== "open") { throw new Error( "pipeline.onTurnEnd: episode " + episodeId + " is not open", @@ -1256,6 +1265,14 @@ export function createPipeline(deps: PipelineDeps): PipelineHandle { } } + if (wasClosedBeforeTurnEnd) { + session.sessionManager.finalizeEpisode(episodeId, { + patchMeta: { + delayedAgentEndRecovered: true, + }, + }); + } + // Update the "current open episode" snapshot so the relation // classifier on the NEXT onTurnStart can decide whether the user // changed topic. We mirror the data shape of `lastEpisodeBySession` diff --git a/apps/memos-local-plugin/core/retrieval/retrieve.ts b/apps/memos-local-plugin/core/retrieval/retrieve.ts index fb13b191f..c8f656a1b 100644 --- a/apps/memos-local-plugin/core/retrieval/retrieve.ts +++ b/apps/memos-local-plugin/core/retrieval/retrieve.ts @@ -313,7 +313,9 @@ async function runAll( patternTerms: compiled.patternTerms, includeLowValue: plan.includeLowValue, excludeSessionId: - ctx.reason === "turn_start" && sessionId ? sessionId : undefined, + ctx.reason === "turn_start" && sessionId && !deps.config.lightweightMemory + ? sessionId + : undefined, }, ) : Promise.resolve({ traces: [], episodes: [] }); @@ -383,11 +385,10 @@ async function runAll( // Mechanical retrieval produces high-recall but low-precision // candidates. A small LLM round-trip (see `llm-filter.ts`) prunes // items that share surface keywords with the query but aren't - // actually relevant. Full mode fails open to preserve recall; - // lightweight mode fails closed because it promises summarizer-LLM - // screened raw memories only. - const queryText = - (ctx as { userText?: string }).userText ?? compiled.text ?? ""; + // actually relevant. 
If the LLM is unavailable, the filter helper + // keeps the mechanical ranking so local lightweight memories remain + // searchable in offline/default installs. + const queryText = (ctx as { userText?: string }).userText ?? compiled.text ?? ""; const filterResult = opts.skipLlmFilter ? { kept: mechanicalRanked, @@ -403,19 +404,10 @@ async function runAll( config: deps.config, }, ); - const filtered = - !opts.skipLlmFilter && - deps.config.lightweightMemory && - !llmFilterSucceeded(filterResult.outcome) - ? { - ...filterResult, - kept: [], - dropped: [...filterResult.dropped, ...filterResult.kept], - } - : filterResult; + const filtered = filterResult; log.debug("llm_filter.done", { outcome: filtered.outcome, - enforced: deps.config.lightweightMemory && filtered !== filterResult, + enforced: false, sufficient: filtered.sufficient, raw: rawCandidateCount, afterThreshold: mechanicalRanked.length, @@ -637,10 +629,6 @@ function round(n: number, d: number): number { return Math.round(n * f) / f; } -function llmFilterSucceeded(outcome: string): boolean { - return outcome === "llm_kept_all" || outcome === "llm_filtered"; -} - /** Thin façade so pipelines can `new Retriever(deps)` if they prefer OO. 
*/ export class Retriever { constructor(private readonly deps: RetrievalDeps) {} diff --git a/apps/memos-local-plugin/core/storage/migrator.ts b/apps/memos-local-plugin/core/storage/migrator.ts index da4c3144d..efefe1885 100644 --- a/apps/memos-local-plugin/core/storage/migrator.ts +++ b/apps/memos-local-plugin/core/storage/migrator.ts @@ -164,6 +164,12 @@ function applyMigration(db: StorageDb, file: MigrationFile): void { ensureSkillUsageColumns(db); return; } + if (file.version === 5 && file.name === "skill-trials") { + if (tableExists(db, "skills") && tableExists(db, "episodes") && tableExists(db, "traces")) { + db.exec(fs.readFileSync(file.fullPath, "utf8")); + } + return; + } if (file.version === 6 && file.name === "world-model-version") { if (tableExists(db, "world_model")) { ensureColumn(db, "world_model", "version", "INTEGER NOT NULL DEFAULT 1"); @@ -184,6 +190,18 @@ function applyMigration(db: StorageDb, file: MigrationFile): void { } return; } + if (file.version === 10 && file.name === "trace-policy-links") { + if (tableExists(db, "traces") && tableExists(db, "policies")) { + db.exec(fs.readFileSync(file.fullPath, "utf8")); + } + return; + } + if (file.version === 12 && file.name === "trace-turn-pagination-index") { + if (tableExists(db, "traces")) { + db.exec(fs.readFileSync(file.fullPath, "utf8")); + } + return; + } db.exec(fs.readFileSync(file.fullPath, "utf8")); } diff --git a/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts b/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts index 19bda3bee..335a41756 100644 --- a/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts +++ b/apps/memos-local-plugin/tests/unit/adapters/openclaw-bridge.test.ts @@ -1064,9 +1064,9 @@ describe("createOpenClawBridge", () => { await (pipeline as PipelineHandle).flush(); const traces = await mc.listTraces({ groupByTurn: true }); - expect(traces).toHaveLength(2); - expect(traces.some((tr) => tr.toolCalls?.[0]?.name === 
"sh")).toBe(true); - expect(traces.some((tr) => tr.agentText === "done")).toBe(true); + expect(traces).toHaveLength(1); + expect(traces[0]?.toolCalls?.[0]?.name).toBe("sh"); + expect(traces[0]?.agentText).toBe("done"); }); it("handleAgentEnd works even when before_prompt_build was never called (lazy episode open)", async () => { diff --git a/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime-lock.test.ts b/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime-lock.test.ts new file mode 100644 index 000000000..bbfa37cda --- /dev/null +++ b/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime-lock.test.ts @@ -0,0 +1,101 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, describe, expect, it } from "vitest"; + +import type { ResolvedHome } from "../../../core/config/index.js"; +import { + acquireOpenClawRuntimeLock, + DuplicateOpenClawRuntimeError, + openClawRuntimeLockDir, +} from "../../../adapters/openclaw/runtime-lock.js"; + +const roots: string[] = []; + +afterEach(() => { + for (const root of roots.splice(0)) { + fs.rmSync(root, { recursive: true, force: true }); + } +}); + +function tmpHome(): ResolvedHome { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "memos-oc-lock-")); + roots.push(root); + return { + root, + configFile: path.join(root, "config.yaml"), + dataDir: path.join(root, "data"), + dbFile: path.join(root, "data", "memos.db"), + skillsDir: path.join(root, "skills"), + logsDir: path.join(root, "logs"), + daemonDir: path.join(root, "daemon"), + }; +} + +function acquire(home: ResolvedHome, pid = process.pid) { + return acquireOpenClawRuntimeLock({ + home, + pluginId: "memos-local-plugin", + version: "test", + viewerPort: 18799, + pid, + now: () => 1_700_000_000_000, + unwrittenOwnerStaleMs: 0, + }); +} + +describe("OpenClaw runtime lock", () => { + it("creates an owner record and releases the lock directory", () => { + const home = tmpHome(); + const lock = 
acquire(home); + const ownerPath = path.join(lock.lockDir, "owner.json"); + + expect(fs.existsSync(ownerPath)).toBe(true); + expect(JSON.parse(fs.readFileSync(ownerPath, "utf8"))).toMatchObject({ + pluginId: "memos-local-plugin", + version: "test", + pid: process.pid, + dbFile: home.dbFile, + viewerPort: 18799, + }); + + lock.release(); + expect(fs.existsSync(lock.lockDir)).toBe(false); + }); + + it("rejects a second live owner before another runtime can bootstrap", () => { + const home = tmpHome(); + const lock = acquire(home); + + expect(() => acquire(home)).toThrow(DuplicateOpenClawRuntimeError); + expect(fs.existsSync(path.join(lock.lockDir, "owner.json"))).toBe(true); + + lock.release(); + }); + + it("reclaims a stale owner whose process is gone", () => { + const home = tmpHome(); + const lockDir = openClawRuntimeLockDir(home); + fs.mkdirSync(lockDir, { recursive: true }); + fs.writeFileSync( + path.join(lockDir, "owner.json"), + JSON.stringify({ + pluginId: "memos-local-plugin", + version: "old", + pid: 99_999_999, + token: "stale-token", + startedAt: 1, + dbFile: home.dbFile, + viewerPort: 18799, + }), + "utf8", + ); + + const lock = acquire(home); + expect(lock.owner.pid).toBe(process.pid); + expect(lock.owner.token).not.toBe("stale-token"); + + lock.release(); + }); +}); diff --git a/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime.test.ts b/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime.test.ts new file mode 100644 index 000000000..19378853a --- /dev/null +++ b/apps/memos-local-plugin/tests/unit/adapters/openclaw-runtime.test.ts @@ -0,0 +1,174 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { DEFAULT_CONFIG } from "../../../core/config/defaults.js"; +import { resolveHome, type ResolvedHome } from "../../../core/config/index.js"; +import type { + HostLogger, + OpenClawPluginApi, + ServiceDescriptor, +} from 
"../../../adapters/openclaw/openclaw-api.js"; + +interface MockApi extends OpenClawPluginApi { + services: ServiceDescriptor[]; + logger: HostLogger & { + info: ReturnType; + warn: ReturnType; + error: ReturnType; + }; +} + +const tempRoots: string[] = []; +let oldMemosHome: string | undefined; + +afterEach(() => { + if (oldMemosHome === undefined) delete process.env.MEMOS_HOME; + else process.env.MEMOS_HOME = oldMemosHome; + vi.doUnmock("../../../core/pipeline/index.js"); + vi.doUnmock("../../../server/http.js"); + vi.doUnmock("../../../core/telemetry/index.js"); + vi.resetModules(); + vi.restoreAllMocks(); + for (const root of tempRoots.splice(0)) { + fs.rmSync(root, { recursive: true, force: true }); + } +}); + +function useTempMemosHome(): ResolvedHome { + oldMemosHome = process.env.MEMOS_HOME; + const root = fs.mkdtempSync(path.join(os.tmpdir(), "memos-oc-runtime-")); + tempRoots.push(root); + process.env.MEMOS_HOME = root; + return resolveHome("openclaw"); +} + +function makeCore() { + return { + init: vi.fn(async () => {}), + shutdown: vi.fn(async () => {}), + bindTelemetry: vi.fn(), + }; +} + +function makeApi(): MockApi { + const services: ServiceDescriptor[] = []; + const logger = { + trace: vi.fn(), + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }; + return { + id: "memos-local-plugin", + name: "MemOS Local", + logger, + services, + registerTool: vi.fn(), + registerMemoryCapability: vi.fn(), + on: vi.fn(), + registerService: vi.fn((svc: ServiceDescriptor) => { + services.push(svc); + }), + }; +} + +async function loadPluginWithMocks( + bootstrapMemoryCoreFull: ReturnType, + startHttpServer: ReturnType, +) { + vi.resetModules(); + vi.doMock("../../../core/pipeline/index.js", () => ({ + bootstrapMemoryCoreFull, + })); + vi.doMock("../../../server/http.js", () => ({ + startHttpServer, + })); + vi.doMock("../../../core/telemetry/index.js", () => ({ + Telemetry: class { + trackPluginStarted = vi.fn(); + shutdown = vi.fn(async () => 
{}); + }, + })); + const mod = await import("../../../adapters/openclaw/index.js"); + return mod.default; +} + +function deferred() { + let resolve!: (value: T) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + +describe("OpenClaw adapter runtime lifecycle", () => { + it("blocks a duplicate register before the second runtime bootstraps", async () => { + const home = useTempMemosHome(); + const firstCore = makeCore(); + const boot = deferred<{ core: ReturnType; config: typeof DEFAULT_CONFIG; home: ResolvedHome }>(); + const bootstrapMemoryCoreFull = vi.fn(() => boot.promise); + const startHttpServer = vi.fn(async () => ({ + url: "http://127.0.0.1:18799", + port: 18799, + closed: false, + close: vi.fn(async () => {}), + })); + const plugin = await loadPluginWithMocks(bootstrapMemoryCoreFull, startHttpServer); + + const api1 = makeApi(); + plugin.register(api1); + expect(bootstrapMemoryCoreFull).toHaveBeenCalledTimes(1); + + const api2 = makeApi(); + expect(() => plugin.register(api2)).toThrow(/already active/); + expect(bootstrapMemoryCoreFull).toHaveBeenCalledTimes(1); + expect(api2.registerTool).not.toHaveBeenCalled(); + expect(api2.on).not.toHaveBeenCalled(); + + boot.resolve({ core: firstCore, config: DEFAULT_CONFIG, home }); + await api1.services[0]!.start?.(); + await api1.services[0]!.stop?.(); + + expect(fs.existsSync(path.join(home.daemonDir, "openclaw-runtime.lock"))).toBe(false); + }); + + it("treats viewer EADDRINUSE as fatal and releases core plus lock", async () => { + const home = useTempMemosHome(); + const core = makeCore(); + const bootstrapMemoryCoreFull = vi.fn(async () => ({ + core, + config: DEFAULT_CONFIG, + home, + })); + const inUse = Object.assign(new Error("address already in use"), { + code: "EADDRINUSE", + }); + const startHttpServer = vi.fn(async () => { + throw inUse; + }); + const plugin = await 
loadPluginWithMocks(bootstrapMemoryCoreFull, startHttpServer); + + const api = makeApi(); + plugin.register(api); + + await expect(api.services[0]!.start?.()).rejects.toMatchObject({ + code: "EADDRINUSE", + }); + + expect(core.init).toHaveBeenCalledTimes(1); + expect(core.shutdown).toHaveBeenCalledTimes(1); + expect(api.logger.error).toHaveBeenCalledWith( + expect.stringContaining("refusing duplicate/headless OpenClaw runtime"), + ); + expect(api.logger.warn).not.toHaveBeenCalledWith( + expect.stringContaining("running headless"), + ); + expect(fs.existsSync(path.join(home.daemonDir, "openclaw-runtime.lock"))).toBe(false); + }); +}); diff --git a/apps/memos-local-plugin/tests/unit/experience/feedback-builder.test.ts b/apps/memos-local-plugin/tests/unit/experience/feedback-builder.test.ts index 3613dfcd1..c1a24c5a4 100644 --- a/apps/memos-local-plugin/tests/unit/experience/feedback-builder.test.ts +++ b/apps/memos-local-plugin/tests/unit/experience/feedback-builder.test.ts @@ -128,6 +128,34 @@ describe("feedback experience builder", () => { expect(recalled.map((c) => c.refId)).toContain(result.policyId); }); + it("treats a partial verifier pass (3/4, reward 0) as a failure, not a success_pattern", async () => { + const result = await runFeedbackExperience( + { + feedback: feedback({ + id: "fb_partial" as FeedbackRow["id"], + polarity: "neutral", + // The literal word "passed" appears here and used to be substring-matched + // as a positive signal — even though 3/4 with reward 0 is a failure. + rationale: + "Verifier feedback for the previous attempt. Verifier reward: 0.0. passed: 3, total: 4. TimeoutException(): Time Limit Exceeded. 
Please briefly reflect on what you would keep and what you would improve next time.", + raw: { + source: "evoagentbench_gateway_manual_feedback", + verifier: { reward: 0, passed: 3, total: 4, results: [1, 1, 1, -3] }, + }, + }), + episode: { id: "ep_feedback" as EpisodeId, traceIds: [trace.id], rTask: -0.51 }, + trace, + }, + { repos: handle.repos, embedder: fakeEmbedder(), namespace, now: () => NOW }, + ); + + expect(result.policyId).toBeTruthy(); + const row = handle.repos.policies.getById(result.policyId!); + expect(row?.experienceType).not.toBe("success_pattern"); + expect(row?.evidencePolarity).toBe("negative"); + expect(row?.skillEligible).toBe(false); + }); + it("merges later avoidance feedback into a success-backed experience without losing skill eligibility", async () => { const ok = await runFeedbackExperience( { diff --git a/apps/memos-local-plugin/tests/unit/install/install-sh.test.ts b/apps/memos-local-plugin/tests/unit/install/install-sh.test.ts index 75946dc6f..0fb06a3da 100644 --- a/apps/memos-local-plugin/tests/unit/install/install-sh.test.ts +++ b/apps/memos-local-plugin/tests/unit/install/install-sh.test.ts @@ -80,8 +80,7 @@ describe("install.sh — CLI surface", () => { expect(script).toContain("const MEMOS_TOOL_NAMES = ["); expect(script).toContain("if (!Array.isArray(config.tools.alsoAllow)) config.tools.alsoAllow = []"); expect(script).toContain("config.tools.alsoAllow.push(toolName)"); - expect(script).toContain("delete config.plugins.entries[pluginId].hooks"); - expect(script).not.toContain("config.plugins.entries[pluginId].hooks.allowConversationAccess = true"); + expect(script).toContain("config.plugins.entries[pluginId].hooks.allowConversationAccess = true"); expect(script).not.toContain('"extensions": ["./adapters/openclaw/index.ts"]'); }); diff --git a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts index 88d5cbbd4..e42af3a79 100644 --- 
a/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts +++ b/apps/memos-local-plugin/tests/unit/pipeline/memory-core.test.ts @@ -36,12 +36,34 @@ let db: TmpDbHandle | null = null; let pipeline: PipelineHandle | null = null; let core: MemoryCore | null = null; const TEST_EMBED_DIMENSIONS = 384; +const FULL_MEMORY_CONFIG_YAML = ` +version: 1 +algorithm: + lightweightMemory: + enabled: false +`; + +function configWithLightweightMemory(enabled: boolean): typeof DEFAULT_CONFIG { + return { + ...DEFAULT_CONFIG, + algorithm: { + ...DEFAULT_CONFIG.algorithm, + lightweightMemory: { + ...DEFAULT_CONFIG.algorithm.lightweightMemory, + enabled, + }, + }, + }; +} -function buildDeps(h: TmpDbHandle): PipelineDeps { +function buildDeps( + h: TmpDbHandle, + config: typeof DEFAULT_CONFIG = configWithLightweightMemory(false), +): PipelineDeps { return { agent: "openclaw", home: resolveHome("openclaw", "/tmp/memos-mc-test"), - config: DEFAULT_CONFIG, + config, db: h.db, repos: h.repos, llm: null, @@ -258,7 +280,7 @@ describe("MemoryCore façade", () => { }); it("does not require action vectors for lightweight memory traces", async () => { - pipeline = createPipeline(buildDeps(db!)); + pipeline = createPipeline(buildDeps(db!, configWithLightweightMemory(true))); core = createMemoryCore( pipeline, resolveHome("openclaw", "/tmp/memos-mc-test"), @@ -332,6 +354,21 @@ describe("MemoryCore façade", () => { const row = db!.repos.traces.getById("tr_lightweight" as never); expect(row?.vecSummary?.length).toBe(TEST_EMBED_DIMENSIONS); expect(row?.vecAction).toBeNull(); + + await expect(core.listEpisodes({ limit: 10 })).resolves.toEqual(["ep_lightweight"]); + await expect(core.countEpisodes()).resolves.toBe(1); + const episodeRows = await core.listEpisodeRows({ limit: 10 }); + expect(episodeRows).toHaveLength(1); + expect(episodeRows[0]?.id).toBe("ep_lightweight"); + expect(episodeRows[0]?.preview).toContain("What changed in the repo?"); + + const search = await core.searchMemory({ + 
agent: "openclaw", + query: "lightweight memory mode", + topK: { tier1: 0, tier2: 5, tier3: 0 }, + }); + expect(search.hits.length).toBeGreaterThan(0); + expect(search.hits.map((hit) => hit.snippet).join("\n")).toContain("lightweight memory mode"); }); it("onTurnStart returns a RetrievalResultDTO with tier latencies", async () => { @@ -1149,7 +1186,10 @@ algorithm: // the crash; only the final status flip was lost). // - Un-scored rows with no traces → stay open + `topicState` // `interrupted` so they do not show as skipped. - home = await makeTmpHome({ agent: "openclaw" }); + home = await makeTmpHome({ + agent: "openclaw", + configYaml: FULL_MEMORY_CONFIG_YAML, + }); // First bootstrap: lets migrations run + schema exists. Shut it // down cleanly so we can seed orphans into the DB without holding @@ -1233,7 +1273,10 @@ algorithm: }); it("keeps an interrupted topic open across restart and appends the next same-topic turn", async () => { - home = await makeTmpHome({ agent: "openclaw" }); + home = await makeTmpHome({ + agent: "openclaw", + configYaml: FULL_MEMORY_CONFIG_YAML, + }); const first = await bootstrapMemoryCore({ agent: "openclaw", @@ -1275,7 +1318,10 @@ algorithm: }); it("rescoring closed episodes when traces were appended after the last reward", async () => { - home = await makeTmpHome({ agent: "openclaw" }); + home = await makeTmpHome({ + agent: "openclaw", + configYaml: FULL_MEMORY_CONFIG_YAML, + }); const seeder = await bootstrapMemoryCore({ agent: "openclaw", @@ -1369,7 +1415,10 @@ algorithm: }); it("rescoring finalized closed episodes that have traces but no reward metadata", async () => { - home = await makeTmpHome({ agent: "openclaw" }); + home = await makeTmpHome({ + agent: "openclaw", + configYaml: FULL_MEMORY_CONFIG_YAML, + }); const seeder = await bootstrapMemoryCore({ agent: "openclaw", diff --git a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts index 
87b9e6d8f..f4826a6f3 100644 --- a/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts +++ b/apps/memos-local-plugin/tests/unit/pipeline/orchestrator.test.ts @@ -24,6 +24,19 @@ import type { TurnInputDTO, TurnResultDTO } from "../../../agent-contract/dto.js let dbHandle: TmpDbHandle | null = null; let pipeline: PipelineHandle | null = null; +function configWithLightweightMemory(enabled: boolean): typeof DEFAULT_CONFIG { + return { + ...DEFAULT_CONFIG, + algorithm: { + ...DEFAULT_CONFIG.algorithm, + lightweightMemory: { + ...DEFAULT_CONFIG.algorithm.lightweightMemory, + enabled, + }, + }, + }; +} + function buildDeps( h: TmpDbHandle, embedder = fakeEmbedder({ dimensions: 384 }), @@ -31,7 +44,7 @@ function buildDeps( return { agent: "openclaw", home: resolveHome("openclaw", "/tmp/memos-test-home"), - config: DEFAULT_CONFIG, + config: configWithLightweightMemory(false), db: h.db, repos: h.repos, llm: null, diff --git a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts index fa3eaeee9..3d2fb0049 100644 --- a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts +++ b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts @@ -374,7 +374,7 @@ describe("retrieval/integration", () => { expect(res.stats.llmFilterKept).toBeGreaterThan(0); }); - it("lightweight mode returns no memories when summarizer filter is unavailable", async () => { + it("lightweight mode keeps local memories when the summarizer filter is unavailable", async () => { const res = await turnStartRetrieve( { ...makeDeps(handle), @@ -397,9 +397,9 @@ describe("retrieval/integration", () => { expect(res.stats.tier2Count).toBeGreaterThan(0); expect(res.stats.llmFilterOutcome).toBe("no_llm"); - expect(res.stats.llmFilterKept).toBe(0); - expect(res.packet.snippets).toEqual([]); - expect(res.stats.emptyPacket).toBe(true); + expect(res.stats.llmFilterKept).toBeGreaterThan(0); + 
expect(res.packet.snippets.length).toBeGreaterThan(0); + expect(res.stats.emptyPacket).toBe(false); }); it("skill_invoke is tier1-heavy", async () => { diff --git a/packages/memos-core/src/skill/evolver.ts b/packages/memos-core/src/skill/evolver.ts index 42516e8b0..495728918 100644 --- a/packages/memos-core/src/skill/evolver.ts +++ b/packages/memos-core/src/skill/evolver.ts @@ -370,17 +370,13 @@ Use selectedIndex 0 when none is highly relevant.`; if (skill.status !== "active") return; const explicitAutoInstall = this.ctx.config.skillEvolution?.autoInstall ?? DEFAULTS.skillAutoInstall; - if (explicitAutoInstall) { - this.installer.install(skill.id); - this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (explicit autoInstall=true)`); + if (!explicitAutoInstall) { + this.ctx.log.debug(`SkillEvolver: skipping auto-install for "${skill.name}" (autoInstall=false)`); return; } - const manifest = SkillInstaller.buildManifest(skill.dirPath, !!skill.installed, skill.name); - if (manifest.installMode === "install_recommended") { - this.installer.install(skill.id); - this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (install_recommended: ${manifest.scriptsCount} scripts, ${Math.round(manifest.totalSize / 1024)}KB)`); - } + this.installer.install(skill.id); + this.ctx.log.info(`SkillEvolver: auto-installed "${skill.name}" (autoInstall=true)`); } private readSkillContent(skill: Skill): string | null { diff --git a/packages/memos-core/src/viewer/html.ts b/packages/memos-core/src/viewer/html.ts index 5e4456e71..8c48f7017 100644 --- a/packages/memos-core/src/viewer/html.ts +++ b/packages/memos-core/src/viewer/html.ts @@ -7237,7 +7237,7 @@ var _providerDefaults={ gemini:{endpoint:'',embModel:'text-embedding-004',chatModel:'gemini-2.0-flash'}, zhipu:{endpoint:'https://open.bigmodel.cn/api/paas/v4',embModel:'embedding-3',chatModel:'glm-4-flash'}, deepseek:{endpoint:'https://api.deepseek.com/v1',chatModel:'deepseek-chat'}, - 
bailian:{endpoint:'https://dashscope.aliyuncs.com/compatible-mode/v1',embModel:'text-embedding-v3',chatModel:'qwen-max'}, + bailian:{endpoint:'https://coding.dashscope.aliyuncs.com/v1',embModel:'text-embedding-v3',chatModel:'qwen-max'}, moonshot:{endpoint:'https://api.moonshot.cn/v1',chatModel:'moonshot-v1-8k'} }; function onProviderChange(section){