diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index c1417f7..f30594d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,12 +9,12 @@ { "name": "specode", "source": "./plugins/specode", - "version": "0.4.0", - "description": "Specification-driven workflow with hard sync enforcement between code and docs (Claude Code + CodeBuddy).", + "version": "0.10.23", + "description": "Specification-driven workflow with advisory hooks, selector prompts, session-bound state, and task-swarm multi-agent orchestration.", "homepage": "https://github.com/qxbyte/specode", "repository": "https://github.com/qxbyte/specode", "license": "MIT", - "keywords": ["spec", "specification", "workflow", "hooks", "code-doc-sync"] + "keywords": ["spec", "specification", "workflow"] } ] } diff --git a/.gitignore b/.gitignore index c69a2b9..de4bf9b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ state/* !state/.gitkeep .vscode/ .idea/ +.pytest_cache/ +.claude/ DEV.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0904779..05ffa41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,2019 @@ ## Unreleased -_no entries yet_ +## 0.10.23 (2026-05-26) + +### Changed — `requirements.md` / `bugfix.md` 模板重设计 + +两份模板从「机器验收文档」改写为「需求/缺陷描述」。 + +**根因**:旧模板在「验收标准 / 当前行为 / 期望行为」等节预填 `WHEN ... 
THE System SHALL ...` 占位句,模型按填空模式扩写,整篇文档丢失"用人话讲需求"的语感,读起来像 EARS 句堆而非需求说明。 + +**v3 设计要点**: + +- **主体改为自然语言**:requirements 用「背景与动机 / 目标用户与场景 / 用户故事 / 主流程 / 异常流程」描述 WHY + WHAT;bugfix 用「问题陈述 / 复现路径 / 证据 / 根因分析」描述现象 + 诊断。 +- **EARS 收敛到末尾「验收要点」可选段**——仅在需要机器可读契约时用,不再强制预填。 +- **「范围之外(Scope Out)」前置到第一节**,与「目标」放同一视野,比放在文末更能锚定边界。 +- **新增「典型使用路径(Happy Path)」编号步骤**——比纯散文更具象,复杂分支可附 Mermaid `flowchart`。 +- **bugfix 新增「根因分析」假设表**——三列结构(假设 / 支持证据 / 反对证据 或验证方式)强制把"猜测"与"结论"分开,状态机相关 bug 可附 Mermaid `stateDiagram-v2` 标错误流转点。 +- **「待澄清问题」前置到正文之前**,与本版同步落地的 Pre-requirements Clarification 铁律联动。 +- **新增 Priority / Severity 一行元信息**(可省)。 + +**向后兼容**: + +- 保留 `### 需求 1` / `### 需求 2` 章节锚点(`spec_lint.py REQ_TAG_RE` 反向引用依赖)。 +- 保留 `Spec Type / Workflow / Status / Review Status` 元信息块(`spec_session` 业务依赖)。 +- `spec_lint.py rule_ears_shall` 仅对已存在的 SHALL 行校验,不预填不会触发任何 warning。 +- 全套 228 项测试无回归。 + +### Fixed — 验收通过不再自动弹 iteration-scope(acceptance → iteration 路径) + +`_selectors.py` acceptance-gate 模板和 `_business.py _auto_pending_selector` 互相加强地把"验收通过"等价于"立刻追问要不要继续迭代"——与 `references/iteration.md` §2 / §7「不自动呈现 iteration-scope」自相矛盾。 + +- `_selectors.py` acceptance-gate「用户选定后流程」:验收通过 → 仅做 phase-transition + chat 一句简报 → end turn,不再串接 iteration-scope。 +- `_selectors.py` iteration-scope 模板:「目的」/「前置动作」改写为只在用户**显式提出**迭代调整时呈现;新增「触发条件(必须满足之一才可呈现)」节列三种禁用场景。 +- `_business.py _auto_pending_selector`:`phase == "iteration"` 返回 `None`,合并冗余分支并加注释,让 hook 不再自动注入 iteration-scope。 +- `SKILL.md` 退出 spec 模式判定:移除「acceptance-gate 通过且 iteration-scope ESC」这条等价条件——只剩 `/specode:end` 一条。 +- 新增回归测试 `test_phase_transition_to_iteration_clears_pending_selector`。 + +### Added — Pre-requirements Clarification 铁律(接通 clarification-wizard 触发链路) + +`clarification-wizard` selector 模板早已存在但**永不触发**:`_auto_pending_selector(intake)` 硬编码返回 `"workflow-choice"`,主代理也没有"何时该问、何时可跳"的指引。本次接通主流程,把它做成主代理在 workflow-choice 选定后**必走的歧义自检**: + +- `SKILL.md` §「Pre-requirements Clarification」改写为「铁律」形态:六维歧义自检(scope / behavior / UX / data / validation / 
acceptance)、四种必呈现场景、唯一例外(用户明确放权"由你决定"/"按业界默认"/"先 MVP"等)+ 反例三条。 +- `_selectors.py` workflow-choice「用户选定后流程」拆 **Step A 歧义自检 + Step B 文档生成**两段式 routing,写文档过程中发现新歧义须停写补 wizard。 +- `commands/spec.md` 第四步成功后必做加第 4 步指引,明确"严禁在源需求不明确时绕过 clarification-wizard 直接写文档——澄清铁律的违反不是'风格瑕疵'而是 spec 失真根因"。 + +## 0.10.22 (2026-05-26) + +### Refactored — `spec_session.py` 拆分 + 两大 CLI 子目录化 + +**两步走的纯重构,无行为变化。** + +#### 第一步(B1):spec_session.py 拆成 5 个 `_ss_*.py` sibling + +`spec_session.py` 从 2360 行的"什么都装"模块拆成薄入口 + 5 个 sibling: + +| 模块 | 承载 | +|---|---| +| `_ss_io.py` | 原子写、session+spec config 读写、锁工具、共享常量(`VALID_PHASES` / `STALE_LOCK_SECONDS`) | +| `_ss_selectors.py` | `SELECTOR_PROMPTS` 字典 + `_fill_selector` | +| `_ss_reminders.py` | reminder 模板字符串 + help 文本渲染 | +| `_ss_business.py` | 所有 `cmd_*` 业务命令 + `_update_session_for_spec` | +| `_ss_hooks.py` | 所有 `hook_on_*` + `_safe_hook` + task-swarm plan 提醒辅助 | + +#### 第二步:两大 CLI 子目录化 + 同名 launcher + +`scripts/` 顶层从 19 个 `.py` 收敛到 7 个 + 2 个 package 目录。`spec_session` +和 `task_swarm` 各自从「下划线前缀 fake namespace」升级为真子目录包: + +``` +scripts/ +├── spec_session.py # ~40 行薄 launcher(utf-8 reconfigure + sys.path + main 转发) +├── spec_session/ # package(_io / _selectors / _reminders / _business / _hooks / _catalog / cli) +├── task_swarm.py # ~25 行薄 launcher +├── task_swarm/ # package(_state / _parse_md / _outbox / _prompt / _writeback / cli) +└── (其余 5 个独立 CLI:spec_init / spec_lint / spec_log / spec_status / spec_vault) +``` + +文件改名规则:`_ss_io.py` → `spec_session/_io.py`,`task_swarm_state.py` → +`task_swarm/_state.py` 等(去前缀、加 `_` 标记 internal)。 + +**外部 API surface 100% 不变**: +- 文件名 `spec_session.py` / `task_swarm.py` 保留——`hooks/hooks.json`、`commands/*.md`、`tests/conftest.py:run_script` 都按这些名字拼绝对路径调用。Python 的 `FileFinder` 在同一 path entry 下 package 优先于 module,launcher 自己被 exec 不走 import 系统,所以同名文件 + 同名目录共存安全。 +- `spec_status.py:25` 的 `from spec_session import read_session, read_spec_config, _session_short, _is_lock_stale` 仍可解析(`spec_session/__init__.py` 从 `_io` 
re-export 这 4 个符号)。 +- `tests/test_selectors_drift.py` SCRIPTS 路径 → `spec_session/_selectors.py`;4 个 `test_task_swarm_*.py` 的直接 import → `from task_swarm._ import ...`。 + +**包内规范**: +- 包内 import 用 absolute 形式(`from spec_session._io import ...`),出错信息清晰。 +- 包内文件需要找顶层 sibling 脚本时,统一 `_THIS_DIR = Path(__file__).resolve().parents[1]`(= `scripts/`),让旧用法 `_THIS_DIR / "task_swarm.py"` / `_THIS_DIR.parent / ".claude-plugin"` 语义一致。 + +219 项原有测试 + 18 项新 catalog 测试 = 237 全绿;无 schema 变化、无 hook 行为变化、外部用户无需任何 install / config 改动。 + +### Added — `on-user-prompt-catalog` hook:reference 关键词触发提示 + +新 advisory hook,注册到 `hooks.json` `UserPromptSubmit` 数组第 3 位。激活 +门:仅 `mode=active` 触发,`idle / readonly / ended` 一律静默。 + +**机制**: +- 每个 `references/*.md` 文件首部新增 YAML frontmatter `description: Use when …`("何时该读"而非"内容是什么",superpowers 风格)。 +- `spec_session/_catalog.py` 维护一份预编译关键词正则字典 `CATALOG`(含中英文双语 pattern,例如 `lock / takeover / 接管` → `lock-protocol`,`task-swarm / @writes / reviewer` → `task-swarm` 等 8 个 key)。 +- 每轮 user prompt 触发:扫文本、把命中的 reference 列出来 + 嵌入对应 description,作为 `additionalContext` 注入。 + +**目的**:specode 从"全程监考"扩展为"全程监考 + 定向激活"双模并存。 +主代理看到注入后自己决定是否真要 Read 对应 reference;hook 永远 advisory, +不阻断。 + +**新 drift 守卫**: +- `tests/test_catalog.py::test_catalog_keys_have_matching_reference_files` —— `CATALOG` key 必须对应真实 `references/.md` +- `tests/test_catalog.py::test_every_catalog_referenced_file_has_description_frontmatter` —— 每份 referenced 文件必有非空 `description` 字段 + +性能:每次调用纯预编译正则匹配 + 最多 8 次小文件读,远低于 +UserPromptSubmit 80ms 预算。 + +## 0.10.21 (2026-05-23) + +### Fixed — writeback line-safe 算法对多行 `reproduce_cmd` 报"越界" + +**用户痛点(login-page 现场)**:validator-g1-r3 pass,`reproduce_cmd` 含多行(`cd C:\Users\qiang\login-page` + 空行 + `# 验证 P0...` + `node -e "..."`)。`task_swarm.py writeback` 报错: + +``` +writeback 越界:line 49 +原: '## 阶段 5: 集成测试' +新: '# 1. 
验证 Vite 构建成功(前端无编译错误)' +``` + +根因:`task_swarm_writeback.py:_format_findings_block` 把 multi-line `reproduce_cmd` 直接拼进 `f"> ✅ validator pass: \`{cmd}\`"` 的 inline backtick——这个字符串作为单元素追加进 `out` 列表。后续 `"\n".join(new_lines + block_lines)` 写入 tasks.md,**`cmd` 内部的 `\n` 被保留**,文件实际多了几行非 `>` 前缀的内容(如 `# 验证 P0 修复`、`node -e "..."`)。`_verify_line_safe` 行级对齐时发现新行不属于"checkbox toggle"也不"以 `>` 开头",报"越界"。 + +修复 `task_swarm_writeback.py:_format_findings_block`: + +```python +if "\n" in cmd: + out.append(f"> ✅ validator{round_text} pass,复现命令:") + out.append("> ```") + for cmd_line in cmd.splitlines(): + out.append(f"> {cmd_line}" if cmd_line else ">") + out.append("> ```") +else: + cmd_text = f": `{cmd}`" if cmd else "" + out.append(f"> ✅ validator{round_text} pass{cmd_text}") +``` + +多行 cmd 用 `> ```fenced` ` 块,每行加 `> ` 前缀(包括空行用 `>`)→ 完全满足 `_verify_line_safe` 的"允许多出 `> ` 前缀或空行"规则。单行 cmd 仍 inline。 + +### Changed — PreToolUse hook 对 `tasks.md` 从软提醒升级为强阻断 + +**用户痛点(login-page 现场)**:上面 writeback 报越界后,主代理**手工 Edit tasks.md** 把 1-4 阶段所有 `[ ]` 改成 `[x]`——破坏 state.json 与 tasks.md 行号一致性,后续 writeback 永远过不去。0.10.13 PreToolUse hook 当时对 tasks.md 给的是**软提醒**("本提醒不阻断当前工具调用"),主代理见 writeback 失败就绕过 CLI 自己改。 + +修复 `spec_session.py:hook_on_pre_tool_use`: + +旧(软提醒,可忽略): + +```python +text = "## ⚠ 检测到正在直接 Edit/Write tasks.md ..." 
+_emit_hook_additional_context(text, hook_event_name="PreToolUse") +``` + +新(强阻断,exit 2 + stderr 详细原因): + +```python +sys.stderr.write( + f"specode 阻断:主代理不得直接 Edit/Write `tasks.md` ...\n" + "若 writeback 本身报越界,请保留现场报告用户,让 task-swarm 算法层修,\n" + "**不要**手工抹平。\n" +) +sys.exit(2) +``` + +现在 `tasks.md` 跟 `.task-swarm/runs/*/state.json` / `agents/*/outbox/*` 同等待遇——active spec + task-swarm 进行中时主代理一律不能直接 Edit/Write,必须走 `task_swarm.py writeback` CLI。 + +### Tests + +- 新增 `test_writeback_handles_multi_line_reproduce_cmd`:触发 multi-line reproduce_cmd writeback 全流程,断言不报越界 + tasks.md 包含 `> ```` 块 + 每行带 `> ` 前缀 +- 新增 `test_on_pre_tool_use_blocks_edit_of_tasks_md`:active spec + task_swarm_run_id 进行中 → Edit tasks.md → exit 2 + stderr 含 `task_swarm.py writeback` 引导 +- 全套 pytest **219/219 PASS** + +## 0.10.20 (2026-05-23) + +### Added — `--skip-validator` 人工验收模式:task-swarm 跳过 validator/v-fix + +**用户痛点**:login-page 现场显示一轮 validator 跑下来要花 25-50k tokens + 大量 Bash 测试,多轮 v-fix 循环下成本高昂。用户希望有"task-swarm 但不启动 validator"的选项——多 coder 并发 + reviewer + p0-fix 仍走,但跳过 validation/v-fix 循环;代码正确性由用户**事后人工核验**,有问题再跟模型常规对话沟通。 + +实现: + +1. **`task_swarm_state.py` `StateMachine`**:加 `skip_validator: bool = False` 字段,load/to_dict 同步。 +2. **`task_swarm.py cmd_init`**:加 `--skip-validator` argparse flag;写入 state.json;events_append init 事件含 `skip_validator` 字段。 +3. **`task_swarm.py cmd_advance`**:两处分支改造: + - review phase advance:`if sm.p0_pending: begin_p0_fix ...; elif sm.skip_validator: begin_writeback(直接进 writeback); else: begin_validation` + - p0-fix phase advance:`if sm.skip_validator: begin_writeback; else: begin_validation` +4. **`task_swarm_writeback.py`**: + - `GroupFindings` 加 `skip_validator: bool = False` 字段 + - `_format_findings_block` 优先检查 skip_validator——若 True 写 "`> ⏭️ validator 已跳过(人工验收模式)—— 代码正确性由用户人工核验`",否则走原有 pass/fail/deadloop 分支 +5. **`task_swarm.py cmd_writeback`**:构造 `GroupFindings` 时把 `sm.skip_validator` 传入 +6. 
**`spec_session.py` SELECTOR_PROMPTS["tasks-execution"]** 4 个选项重新组织: + - "task-swarm + validator 自动验收(推荐)" + - "task-swarm + 人工验收(跳过 validator)"(新) + - "顺序执行(同时处理 optional)" + - "暂停 / 调整 tasks.md"(合并原"需要调整" + "暂不 coding") +7. **`commands/task-swarm.md`** 第二步 init 提及 `[--skip-validator]` flag + 触发条件 +8. **`references/selectors.md` A4** drift-sync byte-identical + +**新模式流程**: +``` +init --skip-validator → coding → review → p0-fix → writeback → next group + (跳过 validation / v-fix) +``` + +用户使用: +1. tasks.md 生成后呈现 `tasks-execution` selector +2. 选「task-swarm + 人工验收(跳过 validator)」 +3. 主代理调 `task_swarm.py init --tasks

<dir> --session <session_id> --skip-validator` +4. 流程按 full 模式跑 coding → review → p0-fix(行为不变) +5. **p0-fix 完成后状态机直接进 writeback**(不 fork validator) +6. writeback 把 tasks.md `[ ]` → `[x]`,注释块写"⏭️ validator 已跳过" +7. 用户人工 review 代码 → 有问题跟模型常规对话沟通调整 + +### Tests + +- 新增 `test_init_skip_validator_flag_persists_to_state`:验证 flag 写入 state.json +- 新增 `test_init_without_flag_defaults_to_full_mode`:默认兼容 +- 新增 `test_skip_validator_review_no_p0_skips_validation`:无 P0 直接 writeback +- 新增 `test_skip_validator_p0_fix_done_skips_validation`:p0-fix 完直接 writeback +- 新增 `test_skip_validator_writeback_writes_skipped_note`:writeback 注释含"validator 已跳过" +- 更新 `test_tasks_execution_snapshot` 预期 4 个新选项 +- 全套 pytest **217/217 PASS** + +## 0.10.19 (2026-05-23) + +### Added — `commands/task-swarm.md` 加术语区分节「reviewer 分级 vs validator fail」 + +**用户痛点(login-page 现场)**:validator-g1-r2 报 fail(子任务 1.5 响应式设计未完成),主代理输出"判定为 fail,因为 1 个 P1 问题(响应式设计)仍然存在"——把 validator 的子任务核验失败误称为 reviewer 的 P1 等级。用户看到"P1"自然问"这个 P1 到底需不需要修"——因为按 reviewer 分级体系 P1 是 advisory,**不阻塞 pipeline**。 + +但实际上 validator 跟 reviewer 是**两个完全不同的裁判**: + +- **reviewer 路径是尝试性修复**:p0-fix 只给"带证据标签的 P0"一次修复机会,不论结果都进 validation;P1/P2/无标签 P0 不修。 +- **validator 路径是循环验证**:fail 就必须 v-fix 修到 pass,没有"P1 可跳过"概念,**没有任何"建议性"**。 + +主代理混淆术语的后果:用户被误导以为 1.5 是"建议项"可以跳过;或者把 reviewer P0 误当 validator fail 一直循环修。 + +修复 `commands/task-swarm.md` 加新节「术语区分」: + +1. **4 行对比表**:P0(带证据标签)/ P0(不带证据标签)/ P1·P2 / validator fail,列出来源 + 是否触发 fix loop + 具体策略 +2. **关键差异说明**:reviewer 是"尝试性修复"(一次性),validator 是"循环验证"(修到 pass) +3. **主代理正确措辞示范**:✓ "子任务 1.5 未完成" vs ✗ "1 个 P1 问题" +4. 
**用户问"能不能跳过"时的回答**:按设计不能,跳过的唯一办法是 abort run + 改 tasks.md 移除该任务 + +放在"advance 报 STATUS 缺失的正确应对"节之前,跟其他易混淆场景集中在一起。 + +**对照源码确认**: +- "P0 不带证据标签自动降级 advisory" 在 `task_swarm_outbox.py:280-286` 真实装 +- "p0-fix 不再 review 直接进 validation" 在 `references/task-swarm.md §3 line 61` 明确写过 +- "validator fix_targets 不带 P0/P1 标签" 在 `references/task-swarm.md §4.3 line 175` 明确写过 + +新节是把分散在 references / 代码里的事实**集中到 commands 一处**,让主代理读 commands 时就能正确分辨,不必再去 references 拼。 + +### Tests + +- 纯文档改动,无 Python 代码路径影响 +- 全套 pytest **212/212 PASS** + +## 0.10.18 (2026-05-23) + +### Fixed — `commands/task-swarm.md` 第 4 步软提示导致主代理提前 advance + team-lead 代笔补 STATUS + +**用户痛点(login-page 事故现场)**:主代理在 `coder-p0fix-g1-r1-f0` 还 ⠙ streaming 时就调 `advance --phase p0-fix`,依据是 team-lead 报告"已修复 result.md STATUS line"。advance 看磁盘上 STATUS 合法 → 返回 `ok:true` 进入 validation → validator 验出 P0 还在(f0 实际没修完)→ 主代理 fork 自定义命名的 `coder-fix-session-validation`(违反 task_swarm 命名规则)→ 同时**两个 agent 并发改 session.js**,状态机进一步崩坏。 + +事后看 state.json:`phase=v-fix`、`failed_status=failed`、`vfix_in_flight=[coder-vfix-g1-r2-f0]`(task_swarm 期待的 agent 没被 spawn,只有目录壳),teammates UI 显示 2 个 streaming agent 在改同文件。 + +根因三层: + +1. **commands/task-swarm.md 第 4 步措辞 "等齐 subagent 返回(PostToolUse hook 注入提醒,可读可忽略)"** —— "可忽略"等于告诉模型可以不等 +2. **team-lead 代笔补 STATUS 反模式没明确禁止** —— 主代理凭口头报告判定完成 +3. **自定义命名 agent 绕开 task_swarm in_flight 规则没明确禁止** —— validator fail 后主代理另起 `coder-fix-xxx` 而不用 plan 给的 `coder-vfix-g{N}-r{R}-f{I}` + +修复 `commands/task-swarm.md`: + +#### 第 4 步全文重写为强约束 + +旧:`4. 
等齐 subagent 返回(PostToolUse hook 注入提醒,可读可忽略)` + +新:完整约束(节选): + +> - **必须**先在主代理 UI 看 "Waiting for N teammates" 区域,**所有** fork 出去的 Task 都 ✓ completed 才能进 step 5;**任何 ⠙ streaming / ⠴ running Bash 的就不能 advance**。 +> - **不要**凭口头报告判定完成——包括 team-lead / 其他平台 agent 说"已修复 STATUS"/"已完成"。**只有** subagent 自己的 Task tool 返回 ✓ completed 才算数。 +> - PostToolUse hook 注入的"plan 提醒"**不是**"立即 advance"指令。 +> - 不确定时调 `task_swarm.py plan --run `,若返回 `action: *-waiting`,**禁止** advance。 +> +> **常见误判**: +> - "team-lead 说改完了" ≠ subagent 真完成 +> - "f0 跑了 30 个 tool 看起来快完了" ≠ completed +> - "其他 4 个都 ✓ 了最后 1 个估计也快" ≠ 可以提前;advance 之后没回头路 + +step 3 也加了一句明确禁止自定义 agent_key: + +> `coder-fix-xxx` / `coder-session-fix` 等自定义命名**全部禁止**,必须用 plan 给的 `coder-vfix-g{N}-r{R}-f{I}` 等规范名 + +#### 新节「advance 报 result.md 缺 STATUS / 解析失败的正确应对」 + +放在「异常出口」前。明确列出 4 条错误做法(手补 STATUS / 凭口头报告 advance / 凭印象判定 / 起新名字 agent)+ 正确做法 5 步(保留残缺 result.md / 查 in_flight 状态 / 等 subagent 真完成 / 用同一 agent_key 重 fork / 多次失败报用户 abort)。 + +核心断言: + +> STATUS 缺失多半意味着 subagent 提前退出 / 工作未完成——代码改动可能根本没刷到磁盘。手补 STATUS 后 advance 通过,下游 reviewer/validator 拿到的是半成品代码,必然 fail。 + +### 未做的事 + +- **未修 task_swarm.py advance 加 subagent lifecycle 检查**:stdlib-only 脚本跨不到 Claude Code/codebuddy 框架的 Task spawn API,没法验证"所有 spawn 的 Task 是否真已退出"。约束只能在主代理文档层做。 +- **未扩展 PreToolUse hook 拦截 subagent outbox 写**:team-lead 是独立 subagent,它的 session_id 不在 specode `~/.specode/sessions/` 里,hook 静默放行——这是 0.10.13 hook 的设计盲区,但扩展拦截会误伤合法 subagent 的正常 outbox 写(coder 写 result.md 本来就该走 Edit),暂不动。 + +### Tests + +- 纯文档改动,无 Python 代码路径影响 +- 全套 pytest **212/212 PASS** + +## 0.10.17 (2026-05-23) + +### Changed — `commands/task-swarm.md` 顶部加强制前置阅读指引(修软提示无效问题) + +**用户痛点**:模型读 `commands/task-swarm.md` 后跑 task-swarm 流程,**明知**有 `references/task-swarm.md` 这份详细规格(commands 已有多处 "详见 references/task-swarm.md" 软提示),但**主动选择只读 commands**。模型内心戏证据: + +> "我应该读取 commands/task-swarm.md,因为它可能包含命令的具体用法。" + +结果:模型按 commands 81 行的简化路由开始干,遇到 plan 输出解析 / advance 失败 / writeback 越界等细节就凭印象推——这是 0.10.13 user-login 事故里 
r2/r3 漂移 + 主代理手工 Edit state.json 的反模式根源之一。 + +根因:现有 "详见 references/task-swarm.md" 措辞太弱(line 8 / 57-67 / 80 都有),模型当作背景资料而非必读项。 + +修复:`commands/task-swarm.md` 顶部加 **⛔ 强制前置阅读** 节,明确: + +1. 列出 references/task-swarm.md 的 9 个章节 TOC(让模型知道里面有什么) +2. **指令式**前置要求:"**在调任何 `task_swarm.py` 子命令之前**(包括 init / plan / advance / writeback / resolve),必须先 Read references/task-swarm.md 至少扫一遍 TOC + §3 + §9" +3. 明确 commands 文件的边界:"本文件下面的 3 步路由**只够回答'现在该调哪条 CLI'**,不够回答 plan 输出怎么解析 / advance 失败该 retry 还是 fork / writeback 越界怎么办" +4. 兜底约束:"**禁止凭印象推**;如果对任何一步仍不确定,先 Read references 对应章节再动手" + +放置在 frontmatter 后、3 步路由前,最显眼位置。 + +**未改 commands/spec.md 和 commands/continue.md**:用户只反馈 task-swarm 这一处遇到问题,其他 commands 没有真实证据需要同等强化。等出现实际 case 再说,避免预防性过度设计。 + +### Tests + +- 纯文档改动,无 Python 代码路径影响 +- 全套 pytest **212/212 PASS** + +## 0.10.16 (2026-05-23) + +### Fixed — slug 强制 ASCII 与 0.10.14 文档"保留原文不做翻译"自相矛盾(中文 slug 被静默换成英文) + +复现:用户在 codebuddy 跑 `/specode:spec -n 登录页面 帮我做一个简单的登录页面`。主代理按 0.10.14 4a 路径调 `spec_init.py --name 登录页面 ...`,CLI 报错"非法 slug"(exit 3)。主代理**自动 fallback 到 4b 推导**,把 slug 换成 `login-page` 再调一次(成功)——但用户不知道目录名被偷偷换了。 + +根因有两层: + +1. **代码层**:`spec_init.py:174` 的 `SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,79}$")` 强制 ASCII 小写+数字+短横线,跟 0.10.14 commands/spec.md 4a 承诺的"保留用户原文,不做翻译/推导"自相矛盾。这条 ASCII 限制是早期跨 OS 文件系统兼容性的历史包袱,但现代 Python 3 + Windows 10+/macOS/Linux 都支持 UTF-8 路径,已无必要。 +2. **流程层**:主代理在 SLUG_RE 失败后**静默 fallback 推导**,没让用户知情。用户用 `-n` 形式就是想精确控制目录名,自动换成英文 = 欺骗用户。 + +修复: + +1. **`spec_init.py:174` SLUG_RE 放宽**: + + ```python + # 0.10.16+:允许 Unicode(中文/日文/emoji 等),仅禁文件系统危险字符 + SLUG_RE = re.compile( + r'^[^<>:"/\\|?*\s\x00-\x1f.\-]' + r'[^<>:"/\\|?*\s\x00-\x1f]{0,79}$' + ) + ``` + + 拒:`< > : " / \ | ? *`(Windows 禁字符)、控制字符 `\x00-\x1f`、任何空白(避免 shell 转义麻烦);首字符额外拒 `.`(隐藏文件)、`-`(CLI flag 歧义)。新增 `_WIN_RESERVED` set 拒 Windows 保留名(`CON` / `PRN` / `AUX` / `NUL` / `COM1-9` / `LPT1-9`)。新增 `_slug_invalid_reason(slug)` 返回用户可读的拒绝原因(替代单一错误消息)。 + +2. 
**`commands/spec.md` 4a 加分支**: + + > spec_init.py exit 3(slug 非法)时:**禁止**主代理**静默 fallback 到 4b 推导**——用户用了 `-n` 形式就是想精确控制目录名,自动换成英文 slug 是欺骗用户。正确做法:把 CLI stderr 报给用户让用户重选,仅当用户明确说"你帮我想一个"时才走 4b 推导。 + +3. **`references/workflow.md` step 0** 同步说明 + 例 2(中文 slug)。 + +例 2(新支持): + +``` +/specode:spec -n 登录页面 帮我做一个简单的登录页面 + → --name 登录页面 + → --requirement-name "登录页面"(非 ASCII slug 复用原文做显示名) + → --source-text "帮我做一个简单的登录页面" + → specs/登录页面/ 目录被创建 +``` + +### Added — `/specode:spec -h` 帮助文本顶部加「用法」节 + +之前 help 只有"会话与锁 / 工作流 / 会话日志"三节,没列实际 CLI 用法。0.10.14/0.10.15 加了 `-n ` 显式语法和 `project-root-choice` selector,help 没跟上。本版本在 `HELP_OUTPUT_TEMPLATE` 顶部加「用法」节(5 行简表): + +``` +用法: + /specode:spec -n <需求> 推荐:显式指定 spec 目录名(slug 直接用作 specs//) + /specode:spec <需求> 兼容:主代理从 <需求> 推导 slug(结果不可预知) + /specode:continue [slug] 接管已有 spec(无 slug 时列表选) + /specode:end 退出当前 spec 模式 + /specode:status 查看会话与 spec 状态 +``` + +工作流流程图保持原样——`project-root-choice` 是 selector 自动引导的内部步骤,用户不必在 help 里看到字段名。 + +### Tests + +- 重写 `test_spec_init_rejects_invalid_slug` 为 parametrize 11 case:`evil/path` / `bad\\slash` / `bad` / `bad:colon` / `bad*star` / `has space` / `.hidden` / `CON` / `nul` / `trailing.` / 空 slug 全部 exit 3 +- 新增 `test_spec_init_accepts_unicode_and_extended_ascii_slug` parametrize 7 case:`user-login` / `UserLogin` / `登录页面` / `ログイン` / `auth_v2` / `spec.with.dots` / `user-1.0.0` 全部 exit 0,且磁盘 spec_dir.name == 用户原文 +- 全套 pytest **212/212 PASS** + +## 0.10.15 (2026-05-22) + +### Added — `project_root`:spec 文档目录与代码实现目录解耦 + +**用户痛点**:在 user-login 事故中,task-swarm coder subagent 把 `npm install` / `src/App.tsx` / `database/migrations/` 全写到了 spec 文档目录 `/specs/user-login/` 下,污染了 vault。根因:`render_coder_prompt` 生成的 task.md 上下文段里**只有 `spec_dir` 字段,没有项目实现根目录概念**,subagent 看到 `@writes:src/services/auth-service.ts` 这种相对路径就拿 spec_dir 当根。 + +修复路径:**spec 文档目录与代码实现目录解耦**——`spec_dir` 只放 `.md` 文档和 `.task-swarm/` 状态,代码实际写入到 `project_root`(绝对路径,由用户在 spec 创建后通过 selector 选定)。 + +实现: + +1. 
**`spec_init.py`**: + - 写 `.config.json` 时记录 `invocation_cwd = os.getcwd()`(用户启动 Claude Code 时的目录,供 selector 渲染用) + - `pending_selector` 默认改为 `project-root-choice`(替代 `workflow-choice`) + - 新增 `project_root: null` 字段(待 `set-project-root` CLI 写入) + +2. **`spec_session.py` SELECTOR_PROMPTS["project-root-choice"]**(新 selector): + - 三选项:`cwd(在已有项目里迭代)` / `cwd/slug(新项目子目录)` / `自定义路径` + - 每个选项 description 带具体路径(hook 注入时填入 `` / ``) + - 用户选定后由主代理调 `set-project-root` CLI 写入 + +3. **`spec_session.py` `cmd_set_project_root`**(新 CLI): + - `set-project-root --spec

<dir> --session <session_id> --root <abs_path>` + - 校验:lock holder 必须是 current session;`--root` 必须绝对路径;不存在则 mkdir -p;存在但非目录 exit 1 + - 写 `.config.json.project_root` + 把 `pending_selector` 推进到 `workflow-choice`,session 同步 + +4. **`task_swarm_prompt.py` `_context_block`** + 各 `render_*_prompt`: + - context block 加 `- project_root: ` 行(fallback 文本明确"未设置时用 spec_dir") + - `render_coder_prompt` 新增 `## 项目根目录与路径规约` 段:明确"`@writes/@reads` 相对 `project_root`,**严禁**写到 `spec_dir`,Bash 命令请先 `cd` 到 `project_root`" + - reviewer / validator 也注入 project_root(跑测试时 cd 用) + +5. **`task_swarm.py`** 调用方:新增 `_resolve_project_root(sm)` helper 读 `spec_dir/.config.json.project_root`;6 处 `render_*_prompt` 调用全部传入。 + +6. **`commands/spec.md`** 第四步「成功后必做」:从直接呈现 `workflow-choice` 改成**两步走**——先 `project-root-choice` selector → 用户选 → 主代理调 `set-project-root` CLI → 再 `workflow-choice` selector。两步都不 end turn。 + +7. **`references/selectors.md`** drift-sync 新增 `A0 project-root-choice` 节,byte-identical 与 SELECTOR_PROMPTS 一致。 + +**向后兼容**:老 spec(pre-0.10.15)的 `.config.json` 没有 `project_root` 字段,`_resolve_project_root` 返回 `None`,render_*_prompt 输出 fallback 文本("⚠ project_root 未设置;fallback 用 spec_dir"),不阻断流程。用户可手动调 `set-project-root` 补字段。 + +**用户使用流程**: + +``` +/specode:spec -n user-login 添加用户登录功能 + → spec_init.py 创建 spec + 写 invocation_cwd + → 主代理呈现 project-root-choice selector(3 选项含具体路径) + → 用户选「cwd(在已有项目里迭代)」 + → 主代理调 set-project-root --root + → CLI 写 project_root + 推 pending_selector → workflow-choice + → 主代理立即呈现 workflow-choice selector + → ... 
+``` + +### Tests + +- 新增 5 个 `test_set_project_root_*`:成功路径 + 自动 mkdir + 拒绝相对路径 + 拒绝非目录 + 拒绝非 lock-holder +- 新增 `test_on_user_prompt_project_root_choice_emits_with_cwd_context`:hook 注入 selector 时填充 invocation_cwd / cwd_subdir +- 新增 `test_coder_prompt_includes_project_root_from_spec_config` + `test_coder_prompt_fallback_when_project_root_unset`:覆盖 project_root 注入 task-swarm prompt 的两条路径 +- 更新 3 个测试预期:spec_init 后 pending_selector 是 `project-root-choice` 而非 `workflow-choice`,集成测试加 set-project-root 调用步骤 +- selectors.md drift test 自动 cover `project-root-choice` byte-identical +- 全套 pytest **195/195 PASS** + +## 0.10.14 (2026-05-22) + +### Added — `/specode:spec -n <需求>` 显式指定 spec 目录名 + +用户痛点:当前 `/specode:spec <需求>` 走"主代理推导英文 slug"路径——推导结果对用户不可预知(如用户想要 `refund`,主代理可能推成 `order-refund-flow`)。即使用前缀形式 `<名称>:<内容>`,左侧也只是 `requirement_name`(中文显示名),slug 仍是主代理推。用户无法精确控制 `/specs//` 的目录名。 + +`spec_init.py` 的 CLI 层早就支持 `--name `(line 230,必填),bug 在文档/指引层始终引导主代理"推导"。本版本在 4 处文档加入显式 `-n` / `--name` 路径作为**推荐形式**: + +- `commands/spec.md`:argument-hint 加 `-n <需求>` 在最前;第四步拆成 4a(显式 `-n`,推荐)+ 4b(推导,兼容) +- `skills/specode/SKILL.md` 路由表第一行:标注"优先 `-n `" +- `skills/specode/references/workflow.md` §1.1:加 step 0「显式 slug」,明确"有 `-n` 时跳过 step 1+2 的前缀解析与推导" + +`requirement_name` 默认从 slug 推:短横线 → 空格 + 首字母大写(如 `user-login` → `User Login`)。 + +例: +- `/specode:spec -n user-login 添加用户登录功能` → `--name user-login --requirement-name "User Login" --source-text "添加用户登录功能"` +- `/specode:spec --name dark-mode 加个深色主题切换` → `--name dark-mode --requirement-name "Dark Mode" --source-text "加个深色主题切换"` + +旧形式(纯 `<需求>` / `<名称>:<内容>`)保留兼容,但 workflow.md 明确"推导结果对用户不可预知;若用户在意目录名应引导改用 `-n` 形式"。 + +### Tests + +- 无新增测试:`spec_init.py --name` 一直是必填字符串参数,无需在 CLI 层验证;本次纯文档改动,不影响代码路径。 +- 全套 pytest **186/186 PASS** + +## 0.10.13 (2026-05-22) + +### Fixed — task-swarm v-fix prompt 写到 `r{round+1}`,但 state 命名是 `r{round}`(导致 "产物文件不存在" 死锁) + +复现:`/specode:spec ...` 走到 task-swarm → validation round 1 fail → 进 v-fix。`begin_v_fix` 把 
`sm.round` 从 1 升到 2,并把 `vfix_in_flight = ["coder-vfix-g1-r2-f*"]`(用当前 round 命名,正确)。但 `task_swarm.py:_materialize_prompts_v_fix` 调 `render_coder_prompt(round_=sm.round + 1)` —— 多 +1 一次 → 磁盘 task.md 写到 `agents/coder-vfix-g1-r3-f*/task.md`。 + +后果链: + +1. plan_next 输出的 fork hint(`L572: r{sm.round+1}`,**这里也 +1,因为它在 begin_v_fix 之前调用,sm.round 还是旧值**)刚好等于磁盘文件名 → 主代理按 hint fork `coder-vfix-g1-r3-f*` subagent → 产物落在 r3 目录 +2. 下一次 `advance --phase v-fix` 时,cmd_advance 按 `vfix_in_flight = [r2-*]` 找 `agents/coder-vfix-g1-r2-f*/outbox/result.md` → 全部不存在 → 报 "产物文件不存在" +3. 主代理面对 state(r2)与磁盘(r3)不一致,**判定为 "naming mismatch" 然后手工 Edit `state.json`** 抹平差异 → 状态机被人为污染 → 后续 phase 持续走错 → 最终 `validator-g1-r2` subagent spawn 后无人回收产物 → Claude Code 界面无限刷 "Waiting for 1 teammate..." + +修复:`task_swarm.py:818` 把 `round_=sm.round + 1` 改成 `round_=sm.round`。理由:`_materialize_prompts_v_fix` 是 cmd_advance 在 `begin_v_fix` **之后**调用的,`sm.round` 已经自增过;`begin_v_fix` 写 `vfix_in_flight` 用的也是 `sm.round`(state.py:385)。在已自增的 round 上再 +1 = 多 +1 一次。同理 plan_next L572 / L583 因为是 begin_v_fix 之前调用,仍保留 `+1`,这是对的。 + +回归测试:新增 `test_v_fix_prompt_files_match_state_in_flight` —— 触发 validation fail → begin_v_fix 后断言 `state.vfix_in_flight` 每个 agent_key 对应的 `agents//task.md` 必须存在。旧 bug 下这个 test 直接挂。 + +### Added — PreToolUse hook 阻断主代理直接 Edit/Write task-swarm 受控路径 + +事故还原显示:上面 Bug A 触发"state 跟磁盘不一致"时,主代理推理"这是 naming mismatch 我需要手工修 state.json",连续 5 次 Edit `state.json`(清 `vfix_in_flight` 列表 / 把 `failed_status: "failed"` 改成 `null` / 把 `completed_at` 写未来时间戳 / events 追加伪造 `completed` 事件),还 Edit 了 subagent 的 `outbox/result.md` 手工补 STATUS 行。这些"修补"全是**绕过 task_swarm.py 状态机契约**的反模式,导致状态污染雪崩。 + +加防护:`hook_on_pre_tool_use` 检测 tool_input.file_path 是否落在以下三类受控路径下,命中则 `sys.exit(2)`(PreToolUse 阻断)并把拒绝原因写 stderr: + +| 路径模式 | 拒绝原因 | +|---|---| +| `.task-swarm/runs/*/state.json` | state 唯一事实来源,只能 `task_swarm.py advance` 改 | +| `.task-swarm/runs/*/agents/*/task.md` | task_swarm.py 为 subagent 生成的 prompt,改了 subagent 也不会重读 | +| 
`.task-swarm/runs/*/agents/*/outbox/*` | subagent 产物,手工补 STATUS = 伪造工作 | + +stderr 详细说明 why + 正确路径 hint(如"重新 fork subagent 或汇报 task_swarm.py 解析 bug")。仅在 `mode==active` 且 `task_swarm_run_id` 已绑定时生效,非 task-swarm 场景零开销。原 `tasks.md` 直写软提醒保留(不阻断)。 + +### Tests + +- 新增 `test_v_fix_prompt_files_match_state_in_flight`(Bug A 回归) +- 新增 5 个 `test_on_pre_tool_use_*` 覆盖 state.json / agent task.md / outbox 阻断 + 正常源码 Edit 通行 + idle session 不拦截 +- 全套 pytest **186/186 PASS** + +## 0.10.12 (2026-05-22) + +### Fixed — `/specode:end` 之后模型仍在响应末尾输出 `─── spec-mode ───` 状态行(banner 残留) + +复现:`/specode:spec ...` → 走若干 turn → `/specode:end`(CLI 返回 `ok:true`)→ 后续任意 turn 模型仍输出 `─── spec-mode ─── spec: ... | /specode:end 退出` 状态行。 + +根因:`hook_on_user_prompt` 在 `mode in ("idle","ended")` 时静默 early-return,**不注入任何反向消息**。但此前 N 个 turn 已反复注入 `STATUS_FOOTER_TEMPLATE`("请在本次响应正文之后**额外**输出一行 ─── spec-mode ─── ...")与 `SPEC_MODE_CONTINUE_REMINDER`("下一 turn 必须继续遵守 ... 通过 /specode:end 才能正式退出")。`/specode:end` 提交那一 turn mode 仍是 active,hook 最后一次注入照常进行;end 之后下一 turn hook 安静停止,但模型 context 里堆积的"必须输出 footer / 下一 turn 必须继续遵守"指令仍生效,凭惯性继续输出 banner。 + +修复: + +1. 新增 `SPEC_MODE_ENDED_REMINDER` 模板:明确告知模型"已退出,作废此前所有 spec-mode 指令,**不要**再输出 `─── spec-mode ───` footer" +2. `cmd_end` 设 `post_end_reminder_pending=True`;同时**对齐 `end.md` 文档**清掉 `active_spec_slug` / `active_spec_dir` / `spec_id` / `phase` / `task_swarm_run_id`(此前实现只改 `mode/ended_at/lock_state/pending_selector`,违反文档约定) +3. 
`hook_on_user_prompt` 在 `mode=="ended" and post_end_reminder_pending` 时注入提醒并清标志;其他 `ended/idle` 路径维持原静默 + +行为:end 后**第 1 turn** 模型收到明确反向指令 → **第 2 turn 起** hook 完全静默 → banner 不再出现。 + +### Changed — `doc-confirm-*` selector option description 用具体环节名替代「下一 phase」 + +`requirements.md / bugfix.md / design.md` 三份文档确认 selector 的「确认(推荐)」和「查看全文」option description 此前都用泛化"进入下一 phase / 不进入下一 phase"。每个 selector 的下一阶段实际固定: + +- `doc-confirm-requirements` → 进入设计(design)环节 +- `doc-confirm-bugfix` → 进入设计(design)环节 +- `doc-confirm-design` → 进入任务拆分(tasks)环节 + +同步更新 `references/selectors.md`(drift test byte-identical cover)。 + +`workflow-choice` 的"进入下一阶段"保留泛化(next 按 workflow 动态选 requirements/design/bugfix 三选一,无法静态命名)。 + +### Tests + +- 扩展 `test_end_sets_mode_ended_and_releases_lock` 覆盖 `active_spec_*` 字段清零 + `post_end_reminder_pending` 标志 +- 新增 `test_on_user_prompt_post_end_reminder_emits_once_then_clears`(hook 单元,覆盖第 1 turn 注入 + 第 2 turn 静默) +- 重写集成测试 `test_after_end_user_prompt_emits_nothing` → `..._emits_one_shot_then_nothing` +- 全套 pytest **180/180 PASS** + +## 0.10.11 (2026-05-22) + +### Removed — `spec-writer` subagent;4 份核心 spec 文档改由主代理直接生成 + +复现:用户跑 `/specode:spec 在 git 目录做登录页面` → 走到 requirements phase → +主代理 fork `spec-writer` agent 写 requirements.md → spec-writer 各种 Glob/Read +找不到 `assets/templates/` 模板(实际找的是不存在的 `.template.md` 后缀)→ +hallucinate 18 条通用登录页面 SHALL + 408 行 design.md(JWT/HTTPS/CSRF/2FA), +跟用户原始需求"在 git 目录做登录页面"完全脱节。 + +根因:subagent 设计反模式 —— 每个 subagent 是独立 LLM 调用 + 新 context window, +**拿不到主代理上下文**(不读 SKILL.md / 不知道用户原始 `source_text` / 不知道 +流程状态)。即使模板路径正确,subagent 仍按通用模板填空,内容不贴合用户具体 +需求。主代理本身就有完整 SKILL + 流程上下文 + source_text,直接写质量更高。 + +修复(用户授权我自决方案): + +1. **删除** `plugins/specode/agents/spec-writer.md` +2. **`SKILL.md` 加 §「Spec 文档生成」(单一规则来源)**:主代理 Read + `${CLAUDE_PLUGIN_ROOT}/assets/templates/.md` 作骨架 + 按 + `/.config.json.source_text` 填空 + Write 到 `/.md` +3. **`SKILL.md` Iron Rule 7 改写**:移除 "必须 fork spec-writer subagent" 约束 +4. 
**31 处 `fork spec-writer` 引用全部替换** 成 "主代理按 SKILL.md §「Spec 文档生成」走": + - `spec_session.py SELECTOR_PROMPTS` 6 selector 的「用户选定后流程」段 + - `references/selectors.md` 同步 6 处(byte-identical,drift test cover) + - `references/workflow.md` 4 处 phase 流程 + - `references/templates.md` 顶部说明 + 6 处分散提及 + - `commands/task-swarm.md` 1 处 cross-ref + - `scripts/spec_init.py` 1 处 docstring + - `assets/templates/tasks.md` 1 处 ## 测试要点 说明 +5. **`assets/templates/` 4 份模板(requirements.md / bugfix.md / design.md / tasks.md)保留** + 作为主代理 Read 的骨架来源 + +### Changed — `commands/spec.md` + `commands/continue.md` 进一步变薄(commands 薄 / SKILL 厚) + +按用户指导原则 "命令中不要设置过多流程,只列关键必要内容,让模型与 skill 对接流程": + +- **`spec.md` 第四步「成功后必做」**:从 3 件事详细描述压缩成「按 SKILL.md + §Status Footer「新 spec 创建/接管的当 turn」走」一句话引用 + 保留关键 + 禁止项("严禁 hallucinate '请下一轮输入 /specode:continue'") +- **`continue.md`** 大幅瘦身:删除详细 5 步描述,改成「按 `references/workflow.md` + §9.1 / §9.2 走 N 步」+ 关键禁止项("禁止跳过 selector 直接 acquire"、"禁止 + Grep 项目目录"、"LockHeld 禁止直接 --force") + +commands 现在只列入口路由 + 关键不可漏的约束(hallucinate 防御);业务流程 +全部 link 到 SKILL.md / `references/workflow.md`。模型从入口跳到 SKILL 拿详 +细规则,避免 commands 跟 SKILL 双份维护漂移。 + +### 测试 + +- drift test 11/11 PASS(selectors.md 与 SELECTOR_PROMPTS byte-identical) +- 全套 pytest **179/179 PASS** + +## 0.10.10 (2026-05-22) + +### Fixed — selector 选定后流程缺失 + 主代理 hallucinate "退出 spec 模式" / invent 简化 selector + +承接 0.10.9 修好 `/specode:spec` 创建后引导 hallucinate 之后,又发现两类同源问题: + +**1. 
selector 选定后流程缺失** + +复现:用户跑 `/specode:spec <需求>` → workflow-choice 选 "Requirements first" +→ 主代理只 chat 一句 "已选择 Requirements first" + "请下一轮输入 +`/specode:continue` 继续,或直接提出你的需求细节" → end turn。**没**调 +`phase-transition` / **没** fork spec-writer / **没**生成 requirements.md / +**没**呈现 doc-confirm-requirements selector。 + +证据: +- `references/workflow.md` §2:105 明确说 "用户选完 → 调 phase-transition + → 进入对应 phase";§3.1 写了 fork spec-writer → 生成 requirements.md → + 呈现 doc-confirm-requirements 4 步 +- selector 模板末尾约束段都是 "调用工具后立即 end turn 等待用户选择" —— 这条 + 历史措辞误导主代理:把 `AskUserQuestion` 当作 "end turn 触发器",拿到 user + 选项后只 chat ack 一句就 end turn 让用户输新命令推进 +- 但 `AskUserQuestion` 是**同步阻塞工具**——它返回 user 选项作为 tool result, + 主代理在**同一 turn 内**继续处理,**不应该** end turn + +**2. 主代理 hallucinate "退出 spec 模式" + invent 简化 selector** + +复现:tasks 完成后主代理输出 "Spec 流程完成!现在退出 spec 模式,开始编码实现" ++ 用 "任务清单已就绪,下一步? → 开始编码" 这种 **invent 的简化 selector** +(不是 tasks-execution 模板的 4 个固定选项:用 task-swarm / 顺序执行 / 需要 +调整 / 暂不 coding)。 + +证据: +- `spec_session.py:_auto_pending_selector` line 926-945 phase=tasks → 设 + `pending_selector=tasks-execution`,模板有 4 个固定选项 +- `phase-transition` 是 spec 内部 phase 切换(intake→requirements→...→ + implementation→acceptance→iteration),**不**退出 spec 模式;**只有** + `/specode:end` 才退出 +- 但 SKILL.md 没明确"phase-transition 不退出 spec",也没明确"呈现 selector + 时禁止 invent / 简化",主代理因此 hallucinate + +修复(commands 薄 / SKILL 厚原则): + +**SELECTOR_PROMPTS / selectors.md(10 个 selector 各加「用户选定后流程」段)** + +每个 selector 模板末尾约束段后新增 `**用户选定后流程(同一 turn 内继续)**` +段,列出**每个选项**的下一步动作(phase-transition target / fork agent / +下一个 selector 等)。注意用 `**bold**` 而非 `### H3`——避免被 drift test +的 H3/H4 regex 误识别为 selector 边界。 + +`spec_session.py SELECTOR_PROMPTS` 与 `references/selectors.md` 同步修改, +`test_selectors_drift.py` 11/11 通过保证 byte-identical。 + +**SKILL.md §Selectors 顶部加 3 个子节** + +1. 
**`AskUserQuestion` 工具语义(重要 / 关乎流程连续性)**:澄清 + `AskUserQuestion` 是同步阻塞工具,拿到选项后**同一 turn 内**按 selector + "用户选定后流程" 段继续;**严禁** "已选择 X,请下一轮输入 /specode:continue" + 就 end turn—— `/specode:spec` / `/specode:continue` 是持续流程入口而非 + 回合触发器 +2. **呈现 selector 时禁止 invent / 简化选项**:必须用 SELECTOR_PROMPTS / + selectors.md 模板 question / label / description 逐字传参,**禁止** invent + 简化版(如 "任务清单已就绪,下一步? / 开始编码") +3. **phase-transition 不退出 spec 模式**:`phase-transition` 是 spec 内部 + phase 切换,spec 仍 mode=active 持锁;**只有** `/specode:end` 才退出; + **严禁** "Spec 流程完成!现在退出 spec 模式,开始编码实现" 这类话—— + implementation phase 期间 hook 继续注入 4 条提醒(文档优先 / 代码-文档 + 同步 / 状态行 footer / 仍处于 spec 模式),主代理改代码前后必须按 + §Code-Doc Sync Reminders 同步 tasks.md / implementation-log.md / design.md + +pytest **179/179 PASS**(drift test 11/11 + 全套 168 不变)。 + +## 0.10.9 (2026-05-22) + +### Fixed — `/specode:spec` 创建后 hallucinate 引导 + 漏状态行 footer + +复现:用户跑 `/specode:spec <需求>`,主代理输出 "Spec 已创建成功" 详情后接 +"你可以使用 `/specode:continue` 进入下一阶段继续推进",且**漏了**状态行 +footer。 + +证据: +- `spec_init.py:400-408` 只输出纯 JSON,无任何 "/specode:continue 进入下一阶段" + 引导(全 repo `grep` "使用 /specode:continue 进入" 命中 0 次) +- `hook_on_user_prompt` 注入 footer (line 1550)、但只在 user-prompt 提交时跑; + 用户输 `/specode:spec` 时 session 还是 idle / new,**没**注入 footer 提醒 +- `hook_on_stop` 只 emit `CODE_DOC_SYNC_STOP` + `SPEC_MODE_CONTINUE_REMINDER` + (文字提醒"下一 turn 要 footer"),**不 emit `STATUS_FOOTER_TEMPLATE` 本身** +- → `spec_init.py` 把 session 改成 mode=active + pending_selector=workflow-choice + 之后,本 turn hook 已经跑过、不会重新注入 footer / selector;commands/spec.md + 第四步没规定"成功后主代理本 turn 必做 footer + selector + 禁止 hallucinate + 让用户输命令的引导"——主代理因此漏 footer 又 hallucinate `/specode:continue` + +修复(commands 薄 / SKILL 厚原则): + +1. **`commands/spec.md` 第四步加「成功后必做」子节**:明确 `spec_init` exit 0 后 + 本 turn 必做 3 件事—— + - chat 简报 2-3 行(slug / phase / spec_dir),**禁止**说 "使用 + `/specode:continue` 进入下一阶段" / "你可以使用 ... 推进" / "下一步请 + 输入 ..." 等让用户再输命令的引导 + - 输出状态行 footer + - 立即调 `AskUserQuestion` 呈现 `workflow-choice` selector + +2. 
**`SKILL.md §Status Footer` 加「新 spec 创建 / 接管的当 turn」子节**:统一 + 覆盖 `/specode:spec`(spec_init 完成)和 `/specode:continue [slug]` + (acquire+load+continue 完成)两类首 turn 场景,规定 hook 未刷新时主代理 + 必须主动 chat 简报 + footer + selector,**严禁** "持续流程被打断"类的 + 命令引导。`/specode:spec` 和 `/specode:continue` 是持续流程的入口,进入 + 之后整条 phase 链由 selector + hook + phase-transition 自动推进。 + +`spec_session.py` / `spec_init.py` 不动,是引导文档层修复。pytest 179/179 通过 +(修改的是 .md 文件,不影响测试)。 + +audit 同源风险(其他 commands): + +- `continue.md`(0.10.5 重构后):step 5 已要求 footer;SKILL.md 新子节覆盖 + 主动 selector,无需 commands 再补 +- `end.md`:mode=ended 不输 footer,by-design +- `status.md`:active 期间应输 footer(SKILL.md §Status Footer),轻微风险 +- `task-swarm.md`:init 后立即 plan→fork,by-design + +## 0.10.8 (2026-05-21) + +### Fixed — `spec-in/-/specs` device 段从未被代码实现 + +`references/obsidian.md` §0-§1 + `SKILL.md:158` 明确约定 spec 文档应该落在 +`/spec-in/-/specs/`(让同一 vault 在多设备 / 多用户 +共享时各 device 的 spec 互不串扰、避免锁串扰、避免文件冲突),但 +`spec_vault.py:resolve_doc_root` **从未实现 `device_segment`**——`auto` / +`config-obsidianRoot` 命中后直接返回 vault 根,spec_init 拼出来变成 +`/specs/`,少了关键的 `spec-in//` 整层。 + +复现: + +- `~/.config/specode/config.json` 不存在、`SPECODE_ROOT` 未设 +- `Documents\Notes/.obsidian/` 存在 → auto-detect 命中 +- 跑 `/specode:spec <需求>` → spec_dir 落在 `Documents\Notes\specs\` + 而非约定的 `Documents\Notes\spec-in\windows-qiang\specs\` + +修复(`spec_vault.py`): + +1. 加 `_device_segment()` 函数(`platform.system()` + `getpass.getuser()`), + 返回 `windows-qiang` / `macos-alice` / `linux-bob` 这种规范化串。 +2. `resolve_doc_root` 内部按字段语义分场景追加 `spec-in/` 段: + + | source | 来源 | 追加 device 段? | + |----------|-------------------------------------|------------------| + | override | `--root` 参数 | 否(用户给什么用什么) | + | env | `SPECODE_ROOT` 环境变量 | 否 | + | config | `config.json.rootOverride` | 否 | + | config | `config.json.obsidianRoot`/`docRoot`| **是** | + | auto | Obsidian auto-detect | **是** | + | none | 三层全 miss | — | + +3. 
`cmd_set` 之前 `--vault` 和 `--root` 都写 `obsidianRoot`(导致 `rootOverride` + 字段在代码里实际从未被使用,文档与运行时不一致)。修正为: + + - `--vault <path>` → 写 `obsidianRoot`(`resolve_doc_root` 追加 device 段) + - `--root <path>
` → 写 `rootOverride`(不追加) + - 互斥:写其中一个字段时清掉另一个 + 清掉 legacy `docRoot` + - 输出的 `doc_root` 用 `resolve_doc_root()` 重算,反映 device 段 + +`spec_init.py` / `spec_session.py list-specs` call site **不动**(仍 +`/specs/`,但 `doc_root` 现在已含 `spec-in/`,最终 +路径自动变成 `/spec-in//specs/`)。 + +### Changed — `spec_vault.py` set 字段语义对齐 obsidian.md + +`cmd_set` 现在区分 `obsidianRoot` (`--vault`) 与 `rootOverride` (`--root`) 两个 +互斥字段,跟 `references/obsidian.md` §1 描述对齐。已经用旧版本 `set --root` +写过 config 的用户字段名是 `obsidianRoot`,跑过一次新版 `set --root` 会自动 +迁移成 `rootOverride`(同时清掉旧 `obsidianRoot`)。 + +### Added — 4 个 doc_root device 段测试 + +`tests/test_spec_vault.py` 新增覆盖: + +- `test_status_with_root_override_no_device_suffix`:`rootOverride` 命中不追加 +- `test_root_override_takes_precedence_over_obsidian_root`:两字段并存时 + `rootOverride` 胜出 +- `test_set_root_writes_root_override_no_device_suffix`:`set --root` 写 + `rootOverride` 字段且不追加 device 段 +- `test_set_vault_then_root_replaces_field`:连续 `set --vault` 后 `set --root` + 字段切换 + 互斥清理 + +更新现有 3 个测试以反映新 schema(`test_status_with_config_only` / +`test_set_vault_writes_config_and_status_reflects_config` 现在断言路径含 +`spec-in/` 段)。 + +pytest 全套 **179/179 PASS**(从 0.10.7 的 176 → 179,3 个净新增)。 + +### Notes — 升级影响 + +旧版本生成的 spec 目录(在 `/specs/` 下,缺 `spec-in/`) +**不会被自动迁移**。升级到 0.10.8 后: + +- 新 `/specode:spec` 命令会按约定路径创建(`/spec-in//specs/`) +- `/specode:continue` 调 `list-specs` 时也会看新路径,找不到旧路径下的 spec +- 如需保留旧 spec 内容,手动 `mv /specs/` 到 + `/spec-in//specs/` 并更新对应 `sessions/.json` 的 + `active_spec_dir` 与 `/.active-specode.json` pointer 字段 + +## 0.10.7 (2026-05-21) + +### Changed — `/specode:spec -h` help 删去「命令一览」节 + +命令清单在 `SKILL.md` / `commands/*.md` / README 已有详细说明且会随版本演进, +help 文本内重复列一份反而容易过时(0.10.4 / 0.10.5 加 doc_root 确认步骤、 +task-swarm 前置校验时都需要同步改 help)。help 改为只展示版本号 + 会话与锁 / +工作流概要 / 日志开关,命令细节让用户查 SKILL.md。 + +无业务行为变化。`spec_session.py:HELP_OUTPUT_TEMPLATE` 删除 13 行(line 614-628)。 +hook 测试 17/17 通过。 + +## 0.10.6 (2026-05-21) + +### Fixed — `references/selectors.md` 与 `SELECTOR_PROMPTS` 
漂移 + +Audit 发现 selector 模板有 3 处真 drift: +- `workflow-choice`:selectors.md 缺 "**调用 `AskUserQuestion` 工具**" 后的 + "**,参数完全按下列结构(直接传入,不要翻译/重写选项)**" 子句;约束段 + "立即 end turn" 缺 "等待用户选择"、"工具" 缺 "宿主"、"ESC" 缺 "取消"。 +- `doc-confirm-bugfix` / `doc-confirm-design`:selectors.md §A3 把这两个变体 + 压缩成表格列差异(line 178-181),没给完整 `\`\`\`text` 块,结果文档跟运行时 + 无法逐字对比;spec_session.py 实际模板的简报句格式也跟表格描述对不齐。 +- §A3 H3 标题里残留 `doc-confirm-tasks`(0.9.3 起已废弃合并进 `tasks-execution`)。 + +修法:selectors.md 跟运行时(`spec_session.py SELECTOR_PROMPTS`)对齐—— +- 补 `workflow-choice` 缺失措辞 +- §A3 重构为「H3 分组介绍 + H4 三个 key 各带完整 `\`\`\`text` 块」结构 + (`doc-confirm-requirements` / `doc-confirm-bugfix` / `doc-confirm-design`) +- H3 标题去掉 `tasks` 残留 + +`spec_session.py` 不动(运行时是注入的实际真相,selectors.md 跟它走)。 + +### Added — `test_selectors_drift.py` 防回归 + +`plugins/specode/tests/test_selectors_drift.py` 在 pytest 阶段自动比对两边: +- `test_keys_match`:runtime selector key 集合必须与 selectors.md `### / ####` + 反引号标题命中的 key 集合一致;orphan(一边有一边没)直接 fail +- `test_byte_identical[]`:parametrize 10 个 selector,每个 key 的 + `\`\`\`text` 块内容必须与 `SELECTOR_PROMPTS[key]` `strip()` 后逐字相等 + +跑了一遍:11/11 passed,全套 pytest 176/176 passed(165 + 11 新增)。 + +未来改 selector 措辞 / 增删 selector,pytest 自动 fail 提醒同步两边。 + +## 0.10.5 (2026-05-21) + +### Fixed — `/specode:continue` 跳过 selector 直接 acquire / `/specode:task-swarm` 缺前置校验 + +承接 0.10.3 / 0.10.4 在 `/specode:spec` 上修好的"commands 直接给 CLI 命令 → 主代理 bypass SKILL 业务规则"反模式,本次 audit 发现 `continue.md` 和 `task-swarm.md` 同源: + +**`continue.md`(类型 1,同源 / 高严重度)** +旧版「## 立即调用」行 22-26 直接给 `acquire --spec

--session ` 完整模板, +主代理见命令就跑,**跳过** `references/workflow.md` §9 要求的 5 步流程 +(list-specs 报告 → `AskUserQuestion` 让用户选 ≤4 项 → LockHeld → `takeover-options` +selector → acquire → load)。无 slug 时主代理还会 invent ``。 + +修复:重写为两步路由 +- 第一步(无 slug):先确认 doc_root(接 SKILL.md §「首次使用 / auto-detect 命中时的确认」)→ `list-specs` → 空列表引导 `/specode:spec` / 非空 chat 1-2 行摘要 + `AskUserQuestion` 单列单选(≤4,按 `last_heartbeat_at` 取最近)→ 用户选定后转第二步 +- 第二步(有 slug):解析 `spec_dir` → `acquire`(exit 4 `LockHeld` → **禁止**直接 `--force`,先 `takeover-options` selector 让用户选)→ `load` → `continue` → 报告 + 状态行 footer + +**`task-swarm.md`(类型 2,弱同源 / 中严重度)** +旧版行 8-12「## 立即调用」直接给 `task_swarm.py init --tasks /tasks.md`, +`` 占位符**鼓励主代理 invent 路径**而不去读 `sessions/.json` 拿 +`active_spec_dir`;缺 phase / `pending_selector` 前置校验,用户裸输 +`/specode:task-swarm` 时主代理无前置检查直接 init。 + +修复:拆 3 步 +- 第一步(前置校验,必做):先 `read-session` 拿状态,强制满足 `mode=active` / + `active_spec_dir` 非空 / `phase=tasks` / `pending_selector=tasks-execution` 且 + 已选 task-swarm 路径 +- 第二步(init):用 step 1 的 `active_spec_dir + /tasks.md`,禁止 invent +- 第三步(7 步循环):保留 sketch,详细规格全部指向 `references/task-swarm.md` + +同时 SKILL.md §Task-Swarm 补「`/specode:task-swarm` 前置校验(强制)」小节, +是 commands/task-swarm.md 第一步引用的业务规则单一来源。 + +### Changed — commands/task-swarm.md 大幅精简(commands 薄 / references 厚) + +原 task-swarm.md 132 行重复了 `references/task-swarm.md` 的 5 段内容(Phase 状态机 +ASCII 图 / 7 步循环展开 / 文件冲突 / 详细异常处理 / 命令调用样例)。精简到 ~70 行, +只保留 commands 路由层职责(前置校验 / init / 7 步 sketch + heartbeat / 异常出口 +摘要),详细规格全部 link 到 `references/task-swarm.md` §1-§9 单一来源。 + +设计原则延续 0.10.4:commands 薄(路由 + 边界引导)、SKILL / references 厚(业务规则 ++ 协议详解)。commands 不重复细节,边界 case 指章节,业务流程改动只动 SKILL / references。 + +## 0.10.4 (2026-05-21) + +### Fixed — 新建 spec 时 silent fallback 到 Obsidian vault(首次使用确认缺失) + +承接 0.10.3 修好 `/specode:spec -h` fast-path 旁路后,又一个 `commands/spec.md` +引导主代理调 CLI 而 bypass 业务规则的 case:用户在 git repo 下输入 +`/specode:spec 在 git 目录下创建一个项目,用来做一个登录页面`,主代理直接 +`sh ... 
spec_init.py --name login-page ...`,spec 文档**silent 落到** +`C:\Users\qiang\Documents\Notes\specs\login-page`(Obsidian vault 自检测命中), +没有任何确认。 + +证据: +- `~/.config/specode/config.json` 不存在,`SPECODE_ROOT` 未设 +- `Documents\Notes\.obsidian/` 存在 → `spec_init.py` 走第 3 层 silent fallback +- `spec_vault.py status` 返回 `{"source": "auto", "doc_root": "Documents\\Notes"}` +- 主代理 chain-of-thought 截图:直接解析 slug + 调 CLI,没问 doc_root + +根因:与 0.10.3 同源——`commands/spec.md` 旧版第二步「## 立即调用」直接给 +`sh spec_init.py ...` 命令,主代理照执行,**SKILL.md § Document Root Resolution +只讲了"三层全 miss → exit 3",没规则约束"第 3 层命中(非全 miss)也应先确认"**。 +spec_init.py 实现是 silent 用了。 + +修复(双管齐下): + +1. `SKILL.md § Document Root Resolution` 加新子章节 + **「首次使用 / auto-detect 命中时的确认(强制)」**:明确 `source = auto` + 或 `none` 时**禁止**直接调 `spec_init.py`,必须先 `AskUserQuestion` 三选 + (接受检测到的 vault + 持久化 / 改用其他绝对路径 + 持久化 / 中止), + 用户选定后 `spec_vault.py set --vault

<path>` 持久化,下次自动用、不再问。 + +2. `commands/spec.md` 重构为 **4 步路由**(依次匹配 `$ARGUMENTS` 形态): + - 第一步:fast-path(`-h` / `--help` / `--vault-status` / `--detect-vault` / + `--sync-status`,hook 已注入模板)→ verbatim print + - 第二步:set 命令(`--set-vault <path>` / `--set-root <path>`,hook **不**拦截) + → 调 `spec_vault.py set --vault <path>
`,end turn + - 第三步:新建 spec 前必做 —— 调 `spec_vault.py status`,按 SKILL.md + 新规则做 doc_root 确认 + - 第四步:`spec_init.py` 创建 spec + + 修正 0.10.3 commands/spec.md 第一步把 `--set-vault` / `--set-root` 误列入 + fast-path 旗标的 bug(hook 实际不拦截这俩,主代理按"等 hook"会卡住)。 + set 命令现在有独立第二步,调 `spec_vault.py set` 后 end turn。 + +设计原则:commands 薄(路由 + 边界引导)、SKILL 厚(业务规则)。commands 不 +重复 SKILL 里的细则,只在边界 case 指向 SKILL 章节,让业务流程在 SKILL.md +单一来源维护。 + +## 0.10.3 (2026-05-21) + +### Fixed — `/specode:spec -h` fast-path 被 commands/spec.md 引导旁路 + +0.10.2 修复了 hook emit `UnicodeEncodeError` 之后,`hook_on_user_prompt` +能正确向主代理 `additionalContext` 注入完整 fast-path 模板(含 verbatim +print 指令 + HELP CONTENT BEGIN/END + `specode v0.10.2 ...` 完整 help +body,验证 stdout 2598 字节,session log 无 `hook_exception`)。但主代理 +**仍然不按 fast-path 走**,而是 `sh ... spec_init.py -h`,把 spec_init.py +自己的 argparse help 当成 specode help 输出。 + +根因:`commands/spec.md` 顶部「## 立即调用」标题 + `sh ... spec_init.py ...` +代码块**视觉优先级压倒**原本藏在底部 bullet 第 3 项的"fast-path 参数由 hook +拦截"备注。主代理看到 `-h` 时按"立即调用"分支执行,调起 spec_init.py。 + +修复:把 fast-path 分支前置成「## 第一步」,明确 `-h` / `--help` / +`--vault-status` / `--detect-vault` / `--sync-status` / `--set-vault` / +`--set-root` **不要调任何 CLI**,**禁止** `sh ... spec_init.py -h` 等, +只 verbatim 输出 hook 注入内容;常规需求降为「## 第二步」。 + +证据链: +- CodeBuddy 缓存 0.10.2 已正确部署(plugin.json + scripts/utf-8 reconfigure + + run.sh alias stub 检测均在位)。 +- 最新 session log(`615f599c-...jsonl`)中 `hook_on_user_prompt` 后再无 + `hook_exception`(0.10.2 emit 修复有效)。 +- 用真实 session id 跑 `prompt="/specode:spec -h"` → hook stdout 2598 字节 + 完整 fast-path JSON,含 `specode v0.10.2` 完整 help。 +- 主代理 chain-of-thought 截图:先说 "According to the system reminder hook, + I should output the help content verbatim ...",紧接调 `sh spec_init.py -h` + ——证明主代理同时收到 hook 注入与 commands 引导,按 commands 走。 + +## 0.10.2 (2026-05-21) + +### Fixed — Windows 上 hook 注入彻底失效(两个连续根因) + +1. 
**Launcher 命中 Microsoft Store alias stub**(commit `fb2ef14`)—— + `plugins/specode/scripts/run.sh` / `run.cmd` 探测 `python3` 时会命中 + `%LOCALAPPDATA%\Microsoft\WindowsApps\python3.exe`(0 字节 App Execution + Alias stub,跑起来只打印 "Python was not found" 并 exit 49), + `spec_session.py` 根本没被执行 → CodeBuddy 启动报 + `Hook SessionStart [warning]`,后续所有 hook 全部空跑。 + 修复:`run.sh` 新增 alias stub 路径检测跳过;`run.cmd` 优先级改成 + `py → python3 → python`(`py.exe` 不受 alias 影响)。 + +2. **emit 阶段 UnicodeEncodeError 被 `_safe_hook` 吞并**(commit `6b0a06f`)—— + Windows pipe stdout 默认 fallback 到 locale 编码(中文 Windows 为 + `cp936/gbk`),无法编码 emoji `📝/🪧/⛔`(来自 + `DOC_PRIORITY_REMINDER_ACTIVE` / `STATUS_FOOTER_TEMPLATE` / + `SPEC_MODE_CONTINUE_REMINDER` 模板)。`_emit_hook_additional_context` + 写入时抛 `UnicodeEncodeError`,被 `_safe_hook` 装饰器的 + `except BaseException` 吞掉 → hook exit 0、stdout 空 → CodeBuddy 拿不到 + `additionalContext` → 主代理收不到 fast-path / session_id / selector / + footer / 文档优先提醒。 + 修复:`spec_session.py` / `spec_init.py` 顶部 + `sys.stdout / stderr.reconfigure(encoding="utf-8", errors="replace")`, + 绕过 text-mode encoding。 + +### Fixed — 测试套件跨平台支持(Windows pytest 165/165) + +测试代码硬编码 macOS 路径、`.read_text()` 默认 locale 解码、`subprocess.run` +不指定 encoding、`fake_home` 未隔离 `APPDATA` 等多个跨平台问题: + +- `tests/conftest.py` + 6 个 `tests/test_task_swarm_*.py`:`SCRIPTS_DIR` + 从硬编码 `/Users/xueqiang/Git/specode/...` 改成 + `Path(__file__).resolve().parents[1] / "scripts"`。 +- `conftest.run_script` + `test_task_swarm_cli.py` + `test_task_swarm_hook.py`: + `subprocess.run` 加 `encoding="utf-8"`,env 设 `PYTHONUTF8=1` / + `PYTHONIOENCODING=utf-8`,让子进程 pathlib 与 stdio 同时 utf-8。 +- `conftest.fake_home`:monkeypatch `APPDATA` / `LOCALAPPDATA` 到 + `tmp_path`,防止用户真实 Obsidian 安装漏到 `spec_vault.detect` 测试。 +- 6 个 test 文件的 `.read_text()` 加 `encoding="utf-8"`:解决 utf-8 写入的 + `.config.json` / `sessions/*.json` 被默认 cp936 解码失败。 +- `test_spec_session_hooks::test_on_user_prompt_help_fastpath`: + `"specode v0.6"` 断言改成 `"specode v"`,兼容 0.10.1+ 动态版本号。 + 
+无业务行为变化。Windows 上 `pytest` 从 109 fail → 165/165 全过。 + +## 0.10.1 (2026-05-20) + +### Changed — `/specode:spec -h` help 文本 + +`plugins/specode/scripts/spec_session.py` 内 `HELP_OUTPUT_TEXT` 改为模板: + +- 版本号不再硬编码为 `v0.6`,改为运行时从 `.claude-plugin/plugin.json` + 读 `version` 注入(失败降级 `unknown`),后续 bump 不再需要手动改 help。 +- 新增「会话日志(v0.10.0+)」段,简述 logs/ 默认行为、env / config + 双开关优先级、`spec_log.py status / replay` 用法,作为新用户从 help + 入口直达日志能力的导航。 + +无业务行为变化;纯文档/渲染层调整。 + +## 0.10.0 (2026-05-20) + +### Added — Session 日志收集(默认开启,可关) + +新增 `plugins/specode/scripts/spec_log.py` 模块 + 双 hook 通配监听 ++ 各 CLI 入口集成,全程收集 spec 模式期间的事件流,便于排查 +"主代理为什么走偏 / 漏 fork spec-writer / 选错 selector" 等问题。 + +收集的事件类型: + +- `hook_invoked` —— 每个 hook(SessionStart / UserPromptSubmit / + Stop / SessionEnd / PostToolUse Task / PreToolUse Edit|Write|MultiEdit / + on-heartbeat-quiet)触发时 +- `tool_pre` / `tool_post` —— 主代理每次 Bash / Read / Write / Edit / + Task 等工具调用前后(PreToolUse `*` + PostToolUse `*` 全通配新 hook) +- `cli_call` / `cli_exit` —— specode 自身 CLI(spec_session / + spec_init / spec_status / task_swarm)被调用前后的 cmd / argv / exit_code +- `hook_exception` —— hook 内部异常 trace(被 _safe_hook 吞并的,仍记日志) + +**存储**:`~/.specode/logs/.jsonl`(每行一个 JSON event; +无 session_id 的事件落 `_orphan.jsonl`)。 + +**双开关**(默认开启): + +```sh +# 临时关闭(仅当前 shell) +export SPECODE_LOG=off + +# 永久关闭:编辑 ~/.config/specode/config.json 加 +# { "logging": false } +``` + +env 优先于 config;env 可取 `off / false / 0 / no` 关闭,`on / true / 1 / yes` 强制打开。 + +**隐私保护**(默认): + +- 字段名 redact 黑名单:`password / passwd / pwd / api_key / apikey / + token / access_token / refresh_token / secret / client_secret / + authorization / auth / cookie / private_key / ssh_key` 命中即替换为 + ``。可在 config 加 `redact_keys: ["custom_key", …]` 扩展。 +- 字符串字段超 500 字符自动截断(后缀 `...`)。 +- 递归深度 >8 → ``。 + +**回放 + 状态查询**: + +```sh +# 按时序打印一个 session 的事件流 +sh "$CLAUDE_PLUGIN_ROOT/scripts/run.sh" \ + "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" replay --session + +# 占用查询(输出 enabled / switch_source / 文件数 / 总字节) +sh 
"$CLAUDE_PLUGIN_ROOT/scripts/run.sh" \ + "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" status +``` + +**rotation 策略**:不自动切片;超过 100MB 时 status 命令提示手动清 +(`rm -rf ~/.specode/logs/`)。session-bound 写入即可控制大小。 + +**异常隔离**:日志收集任何异常都吞并(spec_log 内部 try/except + 各 +集成点用 contextlib.suppress 包裹),绝不阻断业务流程。失败时主代理 +看不到日志写入痕迹,但 spec / hook / CLI 本身行为完全一致。 + +### Changed — hooks.json 新增 2 个全通配 hook + +`PostToolUse "*"` matcher 和 `PreToolUse "*"` matcher 各加一条 hook, +分别调 `spec_session.py on-log-post-tool-use / on-log-pre-tool-use`。 +这两个新 hook 仅落日志,不注入 additionalContext,不影响主代理行为。 +原有 `PreToolUse Edit|Write|MultiEdit` 和 `PostToolUse Task` matcher +保持不变(继续走 `on-pre-tool-use / on-task-completed` 的 advisory +注入逻辑)。 + +### Changed — 文档(SKILL.md / CONTRIBUTING.md / README × 2) + +- SKILL.md 加 §Session Logging 节,列出存储位置 / 双开关 / 隐私 / + 回放 / 占用查询。 +- CONTRIBUTING.md 加 §Debugging with session logs 节,给开发者 + 排查问题用 replay 的命令示例 + 新 hook/CLI 子命令应在入口加 + `_log_event` 的约定。 +- README / README.zh-CN 「Global bypass」节加 `SPECODE_LOG=off`; + 各自新增 Session logging / 会话日志收集 小节简述用法 + 关闭方式。 + +### Tests + +165 pass (152 previous + 13 new in `test_spec_log.py`):write_event / +disabled-via-env / disabled-via-config / redact-default-keys / +redact-extended-via-config / truncate-long-string / replay / +replay-missing / status × 3 / hook-invocation-writes-log / +cli-call-writes-log. 原 152 个测试 0 个破——日志收集是完全 backward- +compatible 的纯加项。 + +### Migration + +无需迁移。0.10.0 启动后会开始往 `~/.specode/logs/` 写日志;不想要的 +按上面方式关掉。已有 sessions / specs / 锁 / state.json 全部不变。 + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). 
+claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.9.3 (2026-05-20) + +### Added — 2 条新 Iron Rule(SKILL.md) + +0.9.2 真实跑测时观察到主代理在 `/specode:spec` 后多处违纪:自己 Write +requirements.md(应该 fork spec-writer)+ 写完后又 Edit 改文档头 +`Status: Requirements Draft → Complete`(不该越权改 phase 状态)。 +现有 6 条 Iron Rule 没有强约束这两点,本版补: + +- **Iron Rule 7**:`requirements.md` / `bugfix.md` / `design.md` / + `tasks.md` 4 份核心文档必须 fork `spec-writer` subagent 写。主代理 + 用 Write / Edit 直接写这 4 份文档视为流程违规。subagent 的工具白 + 名单(无 Bash)是物理隔离边界,绕过它就是绕过 review/validator + 兜底。`implementation-log.md` 例外,主代理可以直接追加。 +- **Iron Rule 8**:文档头 `Status` / `Review Status` 字段不允许主代 + 理手改。这些字段反映 phase / 评审状态,由 `phase-transition` CLI + 与 selector 流程驱动改变。主代理写完 requirements.md 把 + `Status: Requirements Draft` 改成 `Requirements Complete` 是越权 + (这是 selector 走完后才该发生的事);保持模板默认值不动。 + +### Changed — `doc-confirm-tasks` 合并入 `tasks-execution`(8→7 个选择器) + +0.9.2 真实跑测时观察到 tasks phase 走两步选择器(先 doc-confirm-tasks +确认 tasks.md、再 tasks-execution 选执行方式)冗余且易出错——主代理 +在第一步把标准 3 选项「确认 / 查看全文 / 继续沟通」简化成 2 选项 +「确认,继续 / 需要调整」,漏掉了 task-swarm 路径。 + +本版合并为一步: + +- 废弃 `SELECTOR_PROMPTS["doc-confirm-tasks"]`,把「需要调整」作为 + `tasks-execution` 的回退出口。tasks-execution 现 4 选项: + - 用 task-swarm 多 agent 并发(推荐) + - 顺序执行(同时处理 optional) + - 需要调整 tasks.md + - 暂不 coding +- 不再区分「开始 required」vs「开始 required + optional」——默认两 + 种执行方式都把 optional 一起跑;要只跑 required 走 Other 输入。 +- `phase=tasks` 的 `pending_selector` 推导从 `"doc-confirm-tasks"` + 改为 `"tasks-execution"`。 +- 同步:SKILL.md 8 场景表 → 7 场景表;selectors.md §A4 + `tasks-execution` 模板镜像 + 表格删 doc-confirm-tasks 行; + workflow.md §3.3 / §5 流程改一步出图;test_selector_prompts.py + 删 test_doc_confirm_tasks_snapshot;test_spec_session_hooks.py + pending_selector fixture 改 "tasks-execution"。 + +### Changed — tasks.md 模板统一为 task-swarm 兼容格式 + +0.9.2 真实跑测时观察到主代理选「用 task-swarm 多 agent 并发」后 +`task_swarm.py init` 报 `tasks.md 中未解析出任何 ## 阶段 N: 段`, +被迫主代理自己 Write 覆盖 tasks.md(违反新 Iron Rule 7)。根因: +spec-writer 生成的 tasks.md 用 `- [ 
] 1. 阶段标题 / - [ ] 1.1 子任务` +嵌套格式,但 `task_swarm.py parse_md.py` 期望 `## 阶段 N: 标题` 顶层 +段 + `- [ ] N.M ... @writes:... _需求:x.y_`。两边对不齐。 + +本版统一为 task-swarm 兼容格式(顺序执行也兼容——task-swarm 标签被 +顺序执行 agent 当作注释忽略): + +- `assets/templates/tasks.md` 完整重写:顶层 `## 阶段 N: ...` + + `- [ ] N.M ... @writes:... _需求:x.y_` + 格式约定头部说明。 +- `spec_init.py FALLBACK_TEMPLATES["tasks.md"]` 同步。 +- `references/templates.md §4` 模板示例 + 约束规则改写。 +- `agents/spec-writer.md phase=tasks` 子工作流明示新格式 + 不符合时 + 应回到 `tasks-execution` 选「需要调整」让 spec-writer 重写(**不 + 许主代理 Write 覆盖**,呼应 Iron Rule 7)。 + +### Changed — `commands/task-swarm.md` 立即调用段澄清 + +0.9.2 真实跑测时主代理调 `task_swarm.py` 时漏 `init` 子命令、把 +spec 目录传给 `--tasks`(应该传 tasks.md 绝对路径)。立即调用段示例 +原本用 `` 太抽象,本版改为 `/tasks.md`,并在「注意」 +块明示:`init` 子命令必传、`--tasks` 是 tasks.md 路径而非 spec 目录、 +不符合格式时回到 selector 让 spec-writer 重写。 + +### Tests + +152 pass(down from 153;删除 `test_doc_confirm_tasks_snapshot` +随 selector 合并;其余 fixture / 断言同步更新)。 + +### Migration + +无需迁移。`tasks-execution` 推荐项变成「task-swarm 多 agent 并发」, +但仍保留「顺序执行」「需要调整」「暂不 coding」三个出口。已经写好的 +旧格式 tasks.md(无 `## 阶段 N:` 段)在选 task-swarm 时会报错;主 +代理按新 Iron Rule 7 + tasks-execution 「需要调整」入口让 spec-writer +重写即可,不要手改。 + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). 
+claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.9.2 (2026-05-19) + +### Removed — `DESIGN.md` 与 `IMPLEMENTATION-AUDIT.md` 从仓库移除 + +两份文档都不再作为项目产物维护: + +- `DESIGN.md`(1515 行 / ~78KB)是 v0.5 → v0.8 重建期的设计文档, + 写给设计者 / 维护者读,不是最终用户文档。0.8.0 / 0.9.x 系列演进 + 后部分章节已与代码漂移(如 §3.3 sessions schema 字段名、§3.9 + spec_lint 规则数量),全文同步代价过高。 +- `IMPLEMENTATION-AUDIT.md`(327 行 / ~26KB)是 v0.7.0 时点的一次性 + 对账表,行号引用大多因为代码演进失效,且没有持续维护价值。 + +历史信息仍可通过 `CHANGELOG.md` + git log 回溯。当前真实代码状态 +看 `SKILL.md` + `references/*.md`(与代码同步演进)。 + +随之清理: + +- `plugins/specode/commands/status.md` —— 引用 `DESIGN.md §3.3` 改为 + 指向 SKILL.md §Session Lifecycle。 +- `plugins/specode/scripts/spec_session.py` —— `HELP_OUTPUT_TEXT` + 末尾的 "DESIGN.md §3" 引用改为 "SKILL.md 与 references/"。 +- `plugins/specode/skills/specode/references/task-swarm.md` —— 章节 + 标题里 `(§11.X)` 全部去掉,开头 "对应 DESIGN.md §11" 删除。 +- `plugins/specode/scripts/*.py`(9 个文件)—— 所有 docstring / 注释 + 里指向 `DESIGN.md` 的 `§X.Y` 章节引用改为指向具体的 + `references/*.md` 章节号,或简化为指向 SKILL.md。 +- `plugins/specode/commands/task-swarm.md` —— 3 处 `§11.X` 引用 + 对齐到 `references/task-swarm.md §X`。 +- `plugins/specode/skills/specode/references/selectors.md` —— 历史 + 反 pattern 列表里 `请按 §3.7.X 类型骨架输出` 改为 `请按 + selectors.md 类型骨架输出`。 + +### Changed — `CONTRIBUTING.md` 规范化重写 + +- 删除头部过时的 0.6.0 note(提到 75 tests / 4 hooks / 6 references, + 现在都不准确)。 +- 测试数字从 "75 tests" 更新到 "153 tests",覆盖范围描述同步扩大 + (加 task-swarm 全套 + selector_prompts + 集成 + 兼容性回归)。 +- 新增 **CLI invocation contract** 整节:明确所有 CLI 必须走 + `run.sh` 包装 + `${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}` + 完整路径模板(0.8.0 起的硬要求,避免主代理裸调对 cwd 失败)。 +- 新增 **On-disk schema fields** 整节:约束新写入用中性字段名 + (`session_id` / `holder`,不再用 `claude_session_id`),读侧 + 必须三键 fallback;schema 字段命名变更走 minor 还是 major 的 + semver 取决于是否带 read-side fallback。 +- semver 表加 hook event names 与 persisted schema fields 到 + "API surface" 范围;schema rename + fallback 明示走 minor。 +- release 流程加 step 3 "pytest 一次",明示双宿主 CLI 命令等价。 +- 全文去掉 v0.6 字样,去掉 
host-specific 措辞。 + +### Removed — `.gitignore` 加强 + +补 `.pytest_cache/` 与 `.claude/` 两条防御性 ignore(目前都未被 +追踪,但 `git add -A` 可能在未来误抓)。 + +### Tests + +153 pass,无净变化。本版仅 docs 删除 / docstring 调整 / `.gitignore` +更新,无代码逻辑变化。 + +### Migration + +无需迁移。如果你之前 fork 了仓库且依赖 `DESIGN.md` 做参考,请改 +看 Obsidian 备份或对应版本的 git tag。 + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). +claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.9.1 (2026-05-19) + +### Changed — `tasks.md ## 测试要点` 降权为"参考清单" + +0.9.0 在把 acceptance-checklist.md 折叠进 tasks.md 时过度强调了 +新 `## 测试要点` 节——把它包装成「跟随式更新铁律」、acceptance-gate +验收硬条件、`DOC_PRIORITY_REMINDER` 每轮注入提示、spec-writer 5 处 +「同 turn 更新」、workflow.md §3.2 整章纪律……约 50 处分布在 12 +个文件,与节点本身只是「给测试人员一份验证清单参考」的实际地位 +严重不匹配。 + +本版降权: + +- **`acceptance-gate` selector**(`spec_session.py` 与 + `selectors.md` 镜像)—— 验收推荐只看 `tasks.md` 是否全 `[x]`, + 测试要点降为"chat 简报里顺带提一下"的参考信息,不参与判定。 +- **`DOC_PRIORITY_REMINDER_ACTIVE`**(hook 每轮注入文本)—— 删 + "同 turn 更新测试要点"那一行,文档清单里加一句注解说明 tasks.md + 末尾有这一节即可。 +- **SKILL.md** —— 5 份文档列表 / 文档表格里不再标"同 turn 重写 + 测试要点"硬纪律;tasks.md 行改为「spec-writer 在 tasks phase + 按 SHALL 补几行供测试人员参考」。 +- **`references/workflow.md`** —— 删 §3.2 整章 + 「tasks.md 测试要点跟随式更新(铁律)」;删 §3.1 / §4 / §5 + 各处「同 turn 更新测试要点」步骤;§7 acceptance phase 不再 + 要求"逐行跑测试要点 + 全 [x] 才推荐验收通过",回到只看 tasks.md + 完成度;§9.2 持续沟通模式不再强调测试要点;流程图去掉 + 「测试要点跟随式更新」列。后续 §3.3 / §3.4 重编号为 §3.2 / §3.3。 +- **`references/templates.md`** —— 删 §4.2 整节「填充规则」;§4 + tasks.md 模板的 `## 测试要点` 示例从带 `[ ]` 改成纯 bullets + (表明非待办清单);`## 验收` 节删「测试要点全部跨过」那行; + 增设 §4.2「填充提示」短节说明 spec-writer 在 tasks phase + 按 SHALL 顺手补几行即可,模糊时可留 `_待补充_`。 +- **`agents/spec-writer.md`** —— 删整个「## tasks.md 测试要点 + 同 turn 更新」section;phase=requirements / phase=bugfix 流程 + 里删「同 turn 更新测试要点」步骤;phase=tasks 加一句「填末尾 + `## 测试要点` 节,按 SHALL 补几行供测试人员参考」。 +- **`references/iteration.md`** —— iteration 期间不再要求改测试 + 要点;累积规则降为「按需追加」;ASCII 示例从 `[ ] / [x]` checkbox + 改成纯 bullets。 +- **README / README.zh-CN / DESIGN / 
IMPLEMENTATION-AUDIT** —— 顶层 + 描述去掉「跟随式」「同 turn 更新」措辞。 + +### 不变(保留) + +- `assets/templates/tasks.md` 模板里的 `## 测试要点` 节本身保留 +- `spec_init.py FALLBACK_TEMPLATES["tasks.md"]` 里的 `## 测试要点` + 节本身保留(格式从 `- [ ]` 改回纯 bullets) +- `obsidian.md` 目录树里的 `tasks.md ← 末尾自带 ## 测试要点 章节` + 说明保留(信息性) +- `spec-writer agent` 在 tasks phase 仍负责按 SHALL 顺手补充 + 测试要点行(但作为 tasks 文档的一部分,不是独立铁律) + +### Tests + +153 pass(无净变化)。本版仅 docs/prompts 文字调整,无代码逻辑改变。 + +### Migration + +无需迁移。`acceptance-gate` 推荐判定从「tasks.md 全 [x] + 测试要点 +全跨过」简化为「tasks.md 全 [x]」——对已经在用 0.9.0 的用户来说, +验收门只会变得**更容易**通过,不会出现"以前能过、现在卡住"的情形。 + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). +claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.9.0 (2026-05-19) + +### Removed — `acceptance-checklist.md` retired, test points moved into `tasks.md` + +Spec document count dropped from 6 to 5. The standalone +`acceptance-checklist.md` (which existed to give QA reviewers a +verification checklist parallel to `tasks.md`) has been folded into +a new `## 测试要点` section at the end of `tasks.md`. Reasons: + +- The checklist file duplicated information that already had a home + in `tasks.md` (`_需求:x.y_` traceability tags). +- Maintaining two parallel docs encouraged drift — one would update + while the other lagged, and the `spec_lint` `checklist-lag` rule + papered over a symptom rather than fixing the duplication. +- A single `## 测试要点` section keeps the QA-facing artifact next + to the engineering-facing tasks it validates, so changes propagate + in one Edit. + +Acceptance phase now decides "passed" based on `tasks.md` being all +`[x]` plus every test-point line in `## 测试要点` being crossed +(`[x]` or `[-]` with reason), instead of a separate checklist table. + +**Changes**: + +- `spec_init.py` — drops the `acceptance-checklist.md` template + string and its write step; `tasks.md` template now ships with a + `## 测试要点` section. 
+- `spec_session.py` — `SELECTOR_PROMPTS["acceptance-gate"]` + rewritten (lists `n_done/n_total` + `n_fail` instead of pass/fail + count, and prompts the model to run `spec_lint.py` first); the + 6-doc list in `spec_doc_names` (`list-specs`) shrinks to 5; the + document-first reminder text drops the checklist line. +- `spec_lint.py` — `rule_checklist_lag` removed (3 rules remain: + traceability / log / EARS). +- `agents/spec-writer.md` — the "same-turn rewrite of + `acceptance-checklist.md`" iron rule becomes "same-turn update of + the `## 测试要点` section in `tasks.md`". +- `SKILL.md` — 6→5 doc list; new line under §Phase Order instructing + the model to invoke `spec_lint.py` once when entering acceptance. +- `references/workflow.md` — §3.2 retitled and rewritten as + "tasks.md 测试要点 跟随式更新(铁律)"; acceptance phase steps + (§7) now include the `spec_lint` call and reference test-point + rows rather than checklist rows. +- `references/templates.md` — §5 (`acceptance-checklist.md` template) + removed entirely; `tasks.md` template (§4) gains the `## 测试要点` + section + a new §4.2 with the fill rules; subsequent sections + renumbered (§6→§5, §7→§6, …). +- `references/iteration.md` — iteration-time accumulation rules now + describe appending lines to `## 测试要点` instead of rewriting a + checklist table. +- `references/obsidian.md` — spec directory tree drops the + checklist file. +- `references/selectors.md` — A6 `acceptance-gate` constant + rewritten to mirror the new `SELECTOR_PROMPTS["acceptance-gate"]`. +- README / README.zh-CN / DESIGN / IMPLEMENTATION-AUDIT updated to + reflect the 5-doc list and the new acceptance criterion. + +### Added — `spec_lint.py` wired into acceptance phase + +`spec_lint.py` existed as a standalone tool since 0.6.0 but no +hook/command/agent ever invoked it. 
Now SKILL.md §Phase Order and +the `acceptance-gate` selector text both instruct the main agent +to call it once when entering acceptance and list any +traceability / log / EARS warnings in the chat preamble. Lint is +still advisory (`exit 0`), never blocking. + +### Removed (cont.) + +- `spec_lint.rule_checklist_lag` and the 1 corresponding pytest + case in `test_spec_lint.py` (`test_lint_checklist_lag_warns`). + 5 cases remain: clean-spec + trace + log + ears + all-bad + multi-fire. +- `acceptance-checklist.md` entry from `test_spec_init.py` + `DOC_FILENAMES` (6→5). + +### Tests + +153 pass (down from 154; the deleted `checklist-lag` case is the +only loss — clean/trace/log/ears/all-bad coverage of the 3 +surviving rules stays). + +### Migration + +**Existing specs created before 0.9.0** keep their +`acceptance-checklist.md` file on disk — no auto-delete. Treat +those as historical artifacts; copy the still-useful lines into +the new `## 测试要点` section in `tasks.md` and delete the file +manually if you no longer need it. New specs created from 0.9.0 +onward never get the file. + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). +claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.8.1 (2026-05-19) + +### Changed — `references/prompts.md` renamed to `references/selectors.md` + +The file name `prompts.md` was too generic for a document whose +content is "AskUserQuestion selector specification + 8 fixed scenario +constant library". `selectors.md` matches the file's own title +("Selectors — AskUserQuestion 调用规范") and aligns with the +single-word file naming convention used by the other references +(`workflow.md`, `templates.md`, `iteration.md`, …). + +Renamed via `git mv` so history follows. All 23 in-repo references +were updated (SKILL.md, 5 cross-references in `references/*.md`, +DESIGN.md, IMPLEMENTATION-AUDIT.md). 
Earlier `CHANGELOG.md` entries +that mention `prompts.md` were **left untouched** because they +reflect the actual file name at the time of those releases. + +### Added — type variant A+ registered (single-select + preview) + +`references/selectors.md` now documents the **A+ variant**: when an +option carries a `preview` field, the host UI auto-switches to a +side-by-side layout (vertical option list on the left, monospace +markdown preview on the right that updates as the user moves the +focus). Single-select only — `multiSelect=true` rejects `preview`. + +Currently **no fixed scenario uses A+** — this is template-only, +registered ahead of any phase-gate that needs visual artifact +comparison (UI mockups / code snippets / config variants). If a +future scenario adopts A+, add the constant to +`spec_session.py SELECTOR_PROMPTS` and append it to the 8-scenario +table below the variant note. + +### Tests + +154 pass; no test changes. + +### Migration + +None. + +## 0.8.0 (2026-05-19) + +### Changed — host-neutral wording + sessions schema field rename + +Two coordinated cleanups so the plugin reads as host-agnostic and the +on-disk schema uses neutral key names. + +**1. Description neutralization across docs and code.** All +user-facing wording that singled out one host CLI was reworded to +neutral terms ("host CLI" / "宿主" / "CLI agent"). Affected files: + +- `README.md` / `README.zh-CN.md` — install section lists CodeBuddy + before Claude Code; tagline says "for CLI coding agents". +- `SKILL.md` + 6 `references/*.md` — "Claude Code 内置 X 工具" → "宿主 + 内置 X 工具"; "Claude 窗口" / "Claude 会话" → 中性词。 +- `DESIGN.md` / `CHANGELOG.md` / `CONTRIBUTING.md` / + `IMPLEMENTATION-AUDIT.md` — same treatment. +- `plugin.json` / `marketplace.json` description fields drop the + "(Claude Code + CodeBuddy)" suffix. +- `spec_session.py` `HELP_OUTPUT_TEXT`, hook context strings, error + messages neutralized. 
+ +Technical contracts retained verbatim because they are platform- +injected, not stylistic: `CLAUDE_PLUGIN_ROOT` env var (with the +existing `:-${CODEBUDDY_PLUGIN_ROOT}` fallback), `.claude-plugin/` +directory name (plugin discovery protocol), the `claude plugin …` +install commands users actually type. + +**2. Sessions / state.json schema: `claude_session_id` → `session_id`.** +All write sites now produce the new key. Read sites are +backwards-compatible: + +- `read_session()` (`spec_session.py:135`) auto-migrates legacy files + by copying `claude_session_id` → `session_id` in-memory; the next + write lands the new key on disk. +- `StateMachine.load()` (`task_swarm_state.py:149`) does the same for + `~/.specode/runs//state.json` (renames the dataclass field + too: `sm.claude_session_id` → `sm.session_id`). +- Lock holder reads (`list-specs`, `on-heartbeat-quiet`) fall back + through `holder → session_id → claude_session_id` in priority + order. The lock field's actual persisted key has always been + `holder`; the rename does not touch `/.config.json`. + +No manual migration needed. Existing `~/.specode/sessions/*.json` +and `state.json` files keep working; they get rewritten in the new +schema on the next mutating CLI call. + +### Changed — command md files carry copy-pasteable CLI templates + +Every command md under `plugins/specode/commands/` now opens with an +**「立即调用」** section that embeds the full `sh +"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" +"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/.py" +` template the model should execute. Motivation: a recent +session showed the model looping six times on bare `python3 +spec_session.py …` invocations against the wrong cwd, because the +command md only listed `/specode:continue $ARGUMENTS` and the model +never consulted SKILL.md for the wrapper rule before retrying. 
+ +SKILL.md also gained a "**CLI 调用规约(强制)**" subsection and an +Iron Rule explicitly banning bare `python3` invocations. + +### Added — backwards-compat regression tests + +Two new tests pin the auto-migration behavior so future refactors +can't quietly drop legacy support: + +- `test_read_session_migrates_legacy_claude_session_id` + (`tests/test_spec_session_business.py`). +- `test_load_migrates_legacy_claude_session_id` + (`tests/test_task_swarm_state.py`). + +### Removed + +- `migrate-from-spec-mode.sh` — one-shot migration script for users + upgrading from 0.1.0's `spec-mode` plugin name. Long past its + usefulness window; deleted. + +### Tests + +154 pass (152 previous + 2 new compat regressions). All existing +fixtures updated to write the new `session_id` key directly. + +### Migration + +None. Plugin cache update sufficient: + +```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). +claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.7.3 (2026-05-19) + +### Changed — all selector references unified to YAML three-section format + +Following 0.7.2 (which rewrote SELECTOR_PROMPTS to the three-section ++ YAML format), this release brings the two **dynamic selectors** in +`references/obsidian.md` and the **8 static scenarios** in +`references/prompts.md` to the same format so every selector +reference across the plugin is visually identical. + +Two dynamic selectors updated: + +- **§3 multi-vault selection** (when `spec_vault.py detect` finds >1 + vault and no `obsidianRoot` is set yet) — was Python-call form; + now three-section YAML. +- **§5.1 `/specode:continue` no-slug spec picker** (when + `spec_session.py list-specs` returns >0 specs) — was Python-call + form; now three-section YAML. Empty list still skips the tool and + prompts the user to run `/specode:spec ` instead. 
+ +The 8 static scenarios in `prompts.md` (workflow-choice, +clarification-{wizard,done}, doc-confirm-{requirements,bugfix,design, +tasks}, tasks-execution, takeover-options, acceptance-gate, +iteration-scope) were rewritten from Python-call form to **byte-for- +byte the same YAML three-section format as SELECTOR_PROMPTS**, with +the same wrapper section structure (目的 / 上下文 / 前置动作 / 工具参数 / +约束). The previous "Python-call form and YAML form are equivalent" +caveat is removed — there is now only one format. + +A worked example with three clarification points (login UX scenario) +was added to §B1 to give the model a concrete reference for wizard +construction. + +### Tests + +- All 152 tests pass; no test changes required (snapshots already + match the new format after 0.7.2). + +### Migration + +None. Plugin cache update sufficient: + +```sh +claude plugin marketplace update specode +claude plugin update specode +``` + +## 0.7.2 (2026-05-19) + +### Changed — SELECTOR_PROMPTS rewritten to three-section + YAML format + +All 11 entries in `spec_session.py SELECTOR_PROMPTS` have been rewritten +to a **three-section + YAML-indented** format that matches the +"directly paste into the host CLI and the tool fires" prompt style +the maintainer validated in another window. + +Each constant now has the same structure: + +``` +## 选择器节点: + +**目的**: + +**上下文**:active spec=, phase=, + +**前置动作(chat 简报,≤N 行)**: + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "" + header: "<≤12 char chip>" + multiSelect: + options: + - label: "

| /specode:end 退出` + (readonly mode adds `[只读]` segment). +- **Document-first discipline as advisory hooks**, replacing the + exit-2 INV-1 / INV-2 enforcement entirely: + - `on-user-prompt` injects "📝 文档优先提醒(输入侧)" listing the + six spec docs, asking the model to check whether the current + input warrants a doc edit *before* code. + - `on-stop` injects "🔄 代码-文档同步提醒(输出侧)" asking the + model to self-check whether the just-finished turn left a + code change without a matching doc update. +- **`/specode:spec -h` fast-path** — hook intercepts the prompt and + injects the full help text into `additionalContext` for the model + to verbatim-print, replacing the prior unstable "model reads + references/help-output.md" path. +- **Six core scripts** (stdlib-only): `spec_vault.py` (3-tier doc + root resolution + Obsidian vault detection), `spec_init.py` (spec + scaffolding with forced double-write of sessions + .config.json + and rollback), `spec_session.py` (1500 LOC — business commands, + hook subcommands, SELECTOR_PROMPTS), `spec_lint.py` (4 advisory + rules), `spec_status.py`, plus `run.sh` / `run.cmd` launchers. +- **`spec-writer` agent** — new agent for document generation with + tools `Read, Write, Edit, Grep, Glob` (no Bash; physical + isolation prevents the agent from touching code, locks, or + phase transitions). +- **SKILL.md and 6 references rewritten** for the new model: + `workflow.md`, `lock-protocol.md`, `obsidian.md`, `prompts.md` + (selector scenarios constant library), `templates.md` (six doc + templates + EARS SHALL), `iteration.md`. +- **75 pytest tests** covering 3-tier vault resolution, init & + rollback, business lock state machine, four hooks across mode + matrix, SELECTOR_PROMPTS snapshot, lint rules, and end-to-end + event chain. All passing. + +### Removed + +Nothing further beyond the 0.5.0 skeleton removal. 
**INV-1 through +INV-11 and `spec_choice.py` remain gone** and are not coming back — +their goals are now served by advisory hook injections plus model +self-discipline guided by SKILL.md. + +### Global bypass + +`SPECODE_GUARD=off` short-circuits all hooks to `exit 0` with no +output and no state writes. Reserved for debugging. + +### Compatibility + +- **Plugin commands**: `/specode:spec`, `/specode:continue`, + `/specode:end`, `/specode:status`, `/specode:task-swarm` + (placeholder, v0.7). +- **State migration**: nothing automatic. Users coming from 0.4.x + who still have `~/.specode/sessions/*.json` in the old schema + should run `/specode:end` once (which will write the new schema + with `mode=ended`) or remove the file. New schema is written + starting from the next `SessionStart`. + +## 0.5.0 (2026-05-18) + +### Removed (breaking — please read) + +This release strips the plugin back to a skeleton. Every runtime +enforcement and helper introduced from 0.1.0 through 0.4.0 has been +removed; what remains is the plugin shell and the agent role docs. + +- **All hook handlers removed.** `plugins/specode/hooks/` (both + `hooks.json` and the `hooks-probe.json` diagnostic) is deleted. The + 6 hook events (SessionStart / UserPromptSubmit / PreToolUse / + PostToolUse / Stop / SessionEnd) no longer fire any plugin code. +- **All invariants removed.** INV-1 through INV-11 (CDSG hard-deny, + CDSG advisory, eviction guard, acceptance follow-mode, status-block + injection, phase gate, subagent_type prefix, subagent @writes + boundary, tasks.md writeback, outbox schema, non-interactive Bash + guard) no longer exist as code paths. +- **All scripts removed.** `plugins/specode/scripts/` is deleted — + `spec_guard.py`, `spec_session.py`, `spec_init.py`, `spec_sync.py`, + `spec_choice.py`, `spec_state.py`, `spec_status.py`, `spec_lint.py`, + `spec_vault.py`, `spec_telemetry.py`, `task_swarm.py`, + `task_swarm_*.py`, `bash_guard.py`, `run.sh`, `verify_local.sh`. 
+- **Tests removed.** `plugins/specode/tests/` is deleted in full. +- **Skill references removed.** `plugins/specode/skills/specode/references/` + (workflow / commands / prompts / lock-protocol / templates / iteration / + obsidian / help-output / task-swarm / task-swarm-example / + sample-analysis) is deleted. +- **SKILL.md** rewritten as a short skeleton describing the spec-mode + activation contract; the iron rules referencing INV / hooks / + scripts are gone. +- **`/task-swarm` command** rewritten as a placeholder; the 7-step + CLI-driven orchestrator protocol it used to host is removed. + +### Kept + +- `.claude-plugin/marketplace.json` + `plugins/specode/.claude-plugin/plugin.json` +- `plugins/specode/commands/` — entry stubs for `/spec`, `/continue`, + `/end`, `/status`, `/task-swarm` +- `plugins/specode/skills/specode/SKILL.md` — skeleton +- `plugins/specode/agents/` — task-swarm planner / coder / reviewer / + validator role docs (descriptive only; the orchestrator that + dispatched them is gone) +- Top-level docs (`README.md` / `README.zh-CN.md` / `CHANGELOG.md` / + `CONTRIBUTING.md` / `DEV.md` / `migrate-from-spec-mode.sh`) are + retained and updated to reflect the new skeleton state. + +### Migration + +No automatic migration. Reinstall on top of the new version to drop +the hooks; user runtime state under `~/.specode/` and +`~/.config/specode/` is untouched and can be removed manually: + +```sh +rm -rf ~/.specode ~/.config/specode +``` + +If you were relying on any 0.4.x behaviour (CDSG advisories, INV-11 +non-interactive Bash guard, task-swarm orchestrator), pin to +`specode--v0.4.0` until the runtime is rebuilt. 
## 0.4.0 (2026-05-18) @@ -114,7 +2126,7 @@ Users on 0.3.x upgrading to 0.4.0: ### Notes - `scripts/spec_guard.py` legitimately reads stdin (hook payload from - Claude Code / CodeBuddy, bounded JSON + immediate close) — annotated + the host CLI, bounded JSON + immediate close) — annotated with `# stdin-block: hook entry point` to satisfy the new scanner. ## 0.3.1 (2026-05-18) @@ -285,7 +2297,7 @@ they become advisory. Audit logs continue to record them. - New command `/task-swarm /tasks.md` for manual triggering of task-swarm mode outside the standard selector flow. - New plugin subdirectory `agents/` carrying the 4 task-swarm subagents. - Claude Code auto-registers these (namespaced as `specode:task-swarm-*`). + The host CLI auto-registers these (namespaced as `specode:task-swarm-*`). - New references: - `references/task-swarm.md` — full protocol (single authority for editing behavior, subagent contract, write-back rules, loop semantics, iron-rule @@ -317,7 +2329,7 @@ they become advisory. Audit logs continue to record them. - **P0 — subagent_type must be plugin-prefixed**: dispatching with the bare name `Task(subagent_type="task-swarm-coder", ...)` is rejected by - Claude Code with `"Agent type not found"`. All 13 references in + the host CLI with `"Agent type not found"`. All 13 references in `commands/task-swarm.md` and `references/task-swarm.md` now use the fully-qualified `specode:task-swarm-coder` (and `-reviewer` / `-validator` / `-planner`). The `agents/*.md` frontmatter `name` is @@ -356,7 +2368,7 @@ detects and prints reminders for these. ### Phase 1 — bootstrap - Initial plugin skeleton. -- `plugin.json` for Claude Code / CodeBuddy. +- `plugin.json` consumed by the host CLI's plugin loader. - `hooks/hooks.json` wiring SessionStart / UserPromptSubmit / PreToolUse / PostToolUse / Stop / SessionEnd → `scripts/spec_guard.py`. - `scripts/spec_guard.py`: dispatch entry, audit-log every event, all @@ -368,7 +2380,7 @@ detects and prints reminders for these. 
`.active-specode.json` / per-spec `.config.json`. Owns `~/.specode/{sessions,.any-active}`. CLI: status / sync-sentinel / demo-activate / demo-deactivate. -- `spec_guard.py`: SessionStart writes Claude-session record; +- `spec_guard.py`: SessionStart writes the host-session record; UserPromptSubmit injects a status block via `hookSpecificOutput.additionalContext` when a spec is active; other handlers fast-exit when no active spec. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..92c6df5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,142 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## What this repo is + +A single-plugin marketplace for **specode** — a specification-driven workflow plugin for Claude Code and CodeBuddy CLIs. The plugin walks a host agent through a fixed phase pipeline (requirements → design → tasks → implementation → acceptance) using five Markdown documents as the single source of truth, with advisory hooks and `AskUserQuestion` selectors at phase gates. + +All implementation lives under `plugins/specode/`. The repo root only contains the marketplace manifest (`.claude-plugin/marketplace.json`), the README, the CHANGELOG, and CONTRIBUTING. Plugin internals are listed in README §Architecture — do not re-derive the file tree, read the README. + +## Commands + +```sh +# Run the test suite (must be from repo root; tests are hermetic and redirect $HOME) +python3 -m pytest plugins/specode/tests/ -v + +# Single test file +python3 -m pytest plugins/specode/tests/test_spec_session_business.py -v + +# Single test +python3 -m pytest plugins/specode/tests/test_spec_session_hooks.py::test_on_user_prompt_injects_status_footer -v + +# Local plugin install (development) +claude --plugin-dir ./plugins/specode +codebuddy --plugin-dir ./plugins/specode +``` + +There is no lint or typecheck step configured at the repo level. 
Lint logic the plugin itself ships (`spec_lint.py`) is for the user's spec docs, not this codebase. + +## Non-negotiable conventions + +These are the rules from `CONTRIBUTING.md` that are easy to violate and expensive to fix. Read CONTRIBUTING.md in full before opening a PR. + +### Runtime is stdlib-only +Code under `plugins/specode/scripts/` MUST use only the Python 3.8+ standard library. Plugin users install via host CLI `plugin install`; they do not `pip install`. Tests under `plugins/specode/tests/` MAY use `pytest` (dev dependency only). + +### CLI invocation must go through `run.sh` +Every script under `plugins/specode/scripts/` is a CLI invoked from hooks or directly by the host agent via the `run.sh` wrapper with the absolute `$CLAUDE_PLUGIN_ROOT` (fallback `$CODEBUDDY_PLUGIN_ROOT`) path: + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/<script>.py" \ + <subcommand> [args...] +``` + +`run.sh` probes `python3 → python → py` (with Windows Store alias-stub skipping) so the script works on any host with Python 3.8+. Bare `python3 <script>.py` calls fail in most cwds — when adding a new entry point, match the wrapper template used everywhere in `hooks/hooks.json` and `commands/*.md`. + +### Hook safety contract +Hooks in `spec_session.py` are advisory. Every handler MUST: +1. Be wrapped in `@_safe_hook` (catches all exceptions, returns 0). +2. **Never `exit 2`** — push guidance via `additionalContext` JSON to stdout and still `exit 0`. The one exception is `hook_on_pre_tool_use` for direct edits to `tasks.md`, which was escalated to a hard block in 0.10.21 (see CHANGELOG); do not add other exit-2 paths without explicit need. +3. Honour `SPECODE_GUARD=off` for global bypass (early-return with no output and no state writes). +4. Tolerate non-TTY stdin (`_read_stdin_payload()` handles this). 
+ +Performance budgets (CONTRIBUTING §Performance budget): `UserPromptSubmit` <80ms (fires every user turn), `PreToolUse`/`PostToolUse Task` <100ms, `Stop` <300ms, `SessionStart`/`SessionEnd` <500ms. + +### On-disk schema evolution +Two schemas the plugin owns: +- `~/.specode/sessions/<session_id>.json` — per-host-session state +- `<spec-dir>/.config.json` — per-spec config + lock holder + +Rules: +- New writes use neutral names (`session_id`, not `claude_session_id`; `holder`, not host-specific names). +- Read sites MUST fall back through historical names. Auto-migrate on read so the next write lands the new key transparently — see `read_session()` and `StateMachine.load()` for the existing pattern, and `test_read_session_migrates_legacy_claude_session_id` / `test_load_migrates_legacy_claude_session_id` as test templates. +- Bump minor for renames with a read-side fallback; bump major if a rename breaks reads. +- All state writes use `tempfile + os.replace + fsync` (see `_atomic_write_json` in `spec_session/_io.py`). If one side of the dual write (sessions file + spec config) fails, roll back and exit 1 — never proceed with a half-successful dual write. + +### Test conventions +- Scripts are CLIs, not importable modules. Tests invoke them via `subprocess.run` through the `run_script` fixture in `tests/conftest.py`. +- Use the `fake_home` fixture to redirect `$HOME`, `XDG_CONFIG_HOME`, `APPDATA`, `LOCALAPPDATA`, and clear `SPECODE_ROOT` / `SPECODE_GUARD`. Tests MUST be hermetic — never touch the real `~/.specode/`. +- Use `init_spec` fixture to scaffold a spec directory the way `spec_init.py` would. +- Every persisted-schema change needs a "legacy file migration" regression test. + +## Architecture — the parts that span multiple files + +### scripts/ layout +Both heavyweight CLIs (`spec_session.py`, `task_swarm.py`) are subdirectory packages with a thin same-name launcher at the `scripts/` root. 
Python's FileFinder gives package precedence over module within a path entry, and the launcher is exec'd (not imported), so `spec_session.py` (launcher) and `spec_session/` (package) coexist safely. The launcher injects `scripts/` into `sys.path` and calls `.cli.main()`. + +| `scripts/` member | Role | +|---|---| +| `spec_session.py` | Thin launcher (~40 lines: utf-8 stdout reconfigure + sys.path + `from spec_session.cli import main`). Filename preserved because `hooks.json`, every `commands/*.md`, and `tests/conftest.py:run_script` reference it by name. | +| `spec_session/` package | `__init__.py` (re-exports `read_session` / `read_spec_config` / `_session_short` / `_is_lock_stale` for `spec_status.py:25`), `cli.py` (argparse + `COMMANDS` dispatch + main), `_io.py`, `_selectors.py`, `_reminders.py`, `_business.py`, `_hooks.py`, `_catalog.py` | +| `task_swarm.py` | Thin launcher (~25 lines: sys.path + `from task_swarm.cli import main`). | +| `task_swarm/` package | Empty `__init__.py` (no external `from task_swarm import ...` consumer), `cli.py` (main CLI), `_state.py`, `_parse_md.py`, `_outbox.py`, `_prompt.py`, `_writeback.py` | +| `spec_init.py` / `spec_lint.py` / `spec_log.py` / `spec_status.py` / `spec_vault.py` | Single-file CLIs at the top level. `spec_log.py` is also shared (defensively imported by both packages' modules for session logging). | +| `run.sh` / `run.cmd` | Python interpreter probes (`python3 → python → py`) — Windows alias-stub handling lives here. | + +**Do not rename** `spec_session.py` or `task_swarm.py` (those filenames are the API surface). **Do not delete** `spec_session/__init__.py` re-exports (spec_status.py depends on them). Inside the packages, intra-package imports are absolute (`from spec_session._io import …`) for clear error messages. + +### `_THIS_DIR` convention inside packages +Modules under `spec_session/` and `task_swarm/` that need to find sibling top-level scripts (e.g. 
`_hooks.py:_run_task_swarm_plan` calling `task_swarm.py`) define `_THIS_DIR = Path(__file__).resolve().parents[1]` — that resolves to `scripts/`, keeping `_THIS_DIR / "task_swarm.py"` and `_THIS_DIR.parent / ".claude-plugin"` semantically identical to the pre-split layout. Don't use `parents[0]` (it points inside the package). + +### Hook → CLI → state-file flow +1. Host CLI fires a hook event (`SessionStart`, `UserPromptSubmit`, `PreToolUse`, `PostToolUse Task`, `Stop`, `SessionEnd`) per `hooks/hooks.json`. +2. The hook command shells into `run.sh` → `spec_session.py ` (the launcher), which imports `spec_session.cli.main` and dispatches the hook subcommand handler. +3. Handlers (wrapped in `@_safe_hook` from `spec_session/_hooks.py`) read `~/.specode/sessions/.json` and the active spec's `.config.json`, then emit `additionalContext` JSON to stdout to inject guidance (status footer, selector reminder, doc-sync nag, B2 catalog hits) into the next agent turn. +4. The host agent, following `skills/specode/SKILL.md`, responds to the injection by calling `AskUserQuestion` for selectors or by invoking a `spec_session.py` business subcommand (`acquire` / `phase-transition` / etc.) which atomically updates both state files. + +### Session as the integration boundary +Everything is keyed by the host's `session_id` (injected by `SessionStart`, re-injected every `UserPromptSubmit`). Multiple terminal windows = multiple session files = multiple parallel specs. Lock holder is the `session_id`; stale-lock window is 30 minutes (`STALE_LOCK_SECONDS` in `spec_session/_io.py`). The agent MUST NOT invent a session_id, MUST NOT parse one from user input, MUST NOT echo full IDs in chat (8-char prefix only). + +### Document root resolution +Three-tier with no fallback (`spec_vault.py`): +1. `--root` flag or `SPECODE_ROOT` env (highest) +2. `~/.config/specode/config.json.obsidianRoot` +3. 
Auto-detected Obsidian vault → `/spec-in/-/specs` + +If all three miss, `spec_init.py` exits 3 with a setup hint. Do NOT add a cwd or `~/specs` fallback. The `--detect-vault` / `--vault-status` / `--set-vault` / `--set-root` / `--sync-status` flags are routed in `commands/spec.md`; some of them are "fast-path" hooks where the hook pre-renders the output and the agent only prints it verbatim (see `FAST_PATH_HELP` / `FAST_PATH_VAULT` constants in `spec_session/_hooks.py`). + +### Phase pipeline + selectors +Valid phases (`VALID_PHASES` in `spec_session/_io.py`): `intake → requirements/bugfix → design → tasks → implementation → acceptance → iteration`. Transitions go through `spec_session.py phase-transition`, which also sets `pending_selector` so the next hook turn knows which `AskUserQuestion` skeleton to remind about. The 7 fixed selector scenarios are defined in `SELECTOR_PROMPTS` (in `spec_session/_selectors.py`) and documented in `skills/specode/references/selectors.md`; `tests/test_selector_prompts.py` snapshots them, and `tests/test_selectors_drift.py` parses the file by regex (keep the `SELECTOR_PROMPTS: dict[str, str] = {...}` literal grep-able). + +### Reference catalog (description-as-trigger) +Every `skills/specode/references/*.md` file carries a YAML frontmatter `description: Use when …` that captures *when* a reader should pick it up (superpowers style — trigger-first, not summary-first). The `on-user-prompt-catalog` hook (`spec_session/_catalog.py`) maintains a `CATALOG` dict of keyword regex → reference key (e.g. `lock|takeover|接管` → `lock-protocol`, `task-swarm|@writes|reviewer` → `task-swarm`); each `UserPromptSubmit` it scans the prompt, lists hit references with their descriptions, and emits an advisory injection. Active-only (silent for `idle`/`readonly`/`ended`). `tests/test_catalog.py` enforces: every `CATALOG` key has a real reference file, every targeted reference has a non-empty `description` field. 
When adding a new reference or extending keyword coverage, update both the frontmatter and `CATALOG`. + +### task-swarm orchestration +`task_swarm.py` (launcher → `task_swarm.cli`) is a separate state machine for the implementation phase. The state file is the single source of truth (`/.task-swarm/runs//state.json`). The flow is `init → plan → fork (N coders) → advance → writeback → resolve`, with reviewer (advisory, one round of P0 fixes if findings carry evidence tags) and validator (blocking pass/fail loop, deadloop guard after 3 identical failures). The four subagent role definitions live in `agents/task-swarm-{coder,planner,reviewer,validator}.md`; they are intentionally tool-restricted (reviewer/validator have no Edit/Write — physical isolation). Submodules under `task_swarm/`: +- `_state.py` — state machine load/save with legacy migration +- `_parse_md.py` — parses `tasks.md` `## 阶段 N:` sections + `@writes` / `@depends-on` tags +- `_outbox.py` — parses subagent `result.md` / `review.md` / `validation.md` per the schemas in `references/task-swarm.md` §4 +- `_writeback.py` — line-safe diff back into `tasks.md` (exits 1 on out-of-bounds; see 0.10.21 CHANGELOG entry for the multi-line `reproduce_cmd` bug) +- `_prompt.py` — materializes per-agent prompts into `agents//prompt.md` + +### Session logging +`spec_log.py` writes append-only JSONL events to `~/.specode/logs/.jsonl` (hook fires, tool calls, CLI invocations, phase/lock changes). Default redaction of secret-like keys (`password`, `api_key`, `token`, `secret`, `authorization`, `cookie`) and 500-char string truncation. Disable with `SPECODE_LOG=off` or `~/.config/specode/config.json.logging=false`. Any logging exception is swallowed — logging never blocks business flow. When adding a new hook or CLI subcommand, call `_log_event("event_name", payload, session_id)` at the entry point to keep replay useful. + +## Release procedure (summary) + +Detailed steps are in CONTRIBUTING.md §Release. 
The two manifests carrying `version` MUST match or the plugin tag tooling refuses: +- `plugins/specode/.claude-plugin/plugin.json` → `"version"` +- `.claude-plugin/marketplace.json` → `plugins[0].version` + +Workflow: bump both manifests → rename `## Unreleased` in CHANGELOG to `## X.Y.Z (YYYY-MM-DD)` + add a fresh `## Unreleased` above → run tests → commit + push → `claude plugin tag --dry-run plugins/specode` → `claude plugin tag plugins/specode --push`. Tag format is `specode--v{version}` (annotated). **Pushing the tag IS the release** — there is no tarball or registry artifact; host CLIs fetch the marketplace manifest from GitHub by git tag. + +Semver "API surface" for this plugin = slash command set, agent names, hook event names, and persisted-state schema fields. Field renames with a read-side fallback are minor; without fallback are major. + +## Where to look for what + +- **README.md** — what the plugin does, install/usage, architecture map. +- **CONTRIBUTING.md** — the full version of the conventions summarised above (stdlib rule, CLI wrapper contract, hook safety, schema evolution, performance budgets, release). +- **CHANGELOG.md** — narrative history; useful when a behavior seems weird because it documents past bugs and the reasoning behind subtle fixes (e.g. 0.10.21 writeback line-safe, 0.10.13 / 0.10.17 task-swarm STATUS recovery). +- **plugins/specode/skills/specode/SKILL.md** + **references/** — the runtime behavior spec the *host agent* follows. When modifying selectors, phase order, or the lock protocol, the SKILL.md and the corresponding `references/.md` need to stay in sync with the CLI behavior; selector drift is enforced by `tests/test_selectors_drift.py`. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c59cffd..a7367de 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,20 +1,46 @@ # Contributing +Project-level conventions for changes under `plugins/specode/`. Read +this before opening a PR or cutting a release. 
+ ## Runtime is stdlib-only -The runtime code under `plugins/specode/scripts/` MUST use only the -Python standard library. This is a hard rule, declared in `plugin.json`: +Any runtime code under `plugins/specode/scripts/` MUST use only the +Python standard library. Plugin users install via the host CLI's +`plugin install`; they don't `pip install -r requirements.txt`. +Pulling in third-party packages either silently breaks for users +without them or forces a heavier install path. + +Tests under `plugins/specode/tests/` MAY use `pytest` (it's a dev +dependency, not runtime). + +## CLI invocation contract -```json -"requires": { "python": ">=3.9", "stdlib_only": true } +Every script under `plugins/specode/scripts/` is a CLI invoked from +hook commands (`hooks.json`) or directly by the main agent. **All +invocations MUST go through the `run.sh` wrapper with the full +`$CLAUDE_PLUGIN_ROOT` (fallback `$CODEBUDDY_PLUGIN_ROOT`) path**: + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/<script>.py" \ + <subcommand> [args...] ``` -Reason: plugin users install the plugin via `--plugin-dir`. They don't run a -`pip install -r requirements.txt`. Pulling in third-party packages would -either silently break for users without those packages, or require a heavier -install path that fights the purpose of the plugin. +Why: -Tests under `tests/` MAY use `pytest` (it's a dev dependency, not runtime). +- `run.sh` probes `python3 → python → py` so it works on any host + with Python 3.8+ on PATH. +- Both `CLAUDE_PLUGIN_ROOT` and `CODEBUDDY_PLUGIN_ROOT` are + platform-injected env vars; the `:-` fallback covers both Claude + Code and CodeBuddy without forcing the user to pick one. +- Bare `python3 spec_session.py …` calls fail in most cwds because + the scripts are not on PATH and the agent doesn't know where they + live. This was observed as a real failure mode pre-0.8.0; see + `SKILL.md §CLI 调用规约(强制)` for the hard rule. 
+ +`hooks/hooks.json` and the `commands/*.md` "立即调用" sections all +use this template — match them when adding new entry points. ## Test conventions @@ -24,98 +50,128 @@ Run the suite from the repo root: python3 -m pytest plugins/specode/tests/ -v ``` -When adding behavior to `spec_sync.py` or `spec_guard.py`, add: +153 tests cover: 3-tier vault resolution, spec scaffolding with +rollback, business lock state machine, all 7 hooks across the mode +matrix, `SELECTOR_PROMPTS` snapshot, lint rules (3 surviving rules +after the 0.9.0 cleanup), legacy-field migration for `session_id`, +the task-swarm CLI / state machine / outbox parser / writeback / hook, +and an end-to-end SessionStart → /specode:spec → /specode:end → +SessionEnd event chain. + +When adding behavior, prefer: + +- Unit tests that call the CLI script through `subprocess.run` (the + scripts are CLIs, not importable modules). +- Use `tmp_path` + `monkeypatch.setenv('HOME', tmp_path)` to keep + tests isolated from real `~/.specode/`. +- For hook tests, feed stdin payloads matching the host CLI hook + schema and assert against the JSON `additionalContext`. +- For any persisted schema change (sessions / state.json / lock + fields), add a "legacy file migration" regression test pinning + read-side backwards compatibility — see + `test_read_session_migrates_legacy_claude_session_id` and + `test_load_migrates_legacy_claude_session_id` as templates. + +## Hook safety contract -1. A unit test under `plugins/specode/tests/test_spec_sync.py` for the - pure function. -2. An integration test under `plugins/specode/tests/test_spec_guard.py` - exercising the handler path with a fabricated stdin payload through - `hook_caller`. +Every hook handler in `spec_session.py` MUST: -Use the `workspace` fixture for handler tests — it creates a tmp spec_dir -+ project_root and monkey-patches `spec_state.find_active_spec` so you don't -need a real Obsidian vault. +1. 
Catch all exceptions internally and return 0 (the `@_safe_hook` + decorator does this). +2. **Never `exit 2`.** All hooks are advisory only. If you need to + influence the model, inject `additionalContext` JSON to stdout + and still `exit 0`. +3. Honour `SPECODE_GUARD=off` for global bypass — return early with + no output and no state writes. +4. Detect non-TTY stdin (hook payload arrives via pipe). On TTY, the + script must not block; `_read_stdin_payload()` already handles + this. -## Hook safety contract +## On-disk schema fields + +Two schemas the plugin owns: + +- `~/.specode/sessions/.json` — per-host-session state +- `/.config.json` — per-spec config + lock field + +Conventions: + +- New writes use neutral field names (`session_id`, not + `claude_session_id`; `holder`, not `claude_session_id` for lock + holders). Avoid host-specific naming in persisted schema. +- Read sites MUST fall back through any historical names before + giving up — for `session_id` the order is `session_id` → + `claude_session_id`; for lock holder it's `holder` → + `session_id` → `claude_session_id`. `read_session()` and + `StateMachine.load()` auto-migrate on read so the next write + lands the new key without manual user action. +- Bump **minor** for schema field renames that ship a read-side + fallback (existing files keep working). Bump **major** if a + rename breaks reads. + +## Debugging with session logs (0.10.0+) + +specode 默认收集每个 session 的日志到 `~/.specode/logs/.jsonl`, +含 hook 触发、主代理工具调用、CLI 调用、phase / lock 变化。 + +```sh +# 回放一个 session 的事件流(按时序) +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_log.py" \ + replay --session + +# 查看 logs/ 占用 +sh "$CLAUDE_PLUGIN_ROOT/scripts/run.sh" \ + "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" status -Every handler in `spec_guard.py` MUST: +# 临时关日志 +export SPECODE_LOG=off -1. 
Catch all exceptions internally and return 0 from `main()` (the dispatcher - already wraps handler calls in try/except). Never wedge a user's Claude - Code session because of a plugin bug. -2. Honor `SPECODE_GUARD=off` for global bypass. -3. Audit log via `_audit()` for any decision that *did work* — silent fast- - exits when no active spec are deliberately *not* audited (avoid log spam). -4. Use `deny(msg)` (exit 2 + stderr) ONLY for genuine invariant violations - that the model should react to. +# 永久关:编辑 ~/.config/specode/config.json 加 "logging": false +``` + +排查"主代理为什么走偏"类问题时,用 replay 看 hook 时序 + 工具调用顺序, +通常能定位到「该呈现 selector 没呈现」「fork spec-writer 漏了」「Status +字段被越权改」之类的违规点。新增 hook / CLI 子命令时记得在入口加 +`_log_event("event_name", payload, session_id)`,便于日后调试。 -## Performance budget +## Performance budget (guideline) | Hook | Budget | |---|---| | `SessionStart` / `SessionEnd` | <500 ms | | `UserPromptSubmit` | <80 ms (fires every user turn — keep it cheap) | -| `PreToolUse` / `PostToolUse` | <100 ms | -| `Stop` | <300 ms (allowed slightly larger; runs once per turn) | +| `PreToolUse` / `PostToolUse Task` | <100 ms | +| `Stop` | <300 ms (runs once per turn) | -If your change crosses these budgets, profile first; don't accept the +If a change crosses these budgets, profile first; don't accept the regression. -## Sentinel discipline - -`~/.specode/.any-active` is the shell-short-circuit sentinel. Maintain -its truth via `spec_state.sync_any_active_sentinel()` — never write it -ad-hoc. If you add a code path that activates or deactivates a spec, call -sync after. - ## Release -Public release procedure for plugin maintainers. Not for contributors who -are only sending PRs — wait for a maintainer to cut the release that -includes your change. +Public release procedure for plugin maintainers. ### Version manifests (must agree) -Two manifests both carry `version`. They MUST match or `claude plugin tag` -refuses to operate: +Two manifests carry `version`. 
They MUST match or the plugin tag +tooling refuses to operate: - `plugins/specode/.claude-plugin/plugin.json` → `"version": "X.Y.Z"` - `.claude-plugin/marketplace.json` → `plugins[0].version: "X.Y.Z"` ### Picking the next version (semver) -For this plugin, "API" = the slash command set, hook contract, agent names, -and persisted-state schema (anything users see or that their stored data -depends on). +"API surface" for semver purposes = the slash command set, agent +names, hook event names, and persisted-state schema fields that +users or future runtime code can observe. | Bump | When | Examples | | --- | --- | --- | -| **major** (1.0.0 → 2.0.0) | A user feels a breaking change after `claude plugin update` | rename a slash command; rename `~/.specode/sessions/` schema; rename a subagent's `name` field; remove a hook event | -| **minor** (0.1.0 → 0.2.0) | Backwards-compatible new capability | new slash command; new subagent; new optional `@swarm:*` label; new selector option | -| **patch** (0.1.0 → 0.1.1) | Bug fix / docs / internal refactor with no surface change | fix a typo in a prompt; fix a `subagent_type` typo; clarify a reference; CI-only | - -When in doubt, bump higher. Users can pin to a version; they cannot rewind -persisted state if a "patch" silently changes a schema. - -### Pre-release checklist (do not skip) - -```sh -# 1. All tests pass -python3 -m pytest plugins/specode/tests/ -v +| **major** | A user feels a breaking change after a plugin update | rename a slash command; remove an agent; rename a hook event; rename a schema field with no read-side fallback | +| **minor** | Backwards-compatible new capability or evolution | new slash command; new agent; new optional label; schema field rename **with** read-side fallback | +| **patch** | Bug fix / docs / internal refactor with no surface change | fix a typo in a prompt; clarify a reference; CI-only; remove dev-only files from the repo | -# 2. 
CHANGELOG.md has an `## Unreleased` section with concrete entries -# (no "TBD" placeholders, no stale "WIP" markers) -grep -A 1 "^## Unreleased" CHANGELOG.md - -# 3. main is clean and up to date -git status # → nothing to commit -git rev-parse --abbrev-ref HEAD # → main -git pull --ff-only -``` - -If any step fails: fix before continuing. Never publish a release whose -tests are red or whose CHANGELOG is empty — installed users have no -other way to discover what changed. +When in doubt, bump higher. ### Cutting a release @@ -128,23 +184,27 @@ $EDITOR .claude-plugin/marketplace.json # then add a fresh empty `## Unreleased` above it for the next cycle $EDITOR CHANGELOG.md -# 3. Commit + push (message format: "Bump to X.Y.Z: ") -git commit -am "Bump to 0.2.0:

" +# 3. Run the test suite one more time +python3 -m pytest plugins/specode/tests/ -q + +# 4. Commit + push +git commit -am "Bump to X.Y.Z: " git push -# 4. Dry-run the tag first +# 5. Dry-run the tag first claude plugin tag --dry-run plugins/specode +# (or codebuddy plugin tag --dry-run plugins/specode — pick whichever +# host CLI is installed; both wrap the same git operations) -# 5. Create + push the annotated tag +# 6. Create + push the annotated tag claude plugin tag plugins/specode --push ``` -Tag format: `specode--v{version}` (annotated, message `specode {version}`). -Pushed to `origin` by default; override with `--remote`. - -The plugin is **not** packaged into a tarball or registry artifact — -Claude Code and CodeBuddy fetch the marketplace manifest directly from -GitHub and resolve plugins by git tag. **Pushing the tag IS the release.** +Tag format: `specode--v{version}` (annotated, message +`specode {version}`). The plugin is **not** packaged into a tarball +or registry artifact — host CLIs fetch the marketplace manifest +directly from GitHub and resolve plugins by git tag. **Pushing the +tag IS the release.** ### Re-tagging the same version @@ -161,22 +221,11 @@ Once a release is in user hands, prefer a new patch version. ### Verifying after release ```sh +# Adjust the CLI name for whichever host you use (claude / codebuddy). claude plugin marketplace update specode claude plugin install specode@specode # or `update` claude plugin list | grep specode # confirm new version ``` -CodeBuddy users follow the same procedure substituting `codebuddy`. - -## Decision history - -Two non-obvious design calls are encoded in the rules: - -- **1A**: freeform mode relaxes INV-1 (file-not-in-tasks check) but does - NOT exempt INV-2 (turn conservation) or INV-6 (phase gate). Freeform is - an INV-1 escape hatch, not a full specode bypass. -- **2A**: `implementation-log.md` counts as a doc change for INV-2. 
- Cosmetic-doc abuse (one space added to design.md to satisfy INV-2) is - caught by `spec_lint.py` as a WARNING, not by hook denial. - -Change these only via an explicit design-doc decision, not silently. +Users on a different host follow the same procedure with their host's +CLI name (`codebuddy plugin …`). diff --git a/README.md b/README.md index 56bff8c..a683301 100644 --- a/README.md +++ b/README.md @@ -2,88 +2,95 @@ # specode -Specification-driven workflow plugin for **Claude Code** and **CodeBuddy**. - -Load-bearing workflow rules are enforced by Claude Code hooks — deterministic -shell commands the harness runs — rather than relying on the model to -remember and obey instructions in context. - -## What it enforces - -Once a spec is active, these invariants are **harness-checked**. As of 0.4.0 -INVs split into two levels: - -- **Advisory** (process discipline) — violation records a sticky warning on - the ledger and surfaces in the next `UserPromptSubmit` status block. The - tool call still goes through. Editing any spec doc auto-clears INV-1/2/4; - use `/spec --dismiss-advisories` to clear manually. -- **Enforced** (data / contract protection) — violation hard-denies the - tool call (exit 2). These guard against data corruption, evicted writes, - bad subagent dispatch, and subagent-boundary breaches. 
- -| ID | Rule | Hook | Level | -|---|---|---|---| -| **INV-1** | Editing a source file requires `tasks.md` coverage, same-turn doc edit, or `freeform` mode | `PreToolUse` | **advisory** | -| **INV-2** | A turn that touched source code should touch at least one spec document | `Stop` | **advisory** | -| **INV-3** | Spec-doc writes are rejected if the session was evicted by another window | `PreToolUse` | **enforced** | -| **INV-4** | `requirements.md` / `bugfix.md` edits should be followed by `tasks.md ## 测试要点` updates same turn | `Stop` | **advisory** | -| **INV-5** | Each user turn injects a status block (`spec / phase / lock / turn / advisories`) into the model's context | `UserPromptSubmit` | injection | -| **INV-6** | Source-code edits in pre-implementation phases (intake / requirements / bugfix / design / tasks) | `PreToolUse` | **advisory** | -| **INV-7** | `Task` tool's `subagent_type` must be prefixed `specode:` when a task-swarm run is active | `PreToolUse` | **enforced** | -| **INV-8** | Subagent writes outside their `@writes` boundary are blocked | `PreToolUse` | **enforced** | -| **INV-9** | `tasks.md` edits during a task-swarm run must go through `writeback` (line-safe diff) | `PreToolUse` | **enforced** | -| **INV-11** | `Bash` commands that block on TTY input (`npm create`, `git commit` no `-m`, `vim`, etc.) are denied with a non-interactive rewrite suggestion; `PostToolUse` also scans tool output for hang-prompt signatures and injects an advisory | `PreToolUse` + `PostToolUse` | **enforced** + advisory | - -INV-1 and INV-2 form the **Code-Doc Sync Guard (CDSG)** — advisory since -0.4.0 (previously hard-deny in 0.3.x). 
- -## Architecture - -``` -.claude-plugin/marketplace.json ← single-plugin marketplace manifest -plugins/specode/ - .claude-plugin/plugin.json ← plugin manifest - hooks/hooks.json ← 6 event handlers with shell short-circuit on sentinel - hooks/hooks-probe.json ← diagnostic probe (swap in for re-verification) - skills/specode/ ← skill content (SKILL.md + references) - commands/ ← /spec, /continue, /status, /end, /task-swarm - agents/ ← task-swarm-{coder,reviewer,validator,planner} - scripts/ - spec_guard.py ← hook entry; dispatches to handlers; audit log - spec_state.py ← read-only state probe + sentinel + Claude-session record - spec_sync.py ← INV-1/2/3/4/6 logic; ledger; phase gate; glob matcher - spec_session.py ← lock + phase + active-pointer model - spec_init.py / spec_lint.py / spec_status.py / spec_choice.py / spec_vault.py - tests/ ← 19 pytest cases (unit + integration) -``` - -## Install +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./README.md#license) +[![Version](https://img.shields.io/badge/version-0.10.21-blue.svg)](./plugins/specode/.claude-plugin/plugin.json) +[![Claude Code](https://img.shields.io/badge/Claude%20Code-compatible-8A2BE2)](https://github.com/qxbyte/specode#installation) +[![CodeBuddy](https://img.shields.io/badge/CodeBuddy-2.97.1%2B-1E90FF)](https://github.com/qxbyte/specode#installation) +[![Tests](https://img.shields.io/badge/pytest-152%20cases-success)](./plugins/specode/tests) + +> A specification-driven workflow plugin for CLI coding agents +> (Claude Code / CodeBuddy). + +specode turns a one-line requirement into a disciplined, +document-first delivery loop. The agent is walked through a fixed +phase pipeline — **requirements → design → tasks → implementation → +acceptance** — with five Markdown documents +(`requirements.md` / `bugfix.md` / `design.md` / `tasks.md` / +`implementation-log.md`) as the single source of truth. 
At every +phase gate the user picks the next move through an in-chat +selector; in between, advisory hooks keep the agent on script +without ever blocking a tool call. + +If you've watched an LLM agent drift mid-task, lose context across +windows, or merge unreviewed code, specode is the rails. + +## Highlights + +- **Document-first discipline.** Every requirement begins with a spec + doc, not code. Hooks remind the agent to consult and update docs + before — and after — touching code. +- **Advisory hooks, never blocking.** All seven hooks `exit 0`. They + inject guidance into the model's context (status footer, phase + selector, doc-sync reminder, silent lock heartbeat) but never abort + a tool call. No mid-flow "hook denied" surprises. +- **Session-bound state.** Every host session has its own state file + at `~/.specode/sessions/.json` (atomic writes). Open + three CLI windows in parallel and they stay disambiguated. +- **Phase-gate selectors.** At each decision point the agent renders + one of three selector skeletons (single-select / wizard / + multi-select) drawn from 11 fixed scenarios — you steer, the + agent executes. +- **task-swarm: built-in orchestrator that implements `tasks.md` in + parallel.** After `tasks.md` is approved, task-swarm fans out + multiple **coder** subagents that work concurrently (auto-grouped + to avoid file-write conflicts via `@writes` and respect + `@depends-on` task ordering), then funnels their output through a + single **reviewer** (advisory findings, P0 issues trigger one fix + round) and a single **validator** (binary pass/fail, fail triggers + a fix loop until pass). A deadloop guard trips after three rounds + of identical failures. +- **Obsidian-aware doc root.** Three-tier resolution + (env > config > auto-detected Obsidian vault) keeps your specs in + your knowledge base, not scattered across project folders. 
+- **Status footer on every active turn.** You always know where you + are: + ``` + ─── spec-mode ─── spec: <slug> | session: <8-prefix> | phase: <phase>

| /specode:end to exit + ``` +- **Per-session JSONL logs** for "why did the agent go off-script" + forensics, with automatic secret redaction and 500-char string + truncation. +- **Main agent writes spec docs directly.** No subagent fork — the + main agent reads template skeletons from `assets/templates/.md` + and fills them against your original requirement text, keeping + context and conversational state intact (a previous `spec-writer` + subagent was removed in 0.10.11 precisely because it couldn't + see the main agent's context and tended to hallucinate generic + template content). + +## Installation ### From GitHub (recommended) +Works with either CLI; the plugin manifest is shared. +CodeBuddy verified on 2.97.1. + ```sh +# CodeBuddy +codebuddy plugin marketplace add https://github.com/qxbyte/specode +codebuddy plugin install specode@specode + # Claude Code claude plugin marketplace add https://github.com/qxbyte/specode claude plugin install specode@specode - -# CodeBuddy (verified on 2.97.1) -codebuddy plugin marketplace add https://github.com/qxbyte/specode -codebuddy plugin install specode@specode ``` -Both harnesses clone the marketplace, locate the plugin under -`plugins/specode/`, and auto-load `hooks/`, `skills/`, `commands/`. -Updates land via `claude plugin update specode` or `claude plugin -marketplace update specode`. - -### One-shot session (Claude Code only) +### One-shot (Claude Code only) ```sh claude --plugin-url https://github.com/qxbyte/specode/archive/refs/heads/main.zip ``` -Loads the plugin for the current session only; nothing persists. 
- ### Local development ```sh @@ -92,135 +99,150 @@ claude --plugin-dir ./specode/plugins/specode codebuddy --plugin-dir ./specode/plugins/specode ``` -Once loaded: +### Uninstall +```sh +claude plugin uninstall specode@specode +claude plugin marketplace remove specode +# optional: wipe user-level state +rm -rf ~/.specode ~/.config/specode ``` -/help # list /specode:* commands -/reload-plugins # after editing plugin files + +### Update + +```sh +# Claude Code +claude plugin update specode@specode +claude plugin marketplace update specode + +# CodeBuddy +codebuddy plugin update specode@specode +codebuddy plugin marketplace update specode ``` -Hook activity logs to `~/.specode/audit/.log` (UTC). +## Usage -Optional **local telemetry** of workflow events (spec lifecycle, INV -violations, task-swarm rounds) — disabled by default, enable with -`SPECODE_TELEMETRY=on`. Events go to a single `~/.specode/telemetry.jsonl` -(append-only, no remote upload, no daily rotation — grep-friendly). Run -`python3 scripts/spec_state.py telemetry-summary` for a local aggregate. +### 1. Configure your document root (first run) -### Uninstall +specode stores spec docs under `/specs//`. Bind a +root once and it's remembered: ```sh -# 1. Uninstall the plugin first -claude plugin uninstall specode@specode +/specode:spec --set-vault # use an Obsidian vault +/specode:spec --set-root # any folder works (equivalent) +/specode:spec --detect-vault # list detected Obsidian vaults +/specode:spec --vault-status # show current root + resolution source +``` -# 2. Then remove the marketplace -claude plugin marketplace remove specode +If unset, specode auto-detects an Obsidian vault, otherwise asks at +spec creation. -# 3. (optional) Remove user-level runtime state — NOT touched by step 1 -rm -rf ~/.specode ~/.config/specode -# also vault-side index if you want a fully clean slate: -# find -name '.active-specode.json' -delete +### 2. 
Start a spec + +```sh +/specode:spec -n # recommended: explicit slug +/specode:spec # or let the agent derive one +/specode:spec : # or set display name + requirement ``` -Notes: -- **Order matters**: uninstall the plugin *before* the marketplace, otherwise - Claude Code reports an orphaned plugin on next start. -- `claude plugin uninstall` only removes the install record; the plugin cache - under `~/.claude/plugins/cache/specode/` is garbage-collected ~7 days after - it becomes orphaned. To reclaim disk immediately: `rm -rf ~/.claude/plugins/cache/specode/`. -- `~/.specode/` and `~/.config/specode/` are *user* state (audit logs, sessions, - obsidianRoot config) and are deliberately **not** removed by the uninstall - commands — you keep your spec history across reinstalls. Delete them manually - if you want a clean slate. -- To temporarily disable without uninstalling: `claude plugin disable specode@specode` - (and `enable` to bring it back). - -## Task-Swarm Mode (multi-agent acceleration) - -After tasks are confirmed, the "task execution" selector offers a third option -`用 task-swarm 多 agent 并发`. Selecting it delegates execution to **task-swarm**: -the specode session stays as orchestrator (lock, ledger, tasks.md writes), -but actual coding is fanned out to dedicated subagents — one **coder** -subagent per top-level stage, one **reviewer** subagent per stage, and the -existing "检查点" tasks become **validator** subagents. - -Reviewer and validator subagents are spawned **without Edit/Write tools** — -they physically cannot modify code. This is the anti-self-approval guarantee: -the agent that wrote the code is never the agent that reviews or accepts it. - -For a spec with 5 stages, 20 leaf tasks, and 5 checkpoint tasks, this dispatches -**15 subagents** rather than the naive 60 (1:3 expansion) — and that's just for the -**initial pass**. 
Each stage runs a `coder → reviewer → validator` loop with up to -3 rounds: reviewer P0 findings trigger a focused coder fix round (no scope creep, -only the listed P0s), then re-review; validator fail triggers another coder fix -round + re-review + re-validate. A stage's `[x]` is only written when reviewer -shows 0 P0s **and** validator passes. Default `--max-rounds 3`; reviewer and -validator each detect "same P0 / same failure as last round" to short-circuit -infinite loops. - -→ Protocol: `plugins/specode/skills/specode/references/task-swarm.md` -→ Example tasks.md: `plugins/specode/skills/specode/references/task-swarm-example.md` -→ Manual entry: `/task-swarm /tasks.md` +`-n` keeps the slug verbatim (Unicode allowed — Chinese, Japanese, +emoji), only forbidding filesystem-dangerous characters. The +slug-less form lets the agent infer one, which is convenient but +less predictable. -## Usage +After creation, the agent walks you through two consecutive +selectors: + +1. **project-root-choice** — where generated code should live (decoupled + from the doc directory). +2. **workflow-choice** — start from `requirements.md`, jump to + `bugfix.md` for a fix flow, etc. + +From here, every model turn ends with the status footer and (at +phase gates) a selector for the next step. + +### 3. Manage sessions + +```sh +/specode:continue [slug] # resume — current session or a named spec +/specode:status # show mode / phase / lock / pending selector +/specode:end # end the session (docs preserved) +``` + +State is keyed by host `session_id`, so each terminal window keeps +its own thread. + +### 4. 
Run tasks in parallel with task-swarm -Inside a Claude Code session with the plugin loaded: +Once `tasks.md` is approved and you pick the `task-swarm` path on the +`tasks-execution` selector, the orchestrator takes over: ``` -/specode:spec --persist # start persistent spec session -/specode:continue [slug] # resume / switch -/specode:status # show current session -/specode:end # end persistent session - -/specode:spec --freeform # relax INV-1 (INV-2 still enforced) -/specode:spec --strict # restore INV-1 -/specode:spec --sync-status # ledger / pending sync / last violation +init → plan → fork (N coders) → advance → writeback → resolve + ↑ ↓ + └─────── reviewer / validator ────┘ ``` -Once a spec is active: +- **coder** agents run in parallel, auto-partitioned by `@writes` + file conflicts. +- **reviewer** runs once per group; P0 findings carrying evidence tags + (`[req:x.y]` / `[security]` / `[contract]`) trigger one round of + `p0-fix`; everything else is advisory. +- **validator** runs once per group; `fail` triggers a `v-fix` loop + until `pass`, or three identical-failure rounds (deadloop guard). +- `--skip-validator` is a selectable option for human-acceptance + mode. -- Every user prompt is augmented with a `specode active` status block - identifying the spec, phase, lock state, turn id, and freeform mode. -- Edits to project source files outside `tasks.md` are blocked unless a - same-turn doc change preceded them (INV-1). -- Stopping a turn that touched code without touching docs fails until the - model adds a `design.md` / `tasks.md` / `implementation-log.md` entry - (INV-2). -- `requirements.md` / `bugfix.md` edits force a same-turn update to - `tasks.md` (the `## 测试要点` section, INV-4). -- Code edits during `intake` / `requirements` / `bugfix` / `design` / `tasks` - phases are absolutely refused — freeform does NOT exempt INV-6. +`/specode:task-swarm` is the entry point; the full state-machine +spec lives in `references/task-swarm.md`. 
-## Asymmetry note +### 5. Inspect session logs -INV-2 is **unidirectional**: source-code change ⇒ doc change required, but -doc-only edits (typo fixes, wording tweaks) do NOT require a code change. -`implementation-log.md` counts as a doc change to satisfy INV-2 cheaply — -`spec_lint.py` reports a soft WARNING for log entries shorter than 30 chars -or that don't reference any actual code file (the *cosmetic-doc* concern). +specode writes per-session event streams to +`~/.specode/logs/.jsonl` (hooks, agent tool calls, +phase / lock transitions). Use them when debugging +"why did the agent skip a phase": -## Performance +```sh +sh "$CLAUDE_PLUGIN_ROOT/scripts/run.sh" \ + "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" replay --session +``` -| Hook | Wall-clock budget | -|---|---| -| `SessionStart` / `SessionEnd` | always runs Python; <500ms | -| `UserPromptSubmit` | only runs Python when `~/.specode/.any-active` sentinel exists; <80ms | -| `PreToolUse` / `PostToolUse` / `Stop` | same shell short-circuit; <100ms when running | +Secrets are redacted by default (`password / api_key / token / …`) +and strings truncate at 500 chars. Extend +`~/.config/specode/config.json.redact_keys` to add more. -When no spec is active, the shell `[ ! -e ~/.specode/.any-active ]` check -exits before any Python startup → effectively free. +### 6. Global bypass (debug only) -## CodeBuddy support +```sh +SPECODE_GUARD=off # short-circuit all hooks to exit 0 +SPECODE_LOG=off # disable session logging +``` + +## Architecture -Verified on CodeBuddy 2.97.1: same `hooks/hooks.json` and -`scripts/spec_guard.py` run unmodified. CodeBuddy ships a Claude Code -2.1.142 agent under the hood and injects both `CLAUDE_PLUGIN_ROOT` and -`CODEBUDDY_PLUGIN_ROOT`, so the integration is byte-for-byte compatible. 
+``` +.claude-plugin/marketplace.json single-plugin marketplace manifest +plugins/specode/ + .claude-plugin/plugin.json plugin manifest + hooks/hooks.json 7 advisory hook handlers + commands/ /specode:spec, :continue, :end, + :status, :task-swarm + agents/ task-swarm-{planner,coder, + reviewer,validator} + scripts/ spec_vault / spec_init / + spec_session / spec_lint / + spec_status / task_swarm* + skills/specode/ SKILL.md + references/ + assets/templates/ seed templates + tests/ 152 pytest cases +``` ## Contributing -See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for the runtime stdlib-only -rule, hook safety contract, and test conventions. +See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for the stdlib-only +runtime rule, hook safety contract (advisory only, never `exit 2`), +and test conventions. ## License diff --git a/README.zh-CN.md b/README.zh-CN.md index 70fb9ae..3815192 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -2,75 +2,76 @@ # specode -面向 **Claude Code** 与 **CodeBuddy** 的规格驱动工作流插件。 - -承担约束的工作流规则由 Claude Code hooks 强制——也就是 harness 实际执行的确定性 shell 命令——而不是依赖模型在上下文里"记住并遵守"指令。 - -## 强制保证(Invariants) - -只要 spec 处于激活状态,下列规则即由 **harness 检查**。0.4.0 起 INV 分两级: - -- **advisory**(流程纪律):违反时在 ledger 写一条 sticky 提醒,下轮 `UserPromptSubmit` 状态块显示;工具调用照样放行。改任一 spec 文档自动清除 INV-1/2/4;手动清除运行 `/spec --dismiss-advisories`。 -- **enforced**(数据/契约保护):违反时硬拒(exit 2),防止数据丢失、被驱逐覆盖写、subagent 派单错、subagent 越界写。 - -| ID | 规则 | Hook | 等级 | -|---|---|---|---| -| **INV-1** | 编辑源码需 `tasks.md` 列表覆盖、同轮 doc 编辑、或 `freeform` 模式 | `PreToolUse` | **advisory** | -| **INV-2** | 触碰源码的 turn 应在结束前至少触碰一份 spec 文档 | `Stop` | **advisory** | -| **INV-3** | 当前会话已被其他窗口驱逐时,spec 文档写入被拒 | `PreToolUse` | **enforced** | -| **INV-4** | `requirements.md` / `bugfix.md` 编辑应同轮更新 `tasks.md ## 测试要点` | `Stop` | **advisory** | -| **INV-5** | 每个用户 turn 注入状态块(`spec / phase / lock / turn / advisories`) | `UserPromptSubmit` | 注入 | -| **INV-6** | 实现前阶段(intake / requirements / bugfix / design / tasks)禁止源码编辑 | `PreToolUse` | 
**advisory** | -| **INV-7** | task-swarm 运行期间 `Task` 工具的 `subagent_type` 必须 `specode:` 前缀 | `PreToolUse` | **enforced** | -| **INV-8** | subagent 写出 `@writes` 边界的文件被拒 | `PreToolUse` | **enforced** | -| **INV-9** | task-swarm 期间 `tasks.md` 编辑必须经 `writeback`(line-safe diff) | `PreToolUse` | **enforced** | -| **INV-11** | 会卡死在 TTY 的 `Bash` 命令(`npm create`、`git commit` 无 `-m`、`vim` 等)被拒,附非交互改写建议;`PostToolUse` 还会扫描输出中的交互提示特征,命中即注入 advisory | `PreToolUse` + `PostToolUse` | **enforced** + advisory | - -INV-1 与 INV-2 共同构成 **Code-Doc Sync Guard (CDSG)** —— 0.4.0 起降为 advisory(0.3.x 仍为硬拒)。 - -## 项目结构 - -``` -.claude-plugin/marketplace.json ← 单插件 marketplace 清单 -plugins/specode/ - .claude-plugin/plugin.json ← 插件清单 - hooks/hooks.json ← 6 个事件处理器,挂哨兵 shell 短路 - hooks/hooks-probe.json ← 诊断探针(重新验证时替换上去) - skills/specode/ ← skill 内容(SKILL.md + references) - commands/ ← /spec, /continue, /status, /end - scripts/ - spec_guard.py ← hook 入口;分发 + 审计日志 - spec_state.py ← 只读状态探测 + 哨兵 + Claude 会话登记 - spec_sync.py ← INV-1/2/3/4/6 逻辑;ledger;阶段闸门;glob 匹配 - spec_session.py ← 锁 + 阶段 + active-pointer 模型 - spec_init.py / spec_lint.py / spec_status.py / spec_choice.py / spec_vault.py - tests/ ← 19 个 pytest 用例(单元 + 集成) -``` +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./README.zh-CN.md#许可证) +[![Version](https://img.shields.io/badge/version-0.10.21-blue.svg)](./plugins/specode/.claude-plugin/plugin.json) +[![Claude Code](https://img.shields.io/badge/Claude%20Code-compatible-8A2BE2)](https://github.com/qxbyte/specode#installation) +[![CodeBuddy](https://img.shields.io/badge/CodeBuddy-2.97.1%2B-1E90FF)](https://github.com/qxbyte/specode#installation) +[![Tests](https://img.shields.io/badge/pytest-152%20cases-success)](./plugins/specode/tests) + +> 面向 CLI 编码代理(Claude Code / CodeBuddy)的规格驱动工作流插件。 + +specode 把一句话需求变成一条「文档优先」的纪律化交付链路。代理被牵着走过 +固定的 phase 流水线 —— **requirements → design → tasks → implementation +→ acceptance**,五份 Markdown 文档(`requirements.md` / `bugfix.md` / 
+`design.md` / `tasks.md` / `implementation-log.md`)是唯一事实源。 +每个 phase-gate 由你在 chat 里通过选择器决定下一步;中间过程由提醒式 +hook 把代理钉在轨道上,**永不阻断**工具调用。 + +如果你见过 LLM 代理跑着跑着就飘、跨窗口丢上下文、合上未审过的代码 —— +specode 就是给它套的轨道。 + +## 能力亮点 + +- **Document-first 纪律**:每条需求都先落到 spec 文档再动代码。Hook 在 + 写代码前后都会提醒代理读 / 改文档。 +- **提醒式 Hook,永不阻断**:7 个 hook 全部 `exit 0`,只往模型上下文 + 注入提示(状态行 footer、phase 选择器、文档-代码同步提醒、静默续锁 + 心跳),不会中途打断工具调用,不再有「hook 拒绝」的意外。 +- **会话状态绑定 `session_id`**:每个宿主 session 拥有独立状态文件 + `~/.specode/sessions/.json`(原子写)。同时开三个窗口 + 也不会混在一起。 +- **Phase-gate 选择器**:每个决策点由代理按三种骨架(A 单选 / B + wizard / C 复选)渲染 11 个固定场景之一 —— 你选方向,代理执行。 +- **task-swarm —— 内置的并发实现编排器**:`tasks.md` 确认后,task-swarm + 扇出多个 **coder** 子代理并发干活(按 `@writes` 文件写冲突自动切 + group、按 `@depends-on` 排拓扑),再让单实例 **reviewer** 提 + 建议(P0 触发一次修复)和单实例 **validator** 做最终判定(pass/fail + 二元,fail 会循环修到 pass)。连续 3 轮同 fail 触发死循环保护。 +- **Obsidian 感知的文档根**:三层解析(env > config > 自动探测 + Obsidian vault),spec 落进你的知识库,而不是散在各 project 目录。 +- **active 期间每个 turn 都有状态行 footer**:永远知道自己在哪里: + ``` + ─── spec-mode ─── spec: | session: <前 8 位> | phase:

| /specode:end 退出 + ``` +- **每个 session 的 JSONL 日志**:用来排查「代理为什么走偏」,默认 + 屏蔽敏感字段、字符串自动截断到 500 字符。 +- **主代理直接写 spec 文档**:不 fork subagent —— 主代理读 + `assets/templates/.md` 模板骨架、按用户原始需求填空,全程 + 保留上下文与对话状态(0.10.11 之前用过 `spec-writer` subagent, + 因为拿不到主代理上下文容易 hallucinate 通用模板内容,故移除)。 ## 安装 -### 通过 GitHub(推荐) +### GitHub(推荐) + +两个 CLI 都行,插件清单通用。CodeBuddy 已在 2.97.1 上验证。 ```sh +# CodeBuddy +codebuddy plugin marketplace add https://github.com/qxbyte/specode +codebuddy plugin install specode@specode + # Claude Code claude plugin marketplace add https://github.com/qxbyte/specode claude plugin install specode@specode - -# CodeBuddy(已在 2.97.1 上验证) -codebuddy plugin marketplace add https://github.com/qxbyte/specode -codebuddy plugin install specode@specode ``` -两个 harness 都会克隆 marketplace、定位到 `plugins/specode/` 下的插件,然后自动加载 `hooks/`、`skills/`、`commands/`。后续升级用 `claude plugin update specode` 或 `claude plugin marketplace update specode`。 - ### 一次性会话(仅 Claude Code) ```sh claude --plugin-url https://github.com/qxbyte/specode/archive/refs/heads/main.zip ``` -仅当前会话生效,不持久化任何状态。 - ### 本地开发 ```sh @@ -79,89 +80,141 @@ claude --plugin-dir ./specode/plugins/specode codebuddy --plugin-dir ./specode/plugins/specode ``` -加载后: +### 卸载 +```sh +claude plugin uninstall specode@specode +claude plugin marketplace remove specode +# 可选:清理用户级状态 +rm -rf ~/.specode ~/.config/specode ``` -/help # 列出 /specode:* 系列命令 -/reload-plugins # 修改插件文件后重新加载 + +### 升级 + +```sh +# Claude Code +claude plugin update specode@specode +claude plugin marketplace update specode + +# CodeBuddy +codebuddy plugin update specode@specode +codebuddy plugin marketplace update specode ``` -Hook 行为日志写入 `~/.specode/audit/.log`(UTC)。 +## 使用 -可选的**本地 telemetry**(记录 spec 生命周期、INV 触发、task-swarm 收敛轮数等流程事件)——默认关闭,需要 `SPECODE_TELEMETRY=on` 启用。事件写入单文件 `~/.specode/telemetry.jsonl`(append-only,不上报远端,不按日切,方便 grep)。运行 `python3 scripts/spec_state.py telemetry-summary` 做本地聚合分析。 +### 1. 首次使用:绑定文档根 -### 卸载 +spec 文档落在 `/specs//`。绑定一次即可长期记住: ```sh -# 1. 
先卸载插件 -claude plugin uninstall specode@specode +/specode:spec --set-vault <路径> # 绑定 Obsidian vault +/specode:spec --set-root <路径> # 任意目录都行(等价) +/specode:spec --detect-vault # 列出已检测到的 vault +/specode:spec --vault-status # 查看当前文档根 + 解析来源 +``` -# 2. 再移除 marketplace -claude plugin marketplace remove specode +未绑定时 specode 会自动探测 Obsidian vault,没有就会在创建 spec 时 +问你。 -# 3.(可选)清理用户级运行时状态(步骤 1 不会动这些) -rm -rf ~/.specode ~/.config/specode -# 想彻底清干净还可以删 vault 里的索引文件: -# find -name '.active-specode.json' -delete +### 2. 新建 spec + +```sh +/specode:spec -n <需求> # 推荐:显式 slug +/specode:spec <需求> # 或让代理推导 slug +/specode:spec <名称>: <需求> # 或同时指定显示名 + 需求 ``` -注意事项: +`-n` 保留 slug 原文(允许 Unicode:中文 / 日文 / emoji 都行),只禁 +文件系统危险字符。不带 `-n` 的写法让代理推导 slug,方便但结果不可预知。 -- **顺序很重要**:必须先卸 plugin 再卸 marketplace,否则 Claude Code 下次启动会 - 报 orphaned plugin 警告。 -- `claude plugin uninstall` 只移除安装记录;`~/.claude/plugins/cache/specode/` - 下的缓存会在 orphan 后约 7 天自动 GC。想立刻回收磁盘: - `rm -rf ~/.claude/plugins/cache/specode/`。 -- `~/.specode/` 和 `~/.config/specode/` 是 *用户* 数据(audit 日志、会话记录、 - obsidianRoot 配置),卸载命令**故意不动**它们——这样重装时你的 spec 历史不丢。 - 想从头开始就手动删。 -- 只想**临时禁用**而不卸载:`claude plugin disable specode@specode`(用 - `enable` 启回来)。 +创建成功后代理会**连续**呈现两个选择器: -## 使用 +1. **project-root-choice**:代码写到哪个目录(与文档目录解耦)。 +2. **workflow-choice**:从 `requirements.md` 起步,还是走 `bugfix.md` + 缺陷修复流程等。 + +之后每个 turn 都以状态行 footer 收尾,phase-gate 处由代理弹选择器 +让你决定下一步。 + +### 3. 管理会话 -会话内(插件已加载): +```sh +/specode:continue [slug] # 恢复当前 session 或切到指定 spec +/specode:status # 查看 mode / phase / lock / pending selector +/specode:end # 结束 session(文档保留) +``` + +状态按宿主 `session_id` 隔离,每个终端窗口各自一条线。 + +### 4. 
task-swarm:并发跑 tasks + +`tasks.md` 确认后,在 `tasks-execution` 选择器里选 `task-swarm` 路径, +编排器接管: ``` -/specode:spec --persist <需求> # 启动持久 spec 会话 -/specode:continue [slug] # 恢复 / 切换 spec -/specode:status # 查看当前会话状态 -/specode:end # 结束持久会话 - -/specode:spec --freeform # 放宽 INV-1(INV-2 仍然强制) -/specode:spec --strict # 恢复 INV-1 -/specode:spec --sync-status # 查看 ledger / 待同步项 / 上次违规 +init → plan → fork(N 个 coder) → advance → writeback → resolve + ↑ ↓ + └────────── reviewer / validator ────┘ ``` -spec 激活后: +- **coder** 并发执行,按 `@writes` 文件冲突自动切 group。 +- **reviewer** 每个 group 单实例;只有带证据标签(`[req:x.y]` / + `[security]` / `[contract]`)的 P0 才触发一轮 `p0-fix`,其余 advisory。 +- **validator** 每个 group 单实例;`fail` 进入 `v-fix` 循环直到 `pass`, + 连续 3 轮同 fail 触发死循环保护。 +- 可在 `tasks-execution` 选择「task-swarm + 人工验收(跳过 validator)」 + → 加 `--skip-validator` 走人工验收。 + +`/specode:task-swarm` 是入口;完整状态机规格见 +`references/task-swarm.md`。 -- 每个用户 prompt 都会被附加一段 `specode active` 状态块,标注 spec、phase、锁状态、turn id、freeform 模式 -- 对项目源文件的编辑(不在 `tasks.md` 列表内的)会被拦截,除非同一轮先动了文档(INV-1) -- 触碰过代码的 turn 在停止前未触碰任何文档时会被拦下,模型必须补一条 `design.md` / `tasks.md` / `implementation-log.md`(INV-2) -- 改 `requirements.md` / `bugfix.md` 必须在同一轮更新 `tasks.md` 的 `## 测试要点` 节(INV-4) -- intake / requirements / bugfix / design / tasks 阶段绝对不允许源代码编辑——freeform **不**豁免 INV-6 +### 5. 
查看 session 日志 -## 不对称约束说明 +specode 默认把每个 session 的事件流写到 +`~/.specode/logs/.jsonl`(含 hook 触发、主代理工具调用、 +phase / lock 变化),用于排查「代理为什么跳过 phase / fork 错 agent / +选错 selector」: -INV-2 是**单向**的:源代码变更 ⇒ 必须有文档变更,但纯文档编辑(错别字、措辞调整)**不**强制配套代码变更。`implementation-log.md` 算作满足 INV-2 的轻量文档动作——`spec_lint.py` 对短于 30 字符或没有引用任何实际代码文件的日志条目会发出软 WARNING(防止"装饰性文档"绕规则)。 +```sh +sh "$CLAUDE_PLUGIN_ROOT/scripts/run.sh" \ + "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" replay --session +``` -## 性能预算 +默认 redact 黑名单(`password / api_key / token / …`),字符串字段 +自动截断到 500 字符。可通过 `~/.config/specode/config.json.redact_keys` +扩展黑名单。 -| Hook | 墙钟预算 | -|---|---| -| `SessionStart` / `SessionEnd` | 总会跑 Python;<500ms | -| `UserPromptSubmit` | 仅当 `~/.specode/.any-active` 哨兵存在时跑 Python;<80ms | -| `PreToolUse` / `PostToolUse` / `Stop` | 同样的 shell 短路;运行时 <100ms | +### 6. 全局 bypass(仅调试) -无 spec 激活时,shell 那行 `[ ! -e ~/.specode/.any-active ]` 直接退出,Python 根本不启动 → 实际成本可忽略。 +```sh +SPECODE_GUARD=off # 让所有 hook 立刻 exit 0 +SPECODE_LOG=off # 让 session 日志不写入 +``` -## CodeBuddy 支持 +## 项目结构 -已在 CodeBuddy 2.97.1 上验证:相同的 `hooks/hooks.json` 与 `scripts/spec_guard.py` 不需任何修改即可工作。CodeBuddy 内部基于 Claude Code 2.1.142 agent,并同时注入 `CLAUDE_PLUGIN_ROOT` 与 `CODEBUDDY_PLUGIN_ROOT`,因此集成是字节级兼容的。 +``` +.claude-plugin/marketplace.json 单插件 marketplace 清单 +plugins/specode/ + .claude-plugin/plugin.json 插件清单 + hooks/hooks.json 7 个提醒式 hook handler + commands/ /specode:spec, :continue, :end, + :status, :task-swarm + agents/ task-swarm-{planner,coder, + reviewer,validator} + scripts/ spec_vault / spec_init / + spec_session / spec_lint / + spec_status / task_swarm* + skills/specode/ SKILL.md + references/ + assets/templates/ 文档模板 + tests/ 152 个 pytest 用例 +``` ## 贡献 -参见 [`CONTRIBUTING.md`](./CONTRIBUTING.md):runtime 仅限标准库、hook 安全契约、测试规范。 +参见 [`CONTRIBUTING.md`](./CONTRIBUTING.md):runtime 仅限标准库、 +hook 安全契约(提醒式、永不 `exit 2`)、测试规范。 ## 许可证 diff --git a/migrate-from-spec-mode.sh b/migrate-from-spec-mode.sh deleted file mode 100755 index 12e3aec..0000000 --- 
a/migrate-from-spec-mode.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash -# specode 从 spec-mode 改名而来的一次性迁移脚本。 -# -# 迁移以下用户运行时状态: -# ~/.spec-mode/ → ~/.specode/ -# ~/.config/spec-mode/ → ~/.config/specode/ -# /.active-spec-mode.json → /.active-specode.json -# /spec-in/.active-spec-mode.json → ...active-specode.json -# -# 并提示用户修改 shell 配置里的 SPEC_MODE_ROOT 等环境变量。 -# -# 用法: -# ./migrate-from-spec-mode.sh # 实际迁移 -# ./migrate-from-spec-mode.sh --dry-run # 只看会做什么 - -set -euo pipefail - -DRY=0 -if [[ "${1:-}" == "--dry-run" ]]; then - DRY=1 -fi - -if [[ -t 1 ]]; then - G="\033[32m"; Y="\033[33m"; R="\033[31m"; B="\033[1m"; D="\033[2m"; N="\033[0m" -else - G=""; Y=""; R=""; B=""; D=""; N="" -fi - -log() { printf "%b\n" "${B}>${N} $*"; } -ok() { printf "%b\n" "${G}✔${N} $*"; } -warn() { printf "%b\n" "${Y}!${N} $*"; } -skip() { printf "%b\n" "${D}·${N} $*"; } - -run() { - if (( DRY )); then - printf " ${D}(dry-run)${N} %s\n" "$*" - else - eval "$@" - fi -} - -move_if_exists() { - local from="$1" to="$2" - if [[ ! -e "$from" ]]; then - skip "skip $from (不存在)" - return - fi - if [[ -e "$to" ]]; then - warn "存在 $to (目标已存在)" - warn " 请手动检查并合并: $from -> $to" - return - fi - run "mkdir -p '$(dirname "$to")'" - run "mv '$from' '$to'" - ok "迁移 $from → $to" -} - -log "specode 迁移脚本 (mode=$([ $DRY -eq 1 ] && echo dry-run || echo apply))" -echo - -# ---------- 1. 用户级状态目录 ---------- -log "1. 用户级状态目录" -move_if_exists "$HOME/.spec-mode" "$HOME/.specode" -move_if_exists "$HOME/.config/spec-mode" "$HOME/.config/specode" -echo - -# ---------- 2. Vault 内 active 索引文件 ---------- -log "2. 
Vault 内 active 索引文件" - -# 找 obsidianRoot -OBS_ROOT="" -if [[ -f "$HOME/.config/specode/config.json" ]]; then - OBS_ROOT="$(python3 -c "import json,os; d=json.load(open(os.path.expanduser('~/.config/specode/config.json'))); print(d.get('obsidianRoot','') or d.get('documentRoot',''))" 2>/dev/null || true)" -fi -# 也试老配置目录(迁移前的) -if [[ -z "$OBS_ROOT" && -f "$HOME/.config/spec-mode/config.json" ]]; then - OBS_ROOT="$(python3 -c "import json,os; d=json.load(open(os.path.expanduser('~/.config/spec-mode/config.json'))); print(d.get('obsidianRoot','') or d.get('documentRoot',''))" 2>/dev/null || true)" -fi - -if [[ -z "$OBS_ROOT" ]]; then - warn "找不到 obsidianRoot 配置,跳过 vault 内 .active-spec-mode.json 迁移" - warn "如有需要请手动: find -name '.active-spec-mode.json' -exec rename ..." -else - log " obsidianRoot: $OBS_ROOT" - # vault 根下 / spec-in 下都找 - while IFS= read -r -d '' f; do - target="${f%/.active-spec-mode.json}/.active-specode.json" - move_if_exists "$f" "$target" - done < <(find "$OBS_ROOT" -maxdepth 6 -name ".active-spec-mode.json" -print0 2>/dev/null) -fi -echo - -# ---------- 3. 项目内 .claude-plugin marketplace 引用 ---------- -log "3. 检查 Claude Code / CodeBuddy 已安装 plugin" - -CC_PLUGIN_PATH="$HOME/.claude/plugins/spec-mode" -if [[ -d "$CC_PLUGIN_PATH" ]]; then - warn "发现 Claude Code 已安装老 plugin: $CC_PLUGIN_PATH" - warn " 建议: claude plugin uninstall spec-mode 然后重新装 specode" -fi -CB_PLUGIN_PATH="$HOME/.codebuddy/plugins/spec-mode" -if [[ -d "$CB_PLUGIN_PATH" ]]; then - warn "发现 CodeBuddy 已安装老 plugin: $CB_PLUGIN_PATH" - warn " 建议: codebuddy plugin uninstall spec-mode 然后重新装 specode" -fi -echo - -# ---------- 4. 环境变量提示 ---------- -log "4. 
环境变量改名提示 (脚本无法替你改 shell 配置)" -ENV_FOUND=0 -for var in SPEC_MODE_ROOT SPEC_MODE_GUARD; do - if printenv "$var" > /dev/null 2>&1; then - new_var="${var/SPEC_MODE_/SPECODE_}" - cur="$(printenv "$var")" - warn "检测到 $var=$cur" - warn " 请改为: export $new_var=$cur" - ENV_FOUND=1 - fi -done -if (( ENV_FOUND == 0 )); then - ok "未检测到 SPEC_MODE_* 环境变量 (无需手动改)" -fi -echo - -# ---------- 5. 结束 ---------- -if (( DRY )); then - log "dry-run 完成。确认无误后跑 ./migrate-from-spec-mode.sh 实际执行。" -else - ok "迁移完成" - echo - echo "下一步:" - echo " - 重新装 plugin: claude plugin install specode@specode" - echo " - 检查 shell rc (.bashrc/.zshrc) 里 SPEC_MODE_* 改成 SPECODE_*" - echo " - 跑 /specode:status 验证" -fi diff --git a/plugins/specode/.claude-plugin/plugin.json b/plugins/specode/.claude-plugin/plugin.json index 69f106f..1388adc 100644 --- a/plugins/specode/.claude-plugin/plugin.json +++ b/plugins/specode/.claude-plugin/plugin.json @@ -1,12 +1,12 @@ { "name": "specode", - "version": "0.4.0", - "description": "Specification-driven workflow with hard sync enforcement between code and docs (Claude Code + CodeBuddy).", + "version": "0.10.23", + "description": "Specification-driven workflow with advisory hooks, selector prompts, session-bound state, and task-swarm multi-agent orchestration.", "author": { "name": "xueqiang", "email": "xueqiang361@gmail.com" }, "homepage": "https://github.com/qxbyte/specode", "license": "MIT", - "keywords": ["spec", "specification", "workflow", "hooks", "code-doc-sync"] + "keywords": ["spec", "specification", "workflow"] } diff --git a/plugins/specode/agents/task-swarm-planner.md b/plugins/specode/agents/task-swarm-planner.md index 3e50877..0fc2f13 100644 --- a/plugins/specode/agents/task-swarm-planner.md +++ b/plugins/specode/agents/task-swarm-planner.md @@ -25,13 +25,13 @@ model: sonnet ```markdown - [ ] T1: <动作描述> @writes:<具体文件路径> @role:coder - 详细要求... + 详细要求... - [ ] T2: 评审 T1 @reads:<相关文件> @depends-on:T1 @role:reviewer - 评审重点... + 评审重点... 
- [ ] T3: 验收 @reads:<相关文件> @depends-on:T2 @role:validator - 验收标准... + 验收标准... ``` ### 标签规范 diff --git a/plugins/specode/agents/task-swarm-reviewer.md b/plugins/specode/agents/task-swarm-reviewer.md index 4538bc1..3c3ec5f 100644 --- a/plugins/specode/agents/task-swarm-reviewer.md +++ b/plugins/specode/agents/task-swarm-reviewer.md @@ -12,8 +12,8 @@ model: sonnet - 你是**建议提供者**,**不阻塞推进**。 - 你的产出(review.md)会被解析后作为 `> ⚠️ 评审建议` 注释**写入 tasks.md**,让使用者决定是否人工跟进。 - 你不会让 stage failed、不会触发 coder 重派。这是 task-swarm 的明确设计: - - **validator** = 跑测试的客观信号 → 进入修复循环(coder ↔ validator) - - **reviewer** = 读代码的主观信号 → 仅记录、由人决定 + - **validator** = 跑测试的客观信号 → 进入修复循环(coder ↔ validator) + - **reviewer** = 读代码的主观信号 → 仅记录、由人决定 ## 你的唯一职责 @@ -85,17 +85,17 @@ needs-changes | approved-with-comments | approved ### 严重度判定(自主判,遵循以下规则) - **P0**(带证据标签): - - 正确性错误(逻辑错、边界漏判、API 用错)→ 通常对应 `[req:x.y]` - - 安全 / 数据完整性问题 → `[security]` - - 与 SHALL **直接冲突** → `[req:x.y]` - - 缺关键错误处理(异常会让进程崩溃 / 数据损坏)→ `[security]` 或 `[req:x.y]` - - 接口契约不一致 → `[contract]` + - 正确性错误(逻辑错、边界漏判、API 用错)→ 通常对应 `[req:x.y]` + - 安全 / 数据完整性问题 → `[security]` + - 与 SHALL **直接冲突** → `[req:x.y]` + - 缺关键错误处理(异常会让进程崩溃 / 数据损坏)→ `[security]` 或 `[req:x.y]` + - 接口契约不一致 → `[contract]` - **P1**(建议): - - 边界情况未覆盖但主路径 OK - - 测试覆盖度不足 - - 命名 / 结构可改善 - - 文档 / 注释缺失 - - **没有证据标签的"我觉得这里不太好"** + - 边界情况未覆盖但主路径 OK + - 测试覆盖度不足 + - 命名 / 结构可改善 + - 文档 / 注释缺失 + - **没有证据标签的"我觉得这里不太好"** - **P2**(可选):纯风格、命名偏好、轻微重构机会 ### 零 P0 是允许的 diff --git a/plugins/specode/agents/task-swarm-validator.md b/plugins/specode/agents/task-swarm-validator.md index 883fb86..9a18321 100644 --- a/plugins/specode/agents/task-swarm-validator.md +++ b/plugins/specode/agents/task-swarm-validator.md @@ -132,7 +132,7 @@ AssertionError: expected status 423 LOCKED, got 401 UNAUTHORIZED ## 给 coder 的修复指引(必填) - 文件: src/api/login.py -- 位置: login() 函数失败分支 +- 位置: login 函数失败分支 - 问题: 没有调用 lockout 计数器,第 5 次失败应返回 423 并写 Redis 锁 - 建议: 引入 src/auth/lockout.py(如 _需求:5.1_ 中描述),在失败分支调用 record_failure(user_id),返回 423 当 
count >= 5 - 涉及需求: _需求:1.3_、_需求:5.1_ diff --git a/plugins/specode/assets/templates/bugfix.md b/plugins/specode/assets/templates/bugfix.md index 954b7c4..c32f341 100644 --- a/plugins/specode/assets/templates/bugfix.md +++ b/plugins/specode/assets/templates/bugfix.md @@ -4,42 +4,104 @@ Spec Type: Bugfix Workflow: bugfix Status: Bug Analysis Draft Review Status: unreviewed +Severity: P2 -## 问题摘要 +> 填写约定:不适用的小节**整节删掉**,不要留"暂无 / 待补充"。 + +--- + +## 一、问题陈述 + +> 1-3 句话讲清楚:**什么场景**下、用户**看到了什么不对**、**期望看到什么**。 +> 这一节先讲"现象",根因留到下文「根因分析」。 {{summary}} -## 复现步骤 +--- + +## 二、复现路径 + +外部读者按这些步骤能**百分百复现**问题。无法稳定复现时在「证据」一节标注复现率(如 "仅 30% 复现")并补样本日志。 + +1. 进入 ____(具体页面 / 命令 / 环境) +2. 操作 ____(具体动作 + 输入) +3. 观察到 ____(错误结果 / 报错信息 / 异常表现) + +**期望应该看到**:____ + +> 复现路径含多分支 / 状态切换时可附 Mermaid `flowchart` 辅助。 + +--- + +## 三、影响范围 + +- **谁受影响**:(用户群体 / 角色 / 环境) +- **多严重**:(阻塞性?规避方案?数据损坏?仅体验问题?) +- **从什么时候开始**:(首次出现的版本 / 提交 / 回归点,若已查到) + +--- + +## 四、证据 + +> 证据决定根因诊断的可信度。**没有证据的"我觉得是 XX"是猜测,不是根因。** + +- 日志 / 错误堆栈: +- 失败用例 / 截图: +- 用户反馈 / 工单: +- 最小复现命令: + +--- + +## 五、待澄清问题(动手分析前确认) + +> **铁律**:问题描述里**任何一处**模糊措辞、复现条件不全、影响范围未知, +> **必须**先通过 `clarification-wizard` 与用户确认,不允许凭"应该是 XX"开干。 + +- [ ] (待澄清点 1 —— 例:报错只在 Safari 出现还是所有浏览器?) +- [ ] (待澄清点 2 —— 例:是否所有租户都受影响,还是仅 X 配置下的租户?) -1. 待补充复现步骤。 -2. 待补充触发条件。 -3. 待补充当前错误结果。 +--- -## 当前行为 +## 六、根因分析 -1. WHEN 缺陷触发条件满足,THEN THE System 出现当前错误行为。 +**先列假设,再列验证方式,最后写结论**——不要把"应该是 XX"当结论直接进入修复。 -## 期望行为 +| 假设 | 支持证据 | 反对证据 / 验证方式 | +|------|----------|---------------------| +| A:____ | | | +| B:____ | | | -1. WHEN 缺陷触发条件满足,THE System SHALL 执行正确行为。 +**确认的根因**:____(验证完上面假设后,一句话讲清楚"为什么会出错"——能写到这里,说明 bug 已被理解,不是被猜出来的) -## 保持不变的行为 +> 状态机相关 bug(订单错状态 / 并发 race condition / 工单流转异常)可附 Mermaid `stateDiagram-v2` 标出错误流转点: +> +> ```mermaid +> stateDiagram-v2 +> [*] --> 待支付 +> 待支付 --> 已支付 : 支付成功 +> 待支付 --> 已取消 : 超时 +> 已支付 --> 已取消 : ❌ 不该发生 +> ``` -1. 
WHEN 不相关既有场景发生,THE System SHALL CONTINUE TO 保持当前正确行为。 +--- -## 影响范围 +## 七、修复方向 -- 待根据代码和用户反馈确认。 +- **是什么**:一两句自然语言描述这次要改的核心点。 +- **改哪里**:列具体文件路径 / 函数 / 数据结构(能定位即可)。 +- **不要扩大范围**:列出**显式不在本次修复范围**的相邻问题——避免顺手改一堆。 -## 证据 +--- -- 待补充日志、错误信息、测试、截图或用户报告。 +## 八、回归保护 -## 约束 +- 期望加什么测试?(单元 / 集成 / e2e) +- 之前没测到这个 bug 的原因是什么?要补一类此前缺失的覆盖吗? -- 修复应保持最小变更,不扩大范围。 +--- -## 待确认问题 +## 九、验收要点(可选) -- 是否已有复现步骤、错误日志或失败测试? +> 仅在需要机器可读语义时用 EARS。日常 bugfix 通常一句话契约就够。 +- (例:用户在 Safari 点击「保存」时,系统在 1s 内完成保存并返回成功提示。) diff --git a/plugins/specode/assets/templates/requirements.md b/plugins/specode/assets/templates/requirements.md index 68b40ba..3211a3d 100644 --- a/plugins/specode/assets/templates/requirements.md +++ b/plugins/specode/assets/templates/requirements.md @@ -4,46 +4,122 @@ Spec Type: Feature Workflow: requirements-first Status: Requirements Draft Review Status: unreviewed +Priority: P2 -## 简介 +> 填写约定:不适用的小节**整节删掉**,不要留"暂无 / 待补充"。 +> 图示按需用 Mermaid,简单需求不要为图而图。 + +--- + +## 一、背景 / 目标 / 范围 + +**背景与动机**:用 2-4 句话讲清楚**为什么要做**——当前痛点 / 业务诉求是什么。不要写"实现 XX 功能"那种 WHAT 句式。 {{summary}} ---- +**目标**(尽量可衡量): + +- 目标 1:(例:X 操作路径从 5 步缩短至 2 步) +- 目标 2: -## 词汇表 +**范围之外**(明确不做什么,避免范围蔓延): -- **System**:当前项目中需要实现该需求的系统或组件。 +- 本次不覆盖: +- 遗留到下期: --- -## 需求 +## 二、目标用户与场景 -### 需求 1:核心能力 +- **主要用户**:(例:负责审阅 PR 的资深工程师) +- **典型场景**:(例:每天打开 review queue 一次,希望快速过滤自己作为 author 的条目) +- **非目标用户**:(显式声明谁**不是**目标,避免实现方向飘) -**用户故事:** 作为目标用户,我希望系统支持该需求描述的核心能力,以便完成预期工作流。 +**典型使用路径(Happy Path)**: -#### 验收标准 +1. 用户进入 ____ +2. 触发操作 ____ +3. 系统响应 ____ +4. 完成 ____ -1. WHEN 用户触发该能力,THE System SHALL 按需求描述执行预期行为。 -2. IF 输入或前置条件无效,THEN THE System SHALL 返回清晰、可处理的错误反馈。 -3. WHILE 该能力执行中,THE System SHALL 保持现有不相关行为不变。 +> 分支复杂、含循环或决策时,下方可附 Mermaid `flowchart`: +> +> ```mermaid +> flowchart LR +> A[进入] --> B{校验?} +> B -->|是| C[响应] +> B -->|否| D[提示] +> D --> A +> C --> E[完成] +> ``` --- -## 边界情况 +## 三、待澄清问题(生成正文前主动 wizard) -1. 
WHEN 需求输入缺少关键细节,THE System SHALL 暂停实现并要求确认。 +> **铁律**:源需求里**任何一处**模糊措辞——"等"/"诸如此类"/"差不多"/单句口语化需求—— +> **必须**在写正文前通过 `clarification-wizard` 与用户确认,不允许假设。 +> 仅当用户明确放权("由你决定"/"按业界默认"/"先 MVP"等)才可跳过。 + +- [ ] (待澄清点 1 —— 例:触发入口在哪儿?UI / CLI / 后台定时?) +- [ ] (待澄清点 2) --- -## 非功能需求 +## 四、需求详述 + +### 需求 1:(一句话标题,描述能力本身,不是验收) + +**用户故事**:作为 ____,我希望 ____,以便 ____。 + +**主流程**(自然语言写清楚正常路径下用户做了 X 看到 Y;写到对方读完不会反问"那如果……怎么办?"为止): + +- +- + +**异常流程**(输入异常 / 网络异常 / 权限不足等场景下用户看到什么、能怎么继续): -1. WHEN 该能力被实现,THE System SHALL 保持项目既有架构、风格和测试约定。 +- +- + +**验收要点**(可选 — 仅在需要机器可读语义时用 EARS SHALL;上面已说清就略过): + +- + +> 涉及显式状态流转(订单 / 审批 / 工单类)时可附 Mermaid `stateDiagram-v2`: +> +> ```mermaid +> stateDiagram-v2 +> [*] --> 待审核 +> 待审核 --> 已通过 : 通过 +> 待审核 --> 已驳回 : 驳回 +> 已驳回 --> 待审核 : 修改重提 +> 已通过 --> [*] +> ``` --- -## 待确认问题 +### 需求 2:…… + +(按上面格式重复;每条需求都是一段可独立讨论、可独立验收的能力) + +--- + +## 五、非功能 / 约束(可选) + +- 性能:(如有具体 SLA 写这里) +- 兼容:(版本 / 渠道 / 设备) +- 安全 / 权限: +- 与既有功能的关系:(不要破坏什么) + +## 六、依赖与风险(可选) + +- 上下游依赖:(哪些外部服务 / 团队需要确认?) +- 主要风险与应对:(最坏情况是什么?怎么兜底?) +- 回滚方案:(出问题如何回退?功能开关 / 灰度 / 数据回滚) + +## 七、UI 交互细节(可选,仅 UI 类需求) -- 目标用户、边界条件、验证命令和验收标准是否需要进一步补充? -- 是否确认当前需求方向?确认后再继续生成 `design.md`。 +- 加载态 / 空态 / 错误态 / 权限隔离 / 响应式:每项一句话,不适用就删行 +- 关键文案:(按钮、提示、错误信息中容易反复挑剔的) +- 设计稿链接: diff --git a/plugins/specode/assets/templates/tasks.md b/plugins/specode/assets/templates/tasks.md index 7ed3f9a..19ba4b7 100644 --- a/plugins/specode/assets/templates/tasks.md +++ b/plugins/specode/assets/templates/tasks.md @@ -9,39 +9,25 @@ Review Status: unreviewed 基于已确认的需求与设计,将实现拆分为可执行、可验证的任务。任务执行时必须先更新状态,完成验证后才能标记完成。 -## 任务 - -- [ ] 1. 完善需求与设计追踪 - - [ ] 1.1 确认需求、设计和任务之间的映射 - - 检查 `requirements.md` 或 `bugfix.md` - - 检查 `design.md` - - 更新任务与需求编号的映射 - - 验证:人工 review - - _需求:1_ - -- [ ] 2. 实现核心能力 - - [ ] 2.1 按设计实现最小必要代码变更 - - 遵循项目既有架构、风格和测试约定 - - 文件:待确认 - - 验证:待确认 - - _需求:1_ - -- [ ] 3. 补充测试与回归验证 - - [ ] 3.1 添加或更新相关测试 - - 覆盖核心行为、错误路径和回归场景 - - 文件:待确认 - - 验证:待确认 - - _需求:1_ - -- [ ] 4. 
检查点 —— 确保所有测试通过 - - 运行相关测试和检查。 - - 如有失败,停止继续执行并修复或向用户确认。 +**格式约定(task-swarm 兼容)**: + +- 顶层 `## 阶段 N: 标题` 段落对应一个 stage(task-swarm 的 fork 粒度) +- 每条具体任务 `- [ ] N.M 任务描述 @writes:文件路径 @reads:文件路径 @depends-on:N _需求:x.y_` + - `@writes`:本任务写哪些文件(task-swarm 据此切 group 避免冲突) + - `@reads`:本任务读哪些文件(可选) + - `@depends-on:N`:本 stage 依赖阶段 N(可选;不写则仅靠 @writes 冲突切 group) + - `_需求:x.y_`:traceability,链回 requirements.md / bugfix.md 的 SHALL 编号 +- 可选任务把 `[ ]` 写成 `[*]`;checkpoint 任务保留 `[ ]` 但标题以「检查点」开头 + +## 阶段 1: 待规划阶段标题 + +- [ ] 1.1 待规划任务描述 @writes:src/path/to/file.py _需求:1.1_ ## 测试要点 -供测试人员快速了解需要验证的场景。每行对应 `requirements.md` / `bugfix.md` 中的一条 SHALL,需求或 bug 行为变更时由 agent 在同一轮 turn 内同步更新本节。 +供测试人员快速了解需要验证的场景。主代理 在 tasks phase 按 requirements.md / bugfix.md 的 SHALL 顺手补几行,每行关联 SHALL 编号。非验收硬条件,acceptance phase 时主代理把这一节简述给用户作参考即可。 -- [ ] _agent 待填充_:触发场景 → 预期结果(需求 X.Y) +- _agent 待填充_:触发场景 → 预期结果(需求 X.Y) ## 验收 @@ -49,9 +35,3 @@ Review Status: unreviewed - [ ] 所有指定验证命令通过。 - [ ] 未完成或跳过的 optional 任务已记录。 - [ ] 用户确认验收。 - -## 执行确认 - -- [ ] 用户确认开始执行 required tasks。 -- [ ] 用户确认开始执行 required and optional tasks。 -- [ ] 用户选择暂不 coding。 diff --git a/plugins/specode/commands/continue.md b/plugins/specode/commands/continue.md index a5ef725..dcd1105 100644 --- a/plugins/specode/commands/continue.md +++ b/plugins/specode/commands/continue.md @@ -1,6 +1,35 @@ --- -description: 恢复或切换当前 specode 持久会话 -argument-hint: "[spec名称或目录]" +description: 恢复或切换 specode 会话 +argument-hint: "[spec-slug]" --- -/continue $ARGUMENTS +/specode:continue $ARGUMENTS + +按 `$ARGUMENTS` 形态分两步路由:无 slug 走第一步、有 slug 走第二步。**禁止**主代理跳过 selector 直接 `acquire`、**禁止**根据用户裸输入 invent slug。 + +## 第一步:无 slug —— 列出可选 spec + +若 `$ARGUMENTS` 为空: + +1. 先按 SKILL.md §「Document Root Resolution / 首次使用 / auto-detect 命中时的确认」确认 doc_root;无配置且用户选"中止"则引导 `/specode:spec --set-vault

` 后 end turn +2. 调 `spec_session.py list-specs --session ` 拿可选 spec 列表 + + ```sh + sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py" \ + list-specs --session + ``` + +3. 按 `references/workflow.md` §9.1 处理结果:空列表 → chat 引导 `/specode:spec <需求>` 创建新 spec;非空 → chat 摘要 + 按 SKILL.md §Selectors 主动呈现 spec 选择(类型 A 单列单选,≤4 项;每个选项 `label=`)让用户选;选完下一轮以 slug 进入第二步 + +## 第二步:有 slug —— 接管 + 加载 + +按 `references/workflow.md` §9.2 + `references/lock-protocol.md` 走 5 步: + +1. 解析 `spec_dir = /specs/`(**不要** Grep 项目目录,spec 不在项目里) +2. 调 `spec_session.py acquire --spec

--session `;`exit 4 LockHeld` 时**禁止**直接 `--force`,按 SKILL.md §Multi-Window + Lock 呈现 `takeover-options` selector 让用户选 +3. `spec_session.py load --spec ` +4. `spec_session.py continue --spec --session `(只读模式跳过) +5. 完成后**按 SKILL.md §Status Footer「新 spec 创建/接管的当 turn」走**:chat 简报 "已加载 spec:(phase=

, iteration=, lock=)" + 状态行 footer + (若 sess.pending_selector 有值)主动呈现对应 selector + +CLI 调用模板见 SKILL.md §CLI 调用规约(**禁止**裸 `python3 spec_session.py …`)。 diff --git a/plugins/specode/commands/end.md b/plugins/specode/commands/end.md index 711bee1..044b97f 100644 --- a/plugins/specode/commands/end.md +++ b/plugins/specode/commands/end.md @@ -2,4 +2,17 @@ description: 结束当前 specode 持久会话(不删除文档) --- -/end +/specode:end + +## 立即调用 + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py" \ + end --session +``` + +强制双写:`~/.specode/sessions/.json` + 当前 spec 的 `.config.json`。落地动作:写 `mode=ended` / `ended_at` / 清 `active_spec_*` / 释锁 / 清 `task_swarm_run_id`。 + +- 任一写失败视为整命令失败;**不接受** in-memory 半成功 +- 调用模板规约见 SKILL.md §CLI 调用规约(**禁止**裸 `python3 spec_session.py …`) diff --git a/plugins/specode/commands/spec.md b/plugins/specode/commands/spec.md index 268575b..1ffca35 100644 --- a/plugins/specode/commands/spec.md +++ b/plugins/specode/commands/spec.md @@ -1,6 +1,120 @@ --- -description: specode 入口:持久会话(--persist)、Obsidian/根目录(--set-vault)、帮助(-h) -argument-hint: "[--persist] <需求> | <名称>:<需求> | --set-vault

| --set-root

| --detect-vault | --vault-status | -h" +description: 进入 specode 持久会话,开始新 spec 或调用子命令 +argument-hint: "-n <需求> | <需求> | <名称>: <需求> | -h | --set-vault

| --set-root

| --detect-vault | --vault-status | --sync-status" --- -/spec $ARGUMENTS +/specode:spec $ARGUMENTS + +按 `$ARGUMENTS` 形态分四步路由,**依次**判断、命中即执行并 end turn,不要跳过 / 并行。 + +## 第一步:fast-path 参数(hook 已注入模板) + +若 `$ARGUMENTS` 以下列任一旗标开头(hook 实际拦截范围,见 `spec_session.py:FAST_PATH_HELP / FAST_PATH_VAULT`): + +- `-h` / `--help` +- `--vault-status` / `--detect-vault` / `--sync-status` + +→ **不要在本 turn 调任何 CLI**(**禁止** `sh ... spec_init.py -h` / `sh ... spec_vault.py status` 等)。 +UserPromptSubmit hook 已在 `additionalContext` 里注入 fast-path 模板, +你**唯一动作**是把 hook 注入的 ```text 围栏内容**逐字**输出,然后立即 end turn。 +禁止任何额外说明文字("以下是帮助" / "希望对你有帮助" 等都不允许)。 + +## 第二步:set 命令(持久化 doc_root,不创建 spec) + +若 `$ARGUMENTS` 是 `--set-vault ` 或 `--set-root `(**hook 不拦截**,必须主动调 CLI): + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_vault.py" \ + set --vault +``` + +`--set-root` 与 `--set-vault` 等价(写同一个 `obsidianRoot` 字段)。执行成功后向用户 +confirm 写入位置(`~/.config/specode/config.json`),然后 end turn。**不**进入第三步 / 第四步。 + +## 第三步:doc_root 确认(新建 spec 前必做) + +若 `$ARGUMENTS` 是 `-n <需求>` / `<需求>` / `<名称>: <需求>`(既不是第一步的 fast-path、也不是第二步的 set 命令), +**先**调 `spec_vault.py status` 拿到 `source` 字段: + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_vault.py" \ + status +``` + +- `source` = `env` 或 `config` → 已显式配置,**直接进第四步** +- `source` = `auto` 或 `none` → **禁止直接调 `spec_init.py`**,按 SKILL.md + §「Document Root Resolution / 首次使用 / auto-detect 命中时的确认」走 + `AskUserQuestion` 三选 + `spec_vault.py set --vault

` 持久化流程; + 用户选"中止"则 end turn,否则持久化后再进第四步 + +## 第四步:常规创建 spec + +按 `$ARGUMENTS` 形态分两个子分支,**优先 4a**: + +### 4a. 显式 `-n ` / `--name `(推荐) + +若 `$ARGUMENTS` 以 `-n ` 或 `--name ` 开头: + +- 第二个 token 是 spec 目录名 slug,**保留用户原文**,不做翻译/推导。 +- 0.10.16+ 起允许 Unicode(中文/日文/emoji 都可),只禁文件系统危险字符 + (`< > : " / \ | ? *`、空白字符、首字符 `.` 或 `-`、Windows 保留名)。 +- 剩余文本 → `source_text`(原始需求)。 +- `requirement_name` 默认:英文 slug 按短横线 → 空格 + 首字母大写 + (如 `user-login` → `User Login`);非 ASCII slug(如中文)直接复用原文。 + +示例: + +- `/specode:spec -n user-login 添加用户登录功能` → + `--name user-login --requirement-name "User Login" --source-text "添加用户登录功能"` + +(非 ASCII slug 情况见上一段 `requirement_name` 默认规则。) + +**spec_init.py exit 3(slug 非法)时的应对**: + +- **禁止**主代理**静默 fallback 到 4b 推导**——用户用了 `-n` 形式就是想精确控制目录名, + 自动换成英文 slug 是欺骗用户。 +- 正确做法:把 CLI 的 stderr 错误原信息(如"不能含 / \\ * ? 或空白;不能以 - 开头") + 报给用户,要求用户重新提供一个合法 slug,**不要替用户决定**。 +- 仅当用户明确说"你帮我想一个"时才走 4b 推导。 + +**这条路径的好处**:用户能精确控制 `/specs//` 的目录名, +不会出现"主代理把 '订单退款' 推成 `order-refund-flow` 但用户想要 `refund`"的歧义。 + +### 4b. 推导式(兼容、不推荐) + +若 `$ARGUMENTS` 是纯 `<需求>` 或 `<名称>: <需求>`: + +- 按 `references/workflow.md` §1.1「名称前缀解析 + slug 推导」由主代理推导。 +- **推导结果对用户不可预知**——若用户在意目录名,应引导改用 4a 形式。 + +### 调用 spec_init.py + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_init.py" \ + --name --requirement-name "<显示名>" --source-text "<原文>" --session +``` + +- doc-root 三层解析详见 SKILL.md §Document Root Resolution +- 三层全 miss → exit 3 + 引导提示;**不**回退到 cwd / ~/specs +- 详细流程见 SKILL.md §Session Lifecycle / references/obsidian.md +- 调用模板规约见 SKILL.md §CLI 调用规约(**禁止**裸 `python3 spec_init.py …`) + +### 第四步成功后必做(0.10.15+:先 project-root,再 workflow) + +`spec_init.py` `exit 0` 后 spec 已进入 **active 模式**(`mode=active` / `pending_selector=project-root-choice`)。**`/specode:spec` 是持续流程入口,不需要用户再输命令推进**——本 turn hook 未刷新,主代理按 SKILL.md §Status Footer「新 spec 创建/接管的当 turn」走(chat 简报 + 状态行 footer),然后**依次**: + +1. 
**立即调 `AskUserQuestion` 呈现 `project-root-choice` selector**(决定代码写到哪个目录;模板见 `_selectors.py` SELECTOR_PROMPTS['project-root-choice']) +2. 拿到用户选择后**本 turn 内**调 `spec_session.py set-project-root --spec

--session --root <选定路径>` +3. CLI 成功后立即调 `AskUserQuestion` 呈现 `workflow-choice` selector(模板见 `_selectors.py` SELECTOR_PROMPTS['workflow-choice']) +4. 用户选完工作流后**先做需求歧义自检**——见 SKILL.md §「Pre-requirements Clarification(铁律)」:有阻塞性歧义且用户未明确放权 → 先调 `clarification-wizard` 与用户讨论,**禁止假设/invent**;自检无歧义或用户已放权 → 再 `phase-transition` + 生成 `requirements.md` / `bugfix.md` / `design.md`。 + +**两步都不要 end turn 让用户再输命令**——project-root 选完直接进 workflow 选择。 + +**严禁**说 "使用 `/specode:continue` 进入下一阶段" / "你可以使用 ... 推进" / "下一步请输入 ..." 这类让用户再输命令的引导——流程由 selector 推进。 + +**严禁**在源需求不明确时绕过 clarification-wizard 直接写文档——澄清铁律的违反不是"风格瑕疵"而是 spec 失真根因,详见 SKILL.md §「Pre-requirements Clarification(铁律)」。 + +**为什么要先选 project_root**:spec 文档目录(`/specs//`)只放 `.md` 文档和 `.task-swarm/` 状态;代码实际写到的目录是 `project_root`。两者解耦后,task-swarm subagent 能明确知道"代码写哪里",避免 0.10.13 之前那种"代码错写到 spec dir 污染文档目录"的事故。 diff --git a/plugins/specode/commands/status.md b/plugins/specode/commands/status.md index 566807f..80577be 100644 --- a/plugins/specode/commands/status.md +++ b/plugins/specode/commands/status.md @@ -2,4 +2,19 @@ description: 显示当前 specode 会话状态 --- -/status +/specode:status + +## 立即调用 + +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py" \ + status --session +``` + +输出含 session + spec_config + lock 摘要。 + +- session 文件:`~/.specode/sessions/.json` +- spec 配置:`/.config.json`(路径取自 session 文件 `active_spec_dir`) +- 字段语义见 SKILL.md §Session Lifecycle +- 调用模板规约见 SKILL.md §CLI 调用规约(**禁止**裸 `python3 spec_session.py …`) diff --git a/plugins/specode/commands/task-swarm.md b/plugins/specode/commands/task-swarm.md index d3d7a4a..aba3bf8 100644 --- a/plugins/specode/commands/task-swarm.md +++ b/plugins/specode/commands/task-swarm.md @@ -1,111 +1,170 @@ --- -description: 多角色 agent 并发执行 tasks.md(CLI 驱动调度协议;脚本管状态机,模型只负责派单与文本生成) -argument-hint: "[/tasks.md] [--parallel N] [--max-rounds N]" +description: 多 agent 并发执行 tasks.md(state.json 单一事实源;主代理按 
plan→fork→advance 循环驱动) +argument-hint: "[/tasks.md] [--max-parallel N] [--max-rounds N]" --- -把 tasks.md 委派给 task-swarm 编排器:`$ARGUMENTS` +/specode:task-swarm $ARGUMENTS -## 协议(强制按顺序执行) +## ⛔ 强制前置阅读(不可跳过) -你是 task-swarm 调度器。**不要尝试理解状态机** — 状态机在 `scripts/task_swarm.py` 里。你只执行以下循环。 +本文件**只列**入口路由 + 关键禁止项。所有实操细节都在 `references/task-swarm.md`: -### 1. 初始化(只跑一次) +- §1 角色 / 并发度 +- §2 文件冲突 / group 切分(`@writes` 不相交 + `@depends-on` 拓扑) +- §3 Phase 状态机(reviewer / p0-fix / validator / v-fix 转换规则) +- §4 子代理产物 schema(coder `result.md` / reviewer `review.md` / validator `validation.md`) +- §5 tasks.md writeback 格式(line-safe diff / 修复状态标签) +- §6 `on-task-completed` hook 提醒矩阵 +- §7 信息流总览 +- §8 死循环保护(连续 3 轮同 fail 签名 → `failed-deadloop`) +- §9 CLI 接口速查 -```bash -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/task_swarm.py init \ - --tasks \ - [--parallel N] [--max-rounds N] -``` +**在调任何 `task_swarm.py` 子命令之前**(包括 init / plan / advance / writeback / resolve), +必须先 Read `references/task-swarm.md` 至少扫一遍 TOC + §3 + §9。本文件下面的 3 步路由 +**只够回答"现在该调哪条 CLI"**,不够回答"plan 输出怎么解析 / advance 失败时该 retry +还是 fork / writeback 越界怎么办"——这些细节都在 references 里。**禁止凭印象推**。 -返回 JSON 含 `run_id`。**把 `run_id` 记住**——后续每个子命令都要传 `--run `。 +如果对任何一步流程仍不确定,**先 Read references 对应章节再动手**,不要边猜边跑。 -如返回 `error` → 把错误原文呈现给用户后停止。 +--- -### 2. 
主循环 +按以下 3 步路由。**禁止**主代理直接 `task_swarm.py init`、**禁止**根据用户裸输入 invent ``。task-swarm 是 tasks phase + `tasks-execution` selector 选中 task-swarm 路径后的下游编排,不是用户裸触发的入口(详见 SKILL.md §Task-Swarm + `references/task-swarm.md`)。 -每一轮做四步,按 JSON 返回的 `action` 字段分支: +## 第一步:前置校验(必做) -#### 2.1 拿下一步指令 +调 `spec_session.py read-session --session ` 拿当前 session 状态: -```bash -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/task_swarm.py next --run +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py" \ + read-session --session ``` -返回 `{"action": "fork|writeback|wait|done", ...}`。 +必须全部满足: -#### 2.2 按 action 执行 +- `mode == "active"`(不是 idle / ended / readonly) +- `active_spec_dir` 非空(init 的 `--tasks` 必须用此值 + `/tasks.md`,**禁止 invent**) +- `phase == "tasks"` +- `pending_selector == "tasks-execution"` 且用户已选 task-swarm 路径 -**`action == "fork"`** — 派发 subagent: +任一不满足 → **不要** init,在 chat 引导用户先到 tasks phase 跑 `tasks-execution` selector 选 task-swarm 路径,end turn。 -``` -Task( - description=, ← **逐字拷贝**,不要根据 outbox / 自己理解改写 - subagent_type=, ← 必须是 "specode:task-swarm-{coder|reviewer|validator}" - prompt= ← prompt 已由脚本预渲染好,不要改 -) -``` +详见 SKILL.md §「Task-Swarm / `/specode:task-swarm` 前置校验」。 -`description` 已经带了 scope(例如 `[validator-fail-fix]`)。自己改写 description(比如把 validator 的修复指引塞成"修复 N 个 P0")会误导观察者以为 reviewer 触发了循环——reviewer 是 advisory,从不触发 r2 coder。 +## 第二步:init -subagent 返回后**立刻**: +校验通过后用 step 1 拿到的 `active_spec_dir`: -```bash -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/task_swarm.py parse \ - --run --stage --role --round +```sh +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/task_swarm.py" \ + init --tasks "/tasks.md" --session [--skip-validator] ``` -拿到 `{"judgment": "...", ...}`,再 advance: +- `--tasks` 是 **tasks.md 的绝对路径**(不是 spec 目录),用 step 1 的 `active_spec_dir + 
/tasks.md` +- `--skip-validator`(0.10.20+):**人工验收模式**——review/p0-fix 完成后跳过 validation/v-fix 直接 writeback。 + 仅当用户在 `tasks-execution` selector 选了「task-swarm + 人工验收(跳过 validator)」时加这个 flag; + 默认(不加)是 full 模式,含 validator 自动验收循环。 +- init 报 `tasks.md 中未解析出任何 ## 阶段 N: 段` → 格式不对,**回到 `tasks-execution` 选「暂停 / 调整 tasks.md」**让主代理按 SKILL.md §「Spec 文档生成」重写 +- 拿到 `{run_id, run_dir, groups, skip_validator}` 后转第三步 -```bash -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/task_swarm.py advance \ - --run --stage --role --round \ - --judgment -``` +## 第三步:7 步循环(plan → fork → advance → writeback → resolve) -**schema-error 重派**:若 `parse.retry == true`(judgment=schema-error),**不要** advance。 -脚本已自动清空 outbox 与 in_flight。把 `parse.outbox_snapshot` 字典里的内容拼到下一次 fork 的 prompt -里(告诉 subagent 上次为何被拒),然后直接回到 2.1 调 `next`——会拿到对同一 stage/role/round 的 fork 指令。 +按下面 7 步循环驱动,**所有** `task_swarm.py` 子命令套同一 run.sh 包装模板: -**`action == "writeback"`**: +1. `init`(第二步已做) +2. `plan --run ` 拿下次 fork 列表 +3. `fork`:同 message 发 N 个 Task block(按 `plan.fork` 逐字拷贝,**不要**凭印象自创 agent_key—— + `coder-fix-xxx` / `coder-session-fix` 等自定义命名**全部禁止**,必须用 plan 给的 `coder-vfix-g{N}-r{R}-f{I}` 等规范名) +4. 
**等齐 subagent 返回(advance 前置强约束,违反必出乱)**: + - **必须**先在主代理 UI 看 "Waiting for N teammates" 区域,**所有** fork 出去的 Task 都 ✓ completed 才能进 step 5; + **任何 ⠙ streaming / ⠴ running Bash 的就不能 advance**。 + - **不要**凭口头报告判定完成——包括 team-lead / 其他平台 agent 说"已修复 STATUS"/"已完成"。 + **只有** subagent 自己的 Task tool 返回 ✓ completed 才算数。 + - PostToolUse hook 注入的"plan 提醒"**不是**"立即 advance"指令——它只是告知"这个 Task 完成了", + advance 仍要等齐**所有** in-flight Task。 + - 不确定时调 `task_swarm.py plan --run `,若返回 `action: coding-waiting` / `p0-fix-waiting` / + `v-fix-waiting`,**禁止** advance,回到等待。 -```bash -bash # 即: sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/task_swarm.py writeback --run --stage N -``` + **常见误判**: + - "team-lead 说改完了" ≠ subagent 真完成(team-lead 修补可能是直接 Edit outbox,绕过 subagent 工作) + - "f0 跑了 30 个 tool 看起来快完了" ≠ completed(最后写盘可能还没刷) + - "其他 4 个都 ✓ 了最后 1 个估计也快" ≠ 可以提前——advance 之后没回头路 +5. `advance --run --phase

--round ` 推进 state +6. `writeback --run --group `(validator pass 后回写 tasks.md,line-safe diff,越界 exit 1) +7. `resolve --run `(所有 group 完成 → 回到 spec-mode acceptance phase) + +完整规格见 `references/task-swarm.md`(TOC 见本文件 §⛔强制前置阅读 节)。 + +## heartbeat(长流程必做) + +主代理每 5 分钟 / 每完成一个 subagent 后调用(保证 spec 锁不被 stale 回收), +沿用 §第三步同款 run.sh 包装模板(**不要**裸 `python3 task_swarm.py …` / `python3 spec_session.py …`): + +- `task_swarm.py heartbeat --run ` +- `spec_session.py heartbeat --spec

--session ` + +## 术语区分:reviewer 分级 vs validator fail(容易混) + +主代理常把 validator 报的"子任务 1.5 未完成"误称为"P1 问题" / "P2 问题"—— +这会让用户误以为可以跳过。两者是**不同维度**,**修复策略**也根本不同: + +| 概念 | 来源 | 触发 fix loop? | +|---|---|---| +| **P0(带证据标签)** | reviewer `review.md` `## P0`,**必带** `[req:x.y]` / `[security]` / `[contract]` 之一 | ✓ 触发 **p0-fix(尝试型,仅一轮)**:所有 P0 并发 fork 一轮 coder → **不再 review** → 直接进 validation;未修成功的 P0 在 tasks.md 标 `[P0 未修复]`,**不循环再修**(最终是否修好交给 validator 验) | +| **P0(不带证据标签)** | reviewer `## P0` 但漏写证据标签 | 自动降级 **advisory** → ✗ 不触发任何 fix(见 `task_swarm_outbox.py:282-286`) | +| **P1 / P2** | reviewer `review.md` `## P1` / `## P2` 节 | ✗ 不触发任何 fix(advisory,仅写入 tasks.md 注释) | +| **validator fail** | validator `validation.md` `## 判定 = fail` | ✓ 触发 **v-fix(循环到 pass)**:按 validator 修复指引并发 fork → 重新 validation → 再 fail 再 fork → 连续 3 轮同 fail 签名才 `failed-deadloop` | + +**关键差异(设计意图,详见 `references/task-swarm.md` §3)**: + +- **reviewer 路径是"尝试性修复"**:reviewer 是 advisory(建议性裁判),p0-fix 只给"带证据标签的 P0"一次修复机会,不论结果都进 validation;P1/P2/无标签 P0 直接进 advisory 不修。**reviewer 不是阻塞性 gate**。 +- **validator 路径是"循环验证"**:validator 是任务级裁判,输出 pass/fail 二元判定,**不分级**。fail 就必须 v-fix 修到 pass,没有"P1 可跳过"概念。 -脚本内部已处理 verify-lock + heartbeat + 行级安全 Edit。**不要**自己 Edit tasks.md。 +validator **不输出 P0/P1/P2 标签**(见 `references/task-swarm.md` §4.3 schema "`## 给 coder 的修复指引(**不带 P0/P1 标签**)`")。 +它的 fix_targets 全是"任务没做完",按 task-swarm 状态机 fail 必修。 -**`action == "wait"`**:当前并发已满或文件有冲突。等下一个 subagent 完成后再 `next`。 +**主代理报告 validator fail 时的正确措辞**: +- ✓ "validator 判 fail,子任务 1.5(响应式设计)未完成" +- ✗ "validator 判 fail,因为 1 个 P1 问题"(误用 reviewer 术语,会让用户误以为可跳过) -**`action == "done"`**:把 `json.summary` 用人话呈现给用户,结束。 +**如果用户问"这条任务能不能跳过"**: +- 按 task-swarm 设计**不能**——validator fail 必进 v-fix 循环到 pass +- 想跳过的唯一办法:esc 中断 → abort run → 改 tasks.md 移除该任务(或挪到下一 group)→ 重新 `init` +- **不要**给用户"可以不修"的错觉。tasks.md 里写了就是 must-do,要么修要么改 tasks.md,没有第三条路。 -#### 2.3 回到 2.1 +## advance 报 "result.md 缺 STATUS / 解析失败" 的正确应对 -## 严禁 +这是 0.10.13 (user-login) / 0.10.17 
(login-page) 两次事故的根源——必须按这套走,不要自创修补。 -- ❌ 不要自己读 tasks.md 决定派什么——`next` 会告诉你 -- ❌ 不要自己解析 review.md / validation.md——`parse` 会给你结构化判定 -- ❌ 不要自己 Edit tasks.md——hook 会拦下来(INV-9),用 `writeback` 子命令 -- ❌ 不要省略 `parse → advance` 这一对调用——否则 state.json 不前进,`next` 卡死 -- ❌ 不要用 `general-purpose` 作为 subagent_type——hook 会拦下来(INV-7) -- ❌ 不要自行拼 subagent prompt——`next` 给的 `prompt_file` 已包含 @writes 边界、修复轮指引、检查点专用文案 +### 错误做法(已知反模式) -## 前提 +- ❌ team-lead / 主代理 / 任何外部 agent **直接 Edit `agents//outbox/result.md`** 把 STATUS 补上 +- ❌ 主代理基于"已修复 STATUS"口头报告**直接 advance** +- ❌ 凭印象判定"subagent 应该已经把代码改了,只是 result.md 格式不对" +- ❌ 给同一个 P0 fork 一个**新名字的 agent**(如 `coder-fix-session-validation`)绕开命名规则—— + task_swarm 永远等不到这个 agent 的 result(不在 `in_flight` 列表),且会跟原 `coder-p0fix-g{N}-r{R}-f{I}` 并发改同一文件 -- 已有 active spec session 且持锁 -- plugin agents 已注册(specode plugin 安装时自动) -- 当前 spec 阶段是 `tasks` 或 `implementation` +**为什么错**:STATUS 缺失多半意味着 **subagent 提前退出 / 工作未完成**——代码改动可能根本没刷到磁盘。 +手补 STATUS 后 advance 通过,下游 reviewer/validator 拿到的是**半成品代码**,必然 fail,进入 v-fix 循环, +浪费资源 + 污染状态机 + 多 agent 并发改同文件互相覆盖。 -任一不满足 → 不要 init,告诉用户原因。 +### 正确做法 -## 调试 +1. **保留**残缺的 `result.md`(作为证据,不要 Edit 它) +2. 调 `task_swarm.py status --run ` 查看 `coder_in_flight` / `p0_in_flight` / `vfix_in_flight`, + 确认该 agent 是不是还在 in_flight +3. **如果 in_flight**(subagent 还在跑): + - 等 subagent 真完成(看 teammates UI ✓ completed) + - 一直 ⠙ streaming 不收尾(>10 分钟)→ esc 取消那个 Task + 报用户决定 abort 还是重 fork +4. **如果不在 in_flight 但 result.md 残缺**(subagent 已退出但产物不合规): + - **重新 fork 该 agent**(用**同一个** agent_key,比如还叫 `coder-p0fix-g1-r1-f0`) + - fork 前先 `rm -rf agents//outbox/*` 确保干净重跑(不要让旧残物干扰) + - **禁止**起新名字(`coder-fix-xxx` 不在 state 的 in_flight 里,task_swarm 永远等不到) +5. 
重 fork 都不行(subagent 反复无法产出合规 result.md)→ 报告用户 + 准备 abort run -| 想看什么 | 命令 | -|---|---| -| run 当前状态 | `task_swarm.py status --run ` | -| 某 subagent 的 prompt | `cat .task-swarm/runs//agents//task.md` | -| 某 subagent 的产出 | `cat .task-swarm/runs//agents//outbox/*` | -| 所有 run | `ls .task-swarm/runs/` | +**永远不要**:让 team-lead 或主代理代笔补 STATUS。绕过 subagent 工作的"修补"是把状态机推向更深的失序。 -## 协议背后的设计 +## 异常出口 -参考 `references/task-swarm.md` —— 文档解释为什么状态机、解析、回写要全部下沉到脚本(防"自我认可"、防模型在长循环里数错轮号、防 outbox 格式漂移)。运行时**不需要**读那份文档,按上面循环走即可。 +- coder STATUS=`failed`/`blocked`、writeback 越界、`failed-deadloop` → 停循环、向用户报告并等用户介入,**不要**自动 retry。详见 `references/task-swarm.md` §3 / §8。 diff --git a/plugins/specode/hooks/hooks-probe.json b/plugins/specode/hooks/hooks-probe.json deleted file mode 100644 index 73c4cfa..0000000 --- a/plugins/specode/hooks/hooks-probe.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "_comment": "CodeBuddy adapter probe. Captures env, stdin, cwd, event for each hook invocation. Pure shell, no plugin root resolution needed. 
Swap with hooks.json during verification, then revert.", - "hooks": { - "SessionStart": [ - { - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=session-start TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/session-start.log 2>&1" - } - ] - } - ], - "UserPromptSubmit": [ - { - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=user-prompt-submit TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/user-prompt-submit.log 2>&1" - } - ] - } - ], - "PreToolUse": [ - { - "matcher": "Edit|Write|MultiEdit", - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=pre-tool-use TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/pre-tool-use.log 2>&1" - } - ] - } - ], - "PostToolUse": [ - { - "matcher": "Edit|Write|MultiEdit", - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=post-tool-use TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/post-tool-use.log 2>&1" - } - ] - } - ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=stop TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/stop.log 2>&1" - } - ] - } - ], - 
"SessionEnd": [ - { - "hooks": [ - { - "type": "command", - "command": "mkdir -p $HOME/.specode/probe && { printf '=== EVENT=session-end TIME=%s CWD=%s PID=%s ===\\n' \"$(date '+%Y-%m-%dT%H:%M:%S%z')\" \"$PWD\" \"$$\"; echo '---ENV---'; env | sort; echo '---STDIN---'; cat; printf '\\n---END---\\n'; } >> $HOME/.specode/probe/session-end.log 2>&1" - } - ] - } - ] - } -} diff --git a/plugins/specode/hooks/hooks.json b/plugins/specode/hooks/hooks.json index 6319649..d27f655 100644 --- a/plugins/specode/hooks/hooks.json +++ b/plugins/specode/hooks/hooks.json @@ -5,7 +5,7 @@ "hooks": [ { "type": "command", - "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" session-start" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-session-start" } ] } @@ -15,49 +15,73 @@ "hooks": [ { "type": "command", - "command": "[ ! 
-e \"$HOME/.specode/.any-active\" ] || sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" user-prompt-submit" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-user-prompt" + }, + { + "type": "command", + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-heartbeat-quiet" + }, + { + "type": "command", + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-user-prompt-catalog" } ] } ], - "PreToolUse": [ + "Stop": [ { - "matcher": "Edit|Write|MultiEdit|Task|Bash", "hooks": [ { "type": "command", - "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" pre-tool-use" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-stop" } ] } ], - "PostToolUse": [ + "SessionEnd": [ { - "matcher": "Edit|Write|MultiEdit|Bash", "hooks": [ { "type": "command", - "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" post-tool-use" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-session-end" } ] } ], - "Stop": [ + "PostToolUse": [ + { + "matcher": "Task", + "hooks": [ + { + "type": "command", + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" 
on-task-completed" + } + ] + }, { "hooks": [ { "type": "command", - "command": "[ ! -e \"$HOME/.specode/.any-active\" ] || sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" stop" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-log-post-tool-use" } ] } ], - "SessionEnd": [ + "PreToolUse": [ + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-pre-tool-use" + } + ] + }, { "hooks": [ { "type": "command", - "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_guard.py\" session-end" + "command": "sh \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh\" \"${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/spec_session.py\" on-log-pre-tool-use" } ] } diff --git a/plugins/specode/scripts/bash_guard.py b/plugins/specode/scripts/bash_guard.py deleted file mode 100644 index c9a532f..0000000 --- a/plugins/specode/scripts/bash_guard.py +++ /dev/null @@ -1,251 +0,0 @@ -"""INV-11: Non-interactive Bash guard. - -Two responsibilities: - -1. **PreToolUse check** — reject Bash commands known to wait on stdin - under an agent harness (no TTY). The reject message includes a - ready-to-paste non-interactive rewrite so the model fixes it in one - retry instead of guessing. - -2. **PostToolUse signature scan** — after a Bash run completes, scan - stdout/stderr for "we asked for input and waited" markers (e.g. - "Ok to proceed? (y)"). 
If matched, return an advisory string the - caller injects as additionalContext so the next-round model knows - the command died waiting and should change tactic (use --yes, - re-run with explicit args, or report to the user). - -Stays stdlib-only (project rule). Pure functions, no I/O — the calling -hook owns audit/telemetry/output. -""" -from __future__ import annotations - -import re -from dataclasses import dataclass - - -# --------------------------------------------------------------------------- -# PreToolUse — interactive-command blacklist -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class Rule: - name: str - # `match` runs against the trimmed command string. - match: re.Pattern - # `bypass` (optional) — if the command also matches this, the rule is satisfied - # (e.g. `npm create` is fine when `--yes` is present). - bypass: re.Pattern | None - # Human-facing message + suggested rewrite. - reason: str - rewrite: str - - -# Ordering matters: more specific rules first so generic patterns don't shadow. -INTERACTIVE_RULES: list[Rule] = [ - Rule( - name="npm-create", - match=re.compile(r"(?:^|[;&|\s])npm\s+create(\s|$)"), - bypass=re.compile(r"(--yes|\s-y(\s|$)|(?:^|\s)yes\s*\|)"), - reason="`npm create` first installs the scaffolder and prompts 'Ok to proceed? (y)' — pure TTY interaction, will hang under any agent Bash tool.", - rewrite="add `-- --yes` after the scaffolder name, e.g. `npm create vite@latest myapp -- --yes --template react-ts`", - ), - Rule( - name="npm-init", - match=re.compile(r"(?:^|[;&|\s])npm\s+init(\s|$)"), - bypass=re.compile(r"(\s-y(\s|$)|--yes)"), - reason="`npm init` walks an interactive Q&A unless `-y` is present.", - rewrite="add `-y` to accept all defaults, e.g. 
`npm init -y`", - ), - Rule( - name="yarn-create", - match=re.compile(r"(?:^|[;&|\s])yarn\s+create(\s|$)"), - bypass=re.compile(r"--yes"), - reason="`yarn create` may install a scaffolder that prompts for confirmation.", - rewrite="append `--yes` to skip prompts", - ), - Rule( - name="pnpm-create", - match=re.compile(r"(?:^|[;&|\s])pnpm\s+create(\s|$)"), - bypass=re.compile(r"--yes"), - reason="`pnpm create` may install a scaffolder that prompts for confirmation.", - rewrite="append `--yes`", - ), - Rule( - name="npx-create", - # Only `npx ` is interactive (downloads + asks). Plain - # `npx --yes` or `npx -y` is already safe. - match=re.compile(r"(?:^|[;&|\s])npx(\s|$)"), - bypass=re.compile(r"(--yes|\s-y(\s|$)|^yes\s*\|\s*npx\s)"), - reason="`npx` prompts 'Ok to proceed? (y)' when it has to download the package.", - rewrite="prefix with `--yes`, e.g. `npx --yes create-foo`", - ), - Rule( - name="git-rebase-interactive", - match=re.compile(r"(?:^|[;&|\s])git\s+rebase\s+(-i|--interactive)(\s|$)"), - bypass=None, - reason="`git rebase -i` opens $EDITOR — there's no way to drive it from an agent Bash.", - rewrite="use non-interactive rebase or scripted commit surgery (`git commit --amend --no-edit`, `git reset --soft`, etc.)", - ), - Rule( - name="git-add-interactive", - match=re.compile(r"(?:^|[;&|\s])git\s+add\s+(-p|-i|--patch|--interactive)(\s|$)"), - bypass=None, - reason="`git add -p / -i` is interactive.", - rewrite="use explicit paths: `git add path/to/file`", - ), - Rule( - name="git-commit-needs-message", - # Matches `git commit` (top-level), denies unless -m / -F / -C / --amend --no-edit / --no-edit is present. 
- match=re.compile(r"(?:^|[;&|\s])git\s+commit(\s|$)"), - bypass=re.compile(r"(\s-m(\s|$)|\s-F(\s|$)|\s-C(\s|$)|--amend\s+--no-edit|--no-edit|\s--message[\s=]|\s--file[\s=])"), - reason="`git commit` without `-m` / `-F` / `--no-edit` opens $EDITOR and hangs.", - rewrite="pass the message inline: `git commit -m \"your message\"` (use a HEREDOC for multi-line)", - ), - Rule( - name="tty-editor", - match=re.compile(r"(?:^|[;&|\s])(vim|vi|nano|emacs|less|more|man|top|htop|btop|nvim)(\s|$)"), - bypass=None, - reason="full-screen TUI; will never return.", - rewrite="use the Read/Edit tools (for files) or pipe into `head`/`cat` (for viewing)", - ), - Rule( - name="interactive-shell", - match=re.compile(r"(?:^|[;&|\s])(bash|sh|zsh|fish|python|python3|node|ipython|psql|mysql|mongo|redis-cli)\s+(-i\b|--interactive\b)"), - bypass=None, - reason="explicit `-i` requests an interactive shell that waits for stdin.", - rewrite="drop `-i` and pass the command directly: `python3 -c ''`, `psql -c ''`, etc.", - ), - Rule( - name="repl-bare", - # Bare REPL invocations (python alone, node alone) — also hang. 
- match=re.compile(r"(?:^|[;&|])\s*(python|python3|node|ipython|psql|mysql|redis-cli)\s*(?:[;&|]|\s*$)"), - bypass=None, - reason="bare REPL invocation will block on stdin.", - rewrite="pass `-c ''` (python/node) or `-c ''` (psql/mysql)", - ), - Rule( - name="ssh-no-batch", - match=re.compile(r"(?:^|[;&|\s])ssh(\s|$)"), - bypass=re.compile(r"(BatchMode=yes|-o\s+BatchMode=yes|-i\s)"), - reason="`ssh` may prompt for password / host-key confirmation.", - rewrite="add `-o BatchMode=yes` (fails fast on auth prompt) or use key-based auth with `-i `", - ), - Rule( - name="gh-pr-create-no-args", - match=re.compile(r"(?:^|[;&|\s])gh\s+pr\s+create(\s|$)"), - bypass=re.compile(r"(--title[\s=]|--body[\s=]|--body-file[\s=]|--fill)"), - reason="`gh pr create` without `--title` / `--body` / `--fill` opens an editor.", - rewrite="pass `--title \"...\" --body \"...\"` (use HEREDOC for body)", - ), - Rule( - name="apt-no-yes", - match=re.compile(r"(?:^|[;&|\s])(sudo\s+)?(apt|apt-get|aptitude)\s+(install|upgrade|remove|purge|dist-upgrade)(\s|$)"), - bypass=re.compile(r"(\s-y(\s|$)|--yes|--assume-yes|DEBIAN_FRONTEND=noninteractive)"), - reason="apt prompts 'Do you want to continue? [Y/n]'.", - rewrite="add `-y` (e.g. `apt-get install -y nginx`); for postinstall prompts also prefix `DEBIAN_FRONTEND=noninteractive`", - ), -] - - -@dataclass(frozen=True) -class BashCheckResult: - decision: str # "ok" | "deny" - rule: str | None - message: str # empty when ok - - -def check_bash_command(command: str) -> BashCheckResult: - """Inspect a Bash command string. Returns BashCheckResult. - - Empty / whitespace-only commands pass. Multi-line commands are checked - as one flat string (rules use word boundaries / shell separators). - """ - if not command or not command.strip(): - return BashCheckResult("ok", None, "") - # Normalize: collapse whitespace; the regexes are tolerant of either. 
- flat = " " + re.sub(r"\s+", " ", command.strip()) + " " - for rule in INTERACTIVE_RULES: - if not rule.match.search(flat): - continue - if rule.bypass and rule.bypass.search(flat): - continue - msg = _format_deny(rule, command) - return BashCheckResult("deny", rule.name, msg) - return BashCheckResult("ok", None, "") - - -def _format_deny(rule: Rule, original: str) -> str: - return ( - f"INV-11 (non-interactive Bash) — rule [{rule.name}] denied\n" - f" reason: {rule.reason}\n" - f" fix: {rule.rewrite}\n" - f" retry the command in non-interactive form, or report to the user if it genuinely needs human input." - ) - - -# --------------------------------------------------------------------------- -# PostToolUse — hang-signature scan on captured stdout/stderr -# --------------------------------------------------------------------------- - -# Substrings (case-insensitive) seen in real-world hang reports. We scan the -# tail of stdout+stderr only; full scans are unnecessary and expensive. -HANG_SIGNATURES: list[str] = [ - "ok to proceed?", - "press [enter]", - "press enter to continue", - "continue? [y/n]", - "continue? (y/n)", - "[y/n]", - "[y/n/?]", - "[y/n/all]", - "(y/n)", - "(y/n/a)", - "[y/n]?", - "are you sure", - "please confirm", - "type 'yes'", - "type \"yes\"", - "username for", - "password:", - "passphrase:", - "确认吗", - "是否继续", - "请输入", - "请确认", -] - -# Tail size: chars from end of combined stdout+stderr to scan. 4 KiB covers -# typical prompt blocks without scanning megabyte-sized npm install logs. -TAIL_SCAN_BYTES = 4096 - - -def detect_hang(stdout: str, stderr: str = "", exit_code: int | None = None) -> tuple[bool, str]: - """Return (is_hang, reason). exit_code=124 = killed by GNU timeout. - - Even without exit_code=124, a tail with a known prompt signature is a - strong indicator the process either is still waiting or was killed - mid-prompt. 
- """ - if exit_code == 124: - return True, "process killed by `timeout` (exit 124) — likely waiting on stdin" - tail = ((stdout or "") + "\n" + (stderr or ""))[-TAIL_SCAN_BYTES:].lower() - for sig in HANG_SIGNATURES: - if sig in tail: - return True, f"detected interactive prompt: {sig!r}" - return False, "" - - -def format_hang_advisory(reason: str, command_excerpt: str | None = None) -> str: - """Format an additionalContext block for injection on hang detection.""" - lines = [ - "⚠ INV-11 ADVISORY: previous Bash run appears to have hung on stdin.", - f" {reason}", - ] - if command_excerpt: - excerpt = command_excerpt.strip().splitlines()[0][:200] - lines.append(f" command: {excerpt}") - lines.append( - " action: do NOT retry the same command. Either rewrite to a non-interactive form " - "(--yes / -y / explicit args), or report to the user with the command for them to run manually." - ) - return "\n".join(lines) diff --git a/plugins/specode/scripts/run.cmd b/plugins/specode/scripts/run.cmd index d625a26..9bf575b 100644 --- a/plugins/specode/scripts/run.cmd +++ b/plugins/specode/scripts/run.cmd @@ -1,18 +1,28 @@ @echo off -rem specode python launcher for native Windows cmd.exe. -rem Probes python3, python, py (in that order) and forwards all arguments. +REM specode plugin python launcher (Windows). 
+REM 优先用 py.exe Launcher——避开 Microsoft Store 的 App Execution Alias stub +REM (%LOCALAPPDATA%\Microsoft\WindowsApps\python{,3}.exe,跑起来只会打印 +REM "Python was not found"),py launcher 不受 alias 影响。 -where python3 >nul 2>&1 && ( +where py >NUL 2>&1 +if %ERRORLEVEL%==0 ( + py -3 %* + exit /B %ERRORLEVEL% +) + +where python3 >NUL 2>&1 +if %ERRORLEVEL%==0 ( python3 %* - exit /b %ERRORLEVEL% + exit /B %ERRORLEVEL% ) -where python >nul 2>&1 && ( + +where python >NUL 2>&1 +if %ERRORLEVEL%==0 ( python %* - exit /b %ERRORLEVEL% + exit /B %ERRORLEVEL% ) -where py >nul 2>&1 && ( - py -3 %* - exit /b %ERRORLEVEL% -) -echo specode: cannot find python interpreter ^(tried python3, python, py^) 1>&2 -exit /b 127 + +echo specode: 未找到可用的 Python 解释器(已尝试 py / python3 / python)。 1>&2 +echo 请从 python.org 安装 Python 3.8+,或在「设置 ^> 应用 ^> 高级应用设置 1>&2 +echo ^> 应用执行别名」中关闭 python.exe / python3.exe 的 Microsoft Store 别名。 1>&2 +exit /B 127 diff --git a/plugins/specode/scripts/run.sh b/plugins/specode/scripts/run.sh index bff8474..523a2bc 100755 --- a/plugins/specode/scripts/run.sh +++ b/plugins/specode/scripts/run.sh @@ -1,18 +1,40 @@ -#!/usr/bin/env sh -# specode python launcher (POSIX shells: bash / zsh / dash / Git Bash / MSYS). +#!/bin/sh +# specode plugin python launcher (POSIX / Git Bash / MSYS). +# 依次探测 python3 / python / py,找到就 exec 并透传所有参数。 # -# Probes python3 → python → py (in that order) and execs the first one found, -# forwarding all arguments. Enables cross-platform invocation from hooks.json -# and SKILL/references command samples without hard-coding `python3`. 
+# Windows 注意:PATH 上的 python.exe / python3.exe 可能是 Microsoft Store 的 +# App Execution Alias stub(路径形如 .../WindowsApps/python3.exe,跑起来只会 +# 打印 "Python was not found" 并 exit 49)。这里通过路径模式跳过这种 stub, +# 继续探测真实解释器(通常落在 py launcher 上)。 -for cand in python3 python py; do - if command -v "$cand" >/dev/null 2>&1; then - if [ "$cand" = "py" ]; then - exec py -3 "$@" - fi - exec "$cand" "$@" - fi -done +set -u -echo "specode: cannot find python interpreter (tried python3, python, py)" >&2 +_specode_is_alias_stub() { + case "$1" in + */WindowsApps/python.exe|*/WindowsApps/python3.exe) return 0 ;; + */WindowsApps/python|*/WindowsApps/python3) return 0 ;; + esac + return 1 +} + +p3="$(command -v python3 2>/dev/null || true)" +if [ -n "$p3" ] && ! _specode_is_alias_stub "$p3"; then + exec "$p3" "$@" +fi + +p="$(command -v python 2>/dev/null || true)" +if [ -n "$p" ] && ! _specode_is_alias_stub "$p"; then + exec "$p" "$@" +fi + +if command -v py >/dev/null 2>&1; then + exec py -3 "$@" +fi + +printf '%s\n' "specode: 未找到可用的 Python 解释器(已尝试 python3 / python / py)。" >&2 +printf '%s\n' " 请安装 Python 3.8+ 并确保其位于 PATH 中后再次重试。" >&2 +printf '%s\n' " Windows 用户:若提示 \"Python was not found\",多半是命中了 Microsoft" >&2 +printf '%s\n' " Store 的 python.exe 别名 stub。请从 python.org 安装真 Python,或在" >&2 +printf '%s\n' " 「设置 > 应用 > 高级应用设置 > 应用执行别名」中关闭 python.exe /" >&2 +printf '%s\n' " python3.exe 的 Microsoft Store 别名。" >&2 exit 127 diff --git a/plugins/specode/scripts/spec_choice.py b/plugins/specode/scripts/spec_choice.py deleted file mode 100755 index e47b3c6..0000000 --- a/plugins/specode/scripts/spec_choice.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python3 -"""Stateless selector emitter for specode confirmations. - -Design (post-0.4.0): this script is **non-interactive only**. It prints the -title + option block + a machine-readable sentinel and exits 0. 
The agent -(Claude Code / CodeBuddy main session) is expected to relay the stdout to -the user verbatim and resume work when the user replies with a number. - -Why no `input()` / curses anymore: agent Bash tools do not have a real TTY, -and any blocking stdin read leaves the process hanging indefinitely. The -prior TTY-only paths were a hang risk under CodeBuddy's piped stdin (the -process never received EOF). This module now physically cannot block. - -The `--no-curses` flag is kept for back-compat but is a no-op. -""" -from __future__ import annotations - -import argparse -import json -import sys -from dataclasses import dataclass - - -@dataclass -class Option: - label: str - description: str = "" - recommended: bool = False - - -def parse_option(raw: str) -> Option: - parts = raw.split("::") - label = parts[0].strip() - description = parts[1].strip() if len(parts) > 1 else "" - flags = {part.strip().lower() for part in parts[2:]} - return Option(label=label, description=description, recommended="recommended" in flags) - - -def print_result(index: int, option: Option, as_json: bool) -> None: - result = {"index": index + 1, "label": option.label, "description": option.description} - if as_json: - print(json.dumps(result, ensure_ascii=False)) - else: - print(option.label) - - -def emit_options(title: str, options: list[Option], default: int) -> None: - """Print the option block + sentinel. Never reads stdin.""" - print(title) - for index, option in enumerate(options, start=1): - suffix = " (Recommended)" if option.recommended else "" - print(f"{index}. 
{option.label}{suffix}") - if option.description: - print(f" {option.description}") - prompt = f"Select 1-{len(options)}" - if default >= 0: - prompt += f" [{default + 1}]" - prompt += ": " - sys.stdout.write(prompt) - sys.stdout.write("\n") - sys.stdout.write("[specode:non-interactive] 选项已就绪:请把上方选项原样转发给用户,并在对话中等待编号回复。\n") - sys.stdout.write("[specode:non-interactive] AWAITING_USER_CHOICE\n") - sys.stdout.flush() - - -def main() -> int: - parser = argparse.ArgumentParser(description="Selector emitter for specode confirmations (non-interactive).") - parser.add_argument("--title", required=True) - parser.add_argument("--option", action="append", required=True, help="label::description::recommended") - parser.add_argument("--json", action="store_true", help="Used with --print-default: emit JSON.") - parser.add_argument("--default-index", type=int, help="1-based default index.") - parser.add_argument("--no-curses", action="store_true", help="No-op; retained for back-compat.") - parser.add_argument("--print-default", action="store_true", help="Print the default selection without prompting.") - args = parser.parse_args() - - options = [parse_option(raw) for raw in args.option] - default = (args.default_index - 1) if args.default_index else next( - (index for index, option in enumerate(options) if option.recommended), - 0, - ) - if not 0 <= default < len(options): - default = 0 - - if args.print_default: - print_result(default, options[default], args.json) - return 0 - - emit_options(args.title, options, default) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/plugins/specode/scripts/spec_guard.py b/plugins/specode/scripts/spec_guard.py deleted file mode 100755 index ad91475..0000000 --- a/plugins/specode/scripts/spec_guard.py +++ /dev/null @@ -1,608 +0,0 @@ -#!/usr/bin/env python3 -# Hook entry for specode plugin. -# -# Phase 3: wires Code-Doc Sync Guard. 
-# - UserPromptSubmit: inject status block; start new turn in ledger; refresh tasks_files. -# - PreToolUse: INV-1 — block code edits not covered by tasks/doc-change/freeform. -# - PostToolUse: append the just-completed change to the ledger. -# - Stop: INV-2 (turn conservation). -# - SessionStart / SessionEnd: track Claude session, sync sentinel. -# -# Invariants: -# - Never raise out of main(). Internal errors log to audit and return 0. -# - SPECODE_GUARD=off → global bypass. - -import json -import os -import sys -import traceback -from datetime import datetime, timezone -from pathlib import Path -from typing import Optional - -sys.path.insert(0, str(Path(__file__).resolve().parent)) -import bash_guard # noqa: E402 -import spec_state # noqa: E402 -import spec_sync # noqa: E402 -import spec_telemetry # noqa: E402 -import task_swarm_guard # noqa: E402 - - -AUDIT_DIR = Path( - os.environ.get("SPECODE_AUDIT_DIR") - or os.path.expanduser("~/.specode/audit") -) - -# Per-file size cap. When today's daily log exceeds this, it gets truncated -# in place (keeping the most recent half). No cross-file pruning — older -# daily files are left alone. At ~256 bytes/record this holds ~80k entries -# per day, plenty of headroom for normal use; the cap exists to bound -# pathological growth (e.g. an error loop emitting tracebacks). -AUDIT_MAX_BYTES = int( - os.environ.get("SPECODE_AUDIT_MAX_BYTES") or 20 * 1024 * 1024 -) - -_truncate_checked = False - - -def _maybe_truncate(log_file: Path) -> None: - """If log_file is over the cap, rewrite it keeping only the tail half. - - Called once per process at first _audit() write. Safe under concurrent - writers from other hook processes — worst case is a lost record at the - rewrite boundary, which is acceptable for an advisory audit log. 
- """ - try: - size = log_file.stat().st_size - except OSError: - return - if size <= AUDIT_MAX_BYTES: - return - keep = AUDIT_MAX_BYTES // 2 - try: - with log_file.open("rb") as f: - f.seek(-keep, 2) - tail = f.read() - nl = tail.find(b"\n") - if nl >= 0: - tail = tail[nl + 1:] - marker = ( - json.dumps({ - "ts": datetime.now(timezone.utc).isoformat(), - "event": "_truncate", - "decision": "ok", - "msg": f"prev_size={size} kept_bytes={len(tail)}", - }, ensure_ascii=False) + "\n" - ).encode("utf-8") - log_file.write_bytes(marker + tail) - except OSError: - pass - - -def _audit(event: str, payload: dict, decision: str, msg: str = "") -> None: - global _truncate_checked - try: - AUDIT_DIR.mkdir(parents=True, exist_ok=True) - log_file = AUDIT_DIR / f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}.log" - if not _truncate_checked: - _truncate_checked = True - _maybe_truncate(log_file) - record = { - "ts": datetime.now(timezone.utc).isoformat(), - "event": event, - "decision": decision, - "msg": msg, - "tool": payload.get("tool_name"), - "session_id": payload.get("session_id"), - "cwd": payload.get("cwd") or os.getcwd(), - } - with log_file.open("a", encoding="utf-8") as f: - f.write(json.dumps(record, ensure_ascii=False) + "\n") - except Exception: - pass - - -def ok() -> int: - return 0 - - -def deny(msg: str) -> int: - sys.stderr.write(msg) - return 2 - - -def _emit_violation(inv_id: str, payload: dict, info: Optional[dict], target: Optional[Path], extra: Optional[dict] = None) -> None: - fields: dict = {"inv": inv_id} - if info: - fields["spec_slug"] = info.get("spec_slug") - fields["phase"] = info.get("current_phase") - fields["session_id"] = info.get("session_id") - if target is not None: - fields["file"] = str(target) - fields["tool"] = payload.get("tool_name") - fields["cwd"] = payload.get("cwd") or os.getcwd() - if extra: - fields.update(extra) - spec_telemetry.emit("inv.violation", **fields) - - -def _advisory(inv_id: str, msg: str, ledger: dict, spec_dir: 
Path, target: Optional[Path]) -> int: - """Record an INV-{1,2,4,6} advisory: write to sticky ledger + warn on stderr, do NOT block. - - Returns ok() so the calling hook does not deny the action. The advisory - becomes visible in the next UserPromptSubmit's status block (sticky until - spec doc edit auto-dismisses it or user runs /spec --dismiss-advisories). - """ - file_str = str(target) if target else None - spec_sync.record_advisory(ledger, inv_id, msg, file=file_str) - spec_sync.write_ledger(spec_dir, ledger) - # Surface immediately to the model — stderr is the only signal a same-turn - # tool result carries back. The sticky queue handles cross-turn visibility. - sys.stderr.write(f"⚠ {inv_id} ADVISORY ({'本次操作' if target else '本回合'}已放行)\n") - sys.stderr.write(msg + "\n") - sys.stderr.write(" (sticky 提醒已写入 ledger; 改任一 spec 文档即自动清除, 或运行 /spec --dismiss-advisories)\n") - return ok() - - -def _prefer_session_id() -> str: - return os.environ.get("TERM_SESSION_ID") or "" - - -def _resolve_project_root(payload: dict, ledger: dict) -> Path: - """Pick the project root used to classify code edits. - - Priority: - 1. ledger['project_root'] if previously set - 2. payload['cwd'] - 3. os.getcwd() - """ - pr = ledger.get("project_root") - if pr: - return Path(pr).expanduser() - cwd = payload.get("cwd") or os.getcwd() - return Path(cwd).expanduser() - - -def _edit_target(payload: dict) -> Optional[Path]: - tool_input = payload.get("tool_input") or {} - raw = tool_input.get("file_path") or tool_input.get("path") - if not raw: - return None - return Path(raw).expanduser() - - -def _enclosing_subagent_workspace(target: Path, project_root: Path) -> Optional[Path]: - """If target lives inside a task-swarm agent workspace, return that ws path. - - The convention is: - /.task-swarm/runs//agents/stage-N-[-rR]/{inbox,outbox,task.md,...} - - Return the agent workspace (the directory containing task.md) if target - is inside it, else None. 
- """ - try: - target_abs = target.resolve() - project_abs = project_root.resolve() - except OSError: - return None - try: - rel = target_abs.relative_to(project_abs) - except ValueError: - return None - parts = rel.parts - if len(parts) < 5: - return None - if parts[0] != ".task-swarm" or parts[1] != "runs" or parts[3] != "agents": - return None - ws = project_abs / parts[0] / parts[1] / parts[2] / parts[3] / parts[4] - return ws if ws.exists() else None - - -# --- handlers --------------------------------------------------------------- - -def handle_session_start(payload: dict) -> int: - sid = payload.get("session_id") or "" - try: - spec_state.write_claude_session(sid, payload) - is_active = spec_state.sync_any_active_sentinel() - except Exception as e: - _audit("SessionStart", payload, "state-error", str(e)) - return ok() - _audit("SessionStart", payload, "ok", f"any_active={is_active}") - return ok() - - -def handle_user_prompt_submit(payload: dict) -> int: - info = spec_state.find_active_spec(prefer_session_id=_prefer_session_id()) - if info is None: - spec_state.sync_any_active_sentinel() - return ok() - - spec_dir = Path(info["spec_dir"]) - ledger = spec_sync.read_ledger(spec_dir) - tasks_files = spec_sync.extract_tasks_files(spec_dir) - project_root = _resolve_project_root(payload, ledger) - spec_sync.start_new_turn(ledger, project_root, tasks_files) - # Freeform flag is read from per-spec config and mirrored into ledger. 
- config = spec_sync._read_spec_config(spec_dir) - ledger["freeform_mode"] = bool(config.get("freeformMode")) - spec_sync.write_ledger(spec_dir, ledger) - - block = spec_state.render_status_block(info) - if ledger["freeform_mode"]: - block += "\nmode: freeform (INV-1 silenced; INV-2/4/6 still advisory; INV-3/7/8/9 still enforced)" - else: - block += "\nmode: strict (INV-1/2/4/6 advisory; INV-3/7/8/9 enforced)" - block += f"\ntasks_files: {len(tasks_files)} entries" - block += f"\nturn: {ledger['turn_id']}" - - # Sticky advisories from prior turns (cleared by spec-doc edit or - # /spec --dismiss-advisories). - advisories_block = spec_sync.format_advisories_block(ledger) - if advisories_block: - block += "\n" + advisories_block - - # Task-swarm run state, if a run is active in this project. - swarm_block = _render_task_swarm_block(project_root) - if swarm_block: - block += "\n" + swarm_block - - output = { - "hookSpecificOutput": { - "hookEventName": "UserPromptSubmit", - "additionalContext": block, - } - } - sys.stdout.write(json.dumps(output, ensure_ascii=False)) - _audit("UserPromptSubmit", payload, "injected", info.get("spec_slug") or "") - return ok() - - -def _render_task_swarm_block(project_root: Path) -> str: - """Return a multi-line status block for the active task-swarm run, or ''.""" - run_dir = task_swarm_guard.find_active_run(project_root) - if run_dir is None: - return "" - state_path = run_dir / "state.json" - if not state_path.exists(): - return "" - try: - state = json.loads(state_path.read_text(encoding="utf-8")) - except Exception: - return "" - - summary_lines: list[str] = [] - counts = {"pending": 0, "running": 0, "converged": 0, "failed": 0, "skipped": 0} - in_flight: list[str] = [] - for s in state.get("stages") or []: - counts[s.get("phase", "pending")] = counts.get(s.get("phase", "pending"), 0) + 1 - if s.get("in_flight"): - ifl = s["in_flight"] - in_flight.append(f"stage {s['num']} {ifl['role']} r{ifl['round']}") - next_hint = ( - f"sh 
${{CLAUDE_PLUGIN_ROOT}}/scripts/run.sh " - f"${{CLAUDE_PLUGIN_ROOT}}/scripts/task_swarm.py next --run {state['run_id']}" - ) - summary_lines.append("--- task-swarm ---") - summary_lines.append(f"run: {state['run_id']}") - summary_lines.append( - f"stages: ✔{counts['converged']} ▶{counts['running']} ○{counts['pending']}" - f" ✗{counts['failed']} —{counts['skipped']}" - ) - summary_lines.append( - f"max_rounds: {state['config']['max_rounds']} parallel: {state['config']['parallel']}" - ) - if in_flight: - summary_lines.append("in-flight: " + "; ".join(in_flight)) - summary_lines.append(f"next: {next_hint}") - summary_lines.append("------------------") - return "\n".join(summary_lines) - - -def handle_pre_tool_use(payload: dict) -> int: - info = spec_state.find_active_spec(prefer_session_id=_prefer_session_id()) - if info is None: - spec_state.sync_any_active_sentinel() - # Even without an active spec, INV-7 (Task subagent_type) may still - # apply if a task-swarm run is active locally. Fall through. 
- target = None - else: - target = _edit_target(payload) - - tool_name = (payload.get("tool_name") or "").strip() - - # ---- INV-11: Bash interactive-command guard (works without active spec) ---- - if tool_name == "Bash": - command = (payload.get("tool_input") or {}).get("command") or "" - result = bash_guard.check_bash_command(command) - if result.decision == "deny": - _audit("PreToolUse", payload, f"deny-INV-11[{result.rule}]", command[:200]) - spec_telemetry.emit( - "inv.violation", - inv="INV-11", - rule=result.rule, - command=command[:200], - tool="Bash", - cwd=payload.get("cwd") or os.getcwd(), - ) - return deny(result.message) - _audit("PreToolUse", payload, "ok-INV-11", command[:120]) - return ok() - - # ---- INV-7: Task tool subagent_type prefix ---- - if tool_name == "Task": - project_root_for_swarm = Path(payload.get("cwd") or os.getcwd()).expanduser() - if task_swarm_guard.is_task_swarm_active(project_root_for_swarm): - subagent_type = (payload.get("tool_input") or {}).get("subagent_type") or "" - decision, msg = task_swarm_guard.check_inv7_subagent_type(subagent_type) - if decision == "deny": - _audit("PreToolUse", payload, "deny-INV-7", subagent_type) - _emit_violation("INV-7", payload, info, None, {"subagent_type": subagent_type}) - return deny(msg) - _audit("PreToolUse", payload, "ok-INV-7", subagent_type) - return ok() - - if info is None or target is None: - _audit("PreToolUse", payload, "ok-no-target", "") - return ok() - - spec_dir = Path(info["spec_dir"]) - ledger = spec_sync.read_ledger(spec_dir) - project_root = _resolve_project_root(payload, ledger) - current_phase = info.get("current_phase") or "unknown" - session_id = info.get("session_id") or _prefer_session_id() - slug = info.get("spec_slug") or spec_dir.name - - # ---- INV-8: subagent @writes boundary ---- - subagent_ws = _enclosing_subagent_workspace(target, project_root) - if subagent_ws is not None and task_swarm_guard.is_task_swarm_active(project_root): - decision, msg = 
task_swarm_guard.check_inv8_writes_boundary(target, subagent_ws, project_root, spec_dir) - if decision == "deny": - _audit("PreToolUse", payload, "deny-INV-8", str(target)) - _emit_violation("INV-8", payload, info, target) - return deny(msg) - # Inside a subagent workspace — internal swarm artifact, not project - # source. Bypass spec_sync INV-1/INV-6 checks. - _audit("PreToolUse", payload, "ok-swarm-internal", str(target)) - return ok() - - # ---- INV-9: protect tasks.md during task-swarm ---- - if task_swarm_guard.is_task_swarm_active(project_root) and task_swarm_guard.is_tasks_md(target, spec_dir): - tool_input = payload.get("tool_input") or {} - old_string = tool_input.get("old_string") - new_string = tool_input.get("new_string") - if old_string is not None and new_string is not None: - try: - full_text = target.read_text(encoding="utf-8") - # Apply the Edit hypothetically to get the post-edit text. - if tool_input.get("replace_all"): - new_text = full_text.replace(old_string, new_string) - else: - new_text = full_text.replace(old_string, new_string, 1) - decision, msg = task_swarm_guard.check_inv9_tasks_md_diff(full_text, new_text) - if decision == "deny": - _audit("PreToolUse", payload, "deny-INV-9", str(target)) - _emit_violation("INV-9", payload, info, target) - return deny(msg) - except OSError: - pass - elif tool_name == "Write": - # Full file overwrite — compare against current text if exists. 
- try: - old_text = target.read_text(encoding="utf-8") if target.exists() else "" - new_text = tool_input.get("content") or "" - decision, msg = task_swarm_guard.check_inv9_tasks_md_diff(old_text, new_text) - if decision == "deny": - _audit("PreToolUse", payload, "deny-INV-9", str(target)) - _emit_violation("INV-9", payload, info, target) - return deny(msg) - except OSError: - pass - - cls = spec_sync.classify_path(target, spec_dir, project_root) - - if cls == "outside": - _audit("PreToolUse", payload, "ok-outside", str(target)) - return ok() - - if cls == "spec-doc": - # INV-3: verify lock ownership before spec-doc writes. - decision, info_msg = spec_sync.check_verify_lock(spec_dir, session_id, slug) - if decision == "deny": - ledger["last_violation"] = {"id": "INV-3", "file": str(target), "at": spec_sync._now()} - spec_sync.write_ledger(spec_dir, ledger) - _audit("PreToolUse", payload, "deny-INV-3", str(target)) - _emit_violation("INV-3", payload, info, target) - return deny(info_msg) - _audit("PreToolUse", payload, f"ok-spec-doc[{info_msg}]", str(target)) - return ok() - - # project-code branch. - # INV-6 phase gate first (absolute; freeform does NOT exempt). - decision, msg = spec_sync.check_phase_gate(current_phase) - if decision == "deny": - ledger["last_violation"] = { - "id": "INV-6", - "phase": current_phase, - "file": str(target), - "at": spec_sync._now(), - } - _audit("PreToolUse", payload, "advisory-INV-6", f"phase={current_phase} target={target}") - _emit_violation("INV-6", payload, info, target) - return _advisory("INV-6", msg, ledger, spec_dir, target) - - # Then INV-1 (relaxable by freeform). 
- decision, msg = spec_sync.check_pre_edit(target, spec_dir, project_root, ledger) - if decision == "deny": - ledger["last_violation"] = {"id": "INV-1", "file": str(target), "at": spec_sync._now()} - _audit("PreToolUse", payload, "advisory-INV-1", str(target)) - _emit_violation("INV-1", payload, info, target) - return _advisory("INV-1", msg, ledger, spec_dir, target) - - _audit("PreToolUse", payload, "ok-code-allowed", str(target)) - return ok() - - -def handle_post_tool_use(payload: dict) -> int: - info = spec_state.find_active_spec(prefer_session_id=_prefer_session_id()) - - tool_name = (payload.get("tool_name") or "").strip() - - # ---- INV-11: Bash hang signature scan (works without active spec) ---- - if tool_name == "Bash": - tool_input = payload.get("tool_input") or {} - command = tool_input.get("command") or "" - # tool_response may be a string or dict depending on harness version. - tr = payload.get("tool_response") - if isinstance(tr, dict): - stdout = tr.get("stdout") or tr.get("output") or "" - stderr = tr.get("stderr") or "" - exit_code = tr.get("exit_code") or tr.get("returncode") - else: - stdout = tr or "" - stderr = "" - exit_code = None - is_hang, reason = bash_guard.detect_hang(stdout, stderr, exit_code) - if is_hang: - advisory = bash_guard.format_hang_advisory(reason, command_excerpt=command) - _audit("PostToolUse", payload, "advisory-INV-11-hang", reason) - spec_telemetry.emit( - "inv.violation", - inv="INV-11", - kind="post-hang", - reason=reason, - command=command[:200], - ) - output = { - "hookSpecificOutput": { - "hookEventName": "PostToolUse", - "additionalContext": advisory, - } - } - sys.stdout.write(json.dumps(output, ensure_ascii=False)) - return ok() - _audit("PostToolUse", payload, "ok-Bash", command[:120]) - return ok() - - if info is None: - spec_state.sync_any_active_sentinel() - return ok() - - target = _edit_target(payload) - if target is None: - _audit("PostToolUse", payload, "ok-no-target", "") - return ok() - - spec_dir = 
Path(info["spec_dir"]) - ledger = spec_sync.read_ledger(spec_dir) - project_root = _resolve_project_root(payload, ledger) - cls = spec_sync.classify_path(target, spec_dir, project_root) - - if cls == "spec-doc": - spec_sync.append_change(ledger, "doc", str(target), payload.get("tool_name") or "") - # Spec doc just got edited — auto-dismiss sticky INV-1/2/4 advisories. - # The drift those warned about is being addressed by this very edit. - dropped = spec_sync.auto_dismiss_on_doc_change(ledger) - if dropped: - _audit("PostToolUse", payload, f"advisories-cleared({dropped})", str(target)) - elif cls == "project-code": - spec_sync.append_change(ledger, "code", str(target), payload.get("tool_name") or "") - else: - _audit("PostToolUse", payload, "ok-outside", str(target)) - return ok() - - spec_sync.write_ledger(spec_dir, ledger) - _audit("PostToolUse", payload, f"ledger-{cls}", str(target)) - return ok() - - -def handle_stop(payload: dict) -> int: - info = spec_state.find_active_spec(prefer_session_id=_prefer_session_id()) - if info is None: - spec_state.sync_any_active_sentinel() - return ok() - - spec_dir = Path(info["spec_dir"]) - ledger = spec_sync.read_ledger(spec_dir) - violations = spec_sync.check_stop(ledger) - - if violations: - ledger["last_violation"] = { - "ids": [v["id"] for v in violations], - "at": spec_sync._now(), - } - # INV-2 / INV-4 are advisory as of 0.4.0 — record, warn, but do NOT - # block the turn. The sticky advisory queue ensures the model sees - # it on the next UserPromptSubmit until resolved. 
- for v in violations: - spec_sync.record_advisory(ledger, v["id"], v["msg"]) - _emit_violation(v["id"], payload, info, None) - spec_sync.reset_turn(ledger) - spec_sync.write_ledger(spec_dir, ledger) - _audit("Stop", payload, "advisory-" + "+".join(v["id"] for v in violations), "") - sys.stderr.write( - "⚠ Stop ADVISORY (本回合已放行): " + ", ".join(v["id"] for v in violations) + "\n" - ) - for v in violations: - sys.stderr.write(v["msg"] + "\n") - sys.stderr.write( - " (sticky 提醒已写入 ledger; 下轮起在 status block 提示, 改 spec 文档自动清除)\n" - ) - return ok() - - # Pass: reset turn counters (but keep turn_id until next UserPromptSubmit). - spec_sync.reset_turn(ledger) - spec_sync.write_ledger(spec_dir, ledger) - _audit("Stop", payload, "ok-conserved", info.get("spec_slug") or "") - return ok() - - -def handle_session_end(payload: dict) -> int: - sid = payload.get("session_id") or "" - try: - spec_state.clear_claude_session(sid) - spec_state.sync_any_active_sentinel() - except Exception as e: - _audit("SessionEnd", payload, "state-error", str(e)) - return ok() - _audit("SessionEnd", payload, "ok") - return ok() - - -HANDLERS = { - "session-start": handle_session_start, - "user-prompt-submit": handle_user_prompt_submit, - "pre-tool-use": handle_pre_tool_use, - "post-tool-use": handle_post_tool_use, - "stop": handle_stop, - "session-end": handle_session_end, -} - - -def main(argv: list) -> int: - if os.environ.get("SPECODE_GUARD", "").lower() == "off": - return 0 - - if len(argv) < 2 or argv[1] not in HANDLERS: - sys.stderr.write(f"spec_guard: unknown subcommand {argv[1:]!r}\n") - return 0 - - subcommand = argv[1] - - try: - # stdin-block: hook entry point — Claude Code / CodeBuddy feed a bounded JSON payload then close stdin, will not hang - raw = sys.stdin.read() - payload = json.loads(raw) if raw.strip() else {} - except json.JSONDecodeError as e: - _audit(subcommand, {}, "bad-json", str(e)) - return 0 - - try: - return HANDLERS[subcommand](payload) - except Exception as e: - 
_audit(subcommand, payload, "handler-error", f"{e}\n{traceback.format_exc()}") - return 0 - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/plugins/specode/scripts/spec_init.py b/plugins/specode/scripts/spec_init.py index d34a2d4..60183d8 100755 --- a/plugins/specode/scripts/spec_init.py +++ b/plugins/specode/scripts/spec_init.py @@ -1,223 +1,471 @@ #!/usr/bin/env python3 +"""spec_init.py — `/specode:spec <需求>` 入口。 + +参数: + --name spec 目录名(建议短横线 slug) + --requirement-name "<显示名>" 人类可读名称(写入 .config.json) + --source-text "<原始需求文本>" 写入 requirements.md / bugfix.md 的 summary + --session 会话 id(必填) + [--root ] 覆盖三层 doc_root 解析 + [--detect-vault] 仅打印 vault 检测结果后退出 + +行为: + 1. resolve_doc_root(含 --root / SPECODE_ROOT / config / auto) + 2. 三层全 miss → 输出引导 + exit 3 + 3. 在 doc_root 下创建 specs//{requirements.md,bugfix.md,design.md,tasks.md, + implementation-log.md,.config.json} + (tasks.md 末尾自带 `## 测试要点` 章节,由 agent 跟随 requirements/bugfix 同步更新) + 4. 更新 /.active-specode.json + 5. 强制写 ~/.specode/sessions/.json (atomic tempfile + os.replace + fsync) + 6. 任一失败 → 回滚已写文件 + exit 1 + 7. 成功输出 JSON:{"spec_dir","specId","session_id","phase"} + +stdlib-only。 +""" from __future__ import annotations import argparse +import contextlib import json +import os import re +import shutil import sys +import tempfile +import time import uuid -from datetime import datetime, timezone from pathlib import Path +from typing import Optional -import spec_session -import spec_telemetry -import spec_vault - - -ROOT = Path(__file__).resolve().parents[1] -TEMPLATE_DIR = ROOT / "assets" / "templates" - - -SLUG_INVALID = re.compile(r"[^a-z0-9-]+") - - -def normalize_slug(value: str) -> str: - """Format-normalize a slug. 
Does not infer semantics from Chinese; agent must - pass a semantically meaningful English slug via --name.""" - value = value.strip().lower() - value = SLUG_INVALID.sub("-", value) - value = re.sub(r"-+", "-", value).strip("-") - return value[:64] - - -def resolve_document_root(root: str | None) -> tuple[Path, str]: - """Three-tier resolution: --root → SPECODE_ROOT/config → Obsidian. - - Returns (resolved_root, source_tag). On total failure raises SystemExit with - a guidance message and a JSON error code on stderr for agent consumption. - """ - if root: - return Path(root).expanduser().resolve(), "explicit" - vault_root, source = spec_vault.resolve_spec_root() - if vault_root is not None: - return vault_root, source - raise SystemExit(json.dumps({ - "error": "no_spec_root", - "message": ( - "未检测到 Obsidian vault,且未配置 spec 根目录。请选择以下方式之一:\n" - " 1. 安装 Obsidian 后重试(推荐)\n" - " 2. /spec --set-vault \n" - " 3. /spec --set-root <自定义目录>" - ), - }, ensure_ascii=False)) - - -def read_source(args: argparse.Namespace) -> str: - chunks: list[str] = [] - if args.source_file: - chunks.append(Path(args.source_file).expanduser().read_text(encoding="utf-8")) - if args.source_text: - chunks.append(args.source_text) - if not chunks: - chunks.append("New spec initialized without a source requirement.") - return "\n\n".join(chunks).strip() - - -def render(template: str, values: dict[str, str]) -> str: - for key, value in values.items(): - template = template.replace("{{" + key + "}}", value) - return template - - -def write_if_missing(path: Path, content: str, force: bool) -> bool: - if path.exists() and not force: - return False - path.write_text(content, encoding="utf-8") - return True - - -def main() -> int: - parser = argparse.ArgumentParser(description="Initialize a Kiro-style specode document folder.") - parser.add_argument("--root", help="Document management root. 
The script creates //.") - parser.add_argument("--name", required=True, - help="Semantic slug (lowercase, hyphen-separated). The agent must compute and pass this; " - "the script does not infer slugs from Chinese.") - parser.add_argument("--requirement-name", help="Display name for the spec. Defaults to --name.") - parser.add_argument("--source-text", help="Requirement text, usually the text after /spec.") - parser.add_argument("--source-file", help="Path to a requirement source document.") - parser.add_argument("--workflow", choices=["requirements-first", "design-first", "bugfix"], default="requirements-first") - parser.add_argument("--spec-type", choices=["feature", "bugfix"], default="feature") - parser.add_argument("--persistent", action="store_true", help="Bind this spec to an active persistent session.") - parser.add_argument("--session", help="Window/thread/session id for persistent mode.") - parser.add_argument("--agent", help="Agent name recorded into lock metadata when --persistent.") - parser.add_argument( - "--current-phase", - choices=sorted(spec_session.PHASES - {"ended"}), - default="intake", - help="Initial phase for persistent mode.", - ) - parser.add_argument("--force", action="store_true", help="Overwrite existing generated documents.") - args = parser.parse_args() +# 见 spec_session.py 顶部说明:Windows pipe stdout 默认 cp936/gbk 无法编码 emoji / +# 部分中文错误消息发到 CodeBuddy / pytest 后变乱码,强制 utf-8 + errors=replace。 +with contextlib.suppress(Exception): + sys.stdout.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined] +with contextlib.suppress(Exception): + sys.stderr.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined] + +# 复用 spec_vault.py 的解析与原子写 +THIS_DIR = Path(__file__).resolve().parent +if str(THIS_DIR) not in sys.path: + sys.path.insert(0, str(THIS_DIR)) + +# 0.10.0+ 日志(defensive import;失败时降级为 no-op) +try: + from spec_log import write_event as _log_event # type: ignore +except Exception: + def 
_log_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + return None + +from spec_vault import resolve_doc_root, _atomic_write_json # type: ignore # noqa: E402 + + +# ------------------------------------------------------------------------- +# 模板 +# ------------------------------------------------------------------------- + +TEMPLATE_DIR = THIS_DIR.parent / "assets" / "templates" + +# fallback 骨架(模板缺失时使用) +FALLBACK_TEMPLATES: dict[str, str] = { + "requirements.md": """# 需求文档 + +Spec Type: Feature +Workflow: requirements-first +Status: Requirements Draft + +## 简介 + +{{summary}} + +## 需求 + +### 需求 1:核心能力 + +#### 验收标准 + +1. WHEN 用户触发该能力,THE System SHALL 按需求描述执行预期行为。 +""", + "bugfix.md": """# Bugfix 文档 + +Spec Type: Bugfix +Workflow: bugfix +Status: Bug Analysis Draft + +## 问题摘要 + +{{summary}} + +## 当前行为 + +1. WHEN 缺陷触发条件满足,THEN THE System 出现当前错误行为。 + +## 期望行为 + +1. WHEN 缺陷触发条件满足,THE System SHALL 执行正确行为。 +""", + "design.md": """# 设计文档:{{name}}({{slug}}) + +Status: Design Draft + +## 概述 + +{{summary}} + +## 架构 + +待补充。 + +## 组件与接口 + +待补充。 +""", + "tasks.md": """# 实现计划:{{name}}({{slug}}) + +Status: Tasks Draft + +## 阶段 1: 待规划阶段标题 + +- [ ] 1.1 待规划任务描述 @writes:src/path/to/file.py _需求:1.1_ + +## 测试要点 + +供测试人员快速了解需要验证的场景。主代理在 tasks phase 按 SHALL 顺手补几行作为参考;非验收硬条件。 + +- _agent 待填充_:触发场景 → 预期结果(需求 X.Y) + +## 验收 + +- [ ] 所有 required 任务完成。 +""", + "implementation-log.md": """# 实现记录:{{name}}({{slug}}) + +> 记录实现期间的设计偏离、关键决策、阻塞与解决方案。空白等于没改过——请勿留空。 + +## {{created_at}} — 初始化 + +- spec 已初始化,等待 intake / requirements 推进。 +""", +} + + +def _render(text: str, ctx: dict[str, str]) -> str: + # 简单 {{key}} 替换;缺失保留原文(不报错) + def repl(m: "re.Match[str]") -> str: + key = m.group(1).strip() + return ctx.get(key, m.group(0)) + return re.sub(r"\{\{\s*([a-zA-Z0-9_]+)\s*\}\}", repl, text) - slug = normalize_slug(args.name) + +def _load_template(name: str) -> str: + p = TEMPLATE_DIR / name + if p.exists(): + try: + return p.read_text(encoding="utf-8") + 
except Exception: + pass + return FALLBACK_TEMPLATES.get(name, f"# {name}\n\n待补充。\n") + + +# ------------------------------------------------------------------------- +# 工具 +# ------------------------------------------------------------------------- + +# 0.10.16+:允许 Unicode(中文 / 日文 / emoji 等),仅禁文件系统危险字符。 +# 拒:< > : " / \ | ? *(Windows 禁字符)、控制字符、任何空白(避免 shell 转义麻烦)。 +# 首字符额外拒:. (避免隐藏文件)、- (避免被误判为 CLI flag)。 +# 长度 1-80。 +SLUG_RE = re.compile( + r'^[^<>:"/\\|?*\s\x00-\x1f.\-]' + r'[^<>:"/\\|?*\s\x00-\x1f]{0,79}$' +) + +# Windows 保留名(case-insensitive)——即使字符合法也不能用作文件夹名 +_WIN_RESERVED = ( + {"CON", "PRN", "AUX", "NUL"} + | {f"COM{i}" for i in range(1, 10)} + | {f"LPT{i}" for i in range(1, 10)} +) + + +def _slug_invalid_reason(slug: str) -> Optional[str]: + """返回 slug 非法原因(用户可读);合法返回 None。""" if not slug: - print(json.dumps({ - "error": "invalid_name", - "message": "--name 必须是合法 slug(小写字母/数字/连字符),由 agent 根据需求语义生成。", - }, ensure_ascii=False), file=sys.stderr) - return 2 - - name = (args.requirement_name or args.name).strip() - spec_type = "bugfix" if args.workflow == "bugfix" else args.spec_type - - source = read_source(args) - document_root, root_source = resolve_document_root(args.root) - spec_dir = document_root / slug - spec_dir.mkdir(parents=True, exist_ok=True) - - summary = source - if len(summary) > 1200: - summary = summary[:1200].rstrip() + "\n\n[Source truncated in seed document. Read the source file for full context.]" - - values = { - "name": name, + return "slug 不能为空" + if not SLUG_RE.match(slug): + return ( + 'slug 不能含 < > : " / \\ | ? * 或空白字符;' + '不能以 . 或 - 开头;长度 1-80' + ) + if slug.upper() in _WIN_RESERVED: + return f"slug 是 Windows 保留名 ({slug!r}) — 请换一个" + if slug.endswith(".") or slug.endswith(" "): + return "slug 不能以 . 
或空格结尾(Windows 限制)" + return None + + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def _sessions_dir() -> Path: + return Path.home() / ".specode" / "sessions" + + +def _atomic_write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp( + prefix=path.name + ".", + suffix=".tmp", + dir=str(path.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(content) + fh.flush() + try: + os.fsync(fh.fileno()) + except OSError: + pass + os.replace(tmp, path) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# ------------------------------------------------------------------------- +# 主流程 +# ------------------------------------------------------------------------- + +def _print_root_missing_hint() -> None: + msg = ( + "specode: 未能解析出可用的文档根目录(doc_root)。\n" + "已尝试:\n" + " 1) --root 参数 / 环境变量 SPECODE_ROOT\n" + " 2) ~/.config/specode/config.json 的 obsidianRoot\n" + " 3) 自动检测 Obsidian vault\n\n" + "请任选其一:\n" + " - 运行 `spec_vault.py set --vault <绝对路径>` 持久化\n" + " - 或 `export SPECODE_ROOT=<绝对路径>` 临时指定\n" + " - 或在 Obsidian 中打开任意 vault 后重试\n" + ) + sys.stderr.write(msg) + + +def main(argv: Optional[list[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="spec_init.py", description="initialise a new specode spec") + parser.add_argument("--name", required=True, help="spec slug(短横线小写)") + parser.add_argument("--requirement-name", required=True, help="人类可读名称") + parser.add_argument("--source-text", required=True, help="原始需求文本(写入 summary)") + parser.add_argument("--session", required=True, help="会话 id(宿主注入的 session_id)") + parser.add_argument("--root", help="覆盖 doc_root(绝对路径)") + parser.add_argument("--detect-vault", action="store_true", help="仅打印 vault 检测结果后退出") + args = parser.parse_args(argv) + + if args.detect_vault: + # 透传给 spec_vault.detect + from spec_vault import cmd_detect # type: ignore + ns = 
argparse.Namespace() + return cmd_detect(ns) + + slug = args.name.strip() + reason = _slug_invalid_reason(slug) + if reason: + sys.stderr.write(f"非法 slug:{slug!r}({reason})。\n") + return 3 + + # 1. 解析 doc_root + root, source = resolve_doc_root(override=args.root) + if root is None: + _print_root_missing_hint() + return 3 + if not root.exists(): + sys.stderr.write( + f"doc_root 不存在(来源={source}):{root}\n" + "请创建该目录后重试,或换一个 --root 参数。\n" + ) + return 3 + + specs_root = root / "specs" + spec_dir = specs_root / slug + if spec_dir.exists(): + sys.stderr.write( + f"spec 目录已存在:{spec_dir}\n" + "请换一个 --name slug,或使用 /specode:continue 接管已有 spec。\n" + ) + return 3 + + spec_id = str(uuid.uuid4()) + created_at = _now_iso() + ctx = { + "summary": args.source_text, + "name": args.requirement_name, "slug": slug, - "summary": summary, - "workflow": args.workflow, - "spec_type": "Bugfix" if spec_type == "bugfix" else "Feature", + "spec_type": "Feature", + "workflow": "requirements-first", + "created_at": created_at, + "spec_id": spec_id, } - created: list[str] = [] - first_doc = "bugfix.md" if spec_type == "bugfix" or args.workflow == "bugfix" else "requirements.md" - for template_name, output_name in [ - (first_doc, first_doc), - ("design.md", "design.md"), - ("tasks.md", "tasks.md"), - ]: - template = (TEMPLATE_DIR / template_name).read_text(encoding="utf-8") - target = spec_dir / output_name - if write_if_missing(target, render(template, values), args.force): - created.append(str(target)) - - config = { - "specId": str(uuid.uuid4()), - "workflowType": args.workflow, - "specType": spec_type, - "documentRoot": str(document_root), - "requirementName": name, + # 文档内容 + doc_files = { + "requirements.md": _render(_load_template("requirements.md"), ctx), + "bugfix.md": _render(_load_template("bugfix.md"), ctx), + "design.md": _render(_load_template("design.md"), ctx), + "tasks.md": _render(_load_template("tasks.md"), ctx), + "implementation-log.md": 
_render(_load_template("implementation-log.md"), ctx), + } + + # invocation_cwd:记录 spec_init.py 被调用时的 cwd(即用户启动 Claude Code/ + # codebuddy 的目录)。供后续 project-root-choice selector 给用户 3 选项: + # cwd(已有项目里迭代)/ cwd/slug(新项目子目录)/ 自定义路径。 + invocation_cwd = os.getcwd() + + spec_config = { + "specId": spec_id, "slug": slug, - "sourceFile": str(Path(args.source_file).expanduser().resolve()) if args.source_file else None, - "createdBy": "specode", - "createdAt": datetime.now(timezone.utc).isoformat(), - "persistentMode": False, - "sessionStatus": None, - "currentSessionId": None, - "currentPhase": None, - "lastActivityAt": None, - "endedAt": None, - "endedReason": None, - "sessions": {}, - "lock": None, - "evictedSessions": [], - "iterationRound": 0, - "iterationHistory": [], + "name": args.requirement_name, + "createdAt": created_at, + "phase": "intake", + "workflow": None, # workflow 选择器之后写入 + # 0.10.15+:先 project-root-choice,用户选完后 set-project-root CLI + # 会把 pending_selector 推进到 workflow-choice。 + "pending_selector": "project-root-choice", + "lock": { + "holder": args.session, + "acquired_at": created_at, + "last_heartbeat_at": created_at, + }, + "doc_root": str(root), + "source": source, + "source_text": args.source_text, + "invocation_cwd": invocation_cwd, # 用于 selector 渲染(cwd / cwd/slug 选项) + "project_root": None, # set-project-root CLI 后写入 } - config_path = spec_dir / ".config.json" - if write_if_missing(config_path, json.dumps(config, ensure_ascii=False, indent=2) + "\n", args.force): - created.append(str(config_path)) - - session: dict[str, object] | None = None - if args.persistent: - current_config = json.loads(config_path.read_text(encoding="utf-8")) - current_config.setdefault("lock", None) - current_config.setdefault("evictedSessions", []) - session_id = spec_session.normalize_session_id(args.session) - # Acquire lock before binding the session. New specs are unlocked, so - # this should never raise LockHeld; we use force=False on purpose. 
- spec_session._acquire(spec_dir, session_id, force=False, agent=args.agent) - current_config = json.loads(config_path.read_text(encoding="utf-8")) - current_config = spec_session.update_config_session( - spec_dir, - current_config, - session_id, - "active", - args.current_phase, - ) - active = spec_session.load_active(document_root) - active["sessions"][session_id] = spec_session.entry_for( - spec_dir, - current_config, - session_id, - ) - spec_session.save_active(document_root, active) - session = { - "sessionId": session_id, - "status": "active", - "currentPhase": args.current_phase, - "activeFile": str(spec_session.active_path(document_root)), + + active_pointer_path = root / ".active-specode.json" + sessions_path = _sessions_dir() / f"{args.session}.json" + + # 跟踪已创建以便回滚 + created_paths: list[Path] = [] + # 备份 active-pointer 用于回滚 + prior_active_pointer: Optional[str] = None + if active_pointer_path.exists(): + try: + prior_active_pointer = active_pointer_path.read_text(encoding="utf-8") + except Exception: + prior_active_pointer = None + prior_session_blob: Optional[str] = None + if sessions_path.exists(): + try: + prior_session_blob = sessions_path.read_text(encoding="utf-8") + except Exception: + prior_session_blob = None + + def _rollback() -> None: + # 删除新建的 spec_dir(整个目录是新建的) + try: + if spec_dir.exists(): + shutil.rmtree(spec_dir) + except Exception: + pass + # 还原 active-pointer + try: + if prior_active_pointer is None: + if active_pointer_path.exists(): + active_pointer_path.unlink() + else: + _atomic_write_text(active_pointer_path, prior_active_pointer) + except Exception: + pass + # 还原 sessions + try: + if prior_session_blob is None: + if sessions_path.exists(): + sessions_path.unlink() + else: + _atomic_write_text(sessions_path, prior_session_blob) + except Exception: + pass + + try: + # 3. 
创建 spec_dir + 6 份文档 + .config.json + spec_dir.mkdir(parents=True, exist_ok=False) + created_paths.append(spec_dir) + for fname, content in doc_files.items(): + fp = spec_dir / fname + _atomic_write_text(fp, content) + created_paths.append(fp) + _atomic_write_json(spec_dir / ".config.json", spec_config) + created_paths.append(spec_dir / ".config.json") + + # 4. 更新 active-pointer + active_payload = { + "active_spec_slug": slug, + "active_spec_dir": str(spec_dir), + "specId": spec_id, + "updatedAt": created_at, + "session_id": args.session, } + _atomic_write_json(active_pointer_path, active_payload) - spec_telemetry.emit( - "spec.init", - spec_slug=slug, - spec_dir=str(spec_dir), - document_root=str(document_root), - workflow=args.workflow, - spec_type=spec_type, - persistent=bool(args.persistent), - initial_phase=args.current_phase if args.persistent else None, - created_count=len(created), - ) + # 5. 强制写 sessions/.json + session_payload = { + "session_id": args.session, + "started_at": created_at, + "last_activity_at": created_at, + "ended_at": None, + "mode": "active", + "active_spec_slug": slug, + "active_spec_dir": str(spec_dir), + "spec_id": spec_id, + "phase": "intake", + "lock_state": "ok", + "task_swarm_run_id": None, + "pending_selector": "project-root-choice", + } + _atomic_write_json(sessions_path, session_payload) + + except Exception as exc: + _rollback() + sys.stderr.write(f"spec_init 失败,已回滚:{exc}\n") + return 1 - print(json.dumps({ - "specDir": str(spec_dir), - "documentRoot": str(document_root), - "documentRootSource": root_source, - "created": created, - "session": session, - }, ensure_ascii=False, indent=2)) + # 7. 
输出 + out = { + "spec_dir": str(spec_dir), + "specId": spec_id, + "session_id": args.session, + "phase": "intake", + "doc_root": str(root), + "doc_root_source": source, + } + sys.stdout.write(json.dumps(out, ensure_ascii=False, indent=2) + "\n") return 0 +def _log_wrap_main(argv: Optional[list[str]] = None) -> int: + """0.10.0+ 包一层捕捉 cli_call / cli_exit 事件。""" + import contextlib as _cl + argv_list = list(sys.argv[1:]) if argv is None else list(argv) + sid = None + for i, a in enumerate(argv_list): + if a == "--session" and i + 1 < len(argv_list): + sid = argv_list[i + 1] + break + with _cl.suppress(Exception): + _log_event("cli_call", {"script": "spec_init.py", "argv_len": len(argv_list)}, session_id=sid) + rc = main(argv) + with _cl.suppress(Exception): + _log_event("cli_exit", {"script": "spec_init.py", "exit_code": rc}, session_id=sid) + return rc + + if __name__ == "__main__": - raise SystemExit(main()) + try: + sys.exit(_log_wrap_main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_lint.py b/plugins/specode/scripts/spec_lint.py index 30c2e92..6e326e2 100755 --- a/plugins/specode/scripts/spec_lint.py +++ b/plugins/specode/scripts/spec_lint.py @@ -1,145 +1,152 @@ #!/usr/bin/env python3 -from __future__ import annotations +"""spec_lint.py — 对当前 spec 目录做轻量 lint。 -import argparse -import json -from datetime import datetime, timezone -from pathlib import Path +仅产出 WARNING;所有 lint 一律 exit 0(不阻断模型流程)。 + +规则: + 1. tasks.md 中的 `_需求:x.y_` 标签必须在 requirements.md / bugfix.md + 找到对应 "需求 x" 或 "x.y" 章节标记;找不到 → WARNING + 2. implementation-log.md 中每个 `## ` 条目正文 < 30 字符或缺 + 文件引用 (`.py` / `.md` 等) → WARNING("空 log 等于没改过") + 3. 
requirements.md 中的 EARS SHALL 行缺动词或缺 trigger + (形如 WHEN / IF / WHILE / WHERE 关键字开头)→ WARNING -import spec_session -from spec_session import TASK_RE, task_section +接入:acceptance phase 进入前由主代理调一次,把 WARNING 列给用户参考 +(详见 SKILL.md §Phase Order 中 acceptance 部分)。 +用法: + spec_lint.py --spec lint 该 spec 目录下 5 份文档 -def read(path: Path) -> str: - return path.read_text(encoding="utf-8") +stdlib-only。 +""" +from __future__ import annotations +import argparse +import re +import sys +from pathlib import Path +from typing import Optional + + +# ------------------------------------------------------------------------- + +REQ_TAG_RE = re.compile(r"_需求[::]\s*([0-9]+(?:\.[0-9]+)?)_") +FILE_REF_RE = re.compile(r"[A-Za-z0-9_./-]+\.(py|md|js|ts|tsx|jsx|go|rs|java|kt|rb|c|h|cpp|sh|yaml|yml|json)") +EARS_HEADS = ("WHEN", "IF", "WHILE", "WHERE", "WHENEVER") +SHALL_LINE_RE = re.compile(r"\bSHALL\b", re.IGNORECASE) + + +def _warn(buf: list[str], rule: str, msg: str) -> None: + buf.append(f"[WARN][{rule}] {msg}") + + +def _read(p: Path) -> Optional[str]: + try: + if p.exists() and p.is_file(): + return p.read_text(encoding="utf-8", errors="replace") + except Exception: + return None + return None + + +def rule_task_traceability(spec_dir: Path, warnings: list[str]) -> None: + tasks = _read(spec_dir / "tasks.md") + if not tasks: + return + haystack_parts = [] + for fn in ("requirements.md", "bugfix.md"): + s = _read(spec_dir / fn) + if s: + haystack_parts.append(s) + haystack = "\n".join(haystack_parts) + if not haystack: + # tasks.md 含标签但无 req/bugfix → 全报 + for tag in set(REQ_TAG_RE.findall(tasks)): + _warn(warnings, "trace", + f"tasks.md 引用 _需求:{tag}_ 但 requirements.md / bugfix.md 不存在或为空。") + return + for tag in sorted(set(REQ_TAG_RE.findall(tasks))): + # 容许匹配 "需求 1" / "需求 1.2" / "1.2" + if not re.search(rf"需求\s*{re.escape(tag)}\b", haystack) and tag not in haystack: + _warn(warnings, "trace", + f"tasks.md 的 _需求:{tag}_ 在 requirements.md / bugfix.md 中找不到对应章节。") + + +def 
rule_log_entries(spec_dir: Path, warnings: list[str]) -> None: + log = _read(spec_dir / "implementation-log.md") + if not log: + return + # 拆 ## 开头的条目 + parts = re.split(r"(?m)^##\s+", log) + # parts[0] 是文件头;条目从 parts[1:] 起 + for entry in parts[1:]: + # 取第一行作为 title,正文是其余 + head, _, body = entry.partition("\n") + body_stripped = body.strip() + title = head.strip() + if not body_stripped: + _warn(warnings, "log", + f"implementation-log.md 条目「{title[:30]}」正文为空。") + continue + if len(body_stripped) < 30: + _warn(warnings, "log", + f"implementation-log.md 条目「{title[:30]}」正文过短(< 30 字符);信息量不足。") + if not FILE_REF_RE.search(body_stripped): + _warn(warnings, "log", + f"implementation-log.md 条目「{title[:30]}」未引用任何源码 / 文档文件路径。") + + +def rule_ears_shall(spec_dir: Path, warnings: list[str]) -> None: + req = _read(spec_dir / "requirements.md") + if not req: + return + for idx, line in enumerate(req.splitlines(), start=1): + if not SHALL_LINE_RE.search(line): + continue + # 简单 EARS 检查:行内或紧邻上文应含 EARS 关键字 + upper = line.upper() + has_trigger = any(k in upper for k in EARS_HEADS) + if not has_trigger: + # 看前后两行 + # 注意:splitlines 不保留尾换行;这里不报上下文,仅提示 + _warn(warnings, "ears", + f"requirements.md 第 {idx} 行包含 SHALL 但未检测到 EARS trigger(WHEN/IF/WHILE/WHERE)。") + continue + # 检查 SHALL 之后是否有动词(粗略判定:SHALL 后至少有非空白且非 thE/A/AN 的词) + m = re.search(r"SHALL\s+([A-Za-z一-鿿]+)", line, re.IGNORECASE) + if not m or m.group(1).lower() in ("the", "a", "an"): + _warn(warnings, "ears", + f"requirements.md 第 {idx} 行 SHALL 后缺动词。") + + +# ------------------------------------------------------------------------- + +def main(argv: Optional[list[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="spec_lint.py", description="lint a specode spec directory") + parser.add_argument("--spec", required=True, help="spec 目录绝对路径") + args = parser.parse_args(argv) + + spec_dir = Path(args.spec).expanduser().resolve() + if not spec_dir.is_dir(): + sys.stderr.write(f"spec 目录不存在:{spec_dir}\n") + return 0 # lint 
不阻断;返回 0 -def lint(spec_dir: Path) -> list[str]: - errors: list[str] = [] warnings: list[str] = [] - config_data: dict = {} - - req = spec_dir / "requirements.md" - bug = spec_dir / "bugfix.md" - design = spec_dir / "design.md" - tasks = spec_dir / "tasks.md" - config = spec_dir / ".config.json" - - if req.exists() and bug.exists(): - errors.append("Spec should not contain both requirements.md and bugfix.md.") - if not req.exists() and not bug.exists(): - errors.append("Missing requirements.md or bugfix.md.") - if not design.exists(): - errors.append("Missing design.md.") - if not tasks.exists(): - errors.append("Missing tasks.md.") - if not config.exists(): - warnings.append("Missing .config.json.") - else: - try: - config_data = json.loads(config.read_text(encoding="utf-8")) - except json.JSONDecodeError as exc: - errors.append(f".config.json is invalid JSON: {exc}") - config_data = {} - if config_data: - document_root = Path(config_data.get("documentRoot") or spec_dir.parent).expanduser().resolve() - try: - spec_session.ensure_within_root(spec_dir, document_root) - except SystemExit as exc: - errors.append(str(exc)) - if not config_data.get("specId"): - errors.append(".config.json is missing specId.") - sessions = config_data.get("sessions") - if sessions is not None and not isinstance(sessions, dict): - errors.append(".config.json sessions must be an object keyed by session id.") - current_phase = config_data.get("currentPhase") - if current_phase and current_phase not in spec_session.PHASES: - errors.append(f".config.json currentPhase is invalid: {current_phase}") - session_status = config_data.get("sessionStatus") - if session_status and session_status not in {"active", "ended"}: - errors.append(f".config.json sessionStatus is invalid: {session_status}") - if config_data.get("persistentMode") and session_status == "ended": - warnings.append(".config.json persistentMode is true but the current session is ended.") - if session_status == "ended" and not 
config_data.get("endedAt"): - warnings.append(".config.json ended session has no endedAt timestamp.") - if sessions: - for session_id, session in sessions.items(): - status = session.get("status") - phase = session.get("currentPhase") - if status not in {"active", "ended"}: - errors.append(f"Session {session_id} has invalid status: {status}") - if phase not in spec_session.PHASES: - errors.append(f"Session {session_id} has invalid currentPhase: {phase}") - - # Lock field structural check - lock = config_data.get("lock") - if lock is not None: - if not isinstance(lock, dict): - errors.append(".config.json lock must be an object or null.") - else: - for key in ("sessionId", "acquiredAt", "lastHeartbeatAt"): - if not lock.get(key): - errors.append(f".config.json lock is missing required field: {key}") - # Stale lock advisory - last_hb = lock.get("lastHeartbeatAt") or lock.get("acquiredAt") - try: - ts = datetime.fromisoformat(last_hb) - elapsed = (datetime.now(timezone.utc) - ts).total_seconds() - if elapsed > spec_session.LOCK_STALE_SECONDS: - warnings.append( - f".config.json lock has been stale for {int(elapsed)}s " - f"(threshold {spec_session.LOCK_STALE_SECONDS}s); next acquire will reclaim it." 
- ) - except (TypeError, ValueError): - pass - evicted = config_data.get("evictedSessions") - if evicted is not None and not isinstance(evicted, list): - errors.append(".config.json evictedSessions must be a list.") - - first_doc = bug if bug.exists() else req - if first_doc and first_doc.exists(): - text = read(first_doc) - if "SHALL" not in text: - warnings.append(f"{first_doc.name} has no EARS-style SHALL criteria.") - placeholder_markers = ["待补充", "[问题]", "[需求", "[触发条件]", "[期望行为]"] - if any(marker in text for marker in placeholder_markers): - warnings.append(f"{first_doc.name} still contains template placeholder markers.") - - if design.exists(): - text = read(design) - for heading in ["## 概述", "## 架构", "## 测试策略"]: - if heading not in text: - warnings.append(f"design.md is missing {heading}.") - - if tasks.exists(): - text = read(tasks) - section = task_section(text) - task_matches = list(TASK_RE.finditer(section)) - if not task_matches: - errors.append("tasks.md has no checkbox tasks.") - if config.exists(): - active_task_exists = any(match.group(1) == "~" for match in task_matches) - if active_task_exists and config_data.get("currentPhase") not in {"implementation", "acceptance"}: - warnings.append("tasks.md has in-progress tasks but currentPhase is not implementation or acceptance.") - if "验证:" not in section and "Validation:" not in section: - warnings.append("tasks.md does not contain validation notes.") - if "_需求:" not in section and "Requirements:" not in section and "Behavior:" not in section: - warnings.append("tasks.md does not contain requirement traceability.") - - return [f"ERROR: {item}" for item in errors] + [f"WARNING: {item}" for item in warnings] - - -def main() -> int: - parser = argparse.ArgumentParser(description="Lint a specode folder.") - parser.add_argument("spec_dir", type=Path) - args = parser.parse_args() - messages = lint(args.spec_dir) - if messages: - print("\n".join(messages)) - else: - print("Spec lint passed.") - return 1 if 
any(msg.startswith("ERROR:") for msg in messages) else 0 + rule_task_traceability(spec_dir, warnings) + rule_log_entries(spec_dir, warnings) + rule_ears_shall(spec_dir, warnings) + + if not warnings: + sys.stdout.write("spec_lint: 0 warnings.\n") + return 0 + sys.stdout.write(f"spec_lint: {len(warnings)} warning(s).\n") + for w in warnings: + sys.stdout.write(w + "\n") + return 0 if __name__ == "__main__": - raise SystemExit(main()) + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_log.py b/plugins/specode/scripts/spec_log.py new file mode 100644 index 0000000..25505ed --- /dev/null +++ b/plugins/specode/scripts/spec_log.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +"""spec_log.py — specode 会话日志收集(0.10.0+)。 + +收集 spec 模式期间的事件流到 ~/.specode/logs/.jsonl, +用于排查 bug 时回溯主代理调用 / hook 注入 / CLI 调用的现场。 + +设计: +- 单一事实源:~/.specode/logs/.jsonl(每行一个 JSON event) +- 双开关:SPECODE_LOG=off env 临时关 / ~/.config/specode/config.json.logging=false 永久关 +- 默认 redact:password / api_key / token / secret / authorization 等键名匹配 → 占位 +- 默认截断:字符串字段超过 500 字符 → 截断 + 标记 +- 不 rotation:手动清 rm -rf ~/.specode/logs/;status 子命令报当前占用 + +子命令: +- write-event (内部)由其他脚本 / hook 调用写一条 event +- replay 按时序输出指定 session 的可读 events +- status 输出 ~/.specode/logs/ 占用与文件数 +- enable 临时打开(清除 SPECODE_LOG env 提示) +- disable 临时关闭(提示设置 SPECODE_LOG=off) + +stdlib-only。 +""" +from __future__ import annotations + +import argparse +import json +import os +import sys +import time +from pathlib import Path +from typing import Any, Optional + + +DEFAULT_REDACT_KEYS = ( + "password", "passwd", "pwd", + "api_key", "apikey", "api-key", + "token", "access_token", "refresh_token", + "secret", "client_secret", + "authorization", "auth", + "cookie", "session_cookie", + "private_key", "ssh_key", +) +DEFAULT_TRUNCATE_LEN = 500 +REDACT_PLACEHOLDER = "" +TRUNCATE_SUFFIX = "..." 
+ + +# ------------------------------------------------------------------------- +# 配置 + 开关 +# ------------------------------------------------------------------------- + +def _logs_dir() -> Path: + return Path.home() / ".specode" / "logs" + + +def _config_path() -> Path: + return Path.home() / ".config" / "specode" / "config.json" + + +def _read_config() -> dict: + p = _config_path() + if not p.exists(): + return {} + try: + with p.open("r", encoding="utf-8") as fh: + data = json.load(fh) + return data if isinstance(data, dict) else {} + except Exception: + return {} + + +def is_logging_enabled() -> bool: + """env > config > default(True).""" + env = os.environ.get("SPECODE_LOG", "").lower() + if env in ("off", "false", "0", "no"): + return False + if env in ("on", "true", "1", "yes"): + return True + cfg = _read_config() + val = cfg.get("logging") + if val is False: + return False + return True # default + + +def _redact_key_set() -> set[str]: + cfg = _read_config() + extra = cfg.get("redact_keys") or [] + keys = set(k.lower() for k in DEFAULT_REDACT_KEYS) + if isinstance(extra, list): + keys.update(k.lower() for k in extra if isinstance(k, str)) + return keys + + +# ------------------------------------------------------------------------- +# Redact + 截断 +# ------------------------------------------------------------------------- + +def _truncate(s: str, limit: int = DEFAULT_TRUNCATE_LEN) -> str: + if len(s) <= limit: + return s + return s[:limit] + TRUNCATE_SUFFIX + + +def _sanitize(value: Any, redact_keys: set[str], depth: int = 0) -> Any: + """递归处理 dict / list / str;key 命中 redact 则替换占位,str 截断。""" + if depth > 8: + return "" + if isinstance(value, dict): + out = {} + for k, v in value.items(): + if isinstance(k, str) and k.lower() in redact_keys: + out[k] = REDACT_PLACEHOLDER + else: + out[k] = _sanitize(v, redact_keys, depth + 1) + return out + if isinstance(value, list): + return [_sanitize(v, redact_keys, depth + 1) for v in value] + if isinstance(value, 
str): + return _truncate(value) + return value + + +# ------------------------------------------------------------------------- +# 写 event +# ------------------------------------------------------------------------- + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def write_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + """对外主入口;其他 .py 调这一个函数即可。 + + 任何异常都吞并;日志失败绝不阻断业务流程。 + """ + try: + if not is_logging_enabled(): + return + sid = session_id or (payload or {}).get("session_id") + if not sid: + # 没有 session_id 的事件落到 _orphan.jsonl + sid = "_orphan" + log_path = _logs_dir() / f"{sid}.jsonl" + log_path.parent.mkdir(parents=True, exist_ok=True) + redact = _redact_key_set() + record = { + "ts": _now_iso(), + "event": event, + "payload": _sanitize(payload or {}, redact), + } + with log_path.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(record, ensure_ascii=False) + "\n") + except Exception: + # 静默吞并 —— logging 永不阻断主流程 + pass + + +# ------------------------------------------------------------------------- +# CLI 子命令 +# ------------------------------------------------------------------------- + +def cmd_write_event(args: argparse.Namespace) -> int: + """write-event --event [--session ] [--payload ]。 + + payload 从 stdin 或 --payload JSON 字符串。 + """ + payload = {} + if args.payload: + try: + payload = json.loads(args.payload) + except Exception: + sys.stderr.write(f"invalid --payload JSON: {args.payload!r}\n") + return 1 + elif not sys.stdin.isatty(): + try: + raw = sys.stdin.read().strip() + if raw: + payload = json.loads(raw) + except Exception: + pass + write_event(args.event, payload, session_id=args.session) + return 0 + + +def cmd_replay(args: argparse.Namespace) -> int: + """replay --session 按时序打印 events。""" + log_path = _logs_dir() / f"{args.session}.jsonl" + if not log_path.exists(): + sys.stderr.write(f"no log for session: {args.session}\n") + sys.stderr.write(f" 
expected: {log_path}\n") + return 3 + total = 0 + with log_path.open("r", encoding="utf-8") as fh: + for line in fh: + line = line.strip() + if not line: + continue + try: + rec = json.loads(line) + except Exception: + sys.stdout.write(f"[skip malformed] {line}\n") + continue + ts = rec.get("ts", "?") + ev = rec.get("event", "?") + payload = rec.get("payload", {}) + payload_str = json.dumps(payload, ensure_ascii=False) + if len(payload_str) > 200: + payload_str = payload_str[:200] + "..." + sys.stdout.write(f"[{ts}] {ev} {payload_str}\n") + total += 1 + sys.stderr.write(f"(replayed {total} events from {log_path})\n") + return 0 + + +def cmd_status(args: argparse.Namespace) -> int: + """status 输出 ~/.specode/logs/ 占用 + 文件数。""" + d = _logs_dir() + enabled = is_logging_enabled() + info = { + "enabled": enabled, + "switch_source": _switch_source(), + "logs_dir": str(d), + "exists": d.exists(), + } + if d.exists(): + files = list(d.glob("*.jsonl")) + total_bytes = sum(p.stat().st_size for p in files if p.is_file()) + info["session_log_files"] = len(files) + info["total_bytes"] = total_bytes + info["total_mb"] = round(total_bytes / 1024 / 1024, 2) + # 占用大时给个提示 + if total_bytes > 100 * 1024 * 1024: + info["hint"] = "logs 超过 100MB,可手动清理:rm -rf ~/.specode/logs/" + sys.stdout.write(json.dumps(info, ensure_ascii=False, indent=2) + "\n") + return 0 + + +def _switch_source() -> str: + env = os.environ.get("SPECODE_LOG", "").lower() + if env: + return f"env:SPECODE_LOG={env}" + cfg = _read_config() + if "logging" in cfg: + return f"config.json.logging={cfg.get('logging')}" + return "default(on)" + + +def cmd_enable(args: argparse.Namespace) -> int: + """提示如何打开(env unset / config 写 true)。""" + msg = ( + "spec_log 默认开启。当前开关来源:" + _switch_source() + "\n\n" + "如果当前是关闭状态,按下列之一打开:\n" + " 1) 临时打开:unset SPECODE_LOG (或 export SPECODE_LOG=on)\n" + " 2) 永久打开:编辑 ~/.config/specode/config.json,把 logging 设为 true 或删除该字段\n" + ) + sys.stdout.write(msg) + return 0 + + +def cmd_disable(args: 
argparse.Namespace) -> int: + """提示如何关闭。""" + msg = ( + "关闭 spec_log 的两种方式:\n\n" + " 1) 临时关闭(仅当前 shell):export SPECODE_LOG=off\n" + " 2) 永久关闭:编辑 ~/.config/specode/config.json,加 \"logging\": false\n\n" + "当前开关来源:" + _switch_source() + "\n" + ) + sys.stdout.write(msg) + return 0 + + +# ------------------------------------------------------------------------- +# main +# ------------------------------------------------------------------------- + +def main(argv: Optional[list[str]] = None) -> int: + parser = argparse.ArgumentParser( + prog="spec_log.py", + description="specode session log collection (0.10.0+)", + ) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_w = sub.add_parser("write-event", help="write a single event (internal)") + p_w.add_argument("--event", required=True, help="event name") + p_w.add_argument("--session", help="session id (or via payload)") + p_w.add_argument("--payload", help="payload JSON string (or via stdin)") + p_w.set_defaults(func=cmd_write_event) + + p_r = sub.add_parser("replay", help="replay a session's events in order") + p_r.add_argument("--session", required=True, help="session id to replay") + p_r.set_defaults(func=cmd_replay) + + p_s = sub.add_parser("status", help="show logs/ size + enable state") + p_s.set_defaults(func=cmd_status) + + p_e = sub.add_parser("enable", help="show how to enable logging") + p_e.set_defaults(func=cmd_enable) + + p_d = sub.add_parser("disable", help="show how to disable logging") + p_d.set_defaults(func=cmd_disable) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_session.py b/plugins/specode/scripts/spec_session.py index e10d250..be4e40c 100755 --- a/plugins/specode/scripts/spec_session.py +++ b/plugins/specode/scripts/spec_session.py @@ -1,939 +1,46 @@ #!/usr/bin/env python3 -from __future__ import annotations - -import argparse 
-import json -import os -import re -import sys -from contextlib import contextmanager -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Iterator - -sys.path.insert(0, str(Path(__file__).resolve().parent)) -import spec_telemetry # noqa: E402 - - -ACTIVE_FILE = ".active-specode.json" -ACTIVE_VERSION = 2 -SESSION_RE = re.compile(r"[^a-zA-Z0-9_.-]+") -PHASES = { - "intake", - "requirements", - "bugfix", - "design", - "tasks", - "implementation", - "acceptance", - "iteration", - "ended", -} -TASK_RE = re.compile(r"^\s*-\s*\[( |x|~|\*|-)\]\s+(.+)$", re.MULTILINE) -TASK_LABELS = {" ": "pending", "x": "completed", "~": "in_progress", "*": "optional", "-": "skipped"} - -# Document filenames managed by the spec workflow. Used for dynamic column width -# in `command_load` so adding a new document does not silently break alignment. -DOC_FILENAMES = ( - "requirements.md", - "bugfix.md", - "design.md", - "tasks.md", -) -DOC_COL_WIDTH = max(len(name) for name in DOC_FILENAMES) + 2 - -# Lock staleness: a lock whose lastHeartbeatAt is older than this is silently -# reclaimable by another session. Overridable via SPECODE_LOCK_STALE_SECONDS. 
-LOCK_STALE_SECONDS = int(os.environ.get("SPECODE_LOCK_STALE_SECONDS") or 1800) - - -def now() -> str: - return datetime.now(timezone.utc).isoformat() - - -def _parse_ts(value: str | None) -> datetime | None: - if not value: - return None - try: - return datetime.fromisoformat(value) - except ValueError: - return None - - -def normalize_session_id(raw: str | None) -> str: - value = raw or os.environ.get("TERM_SESSION_ID") or os.environ.get("SPEC_SESSION_ID") or "default" - value = SESSION_RE.sub("-", value.strip()).strip("-._") - return value[:80] or "default" - - -def read_json(path: Path, default: dict[str, Any]) -> dict[str, Any]: - if not path.exists(): - return default - return json.loads(path.read_text(encoding="utf-8")) - - -def write_json(path: Path, value: dict[str, Any]) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - temp = path.with_suffix(path.suffix + ".tmp") - temp.write_text(json.dumps(value, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - temp.replace(path) - - -@contextmanager -def _file_lock(target: Path) -> Iterator[None]: - """Process-level advisory file lock. Cross-platform best-effort. - - Used to guard read-modify-write sequences on .config.json and the active - pointer file against two parallel `spec_session.py` invocations racing. - """ - target.parent.mkdir(parents=True, exist_ok=True) - lock_path = target.with_suffix(target.suffix + ".lock") - handle = open(lock_path, "a+") - locked = False - try: - try: - import fcntl # type: ignore[import-not-found] - fcntl.flock(handle.fileno(), fcntl.LOCK_EX) - locked = True - except (ImportError, OSError): - try: - import msvcrt # type: ignore[import-not-found] - handle.seek(0) - msvcrt.locking(handle.fileno(), msvcrt.LK_LOCK, 1) - locked = True - except (ImportError, OSError): - # Platform without supported locking → proceed unguarded. Atomic - # rename in write_json still prevents torn writes. 
- pass - yield - finally: - if locked: - try: - try: - import fcntl # type: ignore[import-not-found] - fcntl.flock(handle.fileno(), fcntl.LOCK_UN) - except (ImportError, OSError): - import msvcrt # type: ignore[import-not-found] - handle.seek(0) - msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1) - except Exception: - pass - handle.close() - - -def load_config(spec_dir: Path) -> dict[str, Any]: - config_path = spec_dir / ".config.json" - if not config_path.exists(): - raise SystemExit(f"Missing config: {config_path}") - config = read_json(config_path, {}) - if not config.get("specId"): - raise SystemExit(f"Missing specId in config: {config_path}") - config.setdefault("lock", None) - config.setdefault("evictedSessions", []) - return config - - -def save_config(spec_dir: Path, config: dict[str, Any]) -> None: - """Forced write of .config.json. Any caller that mutated config must persist.""" - write_json(spec_dir / ".config.json", config) - - -def document_root_for(spec_dir: Path, config: dict[str, Any]) -> Path: - root = config.get("documentRoot") - if root: - return Path(root).expanduser().resolve() - return spec_dir.resolve().parent - - -def active_path(document_root: Path) -> Path: - return document_root.resolve() / ACTIVE_FILE - - -def ensure_within_root(spec_dir: Path, document_root: Path) -> None: - spec_resolved = spec_dir.resolve() - root_resolved = document_root.resolve() - try: - spec_resolved.relative_to(root_resolved) - except ValueError as exc: - raise SystemExit(f"Spec dir is outside document root: {spec_resolved} not under {root_resolved}") from exc - - -def _migrate_active_v1_to_v2(data: dict[str, Any], document_root: Path) -> dict[str, Any]: - sessions = data.get("sessions") or {} - migrated: dict[str, Any] = {} - for sid, entry in sessions.items(): - slug = entry.get("slug") or (Path(entry["specDir"]).name if entry.get("specDir") else None) - migrated[sid] = { - "sessionId": sid, - "specSlug": slug, - "specId": entry.get("specId"), - "status": 
entry.get("status") or "active", - "boundAt": entry.get("startedAt") or entry.get("updatedAt"), - "lastActivityAt": entry.get("updatedAt") or entry.get("lastActivityAt"), - } - data["version"] = ACTIVE_VERSION - data["sessions"] = migrated - data["documentRoot"] = str(document_root.resolve()) - return data - - -def load_active(document_root: Path) -> dict[str, Any]: - path = active_path(document_root) - data = read_json(path, {}) - if not data: - return { - "version": ACTIVE_VERSION, - "documentRoot": str(document_root.resolve()), - "updatedAt": None, - "sessions": {}, - } - if data.get("version", 1) < ACTIVE_VERSION: - data = _migrate_active_v1_to_v2(data, document_root) - data.setdefault("version", ACTIVE_VERSION) - data.setdefault("sessions", {}) - data["documentRoot"] = str(document_root.resolve()) - return data - - -def save_active(document_root: Path, data: dict[str, Any]) -> None: - data["documentRoot"] = str(document_root.resolve()) - data["updatedAt"] = now() - write_json(active_path(document_root), data) - - -def active_sessions(config: dict[str, Any]) -> list[str]: - sessions = config.get("sessions") or {} - return [ - session_id - for session_id, session in sessions.items() - if session.get("status") == "active" - ] - - -# --------------------------------------------------------------------------- -# Lock primitives (acquire / release / verify / force_acquire / heartbeat) -# --------------------------------------------------------------------------- - - -class LockHeld(SystemExit): - """Raised when acquire() finds the spec is held by a different session.""" - - def __init__(self, holder_id: str, last_heartbeat: str | None) -> None: - self.holder_id = holder_id - self.last_heartbeat = last_heartbeat - super().__init__(json.dumps({ - "error": "lock_held", - "holderSessionId": holder_id, - "lastHeartbeatAt": last_heartbeat, - }, ensure_ascii=False)) - - -def _lock_is_stale(lock: dict[str, Any]) -> bool: - ts = _parse_ts(lock.get("lastHeartbeatAt") or 
lock.get("acquiredAt")) - if ts is None: - return True - elapsed = (datetime.now(timezone.utc) - ts).total_seconds() - return elapsed > LOCK_STALE_SECONDS - - -def _record_eviction(config: dict[str, Any], holder: dict[str, Any], new_session: str, reason: str) -> None: - config.setdefault("evictedSessions", []).append({ - "sessionId": holder.get("sessionId"), - "evictedAt": now(), - "evictedBy": new_session, - "reason": reason, - }) - - -def _acquire(spec_dir: Path, session_id: str, *, force: bool, agent: str | None) -> dict[str, Any]: - config_path = spec_dir / ".config.json" - with _file_lock(config_path): - config = load_config(spec_dir) - lock = config.get("lock") or None - if lock and lock.get("sessionId") == session_id: - lock["lastHeartbeatAt"] = now() - config["lock"] = lock - save_config(spec_dir, config) - return {"action": "renewed", "lock": lock, "config": config} - if lock: - if _lock_is_stale(lock): - _record_eviction(config, lock, session_id, "stale") - elif force: - _record_eviction(config, lock, session_id, "force_acquire") - else: - raise LockHeld( - holder_id=lock.get("sessionId", "unknown"), - last_heartbeat=lock.get("lastHeartbeatAt"), - ) - new_lock = { - "sessionId": session_id, - "acquiredAt": now(), - "lastHeartbeatAt": now(), - "agent": agent or os.environ.get("SPECODE_AGENT") or "unknown", - "pid": os.getpid(), - } - config["lock"] = new_lock - save_config(spec_dir, config) - return {"action": "acquired" if not lock else "evicted", "lock": new_lock, "config": config} - - -def _release(spec_dir: Path, session_id: str) -> dict[str, Any]: - config_path = spec_dir / ".config.json" - with _file_lock(config_path): - config = load_config(spec_dir) - lock = config.get("lock") or None - if lock and lock.get("sessionId") == session_id: - config["lock"] = None - save_config(spec_dir, config) - return {"action": "released", "lock": None} - return {"action": "noop", "lock": lock} - - -def _heartbeat(spec_dir: Path, session_id: str) -> dict[str, Any]: - 
config_path = spec_dir / ".config.json" - with _file_lock(config_path): - config = load_config(spec_dir) - lock = config.get("lock") or None - if not lock or lock.get("sessionId") != session_id: - holder = lock.get("sessionId") if lock else None - raise SystemExit(json.dumps({ - "error": "lock_lost", - "expectedSessionId": session_id, - "actualHolder": holder, - }, ensure_ascii=False)) - lock["lastHeartbeatAt"] = now() - config["lock"] = lock - save_config(spec_dir, config) - return {"action": "heartbeat", "lock": lock} - - -def _verify(spec_dir: Path, session_id: str) -> dict[str, Any]: - config = load_config(spec_dir) - lock = config.get("lock") or None - evicted = config.get("evictedSessions") or [] - if lock and lock.get("sessionId") == session_id: - return {"status": "ok", "lock": lock} - if any(e.get("sessionId") == session_id for e in evicted): - latest = max( - (e for e in evicted if e.get("sessionId") == session_id), - key=lambda e: e.get("evictedAt", ""), - ) - return {"status": "evicted", "lock": lock, "eviction": latest} - return {"status": "not_held", "lock": lock} - - -def verify_and_heartbeat(spec_dir: Path, session_id: str) -> dict[str, Any]: - """Public wrapper: verify the caller still holds the lock and refresh heartbeat. - - Returns the same dict shape as _verify. When status == "ok" the lock's - heartbeat is bumped as a side-effect. Callers (e.g. task_swarm.writeback) - should branch on the returned `status` field. 
- """ - verify = _verify(spec_dir, session_id) - if verify.get("status") == "ok": - _heartbeat(spec_dir, session_id) - return verify - - -def command_acquire(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser().resolve() - session_id = normalize_session_id(args.session) - result = _acquire(spec_dir, session_id, force=args.force, agent=args.agent) - print(json.dumps({k: v for k, v in result.items() if k != "config"}, ensure_ascii=False, indent=2)) - return 0 - - -def command_release(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser().resolve() - session_id = normalize_session_id(args.session) - result = _release(spec_dir, session_id) - print(json.dumps(result, ensure_ascii=False, indent=2)) - return 0 +'''scripts/spec_session.py — 薄 launcher,把所有调用转给 spec_session.cli.main()。 +文件名 `spec_session.py` 保留作为外部 API surface:hooks/hooks.json、 +commands/*.md、tests/conftest.py:run_script 都按此路径调用。实现拆到同目录的 +`spec_session/` 包内(_io / _selectors / _reminders / _business / _hooks / +_catalog / cli),launcher 只做三件事: -def command_heartbeat(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser().resolve() - session_id = normalize_session_id(args.session) - result = _heartbeat(spec_dir, session_id) - print(json.dumps(result, ensure_ascii=False, indent=2)) - return 0 + 1. Windows utf-8 stdout/stderr reconfigure(让 emoji / 中文 emit 不再 + UnicodeEncodeError 被 _safe_hook 吞掉) + 2. sys.path 注入 scripts/,让包内 spec_log import 可用 + 3. 
import spec_session.cli.main 并调用 +Python 的 import system 在同一 path entry 下 package > module,所以 +`scripts/spec_session.py` 与 `scripts/spec_session/` 共存安全:launcher 作为 +脚本被 exec、`import spec_session` 解析为 package。 -def command_verify(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser().resolve() - session_id = normalize_session_id(args.session) - result = _verify(spec_dir, session_id) - print(json.dumps(result, ensure_ascii=False, indent=2)) - return 0 if result["status"] == "ok" else 3 - - -# --------------------------------------------------------------------------- -# Existing session lifecycle (start / continue / status / end / list / load) -# --------------------------------------------------------------------------- - - -def update_config_session( - spec_dir: Path, - config: dict[str, Any], - session_id: str, - status: str, - phase: str, - reason: str | None = None, -) -> dict[str, Any]: - timestamp = now() - prev_phase = config.get("currentPhase") - prev_status = config.get("sessionStatus") - sessions = config.setdefault("sessions", {}) - session = sessions.setdefault(session_id, {"startedAt": timestamp}) - session["status"] = status - session["currentPhase"] = phase - session["lastActivityAt"] = timestamp - if status == "active": - session.setdefault("startedAt", timestamp) - session["endedAt"] = None - session["endedReason"] = None - else: - session["endedAt"] = timestamp - session["endedReason"] = reason or "ended" - - config["currentSessionId"] = session_id - config["sessionStatus"] = status - config["currentPhase"] = phase - config["lastActivityAt"] = timestamp - config["persistentMode"] = bool(active_sessions(config)) - if status != "active" and not config["persistentMode"]: - config["endedAt"] = timestamp - config["endedReason"] = reason or "ended" - else: - config["endedAt"] = None - config["endedReason"] = None - save_config(spec_dir, config) - - slug = config.get("slug") or spec_dir.name - if status == "ended": - 
spec_telemetry.emit( - "spec.end", - spec_slug=slug, - spec_dir=str(spec_dir), - session_id=session_id, - ended_phase=prev_phase, - reason=reason or "ended", - ) - elif prev_phase != phase: - spec_telemetry.emit( - "spec.phase_transition", - spec_slug=slug, - spec_dir=str(spec_dir), - session_id=session_id, - from_phase=prev_phase, - to_phase=phase, - prev_status=prev_status, - status=status, - ) - return config - - -def entry_for(spec_dir: Path, config: dict[str, Any], session_id: str) -> dict[str, Any]: - return { - "sessionId": session_id, - "specSlug": config.get("slug") or spec_dir.name, - "specId": config["specId"], - "status": "active", - "boundAt": now(), - "lastActivityAt": now(), - } - - -def resolve_active(document_root: Path, session_id: str) -> tuple[Path, dict[str, Any], dict[str, Any]]: - active = load_active(document_root) - entry = active.get("sessions", {}).get(session_id) - if not entry or entry.get("status") != "active": - raise SystemExit(f"No active spec session '{session_id}' under {document_root}") - slug = entry.get("specSlug") or entry.get("slug") - if not slug: - raise SystemExit(f"Active pointer entry for '{session_id}' has no specSlug") - spec_dir = (document_root / slug).resolve() - config = load_config(spec_dir) - ensure_within_root(spec_dir, document_root) - if config.get("specId") != entry.get("specId"): - raise SystemExit( - f"Active pointer specId mismatch for session '{session_id}'. " - f"Refusing to continue to avoid cross-spec contamination." 
- ) - return spec_dir, config, entry - - -def _bind_session(spec_dir: Path, config: dict[str, Any], session_id: str, phase: str) -> dict[str, Any]: - document_root = document_root_for(spec_dir, config) - ensure_within_root(spec_dir, document_root) - config = update_config_session(spec_dir, config, session_id, "active", phase) - active = load_active(document_root) - active["sessions"][session_id] = entry_for(spec_dir, config, session_id) - save_active(document_root, active) - return active["sessions"][session_id] - - -def command_start(args: argparse.Namespace) -> int: - session_id = normalize_session_id(args.session) - spec_dir = Path(args.spec_dir).expanduser().resolve() - config = load_config(spec_dir) - document_root = document_root_for(spec_dir, config) - ensure_within_root(spec_dir, document_root) - - requested_phase = args.phase - if not requested_phase: - requested_phase = config.get("currentPhase") or "intake" - if requested_phase not in PHASES or requested_phase == "ended": - raise SystemExit(f"Invalid active phase: {requested_phase}") - - if getattr(args, "acquire", True): - _acquire(spec_dir, session_id, force=getattr(args, "force", False), agent=getattr(args, "agent", None)) - config = load_config(spec_dir) - - entry = _bind_session(spec_dir, config, session_id, requested_phase) - print(json.dumps({"active": entry, "activeFile": str(active_path(document_root))}, ensure_ascii=False, indent=2)) - return 0 - - -def command_status(args: argparse.Namespace) -> int: - session_id = normalize_session_id(args.session) - if args.spec_dir: - spec_dir = Path(args.spec_dir).expanduser().resolve() - config = load_config(spec_dir) - document_root = document_root_for(spec_dir, config) - ensure_within_root(spec_dir, document_root) - entry = load_active(document_root).get("sessions", {}).get(session_id) - if entry and entry.get("specId") != config.get("specId"): - raise SystemExit( - f"Active pointer specId mismatch for session '{session_id}'. 
" - f"Refusing to report a different spec." - ) - else: - if not args.root: - raise SystemExit("status without spec_dir requires --root") - document_root = Path(args.root).expanduser().resolve() - spec_dir, config, entry = resolve_active(document_root, session_id) - - lock = config.get("lock") or None - result = { - "sessionId": session_id, - "specDir": str(spec_dir), - "specId": config.get("specId"), - "requirementName": config.get("requirementName"), - "workflowType": config.get("workflowType"), - "specType": config.get("specType"), - "persistentMode": config.get("persistentMode", False), - "sessionStatus": (config.get("sessions") or {}).get(session_id, {}).get("status", config.get("sessionStatus")), - "currentPhase": (config.get("sessions") or {}).get(session_id, {}).get("currentPhase", config.get("currentPhase")), - "iterationRound": config.get("iterationRound"), - "activeFile": str(active_path(document_root)), - "activePointer": entry, - "lock": lock, - "lockHeldBy": (lock or {}).get("sessionId"), - "lockOwnedByCurrentSession": bool(lock and lock.get("sessionId") == session_id), - } - if args.json: - print(json.dumps(result, ensure_ascii=False, indent=2)) - else: - print(f"Session: {result['sessionId']}") - print(f"Spec: {result['requirementName'] or Path(result['specDir']).name}") - print(f"Path: {result['specDir']}") - print(f"Status: {result['sessionStatus'] or 'unknown'}") - print(f"Phase: {result['currentPhase'] or 'unknown'}") - if result["iterationRound"]: - print(f"Iteration round: {result['iterationRound']}") - print(f"Persistent: {str(result['persistentMode']).lower()}") - if lock: - owned = "本会话" if result["lockOwnedByCurrentSession"] else f"其他: {result['lockHeldBy']}" - print(f"Lock: {owned} (last heartbeat: {lock.get('lastHeartbeatAt')})") - else: - print("Lock: 空闲") - print(f"Active file: {result['activeFile']}") - return 0 - - -def command_end(args: argparse.Namespace) -> int: - session_id = normalize_session_id(args.session) - if args.spec_dir: 
- spec_dir = Path(args.spec_dir).expanduser().resolve() - config = load_config(spec_dir) - document_root = document_root_for(spec_dir, config) - ensure_within_root(spec_dir, document_root) - else: - if not args.root: - raise SystemExit("end without spec_dir requires --root") - document_root = Path(args.root).expanduser().resolve() - spec_dir, config, _entry = resolve_active(document_root, session_id) - - update_config_session(spec_dir, config, session_id, "ended", "ended", args.reason) - _release(spec_dir, session_id) - active = load_active(document_root) - entry = active.get("sessions", {}).get(session_id) - if entry: - if entry.get("specId") and entry.get("specId") != config.get("specId"): - raise SystemExit( - f"Active pointer specId mismatch for session '{session_id}'. " - f"Refusing to end a different spec." - ) - active["sessions"].pop(session_id, None) - save_active(document_root, active) - - print(json.dumps({"sessionId": session_id, "specDir": str(spec_dir), "status": "ended"}, ensure_ascii=False, indent=2)) - return 0 - - -def command_list(args: argparse.Namespace) -> int: - document_root = Path(args.root).expanduser().resolve() - active = load_active(document_root) - sessions = active.get("sessions", {}) - if args.json: - print(json.dumps({"documentRoot": str(document_root), "sessions": sessions}, ensure_ascii=False, indent=2)) - else: - print(f"Document root: {document_root}") - if not sessions: - print("No active spec sessions.") - return 0 - for session_id, entry in sorted(sessions.items()): - print( - f"- {session_id}: {entry.get('specSlug') or entry.get('slug')} " - f"({entry.get('status')}, lastActivity: {entry.get('lastActivityAt')})" - ) - return 0 - - -def command_list_specs(args: argparse.Namespace) -> int: - document_root = Path(args.root).expanduser().resolve() - if not document_root.exists(): - raise SystemExit(f"Document root does not exist: {document_root}") - - specs: list[dict[str, Any]] = [] - for child in 
sorted(document_root.iterdir(), key=lambda item: item.name): - if not child.is_dir(): - continue - config_path = child / ".config.json" - if not config_path.exists(): - continue - try: - config = load_config(child) - ensure_within_root(child, document_root) - except SystemExit as exc: - specs.append({ - "slug": child.name, - "specDir": str(child.resolve()), - "valid": False, - "error": str(exc), - }) - continue - lock = config.get("lock") or None - specs.append({ - "slug": config.get("slug") or child.name, - "requirementName": config.get("requirementName"), - "specDir": str(child.resolve()), - "specId": config.get("specId"), - "workflowType": config.get("workflowType"), - "specType": config.get("specType"), - "currentPhase": config.get("currentPhase"), - "sessionStatus": config.get("sessionStatus"), - "iterationRound": config.get("iterationRound"), - "lastActivityAt": config.get("lastActivityAt"), - "lock": lock, - "lockHeldBy": (lock or {}).get("sessionId"), - "lockStale": bool(lock and _lock_is_stale(lock)), - "valid": True, - }) - - if args.json: - print(json.dumps({"documentRoot": str(document_root), "specs": specs}, ensure_ascii=False, indent=2)) - else: - print(f"Document root: {document_root}") - if not specs: - print("No specs found.") - return 0 - for spec in specs: - if not spec.get("valid"): - print(f"- {spec.get('slug')}: invalid ({spec.get('error')})") - continue - lock_state = "空闲" - if spec["lockHeldBy"]: - lock_state = f"锁定于 {spec['lockHeldBy']}" - if spec["lockStale"]: - lock_state += "(已过期)" - iteration = f", iter {spec['iterationRound']}" if spec.get("iterationRound") else "" - print( - f"- {spec.get('slug')}: {spec.get('requirementName') or spec.get('slug')} " - f"({spec.get('currentPhase') or 'unknown'}{iteration}, {lock_state})" - ) - return 0 - - -# --------------------------------------------------------------------------- -# Document loading (used by /continue context restoration and read-only) -# 
--------------------------------------------------------------------------- - - -def task_section(text: str) -> str: - """Extract the ## 任务 section from tasks.md text, or return whole text.""" - start = text.find("## 任务") - if start == -1: - return text - tail = text[start:] - end_match = re.search(r"\n##\s+", tail[len("## 任务"):]) - if not end_match: - return tail - return tail[: len("## 任务") + end_match.start()] - - -def _file_info(path: Path) -> dict[str, Any]: - if not path.exists(): - return {"exists": False} - mtime = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc).isoformat() - return { - "exists": True, - "modifiedAt": mtime, - "modifiedTs": path.stat().st_mtime, - "text": path.read_text(encoding="utf-8"), - } - - -def command_load(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser().resolve() - config = load_config(spec_dir) - document_root = document_root_for(spec_dir, config) - ensure_within_root(spec_dir, document_root) - - req_info = _file_info(spec_dir / "requirements.md") - bug_info = _file_info(spec_dir / "bugfix.md") - design_info = _file_info(spec_dir / "design.md") - tasks_info = _file_info(spec_dir / "tasks.md") - - req_doc = req_info if req_info["exists"] else bug_info - req_name = "requirements.md" if req_info["exists"] else "bugfix.md" - shall_count = 0 - req_open_questions = False - if req_doc.get("exists"): - shall_count = req_doc["text"].count("SHALL") - req_open_questions = "待确认问题" in req_doc["text"] - - design_open_questions = False - if design_info.get("exists"): - design_open_questions = "待确认问题" in design_info["text"] - - counts: dict[str, int] = {label: 0 for label in TASK_LABELS.values()} - counts["total"] = 0 - in_progress: list[str] = [] - if tasks_info.get("exists"): - section = task_section(tasks_info["text"]) - for match in TASK_RE.finditer(section): - status_label = TASK_LABELS.get(match.group(1), "pending") - counts["total"] += 1 - counts[status_label] += 1 - if status_label == 
"in_progress": - in_progress.append(match.group(2).strip()) - - lock = config.get("lock") or None - session_id = normalize_session_id(getattr(args, "session", None)) - result: dict[str, Any] = { - "specDir": str(spec_dir), - "slug": config.get("slug") or spec_dir.name, - "specId": config.get("specId"), - "requirementName": config.get("requirementName"), - "currentPhase": config.get("currentPhase"), - "iterationRound": config.get("iterationRound"), - "sessionStatus": config.get("sessionStatus"), - "currentSessionId": config.get("currentSessionId"), - "lastActivityAt": config.get("lastActivityAt"), - "lock": lock, - "lockHeldBy": (lock or {}).get("sessionId"), - "lockOwnedByCurrentSession": bool(lock and lock.get("sessionId") == session_id), - "documents": { - req_name: { - "exists": req_doc.get("exists", False), - "modifiedAt": req_doc.get("modifiedAt"), - "shallCount": shall_count, - "hasOpenQuestions": req_open_questions, - }, - "design.md": { - "exists": design_info.get("exists", False), - "modifiedAt": design_info.get("modifiedAt"), - "hasOpenQuestions": design_open_questions, - }, - "tasks.md": { - "exists": tasks_info.get("exists", False), - "modifiedAt": tasks_info.get("modifiedAt"), - "counts": counts, - "inProgress": in_progress, - }, - }, - } - - if args.json: - print(json.dumps(result, ensure_ascii=False, indent=2)) - return 0 - - w = DOC_COL_WIDTH - slug = result["slug"] - phase = result["currentPhase"] or "unknown" - session_label = result["currentSessionId"] or "unknown" - s_status = result["sessionStatus"] or "unknown" - print(f"已加载 spec: {slug}") - print(f" specId: {result['specId']}") - print(f" phase: {phase}") - if result["iterationRound"]: - print(f" iteration: 第 {result['iterationRound']} 轮") - print(f" session: {session_label} ({s_status})") - if lock: - owner = "本会话持有" if result["lockOwnedByCurrentSession"] else f"⚠ 锁定于 {result['lockHeldBy']}" - print(f" lock: {owner} (last heartbeat: {lock.get('lastHeartbeatAt')})") - else: - print(" lock: 
空闲") - print() - req_d = result["documents"][req_name] - if req_d["exists"]: - q = " | 有待确认问题" if req_d["hasOpenQuestions"] else "" - print(f" {req_name:<{w}} ← {req_d['shallCount']} 条验收标准{q} | 修改: {req_d['modifiedAt']}") - else: - print(f" {req_name:<{w}} ← 不存在") - design_d = result["documents"]["design.md"] - if design_d["exists"]: - q = " | 有待确认问题" if design_d["hasOpenQuestions"] else "" - print(f" {'design.md':<{w}} ←{q} | 修改: {design_d['modifiedAt']}") - else: - print(f" {'design.md':<{w}} ← 不存在") - tasks_d = result["documents"]["tasks.md"] - if tasks_d["exists"]: - c = tasks_d["counts"] - prog = f", 进行中: {', '.join(tasks_d['inProgress'])}" if tasks_d["inProgress"] else "" - print(f" {'tasks.md':<{w}} ← {c['completed']}/{c['total']} 已完成, {c['pending']} 待处理{prog} | 修改: {tasks_d['modifiedAt']}") - else: - print(f" {'tasks.md':<{w}} ← 不存在") - return 0 - - -# --------------------------------------------------------------------------- -# Iteration bookkeeping -# --------------------------------------------------------------------------- - - -def command_iterate(args: argparse.Namespace) -> int: - """Advance a spec into a new iteration round. 
Used at /spec-accept moment.""" - spec_dir = Path(args.spec_dir).expanduser().resolve() - config_path = spec_dir / ".config.json" - with _file_lock(config_path): - config = load_config(spec_dir) - current_round = int(config.get("iterationRound") or 0) - history = config.setdefault("iterationHistory", []) - if current_round > 0: - for entry in reversed(history): - if entry.get("round") == current_round and "completedAt" not in entry: - entry["completedAt"] = now() - entry["newReqCount"] = args.new_req_count or 0 - break - new_round = current_round + 1 - config["iterationRound"] = new_round - history.append({ - "round": new_round, - "startedAt": now(), - "newReqCount": 0, - }) - config["currentPhase"] = "iteration" - save_config(spec_dir, config) - print(json.dumps({"iterationRound": new_round, "specDir": str(spec_dir)}, ensure_ascii=False, indent=2)) - return 0 - - -# --------------------------------------------------------------------------- -# CLI wiring -# --------------------------------------------------------------------------- - - -def main() -> int: - parser = argparse.ArgumentParser(description="Manage persistent specode sessions.") - subparsers = parser.add_subparsers(dest="command", required=True) - session_help = "Window/thread/session id. Defaults to $TERM_SESSION_ID or 'default'." 
- - start = subparsers.add_parser("start", help="Bind a session to a spec and mark it active.") - start.add_argument("spec_dir") - start.add_argument("--session", help=session_help) - start.add_argument("--phase", choices=sorted(PHASES - {"ended"}), default="intake") - start.add_argument("--no-acquire", dest="acquire", action="store_false") - start.add_argument("--force", action="store_true", help="Force-acquire lock even if held by another session.") - start.add_argument("--agent", help="Agent name recorded into lock metadata.") - start.set_defaults(func=command_start, acquire=True) - - cont = subparsers.add_parser("continue", help="Resume or switch the current session to a spec.") - cont.add_argument("spec_dir") - cont.add_argument("--session", help=session_help) - cont.add_argument("--phase", choices=sorted(PHASES - {"ended"}), default=None, - help="Override phase. Defaults to .config.json.currentPhase.") - cont.add_argument("--no-acquire", dest="acquire", action="store_false") - cont.add_argument("--force", action="store_true", help="Force-acquire lock from another session.") - cont.add_argument("--agent", help="Agent name recorded into lock metadata.") - cont.set_defaults(func=command_start, acquire=True) - - status = subparsers.add_parser("status", help="Show session/spec lifecycle status.") - status.add_argument("spec_dir", nargs="?") - status.add_argument("--root", help="Document root used when spec_dir is omitted.") - status.add_argument("--session", help=session_help) - status.add_argument("--json", action="store_true") - status.set_defaults(func=command_status) - - end = subparsers.add_parser("end", help="End the active session without deleting spec documents.") - end.add_argument("spec_dir", nargs="?") - end.add_argument("--root", help="Document root used when spec_dir is omitted.") - end.add_argument("--session", help=session_help) - end.add_argument("--reason", default="user ended") - end.set_defaults(func=command_end) - - list_cmd = 
subparsers.add_parser("list", help="List active sessions under a document root.") - list_cmd.add_argument("--root", required=True) - list_cmd.add_argument("--json", action="store_true") - list_cmd.set_defaults(func=command_list) - - list_specs_cmd = subparsers.add_parser("list-specs", help="List spec folders under a configured document root.") - list_specs_cmd.add_argument("--root", required=True) - list_specs_cmd.add_argument("--json", action="store_true") - list_specs_cmd.set_defaults(func=command_list_specs) - - load_cmd = subparsers.add_parser("load", help="Load and summarize spec documents for context restoration.") - load_cmd.add_argument("spec_dir") - load_cmd.add_argument("--session", help=session_help) - load_cmd.add_argument("--json", action="store_true") - load_cmd.set_defaults(func=command_load) - - acquire_cmd = subparsers.add_parser("acquire", help="Acquire the spec lock for this session.") - acquire_cmd.add_argument("spec_dir") - acquire_cmd.add_argument("--session", help=session_help) - acquire_cmd.add_argument("--force", action="store_true", help="Force-acquire even if held by another session.") - acquire_cmd.add_argument("--agent", help="Agent name recorded into lock metadata.") - acquire_cmd.set_defaults(func=command_acquire) - - release_cmd = subparsers.add_parser("release", help="Release the spec lock if held by this session.") - release_cmd.add_argument("spec_dir") - release_cmd.add_argument("--session", help=session_help) - release_cmd.set_defaults(func=command_release) +stdlib-only。 +''' +from __future__ import annotations - hb_cmd = subparsers.add_parser("heartbeat", help="Refresh lock lastHeartbeatAt; fail if lock lost.") - hb_cmd.add_argument("spec_dir") - hb_cmd.add_argument("--session", help=session_help) - hb_cmd.set_defaults(func=command_heartbeat) +import contextlib +import sys +from pathlib import Path - verify_cmd = subparsers.add_parser("verify-lock", help="Check whether this session still holds the spec lock.") - 
verify_cmd.add_argument("spec_dir") - verify_cmd.add_argument("--session", help=session_help) - verify_cmd.set_defaults(func=command_verify) +# Windows 子进程 pipe stdout 的 encoding 会 fallback 到 locale(中文 Windows 是 +# cp936/gbk),无法编码 emoji 等非 BMP 字符 → emit 时 UnicodeEncodeError 被 +# _safe_hook 吞掉 → 主代理收不到任何 hook 注入。强制 utf-8 + errors=replace +# 兜底。stderr 同步以保证异常 trace 可读。 +with contextlib.suppress(Exception): + sys.stdout.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined] +with contextlib.suppress(Exception): + sys.stderr.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined] - iter_cmd = subparsers.add_parser("iterate", help="Advance the spec into a new iteration round.") - iter_cmd.add_argument("spec_dir") - iter_cmd.add_argument("--new-req-count", type=int, default=0) - iter_cmd.set_defaults(func=command_iterate) +_SCRIPTS_DIR = Path(__file__).resolve().parent +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) - args = parser.parse_args() - try: - return args.func(args) - except LockHeld as exc: - print(str(exc), file=sys.stderr) - return 4 +from spec_session.cli import main # noqa: E402 if __name__ == "__main__": - raise SystemExit(main()) + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_session/__init__.py b/plugins/specode/scripts/spec_session/__init__.py new file mode 100644 index 0000000..1d7d36b --- /dev/null +++ b/plugins/specode/scripts/spec_session/__init__.py @@ -0,0 +1,27 @@ +'''spec_session package public surface. 
+ +外部依赖入口: + - scripts/spec_session.py launcher 调 `spec_session.cli.main()` + - scripts/spec_status.py 用 `from spec_session import read_session, + read_spec_config, _session_short, _is_lock_stale`(0.10.22 拆分前 + 这 4 个符号在 scripts/spec_session.py 文件 module 级;现在它们的 + canonical 位置是 spec_session._io,本文件 re-export 保持外部 import + 路径不变,避免改 spec_status.py)。 + +stdlib-only。 +''' +from __future__ import annotations + +from spec_session._io import ( # noqa: F401 + _is_lock_stale, + _session_short, + read_session, + read_spec_config, +) + +__all__ = [ + "_is_lock_stale", + "_session_short", + "read_session", + "read_spec_config", +] diff --git a/plugins/specode/scripts/spec_session/_business.py b/plugins/specode/scripts/spec_session/_business.py new file mode 100644 index 0000000..5e7e176 --- /dev/null +++ b/plugins/specode/scripts/spec_session/_business.py @@ -0,0 +1,679 @@ +'''spec_session package 内部实现:业务子命令(cmd_*)+ _update_session_for_spec + _auto_pending_selector。 + +这些命令被 hooks.json 引导主代理调用,全部接 --session 参数; +任何写入失败必须回滚已变更的另一份文件 + 返回非零 exit。 + +不要直接运行本文件。stdlib-only。 +''' +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Optional + +from spec_session._io import ( + VALID_PHASES, + _atomic_write_json, + _emit_json, + _ensure_spec_dir, + _is_lock_stale, + _now_iso, + _session_short, + read_session, + read_spec_config, + session_file_path, + write_session_atomic, + write_spec_config_atomic, +) + + +def _update_session_for_spec(session_id: str, spec_dir: Path, cfg: dict, + mode: str = "active", + lock_state: str = "ok", + pending_selector: Optional[str] = ...) -> dict: + """构造 sessions/.json 的常规更新。pending_selector=... 
表示沿用 spec config 中的值。""" + existing = read_session(session_id) or {} + if pending_selector is ...: + pending = cfg.get("pending_selector") + else: + pending = pending_selector + payload = { + "session_id": session_id, + "started_at": existing.get("started_at") or _now_iso(), + "last_activity_at": _now_iso(), + "ended_at": None, + "mode": mode, + "active_spec_slug": cfg.get("slug"), + "active_spec_dir": str(spec_dir), + "spec_id": cfg.get("specId"), + "phase": cfg.get("phase"), + "lock_state": lock_state, + "task_swarm_run_id": existing.get("task_swarm_run_id"), + "pending_selector": pending, + } + return payload + + +def cmd_acquire(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + + now = _now_iso() + lock = cfg.get("lock") or {} + holder = lock.get("holder") + + if holder and holder != args.session and not _is_lock_stale(lock) and not args.force: + _emit_json({ + "ok": False, + "reason": "LockHeld", + "holder": holder, + "last_heartbeat_at": lock.get("last_heartbeat_at"), + }) + return 4 + + # 备份用于回滚 + prior_cfg = json.loads(json.dumps(cfg)) + prior_session_blob: Optional[str] = None + sp = session_file_path(args.session) + if sp.exists(): + try: + prior_session_blob = sp.read_text(encoding="utf-8") + except Exception: + prior_session_blob = None + + cfg["lock"] = { + "holder": args.session, + "acquired_at": now, + "last_heartbeat_at": now, + } + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"写入 spec config 失败:{e}\n") + return 1 + + try: + session_payload = _update_session_for_spec(args.session, spec_dir, cfg, + mode="active", lock_state="ok") + write_session_atomic(args.session, session_payload) + except Exception as e: + # 回滚 spec config + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"写入 sessions 失败,已回滚 spec 
config:{e}\n") + return 1 + + _emit_json({"ok": True, "holder": args.session, "acquired_at": now}) + return 0 + + +def cmd_release(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 0 # release 容忍:spec config 缺失视作已释放 + prior_cfg = json.loads(json.dumps(cfg)) + lock = cfg.get("lock") or {} + if lock.get("holder") == args.session: + cfg["lock"] = None + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"释放锁写入失败:{e}\n") + return 1 + # 更新 sessions + try: + existing = read_session(args.session) or {} + existing["last_activity_at"] = _now_iso() + existing["lock_state"] = "released" + write_session_atomic(args.session, existing) + except Exception as e: + # 回滚 spec config + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"写入 sessions 失败,已回滚 spec config:{e}\n") + return 1 + _emit_json({"ok": True, "released_at": _now_iso()}) + return 0 + + +def cmd_heartbeat(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + lock = cfg.get("lock") or {} + if lock.get("holder") != args.session: + _emit_json({"ok": False, "reason": "lock_lost", "holder": lock.get("holder")}) + return 1 + prior_cfg = json.loads(json.dumps(cfg)) + now = _now_iso() + cfg["lock"]["last_heartbeat_at"] = now + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"heartbeat 写入失败:{e}\n") + return 1 + try: + existing = read_session(args.session) or {} + existing["last_activity_at"] = now + existing["lock_state"] = "ok" + write_session_atomic(args.session, existing) + except Exception as e: + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"heartbeat 
sessions 写入失败,已回滚:{e}\n") + return 1 + _emit_json({"ok": True, "last_heartbeat_at": now}) + return 0 + + +def cmd_verify_lock(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 3 + lock = cfg.get("lock") or {} + holder = lock.get("holder") + if not holder: + _emit_json({"ok": False, "reason": "not_held"}) + return 3 + if holder != args.session: + if _is_lock_stale(lock): + _emit_json({"ok": False, "reason": "stale_lock", "holder": holder}) + return 3 + _emit_json({"ok": False, "reason": "evicted", "holder": holder}) + return 3 + _emit_json({"ok": True, "holder": holder, "last_heartbeat_at": lock.get("last_heartbeat_at")}) + return 0 + + +def cmd_phase_transition(args: argparse.Namespace) -> int: + if args.frm not in VALID_PHASES or args.to not in VALID_PHASES: + sys.stderr.write(f"非法 phase:{args.frm} → {args.to}\n") + return 1 + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + lock = cfg.get("lock") or {} + if lock.get("holder") != args.session: + _emit_json({"ok": False, "reason": "lock_lost"}) + return 1 + if cfg.get("phase") != args.frm: + _emit_json({ + "ok": False, + "reason": "phase_mismatch", + "current": cfg.get("phase"), + "expected_from": args.frm, + }) + return 1 + prior_cfg = json.loads(json.dumps(cfg)) + prior_session = read_session(args.session) + cfg["phase"] = args.to + # 自动推断 pending_selector + auto = _auto_pending_selector(args.to, cfg) + cfg["pending_selector"] = auto + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"phase-transition 写 spec config 失败:{e}\n") + return 1 + try: + payload = _update_session_for_spec(args.session, spec_dir, cfg, + mode="active", lock_state="ok", + pending_selector=auto) + write_session_atomic(args.session, payload) + except 
Exception as e: + try: + write_spec_config_atomic(spec_dir, prior_cfg) + if prior_session is not None: + write_session_atomic(args.session, prior_session) + except Exception: + pass + sys.stderr.write(f"phase-transition 写 sessions 失败,已回滚:{e}\n") + return 1 + _emit_json({"ok": True, "phase": args.to, "pending_selector": auto}) + return 0 + + +def _auto_pending_selector(phase: str, cfg: dict) -> Optional[str]: + """根据 phase 推断默认 pending_selector(命令层可显式覆写)。""" + workflow = cfg.get("workflow") + if phase == "intake": + return "workflow-choice" + if phase == "requirements": + return "doc-confirm-requirements" + if phase == "bugfix": + return "doc-confirm-bugfix" + if phase == "design": + return "doc-confirm-design" + if phase == "tasks": + return "tasks-execution" + if phase == "implementation": + return None + if phase == "acceptance": + return "acceptance-gate" + # phase == "iteration" / "implementation" / 其它:不自动注入 selector。 + # iteration 是已交付常驻态,停在 chat 等用户显式提出下一轮调整意图后, + # 主代理判断到调整范围明确时再主动呈现 iteration-scope。 + return None + + +def cmd_load(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + _emit_json({ + "ok": True, + "spec_dir": str(spec_dir), + "config": cfg, + }) + return 0 + + +def cmd_continue(args: argparse.Namespace) -> int: + spec_dir = _ensure_spec_dir(args.spec) + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + lock = cfg.get("lock") or {} + holder = lock.get("holder") + mode = "active" + lock_state = "ok" + pending = cfg.get("pending_selector") + + if holder and holder != args.session and not _is_lock_stale(lock) and not args.force: + if args.readonly: + mode = "readonly" + lock_state = "readonly" + else: + # 提示走 takeover selector + cfg["pending_selector"] = "takeover-options" + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception 
as e: + sys.stderr.write(f"写 spec config 失败:{e}\n") + return 1 + try: + payload = _update_session_for_spec(args.session, spec_dir, cfg, + mode="readonly", lock_state="readonly", + pending_selector="takeover-options") + write_session_atomic(args.session, payload) + except Exception as e: + sys.stderr.write(f"写 sessions 失败:{e}\n") + return 1 + _emit_json({ + "ok": False, + "reason": "LockHeld", + "holder": holder, + "pending_selector": "takeover-options", + "spec_dir": str(spec_dir), + }) + return 4 + else: + # 抢锁(force / stale / 同 session / 无 holder) + prior_cfg = json.loads(json.dumps(cfg)) + now = _now_iso() + cfg["lock"] = { + "holder": args.session, + "acquired_at": now, + "last_heartbeat_at": now, + } + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"写 spec config 失败:{e}\n") + return 1 + try: + payload = _update_session_for_spec(args.session, spec_dir, cfg, + mode=mode, lock_state=lock_state, + pending_selector=pending) + write_session_atomic(args.session, payload) + except Exception as e: + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"写 sessions 失败,已回滚 spec config:{e}\n") + return 1 + # 更新 active-pointer + try: + root = Path(cfg.get("doc_root") or spec_dir.parent.parent) + active_path = root / ".active-specode.json" + _atomic_write_json(active_path, { + "active_spec_slug": cfg.get("slug"), + "active_spec_dir": str(spec_dir), + "specId": cfg.get("specId"), + "updatedAt": now, + "session_id": args.session, + }) + except Exception: + pass + + _emit_json({ + "ok": True, + "spec_dir": str(spec_dir), + "mode": mode, + "phase": cfg.get("phase"), + "pending_selector": pending, + }) + return 0 + + +def cmd_end(args: argparse.Namespace) -> int: + existing = read_session(args.session) + if existing is None: + # 即使 sessions 文件不存在,也写一份 ended 状态,便于排查 + existing = { + "session_id": args.session, + "started_at": _now_iso(), + } + spec_dir_str = existing.get("active_spec_dir") 
+ prior_cfg: Optional[dict] = None + spec_dir: Optional[Path] = None + if spec_dir_str: + try: + spec_dir = Path(spec_dir_str) + if spec_dir.exists(): + cfg = read_spec_config(spec_dir) + if cfg is not None: + prior_cfg = json.loads(json.dumps(cfg)) + lock = cfg.get("lock") or {} + if lock.get("holder") == args.session: + cfg["lock"] = None + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"释锁写入失败:{e}\n") + return 1 + except Exception as e: + sys.stderr.write(f"end 读取 spec config 出错:{e}\n") + + existing["mode"] = "ended" + existing["ended_at"] = _now_iso() + existing["lock_state"] = "released" + existing["pending_selector"] = None + # 对齐 end.md 文档:清 active_spec_* / task_swarm_run_id + existing["active_spec_slug"] = None + existing["active_spec_dir"] = None + existing["spec_id"] = None + existing["phase"] = None + existing["task_swarm_run_id"] = None + # 标记:下一次 UserPromptSubmit 时由 hook 注入一次性反向提醒, + # 抵消此前 N 个 turn 注入的 STATUS_FOOTER_TEMPLATE / SPEC_MODE_CONTINUE_REMINDER + existing["post_end_reminder_pending"] = True + try: + write_session_atomic(args.session, existing) + except Exception as e: + # 回滚 spec config + if spec_dir is not None and prior_cfg is not None: + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"sessions 写入失败,已回滚:{e}\n") + return 1 + + _emit_json({"ok": True, "ended_at": existing["ended_at"]}) + return 0 + + +def cmd_set_project_root(args: argparse.Namespace) -> int: + """0.10.15+:写 .config.json.project_root + 推进 pending_selector→workflow-choice。 + + 由 project-root-choice selector 选定后由主代理调用。 + + args: + --spec spec 目录 + --session lock holder 必须是当前 session + --root 绝对路径;不存在则 mkdir -p;存在但非目录则 exit 1 + + 幂等:重复调用以最后一次为准。 + """ + spec_dir = Path(args.spec) + if not spec_dir.exists(): + sys.stderr.write(f"spec 目录不存在:{spec_dir}\n") + return 1 + cfg = read_spec_config(spec_dir) + if cfg is None: + sys.stderr.write(f"无法读取 {spec_dir}/.config.json\n") + return 1 + 
lock = cfg.get("lock") or {} + if lock.get("holder") != args.session: + sys.stderr.write( + f"lock holder 不是当前 session " + f"(holder={_session_short(lock.get('holder'))} vs current={_session_short(args.session)})\n" + ) + return 1 + + root_path = Path(args.root) + if not root_path.is_absolute(): + sys.stderr.write(f"--root 必须是绝对路径,收到:{args.root!r}\n") + return 1 + if root_path.exists(): + if not root_path.is_dir(): + sys.stderr.write(f"--root 存在但不是目录:{root_path}\n") + return 1 + else: + # 自动创建(覆盖"cwd/slug 新项目子目录"场景;自定义路径也可借此创建) + try: + root_path.mkdir(parents=True, exist_ok=True) + except Exception as e: + sys.stderr.write(f"创建 --root 目录失败:{root_path}:{e}\n") + return 1 + + prior_cfg = json.loads(json.dumps(cfg)) + cfg["project_root"] = str(root_path) + cfg["pending_selector"] = "workflow-choice" + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception as e: + sys.stderr.write(f"写 spec config 失败:{e}\n") + return 1 + + # 同步 session 的 pending_selector + sess = read_session(args.session) + if sess is not None: + sess["pending_selector"] = "workflow-choice" + sess["last_activity_at"] = _now_iso() + try: + write_session_atomic(args.session, sess) + except Exception as e: + # 回滚 spec config + try: + write_spec_config_atomic(spec_dir, prior_cfg) + except Exception: + pass + sys.stderr.write(f"写 sessions 失败,已回滚 spec config:{e}\n") + return 1 + + _emit_json({ + "ok": True, + "project_root": str(root_path), + "pending_selector": "workflow-choice", + "spec_dir": str(spec_dir), + }) + return 0 + + +def cmd_status(args: argparse.Namespace) -> int: + sess = read_session(args.session) + if sess is None: + _emit_json({"ok": False, "reason": "session_not_found", "session_id": args.session}) + return 0 + payload = {"ok": True, "session": sess} + spec_dir_str = sess.get("active_spec_dir") + if spec_dir_str: + try: + cfg = read_spec_config(Path(spec_dir_str)) + if cfg is not None: + payload["spec_config"] = cfg + except Exception: + pass + _emit_json(payload) + return 0 + 
+ +def cmd_read_session(args: argparse.Namespace) -> int: + sess = read_session(args.session) + if sess is None: + _emit_json({"ok": False, "reason": "session_not_found"}) + return 0 + _emit_json(sess) + return 0 + + +def cmd_list_specs(args: argparse.Namespace) -> int: + """列出当前 doc_root 下所有 spec 的状态摘要。 + + 输出 JSON: + {ok, root, source, specs: [...], reason?} + 每个 spec 元素: + {slug, dir, specId, displayName, phase, iterationRound, + lock_state, holder, last_heartbeat_at, pending_selector, + mtimes: {...}} + """ + import datetime as _dt + try: + import spec_vault # type: ignore + except Exception as e: + _emit_json({ + "ok": False, + "reason": f"spec_vault_import_failed: {e}", + "root": None, + "source": "error", + "specs": [], + }) + return 0 + + override = args.root + try: + root, source = spec_vault.resolve_doc_root(override) + except Exception as e: + _emit_json({ + "ok": False, + "reason": f"resolve_doc_root_failed: {e}", + "root": None, + "source": "error", + "specs": [], + }) + return 0 + + if root is None: + _emit_json({ + "ok": False, + "reason": "no_doc_root", + "root": None, + "source": source, + "specs": [], + }) + return 0 + + specs_dir = Path(root) / "specs" + if not specs_dir.exists() or not specs_dir.is_dir(): + _emit_json({ + "ok": True, + "root": str(root), + "source": source, + "specs": [], + }) + return 0 + + spec_doc_names = [ + "requirements.md", + "bugfix.md", + "design.md", + "tasks.md", + "implementation-log.md", + ] + + entries: list[dict] = [] + try: + children = sorted(specs_dir.iterdir(), key=lambda p: p.name) + except Exception: + children = [] + + for child in children: + if not child.is_dir(): + continue + cfg_path = child / ".config.json" + if not cfg_path.exists(): + continue + try: + with cfg_path.open("r", encoding="utf-8") as fh: + cfg = json.load(fh) + if not isinstance(cfg, dict): + continue + except Exception: + continue + + lock = cfg.get("lock") or {} + # 业务侧实际字段名是 holder;兼容历史 session_id / claude_session_id 兜底 + holder_id = 
( + lock.get("holder") or lock.get("session_id") or lock.get("claude_session_id") + if isinstance(lock, dict) else None + ) + if holder_id: + if _is_lock_stale(lock): + lock_state = "stale" + else: + lock_state = "held" + else: + lock_state = "free" + holder_short = holder_id[:8] if isinstance(holder_id, str) and holder_id else None + + mtimes: dict[str, str] = {} + for name in spec_doc_names: + doc_path = child / name + try: + if doc_path.exists(): + ts = doc_path.stat().st_mtime + mtimes[name] = ( + _dt.datetime.utcfromtimestamp(ts) + .strftime("%Y-%m-%dT%H:%M:%SZ") + ) + except Exception: + continue + + display_name = cfg.get("displayName") or cfg.get("requirementName") + + entries.append({ + "slug": cfg.get("slug") or child.name, + "dir": str(child), + "specId": cfg.get("specId"), + "displayName": display_name, + "phase": cfg.get("phase"), + "iterationRound": cfg.get("iterationRound", 0), + "lock_state": lock_state, + "holder": holder_short, + "last_heartbeat_at": lock.get("last_heartbeat_at") if isinstance(lock, dict) else None, + "pending_selector": cfg.get("pending_selector"), + "mtimes": mtimes, + }) + + _emit_json({ + "ok": True, + "root": str(root), + "source": source, + "specs": entries, + }) + return 0 diff --git a/plugins/specode/scripts/spec_session/_catalog.py b/plugins/specode/scripts/spec_session/_catalog.py new file mode 100644 index 0000000..0c0db1d --- /dev/null +++ b/plugins/specode/scripts/spec_session/_catalog.py @@ -0,0 +1,163 @@ +'''spec_session package 内部实现:B2 reference catalog hook。 + +按 user prompt 关键词在 active spec 内注入「考虑读 references/.md」提醒。 +description 即触发(superpowers 风格):每个 reference 在文件头 YAML +frontmatter 写 `description: Use when ...`,告诉读者"何时该来这里";本 hook +按预定义关键词表把命中的 reference 列出来,主代理自己决定是否要 Read。 + +激活门:仅当 sessions/.json.mode=active 时触发;mode=readonly / idle / +ended 一律静默,避免在不应活动的状态下打扰。 + +性能预算:UserPromptSubmit budget 80ms。本 hook 全程纯预编译正则匹配 + +按命中 key 才读 frontmatter(最多 8 次 small file read),单次 <10ms。 + +不要直接运行本文件。stdlib-only。 +''' 
+from __future__ import annotations + +import argparse +import re +from pathlib import Path +from typing import Optional + +from spec_session._hooks import ( + _emit_hook_additional_context, + _read_stdin_payload, + _safe_hook, +) +from spec_session._io import read_session + + +_THIS_DIR = Path(__file__).resolve().parents[1] # = scripts/(本文件在 scripts/spec_session/) +_REFERENCES_DIR = _THIS_DIR.parent / "skills" / "specode" / "references" + + +# key = reference 文件名(无 .md 后缀),value = 触发关键词 regex 列表 +# 命中规则:prompt 内匹配任一 pattern 即命中该 key(一个 key 只列一次) +CATALOG: dict[str, list[str]] = { + "lock-protocol": [ + r"\block\b", r"takeover", r"heartbeat", r"\bstale\b", + r"接管", r"释锁", r"持锁", r"锁主", r"verify-lock", + ], + "obsidian": [ + r"\bvault\b", r"obsidian", r"doc[-_]?root", + r"--set-vault", r"--detect-vault", r"--vault-status", + r"specs?\s*目录", r"文档目录", r"spec\s*根目录", + ], + "iteration": [ + r"\biteration\b", r"迭代", r"acceptance.*?(继续|调整|修改)", + r"验收后", r"再跑一轮", + ], + "selectors": [ + r"AskUserQuestion", r"\bselector\b", r"选择器", r"phase[- ]gate", + r"chip[- ]tab", + ], + "workflow": [ + r"workflow[- ]choice", r"clarification", r"澄清", + r"工作流选择", r"phase\s*转换", r"phase[- ]transition", + ], + "templates": [ + r"\bEARS\b", r"\bSHALL\b", r"traceability", r"_需求:", + r"模板.*文档", r"requirements?\.md", r"design\.md", + ], + "task-swarm": [ + r"task[- ]swarm", r"\breviewer\b", r"\bvalidator\b", + r"v[- ]?fix", r"p0[- ]?fix", + r"@writes", r"@depends[- ]on", r"@reads", + r"writeback", r"deadloop", r"task_swarm", + ], + "task-swarm-example": [ + r"tasks\.md.*?示例", r"task-swarm.*?例子", + r"tasks\.md.*?例", r"task-swarm.*?demo", + ], +} + + +# 预编译 + IGNORECASE,避免 hook 每次都重编译 +_COMPILED: dict[str, list[re.Pattern]] = { + k: [re.compile(p, re.IGNORECASE) for p in patterns] + for k, patterns in CATALOG.items() +} + + +def _read_description(ref_key: str) -> Optional[str]: + """从 references/.md YAML frontmatter 取 description 字段。 + + 无 frontmatter / 无 description / 读失败 → None(catalog 仍触发, + 
只是注入文本里改用占位符)。 + """ + p = _REFERENCES_DIR / f"{ref_key}.md" + if not p.exists(): + return None + try: + text = p.read_text(encoding="utf-8") + except Exception: + return None + if not text.startswith("---\n"): + return None + end = text.find("\n---\n", 4) + if end < 0: + return None + fm = text[4:end] + # 简易行解析:找以 'description:' 开头的行 + for line in fm.split("\n"): + if line.startswith("description:"): + return line[len("description:"):].strip() + return None + + +def _match_refs(prompt: str) -> list[str]: + """返回 prompt 命中的 reference key 列表(保序去重)。""" + hits: list[str] = [] + for key, patterns in _COMPILED.items(): + for p in patterns: + if p.search(prompt): + hits.append(key) + break # 同 key 内任一 pattern 命中即可 + return hits + + +def _render_catalog_text(hits: list[str]) -> str: + lines = [ + "## 📚 specode reference 提示", + "", + "你最新一轮输入命中下列关键词,对应 references 可能与本轮相关;", + "如未读过请先 Read(路径相对于 plugin skills/specode/):", + "", + ] + for key in hits: + desc = _read_description(key) or "(该 reference 暂无 description)" + lines.append(f"- `references/{key}.md` — {desc}") + lines.append("") + lines.append( + "提示仅供参考;是否需要 Read 由你结合 SKILL.md 与当前 phase 自行判断。" + ) + return "\n".join(lines) + "\n" + + +@_safe_hook +def hook_on_user_prompt_catalog(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = ( + payload.get("session_id") + or payload.get("sessionId") + or args.session_override + ) + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + # 激活门:仅 active 模式触发;idle / ended / readonly 静默 + if sess.get("mode") != "active": + return + prompt = payload.get("prompt") or "" + if not prompt: + return + hits = _match_refs(prompt) + if not hits: + return + _emit_hook_additional_context( + _render_catalog_text(hits), + hook_event_name="UserPromptSubmit", + ) diff --git a/plugins/specode/scripts/spec_session/_hooks.py b/plugins/specode/scripts/spec_session/_hooks.py new file mode 100644 index 0000000..4ab4758 --- /dev/null 
+++ b/plugins/specode/scripts/spec_session/_hooks.py @@ -0,0 +1,793 @@ +'''spec_session package 内部实现:所有 hook 子命令(hook_on_*)+ safe wrapper + task-swarm plan 提醒辅助。 + +hook 子命令仅由 hooks/hooks.json 调用;全部 exit 0、任何异常通过 @_safe_hook +内部 catch(PreToolUse 对 task-swarm 受控路径与 tasks.md 的 exit 2 强阻断除外, +见 hook_on_pre_tool_use)。 + +不要直接运行本文件。stdlib-only。 +''' +from __future__ import annotations + +import argparse +import contextlib +import json +import os +import re +import subprocess +import sys +import traceback +from pathlib import Path +from typing import Optional + +from spec_session._io import ( + _now_iso, + _session_short, + read_session, + read_spec_config, + write_session_atomic, + write_spec_config_atomic, +) +from spec_session._reminders import ( + CODE_DOC_SYNC_STOP, + DOC_PRIORITY_REMINDER_ACTIVE, + DOC_PRIORITY_REMINDER_READONLY, + SPEC_MODE_CONTINUE_REMINDER, + SPEC_MODE_ENDED_REMINDER, + SPEC_MODE_READONLY_REMINDER, + STATUS_FOOTER_TEMPLATE, + _render_help_text, + _wrap_help_fastpath, +) +from spec_session._selectors import _fill_selector + + +_THIS_DIR = Path(__file__).resolve().parents[1] # = scripts/(本文件在 scripts/spec_session/) + +# spec_log 兜底 import(sibling 同目录脚本;scripts/spec_session.py launcher 已注入 sys.path) +try: + from spec_log import write_event as _log_event # type: ignore +except Exception: + def _log_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + return None + + +def _read_stdin_payload() -> dict: + """读 hook stdin payload。**不要 block**:如 stdin 不是管道,立刻返回 {}。""" + data: dict = {} + try: + if sys.stdin is None: + return data + # 判断是否 tty/无管道 + try: + isatty = sys.stdin.isatty() + except Exception: + isatty = True + if isatty: + return data + raw = sys.stdin.read() + if not raw: + return data + try: + obj = json.loads(raw) + if isinstance(obj, dict): + return obj + except Exception: + return data + except Exception: + return data + return data + + +def _emit_hook_additional_context(text: str, 
hook_event_name: str = "UserPromptSubmit") -> None: + """按宿主 hook 协议 emit additionalContext JSON。""" + payload = { + "hookSpecificOutput": { + "hookEventName": hook_event_name, + "additionalContext": text, + } + } + sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n") + + +def _bypass_active() -> bool: + return os.environ.get("SPECODE_GUARD", "").lower() == "off" + + +def _safe_hook(fn): + """装饰器:hook 子命令的最外层异常吞并,恒 exit 0。""" + def wrapper(args: argparse.Namespace) -> int: + if _bypass_active(): + return 0 + # log hook invocation(0.10.0+;日志失败不阻断 hook) + with contextlib.suppress(Exception): + _log_event("hook_invoked", {"hook": fn.__name__}, session_id=None) + try: + fn(args) + except SystemExit: + raise + except BaseException: + with contextlib.suppress(Exception): + # 写一份本地 trace 便于排查;忽略 IO 错误 + err = traceback.format_exc() + sys.stderr.write(f"specode hook 异常已吞并:\n{err}\n") + _log_event("hook_exception", {"hook": fn.__name__, "trace_head": err[:500]}, session_id=None) + return 0 + return wrapper + + +# ------------------------------------------------------------------------- +# 0.10.0+ 工具调用日志 hook(PreToolUse / PostToolUse 全通配,仅落日志) +# ------------------------------------------------------------------------- + +@_safe_hook +def hook_on_log_pre_tool_use(args: argparse.Namespace) -> None: + """PreToolUse 全通配 hook:抓主代理每个工具调用前的 payload。仅落日志,不注入。""" + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") or args.session_override + _log_event("tool_pre", { + "tool_name": payload.get("tool_name") or payload.get("toolName"), + "tool_input": payload.get("tool_input") or payload.get("toolInput"), + }, session_id=session_id) + + +@_safe_hook +def hook_on_log_post_tool_use(args: argparse.Namespace) -> None: + """PostToolUse 全通配 hook:抓主代理每个工具调用后的 payload。仅落日志,不注入。""" + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") or args.session_override + _log_event("tool_post", 
{ + "tool_name": payload.get("tool_name") or payload.get("toolName"), + "tool_response_head": str(payload.get("tool_response") or payload.get("toolResponse") or "")[:300], + }, session_id=session_id) + + +# ---- on-session-start ---- + +@_safe_hook +def hook_on_session_start(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") or args.session_override + if not session_id: + return + existing = read_session(session_id) + if existing is None: + new_payload = { + "session_id": session_id, + "started_at": _now_iso(), + "last_activity_at": _now_iso(), + "ended_at": None, + "mode": "idle", + "active_spec_slug": None, + "active_spec_dir": None, + "spec_id": None, + "phase": None, + "lock_state": "released", + "task_swarm_run_id": None, + "pending_selector": None, + } + try: + write_session_atomic(session_id, new_payload) + except Exception: + pass + existing = new_payload + else: + existing["last_activity_at"] = _now_iso() + # 断线重连:如果原 ended,重新激活为 idle + if existing.get("mode") == "ended": + existing["mode"] = "idle" + existing["ended_at"] = None + try: + write_session_atomic(session_id, existing) + except Exception: + pass + + mode = existing.get("mode") or "idle" + slug = existing.get("active_spec_slug") or "无" + text = ( + "## Specode session 就绪\n\n" + f"当前会话 session_id: {session_id}\n" + f"后续调用 specode CLI 时请始终用 `--session {session_id}` 传入。\n\n" + f"(此 session 当前 mode={mode},spec={slug};\n" + " 如需开始新 spec,使用 `/specode:spec <需求>`;\n" + " 如需恢复,使用 `/specode:continue [slug]`。)\n" + ) + if mode == "active" and existing.get("active_spec_slug"): + text += "\n" + text += SPEC_MODE_CONTINUE_REMINDER.replace("", existing.get("active_spec_slug") or "?").replace("", existing.get("phase") or "?") + + _emit_hook_additional_context(text, hook_event_name="SessionStart") + + +# ---- on-user-prompt ---- + +FAST_PATH_HELP = re.compile(r"^\s*/specode:spec\s+(-h|--help)\s*$", re.IGNORECASE) 
+FAST_PATH_VAULT = re.compile( + r"^\s*/specode:spec\s+--(vault-status|detect-vault|sync-status)\s*$", + re.IGNORECASE, +) + + +def _run_subcmd(argv: list[str]) -> str: + """运行 spec_vault.py 等子命令,捕获 stdout。失败返回错误描述。""" + try: + proc = subprocess.run( + [sys.executable, str(_THIS_DIR / argv[0])] + argv[1:], + capture_output=True, text=True, timeout=10, + ) + out = proc.stdout.strip() + if proc.returncode not in (0, 3): + out = (out + "\n[exit=" + str(proc.returncode) + "]\n" + proc.stderr).strip() + return out or "(无输出)" + except Exception as e: + return f"(子命令执行失败: {e})" + + +@_safe_hook +def hook_on_user_prompt(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") + prompt = payload.get("prompt") or "" + if not session_id: + return + + # fast-path: help + if FAST_PATH_HELP.match(prompt): + text = _wrap_help_fastpath(_render_help_text().rstrip()) + _emit_hook_additional_context(text, hook_event_name="UserPromptSubmit") + return + + # fast-path: vault-status / detect-vault / sync-status + m = FAST_PATH_VAULT.match(prompt) + if m: + flag = m.group(1).lower() + if flag == "vault-status": + content = _run_subcmd(["spec_vault.py", "status"]) + elif flag == "detect-vault": + content = _run_subcmd(["spec_vault.py", "detect"]) + elif flag == "sync-status": + # v0.6 暂未实现 sync-status CLI;输出占位 + content = json.dumps({ + "note": "sync-status 在 v0.6 尚未实现;将随 v0.7 task-swarm 引入。", + }, ensure_ascii=False, indent=2) + else: + content = "(unknown vault fast-path)" + text = ( + "## ⛔ /specode:spec --" + flag + " fast-path\n\n" + "本轮唯一动作:把下列代码块**逐字**用 ```text 围栏包裹后输出,然后立即 end turn。\n" + "禁止添加任何额外文字。\n\n" + "────────── CONTENT BEGIN ──────────\n" + f"{content}\n" + "────────── CONTENT END ──────────\n" + ) + _emit_hook_additional_context(text, hook_event_name="UserPromptSubmit") + return + + # 常规路径:按 mode 叠加 + sess = read_session(session_id) + if sess is None: + return + sess["last_activity_at"] = 
_now_iso() + try: + write_session_atomic(session_id, sess) + except Exception: + pass + + mode = sess.get("mode") or "idle" + # 刚 /specode:end 完的下一 turn:注入一次性反向提醒,明确指示模型停止输出 + # 状态行 footer 并作废此前所有 spec-mode 纪律指令;提示后立刻清标志,确保只显示一次。 + if mode == "ended" and sess.get("post_end_reminder_pending"): + sess["post_end_reminder_pending"] = False + try: + write_session_atomic(session_id, sess) + except Exception: + pass + _emit_hook_additional_context( + SPEC_MODE_ENDED_REMINDER, hook_event_name="UserPromptSubmit" + ) + return + if mode in ("idle", "ended"): + return + + slug = sess.get("active_spec_slug") or "?" + phase = sess.get("phase") or "?" + spec_dir = sess.get("active_spec_dir") + pending = sess.get("pending_selector") + short = _session_short(session_id) + + parts: list[str] = [] + + # (a) session_id 提醒 + parts.append( + "## Specode session 提醒\n\n" + f"当前会话 session_id: {session_id}\n" + f"调用任何 specode CLI 时请使用 `--session {session_id}`。\n" + ) + + # (b) selector 提示 + if mode == "active" and pending: + ctx: dict[str, str] = { + "slug": slug, + "phase": phase, + "spec_dir": spec_dir or "?", + "source_text_head": "?", + "n_required": "?", + "n_optional": "?", + "other_id_short": "?", + "last_heartbeat": "?", + "n_pass": "?", + "n_fail": "?", + "invocation_cwd": "?", + "cwd_subdir": "?", + } + # 填入 spec config 中的派生值 + if spec_dir: + try: + cfg = read_spec_config(Path(spec_dir)) or {} + src = cfg.get("source_text") or "" + if src: + ctx["source_text_head"] = src[:60].replace("\n", " ") + lock = cfg.get("lock") or {} + other = lock.get("holder") + if other and other != session_id: + ctx["other_id_short"] = _session_short(other) + ctx["last_heartbeat"] = str(lock.get("last_heartbeat_at") or "?") + inv = cfg.get("invocation_cwd") + if inv: + ctx["invocation_cwd"] = str(inv) + # cwd/slug:用 os.path.join 跨平台拼接,但模板里用斜杠展示更直观; + # spec_session 不直接 mkdir,set-project-root CLI 才创建实际目录 + sep = "\\" if "\\" in str(inv) else "/" + ctx["cwd_subdir"] = f"{inv}{sep}{slug}" + except 
Exception: + pass + sel = _fill_selector(pending, ctx) + if sel: + parts.append(sel) + elif mode == "readonly" and pending: + parts.append( + "## ℹ️ 只读模式:当前 pending_selector=" + f"`{pending}` (仅信息提示,只读不能确认)\n" + ) + + # (c) 文档优先提醒 + if mode == "active": + parts.append( + DOC_PRIORITY_REMINDER_ACTIVE + .replace("", slug) + .replace("", phase) + ) + elif mode == "readonly": + parts.append( + DOC_PRIORITY_REMINDER_READONLY + .replace("", slug) + .replace("", phase) + ) + + # (d) 状态行 footer + if mode in ("active", "readonly"): + footer = ( + STATUS_FOOTER_TEMPLATE + .replace("", slug) + .replace("", short) + .replace("", phase) + .replace("", mode) + ) + parts.append(footer) + + # (e) 模式提醒 + if mode == "active": + parts.append( + SPEC_MODE_CONTINUE_REMINDER + .replace("", slug) + .replace("", phase) + ) + elif mode == "readonly": + parts.append( + SPEC_MODE_READONLY_REMINDER + .replace("", slug) + .replace("", phase) + ) + + if not parts: + return + text = "\n\n".join(p.rstrip() for p in parts) + "\n" + _emit_hook_additional_context(text, hook_event_name="UserPromptSubmit") + + +# ---- on-stop ---- + +@_safe_hook +def hook_on_stop(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + sess["last_activity_at"] = _now_iso() + try: + write_session_atomic(session_id, sess) + except Exception: + pass + mode = sess.get("mode") or "idle" + if mode in ("idle", "ended"): + return + slug = sess.get("active_spec_slug") or "?" + phase = sess.get("phase") or "?" 
+ if mode == "active": + text_parts = [ + CODE_DOC_SYNC_STOP.replace("", slug).replace("", phase), + SPEC_MODE_CONTINUE_REMINDER.replace("", slug).replace("", phase), + ] + else: + text_parts = [ + SPEC_MODE_READONLY_REMINDER.replace("", slug).replace("", phase), + ] + text = "\n\n".join(p.rstrip() for p in text_parts) + "\n" + _emit_hook_additional_context(text, hook_event_name="Stop") + + +# ---- on-session-end ---- + +@_safe_hook +def hook_on_session_end(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = payload.get("session_id") or payload.get("sessionId") + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + spec_dir_str = sess.get("active_spec_dir") + if spec_dir_str: + try: + spec_dir = Path(spec_dir_str) + if spec_dir.exists(): + cfg = read_spec_config(spec_dir) + if cfg is not None: + lock = cfg.get("lock") or {} + if lock.get("holder") == session_id: + cfg["lock"] = None + with contextlib.suppress(Exception): + write_spec_config_atomic(spec_dir, cfg) + except Exception: + pass + sess["mode"] = "ended" + sess["ended_at"] = _now_iso() + sess["lock_state"] = "released" + sess["pending_selector"] = None + with contextlib.suppress(Exception): + write_session_atomic(session_id, sess) + # 不输出 additionalContext + + +# ---- v0.7 on-task-completed(task-swarm 节点提醒) ---- + +TASK_COMPLETED_TRAILER = "\n\n本提醒仅供参考;fork 谁、是否 fork、何时 writeback 仍由你判断;可忽略。" + + +def _run_task_swarm_plan(run_id: str) -> Optional[dict]: + """调子进程 task_swarm.py plan --run ,解析 stdout JSON 返回 dict。 + + 任何失败(exit != 0、JSON 解析失败、子进程异常)返回 None。 + """ + try: + proc = subprocess.run( + [sys.executable, str(_THIS_DIR / "task_swarm.py"), "plan", "--run", run_id], + capture_output=True, text=True, timeout=10, + ) + except Exception: + return None + if proc.returncode != 0: + return None + out = (proc.stdout or "").strip() + if not out: + return None + try: + obj = json.loads(out) + if isinstance(obj, dict): + return obj + except 
Exception: + return None + return None + + +def _format_plan_context(plan: dict) -> str: + """按 references/task-swarm.md §6 hook 提醒矩阵把 plan dict 渲染成 additionalContext 文本。""" + phase = str(plan.get("phase") or "?") + action = str(plan.get("action") or "") + group = plan.get("group") + rnd = plan.get("round") + in_flight = plan.get("in_flight") or [] + fork = plan.get("fork") or [] + msg = str(plan.get("message") or "") + n_fork = len(fork) if isinstance(fork, list) else 0 + n_in_flight = len(in_flight) if isinstance(in_flight, list) else 0 + + # 选择具体建议文本(references/task-swarm.md §6 9 种状态) + if action == "deadloop" or phase == "error": + body = ( + f"⚠️ 死循环检测:g{group} 已连续 3 轮同一 fail 签名。\n" + "建议停止本 group,向用户报告 `failed-deadloop`,让用户介入。" + ) + elif action == "all-done" or phase == "done": + body = ( + "全部 group 已完成。请按 SKILL.md 退出 task-swarm 模式," + "回到 spec-mode acceptance phase。" + ) + elif phase == "coding" and action == "coding-waiting": + body = ( + f"coding phase 还在等 {n_in_flight} 个 subagent;" + "无需 fork 新 agent,等齐后再判断。" + ) + elif phase == "coding" and action == "coding-fork": + body = ( + f"本 group 开始 coding。请按下面 {n_fork} 个 coder agent_key fork" + "(同 message 内并发)。" + ) + elif phase == "review" and action == "review-fork": + body = ( + "本 group coder 已全部返回。请 fork **1 个** `task-swarm-reviewer`," + "prompt 已生成。" + ) + elif phase == "p0-fix" and action == "p0-fix-fork": + body = ( + f"reviewer 提了带证据 P0。请按 P0 涉及文件 fork **{n_fork}** 个 " + "`task-swarm-coder`(p0-fix),prompt 已生成。\n" + "提醒:reviewer 修复**只触发一次**,不 re-review。" + ) + elif phase == "p0-fix" and action == "p0-fix-waiting": + body = f"p0-fix 仍有 {n_in_flight} 个 coder 未返回,等齐后再判断。" + elif phase == "validation" and action == "validation-fork": + body = ( + "reviewer 无带证据 P0(或全部降级为 advisory)。" + "请 fork **1 个** `task-swarm-validator`,prompt 已生成。" + ) + elif phase == "validation" and action == "validation-fork-after-p0": + body = ( + "p0-fix coder 已返回。请 fork **1 个** `task-swarm-validator`," + "prompt 已生成。" + ) + elif 
phase == "validation" and action == "validation-after-vfix": + body = ( + "v-fix coder 已返回。请 fork **1 个** `task-swarm-validator` 验证。" + ) + elif phase == "writeback" and action == "writeback": + body = ( + "validator pass。请调 `task_swarm.py writeback " + f"--run --group {group}` 回写 tasks.md,然后进入下一 group。" + ) + elif phase == "v-fix" and action == "v-fix-fork": + body = ( + f"validator fail。请按 validation.md 的 fix_targets 各文件 " + f"fork **{n_fork}** 个 `task-swarm-coder`(v-fix)。\n" + "注意:validator fail 循环修复直到 pass。" + f"本轮是 g{group}-r{rnd}。" + ) + elif phase == "v-fix" and action == "v-fix-waiting": + body = f"v-fix 仍有 {n_in_flight} 个 coder 未返回,等齐后再判断。" + else: + body = msg or f"phase={phase} action={action}(详见 plan 输出)" + + header = ( + f"## task-swarm 节点提醒(phase={phase}, " + f"group={group if group is not None else '?'}, " + f"round={rnd if rnd is not None else '?'})\n\n" + ) + return header + body + TASK_COMPLETED_TRAILER + + +@_safe_hook +def hook_on_task_completed(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = ( + payload.get("session_id") + or payload.get("sessionId") + or args.session_override + ) + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + run_id = sess.get("task_swarm_run_id") + if not run_id: + return + + plan = _run_task_swarm_plan(run_id) + if isinstance(plan, dict): + text = _format_plan_context(plan) + else: + # plan 调用失败 → 兜底文本 + text = ( + "## task-swarm 节点提醒\n\n" + f"无法自动获取 task-swarm run `{run_id}` 的下一步建议——" + "请手动调用:\n\n" + "```bash\n" + f"task_swarm.py plan --run {run_id}\n" + "```\n\n" + "拿到输出后再判断 fork 谁 / 是否 writeback。" + + TASK_COMPLETED_TRAILER + ) + _emit_hook_additional_context(text, hook_event_name="PostToolUse") + + +# ---- v0.8 on-heartbeat-quiet(静默续锁) ---- + +@_safe_hook +def hook_on_heartbeat_quiet(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = ( + payload.get("session_id") + or payload.get("sessionId") + or 
args.session_override + ) + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + if sess.get("mode") != "active": + return + spec_dir_str = sess.get("active_spec_dir") + if not spec_dir_str: + return + spec_dir = Path(spec_dir_str) + if not spec_dir.exists(): + return + cfg = read_spec_config(spec_dir) + if cfg is None: + return + lock = cfg.get("lock") or {} + if not isinstance(lock, dict): + return + holder = lock.get("holder") or lock.get("session_id") or lock.get("claude_session_id") + if holder != session_id: + return + + now = _now_iso() + lock["last_heartbeat_at"] = now + cfg["lock"] = lock + try: + write_spec_config_atomic(spec_dir, cfg) + except Exception: + return + sess["last_activity_at"] = now + with contextlib.suppress(Exception): + write_session_atomic(session_id, sess) + # 不输出 additionalContext + + +# ---- v0.8 on-pre-tool-use(tasks.md 直写提醒 + task-swarm 受控路径阻断) ---- + +def _task_swarm_protected_reason(spec_dir: Path, edited: Path) -> Optional[str]: + """若 edited 落在 task-swarm 管理路径下(state.json / agent task.md / + agent outbox/*),返回简短的拒绝标签;否则返回 None。 + + 阻断动机:这些文件是 task_swarm.py advance/writeback 的内部状态与产物。 + 主代理直接 Edit 会破坏状态机契约(典型事故:state in_flight 与磁盘 + agent 目录的 round 号对不上时,手工抹平 state.json 反而越改越乱)。 + 所有变更必须通过 task_swarm.py CLI 走。 + """ + try: + ts_root = (spec_dir / ".task-swarm" / "runs").resolve() + except Exception: + return None + try: + rel = edited.relative_to(ts_root) + except ValueError: + return None + parts = rel.parts + if len(parts) < 2: + return None + # parts[0] = + if parts[1] == "state.json": + return "state.json" + if parts[1] == "agents" and len(parts) >= 4: + # parts[2] = + if parts[3] == "task.md": + return "agent task.md" + if parts[3] == "outbox": + return "agent outbox" + return None + + +@_safe_hook +def hook_on_pre_tool_use(args: argparse.Namespace) -> None: + payload = _read_stdin_payload() + session_id = ( + payload.get("session_id") + or payload.get("sessionId") + or 
args.session_override + ) + if not session_id: + return + sess = read_session(session_id) + if sess is None: + return + if sess.get("mode") != "active": + return + run_id = sess.get("task_swarm_run_id") + if not run_id: + return + spec_dir_str = sess.get("active_spec_dir") + if not spec_dir_str: + return + + tool_input = payload.get("tool_input") or {} + if not isinstance(tool_input, dict): + return + file_path = tool_input.get("file_path") or "" + if not file_path or not isinstance(file_path, str): + return + + try: + edited = Path(file_path).resolve() + except Exception: + return + spec_dir = Path(spec_dir_str) + + # 强阻断:task-swarm 受控路径(state.json / agent task.md / agent outbox/*) + protected = _task_swarm_protected_reason(spec_dir, edited) + if protected: + if protected == "state.json": + target_hint = "task_swarm.py advance --run --phase " + why = ( + "`state.json` 是 task_swarm.py 状态机的唯一事实来源。手工 Edit 会让\n" + "in_flight / done / phase / round 与磁盘 agent 目录脱节(已知事故:\n" + "state 是 r2、磁盘是 r3 时,手工把 r2 改成 r3 抹平差异 → 再 advance\n" + "时状态机走错分支 → validator/coder 名字越漂越远)。" + ) + elif protected == "agent task.md": + target_hint = "task_swarm.py advance(让状态机重新 render prompt)" + why = ( + "`agents//task.md` 是 task_swarm.py 为 subagent 生成的 prompt。\n" + "主代理改它不会让 subagent 重新读——只会让产物与意图脱节。" + ) + else: # agent outbox + target_hint = "重新 fork 对应 subagent 让它输出合规产物" + why = ( + "`agents//outbox/*` 是 subagent 的产物。主代理手工补 STATUS / 改\n" + "result.md 等同于伪造 subagent 工作,advance 解析时看似 ok,但实际\n" + "代码未改。请重新 fork subagent 或汇报 task_swarm.py 解析 bug。" + ) + reason = ( + f"specode 阻断:主代理不得直接 Edit/Write task-swarm 受控路径" + f"({protected})。\n\n" + f"文件: {edited}\n" + f"run_id: {run_id}\n\n" + f"{why}\n\n" + f"正确路径: {target_hint}\n" + ) + sys.stderr.write(reason) + sys.exit(2) + + # 0.10.21+:tasks.md 直写从软提醒升级为强阻断 + # 理由:login-page 现场显示主代理见 writeback 越界报错就手工 Edit tasks.md + # 把 `[ ]` 改成 `[x]`,破坏了 state.json 与 tasks.md 行号一致性,后续 + # writeback 永远过不去。跟 state.json / outbox 同等待遇——只能走 CLI。 + try: + tasks_md = 
(spec_dir / "tasks.md").resolve() + except Exception: + return + + if edited != tasks_md: + return + + reason = ( + f"specode 阻断:主代理不得直接 Edit/Write `tasks.md`\n\n" + f"文件: {edited}\n" + f"run_id: {run_id}\n\n" + "`tasks.md` 在 task-swarm run 进行中是受控产物——所有 checkbox toggle " + "(`[ ]` → `[x]`)和评审注释块都必须通过 `task_swarm.py writeback` CLI 走,\n" + "走 line-safe diff 算法保证 state.json 行号引用不被破坏。\n\n" + "已知反模式:见 writeback 越界报错就手工改 tasks.md → state.json 行号失效 → 后续\n" + "writeback 永远过不去 → 主代理陷入死循环(参 0.10.13 user-login / 0.10.21 login-page 事故)。\n\n" + "正确路径: task_swarm.py writeback --run --group \n" + "若 writeback 本身报越界,请保留现场报告用户,让 task-swarm 算法层修,**不要**\n" + "手工抹平。\n" + ) + sys.stderr.write(reason) + sys.exit(2) diff --git a/plugins/specode/scripts/spec_session/_io.py b/plugins/specode/scripts/spec_session/_io.py new file mode 100644 index 0000000..bb17f81 --- /dev/null +++ b/plugins/specode/scripts/spec_session/_io.py @@ -0,0 +1,182 @@ +"""spec_session package 内部实现:原子写 / session+spec config 读写 / 锁工具 / 共享常量。 + +不要直接运行本文件。它通过 spec_session.py 导出,spec_status.py 也通过 +spec_session.py 间接消费这里的 read_session / read_spec_config / +_session_short / _is_lock_stale。 + +stdlib-only。 +""" +from __future__ import annotations + +import contextlib +import json +import os +import sys +import tempfile +import time +from pathlib import Path +from typing import Any, Optional + + +# ------------------------------------------------------------------------- +# 共享常量 +# ------------------------------------------------------------------------- + +STALE_LOCK_SECONDS = 30 * 60 # 30 分钟无 heartbeat 视为 stale + +VALID_PHASES = { + "intake", + "requirements", + "bugfix", + "design", + "tasks", + "implementation", + "acceptance", + "iteration", +} + + +# ------------------------------------------------------------------------- +# 时间工具 +# ------------------------------------------------------------------------- + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def _parse_iso(s: 
Optional[str]) -> Optional[float]: + if not s: + return None + try: + # 朴素 ISO8601-UTC 解析 + if s.endswith("Z"): + s2 = s[:-1] + "+00:00" + else: + s2 = s + import datetime as _dt + return _dt.datetime.fromisoformat(s2).timestamp() + except Exception: + return None + + +# ------------------------------------------------------------------------- +# 原子写 +# ------------------------------------------------------------------------- + +def _atomic_write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp( + prefix=path.name + ".", + suffix=".tmp", + dir=str(path.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(content) + fh.flush() + try: + os.fsync(fh.fileno()) + except OSError: + pass + os.replace(tmp, path) + try: + dir_fd = os.open(str(path.parent), os.O_RDONLY) + try: + os.fsync(dir_fd) + except OSError: + pass + finally: + os.close(dir_fd) + except OSError: + pass + except Exception: + with contextlib.suppress(OSError): + os.unlink(tmp) + raise + + +def _atomic_write_json(path: Path, payload: Any) -> None: + _atomic_write_text(path, json.dumps(payload, ensure_ascii=False, indent=2)) + + +# ------------------------------------------------------------------------- +# 数据层 +# ------------------------------------------------------------------------- + +def _sessions_dir() -> Path: + return Path.home() / ".specode" / "sessions" + + +def session_file_path(session_id: str) -> Path: + return _sessions_dir() / f"{session_id}.json" + + +def read_session(session_id: str) -> Optional[dict]: + p = session_file_path(session_id) + if not p.exists(): + return None + try: + with p.open("r", encoding="utf-8") as fh: + data = json.load(fh) + if isinstance(data, dict): + # 兼容老 sessions/.json:字段名曾叫 claude_session_id,迁移到 session_id + if "session_id" not in data and "claude_session_id" in data: + data["session_id"] = data["claude_session_id"] + return data + except Exception: + return None + 
return None + + +def write_session_atomic(session_id: str, data: dict) -> None: + _atomic_write_json(session_file_path(session_id), data) + + +def read_spec_config(spec_dir: Path) -> Optional[dict]: + p = spec_dir / ".config.json" + if not p.exists(): + return None + try: + with p.open("r", encoding="utf-8") as fh: + data = json.load(fh) + if isinstance(data, dict): + return data + except Exception: + return None + return None + + +def write_spec_config_atomic(spec_dir: Path, data: dict) -> None: + _atomic_write_json(spec_dir / ".config.json", data) + + +# ------------------------------------------------------------------------- +# 锁工具 +# ------------------------------------------------------------------------- + +def _is_lock_stale(lock: dict) -> bool: + last = _parse_iso(lock.get("last_heartbeat_at") or lock.get("acquired_at")) + if last is None: + return True + return (time.time() - last) > STALE_LOCK_SECONDS + + +def _session_short(sid: Optional[str]) -> str: + if not sid: + return "????????" 
+ return sid[:8] + + +# ------------------------------------------------------------------------- +# CLI 共享辅助 +# ------------------------------------------------------------------------- + +def _ensure_spec_dir(spec_dir_str: str) -> Path: + p = Path(spec_dir_str).expanduser().resolve() + if not p.exists() or not p.is_dir(): + raise FileNotFoundError(f"spec_dir 不存在:{p}") + return p + + +def _emit_json(payload: dict) -> None: + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") diff --git a/plugins/specode/scripts/spec_session/_reminders.py b/plugins/specode/scripts/spec_session/_reminders.py new file mode 100644 index 0000000..36cd7b3 --- /dev/null +++ b/plugins/specode/scripts/spec_session/_reminders.py @@ -0,0 +1,182 @@ +'''spec_session package 内部实现:reminder 模板字符串 + help 文本渲染。 + +hook 注入时拿这里的模板按 / / / +等占位符 .replace() 填值;HELP_OUTPUT_TEMPLATE / HELP_FASTPATH_WRAPPER +配套 _render_help_text / _wrap_help_fastpath 用于 fast-path 帮助渲染。 + +不要直接运行本文件。stdlib-only。 +''' +from __future__ import annotations + +import json +from pathlib import Path +from string import Template + + +_THIS_DIR = Path(__file__).resolve().parents[1] # = scripts/(本文件在 scripts/spec_session/) + + +STATUS_FOOTER_TEMPLATE = """## 🪧 spec-mode 状态行(必须在本响应末尾输出) + +请在本次响应正文之后**额外**输出一行格式如下的状态行,紧贴响应末尾、之前空一行: + +─── spec-mode ─── spec: | session: | phase: | /specode:end 退出 + +如果是只读模式,请使用: + +─── spec-mode ─── spec: | session: | phase: | [只读] | /specode:end 退出 + +具体值: + slug: + session: + phase: + mode: + +状态行的唯一目的是让用户和你自己都看到当前仍在 spec 模式。**不要省略**;如果本轮要调 `AskUserQuestion` 工具呈现选择器,状态行应放在工具调用**之前**的 chat 文本里(与正文空一行隔开),然后再调工具。 +""" + +DOC_PRIORITY_REMINDER_ACTIVE = """## 📝 文档优先提醒(用户输入侧) + +active spec:(phase=) +此 spec 的可写文档: + • requirements.md / bugfix.md + • design.md + • tasks.md(末尾自带 `## 测试要点` 节,按需顺手按 SHALL 补几行作为参考) + • implementation-log.md(如有) + +请评估用户本次输入是否涉及以下变更: + +- 需求 / 验收标准调整 → 先 Edit `requirements.md` 或 `bugfix.md` +- 架构 / 接口 / 数据模型决策 → 先 Edit `design.md` +- 任务范围 / 状态推进 → 先 Edit 
`tasks.md` +- 实现期间的设计偏离 / 关键决策 → 在 `implementation-log.md` 追加条目 +- 仅闲聊 / 状态查询 / 无关讨论 → 无需文档变更 + +文档变更要**在同一轮 turn 内先于代码改动落盘**;不要把"待会儿写"留作 verbal commitment——chat 内容不会进入 next session。 +""" + +DOC_PRIORITY_REMINDER_READONLY = """## 📝 文档优先提醒(用户输入侧 / 只读模式) + +active spec:(phase=,**只读**) +你当前没有持锁,**不应**对该 spec 的文档发起 Edit/Write。如需修改,请先: + + 1. 使用 `/specode:continue ` 并在 selector 中选"强制接管"获取锁; + 2. 或退出本会话后由锁主推进。 + +只读模式下可以:阅读、回答用户基于已有文档的问题、协助分析;**不要**写 spec 文档或源码以"模拟落地"。 +""" + +CODE_DOC_SYNC_STOP = """## 🔄 代码-文档同步提醒(turn 结束侧) + +active spec:(phase=) + +本 turn 即将结束。如果你在本 turn 内修改了源代码,请自检以下三项: + +1. `tasks.md` 是否更新? —— 推进任务标记(`[ ]` → `[~]` → `[x]` / blocked) +2. `implementation-log.md` 是否记录? —— 实现说明、设计偏离、技术决策 +3. `design.md` 接口契约是否变化? —— 若改了,同步 Edit + +如有遗漏,请在 chat 显式承诺下一轮第一件事就是补齐。 + +(本提醒**不阻断 turn**——是否补齐由你判断。但代价是 next session `/specode:continue` 时,未写入文档的变更**全部丢失**。) +""" + +SPEC_MODE_CONTINUE_REMINDER = """## ⛔ 你仍处于 spec 模式 + +spec=, phase=, mode=active + +下一 turn 必须继续遵守: + - selector / 文档优先 / 状态行 footer 三项纪律 + - 通过 /specode:end 才能正式退出 spec 模式 +""" + +SPEC_MODE_READONLY_REMINDER = """## 🔒 你处于 spec 只读模式 + +spec=, phase=, mode=readonly + +只读模式下: + - 不要 Edit/Write 该 spec 的任何文档或源码 + - 状态行 footer 必须带 [只读] 标记 + - 如需写入,请走 `/specode:continue` 选"强制接管" +""" + +SPEC_MODE_ENDED_REMINDER = """## ✅ spec 模式已退出(一次性反向提醒) + +session 已 ended(`/specode:end` 调用成功)。 + +**从现在起,作废此前所有 spec-mode 指令:** + - **不要**再在响应末尾输出 `─── spec-mode ─── …` 状态行(footer) + - **不再**受 spec-mode 三项纪律约束(selector / 文档优先 / 状态行) + - 此前注入的 `STATUS_FOOTER_TEMPLATE`、`SPEC_MODE_CONTINUE_REMINDER`、`DOC_PRIORITY_REMINDER_*` 等指令**全部失效**,不要再按它们行事 + +如需重新进入 spec 模式:`/specode:spec <需求>` 开新 spec,或 `/specode:continue ` 恢复已有 spec。 + +(本提示只在 end 后的下一 turn 显示一次。) +""" + + +# ------------------------------------------------------------------------- +# 帮助 fast-path 文本(hook emit verbatim) +# ------------------------------------------------------------------------- + +def _get_plugin_version() -> str: + """读 plugin.json 的 version;失败时返回 
'unknown'。""" + try: + plugin_json = _THIS_DIR.parent / ".claude-plugin" / "plugin.json" + with plugin_json.open("r", encoding="utf-8") as fh: + data = json.load(fh) + v = data.get("version") + return str(v) if v else "unknown" + except Exception: + return "unknown" + + +HELP_OUTPUT_TEMPLATE = """specode v$version — Specification-driven workflow + +用法: + /specode:spec -n <需求> 推荐:显式指定 spec 目录名(slug 直接用作 specs//) + /specode:spec <需求> 兼容:主代理从 <需求> 推导 slug(结果不可预知) + /specode:continue [slug] 接管已有 spec(无 slug 时列表选) + /specode:end 退出当前 spec 模式 + /specode:status 查看会话与 spec 状态 + +会话与锁: + 每次会话拥有唯一 session_id,hook 会在 additionalContext 中持续注入。 + CLI 调用必须传 --session 。当前 spec 锁记录在 /.config.json。 + 忘记 /specode:end 时 SessionEnd hook 会兜底释锁;30 分钟无 heartbeat 视为 stale。 + +工作流: + intake → workflow 选择 → requirements / bugfix / design → tasks → implementation + → acceptance → iteration(可循环) + +会话日志(v0.10.0+): + 默认开启。所有 hook / CLI 调用写入 ~/.specode/logs/.jsonl + (敏感字段自动脱敏;长字符串截断到 500 字符)。 + 开关优先级:env > config > 默认开启 + - 临时关闭:export SPECODE_LOG=off (Windows: set SPECODE_LOG=off) + - 临时打开:export SPECODE_LOG=on + - 持久关闭:在 ~/.config/specode/config.json 写 {"logging": false} + 查看 / 回放: + python3 /scripts/spec_log.py status + python3 /scripts/spec_log.py replay --session + +更多细节见 plugin 内 skills/specode/SKILL.md 与 references/。 +""" + + +def _render_help_text() -> str: + return Template(HELP_OUTPUT_TEMPLATE).safe_substitute(version=_get_plugin_version()) + +HELP_FASTPATH_WRAPPER = """## ⛔ /specode:spec -h fast-path + +本轮唯一动作:把下列代码块**逐字**用 ```text 围栏包裹后输出,然后立即 end turn。 +禁止添加任何额外文字("以下是帮助" / "希望对你有帮助" 等都不允许)。 + +────────── HELP CONTENT BEGIN ────────── +$content +────────── HELP CONTENT END ────────── +""" + + +def _wrap_help_fastpath(content: str) -> str: + return Template(HELP_FASTPATH_WRAPPER).safe_substitute(content=content) diff --git a/plugins/specode/scripts/spec_session/_selectors.py b/plugins/specode/scripts/spec_session/_selectors.py new file mode 100644 index 0000000..f5da746 --- /dev/null 
+++ b/plugins/specode/scripts/spec_session/_selectors.py @@ -0,0 +1,461 @@ +'''spec_session package 内部实现:SELECTOR_PROMPTS 字典 + _fill_selector 模板填充。 + +`SELECTOR_PROMPTS` 是 11 个 phase-gate selector 的提示词常量库。 +hook 注入时按 key 取出对应模板字符串、用 _fill_selector 填入 / 等 +上下文占位符后 emit 到 additionalContext,让主代理按格式调 AskUserQuestion。 + +byte-identical 守卫:tests/test_selectors_drift.py 用 regex 解析本文件,把 +`SELECTOR_PROMPTS: dict[str, str] = {...}` 字典字面量与 references/selectors.md +逐字对比。新增 / 改 selector 时务必同步 selectors.md,否则 drift test fail。 + +不要直接运行本文件。stdlib-only。 +''' +from __future__ import annotations + +from typing import Optional + + +SELECTOR_PROMPTS: dict[str, str] = { + "project-root-choice": """## 选择器节点:项目实现目录选择 + +**目的**:spec 刚创建(pending_selector=project-root-choice),在选工作流之前 +**先**确定 task-swarm subagent / 主代理写代码时用哪个目录作为项目根(`project_root`)。 +spec 文档目录(``)只放 `.md` 文档和 `.task-swarm/` 状态,**不是**代码根。 + +**上下文**:active spec=,phase=intake。 +- 用户启动 Claude Code 的 cwd:`` +- cwd/slug 子目录:`` + +**前置动作(chat 简报,≤3 行)**:写一句 +"spec 已创建。代码将写到 project_root,**不是** spec 文档目录。 +请选择项目目录(cwd 在已有项目里迭代 / cwd/slug 新项目子目录 / 自定义)。" + +**调用 `AskUserQuestion` 工具**,**直接传**下列结构(label/description 不要翻译): + +questions: + - question: "代码写到哪个目录?project_root 决定 task-swarm subagent 的 cwd" + header: "项目目录" + multiSelect: false + options: + - label: "cwd(在已有项目里迭代)" + description: "代码写到 。适用:已 cd 到目标 repo 后启动。" + - label: "cwd/slug(新项目子目录)" + description: "代码写到 。适用:cwd 是父目录,要新建项目子目录。" + - label: "自定义路径" + description: "用 Other 输入绝对路径。适用:项目目录跟 cwd 完全无关。" + +**约束**: +- 调用工具后立即 end turn 等用户选择。 +- 不要在 chat 输出 markdown 列表 / 不要让用户回复编号。 + +**用户选定后流程(同一 turn 内继续,不要 end turn 让用户输命令)** + +拿到选项后**本 turn 内**按选项走,调 `spec_session.py set-project-root` CLI 写入: + +- 选 "cwd(在已有项目里迭代)" → 调 + `sh "$PLUGIN_ROOT/scripts/run.sh" "$PLUGIN_ROOT/scripts/spec_session.py" set-project-root --spec --session --root ""` + (`$PLUGIN_ROOT` 即 `${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}`) +- 选 "cwd/slug(新项目子目录)" → 同上但 `--root ""`,CLI 会 mkdir -p 自动创建。 +- 选 
"自定义路径"(Other 文本)→ 拿用户输入的绝对路径作 `--root`。**禁止**接受相对路径;若用户给的是相对,请先扩展为绝对。 + +CLI 成功后: +1. `.config.json.project_root` 已写入,`pending_selector` 推进到 `workflow-choice` +2. 立即调 `AskUserQuestion` 呈现 `workflow-choice` selector(不要 end turn 让用户再输命令) +3. 简报一句"已设 project_root=<选定路径>,下一步选工作流" + +CLI exit 1(路径不存在 / 不是目录 / 无权限)→ 报错给用户,重新呈现本 selector。 +""", + "workflow-choice": """## 选择器节点:工作流选择 + +**目的**:用户刚运行 /specode:spec <需求>,已进入 intake 阶段。 +在写 requirements.md / bugfix.md / design.md 之前,先决定走哪条 spec 工作流。 + +**上下文**:active spec=,phase=。 + +**前置动作(chat 简报,≤2 行)**:写一句"接到需求《...》,请选择工作流。" + +**调用 `AskUserQuestion` 工具**,参数完全按下列结构(直接传入,不要翻译/重写选项): + +questions: + - question: "工作流选择 —— 决定走哪条 spec 流程?" + header: "工作流选择" + multiSelect: false + options: + - label: "Requirements first" + description: "行为优先的新特性:先把 SHALL 写清楚,再补技术设计。" + - label: "Technical Design first" + description: "架构约束已知的新特性:先把 design.md 框架定下来,再反推 requirements。" + - label: "Bugfix" + description: "缺陷修复 / 回归测试:用 bugfix.md(Current/Expected/Unchanged)替代 requirements.md。" + +**约束**: +- 调用工具后立即 end turn 等待用户选择。 +- 不要在 chat 输出 markdown 列表 / 不要让用户回复编号。 +- 宿主工具自动提供 "Other" + ESC 取消,**禁止**自己加 "Type something" / "Chat about this" 保留位。 + +**用户选定后流程(同一 turn 内继续,不要 end turn 让用户输命令)** + +拿到 AskUserQuestion 选项后**先做歧义自检**(SKILL.md §「Pre-requirements Clarification(铁律)」),再决定下一步: + +**Step A — 歧义自检(必做)**:通读 `/.config.json.source_text` + 用户最近 turn 的补充,按 scope / behavior / UX / data / validation / acceptance 六维自问"要写 SHALL 或 design 时,是否任一维度需要我编一条规则填空?" 
+ +- **有阻塞性歧义且用户未明确放权** → 立即调 `AskUserQuestion` 呈现 `clarification-wizard`(类型 B,2-4 个子问题),**不**做 phase-transition、**不**写任何文档;用户答完 → 呈现 `clarification-done` 决定再问 / 进入文档生成。 +- **无歧义** 或 **用户已明确放权**(说过"由你决定"/"按业界默认"/"先 MVP" 等)→ 在 chat 显式声明"已自检无阻塞性歧义"或"用户已放权 X 部分",再进 Step B。 + +**Step B — 按工作流选项生成文档**: + +- 选 "Requirements first" → 调 `phase-transition --from intake --to requirements` → 按 SKILL.md §「Spec 文档生成」生成 `requirements.md` → 报路径 + 3-8 条变更要点 → 立即调 `AskUserQuestion` 呈现 `doc-confirm-requirements` selector → end turn 等用户对文档做决策 +- 选 "Technical Design first" → 同上但 `--to design` + 生成 `design.md` + 呈现 `doc-confirm-design` +- 选 "Bugfix" → 同上但 `--to bugfix` + 生成 `bugfix.md` + 呈现 `doc-confirm-bugfix` +- "Other"(用户文字输入)→ 按用户文字调整,必要时重新呈现 selector + +**写文档过程中冒出新歧义** → 立即**停写**,回到 Step A 补一轮 wizard,不要边写边 invent。 + +详细 phase 链见 `references/workflow.md` §2-§5;澄清铁律完整定义见 SKILL.md §「Pre-requirements Clarification(铁律)」。 +""", + "clarification-wizard": """## 选择器节点:需求澄清问答(wizard) + +**目的**:需求有歧义,必须在写 requirements.md / bugfix.md 之前**一次性**收齐 +影响 scope / behavior / UX / data / validation / acceptance 的 2-4 个阻塞性澄清点。 + +**上下文**:active spec=,phase=intake。 +源需求摘要: + +**前置动作(chat 简报,≤3 行)**:写一句"为避免 invent 业务规则,需要先确认 N 个关键点,请逐一回答。" + +**调用 `AskUserQuestion` 工具一次**,`questions` 数组传 **2-4 个 question 对象** +(每个 question 都是独立的 chip-tab,每个 multiSelect=false)。子问题与选项**由你结合源需求摘要 + 用户最近输入 + assets/templates 章节结构自行生成**——不要凭空 invent 业务规则。 + +参数格式示例(替换为你针对当前需求生成的具体子问题): + +questions: + - question: "<具体决策点 1 标题,必须是'是/否/选哪条'问题>" + header: "<≤12 字 chip 标签>" + multiSelect: false + options: + - label: "<选项 A>" + description: "<一句话解释 + trade-off>" + - label: "<选项 B>" + description: "<一句话解释 + trade-off>" + - question: "<具体决策点 2>" + header: "" + multiSelect: false + options: + - label: "<选项 A>" + description: "..." + - label: "<选项 B>" + description: "..." 
+ # 最多 4 个 question + +**约束**: +- 每个子问题必须是"是/否/选哪条"具体问题;禁止开放式叙述("你怎么想")。 +- 子问题之间**无依赖**——若有依赖应拆成两次 wizard。 +- 决策点 ≥ 5 个 → 只保留最阻塞的 4 个,其余记入 requirements.md "待确认问题" 节。 +- inputs 不足以构成阻塞决策点 → **不调本工具**,直接进 `clarification-done`。 +- 工具自动提供 "Other",**不要**手工加 "Type something" / "Chat about this" 保留位。 +- 调用工具后立即 end turn。 + +**用户选定后流程(同一 turn 内继续)** + +收齐子问题答案后**本 turn 内**: + +- 在 chat 简报 "已记录用户 N 个澄清回答" +- 立即调 `AskUserQuestion` 呈现 `clarification-done` selector 判断是否进入 requirements/bugfix 生成 +- end turn 等 `clarification-done` 决策 +""", + "clarification-done": """## 选择器节点:需求澄清是否完成? + +**目的**:上一轮 wizard 用户已回答;判断是否进入 requirements.md / bugfix.md 生成, +还是再发一轮 wizard 继续澄清。 + +**上下文**:active spec=,phase=intake。 + +**前置动作(chat 简报,≤2 行)**:写一句"已记录用户的 N 个澄清回答,请确认下一步。" + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "需求澄清是否完成?" + header: "澄清完成?" + multiSelect: false + options: + - label: "进入下一阶段(推荐)" + description: "用户回答已覆盖所有阻塞项,可开始写 requirements.md / bugfix.md。" + - label: "继续澄清" + description: "还有未解决的歧义,再发一轮 wizard。" + +**约束**: +- 调用工具后立即 end turn。 +- 不要复述选项 / 不要让用户回复编号。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "进入下一阶段" → 按 spec workflow(看 `/.config.json.workflow`)调 `phase-transition --from intake --to ` → 按 SKILL.md §「Spec 文档生成」生成对应文档 → 报路径+摘要 → 立即呈现 `doc-confirm-` selector +- 选 "继续澄清" → 重新调 `AskUserQuestion` 呈现 `clarification-wizard` 收新一轮澄清点 +""", + "doc-confirm-requirements": """## 选择器节点:requirements.md 文档确认 + +**目的**:requirements.md 已生成 / 更新;让用户确认是否进入 design phase, +或者先看全文 / 继续修改。 + +**上下文**:active spec=,phase=。 +刚生成的文档:/requirements.md + +**前置动作(chat 简报,≤8 行)**:列出 3-8 条**关键变更要点**(文件路径 + 章节增量 + 未决问题)。 +绝对不要 reprint 文档全文。 + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "requirements.md 已生成。下一步?" 
+ header: "需求确认" + multiSelect: false + options: + - label: "确认(推荐)" + description: "文档内容符合预期,进入设计(design)环节。" + - label: "查看全文" + description: "在 chat 完整 echo 该文档(不进入设计环节)。" + - label: "继续沟通" + description: "文档需要修改,告诉你具体怎么改。" + +**约束**: +- 调用工具后立即 end turn。 +- 简报必须在工具调用**之前**输出。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "确认" → 调 `phase-transition --from requirements --to design` → 按 SKILL.md §「Spec 文档生成」生成 `design.md` → 报路径+摘要 → 立即呈现 `doc-confirm-design` selector +- 选 "查看全文" → 在 chat 完整 echo `requirements.md`(无任何额外解释)→ 重新调 `AskUserQuestion` 呈现 `doc-confirm-requirements` selector +- 选 "继续沟通" → end turn 等用户文字反馈 → 下一 turn 按反馈 Edit `requirements.md` → 报变更要点 → 重新呈现 `doc-confirm-requirements` +""", + "doc-confirm-bugfix": """## 选择器节点:bugfix.md 文档确认 + +**目的**:bugfix.md 已生成 / 更新;让用户确认是否进入 design phase, +或者先看全文 / 继续修改。 + +**上下文**:active spec=,phase=。 +刚生成的文档:/bugfix.md + +**前置动作(chat 简报,≤8 行)**:列出 3-8 条关键变更要点 +(Current / Expected / Unchanged 段落增量 + 复现步骤 + 影响范围)。 + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "bugfix.md 已生成。下一步?" + header: "缺陷确认" + multiSelect: false + options: + - label: "确认(推荐)" + description: "文档内容符合预期,进入设计(design)环节。" + - label: "查看全文" + description: "在 chat 完整 echo 该文档(不进入设计环节)。" + - label: "继续沟通" + description: "文档需要修改,告诉你具体怎么改。" + +**约束**: +- 调用工具后立即 end turn。 +- 简报必须在工具调用**之前**输出。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "确认" → 调 `phase-transition --from bugfix --to design` → 按 SKILL.md §「Spec 文档生成」生成 `design.md` → 报路径+摘要 → 立即呈现 `doc-confirm-design` selector +- 选 "查看全文" → 在 chat 完整 echo `bugfix.md` → 重新呈现 `doc-confirm-bugfix` +- 选 "继续沟通" → end turn 等用户反馈 → 下一 turn 按反馈 Edit `bugfix.md` → 重新呈现 `doc-confirm-bugfix` +""", + "doc-confirm-design": """## 选择器节点:design.md 文档确认 + +**目的**:design.md 已生成 / 更新;让用户确认是否进入 tasks phase, +或者先看全文 / 继续修改。 + +**上下文**:active spec=,phase=。 +刚生成的文档:/design.md + +**前置动作(chat 简报,≤8 行)**:列出 3-8 条关键变更要点 +(架构图变化 + 接口签名 + 数据模型字段 + 风险 / 偏离)。 + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "design.md 已生成。下一步?" 
+ header: "设计确认" + multiSelect: false + options: + - label: "确认(推荐)" + description: "文档内容符合预期,进入任务拆分(tasks)环节。" + - label: "查看全文" + description: "在 chat 完整 echo 该文档(不进入任务拆分环节)。" + - label: "继续沟通" + description: "文档需要修改,告诉你具体怎么改。" + +**约束**: +- 调用工具后立即 end turn。 +- 简报必须在工具调用**之前**输出。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "确认" → 调 `phase-transition --from design --to tasks` → 按 SKILL.md §「Spec 文档生成」生成 `tasks.md`(task-swarm 兼容格式:`## 阶段 N:` + `- [ ] N.M ... @writes:... _需求:x.y_`)→ 报路径 + 任务计数 + 主要阶段摘要 → 立即呈现 `tasks-execution` selector +- 选 "查看全文" → 在 chat 完整 echo `design.md` → 重新呈现 `doc-confirm-design` +- 选 "继续沟通" → end turn 等用户反馈 → 下一 turn 按反馈 Edit `design.md` → 重新呈现 `doc-confirm-design` +""", + "tasks-execution": """## 选择器节点:任务执行选择(合并 0.9.2 旧 doc-confirm-tasks) + +**目的**:tasks.md 已生成;让用户在一个选择器里同时完成「确认 tasks.md」+「选择执行方式」+「回退(需要调整)」+「暂不 coding」。0.9.3 起废弃单独的 doc-confirm-tasks 选择器,「需要调整 tasks.md」作为本选择器的回退出口。 + +**上下文**:active spec=,phase=tasks。 +required 任务数:,optional 任务数:。 + +**前置动作(chat 简报,≤8 行)**: +- 列出**任务计数**(required N 个,optional M 个) +- 列出**主要阶段**与 traceability(`_需求:x.y_` 标签) +- 标注同文件冲突的 stage(影响 task-swarm group 切分) + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "tasks.md 已生成。怎么执行?" 
+ header: "执行方式" + multiSelect: false + options: + - label: "task-swarm + validator 自动验收(推荐)" + description: "多 coder 并发 + reviewer + validator 自动 fix loop 到 pass。最稳但 validator 一轮耗时长。" + - label: "task-swarm + 人工验收(跳过 validator)" + description: "多 coder 并发 + reviewer + p0-fix,**跳过** validator/v-fix 循环;代码正确性由用户事后人工核验。省 validator 那一轮耗时;有问题再跟模型常规对话沟通。" + - label: "顺序执行(同时处理 optional)" + description: "单 agent 逐个推进 required + optional 任务,[ ] → [~] → [x]。如需只跑 required,可在 Other 输入说明。" + - label: "暂停 / 调整 tasks.md" + description: "tasks 不符合预期需要调整,或暂不开始 coding(Other 输入说明具体哪种)。" + +**约束**: +- 4 个选项已占满工具上限;细化需求(如只跑 required / 跳过某 optional)走 "Other" 输入。 +- 调用工具后立即 end turn。 +- 简报必须在工具调用**之前**输出。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "task-swarm + validator 自动验收" → 走 `/specode:task-swarm` 第二步 `init`(默认 full 模式)+ 第三步 7 步循环(详见 `commands/task-swarm.md` + `references/task-swarm.md`) +- 选 "task-swarm + 人工验收(跳过 validator)" → 同上但 `init` 时加 `--skip-validator` flag;流程走到 p0-fix 完成后直接 writeback,不进 validation/v-fix;writeback 后提醒用户人工 review 代码 + `/specode:end` 退出 spec 模式后跟模型常规对话沟通调整 +- 选 "顺序执行" → 调 `phase-transition --from tasks --to implementation` → 单 agent 按 `tasks.md` checkbox 顺序逐个推进 +- 选 "暂停 / 调整 tasks.md" → end turn 等用户反馈:若是调整 → 下一 turn Edit `tasks.md` → 重新呈现本 selector;若是暂停 → 留在 tasks phase,随时 `/specode:end` 退出或后续 `/specode:continue ` 续接 +""", + "takeover-options": """## 选择器节点:接管选项 + +**目的**:/specode:continue 命中 LockHeld;让用户选择强制接管 / 只读查看 / 取消。 + +**上下文**:active spec=,phase=。 +锁持有者: (前 8 位),最近 heartbeat: 。 + +**前置动作(chat 简报,≤2 行)**:写一句"spec '' 已被 持有,请选择处理方式。" + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "该 spec 已被其他会话窗口持有,怎么处理?" 
+ header: "接管选项" + multiSelect: false + options: + - label: "强制接管" + description: "驱逐对方锁,本会话成为新锁主;对方下一次写操作会被 verify-lock 拒绝。" + - label: "只读查看" + description: "不持锁,加载文档进入只读模式;所有 Edit/Write 在 SKILL.md 层面被劝阻。" + - label: "取消" + description: "不接管,关闭本次 /specode:continue。" + +**约束**: +- **不给"(推荐)"标记**——让用户根据对方是否仍活跃自己判断。 +- 调用工具后立即 end turn。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "强制接管" → 调 `acquire --force --spec --session ` → `load` → `continue` → 报告 "已强制接管:" + 状态行 footer +- 选 "只读查看" → **跳** `acquire`(不持锁)→ `load --spec ` 拿数据 → 写 `sessions/.json.mode=readonly` → 报告 "已只读加载:(持锁者:)" + footer(含 `[只读]` 标记) +- 选 "取消" → end turn,不调任何 CLI +""", + "acceptance-gate": """## 选择器节点:验收门 + +**目的**:acceptance phase;tasks.md 全部 `[x]` 完成后,判断是否通过验收进入 iteration,或者回到 requirements / design / tasks 继续修改。 + +**上下文**:active spec=,phase=acceptance。 +任务完成度:/。 + +**前置动作(chat 简报,≤3 行)**: +- 列出 tasks.md 完成度(done/total)。 +- 调用 `spec_lint.py --spec ` 把 WARNING 列出来(traceability / log / EARS 三类,如有)。 +- 若 tasks.md 末尾 `## 测试要点` 章节存在,简述本次需要测试人员关注的要点;测试要点是参考信息,不参与验收门判定。 + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "验收结论?" 
+ header: "验收门" + multiSelect: false + options: + - label: "验收通过,进入 iteration(推荐)" + description: "所有任务完成;如有后续调整走 iteration 子循环。" + - label: "继续修改" + description: "仍有未完成任务 / lint WARNING 需处理,回到 requirements / design / tasks 调整。" + +**约束**: +- n_done == n_total 时推荐选 1;否则**移除"(推荐)"标记**。 +- 调用工具后立即 end turn。 + +**用户选定后流程(同一 turn 内继续)** + +- 选 "验收通过,进入 iteration" → 调 `phase-transition --from acceptance --to iteration` → 在 chat 用 1-2 行告知"已进入 iteration(已交付常驻态),如需新一轮调整请直接提出(如『加个 X 功能』『改一下 Y 需求』),或 `/specode:end` 退出 spec 模式" → **end turn**。**不要**自动呈现 `iteration-scope`——它只在用户后续显式提出迭代调整时才呈现。 +- 选 "继续修改" → end turn 等用户文字反馈 → 下一 turn 根据反馈判断回到哪个 phase:改需求 → `phase-transition --to requirements`;改设计 → `--to design`;改任务 → `--to tasks`;改实现 → 留 implementation phase Edit 代码 + 更新 `tasks.md` +""", + "iteration-scope": """## 选择器节点:iteration 调整范围(多选) + +**目的**:用户在 iteration 默认停留态**显式**提出了新一轮调整意图(如"加个 X 功能"/"改下 Y 需求"/"重跑下测试");确定本轮 iteration 调整哪些文档/动作。 + +**触发条件(必须满足之一才可呈现)**: +- 用户当前 turn 在 chat 里明确表达了下一轮迭代/调整意图; +- **不**在 `acceptance-gate` 选「验收通过」后自动呈现——验收通过只切 phase 到 iteration,end turn 等用户提; +- **不**在 `/specode:continue` 一个 phase=iteration 的 spec 时自动呈现——恢复后停在 chat 等用户提。 + +**上下文**:active spec=,phase=iteration。 + +**前置动作(chat 简报,≤2 行)**:用 1-2 行复述用户提出的调整意图(如"你提到要加 X 功能 + 调整 Y 验收标准,请勾选本轮调整范围"),让用户确认 selector 选项与意图对得上。 + +**调用 `AskUserQuestion` 工具**,注意 **multiSelect=true**: + +questions: + - question: "本轮 iteration 要调整哪些文档/动作?(可多选)" + header: "迭代范围" + multiSelect: true + options: + - label: "改 requirements" + description: "新增 / 修改 EARS SHALL 条款。" + - label: "改 design" + description: "架构 / 接口 / 数据模型调整。" + - label: "改 tasks" + description: "新增任务或调整已有任务范围。" + - label: "重跑测试" + description: "不改文档,重新验证当前实现。" + +**约束**: +- multiSelect=true(**唯一**使用类型 C 复选框的场景)。 +- 允许用户全不选(视为本轮 iteration 取消);ESC 等价。 +- 调用工具后立即 end turn。 + +**用户选定后流程(同一 turn 内继续)** + +iteration-scope 是多选(multiSelect=true),用户可勾选 1-4 项或全不选。按 phase 序列从前往后依次处理勾选项(同一 turn 内串行 phase-transition + 文档生成 + 对应 `doc-confirm-*` selector): + +- 
勾 "改 requirements" → `phase-transition --from iteration --to requirements` → 按 SKILL.md §「Spec 文档生成」修订 `requirements.md` → 呈现 `doc-confirm-requirements`(修订版) +- 勾 "改 design" → `--to design` + 修 `design.md` → 呈现 `doc-confirm-design` +- 勾 "改 tasks" → `--to tasks` + 修 `tasks.md` → 呈现 `tasks-execution` +- 勾 "重跑测试" → 留 iteration phase,执行 `tasks.md` 末尾验证命令 / `## 测试要点` 节中的检查项 + 报告结果 +- 全不选 / ESC → 视为本轮 iteration 取消,留 acceptance phase,告知用户随时 `/specode:end` 或再次进入 acceptance-gate +""", +} + + +def _fill_selector(key: str, ctx: dict[str, str]) -> Optional[str]: + tpl = SELECTOR_PROMPTS.get(key) + if not tpl: + return None + out = tpl + for k, v in ctx.items(): + out = out.replace(f"<{k}>", str(v)) + return out diff --git a/plugins/specode/scripts/spec_session/cli.py b/plugins/specode/scripts/spec_session/cli.py new file mode 100755 index 0000000..186d4fa --- /dev/null +++ b/plugins/specode/scripts/spec_session/cli.py @@ -0,0 +1,210 @@ +'''spec_session.cli — argparse + COMMANDS dispatch + main 入口。 + +由 `scripts/spec_session.py` launcher 调用(launcher 负责 utf-8 reconfigure 与 +sys.path 注入)。完整业务/hook 实现见同包内: + + _io.py 原子写 / session+spec config 读写 / 锁工具 / 共享常量 + _selectors.py SELECTOR_PROMPTS 字典 + _fill_selector + _reminders.py reminder 模板字符串 + help 文本渲染 + _business.py 所有 cmd_* 业务命令 + _hooks.py 所有 hook_on_* + safe wrapper + task-swarm plan 辅助 + _catalog.py on-user-prompt-catalog reference 提示 hook + +业务子命令(被 SKILL.md 引导主会话调用;都接 --session): + acquire / release / heartbeat / verify-lock / phase-transition + load / continue / end / status / read-session / list-specs / set-project-root + +hook 子命令(仅由 hooks/hooks.json 调用;全部 exit 0,仅注入提示;唯一例外: +PreToolUse 对 task-swarm 受控路径与 tasks.md 直写的 exit 2 强阻断): + on-session-start / on-user-prompt / on-user-prompt-catalog / on-stop + on-session-end / on-task-completed / on-heartbeat-quiet / on-pre-tool-use + on-log-pre-tool-use / on-log-post-tool-use + +强制写入语义: + - 任何修改 sessions/.json 或 /.config.json 的命令必须 tempfile + + os.replace + fsync。 + - 写失败 → 
整命令视失败、回滚已变更的另一份文件、exit 1。 + +stdlib-only。 +''' +from __future__ import annotations + +import argparse +import contextlib +import sys +from typing import Optional + +# spec_log 兜底:launcher 已把 scripts/ 注入 sys.path,spec_log import 应可用。 +try: + from spec_log import write_event as _log_event # type: ignore +except Exception: + def _log_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + return None + +# 业务命令 +from spec_session._business import ( # noqa: E402 + cmd_acquire, + cmd_continue, + cmd_end, + cmd_heartbeat, + cmd_list_specs, + cmd_load, + cmd_phase_transition, + cmd_read_session, + cmd_release, + cmd_set_project_root, + cmd_status, + cmd_verify_lock, +) + +# Hook handlers +from spec_session._hooks import ( # noqa: E402 + hook_on_heartbeat_quiet, + hook_on_log_post_tool_use, + hook_on_log_pre_tool_use, + hook_on_pre_tool_use, + hook_on_session_end, + hook_on_session_start, + hook_on_stop, + hook_on_task_completed, + hook_on_user_prompt, +) +from spec_session._catalog import hook_on_user_prompt_catalog # noqa: E402 + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="spec_session.py", description="specode session / lock / hook entry") + sub = parser.add_subparsers(dest="cmd", required=True) + + p = sub.add_parser("acquire") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + p.add_argument("--force", action="store_true") + + p = sub.add_parser("release") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + + p = sub.add_parser("heartbeat") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + + p = sub.add_parser("verify-lock") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + + p = sub.add_parser("phase-transition") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + p.add_argument("--from", 
dest="frm", required=True) + p.add_argument("--to", required=True) + + p = sub.add_parser("load") + p.add_argument("--spec", required=True) + + p = sub.add_parser("continue") + p.add_argument("--spec", required=True) + p.add_argument("--session", required=True) + p.add_argument("--force", action="store_true") + p.add_argument("--readonly", action="store_true") + + p = sub.add_parser("end") + p.add_argument("--session", required=True) + + p = sub.add_parser("set-project-root") + p.add_argument("--spec", required=True, help="spec 目录") + p.add_argument("--session", required=True, help="必须是 lock holder") + p.add_argument("--root", required=True, help="项目实现根目录(绝对路径,不存在则 mkdir)") + + p = sub.add_parser("status") + p.add_argument("--session", required=True) + + p = sub.add_parser("read-session") + p.add_argument("--session", required=True) + + p = sub.add_parser("list-specs") + p.add_argument("--root", default=None, + help="doc root override;缺省按三层 resolve_doc_root") + + # hook 子命令(无必需参数;从 stdin 拿 session_id) + for name in ( + "on-session-start", + "on-user-prompt", + "on-user-prompt-catalog", + "on-stop", + "on-session-end", + "on-task-completed", + "on-heartbeat-quiet", + "on-pre-tool-use", + "on-log-pre-tool-use", + "on-log-post-tool-use", + ): + ph = sub.add_parser(name) + ph.add_argument("--session-override", default=None, + help="测试用:覆盖 stdin payload 中的 session_id") + if name == "on-heartbeat-quiet": + ph.add_argument("--quiet", action="store_true") + + return parser + + +COMMANDS = { + "acquire": cmd_acquire, + "release": cmd_release, + "heartbeat": cmd_heartbeat, + "verify-lock": cmd_verify_lock, + "phase-transition": cmd_phase_transition, + "load": cmd_load, + "continue": cmd_continue, + "end": cmd_end, + "set-project-root": cmd_set_project_root, + "status": cmd_status, + "read-session": cmd_read_session, + "list-specs": cmd_list_specs, + "on-session-start": hook_on_session_start, + "on-user-prompt": hook_on_user_prompt, + "on-user-prompt-catalog": 
hook_on_user_prompt_catalog, + "on-stop": hook_on_stop, + "on-session-end": hook_on_session_end, + "on-task-completed": hook_on_task_completed, + "on-heartbeat-quiet": hook_on_heartbeat_quiet, + "on-pre-tool-use": hook_on_pre_tool_use, + "on-log-pre-tool-use": hook_on_log_pre_tool_use, + "on-log-post-tool-use": hook_on_log_post_tool_use, +} + + +def main(argv: Optional[list[str]] = None) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + fn = COMMANDS.get(args.cmd) + if fn is None: + parser.print_help() + return 1 + # log cli 调用(0.10.0+;只记业务命令,hook 调用由 _safe_hook 已记) + if not args.cmd.startswith("on-"): + with contextlib.suppress(Exception): + session_id = getattr(args, "session", None) or getattr(args, "session_override", None) + _log_event("cli_call", { + "script": "spec_session.py", + "cmd": args.cmd, + "spec": getattr(args, "spec", None), + "phase_from": getattr(args, "frm", None), + "phase_to": getattr(args, "to", None), + "force": getattr(args, "force", False), + "readonly": getattr(args, "readonly", False), + }, session_id=session_id) + rc = fn(args) or 0 + if not args.cmd.startswith("on-"): + with contextlib.suppress(Exception): + session_id = getattr(args, "session", None) or getattr(args, "session_override", None) + _log_event("cli_exit", {"script": "spec_session.py", "cmd": args.cmd, "exit_code": rc}, session_id=session_id) + return rc + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_state.py b/plugins/specode/scripts/spec_state.py deleted file mode 100644 index 5299aa4..0000000 --- a/plugins/specode/scripts/spec_state.py +++ /dev/null @@ -1,533 +0,0 @@ -# State probe for the specode plugin's hooks. -# -# Read-only against existing specode artifacts (.active-specode.json, -# /.config.json) — state authoring stays with spec_session.py / -# spec_init.py, driven by the slash commands. 
This module only: -# - discovers the configured document_root -# - detects whether any spec is currently active (and where) -# - maintains ~/.specode/.any-active sentinel for hooks.json shell short-circuit -# - records/clears per-Claude-session metadata under ~/.specode/sessions/ -# -# Phase 2 scope. Phase 4 will refine session-id binding (TERM_SESSION_ID -> spec -# session linkage), turn ledger, etc. - -from __future__ import annotations - -import argparse -import json -import os -import sys -import time -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Optional - -sys.path.insert(0, str(Path(__file__).resolve().parent)) -import spec_telemetry # noqa: E402 - - -USER_CONFIG = Path.home() / ".config/specode/config.json" -SPECODE_DIR = Path.home() / ".specode" -SESSIONS_DIR = SPECODE_DIR / "sessions" -ANY_ACTIVE_SENTINEL = SPECODE_DIR / ".any-active" -AUDIT_DIR = Path( - os.environ.get("SPECODE_AUDIT_DIR") - or SPECODE_DIR / "audit" -) - - -def now_iso() -> str: - return datetime.now(timezone.utc).isoformat() - - -def _read_json(path: Path) -> Optional[dict]: - if not path.exists(): - return None - try: - return json.loads(path.read_text(encoding="utf-8")) - except Exception: - return None - - -def get_document_root() -> Optional[Path]: - cfg = _read_json(USER_CONFIG) - if not cfg: - return None - raw = ( - cfg.get("obsidianRoot") - or cfg.get("specRoot") - or cfg.get("documentRoot") - ) - if not raw: - vault = cfg.get("vaultPath") - if vault: - raw = str(Path(vault) / "spec-in") - return Path(raw).expanduser() if raw else None - - -def find_active_spec(prefer_session_id: Optional[str] = None) -> Optional[dict]: - """Return info about an active spec on this machine, or None. - - Selection order: - 1. Session whose id == prefer_session_id (typically $TERM_SESSION_ID). - 2. Most recently active session by lastActivityAt. 
- """ - root = get_document_root() - if not root or not root.exists(): - return None - active = _read_json(root / ".active-specode.json") - if not active: - return None - sessions = active.get("sessions") or {} - - candidates = [] - for sid, entry in sessions.items(): - if entry.get("status") != "active": - continue - slug = entry.get("specSlug") - if not slug: - continue - candidates.append((sid, entry, root / slug)) - - if not candidates: - return None - - # Mirror spec_session.normalize_session_id's fallback: when no env-derived - # id is provided, the writer used "default" — so look that up first before - # falling back to lastActivityAt ordering. - lookup_sids: list[str] = [] - if prefer_session_id: - lookup_sids.append(prefer_session_id) - else: - lookup_sids.append("default") - for target in lookup_sids: - for sid, entry, spec_dir in candidates: - if sid == target: - return _build_spec_info(sid, entry, spec_dir) - - candidates.sort(key=lambda c: c[1].get("lastActivityAt") or "", reverse=True) - sid, entry, spec_dir = candidates[0] - return _build_spec_info(sid, entry, spec_dir) - - -def _build_spec_info(sid: str, entry: dict, spec_dir: Path) -> dict: - current_phase = "unknown" - spec_config = _read_json(spec_dir / ".config.json") - if spec_config: - current_phase = spec_config.get("currentPhase") or current_phase - return { - "spec_slug": entry.get("specSlug"), - "spec_dir": str(spec_dir), - "current_phase": current_phase, - "session_id": sid, - "spec_id": entry.get("specId"), - "last_activity_at": entry.get("lastActivityAt"), - } - - -def render_status_block(info: dict) -> str: - return ( - "=== specode active ===\n" - f"spec: {info.get('spec_slug')}\n" - f"phase: {info.get('current_phase')}\n" - f"spec_dir: {info.get('spec_dir')}\n" - f"session_id: {info.get('session_id')}\n" - f"last activity: {info.get('last_activity_at')}\n" - "=========================" - ) - - -def sync_any_active_sentinel() -> bool: - is_active = find_active_spec() is not None - 
SPECODE_DIR.mkdir(parents=True, exist_ok=True) - if is_active: - ANY_ACTIVE_SENTINEL.touch(exist_ok=True) - else: - try: - ANY_ACTIVE_SENTINEL.unlink() - except FileNotFoundError: - pass - return is_active - - -def write_claude_session(claude_session_id: str, payload: dict) -> None: - if not claude_session_id: - return - SESSIONS_DIR.mkdir(parents=True, exist_ok=True) - target = SESSIONS_DIR / f"{claude_session_id}.json" - record = { - "claude_session_id": claude_session_id, - "term_session_id": os.environ.get("TERM_SESSION_ID"), - "started_at": now_iso(), - "cwd": payload.get("cwd"), - "transcript_path": payload.get("transcript_path"), - } - target.write_text( - json.dumps(record, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) - - -def clear_claude_session(claude_session_id: str) -> None: - if not claude_session_id: - return - target = SESSIONS_DIR / f"{claude_session_id}.json" - try: - target.unlink() - except FileNotFoundError: - pass - - -# --- audit log readers ------------------------------------------------------ - -def _audit_log_for(date: Optional[str]) -> Path: - d = date or datetime.now(timezone.utc).strftime("%Y-%m-%d") - return AUDIT_DIR / f"{d}.log" - - -def _fmt_audit_line(rec: dict) -> str: - ts = rec.get("ts") or "" - event = rec.get("event") or "" - decision = rec.get("decision") or "" - tool = rec.get("tool") or "-" - msg = rec.get("msg") or "" - return f"{ts} {event:<18} {decision:<24} {tool:<10} {msg}" - - -# --- CLI ------------------------------------------------------------------- - -def _cmd_status(_args: argparse.Namespace) -> int: - info = find_active_spec(prefer_session_id=os.environ.get("TERM_SESSION_ID")) - print(json.dumps(info, ensure_ascii=False, indent=2)) - return 0 - - -def _cmd_sync_sentinel(_args: argparse.Namespace) -> int: - active = sync_any_active_sentinel() - print(f"any-active: {active}") - print(f"sentinel: {'exists' if ANY_ACTIVE_SENTINEL.exists() else 'missing'}") - return 0 - - -def 
_cmd_demo_activate(args: argparse.Namespace) -> int: - """Fabricate an active spec entry under document_root for Phase 2 testing. - - Creates //.config.json minimal contents + appends to - .active-specode.json. Idempotent: re-running updates lastActivityAt. - """ - root = Path(args.root).expanduser() if args.root else get_document_root() - if not root: - print("ERR: no document_root configured (~/.config/specode/config.json missing)", file=sys.stderr) - return 2 - root.mkdir(parents=True, exist_ok=True) - - slug = args.slug - spec_dir = root / slug - spec_dir.mkdir(parents=True, exist_ok=True) - - spec_config_path = spec_dir / ".config.json" - spec_config = _read_json(spec_config_path) or {} - spec_config.setdefault("specId", f"demo-{slug}-{int(datetime.now().timestamp())}") - spec_config["slug"] = slug - spec_config["currentPhase"] = args.phase - spec_config_path.write_text( - json.dumps(spec_config, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) - - active_path = root / ".active-specode.json" - active = _read_json(active_path) or { - "version": 2, - "documentRoot": str(root.resolve()), - "updatedAt": None, - "sessions": {}, - } - sid = args.session or os.environ.get("TERM_SESSION_ID") or "demo-phase-2" - active["sessions"][sid] = { - "sessionId": sid, - "specSlug": slug, - "specId": spec_config["specId"], - "status": "active", - "boundAt": now_iso(), - "lastActivityAt": now_iso(), - } - active["updatedAt"] = now_iso() - active_path.write_text( - json.dumps(active, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) - - sync_any_active_sentinel() - print(f"✓ demo spec '{slug}' activated under {root}") - print(f" spec_dir: {spec_dir}") - print(f" session: {sid}") - print(f" phase: {args.phase}") - print(f" sentinel: {'exists' if ANY_ACTIVE_SENTINEL.exists() else 'missing'}") - return 0 - - -def _cmd_demo_deactivate(args: argparse.Namespace) -> int: - root = Path(args.root).expanduser() if args.root else get_document_root() - if not root: - 
print("ERR: no document_root configured", file=sys.stderr) - return 2 - active_path = root / ".active-specode.json" - active = _read_json(active_path) - if not active: - print("(no .active-specode.json to clear)") - sync_any_active_sentinel() - return 0 - sid = args.session or os.environ.get("TERM_SESSION_ID") or "demo-phase-2" - sessions = active.get("sessions") or {} - if sid in sessions: - sessions[sid]["status"] = "ended" - sessions[sid]["lastActivityAt"] = now_iso() - active["updatedAt"] = now_iso() - active_path.write_text( - json.dumps(active, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) - print(f"✓ session '{sid}' marked ended") - else: - print(f"(session '{sid}' not found in active map)") - sync_any_active_sentinel() - print(f" sentinel: {'exists' if ANY_ACTIVE_SENTINEL.exists() else 'missing'}") - return 0 - - -def _cmd_audit_tail(args: argparse.Namespace) -> int: - path = _audit_log_for(args.date) - if not path.exists(): - print(f"(no audit log at {path})", file=sys.stderr) - return 0 - raw = "json" if args.json else "text" - with path.open("r", encoding="utf-8") as f: - lines = f.readlines() - for line in lines[-args.n:]: - line = line.rstrip("\n") - if not line: - continue - if raw == "json": - print(line) - continue - try: - print(_fmt_audit_line(json.loads(line))) - except json.JSONDecodeError: - print(line) - if not args.follow: - return 0 - with path.open("r", encoding="utf-8") as f: - f.seek(0, 2) - try: - while True: - line = f.readline() - if not line: - time.sleep(0.5) - continue - line = line.rstrip("\n") - if raw == "json": - print(line, flush=True) - else: - try: - print(_fmt_audit_line(json.loads(line)), flush=True) - except json.JSONDecodeError: - print(line, flush=True) - except KeyboardInterrupt: - return 0 - - -def _cmd_audit_summary(args: argparse.Namespace) -> int: - if not AUDIT_DIR.exists(): - print(f"(no audit dir at {AUDIT_DIR})", file=sys.stderr) - return 0 - files = sorted(AUDIT_DIR.glob("*.log")) - if args.days 
and args.days > 0: - files = files[-args.days:] - by_event: dict[str, int] = {} - by_decision: dict[str, int] = {} - deny_lines: list[str] = [] - total = 0 - for fp in files: - try: - content = fp.read_text(encoding="utf-8") - except OSError: - continue - for line in content.splitlines(): - if not line.strip(): - continue - try: - rec = json.loads(line) - except json.JSONDecodeError: - continue - total += 1 - ev = rec.get("event") or "?" - dec = rec.get("decision") or "?" - by_event[ev] = by_event.get(ev, 0) + 1 - by_decision[dec] = by_decision.get(dec, 0) + 1 - if dec.startswith("deny") and len(deny_lines) < args.show_deny: - deny_lines.append(_fmt_audit_line(rec)) - print(f"audit summary: {total} records across {len(files)} log file(s)") - if files: - print(f" range: {files[0].stem} → {files[-1].stem}") - print("\nby event:") - for k, v in sorted(by_event.items(), key=lambda x: (-x[1], x[0])): - print(f" {v:7d} {k}") - print("\nby decision:") - for k, v in sorted(by_decision.items(), key=lambda x: (-x[1], x[0])): - print(f" {v:7d} {k}") - if deny_lines: - print(f"\nrecent denies (up to {args.show_deny}):") - for line in deny_lines[-args.show_deny:]: - print(f" {line}") - return 0 - - -def _cmd_telemetry_summary(args: argparse.Namespace) -> int: - if not spec_telemetry.is_enabled() and not args.force: - print( - f"(telemetry is disabled — set {spec_telemetry._ENV_FLAG}=on to record events,\n" - f" or pass --force to read the existing file anyway)", - file=sys.stderr, - ) - path = spec_telemetry._env_path() - rotated = spec_telemetry._rotated_for(path) - if not path.exists() and not rotated.exists(): - print(f"(no telemetry file at {path})", file=sys.stderr) - return 0 - - cutoff: Optional[datetime] = None - if args.days and args.days > 0: - cutoff = datetime.now(timezone.utc) - timedelta(days=args.days) - - by_event: dict[str, int] = {} - by_inv: dict[str, int] = {} - spec_phase_transitions: dict[str, int] = {} - spec_inv_counts: dict[str, int] = {} - swarm_runs: 
dict[str, dict] = {} - total = 0 - - for rec in spec_telemetry.iter_records(path): - if cutoff is not None: - try: - ts = datetime.fromisoformat(rec.get("ts", "")) - except ValueError: - ts = None - if ts is None or ts < cutoff: - continue - total += 1 - ev = rec.get("event", "?") - by_event[ev] = by_event.get(ev, 0) + 1 - - if ev == "spec.phase_transition": - slug = rec.get("spec_slug") or "?" - spec_phase_transitions[slug] = spec_phase_transitions.get(slug, 0) + 1 - elif ev == "inv.violation": - inv = rec.get("inv") or "?" - by_inv[inv] = by_inv.get(inv, 0) + 1 - slug = rec.get("spec_slug") or "?" - spec_inv_counts[slug] = spec_inv_counts.get(slug, 0) + 1 - elif ev == "swarm.run_start": - rid = rec.get("run_id") or "?" - swarm_runs.setdefault(rid, {})["start"] = rec - elif ev == "swarm.stage_done": - rid = rec.get("run_id") or "?" - run = swarm_runs.setdefault(rid, {}) - run.setdefault("stages_done", []).append(rec) - elif ev == "swarm.run_end": - rid = rec.get("run_id") or "?" - swarm_runs.setdefault(rid, {})["end"] = rec - - if args.json: - rounds_per_stage: list[int] = [] - for run in swarm_runs.values(): - for s in run.get("stages_done") or []: - rounds = s.get("rounds") or {} - total_rounds = sum(v for v in rounds.values() if isinstance(v, int)) - if total_rounds: - rounds_per_stage.append(total_rounds) - avg = (sum(rounds_per_stage) / len(rounds_per_stage)) if rounds_per_stage else 0 - print(json.dumps({ - "total_records": total, - "by_event": by_event, - "by_inv": by_inv, - "phase_transitions_by_spec": spec_phase_transitions, - "inv_violations_by_spec": spec_inv_counts, - "swarm_runs": len(swarm_runs), - "swarm_avg_total_rounds_per_stage": round(avg, 2), - }, ensure_ascii=False, indent=2)) - return 0 - - print(f"telemetry summary: {total} record(s) from {path}") - if cutoff is not None: - print(f" window: last {args.days} day(s)") - print("\nby event:") - for k, v in sorted(by_event.items(), key=lambda x: (-x[1], x[0])): - print(f" {v:6d} {k}") - if 
by_inv: - print("\ninvariant violations (top):") - for k, v in sorted(by_inv.items(), key=lambda x: (-x[1], x[0])): - print(f" {v:6d} {k}") - if spec_phase_transitions: - print("\nphase transitions per spec (top 10):") - ranked = sorted(spec_phase_transitions.items(), key=lambda x: (-x[1], x[0]))[:10] - for slug, count in ranked: - print(f" {count:6d} {slug}") - if spec_inv_counts: - print("\ninv violations per spec (top 10):") - ranked = sorted(spec_inv_counts.items(), key=lambda x: (-x[1], x[0]))[:10] - for slug, count in ranked: - print(f" {count:6d} {slug}") - if swarm_runs: - rounds_per_stage: list[int] = [] - for run in swarm_runs.values(): - for s in run.get("stages_done") or []: - rounds = s.get("rounds") or {} - total_rounds = sum(v for v in rounds.values() if isinstance(v, int)) - if total_rounds: - rounds_per_stage.append(total_rounds) - avg = (sum(rounds_per_stage) / len(rounds_per_stage)) if rounds_per_stage else 0 - print(f"\ntask-swarm: {len(swarm_runs)} run(s)") - print(f" avg total rounds per converged/failed stage: {avg:.2f}") - print(f" stages with recorded rounds: {len(rounds_per_stage)}") - return 0 - - -def main(argv) -> int: - p = argparse.ArgumentParser(prog="spec_state.py") - sub = p.add_subparsers(dest="cmd", required=True) - sub.add_parser("status", help="Print active-spec info (JSON) or null") - sub.add_parser("sync-sentinel", help="Re-sync ~/.specode/.any-active to current state") - sp = sub.add_parser("demo-activate", help="(testing) Mark a fake spec as active") - sp.add_argument("--slug", default="demo-phase-2") - sp.add_argument("--phase", default="implementation") - sp.add_argument("--root") - sp.add_argument("--session") - sd = sub.add_parser("demo-deactivate", help="(testing) End the fake spec session") - sd.add_argument("--root") - sd.add_argument("--session") - at = sub.add_parser("audit-tail", help="Pretty-print the last N lines of an audit log") - at.add_argument("-n", type=int, default=50, help="lines to show (default 50)") - 
at.add_argument("--date", help="YYYY-MM-DD UTC; default today") - at.add_argument("--follow", action="store_true", help="keep streaming new entries") - at.add_argument("--json", action="store_true", help="output raw JSON lines") - asum = sub.add_parser("audit-summary", help="Aggregate event/decision counts") - asum.add_argument("--days", type=int, default=7, help="how many most-recent daily logs to scan (default 7; 0=all)") - asum.add_argument("--show-deny", type=int, default=10, help="how many recent deny entries to include (default 10)") - tsum = sub.add_parser("telemetry-summary", help="Aggregate ~/.specode/telemetry.jsonl (opt-in)") - tsum.add_argument("--days", type=int, default=0, help="restrict to last N days (default 0=all)") - tsum.add_argument("--json", action="store_true", help="emit raw aggregates as JSON") - tsum.add_argument("--force", action="store_true", help="read the file even if telemetry is disabled") - args = p.parse_args(argv) - return { - "status": _cmd_status, - "sync-sentinel": _cmd_sync_sentinel, - "demo-activate": _cmd_demo_activate, - "demo-deactivate": _cmd_demo_deactivate, - "audit-tail": _cmd_audit_tail, - "audit-summary": _cmd_audit_summary, - "telemetry-summary": _cmd_telemetry_summary, - }[args.cmd](args) - - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/plugins/specode/scripts/spec_status.py b/plugins/specode/scripts/spec_status.py index 5aaefbf..4eb12b7 100755 --- a/plugins/specode/scripts/spec_status.py +++ b/plugins/specode/scripts/spec_status.py @@ -1,128 +1,164 @@ #!/usr/bin/env python3 -"""Spec-mode status: thin wrapper around `spec_session.py load --json`. +"""spec_status.py — `/specode:status` 命令入口。 -Historically this script duplicated TASK_RE / LABELS / task_section. Per the -P2 refactor it now delegates to `spec_session.py load --json`, parses the -JSON output, and renders the task-progress view. All shared regex/label -definitions live in `spec_session`. 
+读 ~/.specode/sessions/.json + active spec 的 .config.json, +输出可读摘要(人类友好 + JSON 数据块)。 + +用法: + spec_status.py --session + +stdlib-only。 """ from __future__ import annotations import argparse import json -import subprocess +import re import sys from pathlib import Path +from typing import Optional + +THIS_DIR = Path(__file__).resolve().parent +if str(THIS_DIR) not in sys.path: + sys.path.insert(0, str(THIS_DIR)) + +from spec_session import read_session, read_spec_config, _session_short, _is_lock_stale # type: ignore # noqa: E402 + +# 0.10.0+ 日志(defensive import) +try: + from spec_log import write_event as _log_event # type: ignore +except Exception: + def _log_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + return None + + +CHECKBOX_RE = re.compile(r"^\s*[-*]\s*\[(.)\]\s+", re.MULTILINE) + + +def _count_tasks(tasks_md: Optional[str]) -> dict: + if not tasks_md: + return {"total": 0, "done": 0, "in_progress": 0, "pending": 0} + total = done = in_prog = pending = 0 + for m in CHECKBOX_RE.finditer(tasks_md): + ch = m.group(1).strip().lower() + total += 1 + if ch == "x": + done += 1 + elif ch == "~": + in_prog += 1 + else: + pending += 1 + return {"total": total, "done": done, "in_progress": in_prog, "pending": pending} + + +def _read_text(p: Path) -> Optional[str]: + try: + if p.exists() and p.is_file(): + return p.read_text(encoding="utf-8", errors="replace") + except Exception: + return None + return None + + +def main(argv: Optional[list[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="spec_status.py", description="show specode session/spec status") + parser.add_argument("--session", required=True, help="会话 id(宿主注入的 session_id)") + parser.add_argument("--json", action="store_true", help="仅输出 JSON") + args = parser.parse_args(argv) + + sess = read_session(args.session) + if sess is None: + msg = { + "ok": False, + "reason": "session_not_found", + "session_id": args.session, + } + 
sys.stdout.write(json.dumps(msg, ensure_ascii=False, indent=2) + "\n") + return 0 -import spec_session -from spec_session import TASK_RE, TASK_LABELS, task_section - - -SCRIPT_DIR = Path(__file__).resolve().parent - - -def _run_load(spec_dir: Path, session_id: str) -> dict: - cmd = [ - sys.executable, - str(SCRIPT_DIR / "spec_session.py"), - "load", - str(spec_dir), - "--session", - session_id, - "--json", - ] - proc = subprocess.run(cmd, capture_output=True, text=True) - if proc.returncode != 0: - sys.stderr.write(proc.stderr) - raise SystemExit(proc.returncode) - return json.loads(proc.stdout) - - -def _collect_tasks(spec_dir: Path) -> tuple[dict[str, int], list[dict[str, str]]]: - """Read tasks.md once locally for the task-list view. spec_session.load - only returns counts; we need title-level data here for the table output.""" - counts = {label: 0 for label in TASK_LABELS.values()} - tasks: list[dict[str, str]] = [] - tasks_path = spec_dir / "tasks.md" - if not tasks_path.exists(): - return counts, tasks - text = task_section(tasks_path.read_text(encoding="utf-8")) - for match in TASK_RE.finditer(text): - marker = match.group(1) - title = match.group(2).strip() - status = TASK_LABELS[marker] - counts[status] += 1 - tasks.append({"status": status, "title": title}) - return counts, tasks - - -def main() -> int: - parser = argparse.ArgumentParser(description="Summarize specode status.") - parser.add_argument("spec_dir", type=Path, nargs="?") - parser.add_argument("--root", help="Document root. Required when spec_dir is omitted.") - parser.add_argument("--session", help="Window/thread/session id. 
Defaults to $TERM_SESSION_ID or 'default'.") - parser.add_argument("--json", action="store_true") - args = parser.parse_args() - - session_id = spec_session.normalize_session_id(args.session) - if args.spec_dir: - spec_dir = args.spec_dir.expanduser().resolve() - else: - if not args.root: - raise SystemExit("status without spec_dir requires --root") - spec_dir, _config, _entry = spec_session.resolve_active( - Path(args.root).expanduser().resolve(), - session_id, - ) - - load_data = _run_load(spec_dir, session_id) - counts, tasks = _collect_tasks(spec_dir) - - lock = load_data.get("lock") - result = { - "specDir": load_data["specDir"], - "requirementName": load_data.get("requirementName"), - "specId": load_data.get("specId"), - "sessionId": session_id, - "currentPhase": load_data.get("currentPhase"), - "iterationRound": load_data.get("iterationRound"), - "sessionStatus": load_data.get("sessionStatus"), - "lock": lock, - "lockHeldBy": (lock or {}).get("sessionId"), - "lockOwnedByCurrentSession": load_data.get("lockOwnedByCurrentSession", False), - "checklistStale": load_data.get("checklistStale", False), - "counts": counts, - "tasks": tasks, + spec_dir_str = sess.get("active_spec_dir") + cfg = None + task_counts = None + lock_state_detail = None + if spec_dir_str: + try: + cfg = read_spec_config(Path(spec_dir_str)) + except Exception: + cfg = None + tasks_md = _read_text(Path(spec_dir_str) / "tasks.md") if spec_dir_str else None + task_counts = _count_tasks(tasks_md) + if cfg: + lock = cfg.get("lock") or {} + holder = lock.get("holder") + if not holder: + lock_state_detail = "released" + elif holder == args.session: + lock_state_detail = "ok (held by current session)" + elif _is_lock_stale(lock): + lock_state_detail = f"stale (holder={_session_short(holder)})" + else: + lock_state_detail = f"held by other (holder={_session_short(holder)})" + + payload = { + "ok": True, + "session": sess, + "spec_config": cfg, + "task_counts": task_counts, + "lock_state_detail": 
lock_state_detail, } - if args.json: - print(json.dumps(result, ensure_ascii=False, indent=2)) + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") return 0 - print(f"Spec: {result['requirementName'] or Path(spec_dir).name}") - print(f"Path: {spec_dir}") - print(f"Session: {session_id}") - print(f"Phase: {result['currentPhase'] or 'unknown'}") - if result["iterationRound"]: - print(f"Iteration round: {result['iterationRound']}") - if lock: - owner = "本会话" if result["lockOwnedByCurrentSession"] else f"其他: {result['lockHeldBy']}" - print(f"Lock: {owner}") - else: - print("Lock: 空闲") - c = result["counts"] - print( - "Tasks: " - f"{c['completed']} completed, {c['in_progress']} in progress, " - f"{c['pending']} pending, {c['optional']} optional, {c['skipped']} skipped" - ) - if tasks: - print() - for task in tasks: - marker = {"pending": "[ ]", "completed": "[x]", "in_progress": "[~]", "optional": "[*]", "skipped": "[-]"}[task["status"]] - print(f" {marker} {task['title']}") + # 可读摘要 + lines: list[str] = [] + lines.append("=== specode status ===") + sid_for_show = sess.get("session_id") or sess.get("claude_session_id") + lines.append(f"session_id : {sid_for_show}") + lines.append(f"session(short) : {_session_short(sid_for_show)}") + lines.append(f"mode : {sess.get('mode')}") + lines.append(f"started_at : {sess.get('started_at')}") + lines.append(f"last_activity : {sess.get('last_activity_at')}") + if sess.get("ended_at"): + lines.append(f"ended_at : {sess.get('ended_at')}") + lines.append(f"active spec : {sess.get('active_spec_slug') or '(none)'}") + lines.append(f"spec_dir : {spec_dir_str or '(none)'}") + lines.append(f"phase : {sess.get('phase') or '(none)'}") + lines.append(f"pending_select : {sess.get('pending_selector') or '(none)'}") + lines.append(f"lock_state : {sess.get('lock_state')}") + if lock_state_detail: + lines.append(f"lock_detail : {lock_state_detail}") + if task_counts: + lines.append( + f"tasks : total={task_counts['total']} 
done={task_counts['done']} " + f"in_progress={task_counts['in_progress']} pending={task_counts['pending']}" + ) + if sess.get("task_swarm_run_id"): + lines.append(f"task_swarm_run : {sess.get('task_swarm_run_id')}") + + sys.stdout.write("\n".join(lines) + "\n") return 0 +def _log_wrap_main(argv: Optional[list[str]] = None) -> int: + import contextlib as _cl + argv_list = list(sys.argv[1:]) if argv is None else list(argv) + sid = None + for i, a in enumerate(argv_list): + if a == "--session" and i + 1 < len(argv_list): + sid = argv_list[i + 1] + break + with _cl.suppress(Exception): + _log_event("cli_call", {"script": "spec_status.py", "argv_len": len(argv_list)}, session_id=sid) + rc = main(argv) + with _cl.suppress(Exception): + _log_event("cli_exit", {"script": "spec_status.py", "exit_code": rc}, session_id=sid) + return rc + + if __name__ == "__main__": - raise SystemExit(main()) + try: + sys.exit(_log_wrap_main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/spec_sync.py b/plugins/specode/scripts/spec_sync.py deleted file mode 100644 index 2ef2455..0000000 --- a/plugins/specode/scripts/spec_sync.py +++ /dev/null @@ -1,624 +0,0 @@ -# Code-Doc Sync Guard (CDSG). -# -# Implements INV-1 (PreToolUse legality), INV-2 (Stop turn conservation), and -# INV-4 (Stop requirements↔tasks follow-mode) by maintaining a per-spec ledger -# at /.sync-ledger.json that tracks code/doc changes within each turn. -# -# Concepts: -# - turn: one user → assistant → stop cycle (refreshed at UserPromptSubmit) -# - D (docs): files under /, *.md (requirements/design/tasks/ -# bugfix/implementation-log) -# - C (code): files under project_root/ but NOT under / -# - tasks_files: file paths explicitly listed in tasks.md or design.md -# "Affected Files" section, plus glob expansions -# -# Sync rule (INV-1): editing f ∈ C is legal iff -# (a) f matches tasks_files, OR -# (b) some d ∈ D was modified earlier in the same turn, OR -# (c) freeform mode is on. 
-# -# Turn conservation (INV-2): a turn that touched any f ∈ C must also touch at -# least one d ∈ D before Stop. Freeform mode does NOT exempt INV-2 (per design -# decision 1A). implementation-log.md counts as a doc change (decision 2A). -# -# Tasks follow-mode (INV-4): a turn that touched requirements.md or bugfix.md -# must also touch tasks.md before Stop — the `## 测试要点` section of tasks.md -# is the tester-facing derivation of the SHALL statements and must stay in -# lockstep with requirements/bugfix changes. - -from __future__ import annotations - -import argparse -import json -import os -import re -import sys -import uuid -from datetime import datetime, timezone -from pathlib import Path -from typing import Iterable, Optional - -# --- constants ------------------------------------------------------------- - -SPEC_DOC_FILENAMES = { - "requirements.md", - "bugfix.md", - "design.md", - "tasks.md", - "implementation-log.md", -} - -LEDGER_FILENAME = ".sync-ledger.json" -LEDGER_VERSION = 1 - -# Tasks-file extraction patterns. -TASKS_FILE_LINE_RE = re.compile( - r"^\s*-\s*\[[ x~*\-]\]\s+(?:FILE|file|文件)[::]\s*(?P\S.*?)\s*$", - re.MULTILINE, -) -# Inline `- [ ] ... (src/foo.py)` is *not* matched — too noisy. Use FILE: prefix. 
- -AFFECTED_FILES_SECTION_RE = re.compile( - r"^#{2,3}\s+(?:Affected Files|影响文件)\s*$", - re.MULTILINE, -) -BULLET_PATH_RE = re.compile(r"^\s*[-*]\s+`?(?P[^`\s]+)`?\s*$") - - -# --- time ------------------------------------------------------------------ - -def _now() -> str: - return datetime.now(timezone.utc).isoformat() - - -# --- json io --------------------------------------------------------------- - -def _read_json(path: Path) -> Optional[dict]: - if not path.exists(): - return None - try: - return json.loads(path.read_text(encoding="utf-8")) - except Exception: - return None - - -def _write_json(path: Path, data: dict) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - tmp = path.with_suffix(path.suffix + ".tmp") - tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - tmp.replace(path) - - -# --- tasks_files extraction ------------------------------------------------- - -def extract_tasks_files(spec_dir: Path) -> list[str]: - """Pull every path mentioned in tasks.md / design.md. - - Sources: - 1. tasks.md lines like `- [ ] FILE: src/foo.py` (also matches 文件/file) - 2. tasks.md / design.md `## Affected Files` (or 影响文件) section bullets - - Glob patterns (`*`, `**`) are preserved as-is; callers expand against the - project root. - """ - paths: list[str] = [] - for fname in ("tasks.md", "design.md"): - f = spec_dir / fname - if not f.exists(): - continue - text = f.read_text(encoding="utf-8", errors="replace") - for m in TASKS_FILE_LINE_RE.finditer(text): - paths.append(m.group("path")) - paths.extend(_extract_affected_files_section(text)) - # Dedup while preserving order. 
- seen: set[str] = set() - out = [] - for p in paths: - p = p.strip().strip("`").strip() - if not p or p in seen: - continue - seen.add(p) - out.append(p) - return out - - -def _extract_affected_files_section(text: str) -> list[str]: - lines = text.splitlines() - out: list[str] = [] - in_section = False - for line in lines: - if AFFECTED_FILES_SECTION_RE.match(line): - in_section = True - continue - if in_section: - if line.startswith("#"): - # Reached the next heading. - in_section = False - continue - m = BULLET_PATH_RE.match(line) - if m: - out.append(m.group("path")) - return out - - -def _glob_to_regex(pattern: str) -> re.Pattern: - """Convert a path glob (with `**` recursive support) into a regex. - - Rules: - - `**/` → zero or more path components (incl. none) - - `**` → any sequence (incl. `/`) - - `*` → any sequence not containing `/` - - `?` → any single non-`/` character - - `[...]` → character class (passed through) - - other → literal - """ - out: list[str] = [] - i = 0 - n = len(pattern) - while i < n: - c = pattern[i] - if c == "*": - if i + 1 < n and pattern[i + 1] == "*": - if i + 2 < n and pattern[i + 2] == "/": - out.append("(?:.*/)?") - i += 3 - else: - out.append(".*") - i += 2 - else: - out.append("[^/]*") - i += 1 - elif c == "?": - out.append("[^/]") - i += 1 - elif c == "[": - j = pattern.find("]", i) - if j == -1: - out.append(re.escape(c)) - i += 1 - else: - out.append(pattern[i : j + 1]) - i = j + 1 - else: - out.append(re.escape(c)) - i += 1 - return re.compile("^" + "".join(out) + "$") - - -def matches_tasks_files(target: Path, tasks_files: Iterable[str], project_root: Path) -> bool: - """Check if `target` matches any entry in tasks_files (literal or glob).""" - try: - rel = target.resolve().relative_to(project_root.resolve()) - except ValueError: - return False - rel_str = str(rel) - for entry in tasks_files: - if not entry: - continue - if "*" in entry or "?" 
in entry or "[" in entry: - if _glob_to_regex(entry).match(rel_str): - return True - else: - if rel_str == entry or rel_str == entry.lstrip("./"): - return True - return False - - -# --- path classification ---------------------------------------------------- - -def is_under(target: Path, root: Path) -> bool: - try: - target.resolve().relative_to(root.resolve()) - return True - except ValueError: - return False - - -def classify_path(target: Path, spec_dir: Path, project_root: Optional[Path]) -> str: - """Return one of: 'spec-doc', 'project-code', 'outside'.""" - if is_under(target, spec_dir): - return "spec-doc" - if project_root and is_under(target, project_root): - return "project-code" - return "outside" - - -# --- ledger ---------------------------------------------------------------- - -def ledger_path(spec_dir: Path) -> Path: - return spec_dir / LEDGER_FILENAME - - -def read_ledger(spec_dir: Path) -> dict: - data = _read_json(ledger_path(spec_dir)) - if not data: - return _new_ledger(spec_dir) - data.setdefault("version", LEDGER_VERSION) - data.setdefault("turn_code_changes", []) - data.setdefault("turn_doc_changes", []) - return data - - -def write_ledger(spec_dir: Path, data: dict) -> None: - _write_json(ledger_path(spec_dir), data) - - -def _new_ledger(spec_dir: Path) -> dict: - return { - "version": LEDGER_VERSION, - "spec_slug": spec_dir.name, - "project_root": None, - "freeform_mode": False, - "tasks_files": [], - "turn_id": None, - "turn_started_at": None, - "turn_code_changes": [], - "turn_doc_changes": [], - "last_violation": None, - "pending_advisories": [], - "updated_at": None, - } - - -def start_new_turn(ledger: dict, project_root: Optional[Path], tasks_files: list[str]) -> None: - ledger["turn_id"] = f"turn-{uuid.uuid4().hex[:8]}" - ledger["turn_started_at"] = _now() - ledger["turn_code_changes"] = [] - ledger["turn_doc_changes"] = [] - if project_root: - ledger["project_root"] = str(project_root) - ledger["tasks_files"] = tasks_files - 
ledger["updated_at"] = _now() - - -def append_change(ledger: dict, kind: str, file_path: str, tool: str) -> None: - bucket = "turn_code_changes" if kind == "code" else "turn_doc_changes" - ledger[bucket].append({ - "file": file_path, - "tool": tool, - "at": _now(), - }) - ledger["updated_at"] = _now() - - -def reset_turn(ledger: dict) -> None: - ledger["turn_code_changes"] = [] - ledger["turn_doc_changes"] = [] - ledger["updated_at"] = _now() - - -def has_doc_change_this_turn(ledger: dict) -> bool: - return bool(ledger.get("turn_doc_changes")) - - -def has_code_change_this_turn(ledger: dict) -> bool: - return bool(ledger.get("turn_code_changes")) - - -def doc_changes_files(ledger: dict) -> set[str]: - return {Path(c["file"]).name for c in ledger.get("turn_doc_changes", [])} - - -# --- decisions -------------------------------------------------------------- - -INV1_MESSAGE_TMPL = ( - "代码-文档同步守卫 (INV-1): 文件 {target} 不在 tasks.md 列表内, " - "且本回合未先修改 design/tasks/bugfix 文档。\n" - "两条合法路径任选其一:\n" - " (A) 先在 design.md 或 tasks.md 中加上该文件的修改说明, 再写代码;\n" - " (B) 如确需自由实现, 运行 /spec --freeform 切换至自由阶段。" -) - -INV3_EVICTED_MSG = ( - "代码-文档同步守卫 (INV-3): 当前 session 已被另一个窗口抢占 (evicted)。\n" - "本会话对 spec '{slug}' 的写权限已被回收。请运行 /continue {slug} 重新取回 lock, " - "或停止编辑以避免覆盖另一会话的工作。" -) - -INV6_MESSAGE_TMPL = ( - "代码-文档同步守卫 (INV-6): 当前阶段 [{phase}] 禁止修改源码。\n" - "必须先完成 requirements/design/tasks 阶段确认 (推进到 implementation 后才能写代码)。\n" - "phase gate 是绝对规则, freeform 模式也不豁免。" -) - -# Phases that forbid source-code edits. Spec-doc edits within these phases -# are still allowed (and in fact expected). 
-PHASES_FORBID_CODE = {"intake", "requirements", "bugfix", "design", "tasks"} - -INV2_MESSAGE_TMPL = ( - "代码-文档同步守卫 (INV-2): 本回合修改了 {n} 个源码文件但未同步任何 spec 文档。\n" - "必须在本回合内完成下列之一:\n" - " - 在 design.md 中描述代码变更的设计意图;\n" - " - 在 tasks.md 中把对应 task 标记为完成 (或补充新 task);\n" - " - 在 implementation-log.md 中追加本次变更的纪要。" -) - -INV4_MESSAGE_TMPL = ( - "代码-文档同步守卫 (INV-4): 本回合修改了 {req} 但未同步 tasks.md 的" - " 测试要点。\n需求/bug 行为变化时, 测试人员需要的验证场景也必须同 turn 跟进 ——" - " 请在 tasks.md `## 测试要点` 节增删对应行。" -) - -def check_pre_edit( - target: Path, - spec_dir: Path, - project_root: Optional[Path], - ledger: dict, -) -> tuple[str, str]: - """Return ('ok' | 'deny', message). Spec-doc edits bypass INV-1 (handled elsewhere).""" - cls = classify_path(target, spec_dir, project_root) - if cls != "project-code": - return "ok", "" - # project-code branch - tasks_files = ledger.get("tasks_files") or [] - if project_root and matches_tasks_files(target, tasks_files, project_root): - return "ok", "" - if has_doc_change_this_turn(ledger): - return "ok", "" - if ledger.get("freeform_mode"): - return "ok", "" - return "deny", INV1_MESSAGE_TMPL.format(target=str(target)) - - -def check_phase_gate(current_phase: str) -> tuple[str, str]: - """INV-6: forbid source-code edits in pre-implementation phases. - - freeform mode does NOT exempt INV-6 (phase gate is absolute). - """ - if current_phase in PHASES_FORBID_CODE: - return "deny", INV6_MESSAGE_TMPL.format(phase=current_phase) - return "ok", "" - - -def check_verify_lock(spec_dir: Path, session_id: str, slug: str) -> tuple[str, str]: - """INV-3: lock-ownership check before spec-doc write. - - Imports spec_session lazily (heavy module). Returns deny only on 'evicted'; - other non-ok statuses (not_held / stale_lock) are allowed with a soft signal - so existing pre-Phase-4 specs without a lock keep working. 
- """ - try: - sys.path.insert(0, str(Path(__file__).resolve().parent)) - import spec_session # type: ignore - except Exception as e: - return "ok", f"verify-lock-import-failed: {e}" - - try: - result = spec_session._verify(spec_dir, session_id) - except SystemExit as e: - # spec_session raises SystemExit for missing config; treat as "no lock model" - return "ok", f"verify-lock-skipped: {e}" - except Exception as e: - return "ok", f"verify-lock-error: {e}" - - status = result.get("status") - if status == "evicted": - return "deny", INV3_EVICTED_MSG.format(slug=slug) - # status in {"ok", "not_held", "stale_lock"} -> allow. - return "ok", status or "" - - -def check_stop(ledger: dict) -> list[dict]: - """Return a list of violation dicts. Empty list = pass.""" - violations: list[dict] = [] - if has_code_change_this_turn(ledger) and not has_doc_change_this_turn(ledger): - violations.append({ - "id": "INV-2", - "msg": INV2_MESSAGE_TMPL.format(n=len(ledger.get("turn_code_changes", []))), - }) - doc_files = doc_changes_files(ledger) - req_touched = {"requirements.md", "bugfix.md"} & doc_files - if req_touched and "tasks.md" not in doc_files: - violations.append({ - "id": "INV-4", - "msg": INV4_MESSAGE_TMPL.format(req=" + ".join(sorted(req_touched))), - }) - return violations - - -# --- Advisory infrastructure (INV-1 / INV-2 / INV-4 / INV-6) -------------- -# -# These INVs are "process discipline" — violating them does NOT corrupt data -# or break the task-swarm scheduler. As of 0.4.0 they are recorded as sticky -# advisories on the ledger instead of denying the action. -# -# INV-3 / INV-7 / INV-8 / INV-9 remain hard deny because each one protects -# data integrity (lock ownership, subagent_type contract, write boundary, -# tasks.md writeback safety) — advisory-only would silently corrupt state. - -ADVISORY_INVS = {"INV-1", "INV-2", "INV-4", "INV-6"} -# Doc edit auto-dismisses these. 
Touching any spec doc indicates the user is -# already engaging with documentation; the stale sync warning becomes noise. -DOC_CHANGE_DISMISSES = {"INV-1", "INV-2", "INV-4"} - - -def record_advisory(ledger: dict, inv_id: str, msg: str, file: str | None = None) -> None: - """Append an advisory to the sticky ledger queue (idempotent per turn+inv+file).""" - entry = { - "id": inv_id, - "msg": msg, - "file": file, - "turn_id": ledger.get("turn_id"), - "at": _now(), - } - pending = ledger.setdefault("pending_advisories", []) - # Dedupe: same inv + same file + same turn → don't pile up - key = (entry["id"], entry["file"], entry["turn_id"]) - for existing in pending: - if (existing.get("id"), existing.get("file"), existing.get("turn_id")) == key: - return - pending.append(entry) - ledger["updated_at"] = _now() - - -def auto_dismiss_on_doc_change(ledger: dict) -> int: - """Called after a spec-doc edit lands. Drop INV-1/2/4 advisories (drift fixed).""" - pending = ledger.get("pending_advisories") or [] - before = len(pending) - ledger["pending_advisories"] = [a for a in pending if a.get("id") not in DOC_CHANGE_DISMISSES] - after = len(ledger["pending_advisories"]) - if before != after: - ledger["updated_at"] = _now() - return before - after - - -def dismiss_advisories(ledger: dict, inv_ids: list[str] | None = None) -> int: - """Manual dismiss — drop all advisories or only those matching inv_ids.""" - pending = ledger.get("pending_advisories") or [] - before = len(pending) - if inv_ids is None: - ledger["pending_advisories"] = [] - else: - targets = set(inv_ids) - ledger["pending_advisories"] = [a for a in pending if a.get("id") not in targets] - after = len(ledger["pending_advisories"]) - if before != after: - ledger["updated_at"] = _now() - return before - after - - -def format_advisories_block(ledger: dict) -> str: - """Render sticky advisories for injection into UserPromptSubmit status block. - - Empty string when there are none. 
- """ - pending = ledger.get("pending_advisories") or [] - if not pending: - return "" - lines = ["⚠ pending advisories (sticky — 本轮处理或运行 /spec --dismiss-advisories 清除):"] - # Group by inv id for readability - by_inv: dict[str, list[dict]] = {} - for a in pending: - by_inv.setdefault(a.get("id", "?"), []).append(a) - for inv_id in sorted(by_inv): - entries = by_inv[inv_id] - head = f" {inv_id} × {len(entries)}" - files = [e.get("file") for e in entries if e.get("file")] - if files: - sample = files[0] if len(files) == 1 else f"{files[0]} (+{len(files) - 1} more)" - head += f": {sample}" - lines.append(head) - # Show only the first entry's message (others are likely similar) - first_msg = entries[0].get("msg", "").split("\n")[0] - if first_msg: - lines.append(f" {first_msg}") - return "\n".join(lines) - - -# --- CLI ------------------------------------------------------------------- - -def _resolve_active_spec_dir() -> Optional[Path]: - sys.path.insert(0, str(Path(__file__).resolve().parent)) - import spec_state # local import to avoid hard dep at import time - info = spec_state.find_active_spec(prefer_session_id=os.environ.get("TERM_SESSION_ID")) - if not info: - return None - return Path(info["spec_dir"]) - - -def _spec_config_path(spec_dir: Path) -> Path: - return spec_dir / ".config.json" - - -def _read_spec_config(spec_dir: Path) -> dict: - return _read_json(_spec_config_path(spec_dir)) or {} - - -def _write_spec_config(spec_dir: Path, data: dict) -> None: - _write_json(_spec_config_path(spec_dir), data) - - -def _cmd_status(args: argparse.Namespace) -> int: - if getattr(args, "spec_dir", None): - spec_dir = Path(args.spec_dir).expanduser().resolve() - if not spec_dir.exists(): - print(f"ERR: spec_dir does not exist: {spec_dir}", file=sys.stderr) - return 2 - else: - spec_dir = _resolve_active_spec_dir() - if not spec_dir: - print("(no active spec)") - return 0 - ledger = read_ledger(spec_dir) - config = _read_spec_config(spec_dir) - print(f"spec_dir: 
{spec_dir}") - print(f"slug: {ledger.get('spec_slug')}") - print(f"project_root: {ledger.get('project_root')}") - print(f"freeform: {bool(config.get('freeformMode'))}") - print(f"tasks_files: {len(ledger.get('tasks_files') or [])} entries") - print(f"turn_id: {ledger.get('turn_id')}") - print(f"turn started: {ledger.get('turn_started_at')}") - print(f"code changes: {len(ledger.get('turn_code_changes') or [])}") - print(f"doc changes: {len(ledger.get('turn_doc_changes') or [])}") - print(f"last violation: {ledger.get('last_violation')}") - pending = ledger.get("pending_advisories") or [] - if pending: - print(f"advisories: {len(pending)} pending") - for a in pending: - print(f" - [{a.get('id')}] {a.get('file') or '(no file)'} @ turn {a.get('turn_id')}") - return 0 - - -def _cmd_dismiss_advisories(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser() if args.spec_dir else _resolve_active_spec_dir() - if not spec_dir: - print("ERR: no active spec (and no --spec-dir given)", file=sys.stderr) - return 2 - ledger = read_ledger(spec_dir) - inv_ids = args.inv.split(",") if args.inv else None - dropped = dismiss_advisories(ledger, inv_ids) - write_ledger(spec_dir, ledger) - scope = f"matching {args.inv}" if inv_ids else "all" - print(f"✓ dismissed {dropped} advisory entries ({scope}) for spec '{spec_dir.name}'") - return 0 - - -def _cmd_freeform(args: argparse.Namespace) -> int: - spec_dir = _resolve_active_spec_dir() - if not spec_dir: - print("ERR: no active spec", file=sys.stderr) - return 2 - config = _read_spec_config(spec_dir) - desired = (args.state == "on") - config["freeformMode"] = desired - _write_spec_config(spec_dir, config) - ledger = read_ledger(spec_dir) - ledger["freeform_mode"] = desired - write_ledger(spec_dir, ledger) - print(f"✓ freeform_mode = {desired} for spec '{spec_dir.name}'") - return 0 - - -def _cmd_extract(args: argparse.Namespace) -> int: - spec_dir = Path(args.spec_dir).expanduser() if args.spec_dir else 
_resolve_active_spec_dir() - if not spec_dir: - print("ERR: no spec_dir", file=sys.stderr) - return 2 - files = extract_tasks_files(spec_dir) - print(json.dumps(files, ensure_ascii=False, indent=2)) - return 0 - - -def main(argv) -> int: - p = argparse.ArgumentParser(prog="spec_sync.py") - sub = p.add_subparsers(dest="cmd", required=True) - sp_status = sub.add_parser("status", help="Print ledger summary for active spec") - sp_status.add_argument("--spec-dir", help="Explicit spec dir; bypass active-pointer lookup") - sp = sub.add_parser("freeform", help="Toggle freeform mode") - sp.add_argument("state", choices=["on", "off"]) - sx = sub.add_parser("extract", help="Print tasks_files for a spec") - sx.add_argument("--spec-dir") - sd = sub.add_parser("dismiss-advisories", help="Clear sticky advisories (all or selected INV ids)") - sd.add_argument("--spec-dir", help="Explicit spec dir; bypass active-pointer lookup") - sd.add_argument("--inv", help="Comma-separated INV ids to clear (e.g. 'INV-1,INV-2'); default: clear all") - args = p.parse_args(argv) - return { - "status": _cmd_status, - "freeform": _cmd_freeform, - "extract": _cmd_extract, - "dismiss-advisories": _cmd_dismiss_advisories, - }[args.cmd](args) - - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/plugins/specode/scripts/spec_telemetry.py b/plugins/specode/scripts/spec_telemetry.py deleted file mode 100644 index 7f8bbdf..0000000 --- a/plugins/specode/scripts/spec_telemetry.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Local-only telemetry for specode flow events. - -Opt-in via env: SPECODE_TELEMETRY ∈ {"on","1","true","yes"} (case-insensitive). -Disabled by default. Events go to ~/.specode/telemetry.jsonl (single -append-only file so `grep` / `jq` stay trivial). Absolutely no remote upload. - -Distinct from ~/.specode/audit/ (always-on hook-decision audit). 
Telemetry -records higher-level workflow events: - spec.init / spec.phase_transition / spec.end - inv.violation - swarm.run_start / swarm.stage_round / swarm.stage_done / swarm.writeback - -When the file passes SPECODE_TELEMETRY_MAX_BYTES (default 50 MB), the current -file is renamed to telemetry.jsonl.0 (overwriting any prior .0) and a fresh -file begins. Older .0 contents are discarded — this is best-effort local -analytics, not durable storage. - -All write/IO errors are swallowed: telemetry must never break a hook. -""" -from __future__ import annotations - -import json -import os -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - - -SPECODE_DIR = Path.home() / ".specode" -TELEMETRY_FILE = SPECODE_DIR / "telemetry.jsonl" -ROTATED_FILE = SPECODE_DIR / "telemetry.jsonl.0" - -DEFAULT_MAX_BYTES = 50 * 1024 * 1024 -_ENV_FLAG = "SPECODE_TELEMETRY" -_ENV_MAX = "SPECODE_TELEMETRY_MAX_BYTES" -_ENV_PATH = "SPECODE_TELEMETRY_FILE" - -_TRUTHY = {"on", "1", "true", "yes", "y"} - - -def _env_path() -> Path: - raw = os.environ.get(_ENV_PATH) - return Path(raw).expanduser() if raw else TELEMETRY_FILE - - -def _rotated_for(path: Path) -> Path: - return path.with_suffix(path.suffix + ".0") - - -def is_enabled() -> bool: - return (os.environ.get(_ENV_FLAG) or "").strip().lower() in _TRUTHY - - -def _max_bytes() -> int: - raw = os.environ.get(_ENV_MAX) - if not raw: - return DEFAULT_MAX_BYTES - try: - v = int(raw) - return v if v > 0 else DEFAULT_MAX_BYTES - except ValueError: - return DEFAULT_MAX_BYTES - - -def _maybe_rotate(path: Path, max_bytes: int) -> None: - try: - size = path.stat().st_size - except OSError: - return - if size <= max_bytes: - return - try: - rotated = _rotated_for(path) - try: - rotated.unlink() - except FileNotFoundError: - pass - os.replace(path, rotated) - except OSError: - pass - - -def emit(event: str, **fields: Any) -> None: - """Record one telemetry event. No-op when SPECODE_TELEMETRY is off. 
- - `event` is a dotted namespace ("spec.init", "swarm.stage_done"). - `fields` is the event payload — keep it small, JSON-serializable. - Common identity fields like spec_slug / project_root / run_id should be - passed by the caller so users can grep / aggregate by them. - """ - if not is_enabled(): - return - path = _env_path() - try: - path.parent.mkdir(parents=True, exist_ok=True) - except OSError: - return - _maybe_rotate(path, _max_bytes()) - record = {"ts": datetime.now(timezone.utc).isoformat(), "event": event} - record.update(fields) - try: - with path.open("a", encoding="utf-8") as f: - f.write(json.dumps(record, ensure_ascii=False, default=str) + "\n") - except OSError: - pass - - -def iter_records(path: Path | None = None, include_rotated: bool = True): - """Yield decoded telemetry records, oldest first. - - When include_rotated, telemetry.jsonl.0 is read before telemetry.jsonl - so the chronological order is preserved across one rotation boundary. - """ - target = path or _env_path() - files: list[Path] = [] - if include_rotated: - rotated = _rotated_for(target) - if rotated.exists(): - files.append(rotated) - if target.exists(): - files.append(target) - for fp in files: - try: - with fp.open("r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - yield json.loads(line) - except json.JSONDecodeError: - continue - except OSError: - continue diff --git a/plugins/specode/scripts/spec_vault.py b/plugins/specode/scripts/spec_vault.py old mode 100644 new mode 100755 index fbbf22d..de6cbb7 --- a/plugins/specode/scripts/spec_vault.py +++ b/plugins/specode/scripts/spec_vault.py @@ -1,4 +1,16 @@ #!/usr/bin/env python3 +"""spec_vault.py — Obsidian vault 检测与 specode 根目录配置(详见 references/obsidian.md)。 + +子命令: + detect 扫描三平台 obsidian.json,输出已知 vault 列表 (JSON) + status 输出当前 doc_root 与来源 (env / config / auto / none) + set --vault

写 ~/.config/specode/config.json.obsidianRoot + set --root

同字段(不强调 vault 概念) + +退出码:0 ok / 3 用户引导(含 hard-stop 提示)。 + +stdlib-only。 +""" from __future__ import annotations import argparse @@ -6,290 +18,316 @@ import json import os import platform -import re import sys +import tempfile +import time from pathlib import Path +from typing import Optional, Tuple -CONFIG_FILE = Path.home() / ".config" / "specode" / "config.json" +def _device_segment() -> str: + """返回 `-` 设备分段,例如 `windows-qiang` / `macos-alice`。 -def _safe_username() -> str: - """Return a filesystem-safe username, stripping domain prefix on Windows.""" - try: - username = getpass.getuser() - except Exception: - username = os.environ.get("USERNAME") or os.environ.get("USER") or "user" - # Strip DOMAIN\user or domain/user prefix (Windows domain accounts) - username = re.sub(r"^[^/\\]+[/\\]", "", username) - # Replace characters that are invalid or awkward in directory names - username = re.sub(r"[^\w.-]", "-", username).strip("-") - return username or "user" - + 用途:让同一 Obsidian vault 在多设备 / 多用户共享时,每个设备的 spec 文档独立 + 存放在 `/spec-in//specs/`,避免锁串扰与文件冲突。 + 详见 references/obsidian.md §0 与 §1。 + """ + sys_map = {"Darwin": "macos", "Windows": "windows", "Linux": "linux"} + os_name = sys_map.get(platform.system(), platform.system().lower()) + return f"{os_name}-{getpass.getuser()}" -def device_segment() -> Path: - """Return the vault-relative Path for this machine: spec-in/-/specs.""" - os_map = {"Darwin": "macos", "Windows": "windows"} - os_name = os_map.get(platform.system(), platform.system().lower()) - return Path("spec-in") / f"{os_name}-{_safe_username()}" / "specs" +# ------------------------------------------------------------------------- +# 平台相关:obsidian.json 路径 +# ------------------------------------------------------------------------- -def obsidian_config_path() -> Path | None: +def _obsidian_config_paths() -> list[Path]: + """返回当前平台下可能的 Obsidian obsidian.json 路径列表(按优先级)。""" + home = Path.home() system = platform.system() + paths: list[Path] = [] if system == 
"Darwin": - return Path.home() / "Library" / "Application Support" / "obsidian" / "obsidian.json" - if system == "Windows": + paths.append(home / "Library" / "Application Support" / "obsidian" / "obsidian.json") + elif system == "Windows": appdata = os.environ.get("APPDATA") - return Path(appdata) / "obsidian" / "obsidian.json" if appdata else None + if appdata: + paths.append(Path(appdata) / "obsidian" / "obsidian.json") + paths.append(home / "AppData" / "Roaming" / "obsidian" / "obsidian.json") + else: + # Linux / others + xdg = os.environ.get("XDG_CONFIG_HOME") + if xdg: + paths.append(Path(xdg) / "obsidian" / "obsidian.json") + paths.append(home / ".config" / "obsidian" / "obsidian.json") + # Flatpak + paths.append(home / ".var" / "app" / "md.obsidian.Obsidian" / "config" / "obsidian" / "obsidian.json") + return paths + + +def _load_obsidian_vaults() -> list[dict]: + """读所有 obsidian.json,返回 vault 列表(含 path、open、mtime)。""" + results: list[dict] = [] + seen: set[str] = set() + for cfg in _obsidian_config_paths(): + try: + if not cfg.exists(): + continue + with cfg.open("r", encoding="utf-8") as fh: + data = json.load(fh) + except Exception: + continue + vaults = data.get("vaults", {}) + if not isinstance(vaults, dict): + continue + for vid, info in vaults.items(): + if not isinstance(info, dict): + continue + path = info.get("path") + if not path or not isinstance(path, str): + continue + if path in seen: + continue + seen.add(path) + try: + mtime = float(info.get("ts", 0)) / 1000.0 + except Exception: + mtime = 0.0 + exists = False + try: + exists = Path(path).exists() + except Exception: + exists = False + results.append({ + "id": vid, + "path": path, + "open": bool(info.get("open", False)), + "mtime": mtime, + "exists": exists, + "source_config": str(cfg), + }) + # 按 (open desc, mtime desc) 排序 + results.sort(key=lambda v: (0 if v.get("open") else 1, -float(v.get("mtime") or 0))) + return results + + +# 
------------------------------------------------------------------------- +# specode 配置文件 (~/.config/specode/config.json) +# ------------------------------------------------------------------------- + +def _specode_config_path() -> Path: xdg = os.environ.get("XDG_CONFIG_HOME") - base = Path(xdg) if xdg else Path.home() / ".config" - return base / "obsidian" / "obsidian.json" + base = Path(xdg) if xdg else (Path.home() / ".config") + return base / "specode" / "config.json" -def read_vaults() -> list[dict]: - config_path = obsidian_config_path() - if not config_path or not config_path.exists(): - return [] +def _load_specode_config() -> dict: + p = _specode_config_path() + if not p.exists(): + return {} try: - data = json.loads(config_path.read_text(encoding="utf-8")) - result = [] - for _vid, v in (data.get("vaults") or {}).items(): - path_str = v.get("path") - if path_str and Path(path_str).exists(): - result.append({ - "path": path_str, - "ts": v.get("ts", 0), - "open": v.get("open", False), - }) - return result + with p.open("r", encoding="utf-8") as fh: + data = json.load(fh) + if isinstance(data, dict): + return data except Exception: - return [] - - -def pick_best_vault(vaults: list[dict]) -> dict | None: - if not vaults: - return None - open_vaults = sorted([v for v in vaults if v.get("open")], key=lambda v: v["ts"], reverse=True) - if open_vaults: - return open_vaults[0] - return sorted(vaults, key=lambda v: v["ts"], reverse=True)[0] - - -def read_config() -> dict: - if CONFIG_FILE.exists(): - try: - return json.loads(CONFIG_FILE.read_text(encoding="utf-8")) - except Exception: - return {} + pass return {} -def write_config(cfg: dict) -> None: - CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True) - tmp = CONFIG_FILE.with_suffix(".json.tmp") - tmp.write_text(json.dumps(cfg, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - tmp.replace(CONFIG_FILE) - - -def resolve_spec_root() -> tuple[Path | None, str]: - """Return (resolved_path, source_tag) or 
(None, 'not_found'). +def _atomic_write_json(path: Path, payload: dict) -> None: + """tempfile -> os.replace -> fsync。失败抛异常。""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp( + prefix=path.name + ".", + suffix=".tmp", + dir=str(path.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + json.dump(payload, fh, ensure_ascii=False, indent=2) + fh.flush() + try: + os.fsync(fh.fileno()) + except OSError: + pass + os.replace(tmp, path) + # fsync parent dir 提高跨进程一致性(Windows 上无效,忽略) + try: + dir_fd = os.open(str(path.parent), os.O_RDONLY) + try: + os.fsync(dir_fd) + except OSError: + pass + finally: + os.close(dir_fd) + except OSError: + pass + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise - On first successful Obsidian detection, auto-saves the resolved path to - config.json so subsequent calls are stable even if Obsidian is not running. - """ - env_root = os.environ.get("SPECODE_ROOT") - if env_root: - return Path(env_root).expanduser().resolve(), "env" - cfg = read_config() - if cfg.get("obsidianRoot"): - return Path(cfg["obsidianRoot"]).expanduser().resolve(), "config" +def _save_specode_config(cfg: dict) -> None: + _atomic_write_json(_specode_config_path(), cfg) - vaults = read_vaults() - best = pick_best_vault(vaults) - if best: - root = Path(best["path"]) / device_segment() - cfg["vaultPath"] = best["path"] - cfg["obsidianRoot"] = str(root) - write_config(cfg) - return root, "obsidian" - return None, "not_found" +# ------------------------------------------------------------------------- +# 三层 resolve_doc_root +# ------------------------------------------------------------------------- +def resolve_doc_root(override: Optional[str] = None) -> Tuple[Optional[Path], str]: + """三层根目录解析。返回 (Path|None, source)。 -def configured_spec_root() -> tuple[Path | None, str]: - """Return the spec root explicitly recorded by specode config.json.""" - cfg = read_config() - if cfg.get("obsidianRoot"): - 
return Path(cfg["obsidianRoot"]).expanduser().resolve(), "config" - return None, "not_found" + source ∈ {'override', 'env', 'config', 'auto', 'none'}。 + 路径段 `spec-in/-` 由 `_device_segment()` 在以下场景自动追加 + (让多设备 / 多用户共享同一 vault 时各 device 的 spec 互不串扰): -def list_other_root_specs(current_root: Path) -> list[dict[str, object]]: - """Look for spec folders living under known historical fallback locations. + | source | 来源 | 追加 device 段? | + |----------|-------------------------------------|------------------| + | override | --root 参数 | 否(用户给什么用什么) | + | env | SPECODE_ROOT 环境变量 | 否 | + | config | config.json.rootOverride | 否 | + | config | config.json.obsidianRoot / docRoot | 是 | + | auto | Obsidian auto-detect | 是 | + | none | 三层全 miss | — | - Used to warn users after `--set-root` / `--set-vault` so they know specs - created under the old root are not auto-migrated. + 详见 references/obsidian.md §1。 """ - candidates: list[Path] = [] - cwd = Path.cwd().resolve() - candidates.append(cwd / "specs") - candidates.append(Path.home() / "new project" / "specs") - seen: list[dict[str, object]] = [] - for candidate in candidates: - if not candidate.exists() or candidate.resolve() == current_root.resolve(): - continue - for child in sorted(candidate.iterdir()): - if child.is_dir() and (child / ".config.json").exists(): - seen.append({"slug": child.name, "path": str(child)}) - return seen - - -def command_detect(args: argparse.Namespace) -> int: - config_path = obsidian_config_path() - vaults = read_vaults() - best = pick_best_vault(vaults) - cfg = read_config() - - if args.json: - print(json.dumps({ - "platform": platform.system(), - "obsidianConfigPath": str(config_path) if config_path else None, - "obsidianConfigExists": bool(config_path and config_path.exists()), - "vaults": vaults, - "bestVault": best, - "specModeConfig": str(CONFIG_FILE), - "specModeConfigExists": CONFIG_FILE.exists(), - "currentConfig": cfg, - }, ensure_ascii=False, indent=2)) - return 0 - - if not vaults: - 
print("未检测到 Obsidian 安装,或没有已注册的 vault。") - print(f" Obsidian 配置路径: {config_path}") - print() - print("请选择以下方式之一:") - print(" 1. 安装 Obsidian 后重试(推荐)") - print(" 2. /spec --set-vault ") - print(" 3. /spec --set-root <自定义目录>") - else: - print(f"检测到 {len(vaults)} 个 vault:") - for v in vaults: - marker = "► " if v == best else " " - status = "open" if v.get("open") else "closed" - print(f"{marker}{v['path']} [{status}]") - if best: - print(f"\n将使用: {Path(best['path']) / device_segment()}") - return 0 - + # 1. override 优先:参数 > 环境变量;用户给什么用什么,不追加 device 段 + if override: + return (Path(override).expanduser(), "override") -def command_set(args: argparse.Namespace) -> int: - cfg = read_config() - changed = False + env_root = os.environ.get("SPECODE_ROOT") + if env_root: + return (Path(env_root).expanduser(), "env") + + # 2. config.json — rootOverride 优先于 obsidianRoot(显式 set --root 不追加) + cfg = _load_specode_config() + override_root = cfg.get("rootOverride") + if override_root and isinstance(override_root, str): + return (Path(override_root).expanduser(), "config") + obs_root = cfg.get("obsidianRoot") or cfg.get("docRoot") + if obs_root and isinstance(obs_root, str): + return (Path(obs_root).expanduser() / "spec-in" / _device_segment(), "config") + + # 3. 
auto-detect → vault 根 + 追加 device 段 + vaults = _load_obsidian_vaults() + for v in vaults: + if v.get("exists"): + return (Path(v["path"]) / "spec-in" / _device_segment(), "auto") + + return (None, "none") + + +# ------------------------------------------------------------------------- +# 子命令 +# ------------------------------------------------------------------------- + +def cmd_detect(args: argparse.Namespace) -> int: + vaults = _load_obsidian_vaults() + payload = { + "platform": platform.system(), + "configs_checked": [str(p) for p in _obsidian_config_paths()], + "vaults": vaults, + "count": len(vaults), + } + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") + return 0 - if args.vault: - vault = Path(args.vault).expanduser().resolve() - segment = device_segment() - cfg["vaultPath"] = str(vault) - cfg["obsidianRoot"] = str(vault / segment) - changed = True - print(f"vault: {vault}") - print(f"spec root: {vault / segment}") - - if args.root: - cfg["obsidianRoot"] = str(Path(args.root).expanduser().resolve()) - changed = True - print(f"spec root: {cfg['obsidianRoot']}") - - if not changed: - print("未指定任何参数。可用选项:", file=sys.stderr) - print(" --vault 设置 Obsidian vault,spec 存入 vault/spec-in/-/specs", file=sys.stderr) - print(" --root <目录> 直接指定 spec 文档根目录(完全自定义路径)", file=sys.stderr) - return 1 - write_config(cfg) - print(f"\n配置已保存至: {CONFIG_FILE}") - print(" (此后每次 /spec 自动使用此路径;任何时候可再次运行 set 修改)") - - new_root = Path(cfg["obsidianRoot"]).expanduser().resolve() - others = list_other_root_specs(new_root) - if others: - print() - print(f"⚠ 检测到旧位置仍有 {len(others)} 个 spec(不会自动迁移):") - for entry in others[:10]: - print(f" - {entry['slug']} {entry['path']}") - if len(others) > 10: - print(f" ... 
还有 {len(others) - 10} 个") - print(" 如需迁移,请手动 mv 并更新各 spec 的 .config.json.documentRoot 字段。") +def cmd_status(args: argparse.Namespace) -> int: + root, source = resolve_doc_root() + payload: dict = { + "doc_root": str(root) if root else None, + "source": source, + "exists": bool(root and root.exists()), + "config_path": str(_specode_config_path()), + "env_SPECODE_ROOT": os.environ.get("SPECODE_ROOT"), + } + if source == "none": + payload["hint"] = ( + "未检测到 specode 根目录。可任选其一:\n" + " 1) 运行 `spec_vault.py set --vault ` 写入持久配置;\n" + " 2) 在环境变量中 export SPECODE_ROOT=;\n" + " 3) 在 Obsidian 中打开任意 vault 后再次运行 detect。" + ) + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") + return 3 + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") return 0 -def command_get(args: argparse.Namespace) -> int: - if args.configured_only: - root, source = configured_spec_root() - else: - root, source = resolve_spec_root() - cfg = read_config() - - if args.json: - print(json.dumps({ - "specRoot": str(root) if root else None, - "source": source, - "config": cfg, - "configFile": str(CONFIG_FILE), - }, ensure_ascii=False, indent=2)) - return 0 - - source_labels = { - "env": "SPECODE_ROOT 环境变量", - "config": "specode 配置文件", - "obsidian": "Obsidian 自动检测", - "not_found": "未配置", - } - if root: - print(f"spec 文档根目录: {root}") - print(f"来源: {source_labels.get(source, source)}") - others = list_other_root_specs(root) - if others: - print() - print(f"⚠ 旧位置仍有 {len(others)} 个 spec(不会自动迁移):") - for entry in others[:10]: - print(f" - {entry['slug']} {entry['path']}") - if len(others) > 10: - print(f" ... 
还有 {len(others) - 10} 个") +def cmd_set(args: argparse.Namespace) -> int: + target = args.vault or args.root + if not target: + sys.stderr.write("用法:spec_vault.py set --vault 或 set --root \n") + return 3 + p = Path(target).expanduser().resolve() + if not p.exists(): + sys.stderr.write(f"路径不存在:{p}\n请确认目录已创建后再次执行。\n") + return 3 + if not p.is_dir(): + sys.stderr.write(f"路径不是目录:{p}\n") + return 3 + cfg = _load_specode_config() + if args.vault: + # --vault:写 obsidianRoot;resolve_doc_root 会追加 spec-in/ + cfg["obsidianRoot"] = str(p) + cfg.pop("rootOverride", None) else: - print("未配置 spec 文档根目录。") - print() - print("请选择以下方式之一:") - print(" 1. 安装 Obsidian 后重试(推荐)") - print(" 2. /spec --set-vault ") - print(" 3. /spec --set-root <自定义目录>") - print(f"配置文件: {CONFIG_FILE} ({'存在' if CONFIG_FILE.exists() else '不存在'})") + # --root:写 rootOverride;resolve_doc_root 不追加(用户给什么用什么) + cfg["rootOverride"] = str(p) + cfg.pop("obsidianRoot", None) + cfg.pop("docRoot", None) # legacy 字段统一清理 + cfg["updatedAt"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + try: + _save_specode_config(cfg) + except Exception as e: + sys.stderr.write(f"写入 {_specode_config_path()} 失败:{e}\n") + return 1 + # doc_root 输出用 resolve_doc_root 重算(反映 device 段追加) + resolved, _ = resolve_doc_root() + sys.stdout.write(json.dumps({ + "ok": True, + "doc_root": str(resolved) if resolved else str(p), + "config_path": str(_specode_config_path()), + }, ensure_ascii=False, indent=2) + "\n") return 0 -def main() -> int: - parser = argparse.ArgumentParser( - description="Obsidian vault detection and specode root configuration.", - ) - sub = parser.add_subparsers(dest="command", required=True) +# ------------------------------------------------------------------------- +# entry +# ------------------------------------------------------------------------- - detect_p = sub.add_parser("detect", help="检测已安装的 Obsidian vault。") - detect_p.add_argument("--json", action="store_true") - detect_p.set_defaults(func=command_detect) +def 
main(argv: Optional[list[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="spec_vault.py", description="specode vault detection & root configuration") + sub = parser.add_subparsers(dest="cmd", required=True) - set_p = sub.add_parser("set", help="设置 spec 文档根目录或 vault 路径。") - set_p.add_argument("--vault", help="Obsidian vault 路径。spec root = vault/spec-in/-/specs。") - set_p.add_argument("--root", help="直接指定 spec 文档根目录(完全自定义路径)。") - set_p.set_defaults(func=command_set) + sub.add_parser("detect", help="探测平台 obsidian.json,列出已知 vault") + sub.add_parser("status", help="输出当前 doc_root 与来源") - get_p = sub.add_parser("get", help="显示当前解析到的 spec 文档根目录。") - get_p.add_argument("--json", action="store_true") - get_p.add_argument("--configured-only", action="store_true", help="只读取 specode config.json 中记录的根目录,不自动检测或回退。") - get_p.set_defaults(func=command_get) + p_set = sub.add_parser("set", help="写入 ~/.config/specode/config.json") + g = p_set.add_mutually_exclusive_group(required=True) + g.add_argument("--vault", help="设置 vault 根目录") + g.add_argument("--root", help="设置 doc 根目录(不强调 vault 概念)") - args = parser.parse_args() - return args.func(args) + args = parser.parse_args(argv) + if args.cmd == "detect": + return cmd_detect(args) + if args.cmd == "status": + return cmd_status(args) + if args.cmd == "set": + return cmd_set(args) + parser.print_help() + return 3 if __name__ == "__main__": - raise SystemExit(main()) + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/task_swarm.py b/plugins/specode/scripts/task_swarm.py old mode 100644 new mode 100755 index 7bf8b55..f21bf95 --- a/plugins/specode/scripts/task_swarm.py +++ b/plugins/specode/scripts/task_swarm.py @@ -1,639 +1,33 @@ -"""task-swarm orchestrator CLI. 
+#!/usr/bin/env python3 +'''scripts/task_swarm.py — 薄 launcher,把所有调用转给 task_swarm.cli.main()。 -Subcommands: - init — parse tasks.md, build run workspace + state.json - next — return JSON instruction for the next step (fork|writeback|wait|done) - parse — read a subagent's outbox, return structured verdict - advance — record verdict into state.json - writeback — safely flip tasks.md checkboxes for a converged/failed stage - status — print human-readable run status - resolve — resolve a run (latest run for spec or by id) +文件名 `task_swarm.py` 保留作为外部 API surface:commands/task-swarm.md + +spec_session/_hooks.py:_run_task_swarm_plan 都按此路径调用。实现拆到同目录的 +`task_swarm/` 包内(_state / _parse_md / _outbox / _prompt / _writeback / cli), +launcher 只做两件事: -The orchestrator (main Claude session) only calls these subcommands and -acts on their JSON output — it doesn't recreate state-machine logic or -parse tasks.md itself. -""" + 1. sys.path 注入 scripts/,让包内 spec_log import 可用 + 2. import task_swarm.cli.main 并调用 + +同名文件 + 同名目录共存安全:Python FileFinder 在同一 path entry 下 +package > module,launcher 自己被 exec、不走 import 系统。 + +stdlib-only。 +''' from __future__ import annotations -import argparse -import json -import os import sys -from datetime import datetime, timezone from pathlib import Path -from typing import Optional - -SCRIPTS_DIR = Path(__file__).resolve().parent -sys.path.insert(0, str(SCRIPTS_DIR)) - -import spec_telemetry # noqa: E402 -import task_swarm_parse_md as plan_mod # noqa: E402 -import task_swarm_state as state_mod # noqa: E402 -import task_swarm_outbox as outbox_mod # noqa: E402 -import task_swarm_prompt as prompt_mod # noqa: E402 -import task_swarm_writeback as wb_mod # noqa: E402 - - -RUNS_DIRNAME = ".task-swarm" - -# Emitted commands consumed by the orchestrating model (run via Bash). Use the -# python launcher so Windows hosts that only ship `python` / `py` still work. 
-SELF_CMD = ( - "sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh " - "${CLAUDE_PLUGIN_ROOT}/scripts/" + Path(__file__).name -) - - -# ---------- run discovery ---------- - -def runs_root(project_root: Path) -> Path: - return project_root / RUNS_DIRNAME / "runs" - - -def resolve_run_dir(project_root: Path, run_id: Optional[str]) -> Path: - root = runs_root(project_root) - if run_id: - return root / run_id - if not root.exists(): - raise FileNotFoundError("尚无任何 task-swarm 运行 (.task-swarm/runs/ 不存在)") - candidates = sorted(p for p in root.iterdir() if p.is_dir()) - if not candidates: - raise FileNotFoundError("尚无任何 task-swarm 运行") - # latest by name (timestamp prefix sorts correctly) - return candidates[-1] - - -def _print_json(data) -> None: - print(json.dumps(data, ensure_ascii=False, indent=2)) - - -# ---------- init ---------- - -def cmd_init(args: argparse.Namespace) -> int: - tasks_path = Path(args.tasks).expanduser().resolve() - if not tasks_path.exists(): - _print_json({"error": f"tasks.md 不存在: {tasks_path}"}) - return 2 - spec_dir = tasks_path.parent - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - session_id = args.session or os.environ.get("TERM_SESSION_ID") or "" - - text = tasks_path.read_text(encoding="utf-8") - plan = plan_mod.parse_tasks_md(text).to_dict() - - run_id = state_mod.new_run_id() - run_dir = runs_root(project_root) / run_id - (run_dir / "agents").mkdir(parents=True, exist_ok=True) - - state = state_mod.build_initial_state( - run_id=run_id, - tasks_path=tasks_path, - spec_dir=spec_dir, - project_root=project_root, - plan=plan, - parallel=int(args.parallel), - max_rounds=int(args.max_rounds), - reviewer_max_rounds=int(args.reviewer_rounds) if args.reviewer_rounds is not None else None, - validator_max_rounds=int(args.validator_rounds) if args.validator_rounds is not None else None, - session_id=session_id, - ) - state_mod.save_state(run_dir, state) - - # Touch active-run pointer for UserPromptSubmit hook discovery. 
- pointer_dir = project_root / RUNS_DIRNAME - pointer_dir.mkdir(parents=True, exist_ok=True) - (pointer_dir / "active-run").write_text(run_id, encoding="utf-8") - - spec_telemetry.emit( - "swarm.run_start", - run_id=run_id, - spec_dir=str(spec_dir), - spec_slug=spec_dir.name, - project_root=str(project_root), - stage_count=len(state["stages"]), - max_rounds=int(args.max_rounds), - parallel=int(args.parallel), - ) - - _print_json({ - "run_id": run_id, - "run_dir": str(run_dir), - "tasks_path": str(tasks_path), - "spec_dir": str(spec_dir), - "project_root": str(project_root), - "stages": [ - {"num": s["num"], "title": s["title"], "kind": s["kind"], "phase": s["phase"]} - for s in state["stages"] - ], - "warnings": state.get("warnings") or [], - "next": f"{SELF_CMD} next --run {run_id}", - }) - return 0 - -# ---------- next ---------- +_SCRIPTS_DIR = Path(__file__).resolve().parent +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) -def cmd_next(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - run_dir = resolve_run_dir(project_root, args.run) - state = state_mod.load_state(run_dir) - action = state_mod.next_action(state) - payload = action.to_dict() - - if action.kind == "fork": - stage_num = payload["stage"] - role = payload["role"] - round_no = payload["round"] - stage = state_mod.get_stage(state, stage_num) - ws = prompt_mod.prepare_workspace(run_dir, stage_num, role, round_no) - - # Relay upstream artifacts into inbox per role. - sources: list[tuple[int, str, int, str]] = [] - if role == "reviewer": - # Advisory reviewer: just needs the latest coder output. - sources.append((stage_num, "coder", round_no, "result.md")) - elif role == "validator": - # Checkpoint: pull latest coder output of the validated stage. 
- if stage["kind"] == "checkpoint" and stage.get("checkpoint_for"): - src_stage = stage["checkpoint_for"] - sources.append((src_stage, "coder", _max_round_for(state, src_stage, "coder"), "upstream-result.md")) - else: - # Normal-stage validator path (kept for forward-compat; not - # currently scheduled by next_action). - sources.append((stage_num, "coder", round_no, "result.md")) - if round_no > 1: - # Re-run after a coder fix — give validator the previous fail report - sources.append((stage_num, "validator", round_no - 1, "prev-validation.md")) - elif role == "coder" and round_no > 1: - # fix round — only validator-fail-fix exists post-R3 - sources.append((stage_num, "coder", round_no - 1, "prev-result.md")) - if payload.get("scope") == "validator-fail-fix": - sources.append((stage_num, "validator", round_no - 1, "validation.md")) - - prompt_mod.relay_inbox(run_dir, ws, sources) - - ctx = prompt_mod.StageContext( - stage_num=stage_num, - stage_title=stage["title"], - stage_kind=stage["kind"], - leaves=stage["leaves"], - spec_dir=Path(state["spec_dir"]), - project_root=Path(state["project_root"]), - workspace=ws, - round_no=round_no, - scope=payload.get("scope") or "", - ) - task_md = prompt_mod.write_task_file(ctx, role) - - # Mark in-flight - state_mod.mark_in_flight(state, stage_num, role, round_no) - state_mod.save_state(run_dir, state) - - payload.update({ - "subagent_type": f"specode:task-swarm-{role}", - "description": _fork_description(stage_num, role, round_no, payload.get("scope"), stage["title"]), - "workspace": str(ws), - "prompt_file": str(task_md), - "after_fork": ( - f"{SELF_CMD} parse " - f"--run {state['run_id']} --stage {stage_num} --role {role} --round {round_no}" - ), - }) - - elif action.kind == "writeback": - payload["cmd"] = ( - f"{SELF_CMD} writeback " - f"--run {state['run_id']} --stage {payload['stage']}" - ) - - _print_json(payload) - return 0 - - -def _fork_description(stage_num: int, role: str, round_no: int, scope: Optional[str], 
stage_title: str) -> str: - """Build the Task description shown in the UI. - - Always include scope when present, so a r2+ coder reads as e.g. - "阶段 5 coder-r2 [validator-fail-fix]: 检查点 —— Mascot 独立可控" - instead of letting the orchestrator improvise a description that - misleadingly mentions "P0" (reviewer terminology) for what is really - a validator-fail-fix round. - """ - rstr = f"-r{round_no}" if round_no > 1 else "" - scope_str = f" [{scope}]" if scope else "" - return f"阶段 {stage_num} {role}{rstr}{scope_str}: {stage_title}" - - -def _max_round_for(state: dict, stage_num: int, role: str) -> int: - stage = state_mod.get_stage(state, stage_num) - rounds = [h["round"] for h in stage.get("history", []) if h["role"] == role] - return max(rounds) if rounds else 1 - - -# ---------- parse ---------- - -def cmd_parse(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - run_dir = resolve_run_dir(project_root, args.run) - ws = prompt_mod.agent_workspace(run_dir, int(args.stage), args.role, int(args.round)) - outbox = ws / "outbox" - result = outbox_mod.parse_outbox(args.role, outbox) - result["workspace"] = str(ws) - - if result.get("judgment") == "schema-error": - # R5: on schema-error, snapshot the malformed outbox + clear it so the - # next fork starts from a clean slate. Do NOT include advance_cmd — - # subagent must be re-forked at the same stage/role/round. - snapshot: dict[str, str] = {} - if outbox.exists(): - for f in sorted(outbox.iterdir()): - if not f.is_file(): - continue - try: - snapshot[f.name] = f.read_text(encoding="utf-8", errors="replace")[:2000] - except OSError: - continue - try: - f.unlink() - except OSError: - pass - # Clear in_flight so the next `next` call can re-dispatch this fork. 
- state = state_mod.load_state(run_dir) - stage = state_mod.get_stage(state, int(args.stage)) - if stage.get("in_flight"): - stage["in_flight"] = None - state_mod.save_state(run_dir, state) - result["retry"] = True - result["outbox_snapshot"] = snapshot - result["next"] = ( - f"重新派发 subagent: stage={args.stage} role={args.role} round={args.round}. " - f"outbox 已清空、in_flight 已重置。下一次 `next` 会再次给出同一 fork 指令;" - f"把 outbox_snapshot 中的内容粘到 subagent prompt 提示它上次为何被拒。" - ) - else: - result["advance_cmd"] = ( - f"{SELF_CMD} advance " - f"--run {state_mod.load_state(run_dir)['run_id']} " - f"--stage {args.stage} --role {args.role} --round {args.round} " - f"--judgment {result['judgment']}" - ) - - _print_json(result) - return 0 - - -# ---------- advance ---------- - -def cmd_advance(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - run_dir = resolve_run_dir(project_root, args.run) - state = state_mod.load_state(run_dir) - - extra: dict = {} - if args.note: - extra["note"] = args.note - if args.reason: - extra["reason"] = args.reason - - # Re-parse outbox so the history record captures role-specific detail - # (subtasks for coder, p0_items / advisory_p0 for reviewer, fix_files for - # validator). writeback uses these to render annotations. - try: - ws = prompt_mod.agent_workspace(run_dir, int(args.stage), args.role, int(args.round)) - parsed = outbox_mod.parse_outbox(args.role, ws / "outbox") - for key in ( - "subtasks", "fix_files", "p0_items", "p0_count", - "advisory_p0_items", "advisory_p0_count", "conclusion", - "fix_guidance", "loop_warning", - ): - if key in parsed and parsed[key] not in (None, "", [], 0): - extra[key] = parsed[key] - except Exception: - # If outbox is missing or unreadable, fall through with bare verdict. 
- pass - - prev_phase = state_mod.get_stage(state, int(args.stage)).get("phase") - try: - stage = state_mod.advance( - state, int(args.stage), args.role, int(args.round), args.judgment, extra - ) - except ValueError as e: - _print_json({"error": str(e)}) - return 2 - - state_mod.save_state(run_dir, state) - - spec_slug = Path(state["spec_dir"]).name - spec_telemetry.emit( - "swarm.stage_round", - run_id=state["run_id"], - spec_slug=spec_slug, - stage=stage["num"], - role=args.role, - round=int(args.round), - judgment=args.judgment, - phase=stage["phase"], - p0_count=extra.get("p0_count"), - ) - if stage["phase"] in {"converged", "failed"} and prev_phase != stage["phase"]: - spec_telemetry.emit( - "swarm.stage_done", - run_id=state["run_id"], - spec_slug=spec_slug, - stage=stage["num"], - phase=stage["phase"], - rounds=stage["rounds"], - fail_reason=stage.get("fail_reason"), - ) - - _print_json({ - "stage": stage["num"], - "phase": stage["phase"], - "rounds": stage["rounds"], - "last": stage["last"], - "fail_reason": stage.get("fail_reason"), - "next": f"{SELF_CMD} next --run {state['run_id']}", - }) - return 0 - - -# ---------- writeback ---------- - -def cmd_writeback(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - run_dir = resolve_run_dir(project_root, args.run) - state = state_mod.load_state(run_dir) - stage = state_mod.get_stage(state, int(args.stage)) - tasks_path = Path(state["tasks_path"]) - if not tasks_path.exists(): - _print_json({"error": f"tasks.md 不存在: {tasks_path}"}) - return 2 - - if stage["phase"] not in {"converged", "failed"}: - _print_json({"error": f"stage {stage['num']} 尚未收敛 (phase={stage['phase']})", "skip": True}) - return 2 - - # Verify-lock + heartbeat before write. 
- warnings: list[str] = [] - try: - import spec_session - verify = spec_session.verify_and_heartbeat( - Path(state["spec_dir"]), state.get("session_id") or "" - ) - if verify.get("status") == "evicted": - _print_json({"error": "lock evicted", "verify": verify}) - return 3 - except SystemExit as e: - # spec_session.load_config raises SystemExit when .config.json absent — - # treat as "no lock model" and proceed. - warnings.append(f"verify-lock skipped (no spec config): {e}") - except Exception as e: - # non-fatal — lock model may not be in effect for this spec - warnings.append(f"verify-lock skipped: {e}") - - # Build writeback plan from history. - leaves_status: dict[str, str] = {} - reviewer_summary: dict | None = None - for record in stage.get("history", []): - if record["role"] == "coder" and record.get("subtasks"): - for st in record["subtasks"]: - leaves_status[st["num"]] = st["status"] - elif record["role"] == "coder" and stage["phase"] == "converged": - # No subtask data — assume all done. - for leaf in stage["leaves"]: - if leaf.get("policy") != "skip": - leaves_status.setdefault(leaf["num"], "done") - # R3: capture the latest reviewer verdict (advisory) for annotation. 
- if record["role"] == "reviewer": - reviewer_summary = { - "judgment": record.get("judgment"), - "p0_count": record.get("p0_count", 0), - "p0_items": record.get("p0_items", []), - "advisory_p0_count": record.get("advisory_p0_count", 0), - "advisory_p0_items": record.get("advisory_p0_items", []), - "conclusion": record.get("conclusion", ""), - } - - plan = wb_mod.WritebackPlan( - stage_num=stage["num"], - stage_phase=stage["phase"], - rounds=stage["rounds"], - leaves_status=leaves_status, - fail_reason=stage.get("fail_reason") or "", - reviewer_summary=reviewer_summary, - ) - - text = tasks_path.read_text(encoding="utf-8") - new_text = wb_mod.apply_writeback(text, plan) - - safe, reason = wb_mod.diff_safe_line_by_line(text, new_text) - if not safe: - _print_json({"error": f"writeback diff 不安全: {reason}"}) - return 2 - - tmp = tasks_path.with_suffix(".md.swarm.tmp") - tmp.write_text(new_text, encoding="utf-8") - tmp.replace(tasks_path) - - state_mod.mark_written_back(state, stage["num"]) - state_mod.save_state(run_dir, state) - - spec_telemetry.emit( - "swarm.writeback", - run_id=state["run_id"], - spec_slug=Path(state["spec_dir"]).name, - stage=stage["num"], - phase=stage["phase"], - rounds=stage["rounds"], - ) - - # If every stage is in a terminal (written or skipped) state, mark run end. 
- if all(s.get("written_back") or s.get("phase") == "skipped" for s in state["stages"]): - counts = {"converged": 0, "failed": 0, "skipped": 0} - for s in state["stages"]: - counts[s.get("phase", "?")] = counts.get(s.get("phase", "?"), 0) + 1 - spec_telemetry.emit( - "swarm.run_end", - run_id=state["run_id"], - spec_slug=Path(state["spec_dir"]).name, - stage_count=len(state["stages"]), - converged=counts.get("converged", 0), - failed=counts.get("failed", 0), - skipped=counts.get("skipped", 0), - ) - - payload = { - "stage": stage["num"], - "phase": stage["phase"], - "written": True, - "next": f"{SELF_CMD} next --run {state['run_id']}", - } - if warnings: - payload["warnings"] = warnings - _print_json(payload) - return 0 - - -# ---------- status ---------- - -def cmd_status(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - try: - run_dir = resolve_run_dir(project_root, args.run) - except FileNotFoundError as e: - _print_json({"error": str(e)}) - return 2 - state = state_mod.load_state(run_dir) - if args.json: - _print_json(state_mod.summarize(state)) - return 0 - print(f"task-swarm run: {state['run_id']}") - print(f"tasks_path: {state['tasks_path']}") - print(f"spec_dir: {state['spec_dir']}") - print(f"max_rounds: {state['config']['max_rounds']} parallel: {state['config']['parallel']}") - print() - for s in state["stages"]: - marker = { - "pending": "○", - "running": "▶", - "converged": "✔", - "failed": "✗", - "skipped": "—", - }.get(s["phase"], "?") - rounds = s["rounds"] - rstr = f"r:{rounds.get('reviewer',0)} v:{rounds.get('validator',0)}" - print(f" {marker} stage {s['num']:>2} [{s['kind']:<10}] {s['phase']:<10} {rstr} — {s['title']}") - if s.get("fail_reason"): - print(f" fail: {s['fail_reason']}") - return 0 - - -# ---------- reset-in-flight ---------- - -def cmd_reset_in_flight(args: argparse.Namespace) -> int: - """Clear in_flight on one or all stages — recovery after a subagent vanished. 
- - Without --stage: clears every stage's in_flight marker. - """ - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - try: - run_dir = resolve_run_dir(project_root, args.run) - except FileNotFoundError as e: - _print_json({"error": str(e)}) - return 2 - state = state_mod.load_state(run_dir) - - cleared: list[dict] = [] - if args.stage is None: - for s in state["stages"]: - if s.get("in_flight"): - cleared.append({"stage": s["num"], "prev": s["in_flight"]}) - s["in_flight"] = None - else: - stage = state_mod.get_stage(state, int(args.stage)) - if stage.get("in_flight"): - cleared.append({"stage": stage["num"], "prev": stage["in_flight"]}) - stage["in_flight"] = None - state_mod.save_state(run_dir, state) - _print_json({"cleared": cleared, "count": len(cleared)}) - return 0 - - -# ---------- resolve ---------- - -def cmd_resolve(args: argparse.Namespace) -> int: - project_root = Path(args.project_root or os.getcwd()).expanduser().resolve() - try: - run_dir = resolve_run_dir(project_root, args.run) - except FileNotFoundError as e: - _print_json({"error": str(e)}) - return 2 - _print_json({ - "run_id": run_dir.name, - "run_dir": str(run_dir), - "exists": run_dir.exists(), - }) - return 0 - - -# ---------- main ---------- - -def main(argv: list[str]) -> int: - parser = argparse.ArgumentParser(prog="task_swarm.py") - sub = parser.add_subparsers(dest="cmd", required=True) - - p = sub.add_parser("init", help="解析 tasks.md,初始化 run + state.json") - p.add_argument("--tasks", required=True, help="tasks.md 绝对路径") - p.add_argument("--project-root", default=None) - p.add_argument("--parallel", default=3, type=int) - p.add_argument( - "--max-rounds", default=3, type=int, - help="所有循环角色(目前仅 validator)的 fallback 上限", - ) - p.add_argument( - "--reviewer-rounds", default=None, type=int, - help="(已弃用)reviewer 现在是 advisory,不再参与修复循环。保留参数仅为兼容旧脚本", - ) - p.add_argument( - "--validator-rounds", default=3, type=int, - help="validator fail 修复循环上限(默认 3 — 
测试驱动的修复有客观信号)", - ) - p.add_argument("--session", default=None, help="spec session id(缺省取 TERM_SESSION_ID)") - p.set_defaults(func=cmd_init) - - p = sub.add_parser("next", help="返回下一步指令 JSON") - p.add_argument("--run", default=None, help="run id(缺省取最新)") - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_next) - - p = sub.add_parser("parse", help="解析 subagent outbox,返回结构化判定") - p.add_argument("--run", default=None) - p.add_argument("--stage", required=True) - p.add_argument("--role", required=True, choices=["coder", "reviewer", "validator"]) - p.add_argument("--round", required=True) - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_parse) - - p = sub.add_parser("advance", help="记录 verdict 到 state.json") - p.add_argument("--run", default=None) - p.add_argument("--stage", required=True) - p.add_argument("--role", required=True, choices=["coder", "reviewer", "validator"]) - p.add_argument("--round", required=True) - p.add_argument("--judgment", required=True) - p.add_argument("--note", default="") - p.add_argument("--reason", default="") - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_advance) - - p = sub.add_parser("writeback", help="安全回写 tasks.md") - p.add_argument("--run", default=None) - p.add_argument("--stage", required=True) - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_writeback) - - p = sub.add_parser("status", help="打印 run 状态") - p.add_argument("--run", default=None) - p.add_argument("--json", action="store_true") - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_status) - - p = sub.add_parser("resolve", help="解析 run dir 路径") - p.add_argument("--run", default=None) - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_resolve) - - p = sub.add_parser( - "reset-in-flight", - help="清理 in_flight 标记(subagent 崩溃/超时后的恢复手段)", - ) - p.add_argument("--run", default=None) - p.add_argument("--stage", default=None, 
help="留空清理所有 stage") - p.add_argument("--project-root", default=None) - p.set_defaults(func=cmd_reset_in_flight) - - args = parser.parse_args(argv) - return args.func(args) +from task_swarm.cli import main # noqa: E402 if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) + try: + sys.exit(main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/task_swarm/__init__.py b/plugins/specode/scripts/task_swarm/__init__.py new file mode 100644 index 0000000..65b7db1 --- /dev/null +++ b/plugins/specode/scripts/task_swarm/__init__.py @@ -0,0 +1,12 @@ +'''task_swarm package public surface. + +外部消费者:scripts/task_swarm.py launcher 调 `task_swarm.cli.main()`。 +测试侧(test_task_swarm_state / outbox / writeback / parse_md)按 +`from task_swarm._state import ...` 等子模块路径直接 import。 + +本 __init__.py 故意保持空白——子模块按需被 cli 或测试加载,无 package-level +公共 API 需 re-export(与 spec_session/__init__.py 不同,spec_session 那边是 +为兼容 spec_status.py:25 的旧 `from spec_session import …` 而 re-export)。 + +stdlib-only。 +''' diff --git a/plugins/specode/scripts/task_swarm/_outbox.py b/plugins/specode/scripts/task_swarm/_outbox.py new file mode 100644 index 0000000..eac8dc7 --- /dev/null +++ b/plugins/specode/scripts/task_swarm/_outbox.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python3 +"""task_swarm_outbox.py — 解析 3 类子代理产物:result.md / review.md / validation.md。 + +按 references/task-swarm.md §4 schema 严格校验;schema 错误返回 ParseError with 详细 reason。 + +stdlib-only。 +""" +from __future__ import annotations + +import hashlib +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +# ------------------------------------------------------------------------- +# 异常 +# ------------------------------------------------------------------------- + +class ParseError(Exception): + """schema 校验错误。message 必须含具体 reason 供主代理决策重派。""" + + +# ------------------------------------------------------------------------- +# 数据结构 — coder +# 
------------------------------------------------------------------------- + +@dataclass +class CoderSubtaskResult: + number: str # "1.1" + title: str + status: str # done / failed / skipped + note: str = "" # 备注 / 文件路径 + + +@dataclass +class CoderResult: + path: Path + status: str # ok / failed / blocked + status_reason: str = "" # failed/blocked 时的原因 + subtasks: list[CoderSubtaskResult] = field(default_factory=list) + key_changes: list[str] = field(default_factory=list) + hints: list[str] = field(default_factory=list) + raw: str = "" + + +# ------------------------------------------------------------------------- +# 数据结构 — reviewer +# ------------------------------------------------------------------------- + +@dataclass +class ReviewerFinding: + """reviewer 一条 finding。""" + severity: str # p0 / p1 / p2 / advisory + text: str # 完整一条原文 + evidence_tags: list[str] = field(default_factory=list) # 形如 ["req:1.2", "security"] + file_hint: Optional[str] = None # 提取的文件路径(best effort,仅展示用) + + +@dataclass +class ReviewerReview: + path: Path + verdict: str # needs-changes / approved-with-comments / approved + p0_items: list[ReviewerFinding] = field(default_factory=list) # 仅"带证据"的 P0 + advisory_items: list[ReviewerFinding] = field(default_factory=list) # 原 P0 但无证据,降级 + p1_items: list[ReviewerFinding] = field(default_factory=list) + p2_items: list[ReviewerFinding] = field(default_factory=list) + summary: str = "" + status: str = "ok" # 末行 STATUS + raw: str = "" + + +# ------------------------------------------------------------------------- +# 数据结构 — validator +# ------------------------------------------------------------------------- + +@dataclass +class ValidatorSubtaskResult: + number: str + title: str + status: str # pass / fail + note: str = "" + + +@dataclass +class ValidatorFixTarget: + """validator fail 时按文件分组的修复指引。""" + file_path: str + title: str + location: str = "" + problem: str = "" + suggestion: str = "" + requirements: list[str] = field(default_factory=list) + 
+ +@dataclass +class ValidatorValidation: + path: Path + verdict: str # pass / fail + reproduce_cmd: str = "" + subtask_results: list[ValidatorSubtaskResult] = field(default_factory=list) + failure_excerpt: str = "" + fix_targets: list[ValidatorFixTarget] = field(default_factory=list) + status: str = "ok" + raw: str = "" + + def fail_signature(self) -> str: + """fail 签名:从 failure_excerpt 提取测试名 + assertion 文本,做哈希。""" + if self.verdict != "fail": + return "" + text = self.failure_excerpt or "" + # 试图抓 "FAILED " 行 + 第一条 AssertionError/Error 行 + m = re.search(r"FAILED\s+([^\s]+)", text) + test_name = m.group(1) if m else "" + m2 = re.search(r"(AssertionError|Error)[^\n]*", text) + assertion = m2.group(0) if m2 else text.strip()[:200] + sig_src = f"{test_name}|{assertion}".strip() + return hashlib.sha256(sig_src.encode("utf-8")).hexdigest()[:16] + + +# ------------------------------------------------------------------------- +# 工具函数 +# ------------------------------------------------------------------------- + +_STATUS_RE = re.compile(r"^\s*STATUS\s*:\s*([A-Za-z]+)(?:\s*:\s*(.*))?$", re.IGNORECASE) + + +def _read_text(path: Path) -> str: + if not path.exists(): + raise ParseError(f"产物文件不存在:{path}") + try: + return path.read_text(encoding="utf-8") + except Exception as e: + raise ParseError(f"读取产物文件失败:{path}:{e}") + + +def _split_sections(text: str) -> dict[str, str]: + """按 `## ` 切段,返回 {section_name_lower: body}。第一个 `## ` 之前的部分以空 key 存。""" + sections: dict[str, str] = {"": ""} + cur_name = "" + cur_buf: list[str] = [] + for line in text.splitlines(): + m = re.match(r"^##\s+(.+?)\s*$", line) + if m: + sections[cur_name] = "\n".join(cur_buf).rstrip() + cur_name = m.group(1).strip().lower() + cur_buf = [] + else: + cur_buf.append(line) + sections[cur_name] = "\n".join(cur_buf).rstrip() + return sections + + +def _extract_status(text: str) -> tuple[str, str]: + """提取末尾 STATUS 行。返回 (status, reason)。未找到 → ("", "")。""" + for line in reversed(text.splitlines()): + s = 
line.strip() + if not s: + continue + m = _STATUS_RE.match(s) + if m: + status = m.group(1).lower() + # 兼容形如 "STATUS: failed: ImportError" → status=failed, reason=ImportError + reason = (m.group(2) or "").strip() + # 若 status 末尾带冒号(罕见),剥掉 + status = status.rstrip(":") + return status, reason + # 任何非空非 STATUS 行都跳出(STATUS 必须是末行非空) + break + return "", "" + + +# ------------------------------------------------------------------------- +# coder result.md +# ------------------------------------------------------------------------- + +_CODER_SUBTASK_RE = re.compile( + r"^\s*-\s+(?P\d+(?:\.\d+)+)\s+(?P.+?)\s*[::]\s*(?P<status>done|failed|skipped)(?:\s*[—\-]\s*(?P<note>.*))?$", + re.IGNORECASE, +) + + +def parse_coder_result(path: Path) -> CoderResult: + raw = _read_text(path) + sections = _split_sections(raw) + status, reason = _extract_status(raw) + if status not in ("ok", "failed", "blocked"): + raise ParseError( + f"coder result 缺末行 STATUS 或 status 非法(应为 ok/failed/blocked):{path}" + ) + + res = CoderResult(path=path, status=status, status_reason=reason, raw=raw) + + body = sections.get("子任务状态", "") + for line in body.splitlines(): + m = _CODER_SUBTASK_RE.match(line) + if m: + res.subtasks.append(CoderSubtaskResult( + number=m.group("num"), + title=m.group("title").strip(), + status=m.group("status").lower(), + note=(m.group("note") or "").strip(), + )) + + kc = sections.get("关键变更", "") + for line in kc.splitlines(): + s = line.strip() + if s.startswith("-"): + res.key_changes.append(s.lstrip("-").strip()) + + hints = sections.get("给下游 reviewer 的提示", "") or sections.get("给下游 reviewer 的提示(可选)", "") + for line in hints.splitlines(): + s = line.strip() + if s.startswith("-"): + res.hints.append(s.lstrip("-").strip()) + + return res + + +# ------------------------------------------------------------------------- +# reviewer review.md +# ------------------------------------------------------------------------- + +_EVIDENCE_RE = 
re.compile(r"\[(req:[^\]]+|security|contract)\]") +_FILE_HINT_RE = re.compile(r"([A-Za-z0-9_./\-]+\.[A-Za-z0-9]+)(?::\d+)?") +_VERDICT_VALUES = {"needs-changes", "approved-with-comments", "approved"} + + +def _parse_findings(body: str, default_severity: str) -> list[ReviewerFinding]: + out: list[ReviewerFinding] = [] + for line in body.splitlines(): + s = line.strip() + if not s or not s.startswith("-"): + continue + s2 = s.lstrip("-").strip() + if s2.lower() in ("(none)", "none", "无"): + continue + tags = [m.group(1) for m in _EVIDENCE_RE.finditer(s2)] + file_hint_m = _FILE_HINT_RE.search(s2) + file_hint = file_hint_m.group(1) if file_hint_m else None + out.append(ReviewerFinding( + severity=default_severity, + text=s2, + evidence_tags=tags, + file_hint=file_hint, + )) + return out + + +def parse_reviewer_review(path: Path) -> ReviewerReview: + raw = _read_text(path) + sections = _split_sections(raw) + verdict_body = sections.get("结论", "").strip().lower() + verdict_token = "" + for tok in verdict_body.split(): + if tok in _VERDICT_VALUES: + verdict_token = tok + break + if verdict_token not in _VERDICT_VALUES: + raise ParseError( + f"reviewer review 缺合法 '## 结论' 章节(应为 " + f"needs-changes/approved-with-comments/approved):{path}" + ) + + # 抓 P0 / P1 / P2 三段(P0 章节标题可能含证据要求附注) + p0_body = "" + p1_body = "" + p2_body = "" + for k, v in sections.items(): + if k.startswith("p0"): + p0_body = v + elif k.startswith("p1"): + p1_body = v + elif k.startswith("p2"): + p2_body = v + p0_raw = _parse_findings(p0_body, "p0") + # 按证据标签拆分 + p0_items: list[ReviewerFinding] = [] + advisory: list[ReviewerFinding] = [] + for f in p0_raw: + if f.evidence_tags: + p0_items.append(f) + else: + f.severity = "advisory" + advisory.append(f) + p1_items = _parse_findings(p1_body, "p1") + p2_items = _parse_findings(p2_body, "p2") + summary = "" + for k in ("给使用者的提示", "给使用者的提示(可选)"): + if k in sections and sections[k]: + summary = sections[k].strip() + break + + status, _ = _extract_status(raw) + 
if status != "ok": + raise ParseError( + f"reviewer review 末行 STATUS 必须是 ok(reviewer 是 advisory):{path}" + ) + + return ReviewerReview( + path=path, + verdict=verdict_token, + p0_items=p0_items, + advisory_items=advisory, + p1_items=p1_items, + p2_items=p2_items, + summary=summary, + status=status, + raw=raw, + ) + + +# ------------------------------------------------------------------------- +# validator validation.md +# ------------------------------------------------------------------------- + +_VALIDATOR_SUBTASK_RE = re.compile( + r"^\s*-\s+\[(?P<box>[ xX])\]\s+(?P<num>\d+(?:\.\d+)+)\s+(?P<title>.+?)\s*[::]\s*(?P<status>pass|fail)(?:\s*[—\-]\s*(?P<note>.*))?$", + re.IGNORECASE, +) +_VALIDATOR_VERDICT_VALUES = {"pass", "fail"} + + +def _parse_fix_targets(body: str) -> list[ValidatorFixTarget]: + targets: list[ValidatorFixTarget] = [] + cur: Optional[ValidatorFixTarget] = None + for line in body.splitlines(): + ms = re.match(r"^###\s+(?:修复\s*\d+\s*[—\-]\s*)?(.+?)\s*$", line) + if ms: + if cur is not None: + targets.append(cur) + cur = ValidatorFixTarget(file_path="", title=ms.group(1).strip()) + continue + if cur is None: + continue + s = line.strip() + if s.startswith("- 文件:") or s.startswith("- 文件:"): + cur.file_path = s.split(":", 1)[1].strip() if ":" in s else s.split(":", 1)[1].strip() + elif s.startswith("- 位置:") or s.startswith("- 位置:"): + cur.location = s.split(":", 1)[1].strip() if ":" in s else s.split(":", 1)[1].strip() + elif s.startswith("- 问题:") or s.startswith("- 问题:"): + cur.problem = s.split(":", 1)[1].strip() if ":" in s else s.split(":", 1)[1].strip() + elif s.startswith("- 建议:") or s.startswith("- 建议:"): + cur.suggestion = s.split(":", 1)[1].strip() if ":" in s else s.split(":", 1)[1].strip() + elif "需求" in s and "_" in s: + reqs = re.findall(r"_需求[::]\s*([^_]+)_", s) + for r in reqs: + cur.requirements.extend([x.strip() for x in re.split(r"[,,]", r) if x.strip()]) + if cur is not None: + targets.append(cur) + # 校验:fix_target 必须有 file_path + 
return [t for t in targets if t.file_path] + + +def parse_validator_validation(path: Path) -> ValidatorValidation: + raw = _read_text(path) + sections = _split_sections(raw) + verdict_body = sections.get("判定", "").strip().lower() + verdict_token = "" + for tok in verdict_body.split(): + if tok in _VALIDATOR_VERDICT_VALUES: + verdict_token = tok + break + if verdict_token not in _VALIDATOR_VERDICT_VALUES: + raise ParseError( + f"validator validation 缺合法 '## 判定' 章节(应为 pass / fail):{path}" + ) + + val = ValidatorValidation(path=path, verdict=verdict_token, raw=raw) + cmd_body = sections.get("复现命令", "") + # 抓 fenced block 内容 + m = re.search(r"```(?:bash)?\s*(.*?)```", cmd_body, re.DOTALL) + if m: + val.reproduce_cmd = m.group(1).strip() + else: + val.reproduce_cmd = cmd_body.strip() + + st_body = sections.get("按子任务的验证结果", "") + for line in st_body.splitlines(): + m = _VALIDATOR_SUBTASK_RE.match(line) + if m: + val.subtask_results.append(ValidatorSubtaskResult( + number=m.group("num"), + title=m.group("title").strip(), + status=m.group("status").lower(), + note=(m.group("note") or "").strip(), + )) + + fail_body = sections.get("失败现场(fail 时必填)", "") or sections.get("失败现场", "") + m = re.search(r"```\s*(.*?)```", fail_body, re.DOTALL) + if m: + val.failure_excerpt = m.group(1).strip() + else: + val.failure_excerpt = fail_body.strip() + + fix_body = "" + for key in sections: + if key.startswith("给 coder 的修复指引") or key.startswith("给 coder 的修复指引(fail 时必填"): + fix_body = sections[key] + break + val.fix_targets = _parse_fix_targets(fix_body) + + # schema 强校验:fail 必须有 failure_excerpt + 至少 1 个 fix_target + if val.verdict == "fail": + if not val.failure_excerpt: + raise ParseError(f"validator fail 但缺 '失败现场':{path}") + if not val.fix_targets: + raise ParseError(f"validator fail 但缺 '给 coder 的修复指引'(按文件分组):{path}") + + status, _ = _extract_status(raw) + if status != "ok": + raise ParseError( + f"validator validation 末行 STATUS 必须是 ok(pass/fail 是 verdict,不是 status):{path}" + ) + 
val.status = status + return val + + +# ------------------------------------------------------------------------- +# 模块自测 +# ------------------------------------------------------------------------- + +if __name__ == "__main__": # pragma: no cover + import sys + if len(sys.argv) < 3: + print("usage: task_swarm_outbox.py <coder|reviewer|validator> <file.md>") + raise SystemExit(2) + kind, fp = sys.argv[1], Path(sys.argv[2]) + if kind == "coder": + r = parse_coder_result(fp) + elif kind == "reviewer": + r = parse_reviewer_review(fp) + elif kind == "validator": + r = parse_validator_validation(fp) + else: + raise SystemExit(2) + print(r) diff --git a/plugins/specode/scripts/task_swarm/_parse_md.py b/plugins/specode/scripts/task_swarm/_parse_md.py new file mode 100644 index 0000000..3cda916 --- /dev/null +++ b/plugins/specode/scripts/task_swarm/_parse_md.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +"""task_swarm_parse_md.py — 解析 tasks.md 为 stage 列表,并按文件冲突切 group(references/task-swarm.md §2)。 + +输入:tasks.md 路径 +输出: + parse_tasks_md(path) -> list[Stage] + group_by_file_conflict(stages, max_parallel=N) -> list[list[Stage]] + +stdlib-only。 +""" +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +# ------------------------------------------------------------------------- +# 数据结构 +# ------------------------------------------------------------------------- + +@dataclass +class StageItem: + """tasks.md 中一个 - [ ] N.M ... 
行(叶子子任务)。""" + number: str # 例如 "1.1" / "2.3" + title: str # 不含 @writes/@reads/_需求_ 等标签的纯标题 + writes: list[str] = field(default_factory=list) + reads: list[str] = field(default_factory=list) + requirements: list[str] = field(default_factory=list) # 形如 "1.1" / "1.2" + depends_on: list[str] = field(default_factory=list) # 形如 "2.1" + raw_line: str = "" # 原始行,writeback 时用作精确定位 + checkbox: str = " " # 当前 checkbox 字符(空格 / x) + line_no: int = 0 # 在 tasks.md 中的 1-based 行号 + + +@dataclass +class Stage: + """tasks.md 中一个 ## 阶段 N: ... 段,包含若干 StageItem。""" + number: int # 1, 2, 3, ... + title: str # 标题(不含 "阶段 N: " 前缀) + items: list[StageItem] = field(default_factory=list) + header_line_no: int = 0 # 标题所在行号 + end_line_no: int = 0 # 段最后一行(下个阶段标题前 / 文末) + + @property + def writes(self) -> list[str]: + """聚合该 stage 全部 item 的 @writes 集合。""" + out: list[str] = [] + for it in self.items: + for w in it.writes: + if w not in out: + out.append(w) + return out + + @property + def reads(self) -> list[str]: + """聚合该 stage 全部 item 的 @reads 集合。""" + out: list[str] = [] + for it in self.items: + for r in it.reads: + if r not in out: + out.append(r) + return out + + @property + def depends_on(self) -> list[int]: + """聚合该 stage 全部 item 的 @depends-on(取数字段,转为 int stage 号)。""" + out: list[int] = [] + for it in self.items: + for d in it.depends_on: + # 接受 "2" / "2.1" 两种格式;统一取主 stage 号 + head = d.split(".", 1)[0].strip() + try: + n = int(head) + except ValueError: + continue + if n != self.number and n not in out: + out.append(n) + return out + + +# ------------------------------------------------------------------------- +# 解析 +# ------------------------------------------------------------------------- + +_STAGE_HEADER_RE = re.compile(r"^\s*##\s+阶段\s+(\d+)\s*[::]\s*(.+?)\s*$") +_ITEM_RE = re.compile(r"^\s*-\s+\[(?P<box>[ xX])\]\s+(?P<num>\d+(?:\.\d+)+)\s+(?P<rest>.*)$") +_WRITES_RE = re.compile(r"@writes\s*[::]\s*([^\s@_]+)") +_READS_RE = re.compile(r"@reads\s*[::]\s*([^\s@_]+)") +_DEPENDS_RE = 
re.compile(r"@depends-on\s*[::]\s*([^\s@_]+)") +_REQ_RE = re.compile(r"_需求[::]\s*([^_]+?)_") + + +def _split_csv(s: str) -> list[str]: + return [p.strip() for p in re.split(r"[,,]", s) if p.strip()] + + +def parse_tasks_md(path: Path) -> list[Stage]: + """解析 tasks.md,返回 Stage 列表。 + + 解析规则: + - 阶段标题:`## 阶段 N: <标题>` 或 `## 阶段 N:<标题>` + - 子任务:`- [ ] N.M <标题> @writes:a.py,b.py @reads:c.py @depends-on:2.1 _需求:1.1,1.2_` + - tag 之间分隔符可空格;中文/英文冒号都识别 + - 非阶段块行(介绍、备注)忽略 + """ + if not path.exists(): + return [] + text = path.read_text(encoding="utf-8") + lines = text.splitlines() + + stages: list[Stage] = [] + current: Optional[Stage] = None + + for idx, line in enumerate(lines, start=1): + m = _STAGE_HEADER_RE.match(line) + if m: + if current is not None: + current.end_line_no = idx - 1 + stages.append(current) + current = Stage( + number=int(m.group(1)), + title=m.group(2).strip(), + header_line_no=idx, + ) + continue + if current is None: + continue + im = _ITEM_RE.match(line) + if not im: + continue + num = im.group("num") + rest = im.group("rest") + # 提取标签 + writes = _split_csv(_WRITES_RE.search(rest).group(1)) if _WRITES_RE.search(rest) else [] + reads = _split_csv(_READS_RE.search(rest).group(1)) if _READS_RE.search(rest) else [] + depends_on = _split_csv(_DEPENDS_RE.search(rest).group(1)) if _DEPENDS_RE.search(rest) else [] + reqs = _split_csv(_REQ_RE.search(rest).group(1)) if _REQ_RE.search(rest) else [] + # 标题:去掉所有标签后剩余 + title = rest + for r in (_WRITES_RE, _READS_RE, _DEPENDS_RE, _REQ_RE): + title = r.sub("", title) + title = re.sub(r"\s+", " ", title).strip() + current.items.append(StageItem( + number=num, + title=title, + writes=writes, + reads=reads, + requirements=reqs, + depends_on=depends_on, + raw_line=line, + checkbox=im.group("box"), + line_no=idx, + )) + + if current is not None: + current.end_line_no = len(lines) + stages.append(current) + + return stages + + +# ------------------------------------------------------------------------- +# group 
切分(references/task-swarm.md §2) +# ------------------------------------------------------------------------- + +def group_by_file_conflict( + stages: list[Stage], + max_parallel: int = 4, +) -> list[list[Stage]]: + """按 references/task-swarm.md §2 把 stage 切成 group: + + - 同 group 内任意两 stage 的 @writes 集合不相交且无 @depends-on 关系 + - 跨 group 串行:上一 group 全部 pass 后才能开 next group + - 每 group 上限 = max_parallel + - stage 顺序保留(按 stage.number 排序) + + 依赖关系:若 stage X depends_on Y,X 所在 group 的 index 必须严格大于 Y 所在 group 的 index。 + """ + if not stages: + return [] + if max_parallel < 1: + max_parallel = 1 + + sorted_stages = sorted(stages, key=lambda s: s.number) + stage_group: dict[int, int] = {} # stage.number -> group index + groups: list[list[Stage]] = [] + + for st in sorted_stages: + placed = False + # 计算依赖最低可放 group:所有依赖所在 group 的最大 index + 1 + min_idx = 0 + for dep in st.depends_on: + if dep in stage_group: + min_idx = max(min_idx, stage_group[dep] + 1) + # 尝试放入已有 group(>=min_idx) + for gi in range(min_idx, len(groups)): + g = groups[gi] + if len(g) >= max_parallel: + continue + # 检查与 group 内每个 stage 的冲突 + ok = True + for other in g: + # 文件冲突 + if set(st.writes) & set(other.writes): + ok = False + break + # 直接依赖(双向) + if other.number in st.depends_on or st.number in other.depends_on: + ok = False + break + if ok: + g.append(st) + stage_group[st.number] = gi + placed = True + break + if not placed: + groups.append([st]) + stage_group[st.number] = len(groups) - 1 + return groups + + +# ------------------------------------------------------------------------- +# 模块自测 +# ------------------------------------------------------------------------- + +if __name__ == "__main__": # pragma: no cover + import sys + if len(sys.argv) < 2: + print("usage: task_swarm_parse_md.py <tasks.md>") + raise SystemExit(2) + stages = parse_tasks_md(Path(sys.argv[1])) + for s in stages: + print(f"## 阶段 {s.number}: {s.title} (lines {s.header_line_no}-{s.end_line_no})") + for it in s.items: + print(f" - 
[{it.checkbox}] {it.number} {it.title} " + f"writes={it.writes} reads={it.reads} deps={it.depends_on} req={it.requirements}") + groups = group_by_file_conflict(stages) + print("\nGroups:") + for gi, g in enumerate(groups): + print(f" group {gi}: {[s.number for s in g]}") diff --git a/plugins/specode/scripts/task_swarm/_prompt.py b/plugins/specode/scripts/task_swarm/_prompt.py new file mode 100644 index 0000000..0cd865f --- /dev/null +++ b/plugins/specode/scripts/task_swarm/_prompt.py @@ -0,0 +1,400 @@ +#!/usr/bin/env python3 +"""task_swarm_prompt.py — 预渲染 coder / reviewer / validator subagent 的 task.md。 + +按 references/task-swarm.md §4 规范输出,每个 prompt 必须含: + - specId / spec_dir 上下文 + - @writes / @reads 边界 + - inbox 文件清单(指向 .task-swarm/runs/<id>/agents/<key>/inbox/) + - outbox 路径 + - STATUS 输出协议 + +每个 prompt 渲染后写到: + .task-swarm/runs/<run_id>/agents/<agent-key>/task.md + +stdlib-only。 +""" +from __future__ import annotations + +import contextlib +import os +import tempfile +from pathlib import Path +from typing import Any, Optional + + +# ------------------------------------------------------------------------- +# 原子写 +# ------------------------------------------------------------------------- + +def _atomic_write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(content) + fh.flush() + with contextlib.suppress(OSError): + os.fsync(fh.fileno()) + os.replace(tmp, path) + except Exception: + with contextlib.suppress(OSError): + os.unlink(tmp) + raise + + +# ------------------------------------------------------------------------- +# 通用上下文段 +# ------------------------------------------------------------------------- + +def _context_block(spec_id: str, spec_dir: str, run_id: str, group: int, round_: int, + project_root: Optional[str] = None) -> str: + lines = [ + "## 上下文", + 
f"- specId: {spec_id}", + f"- spec_dir: {spec_dir}", + ] + # 0.10.15+:project_root 是代码实际写入的根目录,跟 spec_dir 区分 + if project_root: + lines.append(f"- project_root: {project_root} ← 代码必须写到这里,不要写到 spec_dir") + else: + lines.append("- project_root: (未设置;fallback 用 spec_dir,但应由主代理在 init 后通过 set-project-root CLI 指定)") + lines.extend([ + f"- run_id: {run_id}", + f"- group: {group}", + f"- round: {round_}", + ]) + return "\n".join(lines) + "\n" + + +def _agent_root(run_dir: Path, agent_key: str) -> Path: + return run_dir / "agents" / agent_key + + +def _ensure_agent_dirs(run_dir: Path, agent_key: str) -> tuple[Path, Path, Path]: + """创建 agent_root / inbox / outbox 三个目录,返回它们。""" + root = _agent_root(run_dir, agent_key) + inbox = root / "inbox" + outbox = root / "outbox" + inbox.mkdir(parents=True, exist_ok=True) + outbox.mkdir(parents=True, exist_ok=True) + return root, inbox, outbox + + +def _stage_writes(stage: Any) -> list[str]: + """获取 stage 的 writes 列表(兼容 property / 字段两种形式)。""" + w = getattr(stage, "writes", None) + if w is None: + return [] + if callable(w): + try: + w = w() + except TypeError: + return [] + return list(w) + + +def _stage_reads(stage: Any) -> list[str]: + r = getattr(stage, "reads", None) + if r is None: + return [] + if callable(r): + try: + r = r() + except TypeError: + return [] + return list(r) + + +# ------------------------------------------------------------------------- +# coder prompt +# ------------------------------------------------------------------------- + +def render_coder_prompt( + stage: Any, # StageEntry-like:number/title/items/writes/reads/requirements + run_dir: Path, + run_id: str, + spec_id: str, + spec_dir: str, + group: int, + round_: int = 1, + mode: str = "initial", # initial / p0-fix / v-fix + fix_targets: Optional[list[dict]] = None, + file_idx: Optional[int] = None, + project_root: Optional[str] = None, +) -> str: + """渲染 coder 的 task.md。返回 prompt 文本。同步写到 agent 目录的 task.md。""" + if mode == "initial": + agent_key = 
f"coder-g{group}-s{stage.number}-r{round_}" + elif mode == "p0-fix": + agent_key = f"coder-p0fix-g{group}-r{round_}-f{file_idx or 0}" + elif mode == "v-fix": + agent_key = f"coder-vfix-g{group}-r{round_}-f{file_idx or 0}" + else: + raise ValueError(f"未知 mode: {mode}") + + root, inbox, outbox = _ensure_agent_dirs(run_dir, agent_key) + + lines: list[str] = [] + title = stage.title if hasattr(stage, "title") else "修复任务" + if mode == "initial": + lines.append(f"# {agent_key}:阶段 {stage.number} {title}") + else: + lines.append(f"# {agent_key}:{mode} 修复任务") + lines.append("") + lines.append(_context_block(spec_id, spec_dir, run_id, group, round_, + project_root=project_root)) + lines.append("") + + # 0.10.15+:项目根目录与路径规约(避免 subagent 把代码写到 spec_dir) + lines.append("## 项目根目录与路径规约") + if project_root: + lines.append(f"- 代码根目录(`project_root`):`{project_root}`") + lines.append(f"- spec 文档目录(`spec_dir`):`{spec_dir}`") + lines.append("- 下面 `@writes` / `@reads` / " + "「修复指引文件」中的**相对路径**,全部相对于 " + "`project_root` 解析(如 `src/services/foo.ts` " + "→ 实际写到 `<project_root>/src/services/foo.ts`)。") + lines.append("- **严禁**把代码 / 数据库 / node_modules 等写到 `spec_dir/` 下;" + "`spec_dir/` 只放 `*.md` 文档和 `.task-swarm/` 状态。") + lines.append("- 跑 Bash 命令时请先 `cd \"" + project_root + "\"` 再执行 " + "`npm install` / `pytest` / `cargo` 等。") + else: + lines.append(f"- ⚠ project_root 未设置;fallback 用 spec_dir=`{spec_dir}`") + lines.append("- 这是兼容老 spec 的退化路径。新 spec 应在 init 后通过 " + "project-root-choice selector + set-project-root CLI 显式指定。") + lines.append("") + + if mode == "initial": + lines.append("## 任务清单(按顺序逐条完成)") + items = getattr(stage, "items", []) or [] + for it in items: + tags = [] + it_writes = it.get("writes") if isinstance(it, dict) else getattr(it, "writes", []) + it_reads = it.get("reads") if isinstance(it, dict) else getattr(it, "reads", []) + it_reqs = it.get("requirements") if isinstance(it, dict) else getattr(it, "requirements", []) + num = it.get("number") if isinstance(it, dict) else 
getattr(it, "number", "") + it_title = it.get("title") if isinstance(it, dict) else getattr(it, "title", "") + if it_writes: + tags.append(f"@writes:{','.join(it_writes)}") + if it_reads: + tags.append(f"@reads:{','.join(it_reads)}") + if it_reqs: + tags.append("_需求:" + ",".join(it_reqs) + "_") + suffix = (" " + " ".join(tags)) if tags else "" + lines.append(f"- {num} {it_title}{suffix}") + lines.append("") + st_writes = _stage_writes(stage) + st_reads = _stage_reads(stage) + lines.append("## 文件边界(严格遵守)") + lines.append(f"- @writes(仅允许写入):{', '.join(st_writes) if st_writes else '(无声明)'}") + lines.append(f"- @reads(允许读取):{', '.join(st_reads) if st_reads else '(无声明)'}") + elif mode == "p0-fix": + lines.append("## P0 修复任务") + lines.append("reviewer 提出的 P0(带证据标签)必须修复。详情见 inbox/p0-items.md。") + lines.append("修复后请在 outbox/result.md 的「子任务状态」节按 done/failed 标记。") + if fix_targets: + lines.append("") + lines.append("## 涉及文件") + for ft in fix_targets: + lines.append(f"- {ft.get('file_hint') or ft.get('file_path') or 'unknown'}") + elif mode == "v-fix": + lines.append("## v-fix 修复任务(按 validator 修复指引)") + lines.append("仅修复 validator 在 validation.md 「给 coder 的修复指引」中列出的项。") + lines.append("不要扩大范围;不要重写非失败相关的代码。") + if fix_targets: + lines.append("") + lines.append("## 涉及文件 / 修复指引") + for ft in fix_targets: + lines.append(f"- 文件:{ft.get('file_path', '?')}") + if ft.get("location"): + lines.append(f" 位置:{ft['location']}") + if ft.get("problem"): + lines.append(f" 问题:{ft['problem']}") + if ft.get("suggestion"): + lines.append(f" 建议:{ft['suggestion']}") + if ft.get("requirements"): + lines.append(f" _需求:{','.join(ft['requirements'])}_") + + lines.append("") + lines.append("## inbox(上游产物,**只读**)") + lines.append(f"- 路径:`{inbox}`") + lines.append("- 内容(由主编排器在 fork 前放入):上一轮 result.md / review.md / validation.md(按需)") + lines.append("") + lines.append("## outbox(你的产物**必须**写到这里)") + lines.append(f"- result.md:`{outbox / 'result.md'}`") + lines.append("") + lines.append("## 输出协议(必读)") + 
lines.append("1. result.md 必须含三节:`## 上下文` / `## 子任务状态` / `## 关键变更`") + lines.append("2. 子任务状态行格式:`- <编号> <标题>: <done|failed|skipped> — <备注/文件>`") + lines.append("3. 末行必须是:`STATUS: ok` 或 `STATUS: failed: <原因>` 或 `STATUS: blocked: <原因>`") + lines.append("4. 严禁评价自己产物(不写 LGTM / 看起来不错);reviewer 自会评审") + lines.append("5. 严禁修改 @writes 之外的文件") + + text = "\n".join(lines) + "\n" + _atomic_write_text(root / "task.md", text) + return text + + +# ------------------------------------------------------------------------- +# reviewer prompt +# ------------------------------------------------------------------------- + +def render_reviewer_prompt( + group_stages: list[Any], + coder_outboxes: list[Path], + run_dir: Path, + run_id: str, + spec_id: str, + spec_dir: str, + group: int, + round_: int = 1, + project_root: Optional[str] = None, +) -> str: + agent_key = f"reviewer-g{group}-r{round_}" + root, inbox, outbox = _ensure_agent_dirs(run_dir, agent_key) + lines: list[str] = [] + lines.append(f"# {agent_key}:本 group {len(group_stages)} 个 stage 联合评审") + lines.append("") + lines.append(_context_block(spec_id, spec_dir, run_id, group, round_, + project_root=project_root)) + lines.append("") + lines.append("## 评审范围") + for s in group_stages: + st_writes = _stage_writes(s) + lines.append(f"- 阶段 {s.number}: {s.title}(@writes: {', '.join(st_writes)})") + lines.append("") + lines.append("## inbox(上游 coder 全部产物)") + for p in coder_outboxes: + lines.append(f"- `{p}`") + lines.append("") + lines.append("## outbox") + lines.append(f"- review.md:`{outbox / 'review.md'}`") + lines.append("") + lines.append("## review.md schema(必须严格遵守)") + lines.append("````markdown") + lines.append("# " + agent_key) + lines.append("") + lines.append("## 结论") + lines.append("needs-changes | approved-with-comments | approved") + lines.append("") + lines.append("## P0(每条必须带证据标签:[req:x.y] / [security] / [contract])") + lines.append("- <file:line> [req:x.y] — <一句话说明>") + lines.append("(如无 P0:本节写 `(none)`)") + 
lines.append("") + lines.append("## P1") + lines.append("- <file:line> — <说明>") + lines.append("") + lines.append("## P2") + lines.append("- <一句话风格类建议>") + lines.append("") + lines.append("## 给使用者的提示") + lines.append("- 一句话总结") + lines.append("") + lines.append("STATUS: ok") + lines.append("````") + lines.append("") + lines.append("## 关键约束") + lines.append("- P0 必须带证据标签;否则自动降级为 advisory(仅写入 tasks.md 注释,不进 fix loop)") + lines.append("- reviewer 不参与修复循环;本轮 advisory 提完即结束(v0.7 reviewer round 恒为 1)") + lines.append("- 末行恒为 `STATUS: ok`(advisory 模式;pass/fail 在 validator)") + + text = "\n".join(lines) + "\n" + _atomic_write_text(root / "task.md", text) + return text + + +# ------------------------------------------------------------------------- +# validator prompt +# ------------------------------------------------------------------------- + +def render_validator_prompt( + group_stages: list[Any], + run_dir: Path, + run_id: str, + spec_id: str, + spec_dir: str, + group: int, + round_: int = 1, + prev_validation: Optional[Path] = None, + project_root: Optional[str] = None, +) -> str: + agent_key = f"validator-g{group}-r{round_}" + root, inbox, outbox = _ensure_agent_dirs(run_dir, agent_key) + lines: list[str] = [] + lines.append(f"# {agent_key}:本 group {len(group_stages)} 个 stage 联合验证") + lines.append("") + lines.append(_context_block(spec_id, spec_dir, run_id, group, round_, + project_root=project_root)) + lines.append("") + if project_root: + lines.append(f"## 跑验证命令时请先 `cd \"{project_root}\"`") + lines.append("(所有 `@writes` 路径相对 `project_root`,不是 `spec_dir`)") + lines.append("") + lines.append("## 验证范围") + for s in group_stages: + lines.append(f"- 阶段 {s.number}: {s.title}") + items = getattr(s, "items", []) or [] + for it in items: + num = it.get("number") if isinstance(it, dict) else getattr(it, "number", "") + it_title = it.get("title") if isinstance(it, dict) else getattr(it, "title", "") + it_reqs = it.get("requirements") if isinstance(it, dict) else getattr(it, 
"requirements", []) + req = ("(_需求:" + ",".join(it_reqs) + "_)") if it_reqs else "" + lines.append(f" - {num} {it_title}{req}") + lines.append("") + if prev_validation is not None: + lines.append("## 上一轮 validation(本轮在其上验证 v-fix 是否成功)") + lines.append(f"- `{prev_validation}`") + lines.append("") + lines.append("## outbox") + lines.append(f"- validation.md:`{outbox / 'validation.md'}`") + lines.append("") + lines.append("## validation.md schema(必须严格遵守)") + lines.append("````markdown") + lines.append("# " + agent_key) + lines.append("") + lines.append("## 判定") + lines.append("pass | fail") + lines.append("") + lines.append("## 复现命令") + lines.append("```bash") + lines.append("cd <project root>") + lines.append("pytest tests/... -v") + lines.append("```") + lines.append("") + lines.append("## 按子任务的验证结果") + lines.append("- [x] 1.1 <标题>: pass") + lines.append("- [ ] 1.3 <标题>: fail — <一句话现场>") + lines.append("") + lines.append("## 失败现场(fail 时必填)") + lines.append("```") + lines.append("FAILED tests/... 
::test_xxx") + lines.append("AssertionError: ...") + lines.append("```") + lines.append("") + lines.append("## 给 coder 的修复指引(fail 时必填,不带 P0/P1 标签)") + lines.append("### 修复 1 — <短标题>") + lines.append("- 文件: <abs/rel>") + lines.append("- 位置: <函数名 / 行号>") + lines.append("- 问题: <说明>") + lines.append("- 建议: <修复方向>") + lines.append("- _需求:x.y_") + lines.append("") + lines.append("STATUS: ok") + lines.append("````") + lines.append("") + lines.append("## 关键约束") + lines.append("- pass / fail 是客观信号;fail 必须给出可复现命令 + 失败现场 + 按文件分组的修复指引") + lines.append("- 不要评论代码风格(那是 reviewer 的活);只关心可验证项") + lines.append("- 末行恒为 `STATUS: ok`") + + text = "\n".join(lines) + "\n" + _atomic_write_text(root / "task.md", text) + return text + + +# ------------------------------------------------------------------------- +# 模块自测 +# ------------------------------------------------------------------------- + +if __name__ == "__main__": # pragma: no cover + print("import this module; see render_coder_prompt / render_reviewer_prompt / render_validator_prompt") diff --git a/plugins/specode/scripts/task_swarm/_state.py b/plugins/specode/scripts/task_swarm/_state.py new file mode 100644 index 0000000..e6cd15b --- /dev/null +++ b/plugins/specode/scripts/task_swarm/_state.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 +"""task_swarm_state.py — task-swarm 状态机(state.json 单一事实源;详见 references/task-swarm.md §7)。 + +负责: + - state.json 的 load/save(atomic write + fsync) + - phase 状态机推进(references/task-swarm.md §3) + - 死循环检测(连续 3 轮同 fail 签名 → group failed-deadloop) + +state.json schema 见 references/task-swarm.md §7 关键不变量。本模块只管"事实源", +派发 / 解析在 task_swarm.py 主 CLI 里。 + +stdlib-only。 +""" +from __future__ import annotations + +import contextlib +import json +import os +import tempfile +import time +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Optional + + +# ------------------------------------------------------------------------- +# 时间 / 原子写 +# 
------------------------------------------------------------------------- + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def _atomic_write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(content) + fh.flush() + with contextlib.suppress(OSError): + os.fsync(fh.fileno()) + os.replace(tmp, path) + with contextlib.suppress(OSError): + dir_fd = os.open(str(path.parent), os.O_RDONLY) + try: + os.fsync(dir_fd) + finally: + os.close(dir_fd) + except Exception: + with contextlib.suppress(OSError): + os.unlink(tmp) + raise + + +def _atomic_write_json(path: Path, payload: Any) -> None: + _atomic_write_text(path, json.dumps(payload, ensure_ascii=False, indent=2)) + + +# ------------------------------------------------------------------------- +# 数据模型 +# ------------------------------------------------------------------------- + +@dataclass +class StageEntry: + """state.json 里一条 stage 记录。""" + number: int + title: str + writes: list[str] = field(default_factory=list) + reads: list[str] = field(default_factory=list) + depends_on: list[int] = field(default_factory=list) + requirements: list[str] = field(default_factory=list) + items: list[dict] = field(default_factory=list) + header_line_no: int = 0 + end_line_no: int = 0 + + +# Phase 枚举(同 references/task-swarm.md §3) +PHASES = { + "init", "coding", "review", "p0-fix", + "validation", "v-fix", "writeback", "done", "error", +} + +GROUP_STATUS_VALUES = { + "pending", "coding", "review", "p0-fix", "validation", + "v-fix", "writeback", "done", "failed", "failed-deadloop", +} + +# 连续相同 fail 签名达到此值 → 整个 group 标 failed-deadloop +DEADLOOP_THRESHOLD = 3 + + +# ------------------------------------------------------------------------- +# StateMachine +# 
------------------------------------------------------------------------- + +@dataclass +class StateMachine: + run_id: str + tasks_md: str + run_dir: str + max_parallel: int = 4 + max_rounds: int = 6 + session_id: Optional[str] = None + spec_dir: Optional[str] = None + spec_id: Optional[str] = None + + # 0.10.20+:人工验收模式。True 时 review/p0-fix 完成后跳过 validation/v-fix, + # 直接 begin_writeback;tasks.md 注释块写"⏭️ validator 已跳过(人工验收模式)"。 + # 由 cmd_init 的 --skip-validator flag 设置。 + skip_validator: bool = False + + # group 数据 + groups: list[list[StageEntry]] = field(default_factory=list) + current_group_index: int = 0 + group_status: list[str] = field(default_factory=list) # 与 groups 平行 + + # 当前 phase 信息 + phase: str = "init" + round: int = 0 # 当前 phase 已完成的轮号(v-fix 用得最多) + + # 在飞 / 已返回 subagent + coder_in_flight: list[str] = field(default_factory=list) + coder_done: list[str] = field(default_factory=list) + reviewer_done: bool = False + p0_in_flight: list[str] = field(default_factory=list) + p0_done: list[str] = field(default_factory=list) + validator_in_flight: bool = False + vfix_in_flight: list[str] = field(default_factory=list) + vfix_done: list[str] = field(default_factory=list) + + # findings & validator 历史(per group) + findings: list[dict] = field(default_factory=list) # reviewer 输出(含降级) + p0_pending: list[dict] = field(default_factory=list) # 带证据 P0 项 + fix_targets: list[dict] = field(default_factory=list) # validator fail 时的修复目标 + validator_history: list[dict] = field(default_factory=list) # 历轮 verdict + signature + fail_signature: str = "" # 上一轮 fail 签名 + + # 状态 + started_at: str = "" + last_activity_at: str = "" + completed_at: Optional[str] = None + failed_status: Optional[str] = None # failed-deadloop / failed / done + events: list[dict] = field(default_factory=list) + + # ----------------------------------------------------------------- + # 文件 IO + # ----------------------------------------------------------------- + + @staticmethod + def state_path(run_dir: 
Path) -> Path: + return run_dir / "state.json" + + @classmethod + def load(cls, run_dir: Path) -> "StateMachine": + sp = cls.state_path(run_dir) + if not sp.exists(): + raise FileNotFoundError(f"state.json 不存在:{sp}") + with sp.open("r", encoding="utf-8") as fh: + data = json.load(fh) + # 反序列化 groups + groups: list[list[StageEntry]] = [] + for g in data.get("groups", []): + gg: list[StageEntry] = [] + for s in g: + gg.append(StageEntry(**s)) + groups.append(gg) + sm = cls( + run_id=data["run_id"], + tasks_md=data["tasks_md"], + run_dir=str(run_dir), + max_parallel=data.get("max_parallel", 4), + max_rounds=data.get("max_rounds", 6), + session_id=data.get("session_id") or data.get("claude_session_id"), + spec_dir=data.get("spec_dir"), + spec_id=data.get("spec_id"), + groups=groups, + current_group_index=data.get("current_group_index", 0), + group_status=data.get("group_status", ["pending"] * len(groups)), + phase=data.get("phase", "init"), + round=data.get("round", 0), + coder_in_flight=data.get("coder_in_flight", []), + coder_done=data.get("coder_done", []), + reviewer_done=data.get("reviewer_done", False), + p0_in_flight=data.get("p0_in_flight", []), + p0_done=data.get("p0_done", []), + validator_in_flight=data.get("validator_in_flight", False), + vfix_in_flight=data.get("vfix_in_flight", []), + vfix_done=data.get("vfix_done", []), + findings=data.get("findings", []), + p0_pending=data.get("p0_pending", []), + fix_targets=data.get("fix_targets", []), + validator_history=data.get("validator_history", []), + fail_signature=data.get("fail_signature", ""), + started_at=data.get("started_at", ""), + last_activity_at=data.get("last_activity_at", ""), + completed_at=data.get("completed_at"), + failed_status=data.get("failed_status"), + events=data.get("events", []), + skip_validator=data.get("skip_validator", False), + ) + return sm + + def to_dict(self) -> dict: + return { + "run_id": self.run_id, + "tasks_md": self.tasks_md, + "run_dir": self.run_dir, + "max_parallel": 
self.max_parallel, + "max_rounds": self.max_rounds, + "session_id": self.session_id, + "spec_dir": self.spec_dir, + "spec_id": self.spec_id, + "groups": [[asdict(s) for s in g] for g in self.groups], + "current_group_index": self.current_group_index, + "group_status": list(self.group_status), + "phase": self.phase, + "round": self.round, + "coder_in_flight": list(self.coder_in_flight), + "coder_done": list(self.coder_done), + "reviewer_done": self.reviewer_done, + "p0_in_flight": list(self.p0_in_flight), + "p0_done": list(self.p0_done), + "validator_in_flight": self.validator_in_flight, + "vfix_in_flight": list(self.vfix_in_flight), + "vfix_done": list(self.vfix_done), + "findings": list(self.findings), + "p0_pending": list(self.p0_pending), + "fix_targets": list(self.fix_targets), + "validator_history": list(self.validator_history), + "fail_signature": self.fail_signature, + "started_at": self.started_at, + "last_activity_at": self.last_activity_at, + "completed_at": self.completed_at, + "failed_status": self.failed_status, + "events": list(self.events), + "skip_validator": self.skip_validator, + } + + def save(self) -> None: + self.last_activity_at = _now_iso() + _atomic_write_json(self.state_path(Path(self.run_dir)), self.to_dict()) + + # ----------------------------------------------------------------- + # 事件 + # ----------------------------------------------------------------- + + def events_append(self, event: dict) -> None: + e = dict(event) + e.setdefault("at", _now_iso()) + self.events.append(e) + + # ----------------------------------------------------------------- + # 当前 group 视图 + # ----------------------------------------------------------------- + + def current_group(self) -> list[StageEntry]: + if self.current_group_index >= len(self.groups): + return [] + return self.groups[self.current_group_index] + + def is_group_complete(self) -> bool: + return self.current_group_index >= len(self.groups) + + def current_group_done(self) -> bool: + if 
self.current_group_index >= len(self.group_status): + return False + return self.group_status[self.current_group_index] in ("done", "failed", "failed-deadloop") + + # ----------------------------------------------------------------- + # phase 推进 + # ----------------------------------------------------------------- + + def begin_coding(self) -> None: + """进入新 group 的 coding phase。""" + if self.current_group_index >= len(self.groups): + self.phase = "done" + self.completed_at = _now_iso() + return + self.phase = "coding" + self.round = 1 + # 设置 in_flight keys(命名规则见 references/task-swarm.md §4) + gi = self.current_group_index + self.coder_in_flight = [ + f"coder-g{gi + 1}-s{s.number}-r1" for s in self.current_group() + ] + self.coder_done = [] + self.reviewer_done = False + self.p0_in_flight = [] + self.p0_done = [] + self.validator_in_flight = False + self.vfix_in_flight = [] + self.vfix_done = [] + self.findings = [] + self.p0_pending = [] + self.fix_targets = [] + self.validator_history = [] + self.fail_signature = "" + self.group_status[gi] = "coding" + self.events_append({"type": "phase", "phase": "coding", "group": gi + 1}) + + def mark_coder_done(self, agent_key: str) -> None: + if agent_key in self.coder_in_flight: + self.coder_in_flight.remove(agent_key) + if agent_key not in self.coder_done: + self.coder_done.append(agent_key) + + def all_coders_returned(self) -> bool: + return not self.coder_in_flight + + def begin_review(self) -> None: + self.phase = "review" + self.reviewer_done = False + gi = self.current_group_index + self.group_status[gi] = "review" + self.events_append({"type": "phase", "phase": "review", "group": gi + 1}) + + def mark_reviewer_done(self) -> None: + self.reviewer_done = True + + def begin_p0_fix(self, p0_pending: list[dict]) -> None: + self.phase = "p0-fix" + self.round = 1 + gi = self.current_group_index + self.p0_pending = list(p0_pending) + # 按文件分组:每个不同 file → 一个 fix agent + files: list[str] = [] + for p in p0_pending: + f = 
(p.get("file_hint") or "unknown").strip() + if f not in files: + files.append(f) + self.p0_in_flight = [f"coder-p0fix-g{gi + 1}-r1-f{i}" for i in range(len(files))] + self.p0_done = [] + self.group_status[gi] = "p0-fix" + self.events_append({"type": "phase", "phase": "p0-fix", "group": gi + 1, + "files": files}) + + def mark_p0_done(self, agent_key: str) -> None: + if agent_key in self.p0_in_flight: + self.p0_in_flight.remove(agent_key) + if agent_key not in self.p0_done: + self.p0_done.append(agent_key) + + def all_p0_returned(self) -> bool: + return not self.p0_in_flight + + def begin_validation(self) -> None: + self.phase = "validation" + self.validator_in_flight = True + gi = self.current_group_index + self.group_status[gi] = "validation" + self.events_append({"type": "phase", "phase": "validation", + "group": gi + 1, "round": self.round}) + + def mark_validator_done(self) -> None: + self.validator_in_flight = False + + def record_round_signature(self, fail_sig: str) -> None: + """记录本轮 fail 签名到 history。""" + gi = self.current_group_index + self.validator_history.append({ + "group": gi + 1, + "round": self.round, + "verdict": "fail" if fail_sig else "pass", + "signature": fail_sig, + "at": _now_iso(), + }) + self.fail_signature = fail_sig + + def detect_deadloop(self) -> bool: + """连续 3 轮同 fail 签名 → 死循环。""" + gi = self.current_group_index + sigs = [h["signature"] for h in self.validator_history + if h.get("group") == gi + 1 and h.get("verdict") == "fail" and h.get("signature")] + if len(sigs) < DEADLOOP_THRESHOLD: + return False + return all(s == sigs[-1] for s in sigs[-DEADLOOP_THRESHOLD:]) + + def begin_v_fix(self, fix_targets: list[dict]) -> None: + self.phase = "v-fix" + self.round += 1 + gi = self.current_group_index + # 按文件分组 + files: list[str] = [] + for t in fix_targets: + f = (t.get("file_path") or "unknown").strip() + if f and f not in files: + files.append(f) + if not files: + files = ["unknown"] + self.fix_targets = list(fix_targets) + 
self.vfix_in_flight = [ + f"coder-vfix-g{gi + 1}-r{self.round}-f{i}" for i in range(len(files)) + ] + self.vfix_done = [] + self.group_status[gi] = "v-fix" + self.events_append({"type": "phase", "phase": "v-fix", + "group": gi + 1, "round": self.round, "files": files}) + + def mark_vfix_done(self, agent_key: str) -> None: + if agent_key in self.vfix_in_flight: + self.vfix_in_flight.remove(agent_key) + if agent_key not in self.vfix_done: + self.vfix_done.append(agent_key) + + def all_vfix_returned(self) -> bool: + return not self.vfix_in_flight + + def begin_writeback(self) -> None: + self.phase = "writeback" + gi = self.current_group_index + self.group_status[gi] = "writeback" + + def finalize_group(self, status: str = "done") -> None: + gi = self.current_group_index + if gi < len(self.group_status): + self.group_status[gi] = status + self.events_append({"type": "group-done", "group": gi + 1, "status": status}) + # 进入下一 group + self.current_group_index += 1 + if self.current_group_index >= len(self.groups): + self.phase = "done" + self.completed_at = _now_iso() + self.failed_status = self.failed_status or "done" + self.events_append({"type": "run-done", "status": self.failed_status}) + else: + # 不自动 begin_coding;由 advance 调用 + self.phase = "init" + + def fail_group_deadloop(self) -> None: + gi = self.current_group_index + if gi < len(self.group_status): + self.group_status[gi] = "failed-deadloop" + self.failed_status = "failed-deadloop" + self.phase = "error" + self.events_append({"type": "group-failed", "group": gi + 1, "reason": "deadloop"}) + + +# ------------------------------------------------------------------------- +# 模块自测 +# ------------------------------------------------------------------------- + +if __name__ == "__main__": # pragma: no cover + import sys + if len(sys.argv) < 2: + print("usage: task_swarm_state.py <run_dir>") + raise SystemExit(2) + sm = StateMachine.load(Path(sys.argv[1])) + print(json.dumps(sm.to_dict(), ensure_ascii=False, indent=2)) 
diff --git a/plugins/specode/scripts/task_swarm/_writeback.py b/plugins/specode/scripts/task_swarm/_writeback.py new file mode 100644 index 0000000..a821455 --- /dev/null +++ b/plugins/specode/scripts/task_swarm/_writeback.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +"""task_swarm_writeback.py — tasks.md line-safe diff writeback(详见 references/task-swarm.md §5)。 + +只允许: + - 在指定 stage 范围内:`- [ ] N.M ...` → `- [x] N.M ...`(仅替换 checkbox 字符) + - 在该 stage 段末追加 `> ` 注释块 + +越界 / 修改已有非 checkbox 字符 → WriteBackError → exit 1。 + +stdlib-only。 +""" +from __future__ import annotations + +import contextlib +import os +import re +import tempfile +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +# ------------------------------------------------------------------------- +# 错误 +# ------------------------------------------------------------------------- + +class WriteBackError(Exception): + """越界 / 不安全 diff。""" + + +# ------------------------------------------------------------------------- +# 数据结构 +# ------------------------------------------------------------------------- + +@dataclass +class StageFinding: + """一条 finding(reviewer / advisory),含 fix 状态。""" + severity: str # p0 / p1 / p2 / advisory + text: str # 原文(不含 leading '- ') + fix_status: str # 已修复 / 未修复 + + +@dataclass +class GroupFindings: + """writeback 时传入的本 group 全部 finding 与 validator 历史。""" + group_index: int # 0-based + stages: list[int] # group 内 stage 编号 + findings: list[StageFinding] = field(default_factory=list) + validator_history: list[dict] = field(default_factory=list) + final_verdict: str = "pass" # pass / fail / failed-deadloop / manual-review + reproduce_cmd: str = "" + # 0.10.20+:True 表示这次 run 是 init --skip-validator 启动的,writeback + # 注释块写"⏭️ validator 已跳过(人工验收模式)"而非 "✅ validator ... 
pass" + skip_validator: bool = False + + +@dataclass +class WriteBackResult: + tasks_md_path: Path + stages_checked: list[int] + findings_count: int + new_text: str + + +# ------------------------------------------------------------------------- +# 原子写 +# ------------------------------------------------------------------------- + +def _atomic_write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(content) + fh.flush() + with contextlib.suppress(OSError): + os.fsync(fh.fileno()) + os.replace(tmp, path) + except Exception: + with contextlib.suppress(OSError): + os.unlink(tmp) + raise + + +# ------------------------------------------------------------------------- +# writeback 核心 +# ------------------------------------------------------------------------- + +_STAGE_HEADER_RE = re.compile(r"^\s*##\s+阶段\s+(\d+)\s*[::]") +_ITEM_RE = re.compile(r"^(\s*-\s+\[)([ xX])(\]\s+\d+(?:\.\d+)+\s+.*)$") + + +def _find_stage_ranges(lines: list[str], target_stages: list[int]) -> dict[int, tuple[int, int]]: + """返回 {stage_number: (start_line_no, end_line_no)},行号 1-based。""" + ranges: dict[int, tuple[int, int]] = {} + current_num: Optional[int] = None + current_start = 0 + for idx, line in enumerate(lines, start=1): + m = _STAGE_HEADER_RE.match(line) + if m: + n = int(m.group(1)) + if current_num is not None and current_num in target_stages: + ranges[current_num] = (current_start, idx - 1) + current_num = n + current_start = idx + if current_num is not None and current_num in target_stages: + ranges[current_num] = (current_start, len(lines)) + return ranges + + +def _format_findings_block(gf: GroupFindings) -> list[str]: + """生成单 stage 末尾追加的 `> ` 注释块(list of lines, no trailing newline)。 + + 格式见 references/task-swarm.md §5 示例。 + """ + out: list[str] = [] + out.append("") # 空行 + # 
0.10.20+:skip_validator 模式优先于其他状态——本 run 根本没跑 validator + if gf.skip_validator: + out.append("> ⏭️ validator 已跳过(人工验收模式)—— 代码正确性由用户人工核验") + out.append(">") + else: + # 顶部 validator 最终结论 + last_pass = None + for h in gf.validator_history: + if h.get("verdict") == "pass": + last_pass = h + break + if gf.final_verdict == "pass": + round_text = "" + if last_pass is not None: + round_text = f" g{last_pass.get('group')}-r{last_pass.get('round')}" + cmd = gf.reproduce_cmd or "" + # 0.10.21+:reproduce_cmd 可能含多行(cd + 多个 node/pytest 命令)。 + # 直接 inline 进单行 `> ✅ validator pass: \`<cmd>\`` 会让多行字符串 + # 写入 tasks.md 后被 splitlines 拆成多行,其中非首行不以 `>` 开头 → + # _verify_line_safe 报"writeback 越界"。 + # 修法:单行 cmd 仍 inline;多行 cmd 单独占 `> ```` ... ` ` ``` ``` 块。 + if "\n" in cmd: + out.append(f"> ✅ validator{round_text} pass,复现命令:") + out.append("> ```") + for cmd_line in cmd.splitlines(): + out.append(f"> {cmd_line}" if cmd_line else ">") + out.append("> ```") + else: + cmd_text = f": `{cmd}`" if cmd else "" + out.append(f"> ✅ validator{round_text} pass{cmd_text}") + elif gf.final_verdict == "failed-deadloop": + out.append("> ⚠️ validator failed-deadloop(连续 3 轮同一 fail 签名);本 group 标 failed") + else: + out.append(f"> ❌ validator 最终结论:{gf.final_verdict}") + out.append(">") + if gf.findings: + out.append("> 评审建议(task-swarm reviewer):") + for f in gf.findings: + if f.severity == "advisory": + tag = f"[adv {f.fix_status}]" + else: + tag = f"[{f.severity.upper()} {f.fix_status}]" + out.append(f"> - {tag} {f.text}") + out.append(">") + if gf.validator_history: + out.append("> validator 历轮:") + for h in gf.validator_history: + verdict = h.get("verdict", "?") + sig = h.get("signature", "") + tail = "" + if verdict == "fail" and sig: + tail = f" — fail signature {sig}" + out.append(f"> - g{h.get('group')}-r{h.get('round')}: {verdict}{tail}") + return out + + +def writeback_tasks_md( + tasks_md_path: Path, + group_findings: GroupFindings, +) -> WriteBackResult: + """对 tasks_md_path 做 line-safe diff: + + 
1. 仅在 group_findings.stages 列出的 stage 范围内做替换 + 2. `- [ ] ...` → `- [x] ...`(仅 checkbox 字符) + 3. 在每个 stage 段末追加注释块(findings 内容相同——挂在 group 最后一个 stage 末尾) + 4. 任何其他越界 diff → WriteBackError + """ + if not tasks_md_path.exists(): + raise WriteBackError(f"tasks.md 不存在:{tasks_md_path}") + original = tasks_md_path.read_text(encoding="utf-8") + # 保留末尾换行符 + trailing_newline = original.endswith("\n") + lines = original.splitlines() + + target_stages = sorted(group_findings.stages) + ranges = _find_stage_ranges(lines, target_stages) + missing = [n for n in target_stages if n not in ranges] + if missing: + raise WriteBackError( + f"tasks.md 中找不到目标 stage:{missing}(请确认 tasks.md 未被外部破坏)" + ) + + new_lines = list(lines) + # 替换 checkbox + for n in target_stages: + start, end = ranges[n] + for i in range(start - 1, end): + line = new_lines[i] + m = _ITEM_RE.match(line) + if m: + # 只允许 ' ' → 'x';其它情况保持不变(已 x 不动) + if m.group(2) == " ": + new_lines[i] = f"{m.group(1)}x{m.group(3)}" + + # 在 group 最后一个 stage 段末追加注释块 + last_stage = target_stages[-1] + _, last_end = ranges[last_stage] + block_lines = _format_findings_block(group_findings) + # 插入位置:last_end 之后(即下个 stage header / 文件末尾前) + insert_at = last_end + new_lines = new_lines[:insert_at] + block_lines + new_lines[insert_at:] + + new_text = "\n".join(new_lines) + ("\n" if trailing_newline else "") + # 安全校验:除允许的 diff 外,其余每行必须与原文逐字相等 + _verify_line_safe(original, new_text, group_findings) + _atomic_write_text(tasks_md_path, new_text) + return WriteBackResult( + tasks_md_path=tasks_md_path, + stages_checked=target_stages, + findings_count=len(group_findings.findings), + new_text=new_text, + ) + + +def _verify_line_safe(original: str, modified: str, gf: GroupFindings) -> None: + """二次校验:把 modified 与 original 行级对齐,确认越界没发生。 + + 允许的 diff: + A. 同号行:原 `- [ ] N.M ...`,新 `- [x] N.M ...`,其余字符完全相同 + B. 
modified 比 original 多若干行,且新增行全部以 `>`、空字符串或前缀属于 `> ` 注释块格式 + """ + orig_lines = original.splitlines() + new_lines = modified.splitlines() + + oi = 0 + ni = 0 + o_len = len(orig_lines) + n_len = len(new_lines) + while oi < o_len and ni < n_len: + o = orig_lines[oi] + n = new_lines[ni] + if o == n: + oi += 1 + ni += 1 + continue + # 允许 A:checkbox toggle + mo = _ITEM_RE.match(o) + mn = _ITEM_RE.match(n) + if mo and mn and mo.group(1) == mn.group(1) and mo.group(3) == mn.group(3): + if mo.group(2) == " " and mn.group(2).lower() == "x": + oi += 1 + ni += 1 + continue + raise WriteBackError( + f"writeback 越界:line {oi + 1} checkbox 替换非法 '{mo.group(2)}'→'{mn.group(2)}'" + ) + # 允许 B:modified 多出 `> ` 注释块或空行 + if (n == "" or n.startswith(">")): + ni += 1 + continue + raise WriteBackError( + f"writeback 越界:line {oi + 1}\n 原: {o!r}\n 新: {n!r}" + ) + # modified 末尾多出的行必须都是 `> ` / 空行 + while ni < n_len: + n = new_lines[ni] + if not (n == "" or n.startswith(">")): + raise WriteBackError( + f"writeback 越界:末尾出现非注释行 line {ni + 1}: {n!r}" + ) + ni += 1 + # original 不能多出来(不允许 writeback 删行) + while oi < o_len: + o = orig_lines[oi] + raise WriteBackError( + f"writeback 越界:删除了原行 line {oi + 1}: {o!r}" + ) + + +# ------------------------------------------------------------------------- +# 模块自测 +# ------------------------------------------------------------------------- + +if __name__ == "__main__": # pragma: no cover + import sys + print("usage: import this module; see writeback_tasks_md", file=sys.stderr) diff --git a/plugins/specode/scripts/task_swarm/cli.py b/plugins/specode/scripts/task_swarm/cli.py new file mode 100644 index 0000000..d6a6ec6 --- /dev/null +++ b/plugins/specode/scripts/task_swarm/cli.py @@ -0,0 +1,1424 @@ +"""task_swarm.cli — task-swarm 编排主 CLI(详见 references/task-swarm.md)。 + +由 `scripts/task_swarm.py` launcher 调用(launcher 负责 sys.path 注入)。 +实现拆到同包内: + + _state.py phase 状态机 + state.json 单一事实源 + 死循环检测 + _parse_md.py tasks.md 解析 + 按文件冲突切 group + _outbox.py 
coder/reviewer/validator 三类产物 schema 校验 + _prompt.py 各 subagent 角色的 prompt 渲染 + _writeback.py tasks.md line-safe diff 写回 + +子命令: + init --tasks <abs> [--max-parallel N] [--max-rounds N] [--session <id>] [--spec <dir>] + status --run <run_id> + plan --run <run_id> + advance --run <run_id> --phase <coding|review|p0-fix|validation|v-fix> --round <n> + writeback --run <run_id> --group <N> + heartbeat --run <run_id> + resolve --run <run_id> [--abort] + +主代理通过 plan→fork→advance 循环驱动;本脚本只负责"确定性查询 / 状态推进 / +outbox 解析 / tasks.md line-safe diff 写回"。 + +stdlib-only。 +""" +from __future__ import annotations + +import argparse +import contextlib +import json +import os +import random +import string +import sys +import time +from pathlib import Path +from typing import Any, Optional + +# 0.10.0+ 日志(defensive import;launcher 已注入 scripts/ 到 sys.path) +try: + from spec_log import write_event as _log_event # type: ignore +except Exception: + def _log_event(event: str, payload: Optional[dict] = None, + session_id: Optional[str] = None) -> None: + return None + +from task_swarm._parse_md import parse_tasks_md, group_by_file_conflict # noqa: E402 +from task_swarm._state import StateMachine, StageEntry, _atomic_write_json # noqa: E402 +from task_swarm._outbox import ( # noqa: E402 + ParseError, parse_coder_result, parse_reviewer_review, parse_validator_validation, +) +from task_swarm._prompt import ( # noqa: E402 + render_coder_prompt, render_reviewer_prompt, render_validator_prompt, +) +from task_swarm._writeback import ( # noqa: E402 + GroupFindings, StageFinding, WriteBackError, writeback_tasks_md, +) + + +# ------------------------------------------------------------------------- +# 工具 +# ------------------------------------------------------------------------- + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def _gen_run_id() -> str: + """YYYYMMDD-HHMMSS-<6 位随机>,与 spec-mode 0.3.0 一致。""" + ts = time.strftime("%Y%m%d-%H%M%S", time.gmtime()) + 
rand = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + return f"{ts}-{rand}" + + +def _emit(payload: Any) -> None: + sys.stdout.write(json.dumps(payload, ensure_ascii=False, indent=2) + "\n") + + +def _sessions_dir() -> Path: + return Path.home() / ".specode" / "sessions" + + +def _session_path(session_id: str) -> Path: + return _sessions_dir() / f"{session_id}.json" + + +def _read_session(session_id: str) -> Optional[dict]: + p = _session_path(session_id) + if not p.exists(): + return None + try: + with p.open("r", encoding="utf-8") as fh: + data = json.load(fh) + if isinstance(data, dict): + return data + except Exception: + return None + return None + + +def _write_session(session_id: str, data: dict) -> None: + _atomic_write_json(_session_path(session_id), data) + + +def _runs_root_for(tasks_md: Path, spec_dir: Optional[Path]) -> Path: + """决定 .task-swarm/runs/ 根目录。 + + 优先级: + 1. spec_dir/.task-swarm/runs/ + 2. tasks_md.parent/.task-swarm/runs/ + """ + if spec_dir is not None: + return spec_dir / ".task-swarm" / "runs" + return tasks_md.parent / ".task-swarm" / "runs" + + +def _resolve_run_dir(run_id: str, hint_dirs: list[Path]) -> Path: + """根据 run_id 查找 run_dir。 + + hint_dirs:可能的 .task-swarm/runs/ 父目录候选。 + 若 run_id 是绝对路径直接返回。 + """ + p = Path(run_id) + if p.is_absolute() and p.exists(): + return p + # 在 hint_dirs 下查找 + for hd in hint_dirs: + candidate = hd / run_id + if candidate.exists(): + return candidate + raise FileNotFoundError(f"找不到 run_dir for run_id={run_id}(hints={hint_dirs})") + + +def _collect_run_dirs() -> list[Path]: + """扫描当前目录 / 当前目录上层若干层下的 .task-swarm/runs/*。""" + candidates: list[Path] = [] + cwd = Path.cwd() + for base in (cwd, cwd.parent, cwd.parent.parent): + runs = base / ".task-swarm" / "runs" + if runs.exists(): + candidates.append(runs) + return candidates + + +def _find_run_dir(run_id: str) -> Path: + # 第一步:如果 run_id 是绝对路径直接用 + p = Path(run_id) + if p.is_absolute() and p.exists(): + return p + # 第二步:扫描 sessions 
找到 spec_dir + sessions_dir = _sessions_dir() + spec_dirs: list[Path] = [] + if sessions_dir.exists(): + for sf in sessions_dir.glob("*.json"): + try: + with sf.open("r", encoding="utf-8") as fh: + sess = json.load(fh) + sd = sess.get("active_spec_dir") + if sd: + spec_dirs.append(Path(sd)) + except Exception: + continue + hint_dirs: list[Path] = [] + for sd in spec_dirs: + hint_dirs.append(sd / ".task-swarm" / "runs") + hint_dirs.extend(_collect_run_dirs()) + return _resolve_run_dir(run_id, hint_dirs) + + +# ------------------------------------------------------------------------- +# init +# ------------------------------------------------------------------------- + +def cmd_init(args: argparse.Namespace) -> int: + tasks_md = Path(args.tasks).resolve() + if not tasks_md.exists(): + sys.stderr.write(f"tasks.md 不存在:{tasks_md}\n") + return 1 + + spec_dir: Optional[Path] = None + spec_id: Optional[str] = None + if args.spec: + spec_dir = Path(args.spec).resolve() + else: + # 推断:tasks.md 所在目录就是 spec_dir + if (tasks_md.parent / ".config.json").exists(): + spec_dir = tasks_md.parent + if spec_dir is not None: + cfg_path = spec_dir / ".config.json" + if cfg_path.exists(): + try: + with cfg_path.open("r", encoding="utf-8") as fh: + cfg = json.load(fh) + spec_id = cfg.get("specId") + except Exception: + pass + + stages = parse_tasks_md(tasks_md) + if not stages: + sys.stderr.write("tasks.md 中未解析出任何 `## 阶段 N:` 段;请确认格式\n") + return 1 + groups_raw = group_by_file_conflict(stages, max_parallel=args.max_parallel) + if not groups_raw: + sys.stderr.write("group 切分结果为空\n") + return 1 + + run_id = _gen_run_id() + runs_root = _runs_root_for(tasks_md, spec_dir) + run_dir = runs_root / run_id + run_dir.mkdir(parents=True, exist_ok=True) + (run_dir / "agents").mkdir(parents=True, exist_ok=True) + + # 转换为 StageEntry + groups: list[list[StageEntry]] = [] + for g in groups_raw: + gg: list[StageEntry] = [] + for s in g: + items_dict = [ + { + "number": it.number, + "title": it.title, + 
"writes": list(it.writes), + "reads": list(it.reads), + "depends_on": list(it.depends_on), + "requirements": list(it.requirements), + "raw_line": it.raw_line, + "checkbox": it.checkbox, + "line_no": it.line_no, + } + for it in s.items + ] + gg.append(StageEntry( + number=s.number, + title=s.title, + writes=list(s.writes), + reads=list(s.reads), + depends_on=list(s.depends_on), + requirements=[r for it in s.items for r in it.requirements], + items=items_dict, + header_line_no=s.header_line_no, + end_line_no=s.end_line_no, + )) + groups.append(gg) + + sm = StateMachine( + run_id=run_id, + tasks_md=str(tasks_md), + run_dir=str(run_dir), + max_parallel=args.max_parallel, + max_rounds=args.max_rounds, + session_id=args.session, + spec_dir=str(spec_dir) if spec_dir else None, + spec_id=spec_id, + groups=groups, + current_group_index=0, + group_status=["pending"] * len(groups), + phase="init", + round=0, + started_at=_now_iso(), + last_activity_at=_now_iso(), + skip_validator=bool(getattr(args, "skip_validator", False)), + ) + sm.events_append({ + "type": "init", + "tasks_md": str(tasks_md), + "groups": len(groups), + "skip_validator": sm.skip_validator, + }) + sm.save() + + # 同步 sessions/<id>.json.task_swarm_run_id + if args.session: + sess = _read_session(args.session) or {} + sess["task_swarm_run_id"] = run_id + sess["last_activity_at"] = _now_iso() + with contextlib.suppress(Exception): + _write_session(args.session, sess) + + out = { + "run_id": run_id, + "run_dir": str(run_dir), + "tasks_md": str(tasks_md), + "spec_dir": str(spec_dir) if spec_dir else None, + "spec_id": spec_id, + "groups": [ + [{"stage": s.number, "title": s.title, "writes": s.writes, + "depends_on": s.depends_on} for s in g] + for g in groups + ], + } + _emit(out) + return 0 + + +# ------------------------------------------------------------------------- +# status +# ------------------------------------------------------------------------- + +def cmd_status(args: argparse.Namespace) -> int: + try: 
+ run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + gi = sm.current_group_index + pending = [] + if gi < len(sm.groups): + for s in sm.groups[gi]: + pending.append({"stage": s.number, "title": s.title, "writes": s.writes}) + payload = { + "run_id": sm.run_id, + "tasks_md": sm.tasks_md, + "phase": sm.phase, + "group": gi + 1 if gi < len(sm.groups) else None, + "round": sm.round, + "total_groups": len(sm.groups), + "group_status": sm.group_status, + "coder_in_flight": sm.coder_in_flight, + "reviewer_done": sm.reviewer_done, + "p0_in_flight": sm.p0_in_flight, + "validator_in_flight": sm.validator_in_flight, + "vfix_in_flight": sm.vfix_in_flight, + "pending_stages": pending, + "failed_status": sm.failed_status, + "completed_at": sm.completed_at, + } + _emit(payload) + return 0 + + +# ------------------------------------------------------------------------- +# plan +# ------------------------------------------------------------------------- + +PLAN_TEMPLATES = { + "coding-waiting": "coding phase 仍有 {n} 个 coder 未返回,等齐后再判断。", + "coding-fork": "本 group 开始 coding。请按下面 {n} 个 coder agent_key fork(同 message 内并发)。", + "review-fork": "本 group coder 已全部返回。请 fork **1 个** `task-swarm-reviewer`。", + "p0-fix-fork": "reviewer 提了 {n} 个带证据 P0。请按 P0 涉及文件 fork **{n}** 个 `task-swarm-coder`(p0-fix)。注意:reviewer 修复**只触发一次**,不 re-review。", + "validation-fork": "请 fork **1 个** `task-swarm-validator`。", + "validation-fork-after-p0": "p0-fix coder 已返回。请 fork **1 个** `task-swarm-validator`。", + "writeback": "validator pass。请调 `task_swarm.py writeback --run {run} --group {g}` 回写 tasks.md,然后进入下一 group。", + "v-fix-fork": "validator fail。请按 validation.md 的 fix_targets 各文件 fork **{n}** 个 `task-swarm-coder`(v-fix)。注意:validator fail 循环修复直到 pass。本轮是 g{g}-r{r}。", + "validation-after-vfix": "v-fix coder 已返回。请 fork **1 个** `task-swarm-validator` 验证。", + "deadloop": "⚠️ 死循环检测:g{g} 已连续 3 轮同一 fail 签名。建议停止本 group,向用户报告 
`failed-deadloop`,让用户介入。", + "all-done": "全部 group 已完成。请按 SKILL.md 退出 task-swarm 模式,回到 spec-mode acceptance phase。", +} + + +def _plan_for(sm: StateMachine) -> dict: + """根据 state 推导下一步建议(确定性查询,不改 state)。""" + if sm.failed_status == "failed-deadloop": + gi = sm.current_group_index + return { + "phase": sm.phase, + "action": "deadloop", + "message": PLAN_TEMPLATES["deadloop"].format(g=gi + 1), + "fork": [], + } + if sm.is_group_complete() or sm.phase == "done": + return { + "phase": "done", + "action": "all-done", + "message": PLAN_TEMPLATES["all-done"], + "fork": [], + } + + gi = sm.current_group_index + g = sm.current_group() + + # 1. phase=init:尚未开始本 group coding,建议 fork coder + if sm.phase == "init": + forks = [] + run_dir = Path(sm.run_dir) + for s in g: + key = f"coder-g{gi + 1}-s{s.number}-r1" + task_md = run_dir / "agents" / key / "task.md" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(task_md), + "stage": s.number, + "writes": s.writes, + }) + return { + "phase": "coding", + "action": "coding-fork", + "message": PLAN_TEMPLATES["coding-fork"].format(n=len(forks)), + "fork": forks, + "group": gi + 1, + } + + # 2. 
coding 进行中 + if sm.phase == "coding": + if sm.coder_in_flight and not sm.coder_done: + forks = [] + run_dir = Path(sm.run_dir) + for s in g: + key = f"coder-g{gi + 1}-s{s.number}-r1" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(run_dir / "agents" / key / "task.md"), + "stage": s.number, + "writes": s.writes, + }) + return { + "phase": "coding", + "action": "coding-fork", + "message": PLAN_TEMPLATES["coding-fork"].format(n=len(forks)), + "fork": forks, + "in_flight": list(sm.coder_in_flight), + "group": gi + 1, + } + if sm.coder_in_flight: + return { + "phase": "coding", + "action": "coding-waiting", + "message": PLAN_TEMPLATES["coding-waiting"].format(n=len(sm.coder_in_flight)), + "fork": [], + "in_flight": list(sm.coder_in_flight), + "group": gi + 1, + } + # 全部返回 → 建议 fork reviewer + key = f"reviewer-g{gi + 1}-r1" + return { + "phase": "review", + "action": "review-fork", + "message": PLAN_TEMPLATES["review-fork"], + "fork": [{ + "agent": "task-swarm-reviewer", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + } + + # 3. 
review 完成 → 看是否有 P0 + if sm.phase == "review": + if not sm.reviewer_done: + # 仍未 fork → 给 fork 建议 + key = f"reviewer-g{gi + 1}-r1" + return { + "phase": "review", + "action": "review-fork", + "message": PLAN_TEMPLATES["review-fork"], + "fork": [{ + "agent": "task-swarm-reviewer", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + } + if sm.p0_pending: + # 按文件分组 + files: list[str] = [] + for p in sm.p0_pending: + f = (p.get("file_hint") or "unknown").strip() + if f not in files: + files.append(f) + forks = [] + for i, f in enumerate(files): + key = f"coder-p0fix-g{gi + 1}-r1-f{i}" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + "file": f, + }) + return { + "phase": "p0-fix", + "action": "p0-fix-fork", + "message": PLAN_TEMPLATES["p0-fix-fork"].format(n=len(forks)), + "fork": forks, + "group": gi + 1, + } + # 无 P0 → 直接 validator + key = f"validator-g{gi + 1}-r1" + return { + "phase": "validation", + "action": "validation-fork", + "message": PLAN_TEMPLATES["validation-fork"], + "fork": [{ + "agent": "task-swarm-validator", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + } + + # 4. 
p0-fix 阶段 + if sm.phase == "p0-fix": + if sm.p0_in_flight and not sm.p0_done: + files: list[str] = [] + for p in sm.p0_pending: + f = (p.get("file_hint") or "unknown").strip() + if f not in files: + files.append(f) + forks = [] + for i, f in enumerate(files): + key = f"coder-p0fix-g{gi + 1}-r1-f{i}" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + "file": f, + }) + return { + "phase": "p0-fix", + "action": "p0-fix-fork", + "message": PLAN_TEMPLATES["p0-fix-fork"].format(n=len(forks)), + "fork": forks, + "group": gi + 1, + } + if sm.p0_in_flight: + return { + "phase": "p0-fix", + "action": "p0-fix-waiting", + "message": f"p0-fix 仍有 {len(sm.p0_in_flight)} 个 coder 未返回。", + "fork": [], + "group": gi + 1, + } + key = f"validator-g{gi + 1}-r1" + return { + "phase": "validation", + "action": "validation-fork-after-p0", + "message": PLAN_TEMPLATES["validation-fork-after-p0"], + "fork": [{ + "agent": "task-swarm-validator", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + } + + # 5. 
validation 阶段 + if sm.phase == "validation": + if sm.validator_in_flight: + # 计算目标 round:第一次进 validation→1;v-fix 后再 validation→sm.round + target_round = sm.round if sm.round > 0 else 1 + key = f"validator-g{gi + 1}-r{target_round}" + if sm.validator_history: + msg = PLAN_TEMPLATES["validation-after-vfix"] + action = "validation-after-vfix" + elif sm.p0_done: + msg = PLAN_TEMPLATES["validation-fork-after-p0"] + action = "validation-fork-after-p0" + else: + msg = PLAN_TEMPLATES["validation-fork"] + action = "validation-fork" + return { + "phase": "validation", + "action": action, + "message": msg, + "fork": [{ + "agent": "task-swarm-validator", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + "round": target_round, + } + # 看 fix_targets 决定 pass / fail + if sm.fix_targets: + # 死循环检测 + if sm.detect_deadloop(): + return { + "phase": sm.phase, + "action": "deadloop", + "message": PLAN_TEMPLATES["deadloop"].format(g=gi + 1), + "fork": [], + "group": gi + 1, + } + files: list[str] = [] + for t in sm.fix_targets: + f = (t.get("file_path") or "").strip() + if f and f not in files: + files.append(f) + forks = [] + for i, f in enumerate(files): + key = f"coder-vfix-g{gi + 1}-r{sm.round + 1}-f{i}" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + "file": f, + }) + return { + "phase": "v-fix", + "action": "v-fix-fork", + "message": PLAN_TEMPLATES["v-fix-fork"].format( + n=len(forks), g=gi + 1, r=sm.round + 1, + ), + "fork": forks, + "group": gi + 1, + } + # pass + return { + "phase": "writeback", + "action": "writeback", + "message": PLAN_TEMPLATES["writeback"].format(run=sm.run_id, g=gi + 1), + "fork": [], + "group": gi + 1, + } + + # 6. 
v-fix 阶段 + if sm.phase == "v-fix": + if sm.vfix_in_flight and not sm.vfix_done: + files: list[str] = [] + for t in sm.fix_targets: + f = (t.get("file_path") or "").strip() + if f and f not in files: + files.append(f) + forks = [] + for i, f in enumerate(files): + key = f"coder-vfix-g{gi + 1}-r{sm.round}-f{i}" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + "file": f, + }) + return { + "phase": "v-fix", + "action": "v-fix-fork", + "message": PLAN_TEMPLATES["v-fix-fork"].format( + n=len(forks), g=gi + 1, r=sm.round, + ), + "fork": forks, + "group": gi + 1, + } + if sm.vfix_in_flight: + return { + "phase": "v-fix", + "action": "v-fix-waiting", + "message": f"v-fix 仍有 {len(sm.vfix_in_flight)} 个 coder 未返回。", + "fork": [], + "group": gi + 1, + } + key = f"validator-g{gi + 1}-r{sm.round + 1}" + return { + "phase": "validation", + "action": "validation-after-vfix", + "message": PLAN_TEMPLATES["validation-after-vfix"], + "fork": [{ + "agent": "task-swarm-validator", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + }], + "group": gi + 1, + } + + if sm.phase == "writeback": + return { + "phase": "writeback", + "action": "writeback", + "message": PLAN_TEMPLATES["writeback"].format(run=sm.run_id, g=gi + 1), + "fork": [], + "group": gi + 1, + } + + if sm.phase == "error": + return { + "phase": "error", + "action": "deadloop", + "message": PLAN_TEMPLATES["deadloop"].format(g=gi + 1), + "fork": [], + "group": gi + 1, + } + + return { + "phase": sm.phase, + "action": "unknown", + "message": f"未知 phase={sm.phase},请检查 state.json", + "fork": [], + } + + +def cmd_plan(args: argparse.Namespace) -> int: + try: + run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + # 若 phase=init,主动渲染 prompt 文件并把 phase 推进到 coding; + # 渲染好的 prompts 让主代理可以直接 fork。 + if sm.phase == "init": + 
_materialize_prompts_for_coding(sm) + sm.begin_coding() + sm.save() + # 第一次 plan:把刚切到 coding 的 in-flight 列表当作"待 fork"列表返回 + gi = sm.current_group_index + forks = [] + for s in sm.current_group(): + key = f"coder-g{gi + 1}-s{s.number}-r1" + forks.append({ + "agent": "task-swarm-coder", + "agent_key": key, + "task_md": str(Path(sm.run_dir) / "agents" / key / "task.md"), + "stage": s.number, + "writes": s.writes, + }) + plan = { + "phase": "coding", + "action": "coding-fork", + "message": PLAN_TEMPLATES["coding-fork"].format(n=len(forks)), + "fork": forks, + "group": gi + 1, + } + _emit(plan) + return 0 + if sm.phase == "coding" and not sm.coder_in_flight and sm.coder_done: + # 全部返回 → 渲染 reviewer prompt + _materialize_prompt_reviewer(sm) + elif sm.phase == "review" and sm.reviewer_done and sm.p0_pending: + _materialize_prompts_p0_fix(sm) + elif sm.phase == "review" and sm.reviewer_done and not sm.p0_pending: + _materialize_prompt_validator(sm) + elif sm.phase == "p0-fix" and not sm.p0_in_flight and sm.p0_done: + _materialize_prompt_validator(sm) + elif sm.phase == "validation" and not sm.validator_in_flight and sm.fix_targets: + if not sm.detect_deadloop(): + _materialize_prompts_v_fix(sm) + elif sm.phase == "v-fix" and not sm.vfix_in_flight and sm.vfix_done: + _materialize_prompt_validator(sm) + plan = _plan_for(sm) + _emit(plan) + return 0 + + +def _resolve_project_root(sm: StateMachine) -> Optional[str]: + """从 spec_dir/.config.json 读 project_root;未配置 / 读失败时返回 None。 + + 返回 None 时,render_*_prompt 会输出 fallback 文本提示"未设置 project_root, + 暂用 spec_dir"——主要是兼容老 spec(pre 0.10.15 创建的没有此字段)。 + """ + spec_dir = sm.spec_dir + if not spec_dir: + return None + try: + cfg_path = Path(spec_dir) / ".config.json" + if not cfg_path.exists(): + return None + with cfg_path.open("r", encoding="utf-8") as fh: + cfg = json.load(fh) + pr = cfg.get("project_root") + return str(pr) if pr else None + except Exception: + return None + + +def _materialize_prompts_for_coding(sm: StateMachine) -> 
None: + gi = sm.current_group_index + project_root = _resolve_project_root(sm) + for s in sm.current_group(): + render_coder_prompt( + stage=s, + run_dir=Path(sm.run_dir), + run_id=sm.run_id, + spec_id=sm.spec_id or "", + spec_dir=sm.spec_dir or "", + group=gi + 1, + round_=1, + mode="initial", + project_root=project_root, + ) + + +def _materialize_prompt_reviewer(sm: StateMachine) -> None: + gi = sm.current_group_index + coder_outboxes: list[Path] = [] + run_dir = Path(sm.run_dir) + for s in sm.current_group(): + outbox = run_dir / "agents" / f"coder-g{gi + 1}-s{s.number}-r1" / "outbox" / "result.md" + coder_outboxes.append(outbox) + render_reviewer_prompt( + group_stages=sm.current_group(), + coder_outboxes=coder_outboxes, + run_dir=run_dir, + run_id=sm.run_id, + spec_id=sm.spec_id or "", + spec_dir=sm.spec_dir or "", + group=gi + 1, + round_=1, + project_root=_resolve_project_root(sm), + ) + + +def _materialize_prompts_p0_fix(sm: StateMachine) -> None: + gi = sm.current_group_index + project_root = _resolve_project_root(sm) + files: list[str] = [] + for p in sm.p0_pending: + f = (p.get("file_hint") or "unknown").strip() + if f not in files: + files.append(f) + for i, f in enumerate(files): + # 找到对应 stage(best effort:按文件路径匹配 stage.writes) + match_stage = None + for s in sm.current_group(): + if f in s.writes: + match_stage = s + break + if match_stage is None and sm.current_group(): + match_stage = sm.current_group()[0] + if match_stage is None: + continue + render_coder_prompt( + stage=match_stage, + run_dir=Path(sm.run_dir), + run_id=sm.run_id, + spec_id=sm.spec_id or "", + spec_dir=sm.spec_dir or "", + group=gi + 1, + round_=1, + mode="p0-fix", + fix_targets=[p for p in sm.p0_pending + if (p.get("file_hint") or "").strip() == f], + file_idx=i, + project_root=project_root, + ) + + +def _materialize_prompts_v_fix(sm: StateMachine) -> None: + gi = sm.current_group_index + project_root = _resolve_project_root(sm) + files: list[str] = [] + for t in sm.fix_targets: 
+ f = (t.get("file_path") or "").strip() + if f and f not in files: + files.append(f) + if not files: + files = ["unknown"] + for i, f in enumerate(files): + match_stage = None + for s in sm.current_group(): + if f in s.writes: + match_stage = s + break + if match_stage is None and sm.current_group(): + match_stage = sm.current_group()[0] + if match_stage is None: + continue + ftargets = [t for t in sm.fix_targets + if (t.get("file_path") or "").strip() == f] + # round_ 必须用 sm.round(不能 +1):advance 里 begin_v_fix 已经 + # 把 sm.round 自增过了,且 vfix_in_flight 用的就是当前 sm.round 命名 + # (task_swarm_state.py:374,385)。多 +1 会导致 task.md 写到 + # agents/coder-vfix-g{N}-r{round+1}-f{i}/task.md,但 in_flight 是 + # r{round}-f{i},advance 后续找不到产物 → 永远报"产物文件不存在"。 + render_coder_prompt( + stage=match_stage, + run_dir=Path(sm.run_dir), + run_id=sm.run_id, + spec_id=sm.spec_id or "", + spec_dir=sm.spec_dir or "", + group=gi + 1, + round_=sm.round, + mode="v-fix", + fix_targets=ftargets, + file_idx=i, + project_root=project_root, + ) + + +def _materialize_prompt_validator(sm: StateMachine) -> None: + gi = sm.current_group_index + if sm.phase == "v-fix": + next_round = sm.round + 1 + elif sm.phase in ("review", "p0-fix"): + next_round = 1 + else: + next_round = sm.round if sm.round > 0 else 1 + prev_validation: Optional[Path] = None + if sm.validator_history: + last = sm.validator_history[-1] + prev_validation = (Path(sm.run_dir) / "agents" + / f"validator-g{gi + 1}-r{last.get('round')}" + / "outbox" / "validation.md") + if not prev_validation.exists(): + prev_validation = None + render_validator_prompt( + group_stages=sm.current_group(), + run_dir=Path(sm.run_dir), + run_id=sm.run_id, + spec_id=sm.spec_id or "", + spec_dir=sm.spec_dir or "", + group=gi + 1, + round_=next_round, + prev_validation=prev_validation, + project_root=_resolve_project_root(sm), + ) + + +# ------------------------------------------------------------------------- +# advance +# 
------------------------------------------------------------------------- + +def _coder_outbox_paths(sm: StateMachine, keys: list[str]) -> list[Path]: + out: list[Path] = [] + for k in keys: + out.append(Path(sm.run_dir) / "agents" / k / "outbox" / "result.md") + return out + + +def cmd_advance(args: argparse.Namespace) -> int: + try: + run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + phase = args.phase + errors: list[str] = [] + next_msg = "" + + if phase == "coding": + # 解析 coder_in_flight + coder_done 全部 outbox + all_keys = list(sm.coder_in_flight) + list(sm.coder_done) + if not all_keys: + all_keys = [f"coder-g{sm.current_group_index + 1}-s{s.number}-r1" + for s in sm.current_group()] + any_failed = False + for k in all_keys: + p = Path(sm.run_dir) / "agents" / k / "outbox" / "result.md" + try: + res = parse_coder_result(p) + sm.mark_coder_done(k) + if res.status != "ok": + any_failed = True + errors.append(f"{k}: STATUS={res.status} {res.status_reason}") + except ParseError as e: + errors.append(f"{k}: parse error: {e}") + any_failed = True + sm.events_append({"type": "advance", "phase": "coding", "errors": errors}) + if any_failed: + sm.failed_status = sm.failed_status or "failed" + sm.group_status[sm.current_group_index] = "failed" + sm.save() + _emit({ + "ok": False, + "phase": sm.phase, + "errors": errors, + "next": "report-failed-group", + }) + return 0 + sm.begin_review() + # 渲染 reviewer prompt + _materialize_prompt_reviewer(sm) + next_msg = "下一步:fork reviewer(agent_key=reviewer-g{}-r1)".format( + sm.current_group_index + 1) + + elif phase == "review": + gi = sm.current_group_index + path = Path(sm.run_dir) / "agents" / f"reviewer-g{gi + 1}-r1" / "outbox" / "review.md" + try: + rev = parse_reviewer_review(path) + sm.mark_reviewer_done() + # 落 findings + sm.findings = [] + for f in rev.p0_items: + sm.findings.append({ + "severity": "p0", + "text": f.text, + 
"evidence_tags": f.evidence_tags, + "file_hint": f.file_hint, + "fix_status": "未修复", + }) + for f in rev.advisory_items: + sm.findings.append({ + "severity": "advisory", + "text": f.text, + "evidence_tags": [], + "file_hint": f.file_hint, + "fix_status": "未修复", + }) + for f in rev.p1_items: + sm.findings.append({ + "severity": "p1", + "text": f.text, + "evidence_tags": [], + "file_hint": f.file_hint, + "fix_status": "未修复", + }) + for f in rev.p2_items: + sm.findings.append({ + "severity": "p2", + "text": f.text, + "evidence_tags": [], + "file_hint": f.file_hint, + "fix_status": "未修复", + }) + sm.p0_pending = [ + {"text": f.text, "evidence_tags": f.evidence_tags, + "file_hint": f.file_hint} + for f in rev.p0_items + ] + sm.events_append({"type": "advance", "phase": "review", + "verdict": rev.verdict, "p0": len(rev.p0_items), + "advisory": len(rev.advisory_items), + "p1": len(rev.p1_items), "p2": len(rev.p2_items)}) + if sm.p0_pending: + sm.begin_p0_fix(sm.p0_pending) + _materialize_prompts_p0_fix(sm) + next_msg = "下一步:fork p0-fix coder(按文件分组)" + elif sm.skip_validator: + # 0.10.20+:人工验收模式,无 P0 → 跳过 validation 直接 writeback + sm.begin_writeback() + next_msg = (f"无 P0 + skip_validator=true(人工验收模式);" + f"请调 `task_swarm.py writeback --run {sm.run_id} " + f"--group {gi + 1}` 回写 tasks.md,然后人工验收代码。") + else: + sm.begin_validation() + _materialize_prompt_validator(sm) + next_msg = "下一步:fork validator" + except ParseError as e: + errors.append(str(e)) + + elif phase == "p0-fix": + gi = sm.current_group_index + all_keys = list(sm.p0_in_flight) + list(sm.p0_done) + if not all_keys: + # 推断:根据 p0_pending file 数 + files: list[str] = [] + for p in sm.p0_pending: + f = (p.get("file_hint") or "unknown").strip() + if f not in files: + files.append(f) + all_keys = [f"coder-p0fix-g{gi + 1}-r1-f{i}" for i in range(len(files))] + any_failed = False + for k in all_keys: + p = Path(sm.run_dir) / "agents" / k / "outbox" / "result.md" + try: + res = parse_coder_result(p) + sm.mark_p0_done(k) + 
if res.status != "ok": + any_failed = True + errors.append(f"{k}: STATUS={res.status} {res.status_reason}") + except ParseError as e: + errors.append(f"{k}: parse error: {e}") + any_failed = True + # 标记 p0 finding 的 fix_status + for finding in sm.findings: + if finding["severity"] == "p0": + finding["fix_status"] = "未修复" if any_failed else "已修复" + sm.events_append({"type": "advance", "phase": "p0-fix", + "any_failed": any_failed, "errors": errors}) + # 0.10.20+:skip_validator 人工验收模式 → 跳过 validation/v-fix + if sm.skip_validator: + gi = sm.current_group_index + sm.begin_writeback() + if any_failed: + next_msg = (f"p0-fix 部分失败 + skip_validator=true(人工验收模式);" + f"未修部分将以 [P0 未修复] 写入 tasks.md。请调 " + f"`task_swarm.py writeback --run {sm.run_id} " + f"--group {gi + 1}` 然后人工验收。") + else: + next_msg = (f"p0-fix 全部 ok + skip_validator=true(人工验收模式);" + f"请调 `task_swarm.py writeback --run {sm.run_id} " + f"--group {gi + 1}` 回写 tasks.md,然后人工验收代码。") + else: + # 不阻断:进入 validation(full 模式) + sm.begin_validation() + _materialize_prompt_validator(sm) + if any_failed: + next_msg = "p0-fix 部分失败;继续进入 validation。失败的 P0 将标 '[P0 未修复]'。" + else: + next_msg = "p0-fix 全部 ok;进入 validation。" + + elif phase == "validation": + gi = sm.current_group_index + # 决定 round:本次 advance 对应的是 sm.round(v-fix 后 sm.round 已经在 begin_v_fix 时 +1, + # validator 跑完的 round = sm.round) + round_used = sm.round if sm.round > 0 else 1 + path = (Path(sm.run_dir) / "agents" + / f"validator-g{gi + 1}-r{round_used}" / "outbox" / "validation.md") + try: + val = parse_validator_validation(path) + sm.mark_validator_done() + sm.round = round_used + sig = val.fail_signature() + sm.record_round_signature(sig) + sm.events_append({"type": "advance", "phase": "validation", + "verdict": val.verdict, "round": round_used, + "signature": sig}) + if val.verdict == "pass": + sm.fix_targets = [] + sm.begin_writeback() + next_msg = (f"validator pass。请调 `task_swarm.py writeback " + f"--run {sm.run_id} --group {gi + 1}` 回写 tasks.md。") + else: 
+ # fail + sm.fix_targets = [ + { + "file_path": t.file_path, + "title": t.title, + "location": t.location, + "problem": t.problem, + "suggestion": t.suggestion, + "requirements": list(t.requirements), + } + for t in val.fix_targets + ] + # 检测死循环 + if sm.detect_deadloop(): + sm.fail_group_deadloop() + sm.save() + _emit({ + "ok": False, + "phase": sm.phase, + "deadloop": True, + "next": "report-deadloop", + "round": sm.round, + }) + return 0 + # 进入 v-fix + sm.begin_v_fix(sm.fix_targets) + _materialize_prompts_v_fix(sm) + next_msg = (f"validator fail。请按 fix_targets 各文件 fork v-fix coder。" + f"本轮是 g{gi + 1}-r{sm.round}。") + except ParseError as e: + errors.append(str(e)) + + elif phase == "v-fix": + gi = sm.current_group_index + all_keys = list(sm.vfix_in_flight) + list(sm.vfix_done) + if not all_keys: + files: list[str] = [] + for t in sm.fix_targets: + f = (t.get("file_path") or "").strip() + if f and f not in files: + files.append(f) + if not files: + files = ["unknown"] + all_keys = [f"coder-vfix-g{gi + 1}-r{sm.round}-f{i}" + for i in range(len(files))] + any_failed = False + for k in all_keys: + p = Path(sm.run_dir) / "agents" / k / "outbox" / "result.md" + try: + res = parse_coder_result(p) + sm.mark_vfix_done(k) + if res.status != "ok": + any_failed = True + errors.append(f"{k}: STATUS={res.status} {res.status_reason}") + except ParseError as e: + errors.append(f"{k}: parse error: {e}") + any_failed = True + sm.events_append({"type": "advance", "phase": "v-fix", + "round": sm.round, "any_failed": any_failed, + "errors": errors}) + if any_failed: + sm.failed_status = sm.failed_status or "failed" + sm.group_status[sm.current_group_index] = "failed" + sm.save() + _emit({ + "ok": False, + "phase": sm.phase, + "errors": errors, + "next": "report-failed-group", + }) + return 0 + # 进入 validation 下一轮 + sm.begin_validation() + _materialize_prompt_validator(sm) + next_msg = (f"v-fix 全部 ok。请 fork validator-g{gi + 1}-r{sm.round + 1}。") + + else: + sys.stderr.write(f"未知 
phase: {phase}\n") + return 1 + + sm.save() + plan = _plan_for(sm) + _emit({ + "ok": not errors, + "phase": sm.phase, + "group": sm.current_group_index + 1 if sm.current_group_index < len(sm.groups) else None, + "round": sm.round, + "errors": errors, + "next": next_msg, + "plan": plan, + }) + return 0 + + +# ------------------------------------------------------------------------- +# writeback +# ------------------------------------------------------------------------- + +def cmd_writeback(args: argparse.Namespace) -> int: + try: + run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + gi = args.group - 1 + if gi < 0 or gi >= len(sm.groups): + sys.stderr.write(f"--group {args.group} 越界(共 {len(sm.groups)} 个 group)\n") + return 1 + # 仅允许当前 group 或先前已 done 的 group 重写 + if gi != sm.current_group_index and sm.group_status[gi] not in ("done",): + sys.stderr.write(f"--group {args.group} 不是当前 group(current={sm.current_group_index + 1})\n") + return 1 + # 组装 findings + stages = sm.groups[gi] + stage_numbers = [s.number for s in stages] + findings: list[StageFinding] = [] + for f in sm.findings: + sev = f["severity"] + fix_status = f.get("fix_status", "未修复") + findings.append(StageFinding(severity=sev, text=f["text"], fix_status=fix_status)) + # validator history 与 reproduce_cmd(取最后 pass 的) + reproduce_cmd = "" + final_verdict = "pass" + if sm.group_status[gi] == "failed-deadloop": + final_verdict = "failed-deadloop" + else: + last_pass = None + for h in sm.validator_history: + if h.get("group") == gi + 1 and h.get("verdict") == "pass": + last_pass = h + break + if last_pass is None: + final_verdict = "pass" + # reproduce_cmd 从 last pass 对应 validator outbox 取(best effort) + if last_pass is not None: + vpath = (Path(sm.run_dir) / "agents" + / f"validator-g{gi + 1}-r{last_pass.get('round')}" + / "outbox" / "validation.md") + if vpath.exists(): + try: + val = 
parse_validator_validation(vpath) + reproduce_cmd = val.reproduce_cmd + except ParseError: + pass + gf = GroupFindings( + group_index=gi, + stages=stage_numbers, + findings=findings, + validator_history=[h for h in sm.validator_history if h.get("group") == gi + 1], + final_verdict=final_verdict, + reproduce_cmd=reproduce_cmd, + skip_validator=sm.skip_validator, + ) + try: + result = writeback_tasks_md(Path(sm.tasks_md), gf) + except WriteBackError as e: + sys.stderr.write(f"writeback 越界:{e}\n") + return 1 + except FileNotFoundError as e: + sys.stderr.write(f"writeback 失败:{e}\n") + return 1 + sm.events_append({"type": "writeback", "group": gi + 1, + "stages": stage_numbers, "findings": len(findings)}) + if gi == sm.current_group_index: + sm.finalize_group("done" if final_verdict == "pass" else "failed") + sm.save() + _emit({ + "ok": True, + "tasks_md": str(result.tasks_md_path), + "stages_checked": result.stages_checked, + "findings_count": result.findings_count, + "next_group": sm.current_group_index + 1 if sm.current_group_index < len(sm.groups) else None, + "phase": sm.phase, + }) + return 0 + + +# ------------------------------------------------------------------------- +# heartbeat +# ------------------------------------------------------------------------- + +def cmd_heartbeat(args: argparse.Namespace) -> int: + """透传给 spec_session.py heartbeat 保活 spec 锁。 + + 本命令本身仅刷新 state.json.last_activity_at;spec 锁刷新由调用方主代理 + 单独再调 spec_session.py heartbeat 完成(保持 task_swarm/spec_session 互不 import)。 + """ + try: + run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + sm.events_append({"type": "heartbeat"}) + sm.save() + _emit({ + "ok": True, + "run_id": sm.run_id, + "spec_dir": sm.spec_dir, + "session_id": sm.session_id, + "hint": ("如需保活 spec 锁,请额外调用 spec_session.py heartbeat " + "--spec <dir> --session <id>"), + }) + return 0 + + +# 
------------------------------------------------------------------------- +# resolve +# ------------------------------------------------------------------------- + +def cmd_resolve(args: argparse.Namespace) -> int: + try: + run_dir = _find_run_dir(args.run) + except FileNotFoundError as e: + sys.stderr.write(f"{e}\n") + return 1 + sm = StateMachine.load(run_dir) + if args.abort: + sm.failed_status = "aborted" + sm.phase = "done" + sm.completed_at = _now_iso() + sm.events_append({"type": "resolve", "status": "aborted"}) + else: + sm.completed_at = sm.completed_at or _now_iso() + sm.failed_status = sm.failed_status or "done" + sm.events_append({"type": "resolve", "status": sm.failed_status}) + sm.save() + # 清理 sessions.task_swarm_run_id + if sm.session_id: + sess = _read_session(sm.session_id) + if sess is not None and sess.get("task_swarm_run_id") == sm.run_id: + sess["task_swarm_run_id"] = None + sess["last_activity_at"] = _now_iso() + with contextlib.suppress(Exception): + _write_session(sm.session_id, sess) + _emit({ + "ok": True, + "run_id": sm.run_id, + "status": sm.failed_status, + "completed_at": sm.completed_at, + }) + return 0 + + +# ------------------------------------------------------------------------- +# argparse +# ------------------------------------------------------------------------- + +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(prog="task_swarm.py", + description="task-swarm 编排主 CLI") + sub = p.add_subparsers(dest="cmd", required=True) + + pi = sub.add_parser("init") + pi.add_argument("--tasks", required=True) + pi.add_argument("--max-parallel", type=int, default=4) + pi.add_argument("--max-rounds", type=int, default=6) + pi.add_argument("--session", default=None) + pi.add_argument("--spec", default=None) + pi.add_argument("--skip-validator", action="store_true", + help="人工验收模式:review/p0-fix 完成后直接 writeback,跳过 validation/v-fix") + + ps = sub.add_parser("status") + ps.add_argument("--run", required=True) + + pp = 
sub.add_parser("plan") + pp.add_argument("--run", required=True) + + pa = sub.add_parser("advance") + pa.add_argument("--run", required=True) + pa.add_argument("--phase", required=True, + choices=["coding", "review", "p0-fix", "validation", "v-fix"]) + pa.add_argument("--round", type=int, default=1) + + pw = sub.add_parser("writeback") + pw.add_argument("--run", required=True) + pw.add_argument("--group", type=int, required=True) + + ph = sub.add_parser("heartbeat") + ph.add_argument("--run", required=True) + + pr = sub.add_parser("resolve") + pr.add_argument("--run", required=True) + pr.add_argument("--abort", action="store_true") + + return p + + +COMMANDS = { + "init": cmd_init, + "status": cmd_status, + "plan": cmd_plan, + "advance": cmd_advance, + "writeback": cmd_writeback, + "heartbeat": cmd_heartbeat, + "resolve": cmd_resolve, +} + + +def main(argv: Optional[list[str]] = None) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + fn = COMMANDS.get(args.cmd) + if fn is None: + parser.print_help() + return 1 + return fn(args) or 0 + + +def _log_wrap_main(argv: Optional[list[str]] = None) -> int: + argv_list = list(sys.argv[1:]) if argv is None else list(argv) + sid = None + for i, a in enumerate(argv_list): + if a == "--session" and i + 1 < len(argv_list): + sid = argv_list[i + 1] + break + sub_cmd = argv_list[0] if argv_list else "?" 
+ with contextlib.suppress(Exception): + _log_event("cli_call", {"script": "task_swarm.py", "cmd": sub_cmd, "argv_len": len(argv_list)}, session_id=sid) + rc = main(argv) + with contextlib.suppress(Exception): + _log_event("cli_exit", {"script": "task_swarm.py", "cmd": sub_cmd, "exit_code": rc}, session_id=sid) + return rc + + +if __name__ == "__main__": + try: + sys.exit(_log_wrap_main()) + except KeyboardInterrupt: + sys.exit(130) diff --git a/plugins/specode/scripts/task_swarm_guard.py b/plugins/specode/scripts/task_swarm_guard.py deleted file mode 100644 index 2f1f387..0000000 --- a/plugins/specode/scripts/task_swarm_guard.py +++ /dev/null @@ -1,230 +0,0 @@ -"""task-swarm hook-side detectors. - -Provides the INV-7/8/9/10 invariants exercised by spec_guard.py hooks: - - INV-7 Task tool: subagent_type must carry `specode:task-swarm-*` prefix - when task-swarm is active. - INV-8 Edit/Write inside a subagent: target must live in the subagent's - @writes declaration (parsed from its task.md), and never inside - the spec_dir. - INV-9 Edit on tasks.md during task-swarm: diff must only change checkbox - markers or insert `> ` annotation lines. Anything else (traceability, - metadata, headings, indent) is rejected. - INV-10 subagent Stop: outbox file(s) must satisfy the schema parsers in - task_swarm_outbox; schema-error → deny with explanation. - -Each check returns (decision: "ok"|"deny", message). The host hook chooses -how to enforce (PreToolUse deny → return 2; Stop deny → return 2 with msg). 
-""" -from __future__ import annotations - -import re -from pathlib import Path -from typing import Optional - -SCRIPTS_DIR = Path(__file__).resolve().parent -import sys - -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm_outbox as outbox_mod # noqa: E402 -import task_swarm_writeback as wb_mod # noqa: E402 - - -RUNS_DIRNAME = ".task-swarm" -ACTIVE_RUN_FILE = "active-run" -SUBAGENT_PREFIX = "specode:task-swarm-" -VALID_SUBAGENT_TYPES = { - f"{SUBAGENT_PREFIX}coder", - f"{SUBAGENT_PREFIX}reviewer", - f"{SUBAGENT_PREFIX}validator", - f"{SUBAGENT_PREFIX}planner", -} - - -# ---------- run discovery ---------- - -def find_active_run(project_root: Path) -> Optional[Path]: - pointer = project_root / RUNS_DIRNAME / ACTIVE_RUN_FILE - if not pointer.exists(): - return None - run_id = pointer.read_text(encoding="utf-8").strip() - if not run_id: - return None - run_dir = project_root / RUNS_DIRNAME / "runs" / run_id - return run_dir if run_dir.exists() else None - - -def is_task_swarm_active(project_root: Path) -> bool: - return find_active_run(project_root) is not None - - -# ---------- INV-7 ---------- - -INV7_MSG = ( - "task-swarm 守卫 (INV-7): subagent_type 必须使用 `{prefix}*` 前缀。\n" - "当前 subagent_type=`{got}` 在 task-swarm 运行期不被接受。\n" - "请改为下列之一: {valid}.\n" - "原因: 没有 plugin 前缀的 agent (如 general-purpose) 会让角色隔离失效。" -) - - -def check_inv7_subagent_type(subagent_type: str) -> tuple[str, str]: - if not subagent_type: - return "deny", INV7_MSG.format( - prefix=SUBAGENT_PREFIX, got="(空)", valid=", ".join(sorted(VALID_SUBAGENT_TYPES)) - ) - if subagent_type in VALID_SUBAGENT_TYPES: - return "ok", "" - return "deny", INV7_MSG.format( - prefix=SUBAGENT_PREFIX, - got=subagent_type, - valid=", ".join(sorted(VALID_SUBAGENT_TYPES)), - ) - - -# ---------- INV-8 ---------- - -WRITES_LINE_RE = re.compile(r"^- @writes(你只能修改这些路径):\s*(.+)$", re.MULTILINE) - - -def _parse_writes_from_task_md(task_md: Path) -> Optional[list[str]]: - if not task_md.exists(): - return None - text = 
task_md.read_text(encoding="utf-8", errors="replace") - m = WRITES_LINE_RE.search(text) - if not m: - return None - raw = m.group(1).strip() - if raw.startswith("(") or "无 @writes" in raw: - return [] - out = [] - for piece in re.split(r"[,,]", raw): - piece = piece.strip().strip("`").strip() - if piece: - out.append(piece) - return out - - -INV8_MSG = ( - "task-swarm 守卫 (INV-8): subagent 越界写入。\n" - "目标文件 `{target}` 不在本 subagent 声明的 @writes 范围内: {writes}.\n" - "原因: 物理隔离要求 subagent 只能改自己负责的文件; 越界会污染其他角色的工作区或 spec。" -) - -INV8_SPEC_MSG = ( - "task-swarm 守卫 (INV-8): subagent 禁止修改 spec 文档。\n" - "目标 `{target}` 在 spec 目录 `{spec_dir}` 内。\n" - "原因: spec 文档由主编排器持锁; subagent 只动业务代码。" -) - - -def check_inv8_writes_boundary( - target: Path, - workspace: Path, - project_root: Path, - spec_dir: Path, -) -> tuple[str, str]: - """target should be either inside workspace/outbox (free) or inside - project_root and in the workspace's @writes list. Spec dir is always - forbidden. - """ - try: - target_resolved = target.resolve() - except OSError: - target_resolved = target - - # workspace outbox is always writable for the subagent - try: - target_resolved.relative_to((workspace / "outbox").resolve()) - return "ok", "" - except (ValueError, OSError): - pass - - # spec dir is forbidden - try: - target_resolved.relative_to(spec_dir.resolve()) - return "deny", INV8_SPEC_MSG.format(target=target, spec_dir=spec_dir) - except (ValueError, OSError): - pass - - # outside project_root: ignore (not our jurisdiction) - try: - rel = target_resolved.relative_to(project_root.resolve()) - except (ValueError, OSError): - return "ok", "" - - writes = _parse_writes_from_task_md(workspace / "task.md") - if writes is None: - # No task.md or no writes clause — defensive ok (caller may further check) - return "ok", "" - rel_str = str(rel) - for entry in writes: - if _writes_entry_matches(rel_str, entry): - return "ok", "" - return "deny", INV8_MSG.format(target=rel_str, writes=", ".join(writes) or "(无)") - 
- -def _writes_entry_matches(rel: str, entry: str) -> bool: - """Match a target path against an @writes entry. - - Supported forms: - - exact file: `src/api/login.py` - - directory (slash): `src/api/` → any path under src/api/ - - directory (glob): `src/api/**` → any path under src/api/ - - Comparison is purely string-based on the POSIX-style relative path; leading - `./` and trailing whitespace are normalized away. - """ - e = entry.strip().lstrip("./").strip() - if not e: - return False - if e == rel: - return True - if e.endswith("/**"): - prefix = e[:-3] - if not prefix.endswith("/"): - prefix += "/" - return rel.startswith(prefix) - if e.endswith("/"): - return rel.startswith(e) - return False - - -# ---------- INV-9 ---------- - -INV9_MSG = ( - "task-swarm 守卫 (INV-9): 禁止直接编辑 tasks.md。\n" - "当前差异不安全: {reason}\n" - "在 task-swarm 运行期回写 tasks.md 必须走 `task_swarm.py writeback`,\n" - "脚本内部只放行 checkbox 切换 + `> ` 注释行追加, 并自动 verify-lock + heartbeat。" -) - - -def check_inv9_tasks_md_diff(old_text: str, new_text: str) -> tuple[str, str]: - safe, reason = wb_mod.diff_safe_line_by_line(old_text, new_text) - if safe: - return "ok", "" - return "deny", INV9_MSG.format(reason=reason) - - -# ---------- INV-10 (deprecated alias) ---------- -# -# Canonical implementation now lives in task_swarm_outbox.validate_outbox_schema. -# INV-10 is enforced by the CLI `parse` subcommand (not a Stop hook). These -# names are preserved to avoid breaking external callers / pre-existing tests. 
- -INV10_MSG = outbox_mod.SCHEMA_ERROR_MSG - - -def check_inv10_outbox_schema(role: str, outbox_dir: Path) -> tuple[str, str]: - return outbox_mod.validate_outbox_schema(role, outbox_dir) - - -# ---------- helpers used by spec_guard handlers ---------- - -def is_tasks_md(target: Path, spec_dir: Path) -> bool: - try: - return target.resolve() == (spec_dir / "tasks.md").resolve() - except OSError: - return False diff --git a/plugins/specode/scripts/task_swarm_outbox.py b/plugins/specode/scripts/task_swarm_outbox.py deleted file mode 100644 index 8f2550a..0000000 --- a/plugins/specode/scripts/task_swarm_outbox.py +++ /dev/null @@ -1,378 +0,0 @@ -"""Outbox schema parsers for task-swarm. - -Parses the three subagent outputs into structured verdicts: - - result.md (coder) → judgment ∈ {ok, failed, blocked} - - review.md (reviewer) → judgment ∈ {approved, p0, loop} - - validation.md (validator)→ judgment ∈ {pass, fail, loop} - -When a required section is missing the judgment becomes "schema-error". -The orchestrator should re-fork the subagent with a clarifying note rather -than try to interpret malformed output. - -This is the moat between subagent-side hallucination and orchestrator-side -state machine: any output that doesn't fit the schema is rejected upstream. -""" -from __future__ import annotations - -import re -from dataclasses import dataclass, field -from pathlib import Path - - -# ---------- shared ---------- - -STATUS_TAIL_RE = re.compile(r"^STATUS:\s*(ok|failed|blocked)(?::\s*(.+))?\s*$", re.IGNORECASE) -LOOP_WARNING_RE = re.compile(r"^## 进入死循环风险\s*$", re.MULTILINE) - - -def _last_status_line(text: str) -> tuple[str | None, str]: - """Return (status, reason) parsed from the *strict last non-empty line*. - - STATUS must appear on the last meaningful line. If the last non-empty line - doesn't match, return (None, ""). Even if STATUS appears earlier in the - body, we don't accept it — avoids "STATUS in the middle" leakage. 
- """ - lines = [ln for ln in text.splitlines() if ln.strip()] - if not lines: - return None, "" - m = STATUS_TAIL_RE.match(lines[-1].strip()) - if not m: - return None, "" - return m.group(1).lower(), (m.group(2) or "").strip() - - -def _section(text: str, heading: str) -> str | None: - """Return body text of `## heading` section, or None if absent. - - Body ends at the next `## ` heading or end of file. - """ - pat = re.compile(rf"^## {re.escape(heading)}\s*$", re.MULTILINE) - m = pat.search(text) - if not m: - return None - start = m.end() - nxt = re.search(r"^## ", text[start:], re.MULTILINE) - end = start + nxt.start() if nxt else len(text) - return text[start:end].strip() - - -# ---------- result.md (coder) ---------- - -SUBTASK_LINE_RE = re.compile( - r"^[-*]\s*(?P<num>\d+\.\d+)\s+(?P<title>.+?):\s*(?P<status>done|failed|skipped)(?:\s*[—-]\s*(?P<note>.+))?\s*$" -) - - -@dataclass -class ResultVerdict: - judgment: str # ok | failed | blocked | schema-error - status_reason: str = "" - subtasks: list[dict] = field(default_factory=list) - fix_files: list[str] = field(default_factory=list) # parsed from "P0 修复清单" or note paths - raw_errors: list[str] = field(default_factory=list) - - -def parse_result(text: str) -> ResultVerdict: - errs: list[str] = [] - status, reason = _last_status_line(text) - if status is None: - errs.append("缺少末行 STATUS: ok|failed|blocked") - - subtasks_section = _section(text, "子任务状态") - if subtasks_section is None: - errs.append("缺少 `## 子任务状态` 节") - subtasks = [] - else: - subtasks = [] - for line in subtasks_section.splitlines(): - m = SUBTASK_LINE_RE.match(line.strip()) - if m: - subtasks.append({ - "num": m.group("num"), - "title": m.group("title").strip(), - "status": m.group("status").lower(), - "note": (m.group("note") or "").strip(), - }) - if not subtasks: - errs.append("`## 子任务状态` 节为空或格式不符 `- N.M 标题: done|failed|skipped`") - - # Fix files from "P0 修复清单" (修复轮 result.md) — best effort. 
- fix_files: list[str] = [] - p0_section = _section(text, "P0 修复清单") - if p0_section: - for line in p0_section.splitlines(): - m = re.search(r"`?([\w./\\_-]+\.[a-zA-Z0-9]+)(?::\d+)?`?", line) - if m: - fix_files.append(m.group(1)) - - if errs: - return ResultVerdict(judgment="schema-error", raw_errors=errs, subtasks=subtasks, fix_files=fix_files) - - return ResultVerdict( - judgment=status or "schema-error", - status_reason=reason, - subtasks=subtasks, - fix_files=fix_files, - ) - - -# ---------- review.md (reviewer) ---------- - -@dataclass -class ReviewVerdict: - judgment: str # approved | p0 | loop | schema-error - p0_count: int = 0 - p0_items: list[str] = field(default_factory=list) - advisory_p0_count: int = 0 - advisory_p0_items: list[str] = field(default_factory=list) - loop_warning: bool = False - conclusion: str = "" - raw_errors: list[str] = field(default_factory=list) - - -# Evidence tag regex for blocking P0 items. Only P0 lines carrying one of -# these tags trigger the coder fix loop; everything else is recorded as an -# advisory (audit-only) so reviewer's subjective opinions can't kick off an -# adversarial bounce-back loop. -P0_EVIDENCE_TAG_RE = re.compile( - r"\[(req:[\w.\-]+|security|contract)\]", - re.IGNORECASE, -) - - -def parse_review(text: str) -> ReviewVerdict: - errs: list[str] = [] - loop = bool(LOOP_WARNING_RE.search(text)) - - status, _ = _last_status_line(text) - if status != "ok": - errs.append("缺少末行 STATUS: ok") - - conclusion_section = _section(text, "结论") - if conclusion_section is None: - errs.append("缺少 `## 结论` 节") - conclusion = "" - else: - conclusion = conclusion_section.splitlines()[0].strip() if conclusion_section else "" - - p0_section = _section(text, "P0 — 阻塞,coder 必须修复(修完才能进 validator)") - if p0_section is None: - # Try short heading variants subagent might use. 
- for alt in ("P0 — 阻塞", "P0"): - p0_section = _section(text, alt) - if p0_section is not None: - break - if p0_section is None: - errs.append("缺少 `## P0` 节(即便无 P0 也需写 (none))") - all_p0_items: list[str] = [] - else: - all_p0_items = [] - for line in p0_section.splitlines(): - s = line.strip() - if not s or s.startswith("("): - # `(none)` and blank lines skip - continue - if s.startswith("- "): - all_p0_items.append(s[2:].strip()) - - # Split P0 items into blocking (with evidence tag) vs advisory (no tag). - # Only blocking P0 triggers the coder fix loop — advisory P0 is recorded - # for audit but doesn't bounce code back to coder. - blocking_p0: list[str] = [] - advisory_p0: list[str] = [] - for item in all_p0_items: - if P0_EVIDENCE_TAG_RE.search(item): - blocking_p0.append(item) - else: - advisory_p0.append(item) - - if errs: - return ReviewVerdict( - judgment="schema-error", - loop_warning=loop, - conclusion=conclusion, - p0_count=len(blocking_p0), - p0_items=blocking_p0, - advisory_p0_count=len(advisory_p0), - advisory_p0_items=advisory_p0, - raw_errors=errs, - ) - - if loop: - return ReviewVerdict( - judgment="loop", - loop_warning=True, - conclusion=conclusion, - p0_count=len(blocking_p0), - p0_items=blocking_p0, - advisory_p0_count=len(advisory_p0), - advisory_p0_items=advisory_p0, - ) - - return ReviewVerdict( - judgment="p0" if blocking_p0 else "approved", - loop_warning=False, - conclusion=conclusion, - p0_count=len(blocking_p0), - p0_items=blocking_p0, - advisory_p0_count=len(advisory_p0), - advisory_p0_items=advisory_p0, - ) - - -# ---------- validation.md (validator) ---------- - -JUDGMENT_LINE_RE = re.compile(r"^(pass|fail)\b", re.IGNORECASE) - - -@dataclass -class ValidationVerdict: - judgment: str # pass | fail | loop | schema-error - loop_warning: bool = False - fix_files: list[str] = field(default_factory=list) - fix_guidance: str = "" - raw_errors: list[str] = field(default_factory=list) - - -def parse_validation(text: str) -> ValidationVerdict: 
- errs: list[str] = [] - loop = bool(LOOP_WARNING_RE.search(text)) - - status, _ = _last_status_line(text) - if status != "ok": - errs.append("缺少末行 STATUS: ok") - - judg_section = _section(text, "判定") - if judg_section is None: - errs.append("缺少 `## 判定` 节") - verdict = None - else: - first = judg_section.splitlines()[0].strip().lower() if judg_section else "" - m = JUDGMENT_LINE_RE.match(first) - if not m: - errs.append("`## 判定` 首行必须是 `pass` 或 `fail`") - verdict = None - else: - verdict = m.group(1).lower() - - repro = _section(text, "复现命令") - if repro is None: - errs.append("缺少 `## 复现命令` 节") - - fix_files: list[str] = [] - fix_guidance = "" - if verdict == "fail": - guidance = _section(text, "给 coder 的修复指引(必填)") or _section(text, "给 coder 的修复指引") - if not guidance: - errs.append("validator fail 时必须有 `## 给 coder 的修复指引` 节") - else: - fix_guidance = guidance - for line in guidance.splitlines(): - m = re.match(r"^[-*]\s*(?:文件|file)[::]\s*(.+)$", line.strip(), re.IGNORECASE) - if m: - fix_files.append(m.group(1).strip().strip("`")) - - if errs: - return ValidationVerdict( - judgment="schema-error", - loop_warning=loop, - fix_files=fix_files, - fix_guidance=fix_guidance, - raw_errors=errs, - ) - if loop: - return ValidationVerdict( - judgment="loop", - loop_warning=True, - fix_files=fix_files, - fix_guidance=fix_guidance, - ) - return ValidationVerdict( - judgment=verdict or "schema-error", - loop_warning=False, - fix_files=fix_files, - fix_guidance=fix_guidance, - ) - - -# ---------- dispatch ---------- - -SCHEMA_ERROR_MSG = ( - "task-swarm 守卫 (INV-10): subagent outbox 不符合 schema。\n" - "{role}/{fname}:\n {errors}\n" - "请重新生成 outbox/{fname} —— 主编排器靠固定 schema 解析判定, 偏离会导致状态机误判。" -) - - -def validate_outbox_schema(role: str, outbox_dir: Path) -> tuple[str, str]: - """Schema gate used by the CLI parse subcommand (R1: CLI-side enforcement - of INV-10, not a hook). - - Returns ("ok", "") when the outbox parses cleanly, ("deny", message) when - schema-error is raised. 
Keep the function signature compatible with the - deprecated task_swarm_guard alias for backward compatibility. - """ - fname_map = {"coder": "result.md", "reviewer": "review.md", "validator": "validation.md"} - fname = fname_map.get(role) - if not fname: - return "ok", "" - result = parse_outbox(role, outbox_dir) - if result.get("judgment") != "schema-error": - return "ok", "" - errors = result.get("errors") or ["未知错误"] - return "deny", SCHEMA_ERROR_MSG.format(role=role, fname=fname, errors="\n ".join(errors)) - - -def parse_outbox(role: str, outbox_dir: Path) -> dict: - """Parse the appropriate file in outbox_dir for the given role. - - Returns a JSON-safe dict with `judgment`, `errors`, and role-specific fields. - Missing file → schema-error with explanatory message. - """ - filename_map = { - "coder": "result.md", - "reviewer": "review.md", - "validator": "validation.md", - } - fname = filename_map.get(role) - if not fname: - return {"judgment": "schema-error", "errors": [f"未知角色: {role}"]} - fpath = outbox_dir / fname - if not fpath.exists(): - return {"judgment": "schema-error", "errors": [f"未找到 outbox/{fname}"]} - text = fpath.read_text(encoding="utf-8", errors="replace") - - if role == "coder": - v = parse_result(text) - return { - "role": "coder", - "judgment": v.judgment, - "errors": v.raw_errors, - "status_reason": v.status_reason, - "subtasks": v.subtasks, - "fix_files": v.fix_files, - } - if role == "reviewer": - v = parse_review(text) - return { - "role": "reviewer", - "judgment": v.judgment, - "errors": v.raw_errors, - "p0_count": v.p0_count, - "p0_items": v.p0_items, - "advisory_p0_count": v.advisory_p0_count, - "advisory_p0_items": v.advisory_p0_items, - "loop_warning": v.loop_warning, - "conclusion": v.conclusion, - } - if role == "validator": - v = parse_validation(text) - return { - "role": "validator", - "judgment": v.judgment, - "errors": v.raw_errors, - "fix_files": v.fix_files, - "fix_guidance": v.fix_guidance, - "loop_warning": 
v.loop_warning, - } - return {"judgment": "schema-error", "errors": [f"未知角色: {role}"]} diff --git a/plugins/specode/scripts/task_swarm_parse_md.py b/plugins/specode/scripts/task_swarm_parse_md.py deleted file mode 100644 index 9077cff..0000000 --- a/plugins/specode/scripts/task_swarm_parse_md.py +++ /dev/null @@ -1,313 +0,0 @@ -"""tasks.md parser for task-swarm. - -Extracts the dispatch plan from a specode-style tasks.md: - - top-level stages (`- [ ] N. 标题`) - - leaf tasks (` - [ ] N.M 标题`) - - checkpoint stages (top-level + title contains "检查点") - - leaf metadata: `文件:` / `验证:` / `_需求:x.y_` - - `@swarm:full | coder-only | skip` tags + heuristic defaults - -Outputs a structured plan: stage list, deps (sequential by default), parallel -groups (stages with disjoint file sets), warnings. - -This module is pure-function: input is tasks.md text, output is dict (JSON-safe). -""" -from __future__ import annotations - -import re -from dataclasses import dataclass, field, asdict -from typing import Iterable - - -# ---------- regex ---------- - -# Top-level stage: `- [ ] N. 标题` (allow [x] / [~] / [*] markers). -STAGE_RE = re.compile(r"^- \[([ x~*\-])\] (\d+)\. (.+?)\s*$") - -# Leaf task: 2-space indent + `- [ ] N.M 标题`. -LEAF_RE = re.compile(r"^ - \[([ x~*\-])\] (\d+\.\d+) (.+?)\s*$") - -# Metadata lines under a leaf (4-space indent). -FILE_RE = re.compile(r"^ - (?:文件|files?)[::]\s*(.+?)\s*$", re.IGNORECASE) -VERIFY_RE = re.compile(r"^ - (?:验证|verify)[::]\s*(.+?)\s*$", re.IGNORECASE) -REQ_RE = re.compile(r"^ - _(?:需求|requirements?)[::]\s*(.+?)_\s*$", re.IGNORECASE) - -# `@swarm:xxx` tags anywhere in a leaf title or metadata. 
-TAG_RE = re.compile(r"@swarm:(\w[\w-]*)") - -VALID_TAGS = {"full", "coder-only", "skip"} - -CHECKPOINT_KEYWORDS = ("检查点", "checkpoint") - - -# ---------- data classes ---------- - -@dataclass -class Leaf: - num: str # "1.1" - title: str - files: list[str] = field(default_factory=list) - verify: str = "" - requirement: str = "" - tags_raw: list[str] = field(default_factory=list) # ["full", "coder-only", ...] - policy: str = "default" # final decision: full | coder-only | skip | default - optional: bool = False # came from `[*]` marker - line: int = 0 # original line number (1-based) - - -@dataclass -class Stage: - num: int - title: str - kind: str # "stage" | "checkpoint" - leaves: list[Leaf] = field(default_factory=list) - deps: list[int] = field(default_factory=list) # stage numbers - files_union: list[str] = field(default_factory=list) - optional: bool = False - checkpoint_for: int | None = None - line: int = 0 - - -@dataclass -class Plan: - stages: list[Stage] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) - - def to_dict(self) -> dict: - return { - "stages": [asdict(s) for s in self.stages], - "warnings": list(self.warnings), - } - - -# ---------- parsing ---------- - -def parse_tasks_md(text: str) -> Plan: - """Parse tasks.md text → Plan. - - Lenient — unknown lines are ignored. Errors that affect dispatch (missing - files for full-mode leaves, malformed numbers) are surfaced as warnings. - """ - plan = Plan() - current_stage: Stage | None = None - current_leaf: Leaf | None = None - - lines = text.splitlines() - for idx, raw in enumerate(lines, start=1): - # Stage match must come first since leaves are also dashes. 
- m_stage = STAGE_RE.match(raw) - if m_stage: - current_leaf = None - marker, num_s, title = m_stage.group(1), m_stage.group(2), m_stage.group(3) - num = int(num_s) - kind = "checkpoint" if any(kw in title for kw in CHECKPOINT_KEYWORDS) else "stage" - stage = Stage( - num=num, - title=title.strip(), - kind=kind, - optional=(marker == "*"), - line=idx, - ) - plan.stages.append(stage) - current_stage = stage - continue - - m_leaf = LEAF_RE.match(raw) - if m_leaf and current_stage is not None: - marker, leaf_num, title = m_leaf.group(1), m_leaf.group(2), m_leaf.group(3) - tags = TAG_RE.findall(title) - # Strip tag suffixes from the title for cleanliness. - clean_title = TAG_RE.sub("", title).rstrip() - leaf = Leaf( - num=leaf_num, - title=clean_title, - tags_raw=list(tags), - optional=(marker == "*"), - line=idx, - ) - current_stage.leaves.append(leaf) - current_leaf = leaf - continue - - if current_leaf is None: - continue - - m_file = FILE_RE.match(raw) - if m_file: - for fp in _split_files(m_file.group(1)): - current_leaf.files.append(fp) - # Tags can also live on the file line. - current_leaf.tags_raw.extend(TAG_RE.findall(raw)) - continue - m_verify = VERIFY_RE.match(raw) - if m_verify: - current_leaf.verify = m_verify.group(1).strip() - current_leaf.tags_raw.extend(TAG_RE.findall(raw)) - continue - m_req = REQ_RE.match(raw) - if m_req: - current_leaf.requirement = m_req.group(1).strip() - current_leaf.tags_raw.extend(TAG_RE.findall(raw)) - continue - - # Plain tag line under a leaf (e.g., " - @swarm:full"). - tags_on_line = TAG_RE.findall(raw) - if tags_on_line: - current_leaf.tags_raw.extend(tags_on_line) - - # Post-process: tag arbitration, deps, file unions. 
- _arbitrate_tags(plan) - _link_checkpoints(plan) - _compute_file_unions(plan) - _compute_deps(plan) - return plan - - -def _split_files(raw: str) -> list[str]: - out: list[str] = [] - for piece in re.split(r"[,,]", raw): - piece = piece.strip().strip("`").strip() - if piece: - out.append(piece) - return out - - -# ---------- tag arbitration ---------- - -def _arbitrate_tags(plan: Plan) -> None: - """Resolve `@swarm:*` tags + heuristic defaults per leaf. - - Priority (high → low): - 1. @swarm:skip wins unconditionally. - 2. Explicit @swarm:full > @swarm:coder-only. - 3. Explicit any-tag overrides heuristic. - 4. Heuristic: optional ([*]) OR no `_需求:_` → coder-only. - 5. Otherwise: default (stage-aggregated). - - Unknown tags → warning + ignored. - """ - for stage in plan.stages: - for leaf in stage.leaves: - explicit: set[str] = set() - for t in leaf.tags_raw: - if t in VALID_TAGS: - explicit.add(t) - else: - plan.warnings.append( - f"[WARN] T{leaf.num} 无效 @swarm: 标签 \"{t}\",已忽略" - ) - # Dedup tags_raw to valid only (preserves what user wrote, normalized). 
- leaf.tags_raw = sorted(explicit) - - if "skip" in explicit: - if explicit - {"skip"}: - plan.warnings.append( - f"[INFO] T{leaf.num} 标签冲突 {sorted(explicit)} → 采用 skip" - ) - leaf.policy = "skip" - continue - if "full" in explicit and "coder-only" in explicit: - plan.warnings.append( - f"[INFO] T{leaf.num} 标签冲突 @swarm:full + @swarm:coder-only → 采用 full" - ) - leaf.policy = "full" - continue - if "full" in explicit: - leaf.policy = "full" - continue - if "coder-only" in explicit: - leaf.policy = "coder-only" - continue - # heuristic - if leaf.optional or not leaf.requirement: - leaf.policy = "coder-only" - else: - leaf.policy = "default" - - -# ---------- deps + parallelism ---------- - -def _link_checkpoints(plan: Plan) -> None: - """Link each checkpoint stage to the previous non-checkpoint stage.""" - last_stage_num: int | None = None - for stage in plan.stages: - if stage.kind == "stage": - last_stage_num = stage.num - else: - stage.checkpoint_for = last_stage_num - - -def _compute_file_unions(plan: Plan) -> None: - for stage in plan.stages: - seen: set[str] = set() - union: list[str] = [] - for leaf in stage.leaves: - if leaf.policy == "skip": - continue - for f in leaf.files: - if f not in seen: - seen.add(f) - union.append(f) - stage.files_union = union - - -def _compute_deps(plan: Plan) -> None: - """Compute stage-level deps. - - Default rule: - - A checkpoint stage depends on the stage it follows (checkpoint_for). - - Otherwise, deps stay empty (potential parallelism is determined by - file-union disjointness at dispatch time, not encoded here). - - Future versions may parse explicit `@depends-on:N` tags from stage titles. 
- """ - for stage in plan.stages: - if stage.kind == "checkpoint" and stage.checkpoint_for is not None: - stage.deps = [stage.checkpoint_for] - - -# ---------- helpers used by orchestrator ---------- - -def parallelizable(a: Stage, b: Stage) -> bool: - """Two stages may run in parallel if their file unions are disjoint and - neither depends on the other. - """ - if a.num in b.deps or b.num in a.deps: - return False - fa = set(a.files_union) - fb = set(b.files_union) - return not (fa & fb) - - -def stages_with_role(plan: Plan, role: str) -> Iterable[Stage]: - """Yield stages that need the given role to be dispatched. - - - role='coder' or 'reviewer': stages with at least one non-skip leaf and - at least one leaf with policy in {full, default, coder-only}. - (coder-only leaves get coder but skip reviewer; the reviewer call still - happens for the stage if ANY default/full leaf is present.) - - role='validator': only stages of kind 'checkpoint'. - """ - if role == "validator": - for s in plan.stages: - if s.kind == "checkpoint": - yield s - return - for s in plan.stages: - if s.kind != "stage": - continue - non_skip = [l for l in s.leaves if l.policy != "skip"] - if not non_skip: - continue - if role == "coder": - yield s - continue - if role == "reviewer": - if any(l.policy in {"full", "default"} for l in s.leaves): - yield s - - -def parse_file(path) -> Plan: - from pathlib import Path - text = Path(path).read_text(encoding="utf-8") - return parse_tasks_md(text) diff --git a/plugins/specode/scripts/task_swarm_prompt.py b/plugins/specode/scripts/task_swarm_prompt.py deleted file mode 100644 index b4e8b3e..0000000 --- a/plugins/specode/scripts/task_swarm_prompt.py +++ /dev/null @@ -1,340 +0,0 @@ -"""Subagent prompt pre-renderer. - -The orchestrator should NOT compose subagent prompts by hand — it only reads -the rendered `task.md` produced by this module and passes it verbatim to the -Task tool. 
This keeps four invariants: - - - @writes boundary is always declared, identically across rounds - - inbox relay paths use a stable convention (workspace/inbox/...) - - fix-round prompts always include the "only fix P0/fail items, no scope creep" - guardrail - - checkpoint validator prompts always include the upstream coder+reviewer - outboxes by reference - -The rendered file is also useful for debugging: every subagent's exact input is -on disk under `.task-swarm/runs/<RUN>/agents/stage-N-<role>[-rR]/task.md`. -""" -from __future__ import annotations - -import shutil -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable - - -# ---------- workspace ---------- - -def workspace_name(stage: int, role: str, round_no: int) -> str: - """Naming convention: stage-N-<role> for round 1, -rR suffix from round 2+.""" - base = f"stage-{stage}-{role}" - return base if round_no <= 1 else f"{base}-r{round_no}" - - -def agent_workspace(run_dir: Path, stage: int, role: str, round_no: int) -> Path: - return run_dir / "agents" / workspace_name(stage, role, round_no) - - -def prepare_workspace(run_dir: Path, stage: int, role: str, round_no: int) -> Path: - """Create inbox + outbox directories and return workspace path.""" - ws = agent_workspace(run_dir, stage, role, round_no) - (ws / "inbox").mkdir(parents=True, exist_ok=True) - (ws / "outbox").mkdir(parents=True, exist_ok=True) - return ws - - -def relay_inbox(run_dir: Path, ws: Path, sources: Iterable[tuple[int, str, int, str]]) -> list[str]: - """Copy files from upstream outboxes into ws/inbox/. - - sources: iterable of (stage, role, round_no, label) tuples. - label is the destination filename (e.g., "prev-result.md", "review.md"). - - Returns list of relative inbox paths actually copied. 
- """ - inbox = ws / "inbox" - inbox.mkdir(parents=True, exist_ok=True) - copied: list[str] = [] - for stage, role, round_no, label in sources: - src_ws = agent_workspace(run_dir, stage, role, round_no) - src_dir = src_ws / "outbox" - if not src_dir.exists(): - continue - # if the label maps to a specific filename, copy that single file; - # otherwise copy whole outbox tree. - primary_map = { - "coder": "result.md", - "reviewer": "review.md", - "validator": "validation.md", - } - primary = primary_map.get(role) - if primary and (src_dir / primary).exists(): - dest = inbox / label - shutil.copyfile(src_dir / primary, dest) - copied.append(dest.name) - else: - for p in src_dir.iterdir(): - if p.is_file(): - dest = inbox / f"{label}__{p.name}" - shutil.copyfile(p, dest) - copied.append(dest.name) - return copied - - -# ---------- prompt rendering ---------- - -@dataclass -class StageContext: - stage_num: int - stage_title: str - stage_kind: str # "stage" | "checkpoint" - leaves: list[dict] # subset of parser Leaf as dict - spec_dir: Path - project_root: Path - workspace: Path - round_no: int = 1 - scope: str = "" # "" | "p0-fix" | "validator-fail-fix" | "post-fix" - - -def _writes_clause(leaves: list[dict]) -> str: - files: list[str] = [] - for l in leaves: - if l.get("policy") == "skip": - continue - for f in l.get("files") or []: - if f not in files: - files.append(f) - return ", ".join(files) if files else "(本阶段无 @writes 声明文件; 仅限 inbox/outbox + 已存在文件的最小改动)" - - -def _leaves_block(leaves: list[dict]) -> str: - lines: list[str] = [] - for l in leaves: - if l.get("policy") == "skip": - continue - lines.append(f"- {l.get('num')} {l.get('title')}") - if l.get("files"): - lines.append(f" - 文件: {', '.join(l['files'])}") - if l.get("requirement"): - lines.append(f" - 需求: {l['requirement']}") - if l.get("verify"): - lines.append(f" - 验证: {l['verify']}") - return "\n".join(lines) if lines else "(无叶子任务)" - - -def _inbox_listing(workspace: Path) -> str: - inbox = workspace / 
"inbox" - if not inbox.exists(): - return "(空)" - names = sorted(p.name for p in inbox.iterdir() if p.is_file()) - return "\n".join(f"- {n}" for n in names) if names else "(空)" - - -def render_coder_prompt(ctx: StageContext) -> str: - writes = _writes_clause(ctx.leaves) - is_fix = ctx.round_no > 1 and ctx.scope == "validator-fail-fix" - header = ( - f"你正在 task-swarm 流程中作为 CODER 子 agent 执行阶段 {ctx.stage_num}" - + (f"(修复轮 r{ctx.round_no},scope={ctx.scope})" if is_fix else "(初轮)") - + "。" - ) - - body = [ - header, - "", - f"# 阶段 {ctx.stage_num}: {ctx.stage_title}", - "", - "## 本阶段子任务清单", - _leaves_block(ctx.leaves), - "", - "## 边界", - f"- 项目根: {ctx.project_root}", - f"- @writes(你只能修改这些路径): {writes}", - f"- 工作区: {ctx.workspace}", - f"- inbox(只读): {ctx.workspace / 'inbox'}", - f"- outbox(你的产出): {ctx.workspace / 'outbox'}", - f"- spec 文档(绝对只读,禁止修改): {ctx.spec_dir}", - "", - "## inbox 内容", - _inbox_listing(ctx.workspace), - "", - ] - - if is_fix: - body += [ - "## 修复轮硬规则(validator-fail-fix)", - "1. 只动 inbox/validation.md 修复指引中提到的文件/位置", - "2. 不要顺手优化、不要扩大范围", - "3. 修完每条 fail 在 outbox/result.md 用 `- [x] <fail 摘要> — 已修复: ...` 标记", - "4. 
不要重写整个阶段,是定向补丁", - "", - ] - - body += [ - "## 输出协议(严格)", - "在 outbox/result.md 中至少包含:", - "", - "```markdown", - f"# 阶段 {ctx.stage_num}: {ctx.stage_title} 执行结果", - "", - "## 子任务状态", - "- 1.1 写 ...: done — <文件>", - "- 1.2 写 ...: failed — <原因>", - "", - "## 关键变更", - "- ...", - "", - "## 给下游(reviewer / validator)的提示", - "- ...", - "```", - "", - "**末行**必须是 `STATUS: ok` / `STATUS: failed: <原因>` / `STATUS: blocked: <原因>` 之一。", - "", - "禁止:", - "- 给自己的产物打分(LGTM / 看起来对 / approved)", - "- 评审任何代码(包括自己刚写的)", - "- 判 pass/fail(那是 validator 的事)", - "- 修改 @writes 之外的任何文件", - "- 修改 spec 文档", - ] - return "\n".join(body) - - -def render_reviewer_prompt(ctx: StageContext) -> str: - header = ( - f"你正在 task-swarm 流程中作为 REVIEWER 子 agent 评审阶段 {ctx.stage_num}(advisory 模式)。" - ) - - body = [ - header, - "", - f"# 阶段 {ctx.stage_num}: {ctx.stage_title}", - "", - "## 你的角色(**advisory,不阻塞推进**)", - "- reviewer 是**建议提供者**,**不参与修复循环**。", - "- 你的产出会作为 `> ⚠️ 评审建议` 注释**写进 tasks.md**,供使用者决定是否人工跟进。", - "- coder 不会因为你的 P0 而被重新派发;validator 才是阻塞门。", - "- 你存在的意义:把 LLM 读代码的发现做成结构化报告,供下游审阅。", - "", - "## 评审范围", - _leaves_block(ctx.leaves), - "", - "## 边界", - "- 你**没有** Edit/Write 工具——这是物理隔离。", - "- 仅用 Bash 写 outbox/review.md。", - f"- inbox: {ctx.workspace / 'inbox'}", - f"- outbox: {ctx.workspace / 'outbox'}", - f"- 评审 spec 文档(只读): {ctx.spec_dir}", - "", - "## inbox 内容", - _inbox_listing(ctx.workspace), - "", - "## 输出协议(严格,主编排器要解析)", - "在 outbox/review.md 写入:", - "", - "```markdown", - "## 结论", - "needs-changes | approved-with-comments | approved", - "", - "## P0 — 严重建议(带证据标签的强提醒,写入 tasks.md 注释)", - "- <文件:行> [req:x.y] — <问题> — <建议>", - "- <文件:行> [security] — <安全/数据完整性问题> — <建议>", - "- <文件:行> [contract] — <接口契约不一致> — <建议>", - "(如无 P0 写 `(none)`,不要省略本节)", - "", - "## P1 — 建议", - "- ...", - "", - "## P2 — 可选改进", - "- ...", - "", - "## 给使用者的提示", - "- 关键担忧汇总(1-3 行)", - "```", - "", - "末行:`STATUS: ok`(无论 approved 或 needs-changes 都 ok)。", - "", - "## P0 证据标签规则(**重要**)", - "P0 行**必须**带下列证据标签之一,否则会被自动归入 
advisory(仅入档):", - "", - "- `[req:x.y]` — 直接违反某条 `_需求:x.y_` 的 SHALL 语句", - "- `[security]` — 安全 / 数据完整性问题(注入、越权、token 泄漏、并发不安全等)", - "- `[contract]` — 接口契约不一致(A 子任务说返回 token,B 子任务期望 session_id)", - "", - "**没有证据标签 = 仅 advisory**——但所有 P0 / advisory 都会写入 tasks.md 注释供人审阅。", - "证据标签让使用者一眼区分'有客观依据的问题'与'风格/印象'。", - "", - "**零 P0 是允许的**——但必须扫完每个文件、每个子任务才能下结论。", - ] - return "\n".join(body) - - -def render_validator_prompt(ctx: StageContext) -> str: - is_checkpoint = ctx.stage_kind == "checkpoint" - header = ( - f"你正在 task-swarm 流程中作为 VALIDATOR 子 agent 执行阶段 {ctx.stage_num}" - + ("(specode 检查点)" if is_checkpoint else "") - + (f"(重验第 r{ctx.round_no} 轮)" if ctx.round_no > 1 else "") - + "。" - ) - - body = [ - header, - "", - f"# 阶段 {ctx.stage_num}: {ctx.stage_title}", - "", - "## 边界", - "- 你**没有** Edit/Write 工具——只能 Bash 跑命令 + Read 看文件。", - "- 验收报告用 Bash 写到 outbox/validation.md。", - "- 不许因为 reviewer approved 就 pass —— 必须**独立**用真实命令证明。", - f"- inbox: {ctx.workspace / 'inbox'}", - f"- outbox: {ctx.workspace / 'outbox'}", - "", - "## inbox 内容", - _inbox_listing(ctx.workspace), - "", - "## 输出协议(严格,主编排器要解析)", - "", - "```markdown", - "## 判定", - "pass | fail", - "", - "## 复现命令", - "```bash", - "<任何人执行都能得到一样结果的命令序列>", - "```", - "", - "## 按子任务的验证结果", - "- [x] 1.1 ...: pass (pytest ...)", - "- [ ] 1.3 ...: fail — 未达 _需求:1.3_", - "", - "## 给 coder 的修复指引(必填 if fail)", - "- 文件: <path>", - "- 位置: <function/line>", - "- 问题: <具体>", - "- 建议: <具体做法>", - "- 涉及需求: _需求:x.y_", - "```", - "", - "末行:`STATUS: ok`(无论 pass 还是 fail,写完报告就 ok)。", - "", - "**死循环识别**:若本轮失败原因与 inbox 中 prev-validation.md 完全相同,", - "在文件**顶部**加 `## 进入死循环风险` 节,主编排器会立刻终止本阶段。", - ] - return "\n".join(body) - - -def render_prompt(role: str, ctx: StageContext) -> str: - if role == "coder": - return render_coder_prompt(ctx) - if role == "reviewer": - return render_reviewer_prompt(ctx) - if role == "validator": - return render_validator_prompt(ctx) - raise ValueError(f"unknown role: {role}") - - -def write_task_file(ctx: 
StageContext, role: str) -> Path: - """Render and write task.md into the workspace, return path.""" - text = render_prompt(role, ctx) - path = ctx.workspace / "task.md" - path.write_text(text, encoding="utf-8") - return path diff --git a/plugins/specode/scripts/task_swarm_state.py b/plugins/specode/scripts/task_swarm_state.py deleted file mode 100644 index 59fc0c5..0000000 --- a/plugins/specode/scripts/task_swarm_state.py +++ /dev/null @@ -1,515 +0,0 @@ -"""task-swarm state machine. - -Holds the run-level state.json and provides pure functions to compute the -next dispatch action. The orchestrator (task_swarm.py) calls `next_action()` -and `advance()` — never tries to "remember" round counters or convergence -status itself. - -state.json shape: -{ - "run_id": "20260517-153012-ab12cd", - "tasks_path": "/abs/path/tasks.md", - "spec_dir": "/abs/path/spec-dir", - "project_root": "/abs/path", - "session_id": "...", - "config": {"parallel": 3, "max_rounds": 3}, - "stages": [ - { - "num": 1, "title": "...", "kind": "stage|checkpoint", - "deps": [..], "files_union": [..], "optional": bool, - "checkpoint_for": int|null, - "leaves": [ {"num":"1.1", "policy":"full|default|coder-only|skip", ...}, ... ], - "phase": "pending|running|converged|failed|skipped", - "rounds": {"reviewer": 0, "validator": 0}, - "last": {"role": "coder|reviewer|validator", "round": N, "judgment": "ok|approved|p0|pass|fail|loop|schema-error"}, - "history": [ {...advance records...} ], - "in_flight": null | {"role":..., "round":..., "started_at":...} - }, ... - ], - "started_at": "...", - "updated_at": "..." 
-} - -Phase transitions: - pending → running → converged | failed - -Action types returned by next_action(): - {"action": "fork", "stage": N, "role": R, "round": K, ...} - {"action": "writeback", "stage": N, "status": "converged|failed"} - {"action": "wait"} — there's work in-flight, model should not fork more - {"action": "done", "summary": {...}} -""" -from __future__ import annotations - -import json -import uuid -from dataclasses import dataclass -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - - -# ---------- io ---------- - -STATE_FILENAME = "state.json" -CURRENT_STATE_VERSION = 1 - - -def _now() -> str: - return datetime.now(timezone.utc).isoformat() - - -def state_path(run_dir: Path) -> Path: - return run_dir / STATE_FILENAME - - -# Registry of one-step migrations: MIGRATIONS[N] takes a v=N state and -# returns a v=N+1 state. Empty until a schema change ships. -MIGRATIONS: dict[int, "callable"] = {} - - -def migrate_state(state: dict) -> dict: - """Run all registered migrations up to CURRENT_STATE_VERSION. - - Future versions (newer than runtime) pass through with a recorded warning; - missing migration entries between known versions raise ValueError so we - don't silently run on half-migrated state. 
- """ - v = int(state.get("version", 1)) - while v < CURRENT_STATE_VERSION: - fn = MIGRATIONS.get(v) - if fn is None: - raise ValueError( - f"task_swarm_state: missing migration from version {v} → {v + 1}" - ) - state = fn(state) - v = int(state.get("version", v + 1)) - if v > CURRENT_STATE_VERSION: - state.setdefault("warnings", []).append( - f"[WARN] state version {v} is newer than runtime ({CURRENT_STATE_VERSION}); " - f"proceeding with best-effort compatibility" - ) - return state - - -def load_state(run_dir: Path) -> dict: - p = state_path(run_dir) - if not p.exists(): - raise FileNotFoundError(f"state.json not found at {p}") - state = json.loads(p.read_text(encoding="utf-8")) - return migrate_state(state) - - -def save_state(run_dir: Path, state: dict) -> None: - state["updated_at"] = _now() - p = state_path(run_dir) - p.parent.mkdir(parents=True, exist_ok=True) - tmp = p.with_suffix(".json.tmp") - tmp.write_text(json.dumps(state, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - tmp.replace(p) - - -# ---------- construction ---------- - -def new_run_id() -> str: - """Deterministic-ish run id: YYYYMMDD-HHMMSS-<6hex>.""" - ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") - return f"{ts}-{uuid.uuid4().hex[:6]}" - - -def build_initial_state( - run_id: str, - tasks_path: Path, - spec_dir: Path, - project_root: Path, - plan: dict, - parallel: int = 3, - max_rounds: int = 3, - reviewer_max_rounds: int | None = None, - validator_max_rounds: int | None = None, - session_id: str = "", -) -> dict: - """Build the initial state.json structure. - - Rounds policy: reviewer and validator loops are counted independently. - By default reviewer loops are tight (1 round) since reviewer P0 is a - subjective signal — repeated 'I think this could be better' bounces - waste budget. Validator fails are objective (test ran, test failed) - so they get the full 3 rounds. 
- - If `reviewer_max_rounds` / `validator_max_rounds` are None, both fall - back to `max_rounds` for backward compatibility. - """ - rev_max = reviewer_max_rounds if reviewer_max_rounds is not None else max_rounds - val_max = validator_max_rounds if validator_max_rounds is not None else max_rounds - stages = [] - for s in plan["stages"]: - stages.append({ - "num": s["num"], - "title": s["title"], - "kind": s["kind"], - "deps": list(s.get("deps") or []), - "files_union": list(s.get("files_union") or []), - "optional": bool(s.get("optional")), - "checkpoint_for": s.get("checkpoint_for"), - "leaves": [dict(l) for l in s.get("leaves") or []], - "phase": "pending", - "rounds": {"reviewer": 0, "validator": 0}, - "last": None, - "history": [], - "in_flight": None, - }) - - # Pre-skip stages whose every leaf is skip, or stage marked optional with - # only coder-only leaves and no requirement (still kept as `pending` if - # has coder leaves — we only auto-skip when ALL leaves are skip). - for st in stages: - if st["kind"] == "stage": - non_skip = [l for l in st["leaves"] if l.get("policy") != "skip"] - if not non_skip: - st["phase"] = "skipped" - - return { - "version": 1, - "run_id": run_id, - "tasks_path": str(tasks_path), - "spec_dir": str(spec_dir), - "project_root": str(project_root), - "session_id": session_id, - "config": { - "parallel": int(parallel), - "max_rounds": int(max_rounds), - "reviewer_max_rounds": int(rev_max), - "validator_max_rounds": int(val_max), - }, - "stages": stages, - "warnings": list(plan.get("warnings") or []), - "started_at": _now(), - "updated_at": _now(), - } - - -def _role_max_rounds(state: dict, role: str) -> int: - """Return the cap for the given role, with legacy fallback to max_rounds.""" - cfg = state["config"] - if role == "reviewer": - return int(cfg.get("reviewer_max_rounds") or cfg.get("max_rounds", 3)) - if role == "validator": - return int(cfg.get("validator_max_rounds") or cfg.get("max_rounds", 3)) - # Coder rounds are bounded 
by whichever upstream loop triggered them; - # use the larger of the two role caps so coder isn't the bottleneck. - return max( - int(cfg.get("reviewer_max_rounds") or cfg.get("max_rounds", 3)), - int(cfg.get("validator_max_rounds") or cfg.get("max_rounds", 3)), - ) - - -# ---------- state queries ---------- - -def get_stage(state: dict, num: int) -> dict: - for s in state["stages"]: - if s["num"] == num: - return s - raise KeyError(f"stage {num} not found") - - -def stage_completed(stage: dict) -> bool: - return stage["phase"] in {"converged", "failed", "skipped"} - - -def deps_satisfied(state: dict, stage: dict) -> bool: - for dep_num in stage["deps"]: - try: - dep = get_stage(state, dep_num) - except KeyError: - continue - if dep["phase"] != "converged": - return False - return True - - -def has_files_conflict(a: dict, b: dict) -> bool: - fa = set(a.get("files_union") or []) - fb = set(b.get("files_union") or []) - return bool(fa & fb) - - -def in_flight_count(state: dict) -> int: - return sum(1 for s in state["stages"] if s.get("in_flight")) - - -# ---------- next_action ---------- - -@dataclass -class Action: - kind: str # fork | writeback | wait | done - payload: dict - - def to_dict(self) -> dict: - return {"action": self.kind, **self.payload} - - -def next_action(state: dict) -> Action: - """Return the single highest-priority next thing the orchestrator should do. - - Priority order: - 1. Any stage whose loop converged but hasn't been written back → writeback - 2. Any stage in-flight → wait - 3. The first stage that's ready to dispatch its next role → fork - 4. All stages done → done - """ - # 1. writebacks pending - for s in state["stages"]: - if s["phase"] in {"converged", "failed"} and not s.get("written_back"): - return Action("writeback", { - "stage": s["num"], - "status": s["phase"], - "rounds": dict(s["rounds"]), - "title": s["title"], - }) - - # 2. 
in-flight blocks new forks beyond parallel limit - parallel_cap = state["config"]["parallel"] - in_flight = in_flight_count(state) - - # 3. find next fork candidate - candidates = [] - for s in state["stages"]: - if stage_completed(s): - continue - if s.get("in_flight"): - continue - if not deps_satisfied(state, s): - continue - action = _next_role_for_stage(state, s) - if action is None: - continue - candidates.append((s, action)) - - # honor parallel cap + file conflict - chosen = None - already_running = [s for s in state["stages"] if s.get("in_flight")] - for s, action in candidates: - if in_flight >= parallel_cap and action["round"] == 1 and action["role"] == "coder": - # don't kick off a brand-new stage while at parallel cap - continue - # file conflict with anything in-flight blocks dispatch - conflict = any(has_files_conflict(s, r) for r in already_running) - if conflict: - continue - chosen = (s, action) - break - - if chosen is not None: - s, action = chosen - return Action("fork", { - "stage": s["num"], - "title": s["title"], - "stage_kind": s["kind"], - **action, - }) - - if in_flight > 0: - return Action("wait", {"in_flight": in_flight}) - - if any(not s.get("written_back") and s["phase"] in {"converged", "failed"} for s in state["stages"]): - # shouldn't reach here because we handle writebacks first; defensive - return Action("wait", {"in_flight": 0}) - - return Action("done", {"summary": summarize(state)}) - - -def _next_role_for_stage(state: dict, stage: dict) -> dict | None: - """Decide the next dispatch step for a single stage. 
- - Pipeline (post-R3 redesign): - - - Normal stage (kind=stage): - coder ok → reviewer (advisory; never triggers fix loops) → converged - coder-only / no reviewable leaves → converged directly after coder ok - - Checkpoint stage (kind=checkpoint): - validator pass → converged - validator fail (within budget) → coder fix → validator re-run - validator fail (at cap) → failed - (reviewer NEVER runs on a checkpoint — the validator IS the gate) - - reviewer no longer participates in fix loops. Its verdict (approved / - p0 / advisory_p0) is captured in history so writeback can surface the - findings as `> ⚠️` annotation on tasks.md for the user to act on. - - Returns None when nothing more to fork (caller will writeback or done). - """ - val_max = _role_max_rounds(state, "validator") - last = stage.get("last") - kind = stage["kind"] - - # Brand new stage - if stage["phase"] == "pending": - if kind == "checkpoint": - return {"role": "validator", "round": 1} - if not any(l.get("policy") != "skip" for l in stage["leaves"]): - return None - return {"role": "coder", "round": 1} - - if stage["phase"] != "running": - return None - - if last is None: - # phase running but no last record — shouldn't normally happen - return {"role": "coder", "round": 1} - - role = last["role"] - judgment = last["judgment"] - round_no = last["round"] - - # --- coder finished --- - if role == "coder": - if judgment in {"failed", "blocked"}: - return None # caller flipped phase to failed; defensive - if kind == "checkpoint": - # checkpoint coder fix → re-validate (validator counts on its own round axis) - val_rounds = [h["round"] for h in stage.get("history", []) if h["role"] == "validator"] - next_round = (max(val_rounds) if val_rounds else 0) + 1 - return {"role": "validator", "round": next_round, "scope": "re-run"} - # Normal stage: dispatch reviewer (advisory) if any reviewable leaf exists - if any(l.get("policy") in {"full", "default"} for l in stage["leaves"]): - return {"role": "reviewer", 
"round": round_no, "scope": "advisory"} - # All coder-only → stage converges directly (no reviewer) - return None - - # --- reviewer finished (advisory; never schedules another fork) --- - if role == "reviewer": - return None - - # --- validator finished --- - if role == "validator": - if judgment == "pass": - return None # caller marks converged - if judgment == "fail": - if round_no >= val_max: - return None # caller marks failed - # Coder fix → validator re-run (no reviewer post-fix on checkpoints) - return {"role": "coder", "round": round_no + 1, "scope": "validator-fail-fix"} - return None - - return None - - -# ---------- advance ---------- - -VALID_JUDGMENTS = { - "coder": {"ok", "failed", "blocked"}, - "reviewer": {"approved", "p0", "loop", "schema-error"}, - "validator": {"pass", "fail", "loop", "schema-error"}, -} - - -def advance(state: dict, stage_num: int, role: str, round_no: int, judgment: str, extra: dict | None = None) -> dict: - """Record a subagent's verdict; flip phase if it terminates the stage. - - Returns the updated stage dict. Caller persists via save_state(). 
- """ - if role not in VALID_JUDGMENTS: - raise ValueError(f"unknown role: {role}") - if judgment not in VALID_JUDGMENTS[role]: - raise ValueError(f"invalid judgment '{judgment}' for role '{role}'") - - stage = get_stage(state, stage_num) - if stage["phase"] in {"converged", "failed", "skipped"}: - raise ValueError(f"stage {stage_num} already terminal: {stage['phase']}") - - # Promote to running on first advance - if stage["phase"] == "pending": - stage["phase"] = "running" - - # Update round counter (we count the *largest* round seen for that role) - if role in {"reviewer", "validator"}: - prev = stage["rounds"].get(role, 0) - if round_no > prev: - stage["rounds"][role] = round_no - - record = { - "role": role, - "round": round_no, - "judgment": judgment, - "at": _now(), - **(extra or {}), - } - stage["history"].append(record) - stage["last"] = {"role": role, "round": round_no, "judgment": judgment} - stage["in_flight"] = None - - # Terminal-state inference - val_max = _role_max_rounds(state, "validator") - kind = stage["kind"] - - if role == "coder" and judgment in {"failed", "blocked"}: - stage["phase"] = "failed" - stage["fail_reason"] = (extra or {}).get("reason") or f"coder {judgment}" - return stage - - # coder-only stage: ok on coder is terminal convergence (no reviewer/validator) - if ( - role == "coder" - and judgment == "ok" - and kind == "stage" - and not any(l.get("policy") in {"full", "default"} for l in stage["leaves"]) - ): - stage["phase"] = "converged" - return stage - - if role == "reviewer": - # R3: reviewer is now purely advisory. It never causes a stage to fail - # and never triggers a coder fix loop. The verdict (approved / p0 with - # evidence / advisory_p0) lives in `history` so writeback can surface - # the findings as `> ⚠️` annotations in tasks.md. - # schema-error / loop don't reach advance: cmd_parse retries the subagent. - # If somehow they do, we still converge — reviewer is non-blocking. 
- if kind == "stage": - stage["phase"] = "converged" - return stage - - if role == "validator": - if judgment in {"loop", "schema-error"}: - stage["phase"] = "failed" - stage["fail_reason"] = f"validator {judgment}" - return stage - if judgment == "pass": - stage["phase"] = "converged" - return stage - if judgment == "fail" and round_no >= val_max: - stage["phase"] = "failed" - stage["fail_reason"] = f"validator FAIL after {round_no} rounds (cap={val_max})" - return stage - - # coder ok / validator fail within budget — stay running - return stage - - -def mark_in_flight(state: dict, stage_num: int, role: str, round_no: int) -> None: - stage = get_stage(state, stage_num) - stage["in_flight"] = {"role": role, "round": round_no, "started_at": _now()} - - -def mark_written_back(state: dict, stage_num: int) -> None: - stage = get_stage(state, stage_num) - stage["written_back"] = True - - -# ---------- summary ---------- - -def summarize(state: dict) -> dict: - return { - "run_id": state["run_id"], - "stages": [ - { - "num": s["num"], - "title": s["title"], - "kind": s["kind"], - "phase": s["phase"], - "rounds": dict(s["rounds"]), - "fail_reason": s.get("fail_reason"), - } - for s in state["stages"] - ], - } diff --git a/plugins/specode/scripts/task_swarm_writeback.py b/plugins/specode/scripts/task_swarm_writeback.py deleted file mode 100644 index a335e06..0000000 --- a/plugins/specode/scripts/task_swarm_writeback.py +++ /dev/null @@ -1,267 +0,0 @@ -"""Safe tasks.md writeback for task-swarm. - -The orchestrator must NOT directly Edit tasks.md during a swarm run. -This module performs the only sanctioned mutation: - - - flip checkbox state on a known stage / leaf line - - append `> ` comment lines (audit trail) - -Anything else (metadata lines, traceability, headings, indentation) is -preserved bit-for-bit. The matching INV-9 hook acts as a backstop in case -the model bypasses this module. 
- -Convergence semantics: - - stage.phase == "converged": - → checkbox for every leaf with subtask_status=done becomes [x] - → stage's top-level checkbox becomes [x] (if every non-skip leaf done) - → append `> ✔ 第 R 轮收敛` annotation - - stage.phase == "failed": - → leaf checkboxes stay [ ] (or [~] if partially done) - → top-level stage gets [~] - → append `> ✗ 已达 N 轮上限仍未收敛` annotation -""" -from __future__ import annotations - -import re -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable - - -STAGE_LINE_RE = re.compile(r"^(- \[)([ x~*\-])(\] )(\d+)(\. .+)$") -LEAF_LINE_RE = re.compile(r"^( - \[)([ x~*\-])(\] )(\d+\.\d+)( .+)$") -ANNOTATION_LINE_RE = re.compile(r"^\s*> ") - - -def _new_marker(stage_phase: str, leaf_status: str | None, optional: bool) -> str: - """Return the character that should be inside `[ ]` after writeback.""" - if optional: - return "*" - if stage_phase == "skipped": - return "*" - if leaf_status == "done": - return "x" - if leaf_status == "skipped": - return "*" - if leaf_status == "failed": - return " " - if stage_phase == "converged": - return "x" - if stage_phase == "failed": - return "~" - return " " - - -@dataclass -class WritebackPlan: - stage_num: int - stage_phase: str - rounds: dict - leaves_status: dict[str, str] # "1.1" → "done" | "failed" | "skipped" - fail_reason: str = "" - annotation: str = "" - # R3: surface reviewer advisory verdict as a `> ⚠️` annotation block. - # Optional — populated by task_swarm.cmd_writeback when reviewer ran. 
- reviewer_summary: dict | None = None - - -def _annotate(plan: WritebackPlan) -> str: - if plan.annotation: - return plan.annotation - r_val = plan.rounds.get("validator", 0) - if plan.stage_phase == "converged": - rounds_str = "" - if r_val > 1: - rounds_str = f"(validator {r_val} 轮)" - return f"> ✔ task-swarm 收敛{rounds_str}" - if plan.stage_phase == "failed": - reason = f": {plan.fail_reason}" if plan.fail_reason else "" - return f"> ✗ task-swarm 未收敛{reason}" - return "" - - -def _reviewer_annotations(summary: dict | None) -> list[str]: - """Build `> ⚠️ ...` annotation lines from a reviewer verdict summary. - - Expected shape (from task_swarm_outbox.parse_review): - { - "judgment": "approved" | "p0" | ..., - "p0_count": int, - "p0_items": [str, ...], - "advisory_p0_count": int, - "advisory_p0_items": [str, ...], - "conclusion": str, - } - Returns one or more annotation lines (each prefixed with `> `), or empty - list when nothing worth surfacing. - """ - if not summary: - return [] - p0 = summary.get("p0_items") or [] - adv = summary.get("advisory_p0_items") or [] - if not p0 and not adv: - return [] - lines: list[str] = [] - header = "> ⚠️ 评审建议(advisory):" - counts = [] - if p0: - counts.append(f"{len(p0)} 条带证据 P0") - if adv: - counts.append(f"{len(adv)} 条 advisory") - lines.append(header + "、".join(counts)) - for item in p0[:5]: - lines.append(f"> • {item}") - for item in adv[:3]: - lines.append(f"> • (adv) {item}") - if len(p0) > 5 or len(adv) > 3: - lines.append("> …(更多见 review.md 完整内容)") - return lines - - -def apply_writeback(text: str, plan: WritebackPlan) -> str: - """Apply plan to tasks.md text. Returns new text. - - Only mutates checkbox characters and appends annotation lines. Never - touches metadata, headings, traceability, or indentation. 
- """ - lines = text.splitlines(keepends=True) - out: list[str] = [] - - in_stage = False - stage_block_end_idx: int | None = None - annotation = _annotate(plan) - annotation_appended = False - - # First pass: find indices for the target stage block. - stage_start: int | None = None - block_end: int | None = None - for idx, raw in enumerate(lines): - m = STAGE_LINE_RE.match(raw.rstrip("\n")) - if m and int(m.group(4)) == plan.stage_num: - stage_start = idx - continue - if stage_start is not None: - m2 = STAGE_LINE_RE.match(raw.rstrip("\n")) - if m2: # next stage starts - block_end = idx - break - if stage_start is None: - # Stage not found — return text unchanged (defensive). - return text - if block_end is None: - block_end = len(lines) - - for idx, raw in enumerate(lines): - line_no_nl = raw.rstrip("\n") - newline = raw[len(line_no_nl):] - - # Stage top-level line - if idx == stage_start: - m = STAGE_LINE_RE.match(line_no_nl) - optional = (m.group(2) == "*") - # Determine stage marker based on phase + whether any non-failed leaves - new_marker = _new_marker(plan.stage_phase, None, optional) - new_line = f"{m.group(1)}{new_marker}{m.group(3)}{m.group(4)}{m.group(5)}{newline}" - out.append(new_line) - continue - - # Inside the stage block: handle leaves - if stage_start < idx < block_end: - m_leaf = LEAF_LINE_RE.match(line_no_nl) - if m_leaf: - leaf_num = m_leaf.group(4) - leaf_status = plan.leaves_status.get(leaf_num) - optional = (m_leaf.group(2) == "*") - new_marker = _new_marker(plan.stage_phase, leaf_status, optional) - new_line = f"{m_leaf.group(1)}{new_marker}{m_leaf.group(3)}{m_leaf.group(4)}{m_leaf.group(5)}{newline}" - out.append(new_line) - continue - - out.append(raw) - - # Append annotation(s) immediately after the stage's last content line. 
- annotations_to_insert: list[str] = [] - block_text = "".join(out[stage_start:block_end]) - if annotation and annotation.strip() not in block_text: - annotations_to_insert.append(" " + annotation + "\n") - for rev_line in _reviewer_annotations(plan.reviewer_summary): - if rev_line.strip() not in block_text: - annotations_to_insert.append(" " + rev_line + "\n") - - if annotations_to_insert: - insertion_idx = block_end - 1 - while insertion_idx > stage_start and not out[insertion_idx].strip(): - insertion_idx -= 1 - # Insert in reverse so the order in final output matches the list. - for line in reversed(annotations_to_insert): - out.insert(insertion_idx + 1, line) - - return "".join(out) - - -def writeback_to_file(path: Path, plan: WritebackPlan) -> None: - text = path.read_text(encoding="utf-8") - new_text = apply_writeback(text, plan) - if new_text != text: - tmp = path.with_suffix(path.suffix + ".swarm.tmp") - tmp.write_text(new_text, encoding="utf-8") - tmp.replace(path) - - -# ---------- diff helper for INV-9 ---------- - -def _is_checkbox_swap(old: str, new: str) -> bool: - m_old = re.match(r"^(\s*- \[)[ x~*\-](\].*)$", old) - m_new = re.match(r"^(\s*- \[)[ x~*\-](\].*)$", new) - if not m_old or not m_new: - return False - return m_old.group(1) == m_new.group(1) and m_old.group(2) == m_new.group(2) - - -def diff_safe_line_by_line(old_text: str, new_text: str) -> tuple[bool, str]: - """Simpler line-by-line diff: pair lines positionally where possible. - - Strategy: compute a unified diff with `difflib.SequenceMatcher`, then for - each `replace` block ensure every (old, new) pair is a checkbox swap; for - every `insert` ensure inserted lines are annotation/blank; for every - `delete` reject if any line is non-blank-non-annotation. 
- """ - import difflib - old_lines = old_text.splitlines() - new_lines = new_text.splitlines() - sm = difflib.SequenceMatcher(a=old_lines, b=new_lines) - for tag, i1, i2, j1, j2 in sm.get_opcodes(): - if tag == "equal": - continue - if tag == "replace": - old_chunk = old_lines[i1:i2] - new_chunk = new_lines[j1:j2] - if len(old_chunk) != len(new_chunk): - return False, f"禁止替换段长度不一致: {old_chunk!r} → {new_chunk!r}" - for o, n in zip(old_chunk, new_chunk): - if o == n: - continue - if _is_checkbox_swap(o, n): - continue - return False, f"禁止改动非 checkbox 内容: {o!r} → {n!r}" - elif tag == "insert": - for n in new_lines[j1:j2]: - if not n.strip(): - continue - if ANNOTATION_LINE_RE.match(n): - continue - return False, f"禁止插入非注释行: {n!r}" - elif tag == "delete": - for o in old_lines[i1:i2]: - if not o.strip(): - continue - if ANNOTATION_LINE_RE.match(o): - continue - return False, f"禁止删除非空非注释行: {o!r}" - return True, "" - - -if __name__ == "__main__": - sys.stderr.write("task_swarm_writeback is a library module; use task_swarm.py CLI.\n") - sys.exit(0) diff --git a/plugins/specode/scripts/verify_local.sh b/plugins/specode/scripts/verify_local.sh deleted file mode 100755 index 737748b..0000000 --- a/plugins/specode/scripts/verify_local.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/sh -# Local end-to-end verification helper for specode. -# -# Usage: -# sh scripts/verify_local.sh setup # set up demo state for testing -# sh scripts/verify_local.sh status # show what's currently active -# sh scripts/verify_local.sh teardown # clean up demo state -# sh scripts/verify_local.sh tail # tail today's audit log - -set -e - -PLUGIN_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" -TODAY=$(date -u +%Y-%m-%d) -AUDIT="$HOME/.specode/audit/$TODAY.log" - -case "${1:-help}" in - setup) - python3 "$PLUGIN_DIR/scripts/spec_state.py" demo-activate \ - --slug demo-verify --phase implementation - echo "" - echo "Now start Claude Code with the plugin:" - echo " claude --plugin-dir $PLUGIN_DIR" - echo "" - echo "And in another terminal tail the audit log:" - echo " mkdir -p ~/.specode/audit && touch $AUDIT && tail -f $AUDIT" - ;; - status) - python3 "$PLUGIN_DIR/scripts/spec_state.py" status - echo "" - python3 "$PLUGIN_DIR/scripts/spec_sync.py" status 2>/dev/null || true - echo "" - if [ -e "$HOME/.specode/.any-active" ]; then - echo "sentinel: present ($HOME/.specode/.any-active)" - else - echo "sentinel: missing (hooks will short-circuit)" - fi - ;; - teardown) - python3 "$PLUGIN_DIR/scripts/spec_state.py" demo-deactivate \ - --session "${TERM_SESSION_ID:-demo-verify}" 2>/dev/null || true - # Remove the demo spec dir if it lives under document_root - ROOT=$(python3 -c "import sys; sys.path.insert(0,'$PLUGIN_DIR/scripts'); import spec_state; r=spec_state.get_document_root(); print(r or '')") - if [ -n "$ROOT" ] && [ -d "$ROOT/demo-verify" ]; then - rm -rf "$ROOT/demo-verify" - echo "✓ removed $ROOT/demo-verify" - fi - python3 "$PLUGIN_DIR/scripts/spec_state.py" sync-sentinel - ;; - tail) - mkdir -p "$HOME/.specode/audit" - touch "$AUDIT" - tail -f "$AUDIT" - ;; - *) - cat <<USAGE -specode local verification helper. - -Commands: - setup Create a demo spec under document_root and activate it. - After running this, start Claude Code with --plugin-dir to see - hooks fire against the active demo. - status Print active-spec info, ledger summary, and sentinel state. - tail Tail today's audit log at ~/.specode/audit/<date>.log - teardown Deactivate the demo spec and remove the spec dir. 
- -USAGE - ;; -esac diff --git a/plugins/specode/skills/specode/SKILL.md b/plugins/specode/skills/specode/SKILL.md index 89a5499..8418744 100644 --- a/plugins/specode/skills/specode/SKILL.md +++ b/plugins/specode/skills/specode/SKILL.md @@ -1,211 +1,367 @@ --- name: specode -description: Specification-driven workflow for requirements, technical design, task lists, implementation, acceptance, and ongoing spec iteration. Use when the user explicitly invokes /spec, explicitly says to use spec mode, or the current conversation has an active persistent specode session that has not been ended. Do not use for ordinary coding, planning, requirements, design, or documentation requests unless spec mode is explicitly requested or already active. +description: Specification-driven workflow. All hooks are advisory injections — never blocking. Activates only when the user explicitly invokes `/specode:spec`, `/specode:continue`, `/specode:status`, `/specode:end`, `/specode:task-swarm`, or explicitly asks to use spec mode. Every active-spec turn must respect the phase order, selector format, code-doc sync reminders, and the status footer. --- -# Spec Mode +# specode — Spec-Mode 工作流 -File-first specification-driven workflow for CLI agents (Codex, Claude Code). Generated Markdown documents are the source of truth; coding starts only after requirements, design, and tasks are confirmed. +文件优先的规范驱动工作流。`requirements.md` / `bugfix.md` / `design.md` / `tasks.md` / `implementation-log.md` 是事实源;代码改动总是滞后于文档落地。所有 hook 都是**提示式注入**,永远不阻断;hook 注入失败或缺失时,本 SKILL.md 的硬约束仍然完整有效。 ## Activation Guard -This skill is opt-in only. 
Activate **only** when the user's current message contains one of: +只在以下任一情况激活: -- `/spec`, `/continue`, `/status`, `/end` -- `/spec -h` / `--help` -- `/spec --persist`, `/spec --freeform`, `/spec --strict` -- `/spec --set-vault`, `/spec --set-root`, `/spec --detect-vault`, `/spec --vault-status`, `/spec --sync-status` -- `使用 spec 模式` / `启用 spec 模式` / `用 spec 模式` / `use spec mode` +- 用户当前输入包含 `/specode:spec`、`/specode:continue`、`/specode:status`、`/specode:end`、`/specode:task-swarm`。 +- 用户显式说"使用 spec 模式" / "use spec mode"。 +- 当前会话的 `~/.specode/sessions/<session_id>.json` 中 `mode=active` 或 `mode=readonly`。 -**Hard rules:** +`mode=ended` 或 sessions 文件不存在且无触发条件 → **不要激活**,按普通对话处理。 -1. `/spec` always activates the spec workflow — even when the requested work is to inspect or modify the `specode` skill itself. -2. **Command compliance**: when any spec command is triggered, follow the corresponding workflow exactly. Do not skip phases, phase gates, or confirmation steps for any reason. Commands are absolute; the assistant's judgment cannot override a command. -3. **Persistent session exception**: if a persistent specode session is active for the current conversation, route follow-up messages through this skill until the user runs `/end`. +## Session Lifecycle -Do **not** activate for ordinary coding, planning, requirements, design, task lists, bugfixes, implementation, or documentation requests. In those cases, work normally — do not create spec folders. +持久会话是**唯一**模式(无 `--persist` 标志)。所有写操作必须同时更新 `<spec-dir>/.config.json` + `~/.specode/sessions/<session_id>.json` 两处;CLI 用 tempfile + `os.replace` + `os.fsync` 保证原子性;任一写失败 → CLI 整体 exit 1 + 回滚 + 你在 chat 如实报告,**禁止把 in-memory 半成功状态当成已落地**。 -## ⛔ Iron Rules — Top of Mind +### CLI 调用规约(强制) -These rules are checked at **every turn** of every specode session. Never violate them. Never defer them. If the user pushes back, acknowledge — then comply with the rule first, discuss after. 
+所有 specode CLI **必须**通过 `run.sh` 包装调用,脚本路径用 `$CLAUDE_PLUGIN_ROOT`(fallback `$CODEBUDDY_PLUGIN_ROOT`)拼绝对路径——**禁止**假设 cwd 在 scripts 目录,**禁止**裸 `python3 <脚本名>` 调用: -**Enforcement levels (0.4.0+)**: rules 1, 5–9 below are **hard-enforced** by hooks (violation = `exit 2`, tool call denied). Rules 2, 3, 4, 6 are **advisory** — sticky warnings on the ledger, surfaced in the next turn's status block. Advisory does NOT mean optional: a sticky warning that lingers across turns signals real code-doc drift and accumulating risk. Resolve them by writing the missing doc (auto-clears INV-1/2/4) or `/spec --dismiss-advisories` if intentional. +```bash +sh "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/run.sh" \ + "${CLAUDE_PLUGIN_ROOT:-${CODEBUDDY_PLUGIN_ROOT}}/scripts/<name>.py" \ + <verb> <args...> +``` -1. ⛔ **New spec via `spec_init.py` only — no manual scaffolding.** Any new spec (one-shot `/spec` or `/spec --persist`) MUST be created by calling `spec_init.py --name <slug> --requirement-name "<显示名>" --source-text "<需求>"`. The script alone resolves the document root (three-tier, see §Document Root Resolution) and writes `.config.json`. You **MUST NOT**: - - `mkdir` a spec directory anywhere yourself (not in cwd, not under `~/Git/<x>/`, not in any path you constructed) - - `Write` `requirements.md` / `bugfix.md` / `design.md` / `tasks.md` / `.config.json` to a path you chose - - Interpret phrasing like "在项目下创建"、"在 git 目录下创建一个新项目"、"放本地" as a directive to place spec docs in the project / cwd. Those phrases describe **future code location**, not spec-document location. Spec docs always live under the resolved doc_root; code lives wherever the user wants. **Never conflate the two.** +`run.sh` 自动探测 `python3 → python → py` 三档解释器并 exec 透传参数;任何 `python3 spec_session.py ...` 形式的裸调用在大多数 cwd 都会 `No such file or directory`。下表中的脚本名是简写,**实际 Bash 工具调用必须套用上面模板**。 - Workflow: derive slug → call `spec_init.py` → fill content into the files it created. 
If `spec_init.py` exits with `no_spec_root`, **stop and surface the guidance verbatim**; do not invent a fallback location. +四个命令的 CLI 展开: -2. ⛔ **Document-first.** Any change to requirements / design / tasks discussed in chat MUST be written to the corresponding spec document **in the same turn**, *before* further discussion or implementation. Verbal-only changes are invisible to the next session and silently drift from the persisted spec. +| 命令 | 解析 → 关键 CLI 调用 | +|---|---| +| `/specode:spec <需求>` | **优先** `-n <slug> <需求>`:slug 直接用作 spec 目录名;**兼容** `<名称>:<内容>` / 纯 `<需求>`(主代理推导 slug)→ `spec_init.py --name <slug> --requirement-name "<显示名>" --source-text "<需求>" --session <session_id>` | +| `/specode:continue [slug]` | 无 slug:`spec_session.py list-specs` 列表 → 用户回编号;有 slug:`spec_session.py acquire --spec <dir> --session <id>`(LockHeld → `takeover-options` 选择器)→ `continue` + `load` → 状态摘要 + 状态行 footer → end turn | +| `/specode:end` | `spec_session.py end --session <id>`(释锁 + mode=ended) | +| `/specode:status` | `spec_session.py status --session <id>` 或 `spec_status.py` | -3. ⛔ **Post-`/continue` sync — 非常重要.** After `/continue` you are resuming an **already-landed** spec. **Every** subsequent adjustment to requirements or design — even a single clarifying sentence from the user — MUST be reflected in `requirements.md` / `bugfix.md` / `design.md` / `tasks.md`, **in the same turn**. Do not wait for "later", do not batch into "next round", do not say "I'll update it after the code". Write **now**. The user said it → write it. The next session can only see what was persisted; chat is ephemeral. +→ 完整 phase 子步骤、`/continue` 接管流程详见 `references/workflow.md`。 -4. ⛔ **tasks.md 测试要点 follow-mode.** `requirements.md` or `bugfix.md` modified → update the `## 测试要点` section of `tasks.md` in the **same turn**, derived from the new SHALL statements. This is INV-4 (enforced at `Stop`): touching requirements/bugfix without touching tasks.md → hook denies the turn. +### session_id 的获取 -5. 
⛔ **Write-before-verify-lock.** Before any `Edit`/`Write` on a spec document, call `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py verify-lock <spec-dir> --session <id>`. Returns `evicted` → stop work immediately and tell the user the spec was taken over by another session. +- `SessionStart` hook 注入当前 `session_id`;`UserPromptSubmit` hook 每轮重复注入避免遗忘。 +- 调任何 specode CLI 时必须传 `--session <session_id>`。 +- 永远**不要** invent session_id、不要从用户输入解析、不要在 chat echo 完整 ID(状态行只取前 8 位)。 -6. ⛔ **Phase gate compliance.** No skipping confirmation steps. No auto-selecting at gates. No "this seems simple, let's skip ahead". Commands are absolute; the assistant's judgment cannot override them. +## Status Footer -7. ⛔ **Forced writes.** Every config / document mutation must be persisted on the spot. When a write fails (IOError / permission / `lock_lost`), abort the operation — never continue with in-memory unpersisted state. +active spec 期间,**每一次响应末尾**必须额外输出状态行,与正文空一行隔开: -8. ⛔ **Selector via `spec_choice.py` only — never hand-roll options.** Every phase-gate selector (workflow choice / 文档确认 / 任务执行 / `/continue` 接管 / 验收 / 澄清完成) MUST be produced by running the exact `spec_choice.py` command from `references/prompts.md` and relaying its stdout **verbatim**. You **MUST NOT** type the option list from memory, paraphrase it, drop options, reorder them, or translate the labels. Hand-rolling silently hides newer options the script knows about (e.g. omitting `用 task-swarm 多 agent 并发` from the 任务执行 selector and forcing the user into the default path). If you don't have the exact command in context, Read `references/prompts.md` first — never improvise. +```text +─── spec-mode ─── spec: <slug> | session: <session_id 前 8 位> | phase: <phase> | /specode:end 退出 +``` -9. ⛔ **Non-interactive Bash (INV-11).** Every `Bash` command runs in a no-TTY harness. Commands that wait on stdin (`Ok to proceed?`, `[Y/n]`, `$EDITOR` open, password prompt) will hang forever. 
The `bash_guard` hook hard-denies the most common offenders — but you MUST default to non-interactive form yourself: - - `npm create xxx -- --yes` / `npx --yes xxx` / `npm init -y` / `yarn create xxx --yes` - - `git commit -m "..."` (never bare `git commit` — opens `$EDITOR`) - - `apt-get install -y xxx` (or prefix `DEBIAN_FRONTEND=noninteractive`) - - `gh pr create --title "..." --body "..."` - - `ssh -o BatchMode=yes ...` (fails fast on auth prompt instead of waiting) - - **Never** run `vim`/`nano`/`less`/`top` etc. — use Read/Edit tools or pipe to `head`/`cat` - - **Never** start bare REPL (`python3` alone, `node` alone) — use `python3 -c '<code>'` - - **Never** use `git rebase -i` / `git add -p` / `git add -i` - - When a Bash run completes with stdout containing a hang signature (`Ok to proceed?`, `[Y/n]`, etc.) or exit 124, a `PostToolUse` advisory is injected into your next turn — **do not retry the same command**; either rewrite to non-interactive form or report to the user with the exact command for them to run manually. +只读模式追加 `[只读]` 字段。状态行是机器友好格式(`─── spec-mode ───` 三符号包裹),不允许装饰、不允许 emoji。当本 turn 输出 selector 时,状态行放在 selector **之前**,再空一行接 selector。`mode=ended` 或不在 spec 模式 → **不**输出状态行。 -These rules trigger detectable signals (lint, `/continue` ⚠ markers, verify-lock exit codes). Treat any of those signals as a regression on your part, not a tool quirk. +### 新 spec 创建 / 接管的当 turn(hook 尚未刷新) -## Command Entry (Summary) +`/specode:spec <需求>`(`spec_init.py` 成功)、`/specode:continue [slug]`(`acquire`+`load`+`continue` 成功)这两类**当前 turn 内**:sessions/<id>.json 已经被 CLI 改成 `mode=active` + `pending_selector=<对应 selector>`,但 hook 已经在 user-prompt 提交时跑过了、本轮**不会再注入** footer / selector 提醒。主代理**必须主动**完成: -```text -/spec <requirement or path> ← one-shot workflow -/spec --persist <requirement or path> ← persistent session (footer + /end) -/continue [spec-slug] | /status | /end ← session control +1. chat 简报 2-3 行报告创建 / 接管结果(slug / phase / spec_dir 关键路径)。 +2. 
按本节 §Status Footer 规则**主动**输出状态行 footer。 +3. 按 §Selectors 规则**主动**调 `AskUserQuestion` 呈现对应 `pending_selector`(spec 入口是 `workflow-choice`;continue 接管 ended 会到 `workflow-choice` 或文档 phase 对应 selector,详见 `references/workflow.md`)。 -/spec --set-vault <p> | --set-root <p> | --detect-vault | --vault-status -/spec --freeform | --strict | --sync-status -/spec -h ← help (hook-intercepted) -``` +**严禁**说 "使用 `/specode:continue` 进入下一阶段"、"你可以使用 ... 推进"、"下一步请输入 ..." 等让用户再输命令的引导——**spec 已在 active 模式,流程由 selector 推进,不需要用户再输命令**。`/specode:spec` / `/specode:continue` 是**持续流程的入口**,进入之后整条 phase 链由 selector + hook + phase-transition 自动推进,用户只通过 selector 选项做决策、通过 chat 给反馈、通过 `/specode:end` 退出。 + +下一轮 user-prompt 起 hook 会自动接上 selector / footer / 文档优先 / 模式提醒注入;本规则只 cover **首 turn**(spec_init / acquire 完成的那个 turn)。 + +## Spec 文档生成(主代理直接写,不 fork subagent) -→ **完整命令、子标志 dispatch、可选 spec 名前缀、会话模式、Helper Scripts、Hook 拦截**:`references/commands.md` +4 份核心 spec 文档(`requirements.md` / `bugfix.md` / `design.md` / `tasks.md`)由**主代理本身**生成,**不** fork subagent。统一规则: -## Pre-requirements Clarification (Plan-mode) +1. **Read 模板骨架**:`${CLAUDE_PLUGIN_ROOT}/assets/templates/<phase>.md`(4 份模板已就绪;章节大纲 / EARS SHALL 写法 / traceability 标签格式见 `references/templates.md`) +2. **按 `source_text`(用户原始需求)填空**:spec 的 `source_text` 字段(`<spec-dir>/.config.json.source_text`)是用户输入的需求原文,是 single-source-of-truth;按需求展开 SHALL / 设计决策 / 任务粒度,**严禁 hallucinate** "通用 X 系统应该有的需求"。需求模糊时进入 §Pre-requirements Clarification 走 `clarification-wizard`,不要凭空 invent +3. **Write 到 `<spec-dir>/<phase>.md`**:原子覆盖(spec_init 已经创建过空模板,主代理重新 Write 即可),写完按 §Document Output Brevity 报路径 + 3-8 条变更要点 + 未决问题 -Before generating `requirements.md` / `bugfix.md`: evaluate whether the user's requirement is unambiguous enough to translate into EARS SHALL statements **without invention**. 
+理由:0.10.11 起 `spec-writer` subagent 删除(subagent 拿不到主代理的 SKILL 上下文 + 用户原始需求 + 流程状态,hallucinate 通用模板内容)。主代理直接写质量更高、上下文连续、流程更顺。 -- **Clear enough** → proceed to workflow selection and document generation. -- **Real ambiguity** affecting scope / behavior / UX / data / validation / acceptance → enter clarification dialogue first. Phase stays in `intake`. **Do not write any spec document yet.** +`implementation-log.md` 同样由主代理直接追加(发生设计偏离 / 关键决策时)。 -每轮 ≤5 个【阻塞】项;用户答复后用 `references/prompts.md` §澄清完成 selector 决定 `进入下一阶段` 或 `继续澄清`。**Never** invent missing scope, business rules, UI behavior, data fields, or acceptance criteria. +## Selectors -→ 详见 `references/prompts.md` §Template B(开放式澄清问答)+ §澄清完成 +每个 phase-gate 节点必须**调用宿主内置 `AskUserQuestion` 工具**呈现选择器;工具自动渲染上下键导航 + 回车提交 + ESC 取消 + "Other" 自定义输入。具体退化路径(自己输出 markdown 列表让用户回复编号、自加 `Type something` 保留位等)见下方 §「看到 hook 注入…」的反例列表。 -## Document Root Resolution (Iron Law) +### `AskUserQuestion` 工具语义(重要 / 关乎流程连续性) -Three-tier resolution. **No project fallback, no home fallback.** +`AskUserQuestion` 是**同步阻塞工具**——调用它后宿主渲染选择器、等用户选项确定后**作为 tool result 返回给你**(你看到的不是 user prompt,是 tool output)。**同一 turn 内继续处理**,无需 end turn: -1. `--root` argument or `SPECODE_ROOT` env (highest) -2. `~/.config/specode/config.json` → `obsidianRoot` -3. Auto-detect Obsidian vault → `<vault>/spec-in/<os>-<user>/specs` (and persist) +1. 调 `AskUserQuestion` → 工具阻塞等用户选项 +2. 拿到 user 选项 → **同一 turn 内**按 selector 模板的「**用户选定后流程**」段继续推进(每个 selector 模板末尾都有这段) +3. 推完一个 phase 子步骤(写文档 / `phase-transition` / 呈现下一 selector)后**才** end turn 等下一轮 user prompt -All three miss → **hard stop**, output guidance, exit. `/spec` and `/continue` use the **same** resolution. 
**Spec documents are NEVER allowed outside the resolved doc_root**: +**严禁**拿到选项后只 chat 一句 "已选择 X,请下一轮输入 `/specode:continue`" 就 end turn —— `/specode:spec` / `/specode:continue` 是**持续流程的入口**,进入之后整条 phase 链由 selector + hook + phase-transition 自动推进。**用户只通过 selector 选项做决策、通过 chat 给反馈、通过 `/specode:end` 退出**,不需要重复输入命令推进流程。"命令"在 specode 里是流程入口而不是回合触发器。 -- ❌ `<project>/specs/`、`<project>/specode/`、`<project>/spec/`、`<project>/<任意名>/` -- ❌ `~/Git/<x>/specode/`、`~/Git/<x>/specs/` -- ❌ `<cwd>/specs/` -- ❌ Any path of your own choosing +### 呈现 selector 时禁止 invent / 简化选项 -The directory name (`specs`, `specode`, anything else) does NOT matter — the **location** (under doc_root or not) is what matters. If the user says "在项目下"、"在这个目录下"、"放本地" referring to where to create the spec, treat that as **misinterpreting code scope as document scope** and clarify: code can live in the project; spec docs must live under the configured doc_root. If no doc_root is configured, run `/spec --set-vault <p>` or `/spec --set-root <p>` first, **not** invent a fallback. +调 `AskUserQuestion` 呈现 selector 时**必须**用 `_selectors.py` 中 `SELECTOR_PROMPTS[<key>]` 的 `question` / `header` / `options[*].label` / `options[*].description` **逐字**传参(索引见 `references/selectors.md` §8 总览表),**禁止**自己改写成 "任务清单已就绪,下一步?" / "开始编码" 这种简化版——hook 注入的 selector 模板里有固定的 question 文本与 N 个固定 label / description,模型读到 "**用户选定后流程**" 段后续要做的是「按这个模板传参调工具」,不是「自己想一个更简短的选择器」。 -→ 详见 `references/obsidian.md` +实际反例:选定 `doc-confirm-design` 后切到 tasks phase,hook 下一轮会注入 `tasks-execution` selector 模板(4 个固定选项:用 task-swarm / 顺序执行 / 需要调整 / 暂不 coding);如果同一 turn 内主代理主动呈现,**必须查 `_selectors.py` SELECTOR_PROMPTS['tasks-execution']** 拿到 4 个固定选项原文,不要 invent 成 2 选项简化版。 -## Multi-Window + Lock (Iron Law) +### phase-transition 不退出 spec 模式 -Different agent windows may work on **different** specs in parallel. The **same** spec is held by at most one session at a time via a write lock in its `.config.json`. 
+`spec_session.py phase-transition` 切换 spec 的**内部 phase**(intake→requirements→design→tasks→implementation→acceptance→iteration),spec 仍然在 `mode=active`,session 仍然持锁,hook 继续每轮注入「📝 文档优先」「🔄 代码-文档同步」「🪧 状态行 footer」「⛔ 仍处于 spec 模式」四条提醒。**只有 `/specode:end`** 才让 session `mode=ended`、释锁、停 hook。 -**Before any spec document write**, three checks: +**严禁**说 "Spec 流程完成!现在退出 spec 模式,开始编码实现" / "spec 已完成" —— `tasks-execution` 选 "用 task-swarm" / "顺序执行" 后是 `phase-transition → implementation`,**仍在 spec 模式**,主代理改代码前后必须按 §Code-Doc Sync Reminders 同步 `tasks.md` / `implementation-log.md` / `design.md`。 -1. **specId**: active-pointer.specId == .config.json.specId -2. **boundary**: spec_dir is inside documentRoot (`spec_session.ensure_within_root`) -3. **lock**: `spec_session.py verify-lock <spec-dir> --session <id>` returns `ok` +「spec 流程完成 / 退出 spec 模式」判断**只有一条**才成立: +- 用户主动输入 `/specode:end` -Any failure → refuse the write, surface the error, do not silently continue. `/continue <slug>` on a locked spec must offer three options: 强制接管 / 只读查看 / 取消. Heartbeat before every Edit/Write; stale lock = 30 min. +注意:`acceptance-gate` 选「验收通过,进入 iteration」**只是把 phase 切到 iteration 默认停留态**,**不**自动呈现 `iteration-scope`、**不**退出 spec 模式——spec 仍 `mode=active`,session 仍持锁,hook 仍按轮注入四条提醒,直到用户 `/specode:end`。`iteration-scope` 仅在用户后续 turn **显式**提出迭代调整意图时由主代理主动呈现。 -→ 详见 `references/lock-protocol.md` +三种类型映射到 `AskUserQuestion`: -## Phase Gates +| 类型 | `AskUserQuestion` 参数形态 | 何时用 | +|---|---|---| +| **A 单列单选** | `questions=[1 question]` + `multiSelect=false` | 一个问题、互斥选项、单选。绝大多数 phase-gate。 | +| **B wizard** | `questions=[2-4 question]` + 每个 `multiSelect=false` | 一组无依赖子问题打包;**仅用于需求澄清问答**。 | +| **C 复选框多选** | `questions=[1 question]` + `multiSelect=true` | 非互斥选项可同时勾选。**仅 iteration-scope 一个场景**。 | -Phase order (**no skipping**): requirements (or bugfix) → Confirm → design → Confirm → tasks → Confirm → ask whether to execute → Code → validate → accept → iteration. 
+`AskUserQuestion` 工具铁约束(详见工具自身文档): +- 一次调用 `questions` 数组 **1-4 项**(B 类型 wizard 即占用全部 4 个 slot)。 +- 每个 question 的 `options` **2-4 项**;超过 4 项请收敛或拆 wizard。 +- `header` 是 chip-tab 短标签(≤12 字符)。 -At each gate, in the same response: (1) show document path, summary, key changes, unresolved questions; (2) invoke `spec_choice.py` — in non-interactive shells (Claude Code Bash, CI) the script prints the option block + `AWAITING_USER_CHOICE` sentinel on stdout and exits 0; relay stdout **verbatim** and end the turn; (3) **end the turn**. +→ 8 个场景(11 keys)的完整 `AskUserQuestion` 调用模板详见 `references/selectors.md`;常量库实现在 `spec_session/_selectors.py` 的 `SELECTOR_PROMPTS` 字典。 -Auto-selecting a default at a phase gate is **never** acceptable. +### 8 个固定场景 -→ 详见 `references/workflow.md` §Phase Gates Detailed Sub-steps + `references/iteration.md` +| 场景 key | 类型 | 触发 phase | header | +|---|---|---|---| +| `project-root-choice` | A | spec 创建后选项目实现目录 | 项目目录 | +| `workflow-choice` | A | 进入 requirements 前 | 工作流选择 | +| `clarification-wizard` | B | intake,写需求前 | 需求澄清 wizard | +| `clarification-done` | A | intake 澄清结束 | 澄清完成? | +| `doc-confirm-{requirements,bugfix,design}` | A | 对应文档生成后 | 需求/设计/缺陷确认 | +| `tasks-execution` | A | tasks.md 生成后(含「需要调整」回退) | 执行方式 | +| `takeover-options` | A | `/specode:continue` LockHeld | 接管选项 | +| `acceptance-gate` | A | acceptance 完成 | 验收门 | +| `iteration-scope` | C | iteration 子循环开始 | 迭代范围 | -## Document-first Discipline +### 看到 hook 注入"必须呈现 X 选择器"时的硬约束 -Spec documents are the sole persistent memory. Any change not written to a document is invisible to the next session. See also Iron Rules #1, #2, #3, #6 at the top of this file. 
+- 当前 turn **唯一**正确动作 = 调用 `AskUserQuestion` 工具(按提示词给出的 questions / options 逐字传参)→ 工具返回后 turn 自然结束。 +- 类型与场景映射固定——不允许自行变换类型(如把 A 改 C)。 +- 没看到 hook 提示但自己判断到了 phase-gate(如 hook 失败)→ 仍应按上表查类型并调 `AskUserQuestion`。 +- **绝对不允许**的退化路径: + - ❌ 在 chat 输出 markdown 列表 + "请回复 1/2/3" 让用户回复编号; + - ❌ 加 `Type something` / `Chat about this` / `AWAITING_USER_CHOICE` 等历史保留位(工具内置 Other / ESC); + - ❌ 在 selector 之外多写"也可以聊聊"之类的兜底文本; + - ❌ 在 `AskUserQuestion` 调用前没在 chat 给出 1-3 行上下文摘要(让用户知道这次选什么)。 +- 工具调用前在 chat 可以写一段 ≤8 行的简报(如 doc-confirm 时列 3-8 条关键变更要点);工具调用本身就是 end turn 触发器,不需要 sentinel。 -**Iron rules (apply from the moment a persistent session is active, **and** apply equally — and especially — after `/continue`):** +→ 完整调用模板详见 `references/selectors.md`。 -1. **Requirement change** → update `requirements.md` / `bugfix.md` **first**, then continue -2. **Design decision** → update `design.md` **first**, then implementation -3. **Task status change** → update `tasks.md` **immediately** (`[~]` / `[x]` / blocked) -4. **New task / sub-task** → append to `tasks.md` **before** starting it -5. **requirements.md / bugfix.md modified** → must update `tasks.md` 的 `## 测试要点` 节 in the **same turn**(INV-4,Stop hook 强制;未同步则整轮被拒绝) -6. **Write-before-verify**: before any `Edit`/`Write` on a spec document, call `spec_session.py verify-lock`. EVICTED → stop work and tell the user. -7. **Post-`/continue` sync (非常重要)**: after `/continue`, the spec docs are already landed. Any further requirement/design adjustment from the user (including verbal-only "顺便改一下…") MUST be applied to the landed `requirements.md` / `design.md` / `tasks.md` **in the same turn it is raised**, before any code action. **Never** leave a chat-only change unwritten between turns — the next session will lose it. If multiple docs are affected by one change, update all of them in the same turn. +## Code-Doc Sync Reminders -These writes are non-negotiable. 
If the user asks to skip writing and proceed, acknowledge — then write first, proceed second. **Writes are forced**: if a write fails (IOError/permission), abort the operation; never continue with in-memory unpersisted state. +### Spec 文档清单 -→ 详见 `references/workflow.md` §1.1(自然语言路由表) +| 文档 | 何时更新 | +|---|---| +| `requirements.md` / `bugfix.md` | 需求 / 验收标准 / 缺陷范围调整 | +| `design.md` | 架构 / 接口 / 数据模型决策 | +| `tasks.md` | 任务范围 / 状态推进 `[ ]` → `[~]` → `[x]`;末尾自带 `## 测试要点` 节,主代理在 tasks phase 按 SHALL 补几行,供测试人员参考 | +| `implementation-log.md` | 实施期间记录设计偏离 / 关键决策(可选;≥30 字) | + +→ 5 份文档的章节模板与 EARS SHALL 写法详见 `references/templates.md`。 + +### Document-first 响应约束 + +1. 看到「📝 文档优先提醒(输入侧)」+ 用户输入含需求 / 设计 / 任务 / 验收调整 → 本 turn **优先 Edit 对应文档**,再处理代码。 +2. 看到「🔄 代码-文档同步提醒(输出侧)」+ 本 turn 触碰过 Write/Edit 源码 → turn 结束前补齐文档;无法当 turn 补齐则在 chat 显式承诺下一轮第一件事补齐,并立刻做到。 +3. 没看到提醒(hook 失败 / 无 active spec)→ 仍保持 document-first 纪律。**这是硬约束,不依赖 hook 触发**。 + +## Help Fast-path + +`/specode:spec -h` / `--help` 由 hook 注入完整帮助文本,要求逐字打印。同样的 fast-path 适用于 `--vault-status` / `--detect-vault` / `--sync-status`:hook 给出预渲染输出,模型只负责 verbatim print,**禁止补充解释**。 ## Workflow Selection -Classify the request before creating documents: +进入 requirements 前由 `workflow-choice` 选择器决定走哪条流程: -- Feature, behavior-first → **Requirements-first** (recommended default) -- Feature, architecture-first → **Technical Design first** -- Bug / regression / failing test → **Bugfix** +- **Requirements-first**:行为优先,先 EARS SHALL,再补技术设计。 +- **Technical Design-first**:架构约束已知,先 design.md 框架,再反推需求。 +- **Bugfix**:缺陷修复,用 `bugfix.md`(Current / Expected / Unchanged)替代 `requirements.md`。 -Use `scripts/spec_choice.py` when the workflow matters and is unclear; non-interactive shells get the option block + `AWAITING_USER_CHOICE` sentinel on stdout. 
**Never silently choose for the user.** +→ 三档判定细则详见 `references/workflow.md` §3。 -## Help Output (Fast Path) +## Phase Order -When the prompt is exactly `/spec -h` or `/spec --help` — **fast path, no thinking, no file scanning beyond the one file below**: +`intake → requirements/bugfix → design → tasks → implementation → acceptance → iteration` -1. `Read` `references/help-output.md` (single file, no other context loading) -2. Extract the **first** ` ```text ... ``` ` fenced block -3. Output that block **verbatim** inside one ` ```text ` fence, then **stop** +每个 phase 切换通过 `spec_session.py phase-transition --from <p> --to <p2>`,自动更新 sessions.phase + 对应 `pending_selector`。 -Forbidden in this path: thinking blocks, summaries, "here is the help", reading other references, loading other files, calling any script. The output is purely a file echo. +**进入 acceptance phase 时主代理必须调一次 `spec_lint.py --spec <spec-dir>`**(通过 §CLI 调用规约的 run.sh 模板),把 traceability / log / EARS 三类 WARNING 列给用户参考,再呈现 `acceptance-gate` 选择器。lint 是 advisory,所有 WARNING `exit 0`,**不阻断**验收决策。 -The same fast-path applies to `/spec --vault-status`, `/spec --detect-vault`, `/spec --sync-status`: run the single mapped script in `references/commands.md` §Sub-flag Dispatch, output its stdout verbatim, stop. No additional commentary. +→ 每个 phase 的输入 / 产出文档 / 子步骤详见 `references/workflow.md`。 -## Output Language +## Document Root Resolution -All user-facing output (summaries, questions, confirmations, status, errors) — **Chinese**. +三层解析(无 fallback;详见 `references/obsidian.md`): -Exceptions (English / original form): technical terms, command names, file paths, code identifiers; content inside code blocks; skill's own rule files (`SKILL.md`, `references/`). +1. `--root <p>` 或 `SPECODE_ROOT` env(最高优先级) +2. `~/.config/specode/config.json.obsidianRoot` +3. 
自动检测已安装 Obsidian vault → `<vault>/spec-in/<os>-<user>/specs` -If the user's requirement is in English, generated spec documents may use English; other agent output (summaries, confirmations) stays Chinese. +三层全 miss → `spec_init.py` exit 3 + 引导提示;**不**回退到 cwd / `~/specs` / 项目目录。 -## Document Output Brevity +`/specode:continue` 查找 spec 时**禁止 Grep 项目目录**——spec 不在项目里。正确流程:`spec_vault.py status` + `spec_session.py list-specs`,详见 `references/obsidian.md` §5.1。 + +### 首次使用 / auto-detect 命中时的确认(强制) + +`spec_init.py` 第 3 层(Obsidian 自检测)是 **silent fallback**——一旦命中 +就直接拿来当 `doc_root`,不向用户呈现"我用了哪个 vault"。首次使用时这会让 +用户莫名其妙地看到 spec 落在 Obsidian 目录里,跟自己直觉的 cwd / 项目内 +位置不一致(典型 case:用户在 git repo 下输入 `/specode:spec ...`,预期 spec +在项目内或被询问,结果 silent 写进了某个 Obsidian vault)。 + +因此 **commands/spec.md 在调 `spec_init.py` 之前必须先调 `spec_vault.py status` +确认 source**: + +- `source = env` 或 `source = config` → 已显式配置,直接调 `spec_init.py` +- `source = auto` 或 `source = none` → **禁止直接调 `spec_init.py`**,按以下 + 确认流程走: + + 1. **调 `AskUserQuestion`** 三选(中文 label / description): + - `"使用检测到的 <doc_root>(持久化到 config,下次不再问)"` + - `"改用其他绝对路径(你提供,将持久化到 config)"` + - `"中止本次创建"` + 2. 用户选定后: + - 前两个选项 → 调 `spec_vault.py set --vault <path>` 写入 + `~/.config/specode/config.json.obsidianRoot`,下次 `source` 就变成 + `config`、不再触发本流程 + - 选"中止" → end turn,**不调** `spec_init.py` + 3. 
持久化成功后再进入常规 `spec_init.py` 流程 + +这样首次使用时用户**显式知道并同意** spec 文档落点,避免 silent fallback +带来的认知 mismatch;后续会话因 config 已写,全程沉默自动用,不打扰。 + +## Multi-Window + Lock + +不同窗口可并行不同 spec;同一 spec 同时只一个会话持锁(lock 字段持有者键 = `session_id`,30 分钟无 heartbeat 视为 stale)。 + +每次 spec 文档写入前三重校验:specId 匹配 / spec_dir 在 documentRoot 下 / `verify-lock` 返回 ok。`/specode:continue` 命中 LockHeld → 呈现 `takeover-options` 选择器(强制接管 / 只读查看 / 取消)。 + +→ 锁状态机与接管流程详见 `references/lock-protocol.md`。 + +## Pre-requirements Clarification(铁律) -When writing or updating a spec document (`requirements.md` / `bugfix.md` / `design.md` / `tasks.md` / `implementation-log.md`), **never reprint the full document content in chat**. The Write/Edit tool UI already shows a preview; the user can also open the file. Duplicating the full content in user-facing text is pure noise. +**核心约束**:在 `requirements.md` / `bugfix.md` / `design.md` 任何一份**首次生成之前**,对源需求里**任何**不明确的地方——范围边界、行为细节、数据模型、UX 交互、验证规则、验收口径——主代理**必须**主动提出来与用户讨论;**严禁**凭主代理自己的判断假设/补全/invent。 -In chat, report only: +### 触发条件 + +`workflow-choice` 用户选定工作流后("Requirements first" / "Technical Design first" / "Bugfix" 任一),**先**做歧义自检: + +- 通读 `<spec-dir>/.config.json.source_text`(用户原始需求)+ 用户最近 turn 在 chat 里的补充; +- 自问:要把这份需求落成 EARS SHALL 条款(或 bugfix 的 Current/Expected)/ 写出 design 架构,**有没有任何**会让我编一条规则填空的问题?典型阻塞维度: + - **scope**:边界在哪?哪些场景包含/排除? + - **behavior**:触发条件是什么?正常/异常路径如何分? + - **UX**:交互是同步阻塞还是异步反馈?文案/提示哪里给? + - **data**:字段类型、唯一性约束、默认值、迁移口径? + - **validation**:长度上限、格式、特殊字符处理、空值? + - **acceptance**:怎么算"做完了"?测试用例颗粒度? 
+- 若 **≥1 个**阻塞维度回答不出来 → 触发 `clarification-wizard`。 + +### 必须呈现 wizard 的强制场景 + +- 源需求是**单句口语化描述**("加个登录功能"/"做个 todo app")→ 几乎必有 ≥2 个阻塞维度,**默认必呈现**; +- 源需求包含**含糊措辞**("等"/"诸如此类"/"差不多"/"先简单做下"/"以后再说")→ 必呈现; +- 历史 turn 用户已经主动列了多个需求点但未给细节 → 必呈现; +- 任何"主代理一边写 SHALL 一边自己脑补 X 是 Y 类型 / Z 是默认值"的冲动 → 都是必呈现信号。 + +### 唯一例外(允许跳过 wizard) + +用户在当前会话**明确放权**才可跳过: +- 显式说"由你决定"/"你看着办"/"随便填"/"按通用做法"/"先 MVP,细节后面再说"等同类表达; +- 或在更早 turn 已给过明确的放权指示(如『需求很粗,你直接按业界默认实现』)。 + +放权范围**只覆盖用户讲过的部分**;超出范围的歧义仍要先问。**禁止**把用户没说话当成默认放权——沉默 ≠ 同意你 invent。 + +### 落地动作 + +- 满足触发条件 → 调 `AskUserQuestion` 呈现 `clarification-wizard`(类型 B,一次性 2-4 个阻塞性子问题,模板见 `_selectors.py` SELECTOR_PROMPTS['clarification-wizard']);**不写**任何 spec 文档。 +- 用户答完 → 立即呈现 `clarification-done`(类型 A)决定再问一轮 / 进入 requirements 生成。 +- 跳过 wizard(用户已放权 / 无歧义)→ 在 chat 显式声明放权范围或"已自检无阻塞性歧义",再走 `phase-transition --to <requirements|bugfix|design>`。 +- 写文档时遇到**新**冒出来的歧义点(自检漏掉的)→ **停写**,回头补一轮 wizard;不要边写边猜。 + +### 反例(违反铁律) + +- ❌ "我看用户没说密码强度要求,就按 8 位字母数字混合写吧" → 必须先问。 +- ❌ "用户说'添加登录',我先按邮箱+密码方案写 requirements" → 是不是邮箱登录?是不是密码方案?必须先问。 +- ❌ "我把'通常应该有的功能'都写进 SHALL,用户不要的再删" → SHALL 是 ground truth,不是脑暴清单。 + +→ wizard 详细出题策略详见 `references/workflow.md` §intake;模板见 `_selectors.py` SELECTOR_PROMPTS['clarification-wizard']。 + +## Task-Swarm(多 agent 并发任务执行) + +`tasks-execution` 选择器若选中"用 task-swarm 多 agent 并发",主代理切到 task-swarm 编排模式:`task_swarm.py init` 解析 tasks.md 并按文件冲突切 group → 多 coder 并发 → reviewer(单实例,advisory)→ p0-fix coder(仅一次)→ validator(单实例,循环修复直到 pass)→ `task_swarm.py writeback`。state.json 是单一事实源;`on-task-completed` hook 在每个 subagent 返回后注入"下一步该做什么"提示。 + +### `/specode:task-swarm` 前置校验(强制) + +commands/task-swarm.md 在调 `task_swarm.py init` 之前**必须**先调 `spec_session.py read-session --session <id>` 校验 session 状态: + +- `mode == "active"`(不能是 idle / ended / readonly) +- `active_spec_dir` 非空(init 的 `--tasks` 必须用此值 + `/tasks.md`,**禁止 invent**) +- `phase == "tasks"` +- `pending_selector == "tasks-execution"` 且用户已选 task-swarm 路径 + +任一不满足 → **禁止** init,在 chat 引导用户先到 
tasks phase 跑 `tasks-execution` selector 选 task-swarm 路径。理由:task-swarm 是 tasks phase + tasks-execution selector 选中后的下游编排,**不**应该被用户裸输 `/specode:task-swarm` 直接触发——前置 selector 才是用户做"用 task-swarm vs 顺序执行 vs 调整 tasks.md"决策的入口。 + +→ 完整协议、agent 角色边界、产物 schema、writeback 格式详见 `references/task-swarm.md` + `references/task-swarm-example.md`。 + +## Output Language + +User-facing 输出(摘要、问题、确认、状态、错误)——**中文**。 + +Exceptions(保留英文 / 原样):技术名、命令、文件路径、代码标识符;代码块内容;本 skill 自身的规则文件(SKILL.md / references)。需求若是英文,生成的 spec 文档可英文;其他 agent 输出(摘要、确认)仍中文。 + +## Document Output Brevity -- File path (one line) -- Section headings or 3–8 key change bullets (e.g., "added §4 data model / §5.B Mascot behavior / tightened §6 validation thresholds") -- Open questions, if any -- Next action (gate confirm / next phase / etc.) +写 / 更新 spec 文档时**绝不**在 chat reprint 全文。报告只含: -Never paste the document body, EARS SHALL clauses, code snippets, full task lists, or full design rationale. Comply if the user explicitly asks; otherwise default to **summary only**. +- 文件路径(一行) +- 3–8 条章节标题或关键变更 bullets +- 未决问题(如有) +- 下一步动作 -This applies equally to first-time creation and to follow-up edits. 
+never paste 文档正文、EARS SHALL 全集、代码块、完整任务列表、设计 rationale。用户显式要求才例外。 ## References -- `references/commands.md` — **命令完整参考**(入口、子标志 dispatch、可选 spec 名前缀、会话模式、Helper Scripts、Hook 拦截) -- `references/workflow.md` — 完整 phase 协议、interactive selector 命令、`/continue` 上下文加载、EARS 示例 -- `references/prompts.md` — **统一 prompt 模板**(selector 用法、澄清格式、列表视图、禁用措辞) -- `references/iteration.md` — iteration 阶段、子循环、文档累积规则 -- `references/lock-protocol.md` — 锁机制、接管、只读模式、驱逐 -- `references/obsidian.md` — vault 检测、目录树、config.json 生命周期 -- `references/templates.md` — 文档模板与样式约定 -- `references/help-output.md` — 帮助文本原文(hook 拦截输出源) -- `references/task-swarm.md` — 任务执行阶段可选委派给 task-swarm(多角色 agent 并发;按阶段聚合 coder/reviewer/validator;防"自我认可") -- `references/task-swarm-example.md` — task-swarm 模式的 tasks.md 完整示例 +- `references/workflow.md` — phase 序列、三档工作流、phase-gate 输出顺序、`/specode:continue` 完整流程 +- `references/lock-protocol.md` — 锁状态机、接管三选项、只读模式、被驱逐窗口行为 +- `references/obsidian.md` — vault 三层解析、目录约定、`list-specs` 查找流程 +- `references/selectors.md` — 三类选择器骨架 + 8 场景常量库 + 输出格式约束 +- `references/templates.md` — 5 份文档模板、EARS SHALL 写法、traceability 规范 +- `references/iteration.md` — iteration 子循环、文档累积规则 +- `references/task-swarm.md` — task-swarm 编排协议、角色边界、产物 schema、writeback 格式 +- `references/task-swarm-example.md` — 完整 tasks.md 示例 + +## Session Logging(0.10.0+) + +specode 自带**会话日志收集**,默认开启。日志内容:每个 hook 触发、主代理工具调用(Bash / Read / Write / Edit 等)的 tool_input / tool_response、specode CLI 调用的 cmd / argv / exit_code、session phase / lock 状态变化。**用途**:排查"主代理为什么走偏 / 选错 selector / 漏写文档"等问题时回溯现场,配合截图反馈给开发者。 + +- **存储位置**:`~/.specode/logs/<session_id>.jsonl`(每行一个 JSON event) +- **关闭**:`export SPECODE_LOG=off` 临时关 / 编辑 `~/.config/specode/config.json` 设 `"logging": false` 永久关 +- **隐私**:默认 redact 黑名单(`password / api_key / token / secret / authorization / cookie` 等键名匹配 → 占位 `<redacted>`);字符串字段超 500 字符自动截断;可在 config 加 `redact_keys` 列表扩展 +- **回放**:`sh "$CLAUDE_PLUGIN_ROOT/scripts/run.sh" "$CLAUDE_PLUGIN_ROOT/scripts/spec_log.py" replay 
--session <id>` 按时序打印 events +- **占用查询**:`spec_log.py status` 输出当前 `~/.specode/logs/` 大小;超过 100MB 会提示手动清理 `rm -rf ~/.specode/logs/` + +日志收集任何异常都吞并,绝不阻断业务流程。 + +## Iron Rules + +1. **持久会话是唯一模式**——`/specode:end` 是退出口;不退出 hook 永远继续注入。 +2. **文档优先**——需求 / 设计 / 任务调整必须先 Edit 对应文档,再代码或解释。 +3. **强制双写 + 原子写**——`/specode:spec` / `/specode:continue` / `/specode:end` 任何写入失败视为整命令失败;不接受 in-memory 半成功。 +4. **selector 由你按骨架生成 + 必须以 sentinel 结尾 + end turn**——hook 只注入"该呈现哪个",文本由你写。 +5. **状态行 footer 每轮必输**——缺失视为流程违规;hook 不会因此阻断,但用户与下一轮上下文都能察觉。 +6. **CLI 调用必须走 `run.sh` 包装 + `$CLAUDE_PLUGIN_ROOT` 绝对路径**——见 §Session Lifecycle "CLI 调用规约";任何 `python3 spec_session.py ...` 裸调用一律视为流程违规,发现立即换模板重试,不要在错误路径上循环。 +7. **`requirements.md` / `bugfix.md` / `design.md` / `tasks.md` 4 份核心文档由主代理直接生成**——Read `${CLAUDE_PLUGIN_ROOT}/assets/templates/<phase>.md` 骨架 + 按 `source_text` 填空 + Write 到 `<spec-dir>/<phase>.md`。`implementation-log.md` 同样由主代理追加。详细规则与 spec-writer subagent 已删除的原因见 §Spec 文档生成。 +8. **文档头 `Status` / `Review Status` 字段不允许主代理手改**——这些字段反映 phase / 评审状态,由 `phase-transition` CLI 与 selector 流程驱动改变。主代理写完 `requirements.md` 把 `Status: Requirements Draft` 改成 `Requirements Complete` 是越权(这是 selector 走完后才该发生的事);保持模板默认值不动。文档**正文**该怎么写还是怎么写,只是别动 frontmatter 状态字段。 diff --git a/plugins/specode/skills/specode/references/commands.md b/plugins/specode/skills/specode/references/commands.md deleted file mode 100644 index 0ede1a3..0000000 --- a/plugins/specode/skills/specode/references/commands.md +++ /dev/null @@ -1,95 +0,0 @@ -# Spec Mode Commands Reference - -> 命令入口、子标志 dispatch、可选 spec 名前缀、会话模式、Helper Scripts、Hook 拦截。 -> SKILL.md 仅保留最常用片段;细节在此。 - -## Command Entry - -```text -/spec <requirement or path> [extras] ← one-shot workflow -/spec --persist <requirement or path> ← persistent session (footer + /end) -/continue [spec-slug] ← resume / switch; multi-window aware -/status ← show current session status -/end ← end persistent session (docs preserved) - -/spec --set-vault <vault-path> ← set Obsidian vault → 
vault/spec-in/<os>-<user>/specs -/spec --set-root <dir> ← set any directory as spec root -/spec --detect-vault ← detect installed Obsidian vaults -/spec --vault-status ← show current root + obsolete-location warnings - -/spec --freeform ← relax INV-1 for current spec (INV-2 still enforced) -/spec --strict ← restore INV-1 -/spec --sync-status ← show sync ledger / pending / last violation - -/spec -h ← help (hook-intercepted; bypasses model) -``` - -`--set-vault` / `--set-root` 任何时候都可运行;新值立即写入 `~/.config/specode/config.json` 并被后续命令使用。 - -`/spec` 后文本若是已存在的文件路径 → 当作需求源读入;否则当作需求描述。 - -## Sub-flag Dispatch - -执行后立即停止 —— **不**触发 spec 工作流、**不**创建 spec 目录、**不**进入 Plan-mode。 - -| Flag | Action | -|---|---| -| `--set-vault <path>` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py set --vault <path>` | -| `--set-root <path>` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py set --root <path>` | -| `--detect-vault` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py detect` | -| `--vault-status` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py get` | -| `-h` / `--help` | Output `references/help-output.md` 第一个 ```text``` 围栏块 verbatim (Fast Path — see SKILL.md §Help Output) | -| `--persist <req>` | `spec_init.py --persistent`, then start workflow | -| `--freeform` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_sync.py freeform on` | -| `--strict` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_sync.py freeform off` | -| `--sync-status` | `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_sync.py status [--spec-dir <active-spec-dir>]` — 当前会话已知 active spec dir 时务必传 `--spec-dir`,否则脚本会回退到 active-pointer 解析,可能返回 `(no active spec)` | - -Sub-flag dispatch 由模型按本表执行,不进入 intake 流程。`-h` / `--vault-status` / `--detect-vault` / `--sync-status` 走 **Fast Path**(详见 
SKILL.md §Help Output)—— 单文件读取或单脚本调用,输出 verbatim,不思考、不解释。 - -## Optional Spec Name Prefix - -若需求文本以 `<名称>:<内容>`(全角 `:`)或 `<名称>: <内容>`(ASCII `:` 后跟空格)开头,按首个冒号拆分: - -- **冒号前** = spec 文件夹名提示。Agent 派生英文 slug,调 `spec_init.py --name <slug> --requirement-name "<原名称>"`。`.config.json.requirementName` 保留原中文名作为显示名。 -- **冒号后** = 需求源文本。 -- **跳过条件**:前缀像路径(含 `/` 或 `\`)、URL、或前 30 字内无冒号。 -- 无冒号 → 整段作需求;slug 由 agent 从需求内容推断。 - -## Sessions: One-shot vs Persistent - -每次 `/spec` 都生成永久文档(`requirements.md` 或 `bugfix.md`, `design.md`, `tasks.md`(含 `## 测试要点` 节), `.config.json`),随时可通过 `/continue` 重开。 - -| | one-shot `/spec` | `/spec --persist` | -|--|--|--| -| 任务完成后会话 | 自动结束 | 保持活跃 | -| 状态 footer | 不显示 | 每次回复显示 | -| 退出方式 | 自动 | 显式 `/end` | - -**Persistent footer 格式(仅持久会话)**: - -``` -─── specode ─── spec: <slug> | session: <sessionId> | phase: <phase> | /end 退出 -``` - -只读模式额外标记 `[只读]`(见 `lock-protocol.md`): - -``` -─── specode ─── spec: <slug> | session: <id> | phase: <phase> | [只读] | /end 退出 -``` - -**sessionId 解析顺序**:`$TERM_SESSION_ID` → `$SPEC_SESSION_ID` → `"default"`。多窗口并行需要每窗口独立 sessionId。 - -**状态文件**: -- `<spec-dir>/.config.json` — per-spec 身份、生命周期、**锁**、sessions、iteration round -- `<document-root>/.active-specode.json` — v2 窗口索引,按 sessionId 索引(slug-only,不含绝对路径) - -## Helper Scripts - -- `scripts/spec_init.py` — 创建 spec 目录;**必须传 `--name <slug>`**(agent 派生 slug) -- `scripts/spec_session.py` — `start / continue / status / end / list / list-specs / load / acquire / release / heartbeat / verify-lock / iterate` -- `scripts/spec_vault.py` — `detect / set --vault / set --root / get` -- `scripts/spec_lint.py` — 校验 spec 文件(含锁字段) -- `scripts/spec_status.py` — 任务进度视图(`spec_session.py load --json` 的薄包装) -- `scripts/spec_choice.py` — 选择器;TTY → curses ↑/↓ + Enter;非 TTY(Claude Code Bash / CI)→ 打印 option 块 + `AWAITING_USER_CHOICE` 哨兵 + exit 0,agent 把 stdout 原样转发给用户后结束 turn -- `scripts/spec_sync.py` — Code-Doc Sync ledger(`status / freeform / strict` 等) -- `scripts/spec_guard.py` — Hook 
入口(INV-1/2/3/4/6 强制 + SessionStart/End 跟踪) diff --git a/plugins/specode/skills/specode/references/help-output.md b/plugins/specode/skills/specode/references/help-output.md deleted file mode 100644 index f3cc382..0000000 --- a/plugins/specode/skills/specode/references/help-output.md +++ /dev/null @@ -1,69 +0,0 @@ -# Help Output - -When `/spec -h` is triggered, output exactly this block and stop: - -```text -specode 命令速查 -══════════════════════════════════════════════════════ - -工作流 - /spec <需求描述或文件路径> 一次性规格工作流(需求→设计→任务) - /spec <名称>:<需求描述> 指定 spec 文件夹名(支持 : 或 ": ") - /spec --persist <需求> 启动持久会话模式 - /continue [spec-slug] 列出可继续的 spec,或恢复 / 切换到指定 spec - /status 显示当前会话状态(含锁状态) - /end 结束当前会话并释放锁(不删除文档) - -任务执行(多 agent 并发) - /task-swarm [<spec-dir>/tasks.md] 按一级阶段派发 coder/reviewer/validator 子 agent - 缺省取当前 active spec 的 tasks.md - reviewer/validator 物理无 Edit/Write,防自我认可 - 协议: references/task-swarm.md - -Obsidian / 根目录配置 - /spec --set-vault <vault路径> 设置 Obsidian vault(spec 存入 vault/spec-in/<os>-<user>/specs) - /spec --set-root <目录> 直接设置 spec 文档根目录(完全自定义路径) - /spec --detect-vault 检测已安装的 Obsidian vault - /spec --vault-status 显示当前 vault / spec root 配置 + 旧位置警告 - -任何时候都可以重新运行 --set-vault / --set-root 修改根目录;新值立即写入 -~/.config/specode/config.json 并被后续命令使用。 - -代码-文档同步守卫(CDSG) - /spec --freeform 当前 spec 切到 freeform 模式 - 放宽 INV-1(允许改未在 tasks.md 登记的源码) - INV-2 仍生效(turn 内必须有任意 spec doc 改动) - 适合探索性开发 / 临时实验 - /spec --strict 恢复严格模式(默认) - 改源码前必须先动 spec doc 或文件已在 tasks.md - /spec --sync-status 显示同步账本:tasks_files / turn 改动 / 最近违规 - -帮助 - /spec -h 显示本帮助 - -文档根目录解析 - 1. --root 参数 / SPECODE_ROOT 环境变量 - 2. ~/.config/specode/config.json → obsidianRoot - 3. 
自动检测 Obsidian vault → <vault>/spec-in/<os>-<user>/specs(首次检测自动写入 config) - -三级全部未命中 → 终止 /spec,输出引导提示。 - -/continue 多窗口行为 - - 不同窗口可同时持有不同 spec - - 同一 spec 同一时刻只允许一个窗口持有写锁(.config.json.lock) - - 选择已锁定 spec 时,提示三选项:强制接管 / 只读查看 / 取消 - - /end 仅结束当前会话并释放锁,不影响其他会话或 spec 文档 - -spec 文档结构 - <root>/<spec-slug>/requirements.md 需求与验收标准(agent 必须传 --name slug) - <root>/<spec-slug>/bugfix.md 缺陷规格(替代 requirements.md) - <root>/<spec-slug>/design.md 技术设计 - <root>/<spec-slug>/tasks.md 任务列表 + 测试要点(供测试人员的 SHALL 级验证场景) - <root>/<spec-slug>/.config.json specId / lock / iterationRound - <root>/.active-specode.json v2 窗口索引(slug-only) - -持久会话状态行格式 - ─── specode ─── spec: <slug> | session: <id> | phase: <phase> | /end 退出 - 只读模式额外标记 [只读]: - ─── specode ─── spec: <slug> | session: <id> | phase: <phase> | [只读] | /end 退出 -``` diff --git a/plugins/specode/skills/specode/references/iteration.md b/plugins/specode/skills/specode/references/iteration.md index 8a44a43..561f771 100644 --- a/plugins/specode/skills/specode/references/iteration.md +++ b/plugins/specode/skills/specode/references/iteration.md @@ -1,91 +1,204 @@ -# Iteration Phase +--- +description: Use when 进入 acceptance phase 后用户提到迭代 / 继续调整 / 重跑测试 / 改需求;或 spec 已交付但要继续推进。详述 iteration 子循环规则与文档累积写法。 +--- -Spec lifecycle after first delivery. Defines how `iteration` differs from `implementation` / `acceptance`, what triggers it, and how subsequent rounds accumulate in spec documents. +# Iteration — 子循环规则与文档累积 -## Phase 生命周期 +`iteration` 是 spec 已交付后的**常驻**状态。本文件给出触发条件、phase 子循环、文档累积写法、退出条件。 + +## 0. 
Phase 生命周期回顾 ``` -intake → requirements → design → tasks → implementation → acceptance → iteration - ↑ - 交付完成后的常驻状态 +intake → requirements / bugfix → design → tasks → implementation → acceptance → iteration + ↑ + 交付完成后的常驻状态 ``` | Phase | 含义 | 进入条件 | |---|---|---| -| `intake` | 初始解析,文档尚未写入 | `/spec` 触发后 | -| `requirements` | requirements.md / bugfix.md 编写或确认中 | 开始写第一份文档 | -| `design` | design.md 编写或确认中 | requirements 确认后 | +| `intake` | 初始解析,文档尚未写入 | `/specode:spec` 触发后 | +| `requirements` | requirements.md / bugfix.md 编写或确认中 | 工作流确认后开始第一份文档 | +| `design` | design.md 编写或确认中 | requirements / bugfix 确认后(或 design-first 工作流直接进入) | | `tasks` | tasks.md 编写或确认中 | design 确认后 | | `implementation` | 正在执行代码任务 | tasks 确认并选择执行 | -| `acceptance` | 代码完成,UAT 验收 | 所有 required tasks `[x]` | -| `iteration` | 已交付需求的持续演进 | 用户在 acceptance 完成后输入 `/spec-accept`,或 `/continue` 一个已 accept 的 spec | +| `acceptance` | 代码完成,跑 UAT | 所有 required 任务 `[x]` | +| `iteration` | 已交付需求的持续演进 | `acceptance-gate` 选 `验收通过` 或 `/specode:continue` 一个已 accept 的 spec | -## 三阶段语义对照 +## 1. 三阶段语义对照 | Phase | 一句话 | 允许操作 | |---|---|---| -| `implementation` | 正在写代码兑现 tasks | 编辑代码、改 tasks | -| `acceptance` | 代码写完,跑 UAT | 改 `tasks.md`(含 `## 测试要点`);不允许改代码(除非回退) | -| `iteration` | 已交付,等待下一轮演进 | 全部允许,需求变更走子循环 | +| `implementation` | 正在写代码兑现 tasks | 编辑代码、改 tasks 状态、追加 implementation-log | +| `acceptance` | 代码写完,跑 UAT | 不允许新功能改动(只允许回退 / 测试修复) | +| `iteration` | 已交付,等待下一轮演进 | 全部允许;需求变更走子循环 | + +## 2. 触发条件 + +进入 iteration 的两种触发: + +1. **`acceptance-gate` 选 `验收通过`**:调 `spec_session.py iterate <spec-dir>`(实际由 `phase-transition --from acceptance --to iteration` 完成;CLI 内部把 `iterationRound` +1、`iterationHistory` 追加一条)。 +2. 
**`/specode:continue <slug>` 一个已 accept 的 spec**:`spec_session.py continue` 读 `.config.json.currentPhase`,若是 `iteration` 则进入;若仍是 `acceptance` / `implementation` 等,按落盘值进入对应 phase(**不**写死 iteration —— 避免把还在 design 阶段的 spec 误置为 iteration、跳过未完成的设计门控)。 + +进入 iteration 后的默认动作: -## iteration 子循环 +- 状态行 footer 显示 `phase: iteration`。 +- SKILL.md 不自动呈现 `iteration-scope` 选择器 —— 默认进入 iteration 后停在 chat 等用户提出下一步;模型可在判断到调整范围明确时主动呈现。 +- 若用户明示"开始下一轮迭代" → 呈现 `iteration-scope`(类型 C)让用户选本轮调整范围。 -iteration 不是"一切都允许"的自由阶段,而是"在已交付基础上,重新走一遍完整循环"。 +## 3. iteration 子循环 + +iteration **不是**"一切都允许"的自由阶段,而是"在已交付基础上,**重新走一遍完整循环**"。 ``` -iteration ← 默认停留状态 - ├─ user: "我想加一个 X 功能" - │ → spec_session.py iterate(round + 1) - │ → 进入 iteration.requirements 子 phase - │ → 在 requirements.md 末尾追加 "## 迭代 N 新增需求" 节 - │ → 走 confirm → design → tasks → implementation → acceptance - │ → 全部完成后 → 自动回到 iteration - │ - ├─ user: "改一下验收里的某条规则" - │ → 直接编辑 tasks.md 的 `## 测试要点` 节(不需走完整循环) - │ - └─ user: /end - → 释放 session 锁,spec 文档保留 +iteration ← 默认停留状态 + ├─ 用户:"我想加一个 X 功能" + │ → spec_session.py iterate <spec-dir> (iterationRound +1) + │ → 进入 iteration.requirements 子 phase + │ → 在 requirements.md 末尾追加 "## 迭代 N 新增需求" 节 + │ → 走 doc-confirm-requirements → design → tasks + │ → implementation → acceptance + │ → acceptance-gate 通过 → 自动回到 iteration(round +1) + │ + ├─ 用户:"改一下 acceptance 里某条规则" + │ → 直接编辑 tasks.md 对应任务或末尾 `## 测试要点` 行 + │ →(不需走完整子循环 —— 视为微调) + │ + ├─ 用户:"只重跑测试" + │ → 不改文档,跑 tasks.md 任务对应的"验证:xxx"小项 + │ → 更新 implementation-log 记录实际结果 + │ + └─ 用户:/specode:end + → 释放 session 锁,sessions/<id>.json.mode=ended + → spec 文档保留 ``` -## .config.json 字段 +iteration 期间所有 phase 限制**放松**: + +- 可重走 requirements → design → tasks → implementation 子循环。 +- 可直接补 implementation-log.md。 +- 但**仍**走 phase-transition CLI 切换子 phase —— 不要手改 `.config.json.currentPhase`。 + +## 4. 
`.config.json` 字段 ```json { - "iterationRound": 2, - "iterationHistory": [ - { "round": 1, "startedAt": "...", "completedAt": "2026-05-08T...", "newReqCount": 3 }, - { "round": 2, "startedAt": "2026-05-11T...", "newReqCount": 0 } - ] + "specId": "uuid-of-spec", + "currentPhase": "iteration", + "iterationRound": 2, + "iterationHistory": [ + { "round": 1, "startedAt": "2026-05-01T...", "completedAt": "2026-05-08T...", "newReqCount": 3 }, + { "round": 2, "startedAt": "2026-05-11T...", "newReqCount": 0 } + ], + ... } ``` -- `iterationRound` 在首次 `iterate` 时 0 → 1,每次重新走完 acceptance 后 +1 -- `iterationHistory` 保留历史轮次,便于追溯 -- 用 `spec_session.py iterate <spec-dir>` 推进,不要手动改字段 +- `iterationRound` 在首次 `iterate` 时 0 → 1;每次再次跑通 `acceptance-gate=验收通过` 后 +1。 +- `iterationHistory` 保留历史轮次(startedAt / completedAt / newReqCount)便于追溯。 +- 用 `spec_session.py iterate <spec-dir> --session <id>` 推进,**不要**手动改字段。 -## 文档累积写法(铁律) +## 5. 文档累积写法(铁律) | 文档 | 累积规则 | |---|---| -| `requirements.md` | 原内容不动;末尾追加 `## 迭代 N 新增需求` 节;新 SHALL 前缀 `[迭代N]` | -| `tasks.md` | 原 `[x]` 不清理;新任务追加 `## 迭代 N 任务` 节;`## 测试要点` 同 turn 跟新 SHALL 增删,前几轮已通过的行保留并在行尾追加 `(已验收 迭代 N-1)` | -| `design.md` | 原节内可修改;必须在 `## 变更历史` 追加 `### 迭代 N`(无此节则创建) | +| `requirements.md` | **原内容不动**;末尾追加 `## 迭代 N 新增需求` 节;新 SHALL 编号前缀 `[迭代 N]`,如 `[迭代 2] 5.1 WHEN ... SHALL ...`。原编号继续延续(不重排)。 | +| `bugfix.md` | 同 requirements.md:末尾追加 `## 迭代 N 新增问题` 节;新条目带 `[迭代 N]` 前缀。 | +| `design.md` | 原节内**可修改**;必须在 `## 变更历史` 节追加 `### 迭代 N` 子节(无此节则创建),说明本轮架构 / 接口 / 数据模型变更。原节内修改的地方留 `<!-- [迭代 N] -->` 注释标记。 | +| `tasks.md` | 原 `[x]` 任务**不清理**;新任务追加 `## 迭代 N 任务` 节;任务编号续延(如旧最后是 `5.`,新任务从 `6.` 起);新任务 traceability 引用 requirements.md 的 `[迭代 N]` 前缀编号。末尾 `## 测试要点` 节可按需追加新行供测试人员参考。 | +| `implementation-log.md` | 按日期继续追加,每条记录开头加 `[迭代 N]` 前缀。 | -## /continue 进入 iteration 的判断 +### 5.1 累积示例 -恢复 spec 时 `spec_session.py continue` 的 `--phase` **不**写死 `iteration`: +```markdown +(requirements.md 末尾) + +## 迭代 2 新增需求 + +### 需求 5:[新需求标题] + +**用户故事:** 作为 ... 
+ +#### 验收标准 + +[迭代 2] 5.1 WHEN ... SHALL ... +[迭代 2] 5.2 IF ... THEN ... SHALL ... +``` + +```markdown +(design.md 变更历史节) + +## 变更历史 + +### 迭代 1 + +- 2026-05-08:架构无变化,仅修复 §错误处理 中条目 3 的描述。 + +### 迭代 2 -- 默认值 `None`,由代码读 `.config.json.currentPhase` -- 只有用户明确通过 `/spec-accept` 或类似动作进入 iteration,phase 才被改写 -- 这样可避免把还在 `design` 阶段的 spec 误置为 iteration、跳过未完成的设计门控 +- 2026-05-11:新增 `[Component] PasswordPolicy`(§组件与接口 §3)支持需求 5.1 / 5.2。 +- 2026-05-12:调整数据模型 `User.password` 字段为 `salted_hash + algo_version`(迁移计划见 §流程)。 +``` + +```markdown +(tasks.md 新任务节) + +## 迭代 2 任务 + +- [ ] 6. [新阶段任务标题] + - [ ] 6.1 [子任务] + - 文件:`src/auth/password_policy.py` + - 验证:`pytest tests/test_password_policy.py` + - _需求:5.1_ + - [ ] 6.2 [子任务] + - _需求:5.2_ +``` -## /spec-accept(可选未实装命令) +```markdown +(tasks.md 末尾 `## 测试要点` 按需追加新行后片段) -文档约定:在 acceptance 阶段所有 required checklist 行结论=通过后,用户输入 `/spec-accept` 或交互式选择"验收通过",agent 调: +## 测试要点 -```bash -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py iterate <spec-dir> +- 输入少于 8 位密码点击提交 → 系统提示"密码长度不足"(需求 1.1) +- 连续 5 次错误密码登录 → 账号锁定 15 分钟(需求 1.2) +- [迭代 2] 输入弱密码(如 12345678)→ 提示"密码强度不足"(需求 5.1) +- [迭代 2] 修改密码时复用历史密码 → 拒绝并提示原因(需求 5.2) ``` -将 phase 置为 `iteration`、`iterationRound` +1。 +## 6. `/specode:continue` 进入 iteration 的判断 + +恢复 spec 时 `spec_session.py continue` 的 `--phase` **不**写死 `iteration`: + +- 默认值 `None`,由 CLI 读 `<spec-dir>/.config.json.currentPhase`。 +- 只有用户明确通过 `acceptance-gate=验收通过` 才被改写为 `iteration`。 +- 这样可避免把还在 `design` 阶段的 spec 误置为 iteration、跳过未完成的设计门控。 + +## 7. `iteration-scope` 选择器(类型 C) + +iteration 子循环开始时主会话呈现 `iteration-scope`(类型 C 复选框)让用户勾选本轮调整范围。模板详见 `_selectors.py` SELECTOR_PROMPTS['iteration-scope']。 + +选项(可多选): + +1. 改 requirements +2. 改 design +3. 改 tasks +4. 重跑测试 + +允许"全不选"(视为本轮 iteration 取消)。 + +该选择器**不**自动呈现 —— 默认在 chat 等用户提出下一步;模型在判断到调整范围明确时可主动呈现。 + +## 8. 退出条件 + +iteration 子循环按以下任一条件退出: + +1. 
**用户运行 `/specode:end`**:调 `spec_session.py end --session <id>` → 释放锁 + sessions/<id>.json.mode=ended。spec 文档保留;下次 `/specode:continue <slug>` 仍可恢复。 +2. **再次跑通 `acceptance-gate=验收通过`**:本轮迭代完成 → `iterationRound +1` → 回到 iteration 默认停留状态。 + +iteration 状态本身**不会自动**退出 —— 它是 spec 的"已交付"常驻态。`/specode:end` 关闭的只是当前 session,不是 spec。 + +## 9. 跨文档引用 + +- phase 序列与 `acceptance-gate` 选择器详情 → `references/workflow.md` §7、模板 `_selectors.py` SELECTOR_PROMPTS['acceptance-gate']。 +- 5 份文档的章节模板 → `references/templates.md`。 +- 锁状态机(iteration 期间仍受锁保护)→ `references/lock-protocol.md`。 +- 入口选择器 `iteration-scope` → 模板 `_selectors.py` SELECTOR_PROMPTS['iteration-scope']。 diff --git a/plugins/specode/skills/specode/references/lock-protocol.md b/plugins/specode/skills/specode/references/lock-protocol.md index 223317e..e2dba13 100644 --- a/plugins/specode/skills/specode/references/lock-protocol.md +++ b/plugins/specode/skills/specode/references/lock-protocol.md @@ -1,126 +1,201 @@ -# Spec-mode Lock Protocol +--- +description: Use when 涉及 lock / takeover / heartbeat / stale / 多窗口同 spec / verify-lock 异常 / 接管 / 只读模式。详述锁状态机、接管三选项、stale 判定。 +--- -Per-spec write lock for safe multi-window operation. Implemented in -`scripts/spec_session.py`; this document is the behavioral contract. +# Lock Protocol — 锁状态机与多窗口接管 -## 设计原则 +每个 spec 自己的 `<spec-dir>/.config.json.lock` 字段管单写权。**持有者键 = `session_id`**(与 `~/.specode/sessions/<id>.json` 文件名同源)。 -- **锁存放在 spec 自身 `.config.json`**,不放在 `.active-specode.json`。原因:spec 文档可能跨设备同步(Obsidian),边界判断必须以 spec 自身状态为准;如果索引文件丢失,锁不能跟着消失。 -- **任何 spec 文档写入前,必须持锁。** 这是不可绕过的铁律。 -- **多窗口可同时打开不同 spec**;同一 spec 同一时刻只允许一个 session 写入。 +## 0. 
设计原则 -## .config.json 字段 +- 锁存放在 spec 自身 `.config.json`,**不**放在 `<doc-root>/.active-specode.json`。原因:spec 文档可能跨设备同步(Obsidian),边界判断必须以 spec 自身状态为准;如果索引文件丢失,锁不能跟着消失。 +- **任何 spec 文档写入前,必须持锁**。这是不可绕过的铁律(由 SKILL.md §Multi-Window + Lock 的"写前三重校验"保证)。 +- 多窗口可同时打开**不同** spec;同一 spec 同一时刻只允许一个会话写入。 +- 锁主即会话:所有 `acquire` / `release` / `heartbeat` / `verify-lock` 必须传 `--session <session_id>`,CLI 拒绝匿名调用。 + +## 1. `.config.json.lock` 字段 ```json { - "specId": "uuid", - "lock": { - "sessionId": "TERM_SESSION_A1B2", - "acquiredAt": "2026-05-11T10:00:00Z", - "lastHeartbeatAt": "2026-05-11T10:25:00Z", - "agent": "claude-code", - "pid": 12345 - }, - "evictedSessions": [ - { - "sessionId": "TERM_SESSION_C3D4", - "evictedAt": "2026-05-11T10:30:00Z", - "evictedBy": "TERM_SESSION_A1B2", - "reason": "force_acquire" - } - ] + "specId": "uuid-of-spec", + "currentPhase": "tasks", + "workflow": "requirements", + "lock": { + "session_id": "abc-def-1234-...", + "acquired_at": "2026-05-19T10:00:00Z", + "last_heartbeat_at": "2026-05-19T10:25:00Z", + "agent": "cli-agent", + "pid": 12345 + }, + "evicted_sessions": [ + { + "session_id": "old-session-id", + "evicted_at": "2026-05-19T10:30:00Z", + "evicted_by": "abc-def-1234-...", + "reason": "force_acquire" + } + ] } ``` -`lock: null` 表示空闲,可被任意 session 直接获取。 +`lock: null` 表示空闲,可被任意会话直接 acquire。`evicted_sessions` 数组追加而不清理(每条记录极小;运维需要时手动编辑)。 -## 五个核心操作 +## 2. 
五个核心命令 | 命令 | 行为 | 退出码 | |---|---|---| -| `spec_session.py acquire <dir> --session <id>` | 拿锁;自持续约;他持抛 `LockHeld`;stale 自动接管 | 0 / 4(LockHeld) | -| `spec_session.py acquire <dir> --session <id> --force` | 强制接管,记录 `evictedSessions` | 0 | -| `spec_session.py release <dir> --session <id>` | 释放自持锁;非自持时静默 | 0 | -| `spec_session.py heartbeat <dir> --session <id>` | 续约;不持锁抛 `lock_lost` | 0 / 1 | -| `spec_session.py verify-lock <dir> --session <id>` | 检查持有状态:`ok` / `evicted` / `not_held` | 0(ok) / 3(其他) | +| `spec_session.py acquire --spec <dir> --session <id>` | 持锁;若已被自己持有 → 续约;他持 → 抛 LockHeld;stale 静默接管(记 evicted_sessions reason=`stale`) | 0 / 4 (LockHeld) | +| `spec_session.py acquire --spec <dir> --session <id> --force` | 强制接管;写 evicted_sessions reason=`force_acquire` | 0 | +| `spec_session.py release --spec <dir> --session <id>` | 释放自持锁;非自持时静默;同 turn 写 `sessions/<id>.json.lock_state=released` | 0 | +| `spec_session.py heartbeat --spec <dir> --session <id>` | 刷 `lock.last_heartbeat_at` + `sessions/<id>.json.last_activity_at`;不持锁 → exit 1 `lock_lost` | 0 / 1 | +| `spec_session.py verify-lock --spec <dir> --session <id>` | 检查持有状态:`ok` / `evicted` / `not_held` / `stale_lock` | 0 (ok) / 3 (其他) | + +退出码 `3` 细分: + +- `verify-lock` 输出 stdout `evicted` → 被驱逐窗口。 +- `verify-lock` 输出 stdout `not_held` → 锁字段为 null 或被他人持有。 +- `verify-lock` 输出 stdout `stale_lock` → 距 `last_heartbeat_at` 超过 stale 阈值。 + +约定:CLI 写 stdout 的关键状态字(`ok` / `evicted` / `not_held` / `stale_lock` / `LockHeld`)固定,方便主会话脚本化处理。 + +## 3. Stale 阈值 + +- 默认 `lock.last_heartbeat_at` 超过 **1800 秒(30 分钟)** → 视为 stale。 +- 下一次 `acquire` 静默接管(不抛 LockHeld,evicted_sessions reason=`stale`)。 +- 通过环境变量 `SPECODE_LOCK_STALE_SECONDS` 覆盖。 +- `UserPromptSubmit` 的 `on-heartbeat-quiet` hook 每轮静默续约(自动);主会话也可显式调 `spec_session.py heartbeat` 强制刷新。 + +## 4. 心跳触发点(主会话行为契约) + +主会话在持久 session 中**必须**在以下时机调 `heartbeat`: + +1. **每次写 spec 文档前**(Edit / Write 工具调用之前一行)。 +2. **每次回答用户消息前**,如果距上次心跳超过 5 分钟。 +3. 
**每次完成一个 task-swarm subagent 后**。 -## Stale 阈值 +只读命令(`spec_status.py` / `spec_lint.py` / `load --json` / `read-session` / `verify-lock`)**不**触发心跳。 -- 默认 `lastHeartbeatAt` 超过 **1800 秒(30 分钟)** → 视为 stale -- 下一次 `acquire` 静默接管(记录到 `evictedSessions`,reason=`stale`) -- 通过环境变量 `SPECODE_LOCK_STALE_SECONDS` 覆盖 +## 5. 写前三重校验(铁律) -## 心跳触发点(agent 行为契约) +任何 spec 文档写入前,主会话必须按顺序确认: -agent 在持久 session 中必须在以下时机调 `heartbeat`: +1. **specId 校验**:active-pointer 里的 `specId == <spec-dir>/.config.json.specId`。 +2. **边界校验**:`<spec-dir>` 物理位于 `<doc-root>` 之下(不允许 `../` 穿越)。 +3. **锁校验**:`spec_session.py verify-lock --spec <dir> --session <id>` 返回 `ok`。 -1. **每次写 spec 文档前**(Edit / Write 工具调用前) -2. **每次回答用户消息前**(如果中间间隔 > 5 分钟) +任一失败 → **拒绝写入**,在 chat 报告原因(哪一项校验失败、当前持有者、可能的处置),**不要**静默继续。 -只读命令(`spec_status.py`、`spec_lint.py`、`load --json`)**不**触发心跳。 +> 写前三重校验是 spec-mode 边界纪律的核心。0.6.0 不再用 PreToolUse hook 阻断,靠 SKILL.md + 本协议自律 + CLI 在 exit code 3/4 上的报错。模型若执意写,能写出去,但 reviewer / 验收门 / 下一会话的 verify-lock 都会发现错位。 -## /continue 接管流程 +## 6. `/specode:continue` 接管协议 ``` -用户:/continue <slug> - -代码: - 1. 解析 slug → spec_dir - 2. acquire(currentSession) - 成功 → 进入 spec - 失败(LockHeld)→ 先向用户输出锁状态摘要(持有者 sessionId + 最后活动时间), - 然后运行 `references/prompts.md` 中的「/continue 接管」选择器: - - 强制接管 → acquire --force - - 只读查看 → 加载文档但不 acquire,footer 标记 [只读] - - 取消 → 退出 +用户:/specode:continue <slug> + │ + ▼ +1. 解析 spec_dir + │ + ▼ +2. spec_session.py acquire --spec <dir> --session <id> + │ + ├── exit 0 → 持锁成功 → 走 §6.1 后续步骤 + │ + └── exit 4 LockHeld → 走 §6.2 接管三选项 ``` -接管选择器命令、措辞、推荐项见 `references/prompts.md`(统一选择器命令节)。 +### 6.1 持锁成功 -## 被驱逐窗口的行为 +1. `spec_session.py load --spec <dir>` 拿 phase / iteration / tasks 计数 / 文档 mtime。 +2. `spec_session.py continue --spec <dir> --session <id>` 写 `sessions/<id>.json`(mode=active / active_spec_slug / lock_state=ok)+ 更新 `<doc-root>/.active-specode.json` active-pointer。 +3. 
输出"已加载 spec"报告 + 状态行 footer + end turn。 -旧 session 在下一次写操作前调 `verify-lock`: +### 6.2 LockHeld 三选项 -- 返回 `evicted` → agent 必须立即停止当前工作,输出: +1. 输出锁状态摘要:`持有者 session_id 前 8 位 + 最近 heartbeat 时间`。 +2. 呈现 `takeover-options` 选择器(类型 A,详见 `_selectors.py` SELECTOR_PROMPTS['takeover-options'])。**无推荐项**——让用户根据对方是否仍活跃自己判断。 +3. End turn 等用户选。 - ``` - ⚠ 你的会话已被 session <newId> 强制接管。当前 spec 在此窗口已转为只读。 - 继续工作请用 /continue 强制接管回来。 - ``` +| 选项 | 后续操作 | +|---|---| +| **1. 强制接管** | `spec_session.py acquire --spec <dir> --session <id> --force` → exit 0 + evicted_sessions 追加记录 → §6.1 后续。告知用户"对方下一次写操作会被 verify-lock 拒绝"。 | +| **2. 只读查看** | **不**调 acquire,直接 `spec_session.py load`;写 `sessions/<id>.json.mode=readonly`、`lock_state=readonly`;**不**更新 active-pointer 的 specSlug 绑定;状态行 footer 加 `[只读]`。后续所有 Edit/Write 在 SKILL.md 层面被劝阻(不阻断,但模型必须主动拒绝)。 | +| **3. 取消** | 不做任何写动作;回到对话起点。 | - 并将本窗口 `.active-specode.json` 对应条目改为 `status: "evicted"`。后续在该 spec 的任何写操作 → 直接拒绝。 +## 7. 被驱逐窗口的行为 -## 只读模式 +被强制接管的旧会话在**下一次写操作前**调 `verify-lock`: -- agent 调 `spec_session.py load --json` 加载文档(**不**调 acquire) -- 不更新 `.active-specode.json` 中本 session 的 specSlug 绑定 -- footer 格式: +- 返回 `evicted` → 该会话必须立即停止当前工作,输出: - ``` - ─── specode ─── spec: <slug> | session: <id> | phase: <phase> | [只读] | /end 退出 - ``` +```text +⚠ 你的会话已被 session <newId 前 8 位> 强制接管。 +当前 spec 在此窗口已转为只读。继续工作请用 + /specode:continue <slug> +选择"强制接管"恢复可写。 +``` + +- 同步把本窗口 `<doc-root>/.active-specode.json` 对应条目改为 `status: "evicted"`;写 `sessions/<id>.json.mode=readonly`、`lock_state=evicted`。 +- 后续在该 spec 的任何写操作 → 主动拒绝,向用户提示重新走 `/specode:continue` 接管。 +- 状态行 footer 加 `[只读]`。 + +## 8. 
只读模式 + +进入条件: -- 禁止所有写操作:Edit / Write / heartbeat / `spec_session.py continue|start|iterate` 全部拒绝 -- 用户要切换为可写:再次 `/continue <slug>` 并选"强制接管" +- `/specode:continue <slug>` LockHeld 时选 2 `只读查看`。 +- 被强制接管的旧会话 verify-lock=evicted。 -## 写前三重校验(铁律) +行为约束: -任何 spec 文档写入前 agent 必须确认: +- `spec_session.py load --spec <dir>`(**不**调 acquire)。 +- **不**更新 active-pointer 的 specSlug 绑定。 +- 状态行 footer 加 `[只读]`。 +- 禁止所有写操作:Edit / Write / heartbeat / `spec_session.py continue|acquire|phase-transition` 都不调;如用户要求修改,SKILL.md 引导"请先 `/specode:continue <slug>` 选强制接管"。 +- 用户要切回可写 → 再次 `/specode:continue <slug>` 选 1 `强制接管`。 + +## 9. 锁状态机(汇总) + +``` + ┌──────────┐ + │ null │ (lock=null, 空闲) + └────┬─────┘ + │ acquire + ▼ + ┌─── force_acquire ──► ┌──────┐ ──── stale ────► (下一次 acquire 静默接管) + │ │ ok │ + │ ◄── release/end ────└──┬───┘ + │ │ + │ │ 他人 force_acquire + │ ▼ + ┌────────────┐ verify-lock ┌──────────┐ + │ evicted │ ◄────────── │ evicted? │ + └────┬───────┘ └──────────┘ + │ /specode:continue 强制接管 + ▼ + ok(新会话) + +侧路: + - heartbeat 在 ok 状态下刷新 last_heartbeat_at + - not_held = lock=null 或被别人持有;verify-lock 路径 + - readonly = 只读模式(lock 字段不变,仅 sessions/<id>.json.mode=readonly) +``` -1. **specId 校验**:active pointer.specId == .config.json.specId -2. **边界校验**:spec_dir 在 documentRoot 下(`spec_session.ensure_within_root`) -3. **锁校验**:`spec_session.py verify-lock` 返回 `ok` +## 10. 原子性保证 -任一失败 → 拒绝写入,输出错误,**不**静默继续。 +- 所有 `.config.json` 读改写序列被 `_file_lock(config_path)`(`fcntl.flock` / `msvcrt.locking`)保护。 +- 写入用 `tempfile.NamedTemporaryFile` → `os.replace` → `os.fsync`,crash-safe。 +- 同时写 `<spec-dir>/.config.json` + `~/.specode/sessions/<id>.json` 时 CLI 必须两边都成功才算成功;任一失败 → 回滚已写字段 + exit 1。 +- `_file_lock` 在不支持平台(罕见)静默退化为无锁原子写入(仍不会出半文件,但极端竞争下可能出现 lost update —— 已知风险)。 -## 原子性保证 +## 11. 
evicted_sessions 数组 -- 所有 `.config.json` 读改写序列被 `_file_lock(config_path)`(`fcntl.flock` / `msvcrt.locking`)保护 -- 写入使用 `write_json` 的 temp + `os.replace()` 模式,crash-safe -- `_file_lock` 在不支持平台(罕见)静默退化,原子写入仍保证不会写出半文件 +- 每次驱逐追加一条(保留历史)。 +- 被驱逐会话用它来判断"我是否被驱逐了"——`verify-lock` 检查当前 session_id 是否在数组里。 +- 不自动清理;运维需要时手动编辑 `.config.json`。 -## evictedSessions 数组 +## 12. 跨文档引用 -- 每次驱逐追加一条记录(保留历史) -- 用于 `verify-lock` 检测自己是否被驱逐 -- 不会自动清理(数量极小,每次驱逐一条;如有运维需求可手动编辑) +- 三层文档根目录解析 → `references/obsidian.md`。 +- 选择器三种类型与 `takeover-options` 文本骨架 → `references/selectors.md`。 +- phase 切换协议 → `references/workflow.md` §Phase-gate 输出顺序。 +- 写前三重校验在 SKILL.md §Multi-Window + Lock 也有摘要。 diff --git a/plugins/specode/skills/specode/references/obsidian.md b/plugins/specode/skills/specode/references/obsidian.md index deb83ef..e440498 100644 --- a/plugins/specode/skills/specode/references/obsidian.md +++ b/plugins/specode/skills/specode/references/obsidian.md @@ -1,66 +1,276 @@ -# Obsidian Integration Reference +--- +description: Use when 涉及 vault / obsidian / doc_root / --set-vault / --detect-vault / spec 找不到 / specs 目录在哪。详述三层 doc_root 解析、Obsidian 自检测、目录结构约定。 +--- -## 文档目录结构 +# Obsidian / Document Root 解析 + +specode 的 spec 目录默认放在 Obsidian vault 内(也支持纯文件系统目录)。本文件给出三平台 `obsidian.json` 路径、三层根目录解析顺序、多 vault 选择策略、目录结构约定。 + +## 0. 文档目录结构 ```text <vault>/ └── spec-in/ - └── <os>-<username>/ ← e.g. macos-alice, windows-bob, linux-carol - └── specs/ - ├── .active-specode.json - └── <requirement-name>/ - ├── requirements.md (or bugfix.md) - ├── design.md - ├── tasks.md ← 含 `## 测试要点` 节 - └── .config.json + └── <os>-<username>/ ← e.g. 
macos-alice, windows-bob, linux-carol + └── specs/ + ├── .active-specode.json ← v2 window index, slug-only + └── <slug>/ + ├── requirements.md ← 或 bugfix.md(互斥) + ├── design.md + ├── tasks.md ← 末尾自带 `## 测试要点` 章节 + ├── implementation-log.md(可选) + └── .config.json ← per-spec lock + iteration state ``` -路径段 `spec-in/<os>-<username>/specs` 由 `scripts/spec_vault.py` 的 `device_segment()` 自动生成,确保同一 vault 在多设备/多用户共享时各设备的 spec 独立存放。 +路径段 `spec-in/<os>-<username>/specs` 由 `spec_vault.py` 的 `device_segment` 自动生成: + +- `<os>` = `macos` / `windows` / `linux`。 +- `<username>` = 当前操作系统用户名(`getpass.getuser`)。 +- 同一 vault 在多设备 / 多用户共享时各设备的 spec 独立存放(避免锁串扰、避免文件冲突)。 -## config.json 生命周期 +`.active-specode.json` schema(v2,slug-only): + +```json +{ + "version": 2, + "active_specs": [ + { + "session_id": "abc-def-1234-...", + "specId": "uuid", + "slug": "login-password-rule", + "phase": "tasks", + "status": "active", + "updated_at": "2026-05-19T10:05:00Z" + } + ] +} +``` -`~/.config/specode/config.json` 在两种情况下写入: +`status` 取值:`active` / `readonly` / `evicted` / `ended`。多窗口同时活跃时数组里有多条。 -- **首次 Obsidian 检测**:`resolve_spec_root()` 检测到 vault 后计算路径并自动保存。后续调用直接读取此文件,不再重新检测 Obsidian。 -- **显式设置**:用户运行 `/spec --set-vault` 或 `/spec --set-root`(任何时候可执行,立即覆盖旧值)。 +## 1. 
三层根目录解析(顺序固定) -此文件不会自动创建于其他情况。若 Obsidian 未安装且未显式设置,`resolve_spec_root()` 返回 `None`,由 `spec_init.py` 抛出引导提示并终止(不再回退到项目目录或默认路径)。 +由 `spec_init.py:resolve_document_root` 与 `spec_vault.py resolve_spec_root` 共同实现: -## 跨会话路径读取 +### 第 1 层:命令行 / 环境变量 -对于持久 session 和跨会话恢复(`/continue`),文档根目录从各 spec 自身的 `.config.json`(`documentRoot` 字段)直接读取,**不依赖** vault 检测或 `~/.config/specode/config.json`。vault 路径解析仅在创建新 spec 时需要。 +- 显式参数 `--root <path>` 最高优先级。 +- 环境变量 `SPECODE_ROOT` 次之。 +- 命中 → 直接用,**不**追加 `spec-in/<os>-<user>/specs` 子结构(用户给什么就用什么)。 -## 旧位置警告 +### 第 2 层:用户级配置 -`/spec --set-vault` / `--set-root` 执行后,`spec_vault.py` 会扫描历史 fallback 位置(`<cwd>/specs`、`~/new project/specs`)。若发现遗留 spec 目录,输出 `⚠ 旧位置仍有 N 个 spec(不会自动迁移)` 警告,并列出最多 10 个 spec 路径。如需迁移,用户手动 `mv` 并更新各 spec 的 `.config.json.documentRoot` 字段。 +- 读 `~/.config/specode/config.json`(类 Unix 下也可走 `$XDG_CONFIG_HOME/specode/config.json`)。 +- 字段 `obsidianRoot` 命中 → 自动追加 `spec-in/<os>-<user>/specs` 后使用。 +- `rootOverride` 命中(由 `set --root` 写入)→ 直接用,不追加子结构。 -## 平台 Obsidian 配置文件路径 +### 第 3 层:自动检测 Obsidian vault -`spec_vault.py` 按当前平台读取 Obsidian 的全局配置文件以获取已注册 vault 列表: +- 按当前平台读 Obsidian 全局配置 `obsidian.json`: | Platform | Path | -|----------|------| -| macOS | `~/Library/Application Support/obsidian/obsidian.json` | -| Windows | `%APPDATA%\obsidian\obsidian.json` | -| Linux | `~/.config/obsidian/obsidian.json` (or `$XDG_CONFIG_HOME/obsidian/obsidian.json`) | +|---|---| +| macOS | `~/Library/Application Support/obsidian/obsidian.json` | +| Windows | `%APPDATA%\obsidian\obsidian.json` | +| Linux | `~/.config/obsidian/obsidian.json` 或 `$XDG_CONFIG_HOME/obsidian/obsidian.json` | + +- 读取 `vaults` 字段(dict,value 含 `path` / `ts` / `open` 等),按 §2 规则选 vault。 + +### 三层全 miss → 硬停 + 引导 + +`spec_init.py` exit 3,输出 SKILL.md §Document Root Resolution 中的引导文案(中文,三种设置方式)。**不**回退到 cwd、不回退到 `~/specs`、不回退到项目目录。 + +这条规则保证 spec 永远不会"被静默散布到不可预期的位置"。 -`obsidian.json` 中的 `vaults` 字段包含所有已注册 vault 的路径、时间戳和 `open` 状态。 +## 2. 
多 vault 选择规则 -## 多 Vault 选择逻辑 +`spec_vault.py detect` 输出 vault 列表时按以下规则排序: -1. 过滤掉路径不存在的 vault。 -2. 优先选 `open: true` 的 vault,按时间戳降序取最新。 -3. 若有多个 `open: true` 的 vault,使用 `spec_choice.py` 让用户选择,然后通过 `spec_vault.py set --vault` 保存选择。 -4. 若无 `open` vault,取时间戳最大的一个。 +1. **过滤**:路径不存在的 vault 直接丢弃。 +2. **优先选 `open=true` 的**:按 timestamp(`ts` 字段)降序取最新。 +3. **若有多个 `open=true` 的 vault**:调用 `AskUserQuestion` 工具让用户选择(详见 §3)。 +4. **若无 `open=true` 的 vault**:取 timestamp 最大的一个,并在 chat 提示"自动选 `<path>`;如需切换请运行 `/specode:spec --set-vault <other-path>`"。 -## spec_vault.py 命令参考 +选定 vault 后调 `spec_vault.py set --vault <path>` 把结果持久化到 `~/.config/specode/config.json.obsidianRoot`(下次跳过自动检测)。 + +## 3. 多 vault 选择的 UI 形式 + +多 vault 时按 SELECTOR_PROMPTS 同款三段式 YAML 格式呈现选择器(动态构造;path 来自 `spec_vault.py detect` 输出)。这是**动态选择器**——hook 不预生成、不在 8 个固定场景(11 个 key)常量库中,由 SKILL.md 指引在 `--detect-vault` / 首次检测命中多 vault 时直接调工具。 ```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py detect ← 列出已安装的 vault,未检测到时给出手动指定提示 -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py set --vault <p> ← 绑定 vault(写入 config.json) -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py set --root <p> ← 直接指定根目录(写入 config.json) -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py get ← 显示当前解析到的根目录及来源 +## 选择器节点:选择 Obsidian vault + +**目的**:检测到多个已安装的 vault,需用户指定 specode 使用哪个目录。 + +**上下文**:当前未设置 obsidianRoot;`spec_vault.py detect` 返回 N 个 vault。 + +**前置动作(chat 简报,≤2 行)**:写一句"检测到 N 个 vault,请选择 specode 使用的目录。" + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "检测到多个 Obsidian vault,挑选 specode 使用的目录。" + header: "选择 vault" + multiSelect: false + options: + - label: "<vault-1 绝对路径>" + description: "open=<true|false>,最近活动 <ts>" + - label: "<vault-2 绝对路径>" + description: "open=<true|false>,最近活动 <ts>" + # 最多 4 个;超过 4 个时按 ts 取最新的 4 个,其余在 chat 引导 + # 用户走 /specode:spec --set-vault <path> + +**约束**: +- 
`multiSelect=false`;options ≤ 4(工具上限)。 +- **不**给"(推荐)"——用户必须显式选;即使第一个 vault 看起来最近。 +- 不要列出 `spec_vault.py detect` 之外的路径。 +- 工具自动提供 "Other" + ESC;用户可在 Other 中输入自定义 vault 绝对路径。 +- 工具返回后下一轮调 `spec_vault.py set --vault <chosen-path>` 持久化。 +- 调用工具后立即 end turn。 ``` -`set --vault <p>` 自动将 spec root 设为 `<p>/spec-in/<os>-<user>/specs`。 -`set --root <p>` 使用完全自定义路径,不附加 `spec-in/` 子目录结构。 +如果只有一个 vault,**不**呈现选择器,直接调 `set --vault <path>` 并在 chat 简报一句"已绑定 vault `<path>`,如需切换运行 `/specode:spec --set-vault <other>`"。 + +## 4. `~/.config/specode/config.json` 生命周期 + +写入时机(仅这两种): + +1. **首次 Obsidian 检测后**:`resolve_spec_root` 检测到 vault → 自动保存。后续调用直接读此文件,不再重新检测。 +2. **显式设置**:用户运行 `/specode:spec --set-vault <path>` 或 `--set-root <path>`,立即覆盖旧值。 + +文件内容示例: + +```json +{ + "version": 1, + "obsidianRoot": "/Users/alice/Documents/main-vault", + "rootOverride": null, + "specRootCache": "/Users/alice/Documents/main-vault/spec-in/macos-alice/specs", + "lastDetectedAt": "2026-05-19T09:30:00Z" +} +``` + +`specRootCache` 是计算结果缓存(vault + device_segment);若 `obsidianRoot` 或 `rootOverride` 改动,CLI 同步刷新。 + +不会在其他情况自动创建。Obsidian 未安装且未显式设置 → `resolve_spec_root` 返回 `None` → `spec_init.py` 抛引导提示并 exit 3。 + +## 5. 
跨会话路径读取 + +对于持久 session 和 `/specode:continue`: + +- 文档根目录从**各 spec 自身**的 `.config.json` 的 `documentRoot` 字段直接读取。 +- **不**依赖 vault 检测或 `~/.config/specode/config.json`。 +- vault 路径解析仅在**创建新 spec**(`/specode:spec <需求>`)时需要。 + +这保证已落地 spec 即使在不同设备 / 不同 vault 配置下仍能稳定恢复。 + +### 5.1 `/specode:continue` 无 slug 时的查找流程(**禁止 Grep 项目目录**) + +> spec 文档**不在项目代码目录里**——它们在 `<vault>/spec-in/<os>-<user>/specs/` 之下(见 §1 目录约定)。模型**不能**用 `Grep` / `Glob` 去项目根目录扫 `**/.spec/**` 或 `**/specs/**`——找不到就会误判"无可继续 spec",但实际上 spec 在 vault 里。 + +正确流程(必须严格按这个顺序,不要发挥): + +```bash +# step 1: 拿当前已配置 doc_root(只读 config.json,不重新检测) +python3 plugins/specode/scripts/spec_vault.py status +# → {"root": "...", "source": "env|config|auto|none"} + +# step 2: 若 source=none → 提示用户运行 /specode:spec --set-vault <p> 后 end turn +# 若 source 有效 → 列出该 root 下全部 spec +python3 plugins/specode/scripts/spec_session.py list-specs +# → {"root": "...", "source": "...", "specs": [ +# {"slug": "...", "phase": "...", "lock_state": "held|free|stale", +# "holder": "abc12345", "displayName": "...", "iterationRound": N, +# "mtimes": {...}}, +# ... +# ], "ok": true} +``` + +`list-specs` 的输出已经聚合了 spec 元数据 + 锁状态 + 文档 mtime——不需要再去读各 spec 的 `.config.json`。按 SELECTOR_PROMPTS 同款三段式 YAML 格式调 `AskUserQuestion`(这也是**动态选择器**,不在 11 个固定常量中,由本节指引直接调工具): + +```text +## 选择器节点:选择要继续的 spec + +**目的**:用户运行 /specode:continue 无 slug;列出当前 doc_root 下全部可恢复 spec,让用户选。 + +**上下文**:当前 root=<root>,source=<env|config|auto>,找到 N 个 spec。 + +**前置动作(chat 简报,≤2 行)**:写一句"找到 N 个可继续 spec(M 个空闲 / K 个被持有),请选择。" + +**调用 `AskUserQuestion` 工具**: + +questions: + - question: "继续哪个 spec?" 
+ header: "选择 spec" + multiSelect: false + options: + - label: "<slug-1>" + description: "phase=<phase> 迭代=<N> lock=<held|free|stale> 最近修改 <ts>" + - label: "<slug-2>" + description: "phase=<phase> 迭代=<N> lock=<held|free|stale> 最近修改 <ts>" + # 最多 4 个;超过 4 个时按 last_heartbeat_at 取最近 4 个, + # 其余在 chat 引导用户用 /specode:continue <slug> 显式指定 + +**约束**: +- `multiSelect=false`;options ≤ 4。 +- **不**给"(推荐)"——用户必须显式选。 +- 工具自动提供 "Other",允许用户输入 spec slug 或路径作为自定义答案。 +- 工具返回后下一轮进入 `/specode:continue <slug>` 流程(详见 `references/workflow.md` §9.2)。 +- 调用工具后立即 end turn。 +- 锁状态描述用固定词:`held` / `free` / `stale`。 +``` + +`list-specs` 返回 `specs: []` → **不**调工具,直接在 chat 引导用户用 `/specode:spec <需求>` 创建新 spec。 + +**绝不允许的回退路径**: + +- ❌ `Grep('**/.spec/**')` 或 `Glob('**/specs/**')` 扫项目目录——spec 不在项目里 +- ❌ 看到 `list-specs.specs == []` 就说"项目里没有 .spec/ 目录"——`list-specs` 已经是权威答案,空列表就是"该 root 下确实没有 spec",引导用户用 `/specode:spec <需求>` 创建 +- ❌ 假设 spec 在 cwd 之下(spec 永远在 `<doc_root>/specs/<slug>/`) + +## 6. 旧位置警告 + +`/specode:spec --set-vault` / `--set-root` 执行后,`spec_vault.py` 会扫描历史 fallback 位置: + +- `<cwd>/specs/` +- `~/new project/specs/` +- `~/specs/` + +发现遗留 spec 目录 → 输出: + +```text +⚠ 旧位置仍有 N 个 spec(不会自动迁移): + - /path/to/old/spec-1 + - /path/to/old/spec-2 + ... + +如需迁移,请手动 mv 并更新各 spec 的 .config.json.documentRoot 字段; +否则旧位置 spec 在新 root 下不可见。 +``` + +最多列出 10 个,多余的提示总数。**不**自动迁移(避免静默移动用户文件)。 + +## 7. `spec_vault.py` 命令参考 + +```text +python3 plugins/specode/scripts/spec_vault.py detect + 列出已安装的 vault;未检测到时给出手动指定提示 + +python3 plugins/specode/scripts/spec_vault.py status + 显示当前解析到的根目录及来源(cli / env / config / auto) + +python3 plugins/specode/scripts/spec_vault.py set --vault <path> + 绑定 vault(写入 config.json.obsidianRoot;自动追加 spec-in/<os>-<user>/specs 子结构) + +python3 plugins/specode/scripts/spec_vault.py set --root <path> + 直接指定根目录(写入 config.json.rootOverride;不追加 spec-in/<os>-<user>/specs) +``` + +退出码:0 ok / 3 用户引导(含 hard-stop 提示)。 + +## 8. 
跨文档引用 + +- 三层解析的引导文案 → SKILL.md §Document Root Resolution。 +- 锁与多窗口接管 → `references/lock-protocol.md`。 +- 选择器三种类型与具体场景(非 vault 选择场景,如 takeover-options)→ `references/selectors.md`。 +- vault 内目录约定与 phase 序列的关系 → `references/workflow.md`。 diff --git a/plugins/specode/skills/specode/references/prompts.md b/plugins/specode/skills/specode/references/prompts.md deleted file mode 100644 index 49e4b29..0000000 --- a/plugins/specode/skills/specode/references/prompts.md +++ /dev/null @@ -1,173 +0,0 @@ -# Prompt Output Templates - -Unified format for all clarification / confirmation / selection outputs in specode. Every interaction point must conform to one of the templates below. - -## Selector Preference (Iron Rule) - -For any fixed-option decision (≤5 options), use `scripts/spec_choice.py`. - -- **TTY**: curses ↑/↓ + Enter; script writes the chosen label to stdout, exits 0. -- **Non-TTY (Claude Code Bash, CI)**: script prints the option block + `[specode:non-interactive] AWAITING_USER_CHOICE` sentinel on stdout, exits 0. Agent must forward the stdout block to the user verbatim and end the turn. **Do not** re-run the script in the same turn to "retry" or restate the options in your own words. - -Never ask "请回复确认/继续/取消" as plain text without running the script first — the script is the source of truth for option wording. - -## Template A — Fixed-option Confirmation - -Use for: workflow choice, document confirmation, task execution choice, `/continue` takeover, clarification completion. 
- -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "<标题以问号结尾?>" \ - --option "<选项 1>::<一行说明>::recommended" \ - --option "<选项 2>::<一行说明>" \ - --option "<选项 3>::<一行说明>" -``` - -Rules: - -- 标题以问号结尾,简洁明确 -- 选项标签 ≤8 字,用中文动词式(如 `确认`、`查看全文`、`继续沟通`、`强制接管`、`只读查看`、`取消`、`进入下一阶段`) -- 仅一个选项标 `recommended` -- 每个选项必须有一行说明 - -## Template B — Open-ended Clarification (Plan-mode) - -Use for: pre-requirements clarification, mid-workflow ambiguity that needs free-form answers. - -```text -=== 需求澄清 === -当前阶段:intake -源需求摘要:<一句话不超过 60 字> - -待确认问题: - -【阻塞】1. <问题描述> -【阻塞】2. <问题描述> -【可延后】3. <问题描述> - (未回答 → 写入 requirements.md 的"待确认问题"节) - -请按编号回答阻塞项。回答完成后将出现「澄清完成」选择器。 -``` - -Rules: - -- 标头三行固定:`=== 需求澄清 ===` / `当前阶段:<phase>` / `源需求摘要:<≤60 字>` -- 每条问题前必须标 `【阻塞】` 或 `【可延后】` -- 阻塞项 ≤5 条;超过则分轮提问,不要一次堆十几个 -- 可延后项必须注明"未回答 → 待确认问题节" -- 结束语固定:`请按编号回答阻塞项。回答完成后将出现「澄清完成」选择器。` -- agent 收到回答后 **end the turn**,下一回合先解析回答、再调澄清完成选择器(Template A) - -## Template C — List + Numeric Selection - -Use for: `/continue` 无参数时列出可继续 spec、多 vault 选择。 - -```text -=== <列表标题> === -配置根目录:<path> - -当前会话 (session: <id>) - ► <slug> <name> <phase> <m/n 任务> <lock state> - -其他窗口 - <slug> <name> <phase> <m/n 任务> <lock state> - -可继续的全部 specs: - 1. <slug> <name> <phase> <m/n 任务> <lock state> - 2. ... 
- -请输入编号 [1-N],或输入 spec slug 名。 -``` - -Rules: - -- 三段固定(当前会话 / 其他窗口 / 全部 specs);空段也保留标题 -- 每行列宽对齐(spec_session.py list-specs 输出已支持) -- 锁状态用固定词:`✓持有锁` / `⚠ 锁定于 <id>` / `○ 空闲` / `(已过期)` -- 结束语固定:`请输入编号 [1-N],或输入 spec slug 名。` - -## Template D — Read-only Status Banner - -Use for: 进入只读模式后每次响应前的提醒。 - -```text -[只读模式] 当前 session 未持有 spec 锁,所有写入操作将被拒绝。 -请用 /continue <slug> 选择"强制接管"恢复可写。 -``` - -固定 footer 末尾追加 `| [只读]`。 - -## Template E — Eviction Notice - -Use for: 被驱逐的会话下一次响应时。 - -```text -⚠ 你的会话已被 session <newSessionId> 强制接管。 -当前 spec 在此窗口已转为只读,请用 /continue 重新接管。 -``` - -## 标准选择器命令(直接复制使用) - -### Workflow 类型选择 - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "选择 spec 工作流类型?" \ - --option "Requirements::按需求驱动,先写需求再设计::recommended" \ - --option "Technical Design::先做技术设计,再回推需求" \ - --option "Bugfix::记录当前/期望/不变行为" -``` - -### 文档确认 - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "确认 <filename>?" \ - --option "确认::继续生成下一阶段文档::recommended" \ - --option "查看全文::在聊天中展示完整文档" \ - --option "继续沟通::先根据反馈修改当前文档" -``` - -### 任务执行 - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "是否开始执行 tasks?" \ - --option "开始 required tasks::只执行必需任务::recommended" \ - --option "开始 required + optional tasks::执行必需任务和可选任务" \ - --option "用 task-swarm 多 agent 并发::按阶段聚合派发 coder/reviewer/validator 子 agent(需已安装 task-swarm skill)" \ - --option "暂不 coding::只保留文档,不开始实现" -``` - -第三个选项 `用 task-swarm 多 agent 并发` 的协议见 `references/task-swarm.md`。 -如本机未安装 task-swarm skill(`~/.claude/skills/task-swarm/` 不存在),用户选这一项时温柔降级到第一项,并提示安装路径(`~/Git/task-swarm/install.sh`)。 - -### `/continue` 接管(spec 已被其他 session 锁定) - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "spec 已被 session <holder> 锁定,如何继续?" 
\ - --option "强制接管::驱逐另一窗口,本窗口接管写权::recommended" \ - --option "只读查看::加载文档但不写入,footer 标记 [只读]" \ - --option "取消::返回上一步" -``` - -### 澄清完成(Plan-mode 结束) - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "需求澄清是否完成?" \ - --option "进入下一阶段::开始 workflow 选择和文档生成::recommended" \ - --option "继续澄清::还有问题需要讨论" -``` - -### 验收通过(进入 iteration) - -```text -sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_choice.py --title "本轮验收是否通过?" \ - --option "验收通过::进入 iteration 阶段,等待下一轮迭代::recommended" \ - --option "继续修改::返回 acceptance 阶段调整" -``` - -## Forbidden Phrasing - -下列措辞**禁止**出现在 agent 面向用户的输出中: - -- "够了"、"差不多"、"应该可以了"——口语化,改用选择器的正式选项 -- "随便选一个"、"看你"——必须明确推荐项或问具体问题 -- "我猜……"、"我假设……"——禁止猜测;走 Template B 澄清 -- "稍等"、"我来想一下"——直接输出结果或结束 turn 等回复,不要中间填充语 diff --git a/plugins/specode/skills/specode/references/sample-analysis.md b/plugins/specode/skills/specode/references/sample-analysis.md deleted file mode 100644 index e8567a0..0000000 --- a/plugins/specode/skills/specode/references/sample-analysis.md +++ /dev/null @@ -1,123 +0,0 @@ -# Kiro Sample Analysis - -Observed source: - -```text -/Users/xueqiang/Git/markdown/.kiro/specs/undo-redo-support/ -├── requirements.md -├── design.md -├── tasks.md -└── .config.kiro -``` - -The sample is a requirements-first feature spec for undo/redo support in a macOS Markdown editor. - -## Directory Shape - -Kiro stores one folder per concrete requirement: - -```text -.kiro/specs/<spec-name>/ -``` - -This skill adapts that shape to the requested project layout: - -```text -<document-root>/<requirement-name>/ -``` - -Do not create an extra `spec/` child folder in this project's output layout. - -## `requirements.md` Pattern - -The sample structure: - -```text -# 需求文档 -## 简介 -## 词汇表 -## 需求 -### 需求 1:... -**用户故事:** ... -#### 验收标准 -1. WHEN ..., THE ... SHALL ... -``` - -Important characteristics: - -- Chinese headings when the user context is Chinese. 
-- Intro includes implementation context and observed problem roots. -- Glossary defines local domain terms. -- Requirements are numbered. -- Each requirement has a user story. -- Acceptance criteria use EARS-style English modal verbs inside Chinese documents. -- Criteria are specific enough to map into tasks. - -## `design.md` Pattern - -The sample structure: - -```text -# 设计文档:<feature>(<slug>) -## 概述 -## 架构 -### 现有架构 -### 修复后架构 -## 组件与接口 -## 数据模型 -## 正确性属性 -``` - -Important characteristics: - -- Design starts by restating the implementation goal. -- It names current failure points before proposing changes. -- It uses code snippets only where useful for precision. -- It prefers minimal invasive changes over broad rewrites. -- It includes correctness properties that map back to requirements. - -## `tasks.md` Pattern - -The sample structure: - -```text -# 实现计划:<feature>(<slug>) -## 概述 -## 任务 -- [ ] 1. High-level task - - [~] 1.1 Subtask in progress - - implementation bullets - - 文件:... - - _需求:6.1、6.2_ -- [ ] 4. 检查点 —— 确保所有测试通过 -``` - -Important characteristics: - -- Tasks are nested and actionable. -- Task numbers are human-readable, not opaque IDs. -- `[~]` is used for in-progress work. -- Requirement traceability uses `_需求:..._`. -- Checkpoint tasks appear between implementation stages. -- Optional task notes may be used for lower-priority work. 
- -## `.config.kiro` Pattern - -The sample contains: - -```json -{"specId": "...", "workflowType": "requirements-first", "specType": "feature"} -``` - -This skill uses `.config.json` in the generated requirement folder for portability: - -```json -{ - "specId": "...", - "workflowType": "requirements-first", - "specType": "feature", - "documentRoot": "...", - "requirementName": "...", - "createdBy": "specode" -} -``` diff --git a/plugins/specode/skills/specode/references/selectors.md b/plugins/specode/skills/specode/references/selectors.md new file mode 100644 index 0000000..3a092a2 --- /dev/null +++ b/plugins/specode/skills/specode/references/selectors.md @@ -0,0 +1,148 @@ +--- +description: Use when 准备调 AskUserQuestion 呈现 phase-gate selector,或对 selector 选项 / 文案 / 类型 / Other 兜底有疑问。详述 8 个固定场景模板与三类骨架。 +--- + +# Selectors — `AskUserQuestion` 调用规范 + +每个 phase-gate 节点必须**调用宿主内置 `AskUserQuestion` 工具**呈现选择器;工具自身渲染 chip-tabs / 选项列表 / 上下键导航 / 回车提交 / ESC 取消 / "Other" 自定义输入 UI。模型只负责传参,**绝不**自己输出 markdown 列表让用户回复编号。 + +本文件给出 8 个固定场景的**类型骨架 + 总览索引**;完整模板文本是 `spec_session/_selectors.py` 的 `SELECTOR_PROMPTS` 字典字面量(按 key 查找)。`doc-confirm-tasks` 已合并进 `tasks-execution`(见总览表 §A4 行)。 + +--- + +## 类型 → AskUserQuestion 参数形态 + +| 类型 | `questions` 数组 | `multiSelect` | UI 形态 | +|---|---|---|---| +| **A 单列单选** | 1 个 question | `false` | 一个问题 + N 个互斥选项;用户上下键选 + 回车 | +| **B wizard(多项串行决策)** | 2-4 个 question | 每个都 `false` | 顶部 chip-tabs(每个 question 一个 tab)+ 每 tab 单选;全部填完才能 Submit | +| **C 复选框多选** | 1 个 question | `true` | 一个问题 + N 个非互斥选项 + checkbox UI | + +公共参数约束(来自 `AskUserQuestion` 工具本身): +- `questions`: 1-4 项 +- 每个 question 的 `options`: 2-4 项 +- `question`: 完整问句,结尾建议带 `?` +- `header`: ≤12 字符 chip 短标签 +- `description`: 选项的一句话说明(描述含义 / trade-off) +- `preview`: 可选;为有具体 artifact 对比的场景(mockup / 代码片段 / 配置)添加 markdown 渲染预览。**不要**为简单问题写 preview。 + +公共禁区: +- ❌ 在 selector 文本之外添加 "请回复编号"、"请输入选项名称"、`AWAITING_USER_CHOICE` 等措辞。 +- ❌ 手工加 `Type something` / `Chat about this` / `Submit` 
等保留位选项——`AskUserQuestion` 工具自动提供 "Other" + ESC。 +- ❌ 选项数超过 4 → 收敛 / 重新拆问题;不要硬塞。 +- ❌ `multiSelect=true` 配 `preview`(工具仅在单选时支持 preview)。 + +--- + +## 类型变体 A+:单选 + 预览(side-by-side 布局) + +类型 A 的视觉增强变体——为每个 option 额外传 `preview` 字段后,宿主 UI 自动切到**左右分栏**布局:左侧垂直选项列表,右侧 monospace 渲染当前焦点选项的 preview 内容(markdown,支持多行)。用户在选项间上下移动时,右侧 preview 实时切换,方便**逐项对比具体 artifact**。 + +**何时考虑**: + +- 让用户在多份 UI mockup / 代码片段 / 配置 / 示意图之间挑一份。 +- 文字描述不够直观、需要"看东西做选择"。 + +**何时不要用**: + +- 简单偏好题(label + description 已经说清楚)——徒增视觉负担。 +- 多选场景(`multiSelect=true`)—— 工具仅在单选时支持 preview。 +- 候选不存在可视差异(仅取舍而无具象差别)。 + +**调用形态**: + +```text +questions: + - question: "<具体问题>?" + header: "<≤12 字>" + multiSelect: false + options: + - label: "<选项 A 名>" + description: "<选项含义/trade-off>" + preview: | + <多行 markdown / ASCII mock / 代码片段 / 配置示例> + - label: "<选项 B 名>" + description: "..." + preview: | + <对应的另一份 artifact> +``` + +**当前状态**:8 个固定场景**均未启用** A+ 形态;本节是模板留档,待将来出现"让用户视觉对比 artifact"的 phase-gate 时按本骨架填空即可。如果新增固定场景使用 A+,应同步在 `spec_session/_selectors.py` 的 `SELECTOR_PROMPTS` 内加常量并补到下方 8 场景表里。 + +--- + +## 8 个固定场景常量库 + +下列 8 个固定场景(11 个 key,其中 `doc-confirm-{requirements,bugfix,design}` +共享 3 个变体)的**完整提示词文本**是 hook 注入时的运行时常量,单一事实源为 +`plugins/specode/scripts/spec_session/_selectors.py` 的 `SELECTOR_PROMPTS` +字典。本文件仅给总览 + 链接,不再 reprint 模板原文——重复维护两份会导致 +drift 和歧义。 + +| § | key | 类型 | 触发 phase | header | _selectors.py 行号 | +|---|---|---|---|---|---| +| §A0 | `project-root-choice` | A | spec 创建后选项目目录 | 项目目录 | L19 | +| §A1 | `workflow-choice` | A | 进入 requirements 前 | 工作流选择 | L68 | +| §A2 | `clarification-done` | A | intake 澄清结束 | 澄清完成?
| L157 | +| §A3 | `doc-confirm-requirements` | A | requirements.md 生成后 | 需求确认 | L187 | +| §A3 | `doc-confirm-bugfix` | A | bugfix.md 生成后 | 缺陷确认 | L222 | +| §A3 | `doc-confirm-design` | A | design.md 生成后 | 设计确认 | L257 | +| §A4 | `tasks-execution` | A | tasks.md 生成后(含调整回退) | 执行方式 | L292 | +| §A5 | `takeover-options` | A | `/specode:continue` LockHeld | 接管选项 | L332 | +| §A6 | `acceptance-gate` | A | acceptance 完成 | 验收门 | L365 | +| §B1 | `clarification-wizard` | B | intake,写需求前 | 需求澄清 wizard | L107 | +| §C1 | `iteration-scope` | C | iteration 子循环开始 | 迭代范围 | L398 | + +### 阅读模板的方式 + +完整 `question` / `header` / `options[*].label` / `options[*].description` +原文都在 `SELECTOR_PROMPTS[<key>]` 字符串字面量里——Python triple-quoted +string,markdown 语法直接渲染可读。运行时 hook 命中 `pending_selector` +后会把对应字典值拿出来做占位符替换(`<slug>` / `<phase>` / `<spec_dir>` / +`<source_text_head>` 等)后 emit 到 `additionalContext`,主代理读到该模板 +后**逐字**作为 `AskUserQuestion` 参数。 + +### 调模板时必读的硬约束(与具体 key 无关) + +1. **不要**翻译 / 重写 / 简化 `question` 或 `options[*].label` / + `options[*].description`——主代理读到 hook 注入的 YAML 块,作为 + `AskUserQuestion` 参数**逐字**传入。 +2. **不要**在 selector 之外加 `Type something` / `Chat about this` / + `请回复编号` 等保留位——工具内置 "Other" + ESC。 +3. **不要**自行变换类型(如把 A 改 C)—— 类型与 key 的映射在 + 上方表格中固定。 +4. 每个 selector 模板末尾都有 **「用户选定后流程」** 段,描述 + "user 点完选项后**同一 turn 内**继续做什么"——必须读完那段再决定下一步。 + +### Drift 守卫 + +`tests/test_catalog.py` 与 `tests/test_selectors_drift.py` 共同保证: +- `_selectors.py` 的 `SELECTOR_PROMPTS` 字典 11 个 key 与本表 11 行一一对应; +- 本表每个 key 在 `_selectors.py` 中实际存在。 + +不再做"selectors.md ```text 块与字典字面量 byte-identical"全文对账——单一 +事实源就是 `_selectors.py`,本表是它的目录索引。 + + +## hook 注入与模板替换 + +`UserPromptSubmit` 的 `on-user-prompt` hook 在 `sessions/<id>.json.pending_selector` 命中某 key 时,把 `SELECTOR_PROMPTS[key]` 拿出来做字符串替换(`<slug>` / `<phase>` / `<spec_dir>` / `<source_text_head>` 等),包入 `additionalContext` 注入。 + +实际注入文本采用三段式 YAML 缩进格式(**目的** / **上下文** / **前置动作** / 工具参数 / **约束** / **用户选定后流程**)。 + +模型看到注入后**唯一动作**: + +1. 
在 chat 写 hook 提示中"前置动作"要求的简报内容(doc-confirm 类型必须含 3-8 条变更要点;接管类型必须含锁持有者信息)。 +2. 调 `AskUserQuestion` 工具,参数按 hook 给的 YAML 块**逐字**翻译为工具参数(不要翻译选项 label / description)。 +3. 工具返回后按用户选择推进下一步(调对应 CLI 子命令)。 + +绝不允许: +- ❌ 把模板里的 questions / options 翻译 / 重写 / 加减项。 +- ❌ 工具调用之外另开 chat 输出"也可以告诉我..."。 +- ❌ 跳过工具直接做下一步(即使你"觉得"用户的意图明确)。 + +--- + +## 自主判断(hook 失败时) + +按上面 8 个场景对照表选 key → 用对应模板直接调 `AskUserQuestion`。hook 是**提醒**而非**触发**——hook 失效时仍要按本文规范走。 diff --git a/plugins/specode/skills/specode/references/task-swarm-example.md b/plugins/specode/skills/specode/references/task-swarm-example.md index 1ba28c9..eb1d66d 100644 --- a/plugins/specode/skills/specode/references/task-swarm-example.md +++ b/plugins/specode/skills/specode/references/task-swarm-example.md @@ -1,75 +1,111 @@ -# 实现计划:用户登录流程(user-login) +--- +description: Use when 想看一份完整可用的 tasks.md 示例,理解 @writes / @reads / @depends-on 标签的实际写法。 +--- -Spec Type: Feature -Workflow: requirements-first -Status: Tasks Confirmed -Review Status: confirmed +# task-swarm 示例:3 阶段 / 8 任务 -> 这是一份 **specode 风格** 的 tasks.md 示例。 -> 在 specode 会话中走到"任务执行"selector 时选第 3 项「用 task-swarm 多 agent 并发」, -> 或直接 `/task-swarm <此文件路径>` 触发。 +下面是一份**完整可用**的 tasks.md 示例,演示: + +- `@writes` / `@reads` / `@depends-on` 标签用法 +- `_需求:x.y_` traceability +- 检查点任务(→ validator 用) +- 同文件冲突的两个 stage 如何被自动分到不同 group + +--- + +## 示例 tasks.md + +```markdown +# 用户认证系统 — tasks.md + +## 阶段 1: 数据层 +- [ ] 1.1 定义 User 模型 @writes:src/models/user.py _需求:1.1_ +- [ ] 1.2 定义 Session 模型 @writes:src/models/session.py _需求:1.2_ +- [ ] 1.3 数据库迁移脚本 @writes:migrations/0001_init.sql @reads:src/models/user.py,src/models/session.py _需求:1.3_ + +## 阶段 2: 服务层 +- [ ] 2.1 AuthService 登录/登出 @writes:src/auth/service.py @reads:src/models/user.py,src/models/session.py @depends-on:1 _需求:2.1,2.2_ +- [ ] 2.2 PasswordHasher 工具 @writes:src/auth/hasher.py _需求:2.3_ +- [ ] 2.3 LockoutCounter 工具 @writes:src/auth/lockout.py _需求:2.4_ + +## 阶段 3: API 层(依赖服务层 + 与阶段 1 同文件 user.py) +- [ ] 3.1 /login endpoint 
@writes:src/api/login.py @reads:src/auth/service.py,src/auth/lockout.py @depends-on:2 _需求:3.1_ +- [ ] 3.2 User schema 验证扩展 @writes:src/models/user.py @reads:src/api/login.py @depends-on:1 _需求:3.2_ +``` + +--- + +## 期望的 group 切分(max_parallel=4) + +``` +group 0: + - 阶段 1(writes: src/models/user.py, src/models/session.py, migrations/0001_init.sql) + - 阶段 2 不能进 group 0:depends-on=1 +group 1: + - 阶段 2(writes: src/auth/service.py, src/auth/hasher.py, src/auth/lockout.py) +group 2: + - 阶段 3(writes: src/api/login.py, src/models/user.py) + ↑ 注意:阶段 3 与阶段 1 都写 src/models/user.py(文件冲突) + 即使没有 @depends-on:1,也会被自动分到独立 group + 这里阶段 3 实际 depends-on=2,所以排在 group 2 +``` + +主代理派 coder 时(plan 输出): + +``` +group 0:fork 1 个 coder(阶段 1) +group 1:fork 1 个 coder(阶段 2) +group 2:fork 1 个 coder(阶段 3) +``` + +> 注:每个 stage 即使含多个子任务也由**单个** coder 接手(按子任务清单顺序完成); +> 跨 stage 文件冲突时才切 group。 + +--- + +## 一轮 validator fail → v-fix → pass 的注释块例子 + +writeback 后,阶段 3 末尾会追加: + +```markdown +## 阶段 3: API 层(依赖服务层 + 与阶段 1 同文件 user.py) +- [x] 3.1 /login endpoint @writes:src/api/login.py @reads:src/auth/service.py,src/auth/lockout.py @depends-on:2 _需求:3.1_ +- [x] 3.2 User schema 验证扩展 @writes:src/models/user.py @reads:src/api/login.py @depends-on:1 _需求:3.2_ + +> ✅ validator g3-r2 pass: `pytest tests/test_login.py -v` > -> task-swarm 会按一级阶段聚合派发: -> - 阶段 1(3 子任务)→ 1 个 coder + 1 个 reviewer -> - 阶段 2(检查点)→ 1 个 validator -> - 阶段 3(2 子任务)→ 1 个 coder + 1 个 reviewer -> - 阶段 4(检查点)→ 1 个 validator -> - 阶段 5(可选,coder-only)→ 1 个 coder +> 评审建议(task-swarm reviewer): +> - [P0 已修复] src/auth/service.py:34 [req:2.1] — login 失败未区分锁/密码错 +> - [P0 已修复] src/api/login.py:8 [security] — 缺 rate limit +> - [P1 未修复] src/models/user.py:12 — email 字段格式校验缺失 +> - [adv 未修复] src/auth/service.py:50 — error wrapping 风格(无证据标签,自动降级) > -> 总共 7 个 subagent,而不是朴素 1:3 展开的 21 个。 - -## 概述 - -实现用户登录流程:账号密码登录、登出、密码强度校验、登录失败锁定。 - -## 任务 - -- [ ] 1. 
实现登录核心流程 - - [ ] 1.1 写 User model - - 文件:`src/models/user.py` - - 验证:`pytest tests/test_user.py` - - _需求:1.1_ - - [ ] 1.2 写 auth service - - 文件:`src/auth/service.py` - - 验证:`pytest tests/test_auth_service.py` - - _需求:1.2_ - - [ ] 1.3 写 login controller - - 文件:`src/api/login.py` - - 验证:`pytest tests/test_login_api.py` - - _需求:1.3_ - -- [ ] 2. 检查点 — 登录核心流程通过端到端 - - 运行 `pytest tests/test_login_e2e.py -v`。 - - 如有失败,停止继续执行并向用户确认。 - -- [ ] 3. 实现登出与会话失效 - - [ ] 3.1 auth service 增加 logout(token) - - 文件:`src/auth/service.py` - - _需求:2.1_ - - [ ] 3.2 logout controller - - 文件:`src/api/logout.py` - - _需求:2.2_ - -- [ ] 4. 检查点 — 登出流程跑通 - - 运行 `pytest tests/test_logout.py -v`。 - -- [*] 5. 优化:登录失败锁定计数器 - - [ ] 5.1 加 Redis 失败计数 @swarm:coder-only - - 文件:`src/auth/lockout.py` - - _需求:可选_ - -## 测试要点 - -供测试人员快速了解需要验证的场景。每行对应 `requirements.md` 中的一条 SHALL;需求变更时由 agent 在同一轮 turn 内同步本节。 - -- [ ] 输入合法用户名+密码 → 登录成功,返回 token(需求 1.1) -- [ ] 密码少于 8 位 → 拒绝并提示"密码长度不足"(需求 1.2) -- [ ] 已登录用户调用 /logout → token 失效(需求 2.1) -- [ ] 连续 5 次错误密码 → 账号锁定 15 分钟(需求 1.3 / 可选 5.1) - -## 验收 - -- [ ] 所有 required 任务完成。 -- [ ] 所有指定验证命令通过。 -- [ ] 未完成或跳过的 optional 任务已记录。 -- [ ] 用户确认验收。 +> validator 历轮: +> - g3-r1: fail — fail signature 4a2b3c1d8e9f +> - g3-r2: pass +``` + +--- + +## 检查点任务(含 _需求:x.y_)的作用 + +`_需求:x.y_` 让 validator 在跑测试时能把"测试通过 / 失败"对回到具体 SHALL 条款: + +- 测试失败 → validation.md 的"按子任务的验证结果"那行写明 `_需求:x.y_` +- writeback → tasks.md 注释里也保留该编号 +- spec_lint.py 在 acceptance phase 时会再校验"全部 SHALL 是否都有对应测试" + +--- + +## @depends-on 的作用 + +- group 切分时 stage X depends_on Y → X 的 group index 必须严格大于 Y 的 group index。 +- 跨 group 自动串行:上一 group writeback 完成才能开始下一 group。 + +--- + +## 边界情况 + +- 一个 stage 在 tasks.md 没写 `@writes` → 该 stage 视为 "无文件冲突约束",会被尽量打包到当前 group。 +- 一个 stage 在 tasks.md 没写 `@depends-on` → 不强制顺序,仅靠 @writes 冲突切 group。 +- 一个 stage 没写任何子任务(只有 `## 阶段 N: 标题`)→ 解析器跳过该 stage(视为占位)。 diff --git a/plugins/specode/skills/specode/references/task-swarm.md b/plugins/specode/skills/specode/references/task-swarm.md index 
4385f08..bbd30ed 100644 --- a/plugins/specode/skills/specode/references/task-swarm.md +++ b/plugins/specode/skills/specode/references/task-swarm.md @@ -1,271 +1,320 @@ -# Task-Swarm 设计规约(CLI 编排 + 多角色 agent 物理隔离) +--- +description: Use when 涉及 task-swarm / reviewer / validator / v-fix / p0-fix / @writes / @depends-on / writeback / deadloop / 多 agent 并发执行 tasks。详述 task-swarm 完整协议(角色边界、状态机、产物 schema、死循环保护)。 +--- -specode "任务执行" selector 的第三个选项 `用 task-swarm 多 agent 并发`。 +# task-swarm 协议参考(references/task-swarm.md) -> ⚠️ 本文档不是运行时手册。运行时模型只看 `commands/task-swarm.md` 里的 7 步 CLI 协议。本文档是**设计规约**:为什么这样设计、状态机怎么运转、铁律如何兜底——给读代码、修脚本、扩展功能的人看。 +本文档是 `/specode:task-swarm` 命令背后的完整协议。 +主代理在 task-swarm run 期间必须严格按本协议工作。 -## 它解决什么 +--- -specode 默认的 §7 Task Execution 是**单 agent 顺序执行**:主会话一个一个跑任务、自己写代码、自己跑验证、自己打 `[x]`。等于让同一个 LLM 上下文自我背书——这是"自我认可"问题。 +## 1. 角色与并发度 -task-swarm 模式把任务派发给**不同角色的独立子 agent**: +| 角色 | 是否并发 | 工具白名单(物理隔离) | 何时被 fork | +|---|---|---|---| +| `task-swarm-coder` | **多实例并行** | `Bash, Read, Edit, Write, Grep, Glob` | coding / p0-fix / v-fix 各 phase | +| `task-swarm-reviewer` | **单实例** | `Bash, Read, Grep, Glob`(无 Edit/Write) | review phase(每 review-round 一次) | +| `task-swarm-validator` | **单实例** | `Bash, Read, Grep, Glob`(无 Edit/Write) | validation phase(每 validation-round 一次) | +| `task-swarm-planner` | 视情况 | `Bash, Read, Grep, Glob, Write` | 可选:tasks.md 不够细时 | -- **coder** 只写代码,没有评审能力 -- **reviewer** 只评审,**工具层面拿不到 Edit/Write**——想改代码也改不了 -- **validator** 只验收,同样**没有 Edit/Write**,必须用真实命令证明结论 -- **planner** 只拆任务(一般 specode 已经在 tasks 阶段拆好,planner 备用) +reviewer / validator 单实例的理由: +- reviewer = 一个上帝视角的读代码人,要对全部 coder 产物有整体判断;切成多份会破坏交叉关联检测。 +- validator = 跑测试的客观信号,并发跑没意义;同一测试套件单进程跑一次就够。 +- coder = 并行收益最大(多 stage 互不干扰时各占一份文件)。 -子 agent 之间**无共享上下文**,只能通过 `outbox → inbox` 文件交换信息。 -这是工具+上下文的双重物理隔离。 +--- -## 总体架构(控制反转) +## 2. 
文件冲突避免 -旧设计:**主模型**理解全部状态机,自己解析 tasks.md / outbox,自己 Edit tasks.md。问题——长上下文里轮号心算容易乱、subagent 输出格式漂移就误判、回写时容易动到 traceability。 +`task_swarm.py init` 解析 tasks.md 时按以下规则把 stage 切成 group: -新设计:**脚本**持有所有确定性逻辑,**主模型只负责派单与文本生成**: +1. 提取每个 stage 的 `@writes:<files>` 列表(含通配符展开)。 +2. 在同一 group 内:任意两个 stage 的 @writes 集合**不相交**且**无 @depends-on 关系**。 +3. 跨 group:上一 group 全部 pass 后才能开始下一 group。 +4. 一个 stage 即使可以并发也不会被拆——以"stage = coder 任务粒度的最大单元"为铁律。 -| 决策点 | 由谁负责 | -|---|---| -| 解析 tasks.md → 派发计划 | `task_swarm_parse_md.py` | -| 状态机推进(轮号、收敛、死循环) | `task_swarm_state.py` | -| 解析 result.md / review.md / validation.md | `task_swarm_outbox.py` | -| 渲染 subagent prompt(含 @writes、修复指引) | `task_swarm_prompt.py` | -| 回写 tasks.md(行级安全 Edit) | `task_swarm_writeback.py` | -| 综合调度 + JSON 指令 | `task_swarm.py` CLI | -| 工具层兜底(INV-7/8/9) | `task_swarm_guard.py` + `spec_guard.py` | +主代理在 coding phase **同一 message 内**发出多个 Task block(每个对应当前 group 的一个 stage),由宿主并行执行。 + +**强约束**:派发 coder 时,必须先调 `task_swarm.py plan` 拿当前 group 的 stage 列表,**逐字拷贝**到 Task block——绝不可凭印象自己派;脚本已经处理过文件冲突分组。 -主模型只跑:`init → loop(next → fork → parse → advance → ...) → writeback → done`。 +--- -## CLI 协议(`commands/task-swarm.md` 的实现细节) +## 3. Phase 状态机 -七个子命令: +``` +init → coding → review ─┬─► p0-fix ──► validation + │ │ + └─►(no P0) ────────┘ + │ + ┌────────────────┤ + │ │ + (pass) (fail) + │ │ + ▼ ▼ + writeback v-fix ──► validation (循环) + │ + ▼ + next group / done +``` -| 子命令 | 输入 | 输出 | -|---|---|---| -| `init --tasks <path>` | tasks.md 路径 | run_id + 初始派发计划 | -| `next --run <id>` | run_id | `{"action": "fork|writeback|wait|done", ...}` | -| `parse --run <id> --stage N --role R --round K` | outbox 文件 | `{"judgment": "...", ...}`(含 schema-error 兜底) | -| `advance --run <id> ... 
--judgment J` | 判定 | 推进 state.json | -| `writeback --run <id> --stage N` | 收敛状态 | 安全 Edit tasks.md | -| `status --run <id>` | — | 人话状态汇报 | -| `resolve --run <id>` | — | run_dir 路径 | +| Phase | 触发 | 子代理 | 完成条件 | 失败行为 | +|---|---|---|---|---| +| `coding` | 进入新 group | 并发 N 个 coder | 全部 STATUS: ok | 任一 fail → 主代理报告用户、整个 group failed | +| `review` | coding 完成 | 单个 reviewer | review.md 含分级 P0/P1/P2 | reviewer fail → 主代理报告,**继续走** validation(reviewer 是 advisory) | +| `p0-fix` | review 含**带证据标签**的 P0 | 并发 M 个 coder(按 P0 涉及文件分组) | 全部 STATUS: ok(不再 review) | 任一 fail → 主代理把 P0 标"未修复"写入 tasks.md,继续走 validation | +| `validation` | p0-fix 完成 或 review 无 P0 | 单个 validator | validator pass | validator fail → 进入 v-fix | +| `v-fix` | validation fail | 并发 M 个 coder(按 validator 修复指引涉及文件分组) | 全部 STATUS: ok | 任一 fail → 主代理报告用户、整个 group failed | +| `validation` (再) | v-fix 完成 | 单个 validator | pass → writeback;fail → v-fix 循环 | 死循环检测:连续 3 轮同一 fail 签名 → 整个 group failed | +| `writeback` | validation pass | 主代理调 CLI | tasks.md `[ ]` → `[x]` + 评审块追加 | line-safe diff 失败 → 主代理报错、不推进 | -主模型必须按 next 返回的指令字段去 fork——`subagent_type`、`prompt_file`、`workspace` 都由脚本提供。 +**关键差别(与原 0.3.0 方案)**: +- "整个 group 一起 coding → 一次 reviewer → 一次 validator",reviewer / validator 看的是 group 范围。 +- reviewer P0 → coder 修复**只触发一次**(修完不再 re-review,直接进 validation)。 +- validator fail → coder 修复**循环**到 pass。 +- 死循环检测:v-fix → validation 连续 3 轮同一 fail 签名(测试名 + assertion 哈希)→ 整个 group failed。 -## 角色到 subagent 的映射 +--- -| @role | subagent_type | 职责 | 工具 | -| --- | --- | --- | --- | -| `coder` | `specode:task-swarm-coder` | 写 / 改业务代码,按子任务清单顺序完成阶段下所有叶子;修复轮按 validation.md 的失败指引定向修补 | Bash, Read, Edit, Write, Grep, Glob | -| `reviewer` | `specode:task-swarm-reviewer` | 评审上游 coder 的产出,输出 P0/P1/P2 分级建议(**advisory 模式**:不参与修复循环,不阻塞推进;产出会作为 `> ⚠️` 注释写入 tasks.md 供使用者审阅) | Bash, Read, Grep, Glob **(无 Edit/Write)** | -| `validator` | `specode:task-swarm-validator` | 跑测试 / lint / 端到端检查,给 pass/fail 判定;fail 时**必须**输出"给 coder 的修复指引"(validator 
是阻塞门,coder ↔ validator 形成修复循环) | Bash, Read, Grep, Glob **(无 Edit/Write)** | -| `planner` | `specode:task-swarm-planner` | 把粗粒度需求拆成 task-swarm 风格的 tasks.md | Bash, Read, Grep, Glob, Write | +## 4. 子代理产物 schema -reviewer 和 validator 故意没有 Edit/Write —— 这是工具层面的物理隔离。 +每个子代理 fork 时主代理把 prompt 文件预渲染到: -## 按一级阶段聚合派发 +``` +.task-swarm/runs/<run_id>/agents/<agent-key>/task.md +``` -specode tasks.md 的天然层级: +产物路径: -```markdown -- [ ] 1. 实现登录流程 ← 一级阶段 - - [ ] 1.1 写 user model ← 叶子任务 - - 文件:`src/models/user.py` - - _需求:1.1_ -- [ ] 2. 检查点 — 跑通登录流程 ← specode 内置 validator 任务 +``` +.task-swarm/runs/<run_id>/agents/<agent-key>/outbox/ + result.md ← coder + review.md ← reviewer + validation.md ← validator ``` -派发规则(由 `task_swarm_parse_md.py` + `task_swarm_state.py` 实现): +`agent-key` 命名约定: +- coder:`coder-g{group}-s{stage}-r{round}` +- p0-fix coder:`coder-p0fix-g{group}-r{round}-f{file-idx}` +- v-fix coder:`coder-vfix-g{group}-r{round}-f{file-idx}` +- reviewer:`reviewer-g{group}-r{round}` +- validator:`validator-g{group}-r{round}` -| 角色 | 派发粒度 | 数量 | -| --- | --- | --- | -| **coder** | 每个一级阶段一个(包揽阶段下所有叶子) | = 阶段数 | -| **reviewer** | 每个一级阶段一个 | = 阶段数 | -| **validator** | 复用 specode 的"检查点"任务 | = 检查点数 | +### 4.1 coder result.md schema -并发判定:**互不冲突**("文件:" 行的并集不相交)的阶段可以并发,受 `--parallel N` 约束(默认 3)。冲突或依赖未满足 → `next` 返回 `wait`。 +```markdown +# <agent-key>:<阶段标题或修复任务> -### 子任务标签(`@swarm:`) +## 上下文 +- specId / spec_dir / group / stage / round -| 标签 | 行为 | -| --- | --- | -| `@swarm:full` | 单独走 coder+reviewer+validator | -| `@swarm:coder-only` | 只 coder | -| `@swarm:skip` | 完全跳过 | -| 无标签 | 默认按阶段聚合 | +## 子任务状态 +- 2.1 user model: done — src/models/user.py +- 2.2 user service: failed — ImportError, 缺 deps -启发式默认(无标签时): -- `[*]` 可选任务 → 自动 coder-only -- 无 `_需求:` traceability → 自动 coder-only +## 关键变更 +- ... -冲突时优先级(高→低):`skip > full > coder-only > 启发式`。 -解析器在冲突时**不报错**,仅在 `warnings` 数组里留 `[INFO]` / `[WARN]` 行,可用 `init` 输出查看。 +## 给下游 reviewer 的提示(可选) +- ... 
-### 标签命名空间(避免混淆) +STATUS: ok | failed: <原因> | blocked: <原因> +``` -task-swarm 涉及**两套独立的标签命名空间**,二者作用完全不同、不能互换: +### 4.2 reviewer review.md schema -| 命名空间 | 出现位置 | 用途 | 由谁解析 | -|---|---|---|---| -| `@swarm:<word>` | tasks.md 的 **leaf 标题或子项行** | 控制派发策略(`full` / `coder-only` / `skip`) | `task_swarm_parse_md._arbitrate_tags` | -| `[<word>]` | reviewer 的 **review.md P0 行** | 标注 P0 证据来源(`req:x.y` / `security` / `contract`) | `task_swarm_outbox.parse_review` | +```markdown +# reviewer-g{group}-r{round} -二者**互不相干**——`@swarm:` 决定一个叶子任务是否参与评审/验收循环;`[req:...]` 决定 reviewer 提的 P0 是否阻塞 coder 修复轮。不要在同一行混用、也不要把 `@swarm:` 写到 review.md 或把 `[req:...]` 写到 tasks.md。 +## 结论 +needs-changes | approved-with-comments | approved -## 状态机 +## P0(必须带证据标签:[req:x.y] / [security] / [contract]) +- src/auth/service.py:34 [req:1.2] — login 失败未区分锁/密码错 +(如无 P0:本节写 `(none)`) -每个 stage 的生命周期: +## P1 +- src/models/user.py:12 — email 字段格式校验缺失 -``` -pending → running → converged ✔ - └→ failed ✗ - └→ skipped (全部 leaf 是 @swarm:skip) -``` +## P2 +- 命名 `auth_svc` 可改为 `auth_service` -`task_swarm_state.next_action()` 决定下一步(**R3 重构后**:reviewer 退出循环,coder ↔ validator 是唯一阻塞循环): +## 给使用者的提示 +- 一句话总结 +STATUS: ok ``` -pending stage: - └ kind=checkpoint → fork validator r1 - └ kind=stage → fork coder r1 -running stage, last action was: - coder ok: - └ kind=checkpoint → validator (re-run,validator 自己的轮号) - └ kind=stage, has full/default leaves → reviewer (advisory) - └ kind=stage, all coder-only → converge +**advance --phase review 解析**: +1. 提取所有 P0 项 + 证据标签。 +2. **无证据标签的 P0 自动降级为 advisory**。 +3. 若降级后仍有 P0 → 下一 phase = `p0-fix`,state.json 写 `p0_pending[]`。 +4. 若无 P0 → 下一 phase = `validation`。 +5. 
所有 P0/P1/P2 项(含降级的)都写入 `findings[]`,writeback 时落到 tasks.md。 - reviewer (任何 judgment) → converge(**advisory,不进循环**; - P0 / advisory_p0 摘要会被 writeback 写到 tasks.md 注释里给使用者看) +### 4.3 validator validation.md schema - validator pass → converge +```markdown +# validator-g{group}-r{round} + +## 判定 +pass | fail + +## 复现命令 +` ` `bash +cd <project root> +pytest tests/test_auth.py -v +` ` ` + +## 按子任务的验证结果 +- [x] 1.1 user model: pass +- [ ] 1.3 controller: fail — 5 次失败未锁账号 (_需求:1.3_) + +## 失败现场(fail 时必填) +` ` ` +FAILED tests/test_auth.py::test_lockout_after_5_failures +AssertionError: expected 423, got 401 +` ` ` + +## 给 coder 的修复指引(fail 时必填,不带 P0/P1 标签) +### 修复 1 — lockout 计数器 +- 文件: src/api/login.py +- 位置: login 失败分支 +- 问题: 没有调用 lockout 计数器 +- 建议: 引入 src/auth/lockout.py,记录失败次数,第 5 次返回 423 +- _需求:1.3_ + +STATUS: ok +``` - validator fail: - └ round >= validator_rounds → fail - └ else → coder (validator-fail-fix scope) → validator re-run +**advance --phase validation 解析**: +1. 抓"判定"行 → pass 或 fail。 +2. fail → 解析"给 coder 的修复指引"→ 输出 `fix_targets[]`(按文件分组)→ 下一 phase = `v-fix`。 +3. pass → 下一 phase = `writeback`。 +4. **死循环检测**:比对本轮 fail 签名(测试名 + assertion 文本哈希)与上一轮,连续 3 轮相同 → state.json 标 group `failed-deadloop`。 - validator loop / schema-error → fail -``` +--- -修复循环上限: +## 5. tasks.md 写回格式 -- **validator 默认 3 轮**(`--validator-rounds N`)—— validator 是跑代码下结论,测试 fail 是客观信号,给足修复机会 -- `--max-rounds N` 作为 fallback 默认 -- `--reviewer-rounds` 已弃用(reviewer 不再参与循环)。参数保留仅为兼容旧脚本 +`task_swarm.py writeback --run <id> --group <N>` 干两件事: -### reviewer P0 证据标签(advisory 分级) +1. group 内所有 stage 的 `[ ]` → `[x]`。 +2. 
在每个 group 最后一个 stage 下方追加一段 `> ` 注释块,含: + - validator 最终结论(pass 轮号 + 命令) + - 所有 review findings(P0 含证据标签、修复状态;P1/P2 含修复状态) + - validator 历轮简报(fail → pass 的轮次链) -reviewer 输出 P0 时必须带证据标签之一,否则 `task_swarm_outbox.parse_review` 会把它分类为 `advisory_p0`(仍写入 tasks.md 注释、但以 `(adv)` 前缀标记): +writeback 严格 line-safe:禁止改动 stage 标题、`@writes` / `@reads` / `_需求:x.y_` 等任何已有内容;只允许 checkbox toggle + 新增 `> ` 行。任何越界 diff 让 writeback `exit 1` 报错,主代理不能继续。 + +### 5.1 修复状态标签 | 标签 | 含义 | |---|---| -| `[req:x.y]` | 直接违反某条 `_需求:x.y_` 的 SHALL | -| `[security]` | 安全 / 数据完整性问题 | -| `[contract]` | 接口契约不一致(上下游对返回类型/字段名理解不同) | - -设计意图:reviewer 的所有担忧都会作为 `> ⚠️ 评审建议` 注释写入 tasks.md,**带证据标签的 P0** 以醒目形式呈现,**无标签的 advisory** 以 `(adv)` 前缀呈现。使用者一眼区分"客观依据"与"风格意见",决定是否人工开新 spec 跟进。**所有 P0 / advisory 都不触发 coder 重派**——reviewer 是 advisory,不参与循环。 - -## 死循环识别(成本控制) +| `[P0 已修复]` | 带证据标签的 P0 + p0-fix 阶段 coder STATUS: ok | +| `[P0 未修复]` | 带证据标签的 P0 + p0-fix coder failed / 主代理选择跳过 | +| `[P1 未修复]` / `[P2 未修复]` | reviewer 列出但默认不修;状态默认为"未修复" | +| `[adv 未修复]` | reviewer 列为 P0 但未带证据标签,被自动降级 | -validator prompt 强制要求:若**本轮**的 fail 项与上轮 inbox 的 prev-validation.md 完全一致,在文件**顶部**加 `## 进入死循环风险` 节。`task_swarm_outbox.parse_validation` 检测到该节立即把 judgment 升级为 `loop`,主编排器收到 loop 后立刻标 stage failed。 +--- -reviewer 由于不参与循环,死循环识别**对 reviewer 不再适用**(reviewer 只跑一次)。 +## 6. 
on-task-completed hook 提醒矩阵 -## 三检写守(具体落地) +`PostToolUse` matcher=`Task` 每次 subagent 返回都触发。hook 读 sessions/<id>.json 看是否在 run 中 → 调 `task_swarm.py plan --run <id>` 拿提示。 -`writeback` 子命令内部自动执行: +| 当前 state | 注入文本要点 | +|---|---| +| coding 进行中,仍有 coder 未返回 | "coding phase 还在等 N 个 subagent,无需 fork 新 agent;等齐后再判断。" | +| coding 全部返回 | "本 group coder 已全部返回。请 fork **1 个** `task-swarm-reviewer`。" | +| review 返回,含带证据 P0 | "reviewer 提了 N 个带证据 P0。请按 P0 涉及文件 fork M 个 `task-swarm-coder`(p0-fix)。提醒:reviewer 修复**只触发一次**,不 re-review。" | +| review 返回,无 P0(或全降级) | "reviewer 无带证据 P0。请 fork **1 个** `task-swarm-validator`。" | +| p0-fix 全部返回 | "p0-fix coder 已返回。请 fork **1 个** `task-swarm-validator`。" | +| validation 返回 pass | "validator pass。请调 `task_swarm.py writeback` 回写 tasks.md,然后进入下一 group。" | +| validation 返回 fail | "validator fail。请按 validation.md 的 fix_targets 各文件 fork **N 个** `task-swarm-coder`(v-fix)。" | +| v-fix 全部返回 | "v-fix coder 已返回。请 fork **1 个** `task-swarm-validator` 验证。" | +| v-fix 已连续 3 轮同 fail 签名 | "⚠️ 死循环检测:g{g} 已连续 3 轮同一 fail。建议停止本 group,向用户报告 `failed-deadloop`。" | +| 所有 group 完成 | "全部 group 已完成。请按 SKILL.md 退出 task-swarm 模式,回到 spec-mode acceptance phase。" | -1. `spec_session.verify_and_heartbeat(spec_dir, session_id)` — INV-3 lock check + 续锁(单次调用) -2. 调用 `task_swarm_writeback.apply_writeback()` 做行级安全 Edit + 追加 reviewer advisory 注释 -3. 通过 `diff_safe_line_by_line` 二次确认 diff 只包含 checkbox 切换 + `> ` 注释 -4. verify-lock 异常时把详细信息放进 JSON `warnings` 字段透出给主编排器 +所有提醒**末尾固定加**:"本提醒仅供参考;fork 谁、是否 fork、何时 writeback 仍由你判断;可忽略。" -主编排器**不应**直接 Edit tasks.md——INV-9 hook 会拦下任何非 checkbox / 非注释的改动。 +--- -## subagent 工作目录布局 +## 7. 
信息流总览 ``` -.task-swarm/ - active-run # 当前 run_id pointer(hook 用) - runs/ - 20260517-153012-ab12cd/ - state.json # 状态机持久化 - agents/ - stage-1-coder/ # 普通 stage 初轮 - task.md # 预渲染的 subagent prompt - inbox/ # 上游产物(脚本中继过来) - outbox/result.md - stage-1-reviewer/ # 普通 stage 唯一一次 reviewer (advisory) - inbox/ ← coder outbox 自动 cp - outbox/review.md - stage-2-validator/ # checkpoint 初验 - stage-2-coder-r2/ # checkpoint validator-fail-fix - inbox/ - prev-result.md - validation.md - outbox/result.md - stage-2-validator-r2/ # checkpoint 复验 - inbox/ - prev-validation.md - upstream-result.md - outbox/validation.md +主代理(spec-mode 主会话,持锁) + │ + ├─[调]── task_swarm.py init ─────────────► state.json (groups, stages) + │ ┌──────────────────────────────────────┘ + │ │ + ├─[读]── task_swarm.py plan ──► 当前应 fork 的 subagent 列表 + │ + ├─[fork]── Task(coder1) ─┐ + │ [fork]── Task(coder2)─┼─► (并发执行) + │ [fork]── Task(coderN)─┘ + │ ┌─► 各自写 outbox/result.md + │ ←─── PostToolUse hook 注入(每返回一个) + │ + ├─[调]── task_swarm.py advance --phase coding ──► state.json 更新 + │ + ├─[fork]── Task(reviewer) ─► outbox/review.md + ├─[调]── task_swarm.py advance --phase review ──► state.json + p0_pending[] + ├─[fork]── Task(coder p0-fix x M) ─► outbox/result.md ... + ├─[fork]── Task(validator) ─► outbox/validation.md + ├─[调]── task_swarm.py advance --phase validation + │ + │ if fail: + │ ├─[fork]── Task(coder v-fix x M) ─► outbox/... + │ └─ loop 回 validator + │ + │ if pass: + │ └─[调]── task_swarm.py writeback --run <id> --group <g> + │ ─► tasks.md 行级安全更新 + │ + └─ 进入下一 group / 全部完成 → 退出 task-swarm 模式 ``` -后缀规则: -- 无后缀 = 初轮 -- `-r2`、`-r3` = 第 N 轮 -- **reviewer 没有 `-rN` 工作区**(advisory 模式只跑一次) -- coder 与 validator 各有自己的轮号空间(互不串号),由 `task_swarm_state.stage["rounds"]` 跟踪 +**关键不变量**: -## 与 specode 铁律的兜底关系 +1. 主代理是**唯一**持有 spec 锁的实体;subagent 不动锁。 +2. 所有跨进程信息走文件系统(outbox + state.json)。 +3. `state.json` 是唯一事实源;主代理状态丢了可以从 `state.json` + outbox 文件完全恢复(resume 暂未实现,但数据结构已为之留路)。 +4. 
hook 只读、只提醒——任何"该做什么"由主代理决定。 -| specode 铁律 | task-swarm 兜底机制 | -| --- | --- | -| Document-first | `writeback` 子命令在每阶段收敛后回写 tasks.md | -| Post-`/continue` sync | UserPromptSubmit 注入"current step"提示,模型不会遗忘上下文 | -| INV-3 Write-before-verify-lock | `writeback` 内部强制 `verify-lock + heartbeat` | -| Phase gate (INV-6) | task-swarm 只在 implementation phase 被调起 | -| Forced writes | `writeback` 失败立即 abort,不在内存累积 | -| INV-1 (源文件 = tasks.md 列出) | subagent 工作区内的产物自动归类为 "swarm 内部",不走 INV-1;业务代码仍走 INV-1 | -| INV-2 (改源码必须同 turn 改 spec) | `writeback` 每阶段写 tasks.md,自动满足 | +--- -新增铁律(仅 task-swarm 期间生效): +## 8. 死循环保护规则 -| 铁律 | 内容 | -| --- | --- | -| **INV-7** | `Task` 调用 `subagent_type` 必须带 `specode:task-swarm-` 前缀,否则 hook deny | -| **INV-8** | subagent 写边界——只能写自己 task.md 中 `@writes` 列出的文件或自己 outbox/;越界(包括 spec 文档)一律 hook deny | -| **INV-9** | task-swarm 运行期编辑 tasks.md 必须走 `writeback` 子命令;直接 Edit 时 hook 校验 diff,只放行 checkbox 切换 + `> ` 注释,其余 deny | -| **INV-10** | subagent outbox 必须通过 schema 校验(必需节、STATUS 行、判定字段);由 `task_swarm.py parse` **CLI 子命令兜底**(非 Stop hook,因为 subagent Stop 不在父会话 hook 拦截范围内):parse 返回 `judgment=schema-error` 时同时**清空 outbox** 与 **重置 in_flight**,并在 JSON 里附 `retry: true` + `outbox_snapshot`,主编排器照原 stage/role/round 重派 subagent,prompt 不变 | +- 连续 3 轮 v-fix → validation 出现**完全相同**的 fail 签名(测试名 + assertion 文本哈希)→ 整个 group 标 `failed-deadloop`。 +- state.json 不再推进;主代理向用户报告并退出 task-swarm。 +- writeback 该 group 时注释块会写明"failed-deadloop(连续 3 轮同一 fail 签名)"。 +- 用户介入后可:手改源码 → 重跑 `/specode:task-swarm`;或调 `task_swarm.py resolve --run <id> --abort` 中止。 -## 调试 +--- -| 想看什么 | 命令 | -|---|---| -| run 全貌 | `task_swarm.py status --run <id>` | -| subagent 拿到的 prompt | `cat .task-swarm/runs/<id>/agents/<stage>/task.md` | -| subagent 产出 | `cat .task-swarm/runs/<id>/agents/<stage>/outbox/*` | -| 历史轮 | `ls .task-swarm/runs/<id>/agents/stage-3-*` | -| 清理 | `rm -rf .task-swarm/runs/<id>` | +## 9. 
CLI 接口速查 -## 关键原则(写给将来扩展功能的人) +```text +task_swarm.py init --tasks <abs> [--max-parallel N] [--max-rounds N] + [--session <session_id>] [--spec <abs>] + → {"run_id", "groups": [...], "spec_dir": ...} -1. **不要把决策逻辑挪回 prompt**——任何"状态机"或"格式解析"应该新增 Python 函数 + 单测,不要改 references 文档让模型猜。 -2. **每条新增铁律都要有 hook 兜底**——prompt-only 约束等于没有约束。 -3. **outbox schema 是接口**——改 schema 要同步改 `task_swarm_outbox.py` + 三个 agent.md + INV-10 hook。 -4. **state.json 是持久化的**——schema 变更要带迁移逻辑或版本号 bump。 -5. **subagent prompt 由脚本渲染**——不要让主编排器自己拼 prompt。新增字段先加到 `StageContext`,再改 `render_*_prompt`。 +task_swarm.py status --run <run_id> + → 当前 phase / group / round / 待派 subagent 列表 -## 完整示例 +task_swarm.py plan --run <run_id> + → 下一步该 fork 哪些 subagent 的 JSON(不改 state) -`references/task-swarm-example.md` —— 一份完整的 specode 风格 tasks.md 样本(5 阶段 / 5 子任务 / 7 个 subagent)。 +task_swarm.py advance --run <run_id> --phase <coding|review|p0-fix|validation|v-fix> + --round <n> + → 解析 outbox、更新 state.json、返回下一步建议 -## 用户怎么用 +task_swarm.py writeback --run <run_id> --group <N> + → 当前 group 全部 pass 后回写 tasks.md(line-safe diff) -### 方式 1:从 specode selector 触发(推荐) -走正常 specode 流程到 tasks 确认后,在"任务执行"selector 选择 `用 task-swarm 多 agent 并发`。 +task_swarm.py heartbeat --run <run_id> + → 刷新 state.json.last_activity_at(spec 锁需主代理另调 spec_session.py) -### 方式 2:手动触发 -``` -/specode:task-swarm <spec-dir>/tasks.md +task_swarm.py resolve --run <run_id> [--abort] + → 标记完成或中止;清理 sessions.task_swarm_run_id ``` diff --git a/plugins/specode/skills/specode/references/templates.md b/plugins/specode/skills/specode/references/templates.md index 8c83934..954d126 100644 --- a/plugins/specode/skills/specode/references/templates.md +++ b/plugins/specode/skills/specode/references/templates.md @@ -1,274 +1,177 @@ -# Spec Templates - -Use these templates when generating documents. Prefer adapting wording to the project and language of the user request. 
- -## `requirements.md` - -```markdown -# 需求文档 - -Spec Type: Feature -Workflow: requirements-first -Status: Requirements Draft -Review Status: unreviewed - -## 简介 - -[说明要实现的能力、用户价值、当前背景。若已有代码上下文,简述相关模块和约束。] - ---- - -## 词汇表 - -- **[Term]**:[定义] - --- - -## 需求 - -### 需求 1:[需求标题] - -**用户故事:** 作为[用户/角色],我希望[能力],以便[价值]。 - -#### 验收标准 - -1. WHEN [触发条件],THE [系统/组件] SHALL [期望行为]。 -2. IF [条件],THEN THE [系统/组件] SHALL [期望行为]。 -3. WHILE [状态],THE [系统/组件] SHALL [持续行为]。 - +description: Use when 准备生成或修订 requirements.md / bugfix.md / design.md / tasks.md / implementation-log.md。详述 5 份文档的章节模板、EARS SHALL 写法、traceability 规范。 --- -## 边界情况 +# Spec Document Templates -1. WHEN [边界条件],THE [系统/组件] SHALL [安全行为]。 +5 份 spec 文档的**写作约束 + EARS / traceability 规范**。完整章节骨架的 +**单一事实源**是 `${CLAUDE_PLUGIN_ROOT}/assets/templates/<phase>.md`——主 +代理按 SKILL.md §「Spec 文档生成」Read 那份骨架 + 按 `source_text` 填空。 +本文件**不重复骨架**,只列每份文档独有的写作约束。 ---- - -## 非功能需求 +## 0. 命名约定 -1. WHEN [运行条件],THE [系统/组件] SHALL [可验证的质量要求]。 +| 文件 | 用途 | 骨架文件 | 互斥关系 | +|---|---|---|---| +| `requirements.md` | 需求-first / design-first 工作流的需求文档 | `assets/templates/requirements.md` | 与 `bugfix.md` 互斥 | +| `bugfix.md` | bugfix 工作流的问题描述 | `assets/templates/bugfix.md` | 与 `requirements.md` 互斥 | +| `design.md` | 技术设计文档 | `assets/templates/design.md` | — | +| `tasks.md` | 任务拆分 + 进度 + traceability + 末尾 `## 测试要点`(tasks phase 按 SHALL 顺手补给测试人员参考) | `assets/templates/tasks.md` | — | +| `implementation-log.md` | 实现记录(可选;spec_init 不预生成,主代理首次记录时再 Write) | — | — | ---- +每份文档头部固定四行 metadata(骨架已含;**主代理不要手改 `Status` / +`Review Status` 字段**——这些由 `phase-transition` CLI + selector 流程驱动): -## 待确认问题 - -- [问题] +```text +Spec Type: <Feature | Bugfix> +Workflow: <requirements-first | design-first | bugfix> +Status: <Requirements Draft | Bug Analysis Draft | Design Draft | Tasks Draft | Implementation Log> +Review Status: <unreviewed | reviewed | accepted> ``` -## `bugfix.md` +## 1. 
`requirements.md` 写作约束 -```markdown -# Bugfix 文档 +骨架见 `assets/templates/requirements.md`。章节:简介 / 词汇表 / 需求 / 边界 +情况 / 非功能需求 / 待确认问题。 -Spec Type: Bugfix -Workflow: bugfix -Status: Bug Analysis Draft -Review Status: unreviewed +- SHALL 必须按 `<需求编号>.<条目编号>` 编号(如 `1.1` / `2.3`)—— tasks.md + 的 `_需求:x.y_` 用同一编号系统 traceback。 +- 「待确认问题」节是给"用户回头要确认"用的;澄清 wizard 解决不了的可延后项 + 写在这里。 +- 避免使用「假设」/「Assumptions」节—— 用 `待确认问题` 主动问,不要假设。 -## 问题摘要 +## 2. `bugfix.md` 写作约束 -[一句话说明缺陷。] +骨架见 `assets/templates/bugfix.md`。章节:问题摘要 / 复现步骤 / 当前行为 / +期望行为 / 保持不变的行为 / 影响范围 / 证据 / 约束 / 待确认问题。 -## 复现步骤 +- `当前行为` 用 `WHEN ... THE ... [错误行为]`(**不带** SHALL;因为不是期望)。 +- `期望行为` 用标准 `WHEN ... SHALL ...`。 +- `保持不变的行为` 用 `WHEN ... SHALL CONTINUE TO ...` —— bugfix 专用 EARS + 写法。 +- 调研代码后再写「根因」相关结论;**不要**在 bugfix.md 里凭空断言根因(根因 + 写在 design.md)。 -1. [步骤] -2. [步骤] -3. [观察到的错误结果] +## 3. `design.md` 写作约束 -## 当前行为 +骨架见 `assets/templates/design.md`。章节:概述 / 架构(现有 + 目标)/ 组件 +与接口 / 数据模型 / 流程 / 错误处理 / 安全与隐私 / 性能与可靠性 / 测试策略 / +正确性属性 / 风险 / 变更历史 / 待确认问题。 -1. WHEN [触发条件],THEN THE [系统/组件] [错误行为]。 +- 「正确性属性」必须显式写 `**验证:需求 x.y**`,把每条 design 属性映射回 + requirements.md / bugfix.md 编号。 +- 「测试策略」是策略不是计划 —— 具体任务在 tasks.md 里。 +- 「变更历史」节是 iteration 子循环的累积入口,**首次落地时也保留空节标题**, + 方便后续追加。 -## 期望行为 +## 4. `tasks.md` 写作约束 -1. 
WHEN [触发条件],THE [系统/组件] SHALL [正确行为]。 +骨架见 `assets/templates/tasks.md`。**采用 task-swarm 兼容格式**:顶层 +`## 阶段 N: 标题` 段对应一个 stage(task-swarm fork 粒度);每条具体任务 +`- [ ] N.M 任务 @writes:文件 @reads:文件 @depends-on:N _需求:x.y_`。 -## 保持不变的行为 +- **顶层段落必须用 `## 阶段 N: 标题`** 格式(`task_swarm/_parse_md.py` 强制 + 要求;不符合解析器会报 `tasks.md 中未解析出任何 ## 阶段 N: 段`)。 +- **每条具体任务编号 `N.M`**(不能仅 `N`),任务行末必须带 `_需求:x.y_` + 或 `_需求:可选_` traceability。 +- **`@writes`**(task-swarm 据此切 group 避免并发冲突);**`@reads`** 可选; + **`@depends-on:N`** 可选(不写则仅靠 `@writes` 冲突切 group)。 +- 可选任务把 `[ ]` 改 `[*]`;checkpoint 任务把标题写成 `检查点 —— ...`。 +- 文件路径直接写裸路径(不用反引号;task-swarm parse_md 按裸路径切分)。 +- 「验收」节固定四行(顺序、措辞与骨架一致),不要改写。 +- 同一 stage 内多条任务并入 single coder 顺序执行;要拆 coder 必须把它们分 + 到不同 stage(不同 `## 阶段 N: ...` 段)。 -1. WHEN [相关条件],THE [系统/组件] SHALL CONTINUE TO [现有正确行为]。 +详细切 group 规则与 `@depends-on` 语义见 `references/task-swarm-example.md`。 -## 影响范围 +### 4.1 任务标记语义 -- [用户影响/业务影响/技术影响] - -## 证据 - -- [日志、错误信息、测试、截图、用户报告] - -## 约束 - -- [不应改变的代码、接口、数据或行为] - -## 待确认问题 - -- [问题] ``` - -## `design.md` - -````markdown -# 设计文档:[需求名]([slug]) - -Spec Type: [Feature | Bugfix] -Workflow: [requirements-first | design-first | bugfix] -Status: Design Draft -Review Status: unreviewed - -## 概述 - -[说明设计目标、范围、主要技术选择和不做什么。] - -## 架构 - -### 现有架构 - -```text -[用文本图或 Mermaid 描述现状。] -``` - -### 目标架构 - -```text -[用文本图或 Mermaid 描述修改后的结构。] +[ ] pending [~] in progress [x] completed +[-] skipped [*] optional ``` -## 组件与接口 +推进规则: +- 开始一个任务 → `[ ]` → `[~]`。 +- 该任务对应验证通过 → `[~]` → `[x]`。 +- 跳过任务 → `[-]` + 在 chat / log 说明原因。 +- 可选任务:用户选 `开始 required` 时不动;选 `开始 required + optional` 时 + 也走 `[ ] → [~] → [x]` 流程。 -### 1. 
`[Component]` +### 4.2 `## 测试要点` 节填充提示 -**职责**:[组件职责] +tasks phase 生成 tasks.md 时,按 requirements.md / bugfix.md 的 SHALL +**顺手**补几行: -**变更**: +- 每行格式 `触发场景 → 预期结果(需求 X.Y)`(不带 checkbox;这一节是参考 + 清单而非任务清单)。 +- **触发场景**:测试人员可执行的具体动作。 +- **预期结果**:SHALL 后的期望行为。 +- **需求 X.Y**:requirements.md 的 SHALL 编号。 -- [变更点] +非硬纪律——SHALL 模糊或拿不准时可以留 `_待补充_` 占位;后续 acceptance 时 +可以补。**不要把这一节当成验收门**——验收只看 tasks.md 是否全 `[x]`。 -**接口**: +iteration 子循环按需追加新行,详见 `references/iteration.md`。 -```text -[API / function / event / command contract] -``` +## 5. `implementation-log.md` 写作约束 -## 数据模型 +无 spec_init 预生成骨架——首次记录时主代理 Write,按 `## YYYY-MM-DD` 分日期。 +每天追加任务进度 / 设计偏离 / 关键决策三类内容。 -[数据结构、数据库、配置、文件格式、迁移。] +- 自由式格式;每条记录 **≥30 字**(`spec_lint.py` 检查;过短报 WARNING)。 +- 按日期分节(`## YYYY-MM-DD`),每天追加。 +- 记录三类内容: + 1. **任务进度**:任务 x.y 完成 / blocker。 + 2. **设计偏离**:实现期间偏离 design.md 的决策(必须**同 turn** Edit + design.md 同步)。 + 3. **关键决策**:选型 / 安全性 / 性能取舍。 +- 缺关键文件引用(路径 / 行号 / 函数名)→ WARNING。 +- log 是「轻量级补救手段」 —— 如果同 turn 改了代码但实在没法重写 + design.md / tasks.md,至少在 log 里记一行;**空 log 等于没改过**(下一 + 会话看不到)。 -## 流程 +## 6. EARS 四种 SHALL 写法 -```mermaid -sequenceDiagram - participant User - participant System - User->>System: Request - System-->>User: Response +```text +WHEN [condition/event], THE [system/component] SHALL [expected behavior]. +WHILE [state], THE [system/component] SHALL [expected behavior]. +IF [condition], THEN THE [system/component] SHALL [expected behavior]. +WHEN [condition], THE [system/component] SHALL CONTINUE TO [existing behavior]. ``` -## 错误处理 - -- [错误场景与处理方式] - -## 安全与隐私 - -- [鉴权、权限、数据校验、敏感信息、PII] - -## 性能与可靠性 - -- [延迟、吞吐、并发、重试、幂等、降级] - -## 测试策略 - -- 单元测试: -- 集成测试: -- 端到端测试: -- 回归测试: -- 属性测试候选: - -## 正确性属性 - -### 属性 1:[属性名称] - -*对任意* [输入范围],当 [操作],系统应 [不变量/性质]。 - -**验证:需求 [编号]** - -## 风险 - -- [风险]: [缓解方式] - -## 待确认问题 - -- [问题] -```` - -## `tasks.md` +含义: -```markdown -# 实现计划:[需求名]([slug]) +| 写法 | 含义 | 用于 | +|---|---|---| +| `WHEN ... SHALL ...` | 事件触发型 | 一般行为 | +| `WHILE ... 
SHALL ...` | 状态持续型 | 持续行为(如"登录态下持续刷新 token") | +| `IF ... THEN ... SHALL ...` | 条件型 | 分支行为 | +| `WHEN ... SHALL CONTINUE TO ...` | 不变行为 | bugfix.md 专用,断言修复后某行为不变 | -Spec Type: [Feature | Bugfix] -Workflow: [requirements-first | design-first | bugfix] -Status: Tasks Draft -Review Status: unreviewed +`spec_lint.py` 检查每条 SHALL:缺动词 / 缺 trigger(WHEN/WHILE/IF)→ WARNING。 -## 概述 +## 7. traceability 规范(`_需求:x.y_`) -[说明实现策略和任务拆分原则。] +- 写法:`_需求:1.1_`、`_需求:1.1、1.2_`、`_需求:2.3、可选_`。 +- 编号系统 = `requirements.md` / `bugfix.md` 的"需求 1 > 验收标准 1.1"路径。 +- 编号 `可选` 用于 optional 任务。 +- 多需求覆盖一个任务时用全角顿号「、」分隔。 +- `spec_lint.py` 检查每条具体子任务(不含 checkpoint / 顶层任务):缺 + traceability 或编号在 requirements.md 中找不到 → WARNING。 -## 任务 +## 8. Document Style 总则 -- [ ] 1. [阶段任务标题] - - [ ] 1.1 [具体子任务] - - [具体实现点] - - 文件:`[path]` - - 验证:`[command]` - - _需求:1.1、1.2_ +- 章节结构稳定(见各 `assets/templates/<phase>.md` 骨架),不要随意改 + H2 / H3 标题。 +- 中文叙述;技术名 / 命令 / 路径 / 函数名 / 变量名保持英文原样。 +- 禁止使用「假设」/「Assumptions」节 —— 用 `待确认问题` 节主动问。 +- 禁止使用模糊措辞("大概"、"可能"、"应该差不多")—— 不确定就走澄清 wizard。 +- iteration 子循环里旧节不动,按规则追加(详见 `references/iteration.md`)。 -- [ ] 2. 检查点 —— 确保阶段验证通过 - - 运行相关测试和检查。 - - 如有失败,停止继续执行并修复或向用户确认。 +## 9. 跨文档引用 -- [*] 3. 
[可选任务标题] - - [ ] 3.1 [可选子任务] - - [说明] - - _需求:可选_ - -## 测试要点 - -供测试人员快速了解需要验证的场景。每行对应 `requirements.md` / `bugfix.md` 中的一条 SHALL;需求或 bug 行为变更时由 agent 在同一轮 turn 内同步更新本节。 - -- [ ] 输入合法用户名+密码 → 登录成功,跳转首页(需求 1.1) -- [ ] 密码少于 8 位 → 提示"密码长度不足"(需求 1.2) -- [ ] 连续 5 次错误密码 → 账号锁定 15 分钟(需求 1.3) -- [ ] 已登录用户访问 /login → 自动重定向 /home(边界) -- [ ] 数据库不可用 → 返回 503 并记录日志(非功能) - -## 验收 - -- [ ] 所有 required 任务完成。 -- [ ] 所有指定验证命令通过。 -- [ ] 未完成或跳过的 optional 任务已记录。 -- [ ] 用户确认验收。 -``` - -## Document Style - -### Document Section Structure - -- `requirements.md`: 简介, 词汇表, 需求, 用户故事, 验收标准 -- `design.md`: 概述, 架构, 组件与接口, 数据模型, 错误处理, 测试策略, 正确性属性, 风险 -- `tasks.md`: 概述, 任务(nested checkbox items, `_需求:..._` traceability, optional markers, checkpoint tasks), 测试要点(供测试人员快速了解的 SHALL 级测试场景) -- `bugfix.md`: Current Behavior, Expected Behavior, Unchanged Behavior -- Avoid "Assumptions" sections. Prefer "待确认问题" and ask before continuing. - -### EARS Acceptance Criteria Formats - -```text -WHEN [condition/event], THE [system/component] SHALL [expected behavior]. -WHILE [state], THE [system/component] SHALL [expected behavior]. -IF [condition], THEN THE [system/component] SHALL [expected behavior]. -``` - -For bugfix unchanged behavior: - -```text -WHEN [condition], THE [system/component] SHALL CONTINUE TO [existing behavior]. 
-``` +- phase 序列与 `doc-confirm-*` 选择器 → `references/workflow.md`。 +- 选择器三种类型与场景常量 → `references/selectors.md`。 +- iteration 子循环的文档累积规则 → `references/iteration.md`。 +- 5 份文档与文档优先纪律的关系 → SKILL.md §Code-Doc Sync Reminders。 diff --git a/plugins/specode/skills/specode/references/workflow.md b/plugins/specode/skills/specode/references/workflow.md index ae3cfa8..562a8a3 100644 --- a/plugins/specode/skills/specode/references/workflow.md +++ b/plugins/specode/skills/specode/references/workflow.md @@ -1,351 +1,334 @@ -# Spec Mode Workflow +--- +description: Use when 涉及 phase / clarification / workflow-choice / 三档工作流选择 / phase 转换细节 / /specode:continue 接管子步骤。SKILL.md §Phase Order 与 §Workflow Selection 的运维详解。 +--- -Operational reference for the rules defined in `SKILL.md`. Activation conditions, the hard rules, and command compliance live in SKILL.md and are **not** restated here. +# Workflow — Phase 协议详解 -## 0. Activation Guard +SKILL.md §Phase Order / §Workflow Selection 的运维细节版本。本文件**不**重复激活规则、状态行 footer、selector 三种类型与场景表 —— 那些在 SKILL.md 与 `references/selectors.md` 里。 -Activation rules are defined in `SKILL.md §Activation Guard` and apply here without exception. Do not re-state or paraphrase them. If the current request does not satisfy SKILL.md activation conditions, do not create a spec directory and do not run the phase-gated workflow. +## 0. Phase 序列总图 -## 1. 
`/spec` Intake - -Parse user input as: - -```text -/spec <requirement-or-path> [extra instructions] -/spec --persist <requirement-or-path> -/continue [spec-slug] -/status -/end +``` +intake ──► requirements / bugfix ──► design ──► tasks ──► implementation ──► acceptance ──► iteration + │ │ │ │ │ │ + │ ▼ ▼ ▼ ▼ ▼ + │ doc-confirm-* doc-confirm-* tasks-execution 推进 [ ] → [~] → [x] acceptance-gate iteration 子循环 + │ 选择器 选择器 选择器 + │ + ├─ 需求有歧义 → clarification-wizard(类型 B)+ clarification-done(类型 A) + └─ workflow 不明 → workflow-choice(类型 A) ``` -Intake rules: - -- If `<requirement-or-path>` points to a readable file, summarize it and use it as the source. -- If it is prose, use it directly. -- **Spec name prefix parsing**: detect a leading `<名称>:<内容>` (full-width 冒号) or `<名称>: <内容>` (ASCII 冒号 + 空格) within the first ~30 chars; **不**对路径 / URL / 无冒号输入做拆分。匹配则:`<名称>` 作为 slug 来源(非英文需 agent 自行派生语义 slug,原文保留为 `requirementName`),`<内容>` 作为需求源文本。否则整段都是需求,agent 自行从内容推断 slug。 -- Extract requirement name (semantic English slug — see §1.2), root hints, workflow hints, constraints, validation expectations. -- If the user only gives a root and no requirement, ask for the requirement. -- **Pre-requirements clarification (Plan-mode)**: if the requirement has real ambiguity affecting scope, behavior, UX, data, validation, or acceptance, stay in `intake` phase and enter a clarification dialogue **before** writing any document. Do not invent missing details. Group questions compactly (≤5), end the turn, wait for user reply. After resolution, proceed to workflow selection. → 详见 SKILL.md §Pre-requirements Clarification。 -- Group unclear points into a compact confirmation list before generating the next document. - -Persistent command rules: - -- `/spec <requirement>` — one-shot. Runs the workflow without updating `.active-specode.json`. -- `/spec --persist <requirement>` — persistent. Initializes spec and starts an active session. -- `/continue [slug]` — resume; multi-window aware (see §9). 
-- `/status` — prints current session, spec path, phase, task counts, lock state. -- `/end` — ends current session, releases the spec lock, **does not** delete docs. +phase 切换永远走 `spec_session.py phase-transition --spec <dir> --session <id> --from <p> --to <p>`。**不要**手动改 `<spec-dir>/.config.json.currentPhase`。 -## 1.1 Natural-language Follow-up Routing +## 1. `/specode:spec <需求>` Intake -Within an active persistent session, route natural-language follow-ups via document-first discipline (the iron rules live in SKILL.md §Document-first Discipline). +### 1.1 输入解析 -> ⛔ **Post-`/continue` 同 turn 同步(非常重要)**:恢复一个已落地 spec 后,用户在聊天中提出的任何对需求或设计的调整——哪怕只是一句澄清——都必须**在同一轮 turn 内**写回 `requirements.md` / `bugfix.md` / `design.md` / `tasks.md`(需求/bug 行为变更并同 turn 更新 `tasks.md` 的 `## 测试要点` 节)。不允许累积到"下一轮",不允许"先写代码后补文档"。 +接受以下几种形式: +```text +/specode:spec -n <slug> <需求文本> ← 推荐:直接指定 spec 目录名 +/specode:spec --name <slug> <需求文本> ← 同上(长形式) +/specode:spec <需求文本> ← 兼容:主代理推导 slug +/specode:spec <名称>:<需求文本> ← 兼容:从前缀提显示名 + 推导 slug +/specode:spec <需求文本> --root /path/to/dir +/specode:spec <文件路径> +``` -| Intent | Action | -|---|---| -| Requirement change | Update `requirements.md` / `bugfix.md`, **same turn update `tasks.md` 的 `## 测试要点` 节**, then check whether `design.md` and其余 `tasks.md` 任务 are stale | -| Design change | Update `design.md`, then check whether `tasks.md` is stale | -| Task change | Update `tasks.md`, preserve `_需求:..._` traceability | -| Execution request | Verify lock → load only active spec's docs → execute selected or next pending task | -| Acceptance feedback | Update task/review state in `tasks.md`(含 `## 测试要点` 节) | -| User said "/spec-accept" or "验收通过" | Run `spec_session.py iterate <spec-dir>` → phase becomes `iteration` | +`<需求文本>` 解析步骤(**有 `-n` / `--name` 时跳过 step 1 + 2,直接走 step 0**): + +0. 
**显式 slug**(**优先**,避免推导歧义):若 `$ARGUMENTS` 以 `-n <slug>` 或 `--name <slug>` 开头: + - `<slug>` 直接当 spec 目录名(保留用户原文,**不做翻译/推导**) + - 0.10.16+ 起允许 Unicode(中文/日文/emoji 都可),仅禁文件系统危险字符 + (`< > : " / \ | ? *`、空白、首字符 `.` 或 `-`、Windows 保留名 `CON` / `PRN` / `AUX` / `NUL` / `COM1-9` / `LPT1-9`) + - `requirement_name` 默认:英文 slug 按短横线 → 空格 + 首字母大写(`user-login` → `User Login`);非 ASCII slug(中文等)直接复用原文 + - 剩余文本 → `source_text` + - **跳过下面 1+2 步**,直接进 §1.2 + - 例 1:`/specode:spec -n user-login 添加用户登录功能` + → `--name user-login --requirement-name "User Login" --source-text "添加用户登录功能"` + - 例 2:`/specode:spec -n 登录页面 帮我做一个简单的登录页面` + → `--name 登录页面 --requirement-name "登录页面" --source-text "帮我做一个简单的登录页面"` + - **slug 非法(spec_init.py exit 3)时**:不要 fallback 到 step 2 推导;把 CLI stderr 报给用户让用户重选,仅当用户明确说"你帮我想一个"时才走 step 2 + +1. **名称前缀解析**(兼容路径,仅当未给 `-n` 时):检测前 30 字符内是否含 `<名称>:<内容>`(全角 `:`)或 `<名称>: <内容>`(半角 `:` 必须有空格)。命中: + - 左半部分 → 显示名(中文允许;保留为 `requirementName`) + - 右半部分 → 源需求文本(`--source-text`) + - 不对路径 / URL / 无冒号输入做拆分 +2. **slug 推导**(兼容路径,仅当未给 `-n` 时;由你负责,CLI 不会从中文推 slug): + - 读完用户需求后,给一个**短 + 语义 + 英文 + 小写 + 连字符** ≤64 字符 slug + - 例:`login-password-rule`、`undo-redo`、`dark-mode`、`api-rate-limit` + - **注意推导结果对用户不可预知**——若用户在意目录名,应引导用户改用 step 0 的 `-n` 形式 +3. **文件路径模式**:若 `<需求>` 是一个可读文件路径,先 Read 该文件,把内容当成源文本继续走前面 step 0-2。 +4. 提取根目录提示(`--root`)、工作流提示(如用户已说"做个 bugfix")、约束、验证期望。 + +### 1.2 调 `spec_init.py` + +```bash +python3 plugins/specode/scripts/spec_init.py \ + --name <slug> \ + --requirement-name "<中文显示名>" \ + --source-text "<原始需求文本>" \ + --session <session_id> \ + [--root <override>] \ + [--detect-vault] +``` -## 1.2 Mandatory Entry — `spec_init.py` is the ONLY way to create a new spec +CLI 行为: -⛔ **Iron Rule #1 (see SKILL.md).** A new spec — whether one-shot `/spec` or `/spec --persist` — is created exclusively by calling `scripts/spec_init.py`. You **MUST NOT**: +1. 三层文档根目录解析(详见 `references/obsidian.md`)。 +2. 
在 `<doc_root>/specs/<slug>/` 写 5 份骨架文档(按 `references/templates.md` 模板)+ `.config.json`(`specId` / `createdAt` / `phase=intake` / `lock` 字段指向 `--session`)。 +3. 更新 `<doc_root>/.active-specode.json` active-pointer。 +4. 强制双写 `~/.specode/sessions/<session_id>.json`(mode=active / active_spec_slug / phase=intake / lock_state=ok)。 +5. 三步任一失败 → 回滚 + exit 1(半成功是禁区)。 +6. 三层全 miss → exit 3 + 引导(SKILL.md §Document Root Resolution 给出引导文本)。 +7. 成功 → 输出 JSON:`{"spec_dir": "...", "specId": "...", "session_id": "<id>", "phase": "intake"}`。 -- `mkdir` any spec directory yourself -- `Write` `requirements.md` / `bugfix.md` / `design.md` / `tasks.md` / `.config.json` to a path you constructed -- Use `<project>/specode/`, `<project>/specs/`, `<cwd>/...`, `~/Git/<x>/...`, or any path you chose -- Treat phrasing like "在项目下创建"、"在 git 目录下创建一个新项目" as a directive to place spec docs in the project. Those phrases describe **future code location**, not spec-document location. +### 1.3 进入 intake 阶段后 -### Steps (agent responsibility) +如果需求有歧义 → 进 §1.4 澄清子流程;否则跳到 §2 workflow 选择。 -`scripts/spec_init.py` **requires `--name <slug>`**. The script does not infer slugs from Chinese — that responsibility falls on the agent: +只给了 root 没给需求 → 用一句话问用户"请告诉我本次要做的需求",end turn。 -1. Read the user's requirement description -2. Produce a short semantic English slug, lowercase, hyphen-separated, ≤64 chars (e.g. `login-password-rule`, `undo-redo`, `dark-mode`) -3. Call `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_init.py --name <slug> --requirement-name "<中文显示名>" --source-text "..."` -4. The script prints the resolved `specDir` (under doc_root, never under cwd). Fill content into the files **it created**. +### 1.4 Pre-requirements Clarification(Plan-mode) -### Failure modes (do NOT improvise) +→ 详见 SKILL.md §Pre-requirements Clarification 与 `_selectors.py` SELECTOR_PROMPTS['clarification-wizard']。 -- Exit code with `{"error": "no_spec_root", ...}` → stop and surface the message verbatim. 
Tell the user to run `/spec --set-vault <p>` or `/spec --set-root <p>`. **Do not** invent a fallback location. -- Exit code with `{"error": "invalid_name", ...}` → `--name` was missing or normalized to empty. Pick a different slug and retry; do NOT bypass the script. +约束摘要: -## 2. Workflow Choice Prompt +- 留在 `intake` phase。 +- **一次性** wizard 问完 2–4 个**无依赖**决策点;不要逐 turn 散问(`AskUserQuestion` 工具 `questions` 数组上限 4 个)。 +- 子问题必须是"是 / 否 / 选哪条"具体问题;非互斥应拆类型。 +- 子问题之间无依赖;每个 question `multiSelect=false`。 +- "Other" 由工具自动提供;不需要手工加 `Type something` 等保留位。 +- inputs 不足以构成决策点 → 不要塞进 wizard。 +- 一个决策点都没有 → 跳到 `clarification-done`,不调 wizard 工具。 -When workflow is unclear, present the **Workflow 类型选择** selector from `references/prompts.md`. +用户回复后下一轮: -| Option | When | -|---|---| -| Requirements | Behavior-first feature work — **recommended default** | -| Technical Design | Architecture / low-level design / non-functional constraints are primary | -| Bugfix | Defect / regression / failing test / incident | +1. 解析回答 → 把已澄清的内容记下来,留待生成 requirements.md / bugfix.md 时使用(此步不要修改任何文件)。 +2. 呈现 `clarification-done`(类型 A,推荐选项 1 `进入下一阶段`)。 +3. End turn。 +4. 用户选 1 → 进 §2 workflow 选择;用户选 2 → 再发一轮 wizard。 -## 2.1 Document Confirmation Prompt +## 2. Workflow 选择 -After every generated document, in the **same response**: +`workflow-choice` 选择器(类型 A) → 模板详见 `_selectors.py` SELECTOR_PROMPTS['workflow-choice']。 -1. Do not paste the full document by default — rely on the client's file diff preview. -2. Show file path, concise summary, key changes, unresolved questions. -3. Run the **文档确认** selector from `references/prompts.md`. -4. 
**End the turn.** +三档定义: -Rules: +| 选项 | 何时选 | 后续 phase 序列 | +|---|---|---| +| **Requirements first**(默认推荐) | 行为优先的新特性;先把 SHALL 写清楚,再补技术设计 | requirements → design → tasks → implementation → acceptance → iteration | +| **Technical Design first** | 架构约束已知;先把 design.md 框架定下来,再反推 requirements | design → requirements → tasks → implementation → acceptance → iteration | +| **Bugfix** | 缺陷修复 / 回归测试 | bugfix → design → tasks → implementation → acceptance → iteration | -- `确认` → next phase -- `查看全文` → print full document, then re-show selector -- `继续沟通` → apply feedback, re-show summary + selector -- Repeat until `确认` +用户选完 → 调 `spec_session.py phase-transition --from intake --to requirements / design / bugfix` → 进入对应 phase。 -After `tasks.md` is confirmed, run the **任务执行** selector from `references/prompts.md`. +工作流选择写入 `<spec-dir>/.config.json.workflow` 字段(`requirements` / `design` / `bugfix` 之一)。 -## 3. Directory Resolution +## 3. Requirements-first Flow -Spec layout: +### 3.1 phase=requirements -```text -<document-root>/ -├── .active-specode.json ← v2 window index, slug-only -└── <spec-slug>/ - ├── requirements.md or bugfix.md - ├── design.md - ├── tasks.md ← 含 `## 测试要点` 节(供测试人员) - └── .config.json ← per-spec lock + iteration state -``` +1. **主代理**按 SKILL.md §「Spec 文档生成」生成 `requirements.md`(Read `${CLAUDE_PLUGIN_ROOT}/assets/templates/requirements.md` 模板 + 按 `source_text` 填空)。章节模板见 `references/templates.md` §requirements.md。 +2. 按 SKILL.md §Document Output Brevity 报路径 + 3–8 条变更要点 + 未决问题。 +3. 呈现 `doc-confirm-requirements`(类型 A,推荐选项 1 `确认`)。 +4. End turn 等用户选。 +5. 选 1 `确认` → phase-transition → design;选 2 `查看全文` → 完整 echo 文档后**再次**呈现同一 selector + end turn;选 3 `继续沟通` → 接收用户反馈 → 改文档 → 重出 step 2–3。 -Resolution priority (handled by `spec_init.py:resolve_document_root` / `spec_vault.resolve_spec_root`): +### 3.2 phase=design -1. `--root` or `SPECODE_ROOT` -2. `~/.config/specode/config.json` → `obsidianRoot` -3. Auto-detect Obsidian vault → `<vault>/spec-in/<os>-<user>/specs` +1. 
**主代理**按 SKILL.md §「Spec 文档生成」生成 `design.md`(章节见 templates.md)。 +2. 报路径 + 摘要。 +3. `doc-confirm-design` 选择器。 +4. End turn 等确认 → 通过则 phase-transition → tasks。 -**No further fallback.** All three miss → hard stop with guidance (see SKILL.md §Document Root Resolution). +### 3.3 phase=tasks -## 4. Requirements-first Flow +1. **主代理**按 SKILL.md §「Spec 文档生成」生成 `tasks.md`。要求: + - 嵌套 checkbox(顶层任务 / 子任务 / 检查点任务)。 + - 每条具体任务**必须**带 `_需求:x.y_` 或 `_需求:可选_` traceability 标签。 + - 可选任务用 `[*]` 标记;checkpoint 任务用 `[ ]` 但标题含"检查点"。 + - 验收节固定四行:所有 required 任务完成 / 所有验证命令通过 / 跳过 optional 已记录 / 用户确认验收。 +2. 报路径 + 摘要(任务总数 / required 数 / optional 数 / 主要阶段 + traceability / 同文件冲突 stage)。 +3. 呈现 `tasks-execution` 选择器(类型 A,一步完成确认 + 执行方式选择 + 回退入口): + - 选 1 `用 task-swarm 多 agent 并发(推荐)` → 调 `task_swarm.py init --tasks <spec_dir>/tasks.md --session <id>` 切到 task-swarm 编排模式;详见 `references/task-swarm.md`。required + optional 一并处理。 + - 选 2 `顺序执行(同时处理 optional)` → phase-transition → implementation,单 agent 顺序推进 required + optional。如用户在 Other 里说"只跑 required"则跳过 optional。 + - 选 3 `需要调整 tasks.md` → 留在 tasks phase;接收用户反馈 → 改 tasks.md → 重出本选择器。 + - 选 4 `暂不 coding` → 留在 tasks phase;告知用户随时 `/specode:end` 或后续 `/specode:continue` 继续。 -1. Generate `requirements.md` with sections: 简介 / 词汇表 / 需求 / 用户故事 / EARS 验收标准 / 边界情况 / 非功能需求 / 待确认问题 -2. Stop for review; show path, summary, key changes, unresolved questions -3. After confirm → generate `design.md` → review → confirm -4. → generate `tasks.md`(含 `## 测试要点` 节,**同一文档**内由 SHALL 衍生,见 §4.1)→ review → confirm -5. → ask whether to execute tasks -6. Code → validate → accept +## 4. Technical-design-first Flow -## 4.1 tasks.md 测试要点同步(铁律 / INV-4) +1. `design.md` first(**主代理**按 SKILL.md §「Spec 文档生成」生成,章节同 §3.2)。问用户做 high-level 还是 low-level design 时合并到一份。 +2. End turn → `doc-confirm-design` → 确认。 +3. 从已 approved 的 design.md 反推 `requirements.md`。 +4. `doc-confirm-requirements` → 确认。 +5. `tasks.md` 同 §3.3。 +6. 
`tasks-execution` 同 §3.3。 -`tasks.md` 的 `## 测试要点` 节没有独立确认门,它跟随 `requirements.md` / `bugfix.md` 的变更,由 agent 在**同一轮 turn 内**更新。`Stop` hook 检测:本轮触碰 requirements/bugfix 但未触碰 tasks.md → 拒绝整轮 (INV-4)。 +## 5. Bugfix Flow -**填充规则:** +1. `bugfix.md`(不写 `requirements.md`,二者**互斥**)。章节见 templates.md: + - 问题摘要 / 复现步骤 / 当前行为(错误行为,WHEN ... THEN ... [错误]) / 期望行为(WHEN ... SHALL [正确])/ 保持不变的行为(WHEN ... SHALL CONTINUE TO ...)/ 影响范围 / 证据 / 约束 / 待确认问题。 +2. 调研代码后再断根因 —— 不要凭空断言根因。 +3. `doc-confirm-bugfix` → 确认。 +4. `design.md`:根因 / 修复策略 / 回归风险 / 测试策略。`doc-confirm-design` → 确认。 +5. `tasks.md`:**复现测试 first** → 最小修复 → 不变行为回归测试 → 最终验证。呈现 `tasks-execution`(已合并 doc-confirm-tasks 的确认 + 调整入口)。 -- 读取 requirements.md / bugfix.md 中每一条 EARS `SHALL` 语句 -- 每条 SHALL → `## 测试要点` 一行 checkbox:`触发场景 → 预期结果(需求 X.Y)` -- 操作步骤必须是测试人员可执行的具体动作(**禁止**"触发该能力"这种泛化描述) -- 预期结果直接引用 SHALL 后的期望行为 -- **禁止保留**模板里"_agent 待填充_"等占位行 -- 写完代码标记 `[x]` 即可,跑通的场景不要删 -**例**:需求"新增密码强度校验" → 一行: -`- [ ] 输入少于 8 位密码点击提交 → 提示"密码长度不足"(需求 1.2)` -## 5. Technical-design-first Flow -1. `design.md` first; choose level (high / low) -2. Stop → confirm -3. Derive `requirements.md` from approved design -4. `tasks.md`(含 `## 测试要点` 节,由 SHALL 同 turn 衍生) -5. Display + confirm each -6. Ask whether to execute -## 6. Bugfix Flow -1. 
`bugfix.md` with: Current / Expected / Unchanged / Reproduction / Evidence / Impact -2. Investigate code before claiming root cause -3. `design.md` with root cause / fix strategy / regression risks / testing strategy -4. `tasks.md` with: reproduction test first → minimal fix → unchanged-behavior regression tests → final validation;`## 测试要点` 节同 turn 从 SHALL 衍生 -5. Display + confirm each +### 6.3 turn 结束前自检 -## 7. Task Execution +看到 `on-stop` 注入的「🔄 代码-文档同步提醒(输出侧)」时: -Before editing code: +1. `tasks.md` 是否更新?(推进 `[ ]` → `[~]` → `[x]` / blocker) +2. `implementation-log.md` 是否记录?(实现说明、设计偏离、关键决策) +3. `design.md` 接口契约是否变化?(若改了,**同 turn** Edit) -1. Resolve active spec from command or active session -2. **Three-check write guard** (see SKILL.md §Multi-Window + Lock): specId, boundary, lock -3. Load every file in that spec directory only -4. Find target task or next pending required task -5. **Heartbeat**: `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py heartbeat <spec-dir>` -6. Update task marker `[~]` -7. Implement only the linked scope -8. Run validation -9. Mark `[x]` only when validation passes -10. If blocked, leave `[ ]` / `[~]` and note the blocker +任一遗漏 → 在 chat 显式承诺"下一轮第一件事补齐 X",并在下一轮立刻做到。 -Task markers: +### 6.4 任务标记语义 ``` -[ ] pending [~] in progress [x] completed -[-] skipped [*] optional +[ ] pending [~] in progress [x] completed +[-] skipped [*] optional ``` -### 7.1 可选:委派给 task-swarm(多 agent 并发) - -如果用户在"任务执行"selector 选择 `用 task-swarm 多 agent 并发`,specode 主会话**仍然是 orchestrator**(持锁、回写 tasks.md),但**实际编码**委派给 task-swarm skill: - -1. 校验 `~/.claude/skills/task-swarm/` 存在;不存在则降级到默认 required tasks 模式 -2. 调用 task-swarm skill,传入 `<spec-dir>/tasks.md`,附 `--specode` 提示(task-swarm 也会自动嗅探) -3. task-swarm 按一级阶段聚合派发 coder/reviewer/validator 子 agent -4. 每个阶段完成后,**主会话**用 `verify-lock` 守卫后回写 tasks.md 的 `[x]`(INV-4 不触发,因为不改 requirements/bugfix) -5. 全部完成后回到 §8 Acceptance 流程,与传统执行无差异 - -→ 协议详情见 `references/task-swarm.md` +## 7. 
phase=acceptance -## 8. Acceptance +1. 触发:所有 required 任务标 `[x]`。 +2. phase-transition → acceptance。 +3. **先调一次** `spec_lint.py --spec <spec-dir>`(通过 SKILL.md §CLI 调用规约的 run.sh 模板),把 traceability / log / EARS 三类 WARNING 列在 chat 给用户参考。lint 是 advisory,不阻断验收。 +4. 做一份**验收摘要**(chat):tasks.md 完成度(done/total)/ lint WARNING 列表 / 余留风险 / 未决问题。若 tasks.md 末尾 `## 测试要点` 节存在,简述本次需要测试人员关注的要点(参考信息,不参与验收门判定)。 +5. 呈现 `acceptance-gate`(类型 A): + - 若 tasks.md 全 `[x]` → 推荐选项 1 `验收通过,进入 iteration`。 + - 否则 → 无推荐项。 +6. 用户选 1 → 调 `spec_session.py phase-transition --from acceptance --to iteration`(同时 `iterationRound += 1`,记 `iterationHistory`)。 +7. 用户选 2 `继续修改` → 留在 acceptance;视具体未达标项回退到 requirements / design / tasks(**走 phase-transition**,不要手改 `.config.json`)。 -Final acceptance must include: +## 8. phase=iteration -- Documents created or updated -- Tasks completed -- Validation commands and results -- Any skipped validation -- `tasks.md` 的 `## 测试要点` 节作为测试人员的验证清单 + 已记录的执行结果(可直接在原 checkbox 行后追加 `→ 实际:xxx`) -- Remaining risks or open questions -- If persistent: footer with `/end` +iteration 是已交付 spec 的**常驻**状态。子循环规则见 `references/iteration.md`。 -When all required tasks 已完成且 `## 测试要点` 所有 checkbox 均 `[x]`, agent runs the **验收通过** selector from `references/prompts.md`: +简要: -- `验收通过` → run `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py iterate <spec-dir>` → `iterationRound` 自增、phase 变为 `iteration` -- `继续修改` → 留在 `acceptance` 阶段补测试点或回滚到 `implementation` +- 用户提"我想加一个 X 功能" → `spec_session.py iterate <spec-dir>` → 进入 `iteration.requirements` 子 phase → 在 requirements.md 末尾追加 `## 迭代 N 新增需求` 节,走 confirm → design → tasks → implementation → acceptance 子循环 → 回到 iteration。 +- 用户提"改 acceptance 里一条规则" → 直接编辑 tasks.md 对应任务或 `## 测试要点` 行,不走完整子循环。 +- 用户运行 `/specode:end` → 释放锁 + sessions mode=ended,spec 文档保留。 -## 9. `/continue` — Context Loading + Multi-Window +## 9. `/specode:continue` — 上下文加载 + 多窗口 -`/continue` is a load-and-report command. 
It restores context and stops; it does not start implementation, run validation, or evaluate acceptance. +`/specode:continue` 是"加载并报告"型命令。它**恢复上下文然后停**;不开始实现、不跑验证、不评估验收。 -### 9.1 No-arg form +### 9.1 无参数形式 ```text -/continue +/specode:continue ``` -Steps: +步骤: -1. Resolve configured root: `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_vault.py get --json --configured-only` - - If no configured root → ask user to run `/spec --set-vault` or `/spec --set-root` and stop -2. List specs: `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py list-specs --root <root> --json` -3. List sessions: `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py list --root <root> --json` -4. Present using **Template C — List + Numeric Selection** in `references/prompts.md` (三段固定:当前会话 / 其他窗口 / 可继续的全部 specs;锁状态用固定词;结束语固定) -5. After user picks → run 9.2 with that slug +1. 调 `spec_vault.py status` 拿当前已配置 root(仅读 config.json,不重新检测)。 + - 无配置 root → 提示用户运行 `/specode:spec --set-vault <path>` 或 `--set-root <path>` 后 end turn。 +2. 调 `spec_session.py list-specs` 拿 root 下全部 spec(含 slug / phase / lock_state / holder / iterationRound / mtimes)。 +3. 在 chat 写 1-2 行上下文摘要("找到 N 个可继续 spec,当前 root:<root>,其中 <m> 个锁定 / <n> 个空闲"),然后**调 `AskUserQuestion` 工具**呈现选择器(详见 `references/obsidian.md` §5.1): + - 类型 A 单列单选;`multiSelect=false`。 + - 选项 ≤ 4 项;超过时按 last_heartbeat_at 取最近 4 个,其余在 chat 引导用户用 `/specode:continue <slug>` 显式指定。 + - 每个选项 `label=<slug>`,`description` 简述 phase / 迭代 / 锁状态 / 最近修改 mtime。 +4. 锁状态描述用固定词:`持有锁` / `锁定于 <id 前 8 位>` / `空闲` / `已过期`。 +5. 工具返回后下一轮进入 §9.2 with slug。 +6. `list-specs.specs == []` → **不**调工具,直接在 chat 引导用户用 `/specode:spec <需求>` 创建新 spec。 -### 9.2 With slug +### 9.2 有 slug 形式 ```text -/continue <slug> +/specode:continue <slug> ``` -Steps: +步骤: -1. Resolve `spec_dir = <root>/<slug>` -2. 
`sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py acquire <spec-dir> --session <id>` - - **Exit 0** → owned, proceed to step 3 - - **Exit 4 (LockHeld)** → 输出锁状态摘要,运行 **`/continue` 接管** 选择器(见 `references/prompts.md`) - - `强制接管` → `acquire --force`, warn that previous session was evicted - - `只读查看` → skip acquire, set read-only flag; do **not** update active-pointer's specSlug binding - - `取消` → exit -3. `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py load <spec-dir> --session <id>` — capture output -4. `sh ${CLAUDE_PLUGIN_ROOT}/scripts/run.sh ${CLAUDE_PLUGIN_ROOT}/scripts/spec_session.py continue <spec-dir> --session <id>` — bind session, write active pointer (skipped in read-only) -5. Present loaded context: +1. 解析 `spec_dir = <root>/specs/<slug>`。 +2. `spec_session.py acquire --spec <dir> --session <id>`: + - exit 0 → 持锁成功,进入 step 3。 + - exit 4 `LockHeld` → 输出锁状态摘要 → 呈现 `takeover-options` 选择器(详见 SKILL.md §Multi-Window + Lock)→ end turn。 + - 选 1 `强制接管` → `acquire --force` → 继续 step 3。 + - 选 2 `只读查看` → 跳 acquire,调 `load` 拿数据,写 `sessions/<id>.json.mode=readonly` → 进 step 5。 + - 选 3 `取消` → 退出。 +3. `spec_session.py load --spec <dir>` → 拿 phase / iteration round / tasks 计数 / 文档 mtime。 +4. `spec_session.py continue --spec <dir> --session <id>` → 绑定 sessions + 写 active-pointer(只读模式跳过这步)。 +5. 输出"已加载 spec"报告: +```text +已加载 spec:<slug> + specId:<id> + phase:<phase> + iteration:第 N 轮(若 > 0) + session:<session 前 8 位>(active / readonly) + lock:本会话持有 | ⚠ 锁定于 <other 前 8 位> | 空闲 + + requirements.md ← N 条验收标准 | 修改:<time> + design.md ← | 修改:<time> + tasks.md ← N/M 已完成,P 待处理 | 修改:<time> ``` -已加载 spec: <slug> - specId: <id> - phase: <phase> - iteration: 第 N 轮(若 > 0) - session: <sessionId> (<status>) - lock: 本会话持有 | ⚠ 锁定于 <id> | 空闲 - - requirements.md ← N 条验收标准 | 修改: <time> - design.md ← | 修改: <time> - tasks.md ← N/M 已完成, P 待处理 | 修改: <time> -``` - -6. Output footer (if persistent / read-only) -7. 
**Stop and wait for user's next input.** Do not start tasks. - -> ⛔ 从这一刻起,本会话进入"已落地 spec 的持续沟通"模式。后续任何对需求或设计的调整 **必须同 turn 写回对应文档**——见 §1.1 顶部铁律。聊天里说过但没写入文件的内容,下次 `/continue` 时全部丢失。 - -## 10. Boundary Anti-contamination Rules - -Enforced for every continue, switch, edit, end, and any spec document write: - -1. `specDir` must be inside `documentRoot` (`ensure_within_root`); refuse if not -2. Active pointer `specId` must match `<spec-dir>/.config.json.specId`; refuse if not -3. **Lock must be held by current session** (`verify-lock` returns `ok`); refuse if not -4. Only files inside the selected spec folder are treated as active spec documents -5. Changes to one spec never update another spec's documents, config, task state, or active pointer entry -6. All writes use atomic temp + `os.replace()`; read-modify-write of `.config.json` is guarded by `_file_lock` - -→ 详见 `lock-protocol.md`(5 个 lock 子命令、接管协议、只读模式、被驱逐窗口行为) -## 11. iteration Phase +6. 状态行 footer。 +7. **End turn 等用户下一句**。不开始任务、不跑验证、不评估验收。 -→ 详见 `iteration.md`(完整 phase 生命周期、子循环图、文档累积写法、`/spec-accept` 触发约定、`spec_session.py continue --phase` 默认 None 的原因) +> ⛔ 从此刻起,本会话进入"已落地 spec 的持续沟通"模式。用户后续任何对需求 / 设计 / 任务的调整 —— 哪怕只是聊一句 —— 都必须**同 turn 写回**对应文档。chat 累积到"下一轮再写"是禁区——next session 看不到。 -## Phase Gates — Detailed Sub-steps +## 10. Phase-gate 输出顺序(铁律) -Output order within each confirmation step (strictly follow): +每个 phase-gate 的 turn 严格按此顺序: -1. Generate or update the document (write the file) -2. **First**: in agent's text — document path, concise summary, key changed points, unresolved questions -3. **Then**: confirmation options via `spec_choice.py`. TTY → user picks in curses. Non-TTY → the script prints the option block + `AWAITING_USER_CHOICE` sentinel on stdout and exits 0; relay the stdout block to the user verbatim. Do **not** re-run the script to "retry" or restate the options yourself in different wording. -4. **End the turn.** Do not continue to the next phase in the same response +1. 
先做工具调用(Write/Edit 文档 / Read 验证文档)。 +2. 在 chat 正文输出:文档**绝对路径**、简短摘要、3–8 条关键变更要点、未决问题。 +3. 空一行 → 状态行 footer。 +4. **调 `AskUserQuestion` 工具**呈现选择器(类型按 SKILL.md §Selectors 表查;模板见 `_selectors.py` SELECTOR_PROMPTS[<key>],索引见 `references/selectors.md` §8 总览表)。 +5. 工具调用本身就是 turn 终止;不需要 sentinel,不需要在工具调用之后追加任何文本。 -The user's next reply drives the next action: +用户回复(即 `AskUserQuestion` 工具返回值)→ 下一轮按用户选择做对应动作;选 `查看全文`(doc-confirm-* 选项 2)就完整 echo 文档后**再次**调同一选择器工具。 -- `"确认" / "1" / "confirm"` → proceed to next phase -- `"查看全文" / "2"` → display full document, then show confirmation options again; end turn -- `"继续沟通" / "3" / any feedback` → update document, show revised summary + options; end turn +绝不在同一轮里"先调工具再继续到下一阶段"——工具调用结束了本轮,下一阶段在新一轮处理。 -Full phase sequence: +## 11. 与 task-swarm 的交接 -1. Generate or update `requirements.md` (feature) or `bugfix.md` (bugfix). Show summary + options. End turn. Wait for confirm. -2. After confirm: generate or update `design.md`. Show summary + options. End turn. Wait for confirm. -3. After confirm: generate or update `tasks.md`,**同一文档**内同 turn 把 `## 测试要点` 节按 SHALL 填好(跟随式,无单独确认门;INV-4 hook 强制). Show summary + options. End turn. Wait for confirm. -4. After confirm: show task execution options (required only / required + optional / hold). End turn. Wait for choice. -5. After explicit execution choice: begin coding tasks, validate, accept. +`tasks-execution` 选项 1 `用 task-swarm 多 agent 并发` 由 `task_swarm.py` 编排器实现。 -If user asks for one-pass generation, still show paths, summaries, key changes per document, and mark `Review Status: unreviewed`. +选 1 → 主会话切到 task-swarm 编排模式(按 `commands/task-swarm.md` 协议),所有 group 完成后回到 implementation → acceptance 通路。详见 `references/task-swarm.md`。 -## Implementation Execution — Full Steps +## 12. CLI 命令参考 -1. Resolve and validate the active spec session if persistent mode is active -2. **Three-check write guard** + heartbeat (see §7) -3. Load all spec files from the selected `<document-root>/<requirement-name>/` -4. 
Identify the selected task ID or next pending required task -5. Mark the task in `tasks.md` as in-progress using `[~]` -6. Make the smallest code change that satisfies the linked requirement -7. Run the validation command or nearest relevant project test -8. Mark `[x]` only after validation passes -9. If validation cannot run, keep the task incomplete and record the reason -10. Finish with an acceptance summary: changed files, completed tasks, validation result, remaining risks - -**Task menu semantics:** - -- "Run all tasks" = required tasks only unless the user opts in to optional tasks -- "Run required and optional tasks" = includes optional -- Stop at checkpoints if validation fails or user confirmation is needed - -## Interactive Selectors (Reference) - -Run at each decision point. In a TTY the script offers ↑/↓ + Enter. In a non-TTY shell (Claude Code Bash, CI) it prints the option block + `[specode:non-interactive] AWAITING_USER_CHOICE` sentinel on stdout and exits 0; agent forwards the stdout block to the user and ends the turn. Do not invent your own option text — always run the script first. - -All selector command blocks live in `references/prompts.md` — copy-paste them verbatim: - -- Workflow 类型选择 -- 文档确认(每份 spec 文档生成后) -- 任务执行(tasks.md 确认后) -- `/continue` 接管(spec 已被锁定时) -- 澄清完成(Plan-mode 结束) -- 验收通过(acceptance 完成时) - -Selectors are preferred over plain-text confirmation. Use plain text only when tool execution is unavailable. **Forbidden phrasings** (`够了`、`差不多`、`随便选` 等口语词) are listed at the bottom of `references/prompts.md`. +| 命令 | 用途 | +|---|---| +| `spec_vault.py detect` | 列出已知 Obsidian vault | +| `spec_vault.py status` | 当前 doc root + 来源 | +| `spec_vault.py set --vault <p>` / `set --root <p>` | 永久绑定 vault / 根目录 | +| `spec_init.py --name <slug> --requirement-name "..." --source-text "..." 
--session <id>` | 创建新 spec | +| `spec_session.py acquire / release / heartbeat / verify-lock --spec <dir> --session <id>` | 锁管理 | +| `spec_session.py phase-transition --spec <dir> --session <id> --from <p> --to <p>` | phase 切换(必走 CLI) | +| `spec_session.py load --spec <dir>` | 只读加载状态摘要 | +| `spec_session.py continue --spec <dir> --session <id>` | 接管 / 恢复 | +| `spec_session.py end --session <id>` | `/specode:end` 入口 | +| `spec_session.py status --session <id>` / `read-session --session <id>` | 状态查询(只读) | +| `spec_lint.py` | traceability 缺失 / log 过短 / EARS 缺动词 等 WARNING(acceptance phase 由主代理调一次)| +| `spec_status.py` | `/specode:status` 命令入口(聚合输出) | + +CLI 退出码语义:0 ok / 1 lock_lost 或写失败 / 3 evicted / not_held / stale_lock 或 vault miss / 4 LockHeld。所有 hook 子命令始终 exit 0(仅注入提示,不阻断)。 diff --git a/plugins/specode/tests/conftest.py b/plugins/specode/tests/conftest.py index ab01494..ec86884 100644 --- a/plugins/specode/tests/conftest.py +++ b/plugins/specode/tests/conftest.py @@ -1,107 +1,150 @@ -"""Shared fixtures for specode tests. +"""Shared pytest fixtures for specode plugin tests (v0.6.0). -Tests use pytest but the plugin runtime stays stdlib-only. Install dev deps -with `python3 -m pip install pytest` to run the suite. +Bedrock rules: + * Tests MUST be hermetic: never read or write the real $HOME/.specode. + * Always redirect HOME / XDG_CONFIG_HOME / SPECODE_ROOT to tmp_path-based dirs. + * Scripts are invoked as CLIs via subprocess (NOT imported as modules). + * Each test uses a freshly-minted UUID session id to avoid cross-test pollution. 
""" from __future__ import annotations -import io import json -import shutil +import os +import subprocess import sys -import tempfile +import uuid from pathlib import Path -from typing import Iterator +from typing import Optional import pytest -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) -import spec_state # noqa: E402 -import spec_guard # noqa: E402 +REPO_ROOT = Path(__file__).resolve().parents[3] +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" @pytest.fixture -def workspace() -> Iterator[dict]: - """Provide a self-contained temp dir with spec_dir + project_root. +def repo_root() -> Path: + return REPO_ROOT - Yields a dict with paths and patches spec_state.find_active_spec to return - a synthetic info struct pointing into the workspace. + +@pytest.fixture +def scripts_dir() -> Path: + return SCRIPTS_DIR + + +@pytest.fixture +def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Redirect $HOME to tmp_path so Path.home() resolves to an isolated dir. + + Also drop any inherited SPECODE_ROOT / XDG_CONFIG_HOME so child processes + do not accidentally see user state. Tests that need those vars must set + them explicitly via monkeypatch.setenv. 
""" - root = Path(tempfile.mkdtemp(prefix="specode-test-")) - spec_dir = root / "test-spec" - project_root = root / "project" - spec_dir.mkdir() - project_root.mkdir() - (project_root / "src").mkdir() - (spec_dir / "tasks.md").write_text( - "# Tasks\n\n- [ ] FILE: src/foo.py\n- [ ] FILE: src/bar.py\n" - ) - (spec_dir / ".config.json").write_text(json.dumps({"specId": "test-id"})) - - state = { - "root": root, - "spec_dir": spec_dir, - "project_root": project_root, - "current_phase": "implementation", - "session_id": "test-sess", - "slug": "test-spec", - } - - def fake_find_active(prefer_session_id=None): - return { - "spec_slug": state["slug"], - "spec_dir": str(state["spec_dir"]), - "current_phase": state["current_phase"], - "session_id": state["session_id"], - "spec_id": "test-id", - "last_activity_at": "2026-05-15T00:00:00Z", - } - - original = spec_state.find_active_spec - spec_state.find_active_spec = fake_find_active - - yield state - - spec_state.find_active_spec = original - shutil.rmtree(root, ignore_errors=True) - - -def call_hook(sub: str, payload: dict, capture_stderr=False, capture_stdout=False): - """Invoke spec_guard.main with a fabricated stdin payload. - - Returns (exit_code, stdout, stderr). + monkeypatch.setenv("HOME", str(tmp_path)) + # USERPROFILE for hypothetical Windows runners + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + # Pin XDG_CONFIG_HOME under fake home so spec_vault's config never + # escapes to the real user's ~/.config. + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / ".config")) + # APPDATA / LOCALAPPDATA are read by spec_vault.detect to find Obsidian + # on Windows; pin them inside fake_home so a real Obsidian install on + # the test machine cannot leak into the assertions. 
+ monkeypatch.setenv("APPDATA", str(tmp_path / "AppData" / "Roaming")) + monkeypatch.setenv("LOCALAPPDATA", str(tmp_path / "AppData" / "Local")) + monkeypatch.delenv("SPECODE_ROOT", raising=False) + monkeypatch.delenv("SPECODE_GUARD", raising=False) + return tmp_path + + +@pytest.fixture +def specode_home(fake_home: Path) -> Path: + """The simulated ~/.specode/ directory (parent of sessions/).""" + return fake_home / ".specode" + + +@pytest.fixture +def doc_root(tmp_path: Path, fake_home: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """The simulated specode root (also written to SPECODE_ROOT env var). + + Uses a sub-dir of tmp_path that is sibling to fake_home so the two + namespaces are well separated. """ - sys.stdin = io.StringIO(json.dumps(payload)) - out = io.StringIO() - err = io.StringIO() - if capture_stdout: - sys.stdout = out - if capture_stderr: - sys.stderr = err - try: - rc = spec_guard.main(["spec_guard", sub]) - finally: - sys.stdout = sys.__stdout__ - sys.stderr = sys.__stderr__ - return rc, out.getvalue(), err.getvalue() + root = tmp_path / "vault" / "spec-in" / "test" + root.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("SPECODE_ROOT", str(root)) + return root @pytest.fixture -def hook_caller(): - return call_hook +def make_session_id(): + """Factory returning a fresh UUID string per call.""" + def _make() -> str: + return str(uuid.uuid4()) + return _make -def make_edit_payload(target, project_root, session_id="test-sess"): - return { - "session_id": session_id, - "cwd": str(project_root), - "tool_name": "Edit", - "tool_input": {"file_path": str(target)}, - } +@pytest.fixture +def run_script(scripts_dir: Path, fake_home: Path): + """Run a specode CLI script under the test-controlled environment. 
+ Usage: + cp = run_script("spec_vault.py", "status") + cp = run_script("spec_session.py", "on-user-prompt", stdin=json.dumps(...)) + """ + def _run(script_name: str, *args: str, stdin: Optional[str] = None, + extra_env: Optional[dict] = None) -> subprocess.CompletedProcess: + env = os.environ.copy() + # Make sure HOME redirection sticks (subprocesses inherit current env + # which already has the monkeypatched HOME, but we re-assert for safety). + env["HOME"] = str(fake_home) + env["USERPROFILE"] = str(fake_home) + env.setdefault("XDG_CONFIG_HOME", str(fake_home / ".config")) + # Force Python UTF-8 mode in child: on Windows the default locale is + # cp936/gbk which makes pathlib + stderr writes incompatible with the + # utf-8 decoding we use here. Tests on macOS/Linux already default to + # utf-8, so this is a no-op there. + env.setdefault("PYTHONUTF8", "1") + env.setdefault("PYTHONIOENCODING", "utf-8") + if extra_env: + env.update(extra_env) + cmd = [sys.executable, str(scripts_dir / script_name), *args] + return subprocess.run( + cmd, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + input=stdin if stdin is not None else "", + env=env, + timeout=30, + ) + return _run + + +# -------------------------------------------------------------------------- +# helper: create a working spec dir + session file the way spec_init would +# -------------------------------------------------------------------------- @pytest.fixture -def edit_payload(): - return make_edit_payload +def init_spec(run_script, doc_root: Path, make_session_id): + """Initialise a spec via spec_init.py and return (slug, session_id, spec_dir, payload). + + Useful for tests of spec_session that need a real spec to operate on. 
+ """ + def _init(slug: str = "demo-spec", requirement_name: str = "Demo Spec", + source_text: str = "测试用源需求文本", + session_id: Optional[str] = None): + sid = session_id or make_session_id() + cp = run_script( + "spec_init.py", + "--name", slug, + "--requirement-name", requirement_name, + "--source-text", source_text, + "--session", sid, + ) + assert cp.returncode == 0, f"spec_init failed: {cp.stderr}\n{cp.stdout}" + payload = json.loads(cp.stdout) + spec_dir = Path(payload["spec_dir"]) + return slug, sid, spec_dir, payload + return _init diff --git a/plugins/specode/tests/test_bash_guard.py b/plugins/specode/tests/test_bash_guard.py deleted file mode 100644 index 647aa75..0000000 --- a/plugins/specode/tests/test_bash_guard.py +++ /dev/null @@ -1,313 +0,0 @@ -"""Tests for INV-11 Bash hang guard (bash_guard.py). - -Each blacklist rule has at least one positive (must deny) and one negative -(must pass) sample. Hang-signature scan has positive/negative samples too. -""" -from __future__ import annotations - -import sys -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import bash_guard - - -def _deny(cmd: str, rule: str | None = None) -> None: - res = bash_guard.check_bash_command(cmd) - assert res.decision == "deny", f"expected deny for {cmd!r}, got {res}" - if rule: - assert res.rule == rule, f"expected rule {rule}, got {res.rule}" - - -def _ok(cmd: str) -> None: - res = bash_guard.check_bash_command(cmd) - assert res.decision == "ok", f"expected ok for {cmd!r}, got deny: {res.message}" - - -# ---- npm / yarn / pnpm scaffolders ----------------------------------------- - -def test_npm_create_without_yes_denied(): - _deny("npm create vite@latest myapp -- --template react-ts", rule="npm-create") - - -def test_npm_create_with_yes_passes(): - _ok("npm create vite@latest myapp -- --yes --template react-ts") - - -def test_npm_create_with_yes_pipe_passes(): - _ok("yes | npm create 
vite@latest myapp") - - -def test_npm_init_without_y_denied(): - _deny("npm init", rule="npm-init") - - -def test_npm_init_with_y_passes(): - _ok("npm init -y") - - -def test_npm_init_with_yes_passes(): - _ok("npm init --yes") - - -def test_yarn_create_denied(): - _deny("yarn create vite myapp", rule="yarn-create") - - -def test_yarn_create_with_yes_passes(): - _ok("yarn create vite myapp --yes") - - -def test_pnpm_create_denied(): - _deny("pnpm create vite myapp", rule="pnpm-create") - - -def test_npx_without_yes_denied(): - _deny("npx create-foo", rule="npx-create") - - -def test_npx_with_yes_passes(): - _ok("npx --yes create-foo") - - -def test_npx_with_short_y_passes(): - _ok("npx -y create-foo") - - -# ---- git interactive ------------------------------------------------------- - -def test_git_rebase_interactive_denied(): - _deny("git rebase -i HEAD~3", rule="git-rebase-interactive") - _deny("git rebase --interactive main", rule="git-rebase-interactive") - - -def test_git_rebase_non_interactive_passes(): - _ok("git rebase main") - _ok("git rebase --onto main feature") - - -def test_git_add_patch_denied(): - _deny("git add -p", rule="git-add-interactive") - _deny("git add -i", rule="git-add-interactive") - _deny("git add --patch", rule="git-add-interactive") - - -def test_git_add_explicit_path_passes(): - _ok("git add src/foo.py") - _ok("git add -A") - _ok("git add .") - - -def test_git_commit_no_message_denied(): - _deny("git commit", rule="git-commit-needs-message") - - -def test_git_commit_with_m_passes(): - _ok('git commit -m "fix bug"') - - -def test_git_commit_with_message_long_flag_passes(): - _ok('git commit --message="fix bug"') - - -def test_git_commit_amend_no_edit_passes(): - _ok("git commit --amend --no-edit") - - -def test_git_commit_with_F_passes(): - _ok("git commit -F /tmp/msg") - - -# ---- TUI editors / pagers -------------------------------------------------- - -def test_vim_denied(): - _deny("vim file.txt", rule="tty-editor") - _deny("nvim 
file.txt", rule="tty-editor") - - -def test_nano_denied(): - _deny("nano file.txt", rule="tty-editor") - - -def test_less_denied(): - _deny("less /var/log/foo.log", rule="tty-editor") - - -def test_top_denied(): - _deny("top", rule="tty-editor") - - -def test_cat_passes(): - """`cat` is not in the editor list — it just emits and exits.""" - _ok("cat file.txt") - - -def test_head_passes(): - _ok("head -n 50 file.txt | grep foo") - - -# ---- interactive shells ---------------------------------------------------- - -def test_bash_dash_i_denied(): - _deny("bash -i", rule="interactive-shell") - - -def test_python_dash_i_denied(): - _deny("python3 -i script.py", rule="interactive-shell") - - -def test_python_dash_c_passes(): - _ok("python3 -c 'print(1+1)'") - - -def test_bare_python_denied(): - _deny("python3", rule="repl-bare") - - -def test_bare_node_denied(): - _deny("node", rule="repl-bare") - - -def test_python_with_script_passes(): - _ok("python3 script.py --arg value") - - -# ---- ssh / gh / apt -------------------------------------------------------- - -def test_ssh_without_batch_denied(): - _deny("ssh user@host", rule="ssh-no-batch") - - -def test_ssh_with_batch_passes(): - _ok("ssh -o BatchMode=yes user@host uptime") - - -def test_ssh_with_key_passes(): - _ok("ssh -i ~/.ssh/deploy user@host uptime") - - -def test_gh_pr_create_no_args_denied(): - _deny("gh pr create", rule="gh-pr-create-no-args") - - -def test_gh_pr_create_with_title_body_passes(): - _ok('gh pr create --title "x" --body "y"') - - -def test_gh_pr_create_with_fill_passes(): - _ok("gh pr create --fill") - - -def test_apt_install_no_y_denied(): - _deny("sudo apt install nginx", rule="apt-no-yes") - _deny("apt-get install curl", rule="apt-no-yes") - - -def test_apt_install_with_y_passes(): - _ok("apt-get install -y nginx") - - -def test_apt_install_with_env_passes(): - _ok("DEBIAN_FRONTEND=noninteractive apt-get install nginx") - - -# ---- safe commands sanity check 
-------------------------------------------- - -def test_safe_commands_all_pass(): - for cmd in [ - "ls -la", - "pwd", - "git status", - "git log --oneline -10", - "git push origin main", - "npm install", - "npm run build", - "pip install requests", - "brew install jq", - "cargo build", - "go install github.com/foo/bar@latest", - "docker pull nginx", - "python3 -m pytest -q", - "echo hello", - "find . -name '*.py'", - "grep -r foo .", - ]: - _ok(cmd) - - -# ---- empty / whitespace ---------------------------------------------------- - -def test_empty_command_passes(): - _ok("") - _ok(" ") - _ok("\n\t") - - -# ---- chained commands ------------------------------------------------------ - -def test_chained_denied_command_caught(): - _deny("cd /tmp && npm create vite myapp", rule="npm-create") - _deny("ls && vim file.txt", rule="tty-editor") - - -def test_chained_safe_commands_pass(): - _ok("cd /tmp && ls && npm install") - - -# ---- hang signature detection --------------------------------------------- - -def test_detect_hang_by_exit_124(): - is_hang, reason = bash_guard.detect_hang("partial output\n", "", exit_code=124) - assert is_hang - assert "124" in reason - - -def test_detect_hang_by_ok_to_proceed(): - stdout = "Need to install the following packages:\ncreate-vite@9.0.7\nOk to proceed? (y)\n" - is_hang, _ = bash_guard.detect_hang(stdout) - assert is_hang - - -def test_detect_hang_by_y_n_prompt(): - is_hang, _ = bash_guard.detect_hang("Continue? 
[Y/n]") - assert is_hang - - -def test_detect_hang_by_password_prompt(): - is_hang, _ = bash_guard.detect_hang("", "sudo: password: ") - assert is_hang - - -def test_detect_hang_chinese_prompts(): - for s in ["确认吗?(y/n)", "是否继续?", "请输入密码:"]: - is_hang, _ = bash_guard.detect_hang(s) - assert is_hang, f"expected hang for {s!r}" - - -def test_no_hang_on_clean_output(): - stdout = "added 142 packages in 30s\nfound 0 vulnerabilities\n" - is_hang, _ = bash_guard.detect_hang(stdout, exit_code=0) - assert not is_hang - - -def test_no_hang_when_only_normal_text(): - is_hang, _ = bash_guard.detect_hang("npm WARN deprecated foo@1.0.0\n") - assert not is_hang - - -def test_format_hang_advisory_includes_command(): - out = bash_guard.format_hang_advisory("test reason", command_excerpt="npm create vite myapp") - assert "INV-11 ADVISORY" in out - assert "test reason" in out - assert "npm create vite myapp" in out - assert "do NOT retry" in out - - -def test_format_hang_advisory_truncates_long_command(): - long_cmd = "a" * 500 - out = bash_guard.format_hang_advisory("reason", command_excerpt=long_cmd) - # Excerpt is capped at 200 chars in format - assert "a" * 200 in out - assert "a" * 250 not in out diff --git a/plugins/specode/tests/test_catalog.py b/plugins/specode/tests/test_catalog.py new file mode 100644 index 0000000..d3abb99 --- /dev/null +++ b/plugins/specode/tests/test_catalog.py @@ -0,0 +1,206 @@ +"""Tests for spec_session.py `on-user-prompt-catalog` hook (B2 description-as-trigger). + +Catalog hook scans user prompt for keywords in CATALOG dict and emits a +"consider reading references/<X>.md" injection. Active-only — silent for +idle / ended / readonly to avoid noise in non-spec turns. 
+""" +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Optional + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[3] +REFS_DIR = REPO_ROOT / "plugins" / "specode" / "skills" / "specode" / "references" +CATALOG_PY = REPO_ROOT / "plugins" / "specode" / "scripts" / "spec_session" / "_catalog.py" + + +def _parse_hook(stdout: str) -> Optional[dict]: + s = stdout.strip() + if not s: + return None + return json.loads(s) + + +def _ctx(payload: Optional[dict]) -> str: + if payload is None: + return "" + return payload.get("hookSpecificOutput", {}).get("additionalContext", "") + + +def _write_session(fake_home: Path, sid: str, **overrides) -> Path: + sess_dir = fake_home / ".specode" / "sessions" + sess_dir.mkdir(parents=True, exist_ok=True) + base = { + "session_id": sid, + "started_at": "2026-01-01T00:00:00Z", + "last_activity_at": "2026-01-01T00:00:00Z", + "ended_at": None, + "mode": "idle", + "active_spec_slug": None, + "active_spec_dir": None, + "spec_id": None, + "phase": None, + "lock_state": "released", + "task_swarm_run_id": None, + "pending_selector": None, + } + base.update(overrides) + p = sess_dir / f"{sid}.json" + p.write_text(json.dumps(base), encoding="utf-8") + return p + + +def _load_catalog_keys() -> set[str]: + """Parse spec_session/_catalog.py CATALOG dict keys without importing the module.""" + src = CATALOG_PY.read_text(encoding="utf-8") + m = re.search( + r"CATALOG:\s*dict\[str,\s*list\[str\]\]\s*=\s*\{(.*?)\n\}\s*$", + src, + re.DOTALL | re.MULTILINE, + ) + assert m, "CATALOG dict not found in spec_session/_catalog.py" + return set(re.findall(r'^\s*"([a-z][a-z0-9-]+)":', m.group(1), re.MULTILINE)) + + +# -------------------------------------------------------------------------- +# drift guards: CATALOG keys ↔ references/*.md ↔ frontmatter description +# -------------------------------------------------------------------------- + +def 
test_catalog_keys_have_matching_reference_files(): + """Every CATALOG key must point at a real references/<key>.md file.""" + keys = _load_catalog_keys() + missing = sorted(k for k in keys if not (REFS_DIR / f"{k}.md").exists()) + assert not missing, f"CATALOG keys with no matching reference file: {missing}" + + +def test_every_catalog_referenced_file_has_description_frontmatter(): + """Each reference targeted by CATALOG must carry a non-empty `description:` field.""" + keys = _load_catalog_keys() + bad: list[str] = [] + for k in sorted(keys): + text = (REFS_DIR / f"{k}.md").read_text(encoding="utf-8") + if not text.startswith("---\n"): + bad.append(f"{k}: no frontmatter") + continue + end = text.find("\n---\n", 4) + if end < 0: + bad.append(f"{k}: frontmatter not closed") + continue + fm = text[4:end] + desc = None + for line in fm.split("\n"): + if line.startswith("description:"): + desc = line[len("description:"):].strip() + break + if not desc: + bad.append(f"{k}: empty or missing description") + assert not bad, "frontmatter issues:\n " + "\n ".join(bad) + + +# -------------------------------------------------------------------------- +# activation gate: only mode=active triggers +# -------------------------------------------------------------------------- + +@pytest.mark.parametrize("mode", ["idle", "ended", "readonly"]) +def test_catalog_silent_when_not_active(run_script, fake_home, make_session_id, mode): + sid = make_session_id() + _write_session(fake_home, sid, mode=mode) + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + stdin=json.dumps({"session_id": sid, "prompt": "请讲一下 lock takeover heartbeat 流程"}), + ) + assert cp.returncode == 0 + assert _parse_hook(cp.stdout) is None, f"mode={mode} should not emit" + + +def test_catalog_silent_when_session_missing(run_script, fake_home, make_session_id): + sid = make_session_id() + # no session file written + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + 
stdin=json.dumps({"session_id": sid, "prompt": "task-swarm reviewer"}), + ) + assert cp.returncode == 0 + assert _parse_hook(cp.stdout) is None + + +def test_catalog_silent_when_no_keyword_matches(run_script, fake_home, make_session_id): + sid = make_session_id() + _write_session(fake_home, sid, mode="active") + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + stdin=json.dumps({"session_id": sid, "prompt": "今天天气真好,散个步吧"}), + ) + assert cp.returncode == 0 + assert _parse_hook(cp.stdout) is None + + +# -------------------------------------------------------------------------- +# keyword matching: spot-check several CATALOG keys +# -------------------------------------------------------------------------- + +@pytest.mark.parametrize("prompt,expected_ref", [ + ("我需要 takeover 这个 spec", "lock-protocol"), + ("锁主是谁?heartbeat 多久一次?", "lock-protocol"), + ("vault 路径不对,要 --set-vault", "obsidian"), + ("specs 目录在哪", "obsidian"), + ("我想跑 task-swarm,让 reviewer 评审", "task-swarm"), + ("调一下 AskUserQuestion 工具的 selector", "selectors"), + ("EARS SHALL 怎么写", "templates"), + ("迭代一下需求", "iteration"), + ("workflow-choice 选哪一个", "workflow"), +]) +def test_catalog_active_hits_expected_reference( + run_script, fake_home, make_session_id, prompt, expected_ref, +): + sid = make_session_id() + _write_session(fake_home, sid, mode="active") + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + stdin=json.dumps({"session_id": sid, "prompt": prompt}), + ) + assert cp.returncode == 0 + ctx = _ctx(_parse_hook(cp.stdout)) + assert f"references/{expected_ref}.md" in ctx, ( + f"prompt {prompt!r} should hit {expected_ref}; got ctx:\n{ctx}" + ) + # The injection must also carry the reference's description (frontmatter) + assert "Use when" in ctx + + +def test_catalog_multi_hit_lists_all_and_deduplicates( + run_script, fake_home, make_session_id, +): + sid = make_session_id() + _write_session(fake_home, sid, mode="active") + prompt = ( + "我想 takeover 这个 spec,然后跑 task-swarm,让 
reviewer 评审 — " + "vault 在哪也告诉我下 obsidian" + ) + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + stdin=json.dumps({"session_id": sid, "prompt": prompt}), + ) + ctx = _ctx(_parse_hook(cp.stdout)) + for expected in ("lock-protocol", "task-swarm", "obsidian"): + # each ref appears exactly once (dedup) + assert ctx.count(f"references/{expected}.md") == 1 + + +def test_catalog_guard_off_emits_nothing( + run_script, fake_home, make_session_id, +): + sid = make_session_id() + _write_session(fake_home, sid, mode="active") + cp = run_script( + "spec_session.py", "on-user-prompt-catalog", + stdin=json.dumps({"session_id": sid, "prompt": "task-swarm reviewer"}), + extra_env={"SPECODE_GUARD": "off"}, + ) + assert cp.returncode == 0 + assert _parse_hook(cp.stdout) is None diff --git a/plugins/specode/tests/test_integration.py b/plugins/specode/tests/test_integration.py new file mode 100644 index 0000000..d2f0694 --- /dev/null +++ b/plugins/specode/tests/test_integration.py @@ -0,0 +1,257 @@ +"""End-to-end integration tests covering the full specode v0.6 event chain.""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +def _read_sess(fake_home: Path, sid: str) -> dict: + return json.loads((fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8")) + + +def _read_cfg(spec_dir: Path) -> dict: + return json.loads((spec_dir / ".config.json").read_text(encoding="utf-8")) + + +def _ctx(stdout: str) -> str: + s = stdout.strip() + if not s: + return "" + return json.loads(s).get("hookSpecificOutput", {}).get("additionalContext", "") + + +def test_full_lifecycle_event_chain(run_script, fake_home, doc_root, make_session_id): + """SessionStart → /spec → phase-transition → /end → SessionEnd. 
+ Validate state of sessions/<id>.json and spec.config.json at each step.""" + sid = make_session_id() + + # 1) SessionStart hook (new session) + cp = run_script("spec_session.py", "on-session-start", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + sess = _read_sess(fake_home, sid) + assert sess["mode"] == "idle" + + # 2) /spec creates a new spec; mode should become active, lock held by sid. + cp = run_script( + "spec_init.py", + "--name", "lifecycle", + "--requirement-name", "Lifecycle Spec", + "--source-text", "做一个完整生命周期的测试", + "--session", sid, + ) + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + spec_dir = Path(payload["spec_dir"]) + sess = _read_sess(fake_home, sid) + assert sess["mode"] == "active" + assert sess["active_spec_slug"] == "lifecycle" + # 0.10.15+:spec_init 后第一个 selector 是 project-root-choice + assert sess["pending_selector"] == "project-root-choice" + cfg = _read_cfg(spec_dir) + assert cfg["phase"] == "intake" + assert cfg["lock"]["holder"] == sid + + # 2b) set-project-root → 推进到 workflow-choice + cp = run_script("spec_session.py", "set-project-root", + "--spec", str(spec_dir), "--session", sid, + "--root", str(spec_dir)) # 测试用 spec_dir 自己也算合法路径 + assert cp.returncode == 0, cp.stderr + sess = _read_sess(fake_home, sid) + cfg = _read_cfg(spec_dir) + assert cfg["project_root"] == str(spec_dir) + assert cfg["pending_selector"] == "workflow-choice" + assert sess["pending_selector"] == "workflow-choice" + + # 3) phase-transition intake → requirements + cp = run_script("spec_session.py", "phase-transition", + "--spec", str(spec_dir), "--session", sid, + "--from", "intake", "--to", "requirements") + assert cp.returncode == 0 + assert _read_cfg(spec_dir)["phase"] == "requirements" + assert _read_sess(fake_home, sid)["phase"] == "requirements" + assert _read_sess(fake_home, sid)["pending_selector"] == "doc-confirm-requirements" + + # 4) /end releases the lock and sets mode=ended + cp = 
run_script("spec_session.py", "end", "--session", sid) + assert cp.returncode == 0 + sess = _read_sess(fake_home, sid) + assert sess["mode"] == "ended" + assert sess["ended_at"] + assert _read_cfg(spec_dir)["lock"] is None + + # 5) SessionEnd hook is idempotent; sess remains ended; lock stays released + cp = run_script("spec_session.py", "on-session-end", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + assert _read_sess(fake_home, sid)["mode"] == "ended" + assert _read_cfg(spec_dir)["lock"] is None + + +def test_after_end_user_prompt_emits_one_shot_then_nothing( + run_script, fake_home, doc_root, make_session_id +): + """After /end: the FIRST on-user-prompt injects a one-shot reverse reminder + (telling the model to stop emitting the spec-mode footer and discard prior + spec-mode discipline). The SECOND on-user-prompt injects nothing.""" + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "ended", + "--requirement-name", "Ended", + "--source-text", "x", + "--session", sid, + ) + assert cp.returncode == 0 + run_script("spec_session.py", "end", "--session", sid) + + # 第一 turn:应注入一次性反向提醒 + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "hey, anything?"}) + ) + assert cp.returncode == 0 + out = cp.stdout.strip() + assert out, "post-end first prompt should emit one-shot reverse reminder" + payload = json.loads(out) + ctx = payload["hookSpecificOutput"]["additionalContext"] + assert "spec 模式已退出" in ctx + assert "─── spec-mode ───" in ctx # explicitly tells model to stop emitting this + + # 第二 turn:标志已被消费,hook 静默 + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "and now?"}) + ) + assert cp.returncode == 0 + assert cp.stdout.strip() == "" + + +def test_multi_window_takeover(run_script, fake_home, doc_root, make_session_id): + """Two sessions racing for the same spec lock — acquire/force/heartbeat semantics.""" + 
sid_a = make_session_id() + sid_b = make_session_id() + + # Session A creates the spec → holds lock + cp = run_script( + "spec_init.py", + "--name", "race", + "--requirement-name", "Race", + "--source-text", "race", + "--session", sid_a, + ) + assert cp.returncode == 0 + spec_dir = Path(json.loads(cp.stdout)["spec_dir"]) + assert _read_cfg(spec_dir)["lock"]["holder"] == sid_a + + # Session B fails to acquire (no --force) + cp = run_script("spec_session.py", "acquire", + "--spec", str(spec_dir), "--session", sid_b) + assert cp.returncode == 4 + assert _read_cfg(spec_dir)["lock"]["holder"] == sid_a + + # Session B forces takeover + cp = run_script("spec_session.py", "acquire", + "--spec", str(spec_dir), "--session", sid_b, "--force") + assert cp.returncode == 0 + assert _read_cfg(spec_dir)["lock"]["holder"] == sid_b + + # Session A's heartbeat is now rejected + cp = run_script("spec_session.py", "heartbeat", + "--spec", str(spec_dir), "--session", sid_a) + assert cp.returncode == 1 + payload = json.loads(cp.stdout) + assert payload["ok"] is False + assert payload["reason"] == "lock_lost" + + +def test_guard_off_bypasses_all_hooks(run_script, fake_home, make_session_id): + """SPECODE_GUARD=off → all four hooks exit 0 with empty stdout.""" + sid = make_session_id() + # Even if we set up an active session, GUARD=off should silence everything + sess_dir = fake_home / ".specode" / "sessions" + sess_dir.mkdir(parents=True, exist_ok=True) + (sess_dir / f"{sid}.json").write_text(json.dumps({ + "session_id": sid, + "mode": "active", + "active_spec_slug": "x", + "active_spec_dir": "/tmp/no-such", + "phase": "intake", + "pending_selector": "workflow-choice", + "lock_state": "ok", + }), encoding="utf-8") + env = {"SPECODE_GUARD": "off"} + for hook in ("on-session-start", "on-user-prompt", "on-stop", "on-session-end"): + cp = run_script( + "spec_session.py", hook, + stdin=json.dumps({"session_id": sid, "prompt": "ping"}), + extra_env=env, + ) + assert cp.returncode == 0, f"hook 
{hook} did not exit 0" + assert cp.stdout.strip() == "", f"hook {hook} emitted output: {cp.stdout!r}" + + +def test_continue_readonly_emits_readonly_mode( + run_script, fake_home, doc_root, make_session_id +): + """`continue --readonly` returns ok with mode=readonly and does not seize the lock. + + NOTE: as of v0.6 scripts, the --readonly code-path in cmd_continue does NOT + persist the readonly mode to sessions/<id>.json (the `else: acquire lock` + branch is the only one that writes a session payload). This test pins the + current observable behaviour: stdout reports mode=readonly, lock holder is + unchanged. See report for interface drift. + """ + sid_a = make_session_id() + sid_b = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "ro-flow", + "--requirement-name", "RO Flow", + "--source-text", "x", + "--session", sid_a, + ) + assert cp.returncode == 0 + spec_dir = Path(json.loads(cp.stdout)["spec_dir"]) + + cp = run_script("spec_session.py", "continue", + "--spec", str(spec_dir), "--session", sid_b, "--readonly") + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + assert payload["ok"] is True + assert payload["mode"] == "readonly" + # Lock still held by A + assert _read_cfg(spec_dir)["lock"]["holder"] == sid_a + + +def test_continue_no_force_no_readonly_writes_takeover_pending( + run_script, fake_home, doc_root, make_session_id +): + """When B continues without --force or --readonly on a locked spec, + spec config's pending_selector flips to takeover-options and exit 4.""" + sid_a = make_session_id() + sid_b = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "takeover-flow", + "--requirement-name", "TF", + "--source-text", "x", + "--session", sid_a, + ) + assert cp.returncode == 0 + spec_dir = Path(json.loads(cp.stdout)["spec_dir"]) + + cp = run_script("spec_session.py", "continue", + "--spec", str(spec_dir), "--session", sid_b) + assert cp.returncode == 4 + out = json.loads(cp.stdout) + assert out["ok"] 
is False + assert out["pending_selector"] == "takeover-options" + # Spec config updated; session B's sessions file written + cfg = _read_cfg(spec_dir) + assert cfg["pending_selector"] == "takeover-options" + sess_b = _read_sess(fake_home, sid_b) + assert sess_b["mode"] == "readonly" + assert sess_b["pending_selector"] == "takeover-options" diff --git a/plugins/specode/tests/test_no_blocking_io.py b/plugins/specode/tests/test_no_blocking_io.py deleted file mode 100644 index a6e5bc0..0000000 --- a/plugins/specode/tests/test_no_blocking_io.py +++ /dev/null @@ -1,115 +0,0 @@ -"""Static guard: no specode runtime script may contain a blocking stdin read. - -`input()` / `sys.stdin.read*()` / `getpass.getpass()` all block under a CLI -agent harness (no TTY), causing the kind of multi-minute zombie processes -documented in CHANGELOG 0.4.0. To prevent regressions, every Python file -under scripts/ is grepped for these tokens; any match fails CI. - -Whitelist: add a line `# stdin-block: <reason>` immediately above the -forbidden call in the source if you have a genuinely TTY-only utility that -should never run under an agent. -""" -from __future__ import annotations - -import io -import re -import tokenize -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" - -FORBIDDEN_PATTERNS = [ - re.compile(r"\binput\s*\("), - re.compile(r"\braw_input\s*\("), - re.compile(r"\bsys\.stdin\.read\b"), - re.compile(r"\bsys\.stdin\.readline\b"), - re.compile(r"\bsys\.stdin\.readlines\b"), - re.compile(r"\bgetpass\.getpass\s*\("), -] - -WHITELIST_MARKER = "# stdin-block:" - - -def _code_lines_only(path: Path) -> dict[int, str]: - """Return {lineno: line_text} for lines containing real code tokens. - - Uses tokenize to skip comments, docstrings, and other string literals so - documentation that mentions 'input()' or 'sys.stdin.read' doesn't trip - the scanner. 
- """ - src = path.read_text(encoding="utf-8") - raw_lines = src.splitlines() - code_linenos: set[int] = set() - try: - for tok in tokenize.generate_tokens(io.StringIO(src).readline): - if tok.type in (tokenize.NAME, tokenize.OP, tokenize.NUMBER): - code_linenos.add(tok.start[0]) - except tokenize.TokenizeError: - # If tokenize chokes, fall back to "every line is code" — safer - # than missing a real offender. - return {i + 1: line for i, line in enumerate(raw_lines)} - return {lineno: raw_lines[lineno - 1] for lineno in code_linenos if lineno - 1 < len(raw_lines)} - - -def _scan_file(path: Path) -> list[tuple[int, str]]: - """Return [(lineno, line)] of forbidden hits not preceded by whitelist marker.""" - hits: list[tuple[int, str]] = [] - code_lines = _code_lines_only(path) - raw_lines = path.read_text(encoding="utf-8").splitlines() - for lineno in sorted(code_lines): - line = code_lines[lineno] - for pat in FORBIDDEN_PATTERNS: - if pat.search(line): - # Whitelist: preceding non-blank source line is a marker. 
- prev_idx = lineno - 2 - while prev_idx >= 0 and not raw_lines[prev_idx].strip(): - prev_idx -= 1 - if prev_idx >= 0 and raw_lines[prev_idx].strip().startswith(WHITELIST_MARKER): - continue - hits.append((lineno, line.rstrip())) - break - return hits - - -def test_no_runtime_script_blocks_on_stdin(): - offenders: dict[str, list[tuple[int, str]]] = {} - for py in sorted(SCRIPTS_DIR.glob("*.py")): - if py.name.startswith("test_"): - continue - hits = _scan_file(py) - if hits: - offenders[py.name] = hits - - if offenders: - msg_lines = [ - "Found blocking stdin reads in runtime scripts — these will hang under CLI agent harnesses.", - "Either remove the call, or add a `# stdin-block: <reason>` marker on the line directly above.", - "", - ] - for fname, hits in offenders.items(): - msg_lines.append(f" {fname}:") - for lineno, line in hits: - msg_lines.append(f" L{lineno}: {line}") - raise AssertionError("\n".join(msg_lines)) - - -def test_scanner_self_check_detects_input_call(tmp_path): - """Sanity: the scanner actually catches a forbidden pattern in a fixture.""" - bad = tmp_path / "bad.py" - bad.write_text("def main():\n x = input('hi')\n return x\n", encoding="utf-8") - hits = _scan_file(bad) - assert len(hits) == 1 - assert hits[0][0] == 2 - - -def test_scanner_respects_whitelist_marker(tmp_path): - bad = tmp_path / "bad.py" - bad.write_text( - "def main():\n" - " # stdin-block: this util only runs under interactive CLI testing\n" - " x = input('hi')\n" - " return x\n", - encoding="utf-8", - ) - hits = _scan_file(bad) - assert hits == [] diff --git a/plugins/specode/tests/test_selector_prompts.py b/plugins/specode/tests/test_selector_prompts.py new file mode 100644 index 0000000..a92cc13 --- /dev/null +++ b/plugins/specode/tests/test_selector_prompts.py @@ -0,0 +1,202 @@ +"""Snapshot-style tests for the 11 SELECTOR_PROMPTS templates. 
+ +We don't import SELECTOR_PROMPTS directly — we drive each key end-to-end +through `on-user-prompt`, parse the additionalContext, and assert that the +template's most distinctive substrings are present. +""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +def _write_session(fake_home: Path, sid: str, **overrides) -> Path: + sess_dir = fake_home / ".specode" / "sessions" + sess_dir.mkdir(parents=True, exist_ok=True) + base = { + "session_id": sid, + "started_at": "2026-01-01T00:00:00Z", + "last_activity_at": "2026-01-01T00:00:00Z", + "ended_at": None, + "mode": "active", + "active_spec_slug": "snap-spec", + "active_spec_dir": None, + "spec_id": "snap", + "phase": "intake", + "lock_state": "ok", + "task_swarm_run_id": None, + "pending_selector": None, + } + base.update(overrides) + p = sess_dir / f"{sid}.json" + p.write_text(json.dumps(base), encoding="utf-8") + return p + + +def _write_spec(doc_root: Path, slug: str, **overrides) -> Path: + spec_dir = doc_root / "specs" / slug + spec_dir.mkdir(parents=True, exist_ok=True) + base = { + "specId": "snap-id", + "slug": slug, + "phase": "intake", + "workflow": None, + "pending_selector": None, + "lock": {"holder": "self"}, + "source_text": "示例需求文本摘要内容内容内容", + } + base.update(overrides) + (spec_dir / ".config.json").write_text( + json.dumps(base), encoding="utf-8" + ) + return spec_dir + + +def _fetch_ctx(run_script, fake_home, sid: str, prompt: str = "go") -> str: + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": prompt}) + ) + assert cp.returncode == 0, cp.stderr + s = cp.stdout.strip() + assert s, "expected non-empty hook output" + payload = json.loads(s) + return payload["hookSpecificOutput"]["additionalContext"] + + +# -------------------------------------------------------------------------- +# Type-A single-column selectors +# -------------------------------------------------------------------------- + 
+@pytest.fixture +def selector_setup(fake_home, doc_root, make_session_id): + """Factory: configures a session+spec with the given pending_selector and phase.""" + def _setup(pending: str, phase: str = "intake", slug: str = "snap-spec", + extra_cfg: dict = None) -> str: + sid = make_session_id() + cfg_extra = {"pending_selector": pending, "phase": phase} + if extra_cfg: + cfg_extra.update(extra_cfg) + spec_dir = _write_spec(doc_root, slug, **cfg_extra) + _write_session( + fake_home, sid, + mode="active", + active_spec_slug=slug, + active_spec_dir=str(spec_dir), + phase=phase, + pending_selector=pending, + ) + return sid + return _setup + + +def test_workflow_choice_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("workflow-choice", phase="intake") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "选择器节点:工作流选择" in ctx + assert "Requirements first" in ctx + assert "Technical Design first" in ctx + assert "Bugfix" in ctx + # 改为 AskUserQuestion 工具协议 + YAML 缩进格式 + assert "AskUserQuestion" in ctx + assert "multiSelect: false" in ctx + assert "label:" in ctx + assert "options:" in ctx + # 三段式结构(目的/前置动作/约束) + assert "**目的**" in ctx or "目的" in ctx + assert "**约束**" in ctx or "约束" in ctx + # 显式断言"禁止保留位"措辞存在 + assert "Type something" in ctx # 在禁区说明里出现 + assert "Other" in ctx + + +def test_clarification_wizard_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("clarification-wizard", phase="intake") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "选择器节点:需求澄清问答" in ctx + assert "wizard" in ctx + assert "AskUserQuestion" in ctx + assert "multiSelect: false" in ctx # wizard 内每个 question 都是单选 + assert "wizard" in ctx + assert "决策点" in ctx + + +def test_clarification_done_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("clarification-done", phase="intake") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "需求澄清是否完成?" 
in ctx + assert "进入下一阶段(推荐)" in ctx + assert "继续澄清" in ctx + + +def test_doc_confirm_requirements_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("doc-confirm-requirements", phase="requirements") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "requirements.md 文档确认" in ctx + assert "确认(推荐)" in ctx + assert "查看全文" in ctx + assert "继续沟通" in ctx + + +def test_doc_confirm_bugfix_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("doc-confirm-bugfix", phase="bugfix") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "bugfix.md 文档确认" in ctx + assert "确认(推荐)" in ctx + assert "查看全文" in ctx + assert "继续沟通" in ctx + + +def test_doc_confirm_design_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("doc-confirm-design", phase="design") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "design.md 文档确认" in ctx + assert "确认(推荐)" in ctx + assert "查看全文" in ctx + assert "继续沟通" in ctx + + +def test_tasks_execution_snapshot(run_script, fake_home, selector_setup): + """0.10.20+:tasks-execution 4 选项含两种 task-swarm 模式(full vs skip-validator)。""" + sid = selector_setup("tasks-execution", phase="tasks") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "任务执行选择" in ctx + # 两种 task-swarm 模式都要在 + assert "task-swarm + validator 自动验收(推荐)" in ctx + assert "task-swarm + 人工验收(跳过 validator)" in ctx + # --skip-validator flag 在用户选定后流程中被引用 + assert "--skip-validator" in ctx + # 顺序执行 + 暂停/调整保留 + assert "顺序执行(同时处理 optional)" in ctx + assert "暂停 / 调整 tasks.md" in ctx + + +def test_takeover_options_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("takeover-options", phase="design") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "强制接管" in ctx + assert "只读查看" in ctx + assert "取消" in ctx + + +def test_acceptance_gate_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("acceptance-gate", phase="acceptance") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "验收结论" 
in ctx + assert "验收通过,进入 iteration" in ctx + assert "继续修改" in ctx + + +def test_iteration_scope_snapshot(run_script, fake_home, selector_setup): + sid = selector_setup("iteration-scope", phase="iteration") + ctx = _fetch_ctx(run_script, fake_home, sid) + assert "iteration 调整范围" in ctx + assert "改 requirements" in ctx + assert "改 design" in ctx + assert "改 tasks" in ctx + assert "重跑测试" in ctx + # 类型 C 关键:multiSelect=true + assert "AskUserQuestion" in ctx + assert "multiSelect: true" in ctx diff --git a/plugins/specode/tests/test_selectors_drift.py b/plugins/specode/tests/test_selectors_drift.py new file mode 100644 index 0000000..2b86fff --- /dev/null +++ b/plugins/specode/tests/test_selectors_drift.py @@ -0,0 +1,102 @@ +"""selectors.md §8 场景总览表 与 SELECTOR_PROMPTS 11 keys 必须一一对应。 + +`spec_session/_selectors.py` 的 `SELECTOR_PROMPTS` 字典是 selector 模板的 +**单一事实源**——hook 在 `UserPromptSubmit` 时按 `pending_selector` 命中 +key 取出对应字符串值,做占位符替换后注入 `additionalContext`。 + +`selectors.md` 的 §8 场景常量库总览表是这些 key 的目录索引(每行含 §章节号 +/ key / 类型 / 触发 phase / header / _selectors.py 行号),让人类读 SKILL.md +/ 其他 reference 时能快速跳到代码里的具体模板字面量。 + +本套测试在 pytest 阶段自动比对: +- selectors.md 总览表中每行的 key 都在 SELECTOR_PROMPTS 里 +- 反向:SELECTOR_PROMPTS 的每个 key 都在总览表里出现一次 + +不再做"selectors.md ```text 块与字典字面量 byte-identical"全文对账(重构前 +有 ~440 行的字面量副本两边维护,drift test 报警时两边都得改、PR 双份 diff; +重构后单一事实源在 _selectors.py,selectors.md 只索引,无 byte-level 副本)。 +""" +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[3] +SELECTORS_PY = REPO_ROOT / "plugins" / "specode" / "scripts" / "spec_session" / "_selectors.py" +SELECTORS_MD = REPO_ROOT / "plugins" / "specode" / "skills" / "specode" / "references" / "selectors.md" + + +def _load_runtime_keys() -> set[str]: + """从 SELECTOR_PROMPTS 字典字面量里提取所有 key。""" + src = SELECTORS_PY.read_text(encoding="utf-8") + m = re.search( + r"SELECTOR_PROMPTS:\s*dict\[str,\s*str\]\s*=\s*\{(.*?)\n\}\s*$", + src, + re.DOTALL | re.MULTILINE, 
+ ) + assert m, "SELECTOR_PROMPTS dict not found in spec_session/_selectors.py" + body = m.group(1) + return set(re.findall(r'"([a-z][a-z0-9-]+)":\s*"""', body)) + + +def _load_doc_keys() -> set[str]: + r"""从 selectors.md §8 场景常量库总览表的 markdown table 提取 key 列。 + + 表格行形如 `| §A0 | \`project-root-choice\` | A | ... |`,第 2 列加了 + 反引号包裹 key——挑这一列。 + """ + md = SELECTORS_MD.read_text(encoding="utf-8") + # 限定在 "## 8 个固定场景常量库" 节内(避免误抓其他章节里的反引号) + sec_match = re.search( + r"^##\s*8 个固定场景常量库\s*$(.*?)(?=^##\s|\Z)", + md, + re.DOTALL | re.MULTILINE, + ) + assert sec_match, "selectors.md §8 场景常量库 section header not found" + section = sec_match.group(1) + keys: set[str] = set() + # 匹配总览表行:第二个 |...| 字段含 `key` + for line in section.splitlines(): + if not line.startswith("| §"): + continue + m = re.search(r"\|\s*`([a-z][a-z0-9-]+)`\s*\|", line) + if m: + keys.add(m.group(1)) + return keys + + +def test_overview_table_matches_runtime_keys(): + py_keys = _load_runtime_keys() + md_keys = _load_doc_keys() + assert py_keys, "no keys parsed from SELECTOR_PROMPTS" + assert md_keys, "no keys parsed from selectors.md §8 overview table" + + extra_py = py_keys - md_keys + extra_md = md_keys - py_keys + msg: list[str] = [] + if extra_py: + msg.append( + "Keys in SELECTOR_PROMPTS but missing from selectors.md overview " + f"table: {sorted(extra_py)} — add a row to the §8 table" + ) + if extra_md: + msg.append( + "Keys in selectors.md overview but not in SELECTOR_PROMPTS: " + f"{sorted(extra_md)} — either delete the orphan row or add the " + "key to _selectors.py" + ) + assert not msg, "\n".join(msg) + + +def test_expected_key_count(): + """Sanity check: 11 keys total (8 scenes; doc-confirm-* contributes 3 variants). + + Catches a class of regressions where someone accidentally drops a key from + SELECTOR_PROMPTS without realizing it. 
+ """ + py_keys = _load_runtime_keys() + assert len(py_keys) == 11, ( + f"expected 11 SELECTOR_PROMPTS keys (8 scenes + 2 extra doc-confirm " + f"variants); got {len(py_keys)}: {sorted(py_keys)}" + ) diff --git a/plugins/specode/tests/test_spec_audit.py b/plugins/specode/tests/test_spec_audit.py deleted file mode 100644 index 1e48fa9..0000000 --- a/plugins/specode/tests/test_spec_audit.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Audit log rotation + reader CLI tests.""" -from __future__ import annotations - -import io -import json -import sys -from contextlib import redirect_stdout -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import spec_guard -import spec_state - - -def _reset_audit(tmp_path: Path, max_bytes: int) -> Path: - audit_dir = tmp_path / "audit" - audit_dir.mkdir() - spec_guard.AUDIT_DIR = audit_dir - spec_guard.AUDIT_MAX_BYTES = max_bytes - spec_guard._truncate_checked = False - spec_state.AUDIT_DIR = audit_dir - return audit_dir - - -def test_truncate_keeps_tail_when_over_cap(tmp_path): - audit_dir = _reset_audit(tmp_path, max_bytes=4096) - today = spec_guard.datetime.now(spec_guard.timezone.utc).strftime("%Y-%m-%d") - log_file = audit_dir / f"{today}.log" - - line = json.dumps({"ts": "x", "event": "Pad", "decision": "ok"}) + "\n" - with log_file.open("w", encoding="utf-8") as f: - for _ in range(500): - f.write(line) - pre_size = log_file.stat().st_size - assert pre_size > 4096 - - spec_guard._audit("SessionStart", {"session_id": "s1"}, "ok", "after-truncate") - - post_size = log_file.stat().st_size - assert post_size <= 4096 + 1024 - contents = log_file.read_text(encoding="utf-8").splitlines() - assert any('"event": "_truncate"' in line or '"event":"_truncate"' in line for line in contents) - assert any('"decision": "ok"' in line and "after-truncate" in line for line in contents) - - -def test_no_truncate_under_cap(tmp_path): - audit_dir = _reset_audit(tmp_path, 
max_bytes=1024 * 1024) - spec_guard._audit("SessionStart", {"session_id": "s1"}, "ok", "small") - spec_guard._audit("Stop", {"session_id": "s1"}, "ok-conserved", "small") - today = spec_guard.datetime.now(spec_guard.timezone.utc).strftime("%Y-%m-%d") - contents = (audit_dir / f"{today}.log").read_text(encoding="utf-8") - assert "_truncate" not in contents - assert contents.count("\n") == 2 - - -def test_truncate_runs_only_once_per_process(tmp_path): - audit_dir = _reset_audit(tmp_path, max_bytes=2048) - today = spec_guard.datetime.now(spec_guard.timezone.utc).strftime("%Y-%m-%d") - log_file = audit_dir / f"{today}.log" - line = json.dumps({"ts": "x", "event": "Pad", "decision": "ok"}) + "\n" - with log_file.open("w", encoding="utf-8") as f: - for _ in range(500): - f.write(line) - - spec_guard._audit("E1", {}, "ok", "first") - after_first = log_file.read_text(encoding="utf-8") - truncate_markers_first = after_first.count('"event": "_truncate"') - - # Pad again past the cap; truncation should NOT fire again (one-shot per process). 
- with log_file.open("a", encoding="utf-8") as f: - for _ in range(500): - f.write(line) - spec_guard._audit("E2", {}, "ok", "second") - after_second = log_file.read_text(encoding="utf-8") - assert after_second.count('"event": "_truncate"') == truncate_markers_first - assert log_file.stat().st_size > 2048 # grew past cap, deliberately not re-truncated - - -def test_audit_tail_pretty_and_json(tmp_path): - _reset_audit(tmp_path, max_bytes=1024 * 1024) - spec_guard._audit("UserPromptSubmit", {"session_id": "s1", "tool_name": "Edit"}, "injected", "slug-a") - spec_guard._audit("Stop", {"session_id": "s1"}, "deny-INV-2", "") - - today = spec_guard.datetime.now(spec_guard.timezone.utc).strftime("%Y-%m-%d") - import argparse - ns_pretty = argparse.Namespace(n=10, date=today, follow=False, json=False) - buf = io.StringIO() - with redirect_stdout(buf): - spec_state._cmd_audit_tail(ns_pretty) - out = buf.getvalue() - assert "UserPromptSubmit" in out and "deny-INV-2" in out and "slug-a" in out - - ns_json = argparse.Namespace(n=10, date=today, follow=False, json=True) - buf = io.StringIO() - with redirect_stdout(buf): - spec_state._cmd_audit_tail(ns_json) - json_lines = [json.loads(l) for l in buf.getvalue().splitlines() if l.strip()] - assert [r["event"] for r in json_lines] == ["UserPromptSubmit", "Stop"] - - -def test_audit_summary_counts_and_denies(tmp_path): - _reset_audit(tmp_path, max_bytes=1024 * 1024) - spec_guard._audit("PreToolUse", {"session_id": "s1", "tool_name": "Edit"}, "ok-code-allowed", "a.py") - spec_guard._audit("PreToolUse", {"session_id": "s1", "tool_name": "Edit"}, "deny-INV-1", "b.py") - spec_guard._audit("Stop", {"session_id": "s1"}, "ok-conserved", "") - - import argparse - ns = argparse.Namespace(days=0, show_deny=5) - buf = io.StringIO() - with redirect_stdout(buf): - spec_state._cmd_audit_summary(ns) - out = buf.getvalue() - assert "3 records" in out - assert "PreToolUse" in out and "Stop" in out - assert "deny-INV-1" in out - assert "b.py" in out 
# surfaced via "recent denies" diff --git a/plugins/specode/tests/test_spec_choice.py b/plugins/specode/tests/test_spec_choice.py deleted file mode 100644 index 493366c..0000000 --- a/plugins/specode/tests/test_spec_choice.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Tests for spec_choice.py — non-interactive selector emitter (post-0.4.0). - -The script must never read stdin. timeout failures here = regression. -""" -from __future__ import annotations - -import subprocess -import sys -from pathlib import Path - -SCRIPT = Path(__file__).resolve().parent.parent / "scripts" / "spec_choice.py" - - -def _run(args: list[str], stdin: str | None = None, timeout: float = 3.0) -> subprocess.CompletedProcess: - return subprocess.run( - [sys.executable, str(SCRIPT), *args], - input=stdin, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - timeout=timeout, - text=True, - ) - - -def test_emits_sentinel_and_exits_zero_with_no_stdin(): - proc = _run([ - "--title", "Pick one", - "--option", "First", - "--option", "Second", - ]) - assert proc.returncode == 0 - assert "AWAITING_USER_CHOICE" in proc.stdout - assert "Pick one" in proc.stdout - assert "1. First" in proc.stdout - assert "2. Second" in proc.stdout - - -def test_does_not_block_on_piped_empty_stdin(): - """CodeBuddy-style: pipe attached but no data. 
Must NOT hang.""" - proc = _run( - ["--title", "T", "--option", "A", "--option", "B"], - stdin="", - ) - assert proc.returncode == 0 - assert "AWAITING_USER_CHOICE" in proc.stdout - - -def test_does_not_consume_piped_stdin_data(): - """Even with data on stdin, script must not read it (no input() calls).""" - proc = _run( - ["--title", "T", "--option", "A", "--option", "B"], - stdin="2\nignored garbage\n", - ) - assert proc.returncode == 0 - assert "AWAITING_USER_CHOICE" in proc.stdout - - -def test_no_curses_flag_is_noop(): - """--no-curses must still emit cleanly (back-compat for older callers).""" - proc = _run([ - "--title", "T", - "--option", "A", - "--option", "B", - "--no-curses", - ]) - assert proc.returncode == 0 - assert "AWAITING_USER_CHOICE" in proc.stdout - - -def test_recommended_marker_in_output(): - proc = _run([ - "--title", "T", - "--option", "Alpha::desc::recommended", - "--option", "Beta", - ]) - assert proc.returncode == 0 - assert "(Recommended)" in proc.stdout - assert "desc" in proc.stdout - - -def test_default_index_overrides_recommended(): - proc = _run([ - "--title", "T", - "--option", "Alpha::::recommended", - "--option", "Beta", - "--option", "Gamma", - "--default-index", "3", - ]) - # Default appears in the "Select 1-3 [3]:" prompt line. 
- assert "Select 1-3 [3]" in proc.stdout - - -def test_print_default_short_circuits(): - proc = _run([ - "--title", "x", - "--option", "Alpha::desc::recommended", - "--option", "Beta", - "--print-default", - ]) - assert proc.returncode == 0 - assert proc.stdout.strip() == "Alpha" - assert "AWAITING_USER_CHOICE" not in proc.stdout - - -def test_print_default_explicit_index(): - proc = _run([ - "--title", "x", - "--option", "Alpha", - "--option", "Beta", - "--option", "Gamma", - "--default-index", "3", - "--print-default", - ]) - assert proc.returncode == 0 - assert proc.stdout.strip() == "Gamma" - - -def test_print_default_json(): - proc = _run([ - "--title", "x", - "--option", "Alpha::first option::recommended", - "--option", "Beta", - "--print-default", - "--json", - ]) - assert proc.returncode == 0 - import json - record = json.loads(proc.stdout.strip()) - assert record["label"] == "Alpha" - assert record["index"] == 1 - assert record["description"] == "first option" - - -# Note: static AST-aware check lives in test_no_blocking_io.py — it covers -# spec_choice.py (and every other runtime script) using tokenize so docstring -# mentions of 'input()' don't trigger false positives. 
diff --git a/plugins/specode/tests/test_spec_guard.py b/plugins/specode/tests/test_spec_guard.py deleted file mode 100644 index e0818b2..0000000 --- a/plugins/specode/tests/test_spec_guard.py +++ /dev/null @@ -1,375 +0,0 @@ -"""Integration tests for spec_guard.py: hook handlers + invariants end-to-end.""" -from __future__ import annotations - -import json -import sys -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - - -def make_edit_payload(target, project_root, session_id="test-sess"): - return { - "session_id": session_id, - "cwd": str(project_root), - "tool_name": "Edit", - "tool_input": {"file_path": str(target)}, - } - - -def _new_turn(ws, hook_caller): - hook_caller("user-prompt-submit", {"session_id": ws["session_id"], "cwd": str(ws["project_root"])}, capture_stdout=True) - - -# ---- INV-1 (Code-Doc Sync) ------------------------------------------------- - -def test_user_prompt_submit_injects_status_block(workspace, hook_caller): - rc, stdout, _ = hook_caller( - "user-prompt-submit", - {"session_id": workspace["session_id"], "cwd": str(workspace["project_root"])}, - capture_stdout=True, - ) - assert rc == 0 - payload = json.loads(stdout) - block = payload["hookSpecificOutput"]["additionalContext"] - assert "specode active" in block - assert "test-spec" in block - assert "implementation" in block - - -def test_pretooluse_allows_tasks_files(workspace, hook_caller): - _new_turn(workspace, hook_caller) - target = workspace["project_root"] / "src/foo.py" - rc, _, _ = hook_caller("pre-tool-use", make_edit_payload(target, workspace["project_root"])) - assert rc == 0 - - -def test_pretooluse_inv1_advisory_does_not_block(workspace, hook_caller): - """INV-1 is advisory as of 0.4.0: tool call passes, advisory recorded.""" - import spec_sync - _new_turn(workspace, hook_caller) - target = workspace["project_root"] / "src/baz.py" - rc, _, err = hook_caller( - "pre-tool-use", - 
make_edit_payload(target, workspace["project_root"]), - capture_stderr=True, - ) - assert rc == 0, "INV-1 must not block (advisory only)" - assert "INV-1" in err and "ADVISORY" in err - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - advisories = [a for a in (ledger.get("pending_advisories") or []) if a.get("id") == "INV-1"] - assert len(advisories) == 1 - assert advisories[0]["file"] == str(target) - - -def test_pretooluse_allows_after_doc_change(workspace, hook_caller): - _new_turn(workspace, hook_caller) - # Stage a doc change first. - doc_target = workspace["spec_dir"] / "design.md" - hook_caller("post-tool-use", make_edit_payload(doc_target, workspace["project_root"])) - target = workspace["project_root"] / "src/baz.py" - rc, _, err = hook_caller( - "pre-tool-use", - make_edit_payload(target, workspace["project_root"]), - capture_stderr=True, - ) - assert rc == 0, f"expected ok, got {rc}, err={err}" - - -def test_freeform_silences_inv1_advisory(workspace, hook_caller): - """In freeform mode INV-1 doesn't even raise advisory (intentionally silent).""" - import spec_sync - (workspace["spec_dir"] / ".config.json").write_text( - json.dumps({"specId": "test-id", "freeformMode": True}) - ) - _new_turn(workspace, hook_caller) - target = workspace["project_root"] / "src/quux.py" - rc, _, err = hook_caller( - "pre-tool-use", make_edit_payload(target, workspace["project_root"]), capture_stderr=True - ) - assert rc == 0 - assert "INV-1" not in err - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert not [a for a in (ledger.get("pending_advisories") or []) if a.get("id") == "INV-1"] - - -def test_inv6_advisory_in_forbidden_phase(workspace, hook_caller): - """INV-6 is advisory as of 0.4.0 — phase-gate violation logs sticky warning but does not block.""" - import spec_sync - workspace["current_phase"] = "design" - _new_turn(workspace, hook_caller) - target = workspace["project_root"] / "src/quux.py" - rc, _, err = hook_caller( - "pre-tool-use", 
make_edit_payload(target, workspace["project_root"]), capture_stderr=True - ) - assert rc == 0, "INV-6 must not block (advisory only)" - assert "INV-6" in err and "ADVISORY" in err - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert any(a.get("id") == "INV-6" for a in (ledger.get("pending_advisories") or [])) - - -# ---- INV-2 (turn conservation) -------------------------------------------- - -def test_stop_inv2_advisory_when_code_only(workspace, hook_caller): - """INV-2 is advisory: Stop passes but records sticky advisory.""" - import spec_sync - _new_turn(workspace, hook_caller) - target = workspace["project_root"] / "src/foo.py" - hook_caller("post-tool-use", make_edit_payload(target, workspace["project_root"])) - rc, _, err = hook_caller( - "stop", {"session_id": workspace["session_id"]}, capture_stderr=True - ) - assert rc == 0, "INV-2 must not block (advisory only)" - assert "INV-2" in err and "ADVISORY" in err - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert any(a.get("id") == "INV-2" for a in (ledger.get("pending_advisories") or [])) - - -def test_stop_passes_with_code_plus_doc(workspace, hook_caller): - _new_turn(workspace, hook_caller) - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "design.md", workspace["project_root"])) - hook_caller("post-tool-use", make_edit_payload(workspace["project_root"] / "src/foo.py", workspace["project_root"])) - rc, _, _ = hook_caller("stop", {"session_id": workspace["session_id"]}, capture_stderr=True) - assert rc == 0 - - -def test_inv4_advisory_requirements_without_tasks(workspace, hook_caller): - import spec_sync - _new_turn(workspace, hook_caller) - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "requirements.md", workspace["project_root"])) - rc, _, err = hook_caller("stop", {"session_id": workspace["session_id"]}, capture_stderr=True) - assert rc == 0, "INV-4 must not block (advisory only)" - assert "INV-4" in err and "ADVISORY" in err - 
ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert any(a.get("id") == "INV-4" for a in (ledger.get("pending_advisories") or [])) - - -def test_inv4_requirements_with_tasks(workspace, hook_caller): - _new_turn(workspace, hook_caller) - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "requirements.md", workspace["project_root"])) - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "tasks.md", workspace["project_root"])) - rc, _, _ = hook_caller("stop", {"session_id": workspace["session_id"]}, capture_stderr=True) - assert rc == 0 - - -def test_inv4_advisory_bugfix_without_tasks(workspace, hook_caller): - import spec_sync - _new_turn(workspace, hook_caller) - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "bugfix.md", workspace["project_root"])) - rc, _, err = hook_caller("stop", {"session_id": workspace["session_id"]}, capture_stderr=True) - assert rc == 0, "INV-4 must not block (advisory only)" - assert "INV-4" in err and "ADVISORY" in err - - -# ---- INV-3 (verify-lock) -------------------------------------------------- - -def test_inv3_denies_when_evicted(workspace, hook_caller): - (workspace["spec_dir"] / ".config.json").write_text(json.dumps({ - "specId": "test-id", - "lock": { - "sessionId": "other-session", - "acquiredAt": "2026-05-15T00:00:00+00:00", - "lastHeartbeatAt": "2026-05-15T00:00:00+00:00", - }, - "evictedSessions": [{ - "sessionId": workspace["session_id"], - "evictedAt": "2026-05-15T00:00:00+00:00", - "evictedBy": "other-session", - "reason": "force_acquire", - }], - })) - _new_turn(workspace, hook_caller) - rc, _, err = hook_caller( - "pre-tool-use", - make_edit_payload(workspace["spec_dir"] / "design.md", workspace["project_root"]), - capture_stderr=True, - ) - assert rc == 2 and "INV-3" in err - - -def test_inv3_allows_when_lock_owned(workspace, hook_caller): - (workspace["spec_dir"] / ".config.json").write_text(json.dumps({ - "specId": "test-id", - "lock": { - 
"sessionId": workspace["session_id"], - "acquiredAt": "2026-05-15T00:00:00+00:00", - "lastHeartbeatAt": "2026-05-15T00:00:00+00:00", - }, - })) - _new_turn(workspace, hook_caller) - rc, _, _ = hook_caller( - "pre-tool-use", - make_edit_payload(workspace["spec_dir"] / "design.md", workspace["project_root"]), - capture_stderr=True, - ) - assert rc == 0 - - -# ---- outside / silent paths ----------------------------------------------- - -def test_outside_project_root_ignored(workspace, hook_caller): - _new_turn(workspace, hook_caller) - rc, _, _ = hook_caller( - "pre-tool-use", - make_edit_payload(Path("/tmp/elsewhere.txt"), workspace["project_root"]), - capture_stderr=True, - ) - assert rc == 0 - - -# ---- Advisory infrastructure (0.4.0) --------------------------------------- - -def test_advisory_sticky_appears_in_next_status_block(workspace, hook_caller): - """An INV-2 advisory recorded on Stop must show up in next UserPromptSubmit block.""" - _new_turn(workspace, hook_caller) - # Trigger INV-2 advisory - hook_caller("post-tool-use", make_edit_payload(workspace["project_root"] / "src/foo.py", workspace["project_root"])) - hook_caller("stop", {"session_id": workspace["session_id"]}, capture_stderr=True) - - # Next turn: status block must surface pending advisory - rc, stdout, _ = hook_caller( - "user-prompt-submit", - {"session_id": workspace["session_id"], "cwd": str(workspace["project_root"])}, - capture_stdout=True, - ) - assert rc == 0 - payload = json.loads(stdout) - block = payload["hookSpecificOutput"]["additionalContext"] - assert "pending advisories" in block - assert "INV-2" in block - - -def test_spec_doc_edit_auto_dismisses_advisory(workspace, hook_caller): - """Editing any spec doc clears INV-1/2/4 advisories (drift is being fixed).""" - import spec_sync - _new_turn(workspace, hook_caller) - hook_caller("post-tool-use", make_edit_payload(workspace["project_root"] / "src/foo.py", workspace["project_root"])) - hook_caller("stop", {"session_id": 
workspace["session_id"]}, capture_stderr=True) - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert any(a.get("id") == "INV-2" for a in ledger.get("pending_advisories") or []) - - # Now edit a spec doc → advisory should auto-clear - hook_caller("post-tool-use", make_edit_payload(workspace["spec_dir"] / "design.md", workspace["project_root"])) - ledger = spec_sync.read_ledger(workspace["spec_dir"]) - assert not [a for a in (ledger.get("pending_advisories") or []) if a.get("id") == "INV-2"] - - -# INV-3 hard-deny path is already covered by test_inv3_denies_when_evicted above. - - -# ---- INV-11 Bash hang guard integration (0.4.0) ---------------------------- - -def _bash_payload(command, session_id="test-sess", tool_response=None, cwd=None): - p = { - "session_id": session_id, - "cwd": cwd or "/tmp", - "tool_name": "Bash", - "tool_input": {"command": command}, - } - if tool_response is not None: - p["tool_response"] = tool_response - return p - - -def test_inv11_pretooluse_denies_npm_create_without_yes(workspace, hook_caller): - rc, _, err = hook_caller( - "pre-tool-use", - _bash_payload("npm create vite@latest myapp -- --template react-ts"), - capture_stderr=True, - ) - assert rc == 2 - assert "INV-11" in err - assert "npm-create" in err - assert "--yes" in err - - -def test_inv11_pretooluse_allows_npm_create_with_yes(workspace, hook_caller): - rc, _, _ = hook_caller( - "pre-tool-use", - _bash_payload("npm create vite@latest myapp -- --yes --template react-ts"), - ) - assert rc == 0 - - -def test_inv11_pretooluse_denies_vim(workspace, hook_caller): - rc, _, err = hook_caller( - "pre-tool-use", - _bash_payload("vim file.txt"), - capture_stderr=True, - ) - assert rc == 2 and "tty-editor" in err - - -def test_inv11_pretooluse_denies_git_commit_no_message(workspace, hook_caller): - rc, _, err = hook_caller( - "pre-tool-use", - _bash_payload("git commit"), - capture_stderr=True, - ) - assert rc == 2 and "git-commit-needs-message" in err - - -def 
test_inv11_pretooluse_allows_safe_bash(workspace, hook_caller): - for cmd in ["ls -la", "git status", "npm install", "python3 -c 'print(1)'"]: - rc, _, _ = hook_caller("pre-tool-use", _bash_payload(cmd)) - assert rc == 0, f"expected ok for {cmd!r}" - - -def test_inv11_posttool_hang_injects_advisory(workspace, hook_caller): - """PostToolUse on a Bash that ran into 'Ok to proceed?' must inject advisory.""" - hang_output = "Need to install the following packages:\ncreate-vite@9.0.7\nOk to proceed? (y)\n" - rc, stdout, _ = hook_caller( - "post-tool-use", - _bash_payload( - "npm create vite@latest myapp -- --template react-ts", - tool_response={"stdout": hang_output, "stderr": "", "exit_code": None}, - ), - capture_stdout=True, - ) - assert rc == 0 - payload = json.loads(stdout) - block = payload["hookSpecificOutput"]["additionalContext"] - assert "INV-11" in block - assert "ok to proceed" in block.lower() - assert "do NOT retry" in block - - -def test_inv11_posttool_no_advisory_on_clean_output(workspace, hook_caller): - rc, stdout, _ = hook_caller( - "post-tool-use", - _bash_payload( - "npm install", - tool_response={"stdout": "added 100 packages\n", "stderr": "", "exit_code": 0}, - ), - capture_stdout=True, - ) - assert rc == 0 - assert stdout == "" or "INV-11" not in stdout - - -def test_inv11_posttool_exit_124_triggers_advisory(workspace, hook_caller): - rc, stdout, _ = hook_caller( - "post-tool-use", - _bash_payload( - "some-stuck-command", - tool_response={"stdout": "partial\n", "stderr": "", "exit_code": 124}, - ), - capture_stdout=True, - ) - assert rc == 0 - payload = json.loads(stdout) - assert "INV-11" in payload["hookSpecificOutput"]["additionalContext"] - assert "124" in payload["hookSpecificOutput"]["additionalContext"] - - -def test_inv11_works_without_active_spec(hook_caller, monkeypatch): - """INV-11 must guard Bash even when no spec session is active.""" - import spec_state - monkeypatch.setattr(spec_state, "find_active_spec", lambda 
prefer_session_id=None: None) - rc, _, err = hook_caller( - "pre-tool-use", - _bash_payload("vim file.txt"), - capture_stderr=True, - ) - assert rc == 2 and "INV-11" in err diff --git a/plugins/specode/tests/test_spec_init.py b/plugins/specode/tests/test_spec_init.py new file mode 100644 index 0000000..ad8f188 --- /dev/null +++ b/plugins/specode/tests/test_spec_init.py @@ -0,0 +1,266 @@ +"""Tests for spec_init.py — initial spec scaffolding + session/active-pointer writes.""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +DOC_FILENAMES = ( + "requirements.md", + "bugfix.md", + "design.md", + "tasks.md", + "implementation-log.md", +) + + +def test_spec_init_creates_full_skeleton(run_script, doc_root, fake_home, make_session_id): + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "alpha-spec", + "--requirement-name", "Alpha Spec", + "--source-text", "我要做一个测试需求", + "--session", sid, + ) + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + spec_dir = Path(payload["spec_dir"]) + assert spec_dir.exists() + # All 5 markdown docs present + for name in DOC_FILENAMES: + assert (spec_dir / name).exists(), f"{name} missing" + # .config.json present + assert (spec_dir / ".config.json").exists() + + +def test_spec_init_config_json_initial_state( + run_script, doc_root, fake_home, make_session_id +): + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "beta-spec", + "--requirement-name", "Beta", + "--source-text", "需求 B", + "--session", sid, + ) + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + spec_dir = Path(payload["spec_dir"]) + cfg = json.loads((spec_dir / ".config.json").read_text(encoding="utf-8")) + assert cfg["specId"] == payload["specId"] + assert cfg["slug"] == "beta-spec" + assert cfg["phase"] == "intake" + # 0.10.15+:spec 创建后第一个 selector 是 project-root-choice,由 + # set-project-root CLI 推进到 workflow-choice + assert 
cfg["pending_selector"] == "project-root-choice" + assert cfg["workflow"] is None + # lock initially held by the initialising session + assert cfg["lock"]["holder"] == sid + assert cfg["doc_root"] == str(doc_root) + assert cfg["source_text"] == "需求 B" + # 0.10.15+:spec_init 时记录 cwd 给 project-root-choice selector 渲染用 + assert "invocation_cwd" in cfg + assert cfg["invocation_cwd"] # 非空 + # project_root 此时尚未指定,等 set-project-root CLI 才写入 + assert cfg["project_root"] is None + + +def test_spec_init_writes_sessions_file( + run_script, doc_root, fake_home, make_session_id +): + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "gamma", + "--requirement-name", "Gamma", + "--source-text", "需求 C", + "--session", sid, + ) + assert cp.returncode == 0, cp.stderr + sess_path = fake_home / ".specode" / "sessions" / f"{sid}.json" + assert sess_path.exists() + sess = json.loads(sess_path.read_text(encoding="utf-8")) + assert sess["session_id"] == sid + assert sess["mode"] == "active" + assert sess["active_spec_slug"] == "gamma" + assert sess["phase"] == "intake" + # 0.10.15+:spec 创建后第一个 selector 是 project-root-choice + assert sess["pending_selector"] == "project-root-choice" + assert sess["lock_state"] == "ok" + + +def test_spec_init_updates_active_pointer( + run_script, doc_root, fake_home, make_session_id +): + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "delta", + "--requirement-name", "Delta", + "--source-text", "需求 D", + "--session", sid, + ) + assert cp.returncode == 0, cp.stderr + ptr_path = doc_root / ".active-specode.json" + assert ptr_path.exists() + ptr = json.loads(ptr_path.read_text(encoding="utf-8")) + assert ptr["active_spec_slug"] == "delta" + assert ptr["session_id"] == sid + assert ptr["specId"] == json.loads(cp.stdout)["specId"] + + +def test_spec_init_missing_root_exits_3( + run_script, fake_home, monkeypatch, make_session_id +): + """All three tiers miss → exit 3 + stderr hint.""" + 
monkeypatch.delenv("SPECODE_ROOT", raising=False) + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "no-root", + "--requirement-name", "NoRoot", + "--source-text", "x", + "--session", sid, + ) + assert cp.returncode == 3 + assert "doc_root" in cp.stderr or "vault" in cp.stderr + + +def test_spec_init_duplicate_slug_refuses( + run_script, doc_root, fake_home, make_session_id +): + """Re-running with the same slug must fail without clobbering existing data.""" + sid1 = make_session_id() + cp1 = run_script( + "spec_init.py", + "--name", "dupe", + "--requirement-name", "First", + "--source-text", "first", + "--session", sid1, + ) + assert cp1.returncode == 0, cp1.stderr + first_cfg = json.loads( + (doc_root / "specs" / "dupe" / ".config.json").read_text(encoding="utf-8") + ) + + sid2 = make_session_id() + cp2 = run_script( + "spec_init.py", + "--name", "dupe", + "--requirement-name", "Second", + "--source-text", "second", + "--session", sid2, + ) + assert cp2.returncode == 3 + assert "已存在" in cp2.stderr + # The original config is untouched + again_cfg = json.loads( + (doc_root / "specs" / "dupe" / ".config.json").read_text(encoding="utf-8") + ) + assert again_cfg["specId"] == first_cfg["specId"] + + +def test_spec_init_missing_session_arg_errors(run_script, doc_root, fake_home): + """argparse should reject when --session is missing.""" + cp = run_script( + "spec_init.py", + "--name", "no-session", + "--requirement-name", "NoSession", + "--source-text", "x", + ) + # argparse exits 2 for missing required args + assert cp.returncode != 0 + assert "session" in cp.stderr.lower() + + +@pytest.mark.parametrize("bad_slug,why", [ + ("evil/path", "含 /"), + ("bad\\slash", "含 \\"), + ("bad<x>", "含 < >"), + ("bad:colon", "含 :"), + ("bad*star", "含 *"), + ("has space", "含空格"), + (".hidden", "首字符 ."), + # 首字符 - 由 argparse 在 --name 解析阶段就被拒("-" 当作 flag prefix), + # 不会进入 SLUG_RE。本测试不 cover 这条——已在更外层兜底。 + ("CON", "Windows 保留名"), + ("nul", "Windows 保留名"), + 
("trailing.", "末尾 ."), + ("", "空 slug"), +]) +def test_spec_init_rejects_invalid_slug( + run_script, doc_root, fake_home, make_session_id, bad_slug, why +): + """0.10.16+:放宽到 Unicode,但仍拒绝文件系统危险字符 / Windows 保留名等。""" + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", bad_slug, + "--requirement-name", "Bad", + "--source-text", "x", + "--session", sid, + ) + assert cp.returncode == 3, ( + f"slug={bad_slug!r} ({why}) 应被拒,但 exit={cp.returncode}\n" + f"stderr={cp.stderr}" + ) + assert "非法" in cp.stderr or "slug" in cp.stderr.lower() + + +@pytest.mark.parametrize("ok_slug", [ + "user-login", # 标准 ASCII + "UserLogin", # 大写也允许(0.10.16+ 放宽) + "登录页面", # 中文 + "ログイン", # 日文 + "auth_v2", # 下划线 + 数字 + "spec.with.dots", # 中间含 .(仅首字符不可) + "user-1.0.0", # 版本号风格 +]) +def test_spec_init_accepts_unicode_and_extended_ascii_slug( + run_script, doc_root, fake_home, make_session_id, ok_slug +): + """0.10.16+:Unicode (中文/日文/emoji) 与扩展 ASCII (大写/下划线/点) 都允许。""" + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", ok_slug, + "--requirement-name", "Test", + "--source-text", "x", + "--session", sid, + ) + assert cp.returncode == 0, ( + f"slug={ok_slug!r} 应被接受,但 exit={cp.returncode}\n" + f"stderr={cp.stderr}" + ) + payload = json.loads(cp.stdout) + spec_dir = Path(payload["spec_dir"]) + assert spec_dir.exists() + assert spec_dir.name == ok_slug # 目录名跟用户原文一致 + + +def test_spec_init_root_override_wins( + run_script, fake_home, tmp_path, make_session_id, monkeypatch +): + """--root flag overrides env.""" + env_root = fake_home / "env-root" + env_root.mkdir() + monkeypatch.setenv("SPECODE_ROOT", str(env_root)) + cli_root = tmp_path / "cli-root" + cli_root.mkdir() + sid = make_session_id() + cp = run_script( + "spec_init.py", + "--name", "override-spec", + "--requirement-name", "OverrideSpec", + "--source-text", "x", + "--session", sid, + "--root", str(cli_root), + ) + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + assert 
payload["doc_root_source"] == "override" + assert Path(payload["spec_dir"]).is_relative_to(cli_root) diff --git a/plugins/specode/tests/test_spec_lint.py b/plugins/specode/tests/test_spec_lint.py new file mode 100644 index 0000000..aa19d61 --- /dev/null +++ b/plugins/specode/tests/test_spec_lint.py @@ -0,0 +1,95 @@ +"""Tests for spec_lint.py — 3 lint rules, all warning-only (exit 0).""" +from __future__ import annotations + +from pathlib import Path + +import pytest + + +def _bootstrap_spec_dir(doc_root: Path, slug: str = "lint-spec") -> Path: + """Create a minimal spec directory with placeholder files.""" + sd = doc_root / "specs" / slug + sd.mkdir(parents=True, exist_ok=True) + (sd / "requirements.md").write_text( + "# 需求文档\n\n## 需求 1\n\n### 需求 1.1\n\n" + "WHEN 用户登录,THE System SHALL 返回 token。\n", + encoding="utf-8", + ) + (sd / "tasks.md").write_text( + "# 任务\n\n- [ ] 1. 实现登录 _需求:1.1_\n", + encoding="utf-8", + ) + (sd / "implementation-log.md").write_text( + "# 实现记录\n\n## 2026-01-01 — 初始化\n\nspec 已初始化,文件 src/main.py 等待实现。这条 entry 写得很长足够 30 字。\n", + encoding="utf-8", + ) + return sd + + +def test_lint_clean_spec_has_zero_warnings(run_script, doc_root): + sd = _bootstrap_spec_dir(doc_root, "clean") + cp = run_script("spec_lint.py", "--spec", str(sd)) + assert cp.returncode == 0 + assert "0 warnings" in cp.stdout + + +def test_lint_trace_warns_for_orphan_tag(run_script, doc_root): + sd = _bootstrap_spec_dir(doc_root, "orphan-tag") + # tasks.md references 需求 1.99 — not present in requirements.md + (sd / "tasks.md").write_text( + "# 任务\n\n- [ ] 999. 
看不见的任务 _需求:1.99_\n", + encoding="utf-8", + ) + cp = run_script("spec_lint.py", "--spec", str(sd)) + assert cp.returncode == 0 + assert "trace" in cp.stdout + assert "1.99" in cp.stdout + + +def test_lint_log_short_entry_warns(run_script, doc_root): + sd = _bootstrap_spec_dir(doc_root, "short-log") + # Replace implementation-log with a short entry (< 30 chars, no file ref) + (sd / "implementation-log.md").write_text( + "# 实现记录\n\n## 2026-01-02 — 改了点东西\n\nok\n", + encoding="utf-8", + ) + cp = run_script("spec_lint.py", "--spec", str(sd)) + assert cp.returncode == 0 + assert "log" in cp.stdout + # Either short body or missing file-ref warning may fire + assert "implementation-log.md" in cp.stdout + + +def test_lint_ears_missing_trigger_warns(run_script, doc_root): + sd = _bootstrap_spec_dir(doc_root, "ears-bad") + # SHALL without WHEN/IF/WHILE/WHERE/WHENEVER + (sd / "requirements.md").write_text( + "# 需求文档\n\n## 需求 1\n\nThe System SHALL 处理所有请求。\n", + encoding="utf-8", + ) + cp = run_script("spec_lint.py", "--spec", str(sd)) + assert cp.returncode == 0 + assert "ears" in cp.stdout + + +def test_lint_always_exit_zero_even_with_many_warnings(run_script, doc_root): + """Any/all 3 rules can fire and exit code is still 0.""" + sd = _bootstrap_spec_dir(doc_root, "all-bad") + # Force every rule: + (sd / "requirements.md").write_text( + "# 需求\n\nThe System SHALL 处理一切。\n", + encoding="utf-8", + ) + (sd / "tasks.md").write_text( + "# 任务\n\n- [ ] 1. 
xxx _需求:9.99_\n", + encoding="utf-8", + ) + (sd / "implementation-log.md").write_text( + "# log\n\n## 2026-01-03 — \n\nshort\n", + encoding="utf-8", + ) + cp = run_script("spec_lint.py", "--spec", str(sd)) + assert cp.returncode == 0 + # All 3 rule names appear + rule_hits = sum(1 for r in ("trace", "log", "ears") if r in cp.stdout) + assert rule_hits == 3 diff --git a/plugins/specode/tests/test_spec_log.py b/plugins/specode/tests/test_spec_log.py new file mode 100644 index 0000000..bd88c28 --- /dev/null +++ b/plugins/specode/tests/test_spec_log.py @@ -0,0 +1,180 @@ +"""Tests for spec_log.py (0.10.0+) — write_event / replay / status / redact / disable.""" +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest + + +def _log_file(home: Path, sid: str) -> Path: + return home / ".specode" / "logs" / f"{sid}.jsonl" + + +def _read_log(home: Path, sid: str) -> list[dict]: + p = _log_file(home, sid) + if not p.exists(): + return [] + out = [] + for line in p.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line: + out.append(json.loads(line)) + return out + + +def test_write_event_creates_jsonl(run_script, fake_home, make_session_id): + sid = make_session_id() + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", '{"k":"v"}') + assert cp.returncode == 0, cp.stderr + log = _read_log(fake_home, sid) + assert len(log) == 1 + assert log[0]["event"] == "test_evt" + assert log[0]["payload"]["k"] == "v" + assert "ts" in log[0] + + +def test_disabled_via_env_var(run_script, fake_home, make_session_id): + sid = make_session_id() + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", "{}", + extra_env={"SPECODE_LOG": "off"}) + assert cp.returncode == 0 + # File should NOT exist because logging is off + assert not _log_file(fake_home, sid).exists() + + +def test_disabled_via_config(run_script, fake_home, 
make_session_id): + sid = make_session_id() + config_dir = fake_home / ".config" / "specode" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "config.json").write_text( + json.dumps({"logging": False}), encoding="utf-8") + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", "{}") + assert cp.returncode == 0 + assert not _log_file(fake_home, sid).exists() + + +def test_redact_default_keys(run_script, fake_home, make_session_id): + sid = make_session_id() + payload = json.dumps({ + "api_key": "sk-secret-123", + "password": "hunter2", + "harmless": "ok", + }) + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", payload) + assert cp.returncode == 0 + log = _read_log(fake_home, sid) + p = log[0]["payload"] + assert p["api_key"] == "<redacted>" + assert p["password"] == "<redacted>" + assert p["harmless"] == "ok" + + +def test_redact_extended_via_config(run_script, fake_home, make_session_id): + sid = make_session_id() + config_dir = fake_home / ".config" / "specode" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / "config.json").write_text( + json.dumps({"redact_keys": ["custom_key"]}), encoding="utf-8") + payload = json.dumps({"custom_key": "leaked", "ok": "fine"}) + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", payload) + assert cp.returncode == 0 + log = _read_log(fake_home, sid) + p = log[0]["payload"] + assert p["custom_key"] == "<redacted>" + assert p["ok"] == "fine" + + +def test_truncate_long_string(run_script, fake_home, make_session_id): + sid = make_session_id() + long_str = "x" * 1000 + payload = json.dumps({"big": long_str}) + cp = run_script("spec_log.py", "write-event", "--event", "test_evt", + "--session", sid, "--payload", payload) + assert cp.returncode == 0 + log = _read_log(fake_home, sid) + big = log[0]["payload"]["big"] + assert len(big) < 1000 + assert 
big.endswith("...<truncated>") + + +def test_replay_outputs_events(run_script, fake_home, make_session_id): + sid = make_session_id() + for i in range(3): + run_script("spec_log.py", "write-event", "--event", f"evt_{i}", + "--session", sid, "--payload", "{}") + cp = run_script("spec_log.py", "replay", "--session", sid) + assert cp.returncode == 0, cp.stderr + assert "evt_0" in cp.stdout + assert "evt_1" in cp.stdout + assert "evt_2" in cp.stdout + + +def test_replay_missing_session(run_script, fake_home, make_session_id): + sid = make_session_id() + cp = run_script("spec_log.py", "replay", "--session", sid) + assert cp.returncode == 3 + assert "no log for session" in cp.stderr + + +def test_status_reports_empty_when_no_logs(run_script, fake_home): + cp = run_script("spec_log.py", "status") + assert cp.returncode == 0 + info = json.loads(cp.stdout) + assert info["enabled"] is True # default + assert info["exists"] is False or info.get("session_log_files", 0) == 0 + + +def test_status_reports_after_writes(run_script, fake_home, make_session_id): + sid = make_session_id() + run_script("spec_log.py", "write-event", "--event", "evt", + "--session", sid, "--payload", "{}") + cp = run_script("spec_log.py", "status") + assert cp.returncode == 0 + info = json.loads(cp.stdout) + assert info["enabled"] is True + assert info["session_log_files"] >= 1 + assert info["total_bytes"] > 0 + + +def test_status_reflects_env_disabled(run_script, fake_home): + cp = run_script("spec_log.py", "status", extra_env={"SPECODE_LOG": "off"}) + assert cp.returncode == 0 + info = json.loads(cp.stdout) + assert info["enabled"] is False + assert "env:SPECODE_LOG=off" in info["switch_source"] + + +def test_hook_invocation_writes_log(run_script, fake_home, make_session_id): + """spec_session.py hook 触发时应在 _safe_hook 里写一条 hook_invoked event.""" + sid = make_session_id() + cp = run_script("spec_session.py", "on-session-start", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + # 
hook_invoked event 是 session_id=None(_safe_hook 不传 sid),落 _orphan.jsonl + orphan = _log_file(fake_home, "_orphan") + assert orphan.exists(), "hook_invoked should land in _orphan.jsonl" + events = _read_log(fake_home, "_orphan") + assert any(e["event"] == "hook_invoked" for e in events) + + +def test_cli_call_writes_log(run_script, fake_home, doc_root, make_session_id): + """spec_session.py 业务命令 main() 应记 cli_call + cli_exit.""" + sid = make_session_id() + # 先 init 一个 spec 让后续命令有 spec_dir 可用 + cp = run_script( + "spec_init.py", + "--name", "logtest", "--requirement-name", "Log Test", + "--source-text", "test", "--session", sid, + ) + assert cp.returncode == 0 + # spec_init 应有 cli_call + cli_exit + events = _read_log(fake_home, sid) + assert any(e["event"] == "cli_call" and e["payload"].get("script") == "spec_init.py" for e in events) + assert any(e["event"] == "cli_exit" and e["payload"].get("script") == "spec_init.py" for e in events) diff --git a/plugins/specode/tests/test_spec_session_business.py b/plugins/specode/tests/test_spec_session_business.py new file mode 100644 index 0000000..12b4be1 --- /dev/null +++ b/plugins/specode/tests/test_spec_session_business.py @@ -0,0 +1,431 @@ +"""Tests for spec_session.py business sub-commands. + +Covers: acquire / release / heartbeat / verify-lock / phase-transition / + load / continue / end / status / read-session. 
+""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +def _spec_cfg(spec_dir: Path) -> dict: + return json.loads((spec_dir / ".config.json").read_text(encoding="utf-8")) + + +def _sess(fake_home: Path, sid: str) -> dict: + return json.loads((fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8")) + + +# --- acquire / release / heartbeat --------------------------------------- + +def test_acquire_when_lock_null_succeeds( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + # release first so lock=null + cp_rel = run_script("spec_session.py", "release", + "--spec", str(spec_dir), "--session", sid_init) + assert cp_rel.returncode == 0 + assert _spec_cfg(spec_dir)["lock"] is None + + sid_new = make_session_id() + cp = run_script("spec_session.py", "acquire", + "--spec", str(spec_dir), "--session", sid_new) + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["holder"] == sid_new + cfg = _spec_cfg(spec_dir) + assert cfg["lock"]["holder"] == sid_new + assert cfg["lock"]["acquired_at"] + assert cfg["lock"]["last_heartbeat_at"] + + +def test_acquire_when_held_by_other_returns_exit_4( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + # sid_init holds the lock by default. Try to acquire from another session. 
+ sid_other = make_session_id() + cp = run_script("spec_session.py", "acquire", + "--spec", str(spec_dir), "--session", sid_other) + assert cp.returncode == 4, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] == "LockHeld" + assert out["holder"] == sid_init + + +def test_acquire_force_takes_over( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + sid_other = make_session_id() + cp = run_script("spec_session.py", "acquire", + "--spec", str(spec_dir), "--session", sid_other, "--force") + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["holder"] == sid_other + assert _spec_cfg(spec_dir)["lock"]["holder"] == sid_other + + +def test_release_when_holder_clears_lock( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "release", + "--spec", str(spec_dir), "--session", sid) + assert cp.returncode == 0 + assert _spec_cfg(spec_dir)["lock"] is None + sess = _sess(fake_home, sid) + assert sess["lock_state"] == "released" + + +def test_release_by_non_holder_is_silent_ok( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + sid_other = make_session_id() + cp = run_script("spec_session.py", "release", + "--spec", str(spec_dir), "--session", sid_other) + # release is tolerant: non-holder should not break the system + assert cp.returncode == 0, cp.stderr + # Lock still held by sid_init + assert _spec_cfg(spec_dir)["lock"]["holder"] == sid_init + + +# --- heartbeat ---------------------------------------------------------- + +def test_heartbeat_refreshes_last_heartbeat( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + before = _spec_cfg(spec_dir)["lock"]["last_heartbeat_at"] + # ensure clock moves a second + import time as _t; _t.sleep(1.1) + cp = run_script("spec_session.py", "heartbeat", + 
"--spec", str(spec_dir), "--session", sid) + assert cp.returncode == 0, cp.stderr + after = _spec_cfg(spec_dir)["lock"]["last_heartbeat_at"] + assert after >= before # ISO timestamps; later or equal + + +def test_heartbeat_non_holder_returns_exit_1( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + sid_other = make_session_id() + cp = run_script("spec_session.py", "heartbeat", + "--spec", str(spec_dir), "--session", sid_other) + assert cp.returncode == 1 + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] == "lock_lost" + + +# --- verify-lock ----------------------------------------------------------- + +def test_verify_lock_ok_for_holder(run_script, init_spec, fake_home): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "verify-lock", + "--spec", str(spec_dir), "--session", sid) + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["holder"] == sid + + +def test_verify_lock_evicted_for_other( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + sid_other = make_session_id() + cp = run_script("spec_session.py", "verify-lock", + "--spec", str(spec_dir), "--session", sid_other) + assert cp.returncode == 3 + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] in ("evicted", "stale_lock") + + +def test_verify_lock_not_held_when_null( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + run_script("spec_session.py", "release", + "--spec", str(spec_dir), "--session", sid_init) + sid_q = make_session_id() + cp = run_script("spec_session.py", "verify-lock", + "--spec", str(spec_dir), "--session", sid_q) + assert cp.returncode == 3 + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] == "not_held" + + +# --- phase-transition 
----------------------------------------------------- + +def test_phase_transition_updates_both_files( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "phase-transition", + "--spec", str(spec_dir), "--session", sid, + "--from", "intake", "--to", "requirements") + assert cp.returncode == 0, cp.stderr + cfg = _spec_cfg(spec_dir) + assert cfg["phase"] == "requirements" + # auto pending_selector for requirements phase + assert cfg["pending_selector"] == "doc-confirm-requirements" + sess = _sess(fake_home, sid) + assert sess["phase"] == "requirements" + assert sess["pending_selector"] == "doc-confirm-requirements" + + +def test_phase_transition_to_iteration_clears_pending_selector( + run_script, init_spec, fake_home +): + """acceptance → iteration 不再自动注入 iteration-scope;停在 chat 等用户提。 + + 回归防护:曾经 `_auto_pending_selector(phase="iteration")` 返回 + `"iteration-scope"`,导致验收通过后立刻追问"本轮要调整什么",与 + iteration.md §2 / §7「不自动呈现」设计冲突。0.10.23 起统一返回 None。 + """ + slug, sid, spec_dir, _ = init_spec() + cfg = _spec_cfg(spec_dir) + cfg["phase"] = "acceptance" + (spec_dir / ".config.json").write_text( + json.dumps(cfg, ensure_ascii=False, indent=2), encoding="utf-8" + ) + cp = run_script("spec_session.py", "phase-transition", + "--spec", str(spec_dir), "--session", sid, + "--from", "acceptance", "--to", "iteration") + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["phase"] == "iteration" + assert out["pending_selector"] is None + assert _spec_cfg(spec_dir)["pending_selector"] is None + assert _sess(fake_home, sid)["pending_selector"] is None + + +def test_phase_transition_lock_lost_returns_exit_1( + run_script, init_spec, fake_home, make_session_id +): + slug, sid_init, spec_dir, _ = init_spec() + sid_other = make_session_id() + cp = run_script("spec_session.py", "phase-transition", + "--spec", str(spec_dir), "--session", sid_other, + "--from", "intake", "--to", 
"requirements") + assert cp.returncode == 1 + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] == "lock_lost" + # phase unchanged + assert _spec_cfg(spec_dir)["phase"] == "intake" + + +def test_phase_transition_phase_mismatch_blocks( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "phase-transition", + "--spec", str(spec_dir), "--session", sid, + "--from", "design", "--to", "tasks") + assert cp.returncode == 1 + out = json.loads(cp.stdout) + assert out["reason"] == "phase_mismatch" + assert out["current"] == "intake" + assert _spec_cfg(spec_dir)["phase"] == "intake" + + +# --- end ------------------------------------------------------------------ + +def test_end_sets_mode_ended_and_releases_lock( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "end", "--session", sid) + assert cp.returncode == 0, cp.stderr + sess = _sess(fake_home, sid) + assert sess["mode"] == "ended" + assert sess["ended_at"] + assert sess["pending_selector"] is None + # 对齐 end.md 文档:active_spec_* / task_swarm_run_id 必须清空 + assert sess["active_spec_slug"] is None + assert sess["active_spec_dir"] is None + assert sess["spec_id"] is None + assert sess["phase"] is None + assert sess["task_swarm_run_id"] is None + # 下一 turn 由 hook 注入一次性反向提醒 + assert sess["post_end_reminder_pending"] is True + # lock cleared because end-holder == session + assert _spec_cfg(spec_dir)["lock"] is None + + +# --- read-session & status ------------------------------------------------ + +def test_read_session_emits_payload(run_script, init_spec, fake_home): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "read-session", "--session", sid) + assert cp.returncode == 0 + payload = json.loads(cp.stdout) + assert payload["session_id"] == sid + assert payload["mode"] == "active" + assert payload["active_spec_slug"] == slug + + +def 
test_read_session_migrates_legacy_claude_session_id( + run_script, fake_home, make_session_id +): + """老 sessions/<id>.json 字段名是 claude_session_id;read_session 应自动塞 session_id 字段。""" + sid = make_session_id() + sess_dir = fake_home / ".specode" / "sessions" + sess_dir.mkdir(parents=True, exist_ok=True) + legacy = { + "claude_session_id": sid, + "started_at": "2026-01-01T00:00:00Z", + "last_activity_at": "2026-01-01T00:00:00Z", + "mode": "idle", + } + (sess_dir / f"{sid}.json").write_text(json.dumps(legacy), encoding="utf-8") + cp = run_script("spec_session.py", "read-session", "--session", sid) + assert cp.returncode == 0, cp.stderr + payload = json.loads(cp.stdout) + assert payload["session_id"] == sid + # 老字段仍保留(向后兼容;后续写入才会被覆盖为新格式) + assert payload.get("claude_session_id") == sid + + +def test_status_emits_human_summary(run_script, init_spec, fake_home): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "status", "--session", sid) + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["ok"] is True + assert "session" in out + assert "spec_config" in out + assert out["session"]["active_spec_slug"] == slug + + +def test_status_session_not_found(run_script, fake_home, make_session_id): + sid = make_session_id() + cp = run_script("spec_session.py", "status", "--session", sid) + # Spec says ok=False but exit 0 (still soft) + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["ok"] is False + assert out["reason"] == "session_not_found" + + +def test_load_emits_spec_config(run_script, init_spec, fake_home): + slug, sid, spec_dir, _ = init_spec() + cp = run_script("spec_session.py", "load", "--spec", str(spec_dir)) + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["config"]["slug"] == slug + + +# --- set-project-root (0.10.15+) ----------------------------------------- + +def test_set_project_root_writes_config_and_advances_selector( + run_script, init_spec, 
fake_home, tmp_path +): + """成功路径:写 project_root + pending_selector 推到 workflow-choice。""" + slug, sid, spec_dir, _ = init_spec() + cfg_before = _spec_cfg(spec_dir) + assert cfg_before["pending_selector"] == "project-root-choice" + assert cfg_before["project_root"] is None + + project = tmp_path / "my-project" + project.mkdir() + + cp = run_script( + "spec_session.py", "set-project-root", + "--spec", str(spec_dir), + "--session", sid, + "--root", str(project), + ) + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["ok"] is True + assert out["project_root"] == str(project) + assert out["pending_selector"] == "workflow-choice" + + cfg = _spec_cfg(spec_dir) + assert cfg["project_root"] == str(project) + assert cfg["pending_selector"] == "workflow-choice" + sess = _sess(fake_home, sid) + assert sess["pending_selector"] == "workflow-choice" + + +def test_set_project_root_auto_creates_missing_dir( + run_script, init_spec, fake_home, tmp_path +): + """--root 路径不存在时自动 mkdir -p(cwd/slug 新项目场景)。""" + slug, sid, spec_dir, _ = init_spec() + new_dir = tmp_path / "brand-new" / "nested" + assert not new_dir.exists() + + cp = run_script( + "spec_session.py", "set-project-root", + "--spec", str(spec_dir), + "--session", sid, + "--root", str(new_dir), + ) + assert cp.returncode == 0, cp.stderr + assert new_dir.exists() and new_dir.is_dir() + + +def test_set_project_root_rejects_relative_path( + run_script, init_spec, fake_home +): + slug, sid, spec_dir, _ = init_spec() + cp = run_script( + "spec_session.py", "set-project-root", + "--spec", str(spec_dir), + "--session", sid, + "--root", "relative/path/here", + ) + assert cp.returncode == 1 + assert "绝对路径" in cp.stderr + + +def test_set_project_root_rejects_non_directory( + run_script, init_spec, fake_home, tmp_path +): + slug, sid, spec_dir, _ = init_spec() + a_file = tmp_path / "i-am-a-file.txt" + a_file.write_text("content", encoding="utf-8") + + cp = run_script( + "spec_session.py", 
"set-project-root", + "--spec", str(spec_dir), + "--session", sid, + "--root", str(a_file), + ) + assert cp.returncode == 1 + assert "不是目录" in cp.stderr + + +def test_set_project_root_rejects_non_lock_holder( + run_script, init_spec, fake_home, make_session_id, tmp_path +): + slug, _sid_holder, spec_dir, _ = init_spec() + other_sid = make_session_id() # 不持锁 + project = tmp_path / "proj" + project.mkdir() + + cp = run_script( + "spec_session.py", "set-project-root", + "--spec", str(spec_dir), + "--session", other_sid, + "--root", str(project), + ) + assert cp.returncode == 1 + assert "lock holder" in cp.stderr diff --git a/plugins/specode/tests/test_spec_session_hooks.py b/plugins/specode/tests/test_spec_session_hooks.py new file mode 100644 index 0000000..36687fc --- /dev/null +++ b/plugins/specode/tests/test_spec_session_hooks.py @@ -0,0 +1,675 @@ +"""Tests for spec_session.py hook sub-commands. + +Hooks always exit 0. They communicate with the host via stdout JSON of the form: + {"hookSpecificOutput": {"hookEventName": <e>, "additionalContext": <str>}} +Empty stdout (or empty JSON) means "no injection". +""" +from __future__ import annotations + +import json +from pathlib import Path +from typing import Optional + +import pytest + + +# -------------------------------------------------------------------------- +# Helpers +# -------------------------------------------------------------------------- + +def _parse_hook(stdout: str) -> Optional[dict]: + """Parse a hook's stdout. 
Returns None when stdout is empty.""" + s = stdout.strip() + if not s: + return None + return json.loads(s) + + +def _ctx(payload: Optional[dict]) -> str: + """Pull additionalContext text out of a hook payload (or '' when none).""" + if payload is None: + return "" + return payload.get("hookSpecificOutput", {}).get("additionalContext", "") + + +def _write_session(fake_home: Path, sid: str, **overrides) -> Path: + """Write a sessions/<sid>.json with sensible defaults that callers can patch.""" + sess_dir = fake_home / ".specode" / "sessions" + sess_dir.mkdir(parents=True, exist_ok=True) + base = { + "session_id": sid, + "started_at": "2026-01-01T00:00:00Z", + "last_activity_at": "2026-01-01T00:00:00Z", + "ended_at": None, + "mode": "idle", + "active_spec_slug": None, + "active_spec_dir": None, + "spec_id": None, + "phase": None, + "lock_state": "released", + "task_swarm_run_id": None, + "pending_selector": None, + } + base.update(overrides) + p = sess_dir / f"{sid}.json" + p.write_text(json.dumps(base), encoding="utf-8") + return p + + +# -------------------------------------------------------------------------- +# on-session-start +# -------------------------------------------------------------------------- + +def test_on_session_start_new_session_writes_idle( + run_script, fake_home, make_session_id +): + sid = make_session_id() + cp = run_script("spec_session.py", "on-session-start", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0, cp.stderr + sess_path = fake_home / ".specode" / "sessions" / f"{sid}.json" + assert sess_path.exists() + sess = json.loads(sess_path.read_text(encoding="utf-8")) + assert sess["mode"] == "idle" + assert sess["session_id"] == sid + + +def test_on_session_start_additional_context_contains_session_id( + run_script, fake_home, make_session_id +): + sid = make_session_id() + cp = run_script("spec_session.py", "on-session-start", + stdin=json.dumps({"session_id": sid})) + payload = _parse_hook(cp.stdout) + ctx = 
_ctx(payload) + assert sid in ctx + assert "Specode session" in ctx + + +def test_on_session_start_reactivates_ended_session( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="ended", ended_at="2026-01-01T00:00:00Z") + cp = run_script("spec_session.py", "on-session-start", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + sess = json.loads( + (fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8") + ) + assert sess["mode"] == "idle" # back to idle from ended + assert sess["ended_at"] is None + + +# -------------------------------------------------------------------------- +# on-user-prompt +# -------------------------------------------------------------------------- + +def test_on_user_prompt_ended_session_emits_nothing( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="ended") + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "hello"}) + ) + assert cp.returncode == 0 + # mode=ended branch returns early; no additionalContext emitted. 
+ assert _parse_hook(cp.stdout) is None + + +def test_on_user_prompt_post_end_reminder_emits_once_then_clears( + run_script, fake_home, make_session_id +): + """刚 /specode:end 完的下一 turn 必须收到一次性反向提醒;标志被消费后, + 再后续 turn 不再注入任何内容。""" + sid = make_session_id() + _write_session(fake_home, sid, mode="ended", post_end_reminder_pending=True) + + # 第一 turn:注入反向提醒 + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "随便问点啥"}) + ) + assert cp.returncode == 0 + ctx = _ctx(_parse_hook(cp.stdout)) + assert "spec 模式已退出" in ctx + assert "不要" in ctx and "─── spec-mode ───" in ctx + # 标志应已被清掉,落盘 + sess = json.loads( + (fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8") + ) + assert sess.get("post_end_reminder_pending") is False + + # 第二 turn:不再注入 + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "再问一句"}) + ) + assert cp.returncode == 0 + assert _parse_hook(cp.stdout) is None + + +def test_on_user_prompt_idle_session_emits_nothing( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="idle") + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "hello"}) + ) + assert cp.returncode == 0 + # idle also returns early + assert _parse_hook(cp.stdout) is None + + +def test_on_user_prompt_active_with_workflow_choice_emits_all_segments( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + # set up an active spec with a real spec config + spec_dir = doc_root / "specs" / "active-spec" + spec_dir.mkdir(parents=True) + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "abc", + "slug": "active-spec", + "phase": "intake", + "workflow": None, + "pending_selector": "workflow-choice", + "lock": {"holder": sid}, + "source_text": "示例源需求摘要内容", + }), encoding="utf-8") + _write_session( + fake_home, sid, + 
mode="active", + active_spec_slug="active-spec", + active_spec_dir=str(spec_dir), + phase="intake", + pending_selector="workflow-choice", + lock_state="ok", + ) + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "继续推进"}) + ) + payload = _parse_hook(cp.stdout) + ctx = _ctx(payload) + # 5 segments expected + assert sid in ctx # session_id 提醒 + assert "选择器节点:工作流选择" in ctx + assert "AskUserQuestion" in ctx + assert "multiSelect: false" in ctx + assert "Requirements first" in ctx + assert "Technical Design first" in ctx + assert "Bugfix" in ctx + assert "状态行" in ctx # footer template + assert "spec-mode" in ctx + assert "文档优先提醒" in ctx + assert "你仍处于 spec 模式" in ctx # continue reminder + + +def test_on_user_prompt_project_root_choice_emits_with_cwd_context( + run_script, fake_home, make_session_id, doc_root +): + """0.10.15+:pending_selector=project-root-choice 时,hook 注入 selector + 模板并把 invocation_cwd / cwd_subdir 填进去给主代理。""" + sid = make_session_id() + spec_dir = doc_root / "specs" / "pr-test" + spec_dir.mkdir(parents=True) + fake_cwd = "/home/user/my-repo" + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "pr", + "slug": "pr-test", + "phase": "intake", + "workflow": None, + "pending_selector": "project-root-choice", + "lock": {"holder": sid}, + "source_text": "新需求一句话", + "invocation_cwd": fake_cwd, + "project_root": None, + }), encoding="utf-8") + _write_session( + fake_home, sid, + mode="active", + active_spec_slug="pr-test", + active_spec_dir=str(spec_dir), + phase="intake", + pending_selector="project-root-choice", + lock_state="ok", + ) + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "走起"}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + # selector 模板被注入 + assert "选择器节点:项目实现目录选择" in ctx + assert "AskUserQuestion" in ctx + # invocation_cwd 占位被替换成实际值 + assert fake_cwd in ctx + # cwd_subdir 也已填入(路径拼接 cwd + slug) + assert "pr-test" in ctx # 
slug 出现在 cwd/slug 选项 + # 3 个 label 都在 + assert "cwd(在已有项目里迭代)" in ctx + assert "cwd/slug(新项目子目录)" in ctx + assert "自定义路径" in ctx + + +def test_on_user_prompt_active_implementation_no_pending( + run_script, fake_home, make_session_id, doc_root +): + """phase=implementation with no pending_selector → selector segment absent + but other segments present.""" + sid = make_session_id() + spec_dir = doc_root / "specs" / "mid-impl" + spec_dir.mkdir(parents=True) + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "x", + "slug": "mid-impl", + "phase": "implementation", + "workflow": "requirements-first", + "pending_selector": None, + "lock": {"holder": sid}, + }), encoding="utf-8") + _write_session( + fake_home, sid, + mode="active", + active_spec_slug="mid-impl", + active_spec_dir=str(spec_dir), + phase="implementation", + pending_selector=None, + lock_state="ok", + ) + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "更多 coding"}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "选择器节点:" not in ctx # no selector segment + assert "文档优先提醒" in ctx + assert "状态行" in ctx + assert "你仍处于 spec 模式" in ctx + assert sid in ctx + + +def test_on_user_prompt_readonly_footer_has_readonly_marker( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + spec_dir = doc_root / "specs" / "ro-spec" + spec_dir.mkdir(parents=True) + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "ro", + "slug": "ro-spec", + "phase": "tasks", + "pending_selector": None, + "lock": {"holder": "someone-else"}, + }), encoding="utf-8") + _write_session( + fake_home, sid, + mode="readonly", + active_spec_slug="ro-spec", + active_spec_dir=str(spec_dir), + phase="tasks", + pending_selector=None, + lock_state="readonly", + ) + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "只读一下"}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "[只读]" in ctx + 
assert "只读模式" in ctx + + +def test_on_user_prompt_help_fastpath_only_emits_help( + run_script, fake_home, make_session_id +): + sid = make_session_id() + # Even active sessions only emit the help fast-path when prompt matches + _write_session(fake_home, sid, mode="active", + active_spec_slug="any", phase="intake", + pending_selector="workflow-choice", + active_spec_dir="/dev/null") + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "/specode:spec -h"}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "fast-path" in ctx + assert "specode v" in ctx # accept any version (dynamic since 0.10.1) + # Workflow-choice selector should NOT leak in + assert "选择器节点:工作流选择" not in ctx + + +def test_on_user_prompt_vault_status_fastpath( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="idle") + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "/specode:spec --vault-status"}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "vault-status fast-path" in ctx + # The wrapped content contains JSON with either "source": "..." 
or "doc_root" + assert "doc_root" in ctx or "source" in ctx + + +def test_on_user_prompt_guard_off_emits_nothing( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="active", + active_spec_slug="any", phase="intake", + pending_selector="workflow-choice", + active_spec_dir="/dev/null") + cp = run_script( + "spec_session.py", "on-user-prompt", + stdin=json.dumps({"session_id": sid, "prompt": "hi"}), + extra_env={"SPECODE_GUARD": "off"}, + ) + assert cp.returncode == 0 + assert cp.stdout.strip() == "" + + +# -------------------------------------------------------------------------- +# on-stop +# -------------------------------------------------------------------------- + +def test_on_stop_active_emits_code_doc_sync( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + _write_session(fake_home, sid, mode="active", + active_spec_slug="s", phase="implementation", + active_spec_dir=str(doc_root / "specs" / "s")) + cp = run_script( + "spec_session.py", "on-stop", + stdin=json.dumps({"session_id": sid}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "代码-文档同步提醒" in ctx + assert "tasks.md" in ctx + assert "implementation-log.md" in ctx + assert "你仍处于 spec 模式" in ctx + + +def test_on_stop_ended_emits_nothing( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="ended") + cp = run_script( + "spec_session.py", "on-stop", + stdin=json.dumps({"session_id": sid}) + ) + assert _parse_hook(cp.stdout) is None + + +def test_on_stop_readonly_only_readonly_reminder( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + _write_session(fake_home, sid, mode="readonly", + active_spec_slug="s", phase="implementation", + active_spec_dir=str(doc_root / "specs" / "s")) + cp = run_script( + "spec_session.py", "on-stop", + stdin=json.dumps({"session_id": sid}) + ) + ctx = _ctx(_parse_hook(cp.stdout)) + assert "只读模式" in ctx 
+ # No code-doc sync segment in readonly + assert "代码-文档同步提醒" not in ctx + + +# -------------------------------------------------------------------------- +# on-session-end +# -------------------------------------------------------------------------- + +def test_on_session_end_releases_held_lock( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + spec_dir = doc_root / "specs" / "end-spec" + spec_dir.mkdir(parents=True) + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "e", + "slug": "end-spec", + "phase": "tasks", + "pending_selector": "tasks-execution", + "lock": {"holder": sid, "acquired_at": "2026-01-01T00:00:00Z", + "last_heartbeat_at": "2026-01-01T00:00:00Z"}, + }), encoding="utf-8") + _write_session( + fake_home, sid, + mode="active", + active_spec_slug="end-spec", + active_spec_dir=str(spec_dir), + phase="tasks", + lock_state="ok", + ) + cp = run_script( + "spec_session.py", "on-session-end", + stdin=json.dumps({"session_id": sid}) + ) + assert cp.returncode == 0 + cfg = json.loads((spec_dir / ".config.json").read_text(encoding="utf-8")) + assert cfg["lock"] is None + sess = json.loads( + (fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8") + ) + assert sess["mode"] == "ended" + assert sess["ended_at"] + + +def test_on_session_end_no_active_spec_is_ok( + run_script, fake_home, make_session_id +): + sid = make_session_id() + _write_session(fake_home, sid, mode="idle") + cp = run_script( + "spec_session.py", "on-session-end", + stdin=json.dumps({"session_id": sid}) + ) + assert cp.returncode == 0 + sess = json.loads( + (fake_home / ".specode" / "sessions" / f"{sid}.json").read_text(encoding="utf-8") + ) + assert sess["mode"] == "ended" + + +def test_on_session_end_missing_session_is_ok( + run_script, fake_home, make_session_id +): + sid = make_session_id() + # no session file exists + cp = run_script( + "spec_session.py", "on-session-end", + stdin=json.dumps({"session_id": sid}) + ) + 
assert cp.returncode == 0 + # Hook returns early when session not found; no file written + sess_path = fake_home / ".specode" / "sessions" / f"{sid}.json" + assert not sess_path.exists() + + +# -------------------------------------------------------------------------- +# on-pre-tool-use: task-swarm 受控路径阻断 +# -------------------------------------------------------------------------- + +def _prep_active_with_task_swarm(fake_home, doc_root, sid: str, + run_id: str = "20260101-abcdef"): + """造一个 active spec + task_swarm_run_id 已绑定的 session,并建出 + spec_dir/.task-swarm/runs/<run_id>/ 目录给路径解析用。""" + spec_dir = doc_root / "specs" / "ts-block" + (spec_dir / ".task-swarm" / "runs" / run_id / "agents" / "coder-g1-s1-r1" + / "outbox").mkdir(parents=True, exist_ok=True) + (spec_dir / ".task-swarm" / "runs" / run_id / "state.json").write_text( + '{"phase":"coding"}', encoding="utf-8" + ) + (spec_dir / ".config.json").write_text(json.dumps({ + "specId": "ts", + "slug": "ts-block", + "phase": "tasks", + "lock": {"holder": sid, "acquired_at": "2026-01-01T00:00:00Z", + "last_heartbeat_at": "2026-01-01T00:00:00Z"}, + }), encoding="utf-8") + _write_session( + fake_home, sid, + mode="active", + active_spec_slug="ts-block", + active_spec_dir=str(spec_dir), + phase="tasks", + lock_state="ok", + task_swarm_run_id=run_id, + ) + return spec_dir + + +def test_on_pre_tool_use_blocks_edit_of_state_json( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + run_id = "20260101-deadbe" + spec_dir = _prep_active_with_task_swarm(fake_home, doc_root, sid, run_id) + target = spec_dir / ".task-swarm" / "runs" / run_id / "state.json" + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + "tool_name": "Edit", + "tool_input": {"file_path": str(target)}, + }) + ) + # exit 2 = PreToolUse hook 阻断 + assert cp.returncode == 2, f"expected deny exit=2, got {cp.returncode}\nstderr={cp.stderr}" + assert "task-swarm" in cp.stderr + 
assert "state.json" in cp.stderr + # 必须给出正确路径 hint + assert "task_swarm.py" in cp.stderr + + +def test_on_pre_tool_use_blocks_edit_of_agent_task_md( + run_script, fake_home, make_session_id, doc_root +): + sid = make_session_id() + run_id = "20260101-deadbe" + spec_dir = _prep_active_with_task_swarm(fake_home, doc_root, sid, run_id) + target = (spec_dir / ".task-swarm" / "runs" / run_id + / "agents" / "coder-g1-s1-r1" / "task.md") + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + "tool_name": "Edit", + "tool_input": {"file_path": str(target)}, + }) + ) + assert cp.returncode == 2 + assert "task.md" in cp.stderr + + +def test_on_pre_tool_use_blocks_write_to_agent_outbox( + run_script, fake_home, make_session_id, doc_root +): + """覆盖事故场景:主代理手工 Edit subagent outbox 补 STATUS 必须被阻断。""" + sid = make_session_id() + run_id = "20260101-deadbe" + spec_dir = _prep_active_with_task_swarm(fake_home, doc_root, sid, run_id) + target = (spec_dir / ".task-swarm" / "runs" / run_id + / "agents" / "coder-g1-s1-r1" / "outbox" / "result.md") + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + "tool_name": "Write", + "tool_input": {"file_path": str(target)}, + }) + ) + assert cp.returncode == 2 + assert "outbox" in cp.stderr + + +def test_on_pre_tool_use_blocks_edit_of_tasks_md( + run_script, fake_home, make_session_id, doc_root +): + """0.10.21+:active spec + task_swarm_run_id 进行中时,tasks.md 直写 + 从软提醒升级为强阻断(exit 2)。 + + 历史 bug:login-page 现场主代理见 writeback 越界报错就手工 Edit tasks.md + 把 [ ] 改成 [x],破坏 state.json 与 tasks.md 行号一致性,后续 writeback + 永远过不去。""" + sid = make_session_id() + run_id = "20260101-deadbe" + spec_dir = _prep_active_with_task_swarm(fake_home, doc_root, sid, run_id) + tasks_md = spec_dir / "tasks.md" + tasks_md.write_text("## 阶段 1: x\n- [ ] 1.1 t\n", encoding="utf-8") + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + 
"tool_name": "Edit", + "tool_input": {"file_path": str(tasks_md)}, + }) + ) + assert cp.returncode == 2, f"expected deny exit=2, got {cp.returncode}\nstderr={cp.stderr}" + assert "tasks.md" in cp.stderr + assert "task_swarm.py writeback" in cp.stderr + + +def test_on_pre_tool_use_allows_normal_source_file_edit( + run_script, fake_home, make_session_id, doc_root +): + """非 task-swarm 路径的 Edit 不能被误拦截(保留正常代码 Edit 通行)。""" + sid = make_session_id() + run_id = "20260101-deadbe" + spec_dir = _prep_active_with_task_swarm(fake_home, doc_root, sid, run_id) + normal_file = spec_dir / "src" / "foo.py" + normal_file.parent.mkdir(parents=True, exist_ok=True) + normal_file.write_text("x = 1\n", encoding="utf-8") + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + "tool_name": "Edit", + "tool_input": {"file_path": str(normal_file)}, + }) + ) + # 不阻断 + assert cp.returncode == 0 + # 也不应当 emit 阻断提示 + assert "specode 阻断" not in (cp.stderr or "") + + +def test_on_pre_tool_use_no_active_spec_does_not_block( + run_script, fake_home, make_session_id, doc_root +): + """idle/ended session 即使路径落在 .task-swarm 下也不阻断(hook 只在 active + 且绑定 task_swarm_run_id 时生效)。""" + sid = make_session_id() + spec_dir = doc_root / "specs" / "x" + fake_path = spec_dir / ".task-swarm" / "runs" / "rid" / "state.json" + fake_path.parent.mkdir(parents=True, exist_ok=True) + fake_path.write_text("{}", encoding="utf-8") + _write_session(fake_home, sid, mode="idle") # 注意:没有 task_swarm_run_id + + cp = run_script( + "spec_session.py", "on-pre-tool-use", + stdin=json.dumps({ + "session_id": sid, + "tool_name": "Edit", + "tool_input": {"file_path": str(fake_path)}, + }) + ) + assert cp.returncode == 0 diff --git a/plugins/specode/tests/test_spec_sync.py b/plugins/specode/tests/test_spec_sync.py deleted file mode 100644 index 73c573d..0000000 --- a/plugins/specode/tests/test_spec_sync.py +++ /dev/null @@ -1,255 +0,0 @@ -"""Unit tests for spec_sync.py — tasks_files 
extraction and decision functions.""" -from __future__ import annotations - -import sys -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import spec_sync - - -def test_extract_tasks_files_from_FILE_lines(tmp_path): - spec = tmp_path / "spec" - spec.mkdir() - (spec / "tasks.md").write_text( - "# Tasks\n" - "- [ ] FILE: src/auth/middleware.py\n" - "- [x] FILE: src/auth/session.py\n" - "- [ ] FILE:tests/auth/test_middleware.py\n" # full-width 冒号 - "- [ ] FILE: src/auth/glob/**/*.py\n" - ) - out = spec_sync.extract_tasks_files(spec) - assert "src/auth/middleware.py" in out - assert "src/auth/session.py" in out - assert "tests/auth/test_middleware.py" in out - assert "src/auth/glob/**/*.py" in out - - -def test_extract_tasks_files_from_affected_section(tmp_path): - spec = tmp_path / "spec" - spec.mkdir() - (spec / "design.md").write_text( - "# Design\n\n## Affected Files\n\n- `src/foo.py`\n- src/bar.py\n\n## Next\n- ignored\n" - ) - out = spec_sync.extract_tasks_files(spec) - assert "src/foo.py" in out - assert "src/bar.py" in out - assert "ignored" not in out - - -def test_matches_tasks_files_literal_and_glob(tmp_path): - proj = tmp_path / "proj" - (proj / "src" / "auth").mkdir(parents=True) - target_literal = proj / "src" / "foo.py" - target_glob = proj / "src" / "auth" / "middleware.py" - target_literal.touch() - target_glob.touch() - - assert spec_sync.matches_tasks_files(target_literal, ["src/foo.py"], proj) - assert spec_sync.matches_tasks_files(target_glob, ["src/auth/**/*.py"], proj) - assert not spec_sync.matches_tasks_files(target_literal, ["src/bar.py"], proj) - - -def test_classify_path_spec_doc_vs_project_code_vs_outside(tmp_path): - spec = tmp_path / "spec" - proj = tmp_path / "proj" - spec.mkdir() - proj.mkdir() - (spec / "design.md").touch() - (proj / "src.py").touch() - outside = Path("/tmp/literally-outside.txt") - - assert spec_sync.classify_path(spec / "design.md", 
spec, proj) == "spec-doc" - assert spec_sync.classify_path(proj / "src.py", spec, proj) == "project-code" - assert spec_sync.classify_path(outside, spec, proj) == "outside" - - -def test_check_phase_gate_forbids_pre_implementation(): - for phase in ("intake", "requirements", "design", "tasks", "bugfix"): - decision, msg = spec_sync.check_phase_gate(phase) - assert decision == "deny" - assert "INV-6" in msg - assert phase in msg - - for phase in ("implementation", "acceptance", "iteration", "ended"): - decision, _ = spec_sync.check_phase_gate(phase) - assert decision == "ok" - - -def test_check_stop_inv2_and_inv4(): - # Empty ledger → ok - ledger = spec_sync._new_ledger(Path("/tmp/x")) - assert spec_sync.check_stop(ledger) == [] - - # Code-only → INV-2 - ledger = spec_sync._new_ledger(Path("/tmp/x")) - spec_sync.append_change(ledger, "code", "/proj/src/foo.py", "Edit") - violations = spec_sync.check_stop(ledger) - assert any(v["id"] == "INV-2" for v in violations) - - # Code + doc → ok - spec_sync.append_change(ledger, "doc", "/spec/design.md", "Edit") - assert spec_sync.check_stop(ledger) == [] - - # Requirements w/o tasks.md → INV-4 - ledger = spec_sync._new_ledger(Path("/tmp/x")) - spec_sync.append_change(ledger, "doc", "/spec/requirements.md", "Edit") - violations = spec_sync.check_stop(ledger) - assert any(v["id"] == "INV-4" for v in violations) - - # Requirements + tasks.md → ok (测试要点 lives in tasks.md) - spec_sync.append_change(ledger, "doc", "/spec/tasks.md", "Edit") - assert spec_sync.check_stop(ledger) == [] - - # Bugfix w/o tasks.md → INV-4 - ledger = spec_sync._new_ledger(Path("/tmp/x")) - spec_sync.append_change(ledger, "doc", "/spec/bugfix.md", "Edit") - violations = spec_sync.check_stop(ledger) - assert any(v["id"] == "INV-4" for v in violations) - - -def test_cmd_status_with_spec_dir_bypasses_active_resolver(tmp_path, capsys, monkeypatch): - """status --spec-dir should read ledger directly, no active-pointer needed.""" - spec_dir = tmp_path / "spec" 
- spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.start_new_turn(ledger, tmp_path, ["src/a.py"]) - spec_sync.write_ledger(spec_dir, ledger) - - # Ensure _resolve_active_spec_dir is NOT consulted: poison it. - monkeypatch.setattr( - spec_sync, "_resolve_active_spec_dir", - lambda: (_ for _ in ()).throw(AssertionError("must not call resolver")), - ) - - rc = spec_sync.main(["status", "--spec-dir", str(spec_dir)]) - assert rc == 0 - out = capsys.readouterr().out - assert "(no active spec)" not in out - assert str(spec_dir) in out - assert "tasks_files: 1 entries" in out - - -def test_cmd_status_with_missing_spec_dir_errors(tmp_path, capsys): - rc = spec_sync.main(["status", "--spec-dir", str(tmp_path / "nope")]) - assert rc == 2 - err = capsys.readouterr().err - assert "spec_dir does not exist" in err - - -def test_find_active_spec_falls_back_to_default_sid(tmp_path, monkeypatch): - """When no env id is supplied, prefer the 'default' session before - sorting by lastActivityAt — matches normalize_session_id's fallback.""" - sys.path.insert(0, str(SCRIPTS_DIR)) - import spec_state - - monkeypatch.setattr(spec_state, "get_document_root", lambda: tmp_path) - monkeypatch.delenv("TERM_SESSION_ID", raising=False) - (tmp_path / "older-slug").mkdir() - (tmp_path / "older-slug" / ".config.json").write_text('{"specId": "s1"}') - (tmp_path / "newer-slug").mkdir() - (tmp_path / "newer-slug" / ".config.json").write_text('{"specId": "s2"}') - (tmp_path / ".active-specode.json").write_text( - '{"version": 2, "sessions": {' - '"some-tty": {"specSlug": "newer-slug", "specId": "s2", ' - '"status": "active", "lastActivityAt": "2099-01-01T00:00:00Z"},' - '"default": {"specSlug": "older-slug", "specId": "s1", ' - '"status": "active", "lastActivityAt": "2000-01-01T00:00:00Z"}' - '}}' - ) - - info = spec_state.find_active_spec(prefer_session_id=None) - assert info is not None - assert info["session_id"] == "default" - assert info["spec_slug"] == "older-slug" - - -def 
test_ledger_turn_lifecycle(tmp_path): - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - assert ledger["turn_id"] is None - spec_sync.start_new_turn(ledger, tmp_path, ["src/a.py"]) - assert ledger["turn_id"] is not None - assert ledger["tasks_files"] == ["src/a.py"] - spec_sync.append_change(ledger, "code", "src/a.py", "Edit") - assert len(ledger["turn_code_changes"]) == 1 - spec_sync.reset_turn(ledger) - assert ledger["turn_code_changes"] == [] - - -# ---- Advisory helpers (0.4.0) ---------------------------------------------- - -def test_record_advisory_dedupes_same_turn_file(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.start_new_turn(ledger, tmp_path / "project", []) - spec_sync.record_advisory(ledger, "INV-1", "msg", file="src/a.py") - spec_sync.record_advisory(ledger, "INV-1", "msg", file="src/a.py") # dup - spec_sync.record_advisory(ledger, "INV-1", "msg", file="src/b.py") - assert len(ledger["pending_advisories"]) == 2 - - -def test_auto_dismiss_drops_inv1_2_4_keeps_inv6(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.record_advisory(ledger, "INV-1", "m", file="x.py") - spec_sync.record_advisory(ledger, "INV-2", "m") - spec_sync.record_advisory(ledger, "INV-4", "m") - spec_sync.record_advisory(ledger, "INV-6", "m", file="y.py") - dropped = spec_sync.auto_dismiss_on_doc_change(ledger) - assert dropped == 3 - remaining = {a["id"] for a in ledger["pending_advisories"]} - assert remaining == {"INV-6"} - - -def test_dismiss_advisories_all(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.record_advisory(ledger, "INV-1", "m", file="x.py") - spec_sync.record_advisory(ledger, "INV-6", "m", file="y.py") - dropped = spec_sync.dismiss_advisories(ledger) - assert 
dropped == 2 - assert ledger["pending_advisories"] == [] - - -def test_dismiss_advisories_selective(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.record_advisory(ledger, "INV-1", "m", file="x.py") - spec_sync.record_advisory(ledger, "INV-6", "m", file="y.py") - dropped = spec_sync.dismiss_advisories(ledger, inv_ids=["INV-1"]) - assert dropped == 1 - assert [a["id"] for a in ledger["pending_advisories"]] == ["INV-6"] - - -def test_format_advisories_block_empty(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - assert spec_sync.format_advisories_block(ledger) == "" - - -def test_format_advisories_block_groups_by_inv(tmp_path): - import spec_sync - spec_dir = tmp_path / "spec" - spec_dir.mkdir() - ledger = spec_sync.read_ledger(spec_dir) - spec_sync.record_advisory(ledger, "INV-1", "m1", file="a.py") - spec_sync.record_advisory(ledger, "INV-1", "m1", file="b.py") - spec_sync.record_advisory(ledger, "INV-2", "m2") - out = spec_sync.format_advisories_block(ledger) - assert "pending advisories" in out - assert "INV-1 × 2" in out - assert "INV-2 × 1" in out diff --git a/plugins/specode/tests/test_spec_vault.py b/plugins/specode/tests/test_spec_vault.py new file mode 100644 index 0000000..65bf32a --- /dev/null +++ b/plugins/specode/tests/test_spec_vault.py @@ -0,0 +1,205 @@ +"""Tests for spec_vault.py — three-tier doc_root resolution + status/detect/set.""" +from __future__ import annotations + +import getpass +import json +import platform +from pathlib import Path + +import pytest + + +def _parse_status(stdout: str) -> dict: + """spec_vault.py status emits a single JSON object (plus trailing \\n).""" + return json.loads(stdout) + + +def _expected_device_segment() -> str: + """Mirror spec_vault._device_segment so tests can predict the suffix.""" + sys_map = {"Darwin": "macos", "Windows": "windows", "Linux": 
"linux"} + os_name = sys_map.get(platform.system(), platform.system().lower()) + return f"{os_name}-{getpass.getuser()}" + + +def test_status_with_env_override(run_script, fake_home, doc_root): + """When SPECODE_ROOT env is set (doc_root fixture sets it), source=env.""" + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 0, cp.stderr + out = _parse_status(cp.stdout) + assert out["source"] == "env" + assert out["doc_root"] == str(doc_root) + assert out["exists"] is True + assert out["env_SPECODE_ROOT"] == str(doc_root) + + +def test_status_with_config_only(run_script, fake_home, monkeypatch): + """SPECODE_ROOT unset + config.obsidianRoot present → source=config + device 段追加。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + cfg_dir = fake_home / ".config" / "specode" + cfg_dir.mkdir(parents=True, exist_ok=True) + target = fake_home / "my-vault" + target.mkdir() + (cfg_dir / "config.json").write_text( + json.dumps({"obsidianRoot": str(target)}), encoding="utf-8" + ) + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 0, cp.stderr + out = _parse_status(cp.stdout) + assert out["source"] == "config" + # obsidianRoot 命中 → 追加 spec-in/<device> + assert out["doc_root"] == str(target / "spec-in" / _expected_device_segment()) + + +def test_status_with_root_override_no_device_suffix(run_script, fake_home, monkeypatch): + """config.rootOverride 命中 → 用户给什么用什么,不追加 device 段。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + cfg_dir = fake_home / ".config" / "specode" + cfg_dir.mkdir(parents=True, exist_ok=True) + target = fake_home / "explicit-root" + target.mkdir() + (cfg_dir / "config.json").write_text( + json.dumps({"rootOverride": str(target)}), encoding="utf-8" + ) + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 0, cp.stderr + out = _parse_status(cp.stdout) + assert out["source"] == "config" + assert out["doc_root"] == str(target) # 无 spec-in/<device> 追加 + + +def 
test_root_override_takes_precedence_over_obsidian_root( + run_script, fake_home, monkeypatch, +): + """同时存在 rootOverride 与 obsidianRoot 时,rootOverride 胜出(显式 > 隐式)。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + cfg_dir = fake_home / ".config" / "specode" + cfg_dir.mkdir(parents=True, exist_ok=True) + obs = fake_home / "obs-vault" + obs.mkdir() + explicit = fake_home / "explicit" + explicit.mkdir() + (cfg_dir / "config.json").write_text( + json.dumps({"obsidianRoot": str(obs), "rootOverride": str(explicit)}), + encoding="utf-8", + ) + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 0, cp.stderr + out = _parse_status(cp.stdout) + assert out["doc_root"] == str(explicit) # rootOverride 优先且不追加 + + +def test_status_with_none_returns_exit_3(run_script, fake_home, monkeypatch): + """All three tiers miss → exit 3 + hint.""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + # No config, no obsidian config (fake HOME has neither) + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 3, cp.stderr + out = _parse_status(cp.stdout) + assert out["source"] == "none" + assert out["doc_root"] is None + assert out["exists"] is False + assert "hint" in out + assert "SPECODE_ROOT" in out["hint"] + + +def test_detect_returns_empty_list_when_no_obsidian(run_script, fake_home, monkeypatch): + """detect on a fake home with no obsidian.json → vaults=[].""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + cp = run_script("spec_vault.py", "detect") + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert "vaults" in out + assert out["vaults"] == [] + assert out["count"] == 0 + assert "configs_checked" in out + # configs_checked should be a non-empty list of paths under the fake home + assert all(str(fake_home) in p for p in out["configs_checked"]) + + +def test_set_vault_writes_config_and_status_reflects_config( + run_script, fake_home, monkeypatch +): + """`set --vault <p>` 写 obsidianRoot;status 反映 config + device 
段追加。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + target = fake_home / "my-vault" + target.mkdir() + cp_set = run_script("spec_vault.py", "set", "--vault", str(target)) + assert cp_set.returncode == 0, cp_set.stderr + set_payload = json.loads(cp_set.stdout) + assert set_payload["ok"] is True + # cmd_set 输出 doc_root 用 resolve_doc_root 重算 → 含 device 段 + expected = target.resolve() / "spec-in" / _expected_device_segment() + assert set_payload["doc_root"] == str(expected) + # config 字段:obsidianRoot 写入;rootOverride 不应存在 + cfg_path = fake_home / ".config" / "specode" / "config.json" + assert cfg_path.exists() + cfg = json.loads(cfg_path.read_text(encoding="utf-8")) + assert cfg["obsidianRoot"] == str(target.resolve()) + assert "rootOverride" not in cfg + # status 同样反映 + cp_status = run_script("spec_vault.py", "status") + assert cp_status.returncode == 0 + out = _parse_status(cp_status.stdout) + assert out["source"] == "config" + assert out["doc_root"] == str(expected) + + +def test_set_root_writes_root_override_no_device_suffix(run_script, fake_home, monkeypatch): + """`set --root <p>` 写 rootOverride(不是 obsidianRoot);doc_root 不追加 device 段。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + target = fake_home / "via-root" + target.mkdir() + cp = run_script("spec_vault.py", "set", "--root", str(target)) + assert cp.returncode == 0, cp.stderr + set_payload = json.loads(cp.stdout) + # --root 不追加 device 段 + assert set_payload["doc_root"] == str(target.resolve()) + cfg = json.loads( + (fake_home / ".config" / "specode" / "config.json").read_text(encoding="utf-8") + ) + # 写入字段是 rootOverride 不是 obsidianRoot + assert cfg["rootOverride"] == str(target.resolve()) + assert "obsidianRoot" not in cfg + + +def test_set_vault_then_root_replaces_field(run_script, fake_home, monkeypatch): + """先 set --vault 再 set --root:obsidianRoot 字段被清除、只保留 rootOverride。""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + v = fake_home / "v1" + v.mkdir() + r = fake_home / "v2" + 
r.mkdir() + run_script("spec_vault.py", "set", "--vault", str(v)) + run_script("spec_vault.py", "set", "--root", str(r)) + cfg = json.loads( + (fake_home / ".config" / "specode" / "config.json").read_text(encoding="utf-8") + ) + assert cfg["rootOverride"] == str(r.resolve()) + assert "obsidianRoot" not in cfg + + +def test_set_nonexistent_path_returns_exit_3(run_script, fake_home, monkeypatch): + """Setting a path that does not exist must fail without writing config.""" + monkeypatch.delenv("SPECODE_ROOT", raising=False) + cp = run_script("spec_vault.py", "set", "--vault", str(fake_home / "ghost")) + assert cp.returncode == 3 + assert "不存在" in cp.stderr or "exists" not in cp.stderr.lower() + + +def test_env_overrides_config(run_script, fake_home, monkeypatch): + """When both env and config present, env wins.""" + cfg_dir = fake_home / ".config" / "specode" + cfg_dir.mkdir(parents=True, exist_ok=True) + config_target = fake_home / "config-vault" + config_target.mkdir() + (cfg_dir / "config.json").write_text( + json.dumps({"obsidianRoot": str(config_target)}), encoding="utf-8" + ) + env_target = fake_home / "env-vault" + env_target.mkdir() + monkeypatch.setenv("SPECODE_ROOT", str(env_target)) + cp = run_script("spec_vault.py", "status") + assert cp.returncode == 0 + out = _parse_status(cp.stdout) + assert out["source"] == "env" + assert out["doc_root"] == str(env_target) diff --git a/plugins/specode/tests/test_task_swarm_cli.py b/plugins/specode/tests/test_task_swarm_cli.py index fd63591..5e06b08 100644 --- a/plugins/specode/tests/test_task_swarm_cli.py +++ b/plugins/specode/tests/test_task_swarm_cli.py @@ -1,464 +1,518 @@ -"""End-to-end CLI test for task_swarm.py. - -Walks the protocol: init → next (fork coder) → write fake outbox → parse → -advance → next (fork reviewer) → ... → writeback → next (done). - -Verifies the JSON contracts that the orchestrator (main Claude session) -depends on. 
-""" +"""tests for task_swarm.py — CLI 子命令端到端。""" from __future__ import annotations -import io import json -import shutil +import os +import subprocess import sys -import tempfile -from contextlib import redirect_stdout from pathlib import Path -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm as TS # noqa: E402 - - -TASKS_MD = """\ -# 任务 - -- [ ] 1. 实现 A - - [ ] 1.1 写 a - - 文件:src/a.py - - _需求:1.1_ - -- [ ] 2. 检查点 - - 运行 pytest -""" - - -def _run(argv: list[str]) -> dict: - buf = io.StringIO() - with redirect_stdout(buf): - rc = TS.main(argv) - text = buf.getvalue().strip() - assert rc == 0, f"cmd failed: {' '.join(argv)} stdout={text}" - return json.loads(text) - - -def _setup_workspace() -> dict: - tmp = Path(tempfile.mkdtemp(prefix="ts-cli-")) - spec = tmp / "spec-dir" - spec.mkdir() - project = tmp / "project" - project.mkdir() - (spec / "tasks.md").write_text(TASKS_MD, encoding="utf-8") - return {"tmp": tmp, "spec": spec, "project": project, "tasks": spec / "tasks.md"} - - -def _cleanup(ws): - shutil.rmtree(ws["tmp"], ignore_errors=True) - - -def test_full_cli_flow_happy_path(): - ws = _setup_workspace() - try: - # init - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - "--max-rounds", "3", - "--parallel", "2", - ]) - run_id = init_out["run_id"] - assert len(init_out["stages"]) == 2 - - # next → fork stage 1 coder - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "fork" - assert nxt["stage"] == 1 - assert nxt["role"] == "coder" - assert nxt["subagent_type"] == "specode:task-swarm-coder" - prompt_file = Path(nxt["prompt_file"]) - assert prompt_file.exists() - assert "CODER" in prompt_file.read_text() - workspace = Path(nxt["workspace"]) - - # simulate coder writing result.md - (workspace / "outbox" / "result.md").write_text( - "# 阶段 1 结果\n\n" - "## 子任务状态\n" - "- 1.1 写 a: done — 
src/a.py\n\n" - "## 关键变更\n- 新增 A\n\nSTATUS: ok\n", - encoding="utf-8", +import pytest + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" + + +@pytest.fixture +def run_swarm(tmp_path, monkeypatch): + """运行 task_swarm.py CLI,cwd 设到 tmp_path 让 .task-swarm 目录可解析。""" + monkeypatch.chdir(tmp_path) + # 与 conftest.fake_home 一致:HOME 也指向 tmp 避免污染 + monkeypatch.setenv("HOME", str(tmp_path / "_home")) + monkeypatch.setenv("USERPROFILE", str(tmp_path / "_home")) + + def _run(*args: str, stdin: str = "") -> subprocess.CompletedProcess: + env = os.environ.copy() + env["HOME"] = str(tmp_path / "_home") + env["USERPROFILE"] = str(tmp_path / "_home") + env.setdefault("PYTHONUTF8", "1") + env.setdefault("PYTHONIOENCODING", "utf-8") + cmd = [sys.executable, str(SCRIPTS_DIR / "task_swarm.py"), *args] + return subprocess.run(cmd, capture_output=True, text=True, + encoding="utf-8", errors="replace", + input=stdin, env=env, timeout=30, cwd=str(tmp_path)) + return _run + + +def _write_tasks_md(tmp_path: Path, num_stages: int = 2) -> Path: + p = tmp_path / "tasks.md" + lines = [] + for i in range(1, num_stages + 1): + lines.append(f"## 阶段 {i}: 阶段 {i}") + lines.append(f"- [ ] {i}.1 任务 @writes:src/f{i}.py _需求:{i}.1_") + if i > 1: + lines.append(f"- [ ] {i}.2 任务2 @writes:src/g{i}.py @depends-on:{i-1} _需求:{i}.2_") + p.write_text("\n".join(lines) + "\n", encoding="utf-8") + return p + + +def _write_coder_result(run_dir: Path, agent_key: str, status: str = "ok") -> None: + outbox = run_dir / "agents" / agent_key / "outbox" + outbox.mkdir(parents=True, exist_ok=True) + (outbox / "result.md").write_text( + "# c\n## 上下文\n- x\n## 子任务状态\n- 1.1 t: done — f.py\n## 关键变更\n- a\n\n" + f"STATUS: {status}\n", + encoding="utf-8", + ) + + +def _write_reviewer(run_dir: Path, group: int, with_p0: bool = True) -> None: + out = run_dir / "agents" / f"reviewer-g{group}-r1" / "outbox" + out.mkdir(parents=True, exist_ok=True) + p0_section = ("## P0\n- src/f1.py:5 [req:1.1] — issue\n\n" + if with_p0 else 
"## P0\n(none)\n\n") + (out / "review.md").write_text( + "# rev\n## 结论\napproved-with-comments\n\n" + + p0_section + + "## P1\n## P2\nSTATUS: ok\n", + encoding="utf-8", + ) + + +def _write_validator(run_dir: Path, group: int, round_: int, verdict: str = "pass", + sig_marker: str = "default") -> None: + out = run_dir / "agents" / f"validator-g{group}-r{round_}" / "outbox" + out.mkdir(parents=True, exist_ok=True) + if verdict == "pass": + body = ("# v\n## 判定\npass\n## 复现命令\n```bash\npytest\n```\n" + "## 按子任务的验证结果\n- [x] 1.1 t: pass\n\nSTATUS: ok\n") + else: + body = ("# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 按子任务的验证结果\n- [ ] 1.1 t: fail\n" + f"## 失败现场\n```\nFAILED tests/t.py::test_{sig_marker}\nAssertionError: x\n```\n" + "## 给 coder 的修复指引\n### 修复 1\n- 文件: src/f1.py\n- 位置: x\n" + "- 问题: y\n- 建议: z\n\nSTATUS: ok\n") + (out / "validation.md").write_text(body, encoding="utf-8") + + +# ------------------------------------------------------------------------- +# 测试 +# ------------------------------------------------------------------------- + +def test_init_creates_state_and_groups(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=2) + cp = run_swarm("init", "--tasks", str(p)) + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert "run_id" in out + assert len(out["groups"]) >= 1 + run_dir = Path(out["run_dir"]) + assert (run_dir / "state.json").exists() + + +def test_init_with_nonexistent_tasks_exits_1(tmp_path, run_swarm): + cp = run_swarm("init", "--tasks", str(tmp_path / "no.md")) + assert cp.returncode == 1 + + +def test_init_empty_tasks_md_exits_1(tmp_path, run_swarm): + p = tmp_path / "tasks.md" + p.write_text("# nothing\n", encoding="utf-8") + cp = run_swarm("init", "--tasks", str(p)) + assert cp.returncode == 1 + + +def test_plan_initial_returns_coding_fork(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + cp = 
run_swarm("plan", "--run", init["run_id"]) + assert cp.returncode == 0 + plan = json.loads(cp.stdout) + assert plan["phase"] == "coding" + assert plan["action"] == "coding-fork" + assert plan["fork"] + + +def test_status_reports_phase(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + cp = run_swarm("status", "--run", init["run_id"]) + assert cp.returncode == 0 + st = json.loads(cp.stdout) + assert st["run_id"] == init["run_id"] + assert st["phase"] in ("init", "coding") + + +def test_advance_coding_then_review_fork(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_dir = Path(init["run_dir"]) + # plan to materialize prompts + transition to coding + run_swarm("plan", "--run", init["run_id"]) + _write_coder_result(run_dir, "coder-g1-s1-r1") + cp = run_swarm("advance", "--run", init["run_id"], "--phase", "coding", "--round", "1") + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["ok"] + assert out["plan"]["action"] == "review-fork" + + +def test_full_cycle_no_p0_pass(tmp_path, run_swarm): + """coding → review (no P0) → validation (pass) → writeback""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + _write_validator(run_dir, 1, 1, verdict="pass") + cp = run_swarm("advance", "--run", run_id, "--phase", "validation", "--round", "1") + out = json.loads(cp.stdout) + assert out["plan"]["action"] == "writeback" + cp = run_swarm("writeback", "--run", run_id, "--group", "1") + assert 
cp.returncode == 0, cp.stderr + text = p.read_text(encoding="utf-8") + assert "- [x] 1.1" in text + + +def test_full_cycle_with_p0_fix(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=True) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + # p0-fix coder + _write_coder_result(run_dir, "coder-p0fix-g1-r1-f0") + cp = run_swarm("advance", "--run", run_id, "--phase", "p0-fix", "--round", "1") + out = json.loads(cp.stdout) + assert out["plan"]["phase"] == "validation" + + +def test_validation_fail_triggers_v_fix(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + _write_validator(run_dir, 1, 1, verdict="fail", sig_marker="first") + cp = run_swarm("advance", "--run", run_id, "--phase", "validation", "--round", "1") + out = json.loads(cp.stdout) + assert out["plan"]["phase"] == "v-fix" + + +def test_deadloop_after_3_identical_fails(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + 
_write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + # 3 同样的 fail + for r in range(1, 4): + _write_validator(run_dir, 1, r, verdict="fail", sig_marker="same") + cp = run_swarm("advance", "--run", run_id, "--phase", "validation", "--round", str(r)) + out = json.loads(cp.stdout) + if r < 3: + # v-fix coder 返回 + v_round = r + 1 + files = ["src/f1.py"] + for i, _f in enumerate(files): + key = f"coder-vfix-g1-r{v_round}-f{i}" + _write_coder_result(run_dir, key) + run_swarm("advance", "--run", run_id, "--phase", "v-fix", "--round", str(v_round)) + assert "deadloop" in out.get("next", "") or out.get("deadloop") is True + + +def test_writeback_invalid_group_returns_1(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + cp = run_swarm("writeback", "--run", init["run_id"], "--group", "99") + assert cp.returncode == 1 + + +def test_heartbeat_updates_state(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + cp = run_swarm("heartbeat", "--run", init["run_id"]) + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["run_id"] == init["run_id"] + + +def test_resolve_abort_sets_status(tmp_path, run_swarm): + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + cp = run_swarm("resolve", "--run", init["run_id"], "--abort") + assert cp.returncode == 0 + out = json.loads(cp.stdout) + assert out["status"] == "aborted" + + +def test_resolve_clears_session_task_swarm_run_id(tmp_path, run_swarm, monkeypatch): + monkeypatch.setenv("HOME", str(tmp_path / "_home")) + sessions_dir = tmp_path / "_home" / ".specode" / "sessions" + sessions_dir.mkdir(parents=True, exist_ok=True) + sid = "test-sess-123" + sess_file = sessions_dir / f"{sid}.json" + sess_file.write_text(json.dumps({"session_id": 
sid, "mode": "active", + "task_swarm_run_id": None}), encoding="utf-8") + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p), "--session", sid).stdout) + # 验证 init 写了 task_swarm_run_id + saved = json.loads(sess_file.read_text(encoding="utf-8")) + assert saved["task_swarm_run_id"] == init["run_id"] + # resolve 后应清空 + run_swarm("resolve", "--run", init["run_id"]) + saved2 = json.loads(sess_file.read_text(encoding="utf-8")) + assert saved2["task_swarm_run_id"] is None + + +def test_v_fix_prompt_files_match_state_in_flight(tmp_path, run_swarm): + """Regression: validation fail → begin_v_fix 之后,磁盘上 agents/<key>/task.md + 的 round 号必须与 state.json 的 vfix_in_flight 一致。 + + 历史 bug:_materialize_prompts_v_fix 用 round_=sm.round+1,但 begin_v_fix 已经 + 把 sm.round 自增过了,且 vfix_in_flight 用的就是 sm.round 命名。结果磁盘 + task.md 比 in_flight 多一个 round 号(state 是 r2、磁盘是 r3)。后续 advance + 找不到 r2 的 result.md,永远报 '产物文件不存在'。 + """ + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + + # 走完 coding + review(无 P0)→ 直接进 validation + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + + # validation round 1 fail → 触发 begin_v_fix + _materialize_prompts_v_fix + _write_validator(run_dir, 1, 1, verdict="fail", sig_marker="x") + run_swarm("advance", "--run", run_id, "--phase", "validation", "--round", "1") + + state = json.loads((run_dir / "state.json").read_text(encoding="utf-8")) + assert state["phase"] == "v-fix" + assert state["round"] == 2 + in_flight = state["vfix_in_flight"] + assert in_flight, "begin_v_fix 之后 vfix_in_flight 应非空" + + # 关键断言:每个 in_flight key 对应的 agents/<key>/task.md 必须存在 + agents_dir = run_dir / 
"agents" + existing = {p.name for p in agents_dir.iterdir() if p.is_dir()} + for key in in_flight: + task_md = agents_dir / key / "task.md" + assert task_md.exists(), ( + f"in_flight 含 {key!r} 但磁盘 task.md 不存在: {task_md}\n" + f"agents 目录实际成员: {sorted(existing)}\n" + f"(典型 r/r+1 漂移 bug:state 是 r{state['round']}、" + f"磁盘可能是 r{state['round']+1})" ) - # parse - parsed = _run([ - "parse", "--run", run_id, - "--stage", "1", "--role", "coder", "--round", "1", - "--project-root", str(ws["project"]), - ]) - assert parsed["judgment"] == "ok" - - # advance - _run([ - "advance", "--run", run_id, - "--stage", "1", "--role", "coder", "--round", "1", - "--judgment", "ok", - "--project-root", str(ws["project"]), - ]) - - # next → fork stage 1 reviewer - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "fork" - assert nxt["role"] == "reviewer" - rev_ws = Path(nxt["workspace"]) - # reviewer's inbox should contain coder's result.md (relayed) - assert (rev_ws / "inbox" / "result.md").exists() - - # simulate reviewer approving - (rev_ws / "outbox" / "review.md").write_text( - "## 结论\napproved\n\n" - "## P0 — 阻塞,coder 必须修复(修完才能进 validator)\n(none)\n\n" - "## P1 — 建议修复,不阻塞\n- 命名\n\n" - "## P2 — 可选改进\n- 风格\n\nSTATUS: ok\n", - encoding="utf-8", - ) - parsed = _run([ - "parse", "--run", run_id, - "--stage", "1", "--role", "reviewer", "--round", "1", - "--project-root", str(ws["project"]), - ]) - assert parsed["judgment"] == "approved" - _run([ - "advance", "--run", run_id, - "--stage", "1", "--role", "reviewer", "--round", "1", - "--judgment", "approved", - "--project-root", str(ws["project"]), - ]) - - # next → writeback stage 1 - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "writeback" - assert nxt["stage"] == 1 - - # writeback - wb_out = _run([ - "writeback", "--run", run_id, "--stage", "1", - "--project-root", str(ws["project"]), - ]) - assert wb_out["written"] is True - 
tasks_text = ws["tasks"].read_text(encoding="utf-8") - assert "- [x] 1. 实现 A" in tasks_text - assert "- [x] 1.1 写 a" in tasks_text - # annotation appended - assert "task-swarm 收敛" in tasks_text - # metadata preserved - assert "_需求:1.1_" in tasks_text - assert "文件:src/a.py" in tasks_text - - # next → fork checkpoint validator - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "fork" - assert nxt["role"] == "validator" - v_ws = Path(nxt["workspace"]) - - (v_ws / "outbox" / "validation.md").write_text( - "## 判定\npass\n\n" - "## 复现命令\n```bash\npytest\n```\n\n" - "## 按子任务的验证结果\n- [x] 1.1 a: pass\n\nSTATUS: ok\n", - encoding="utf-8", - ) - parsed = _run([ - "parse", "--run", run_id, - "--stage", "2", "--role", "validator", "--round", "1", - "--project-root", str(ws["project"]), - ]) - assert parsed["judgment"] == "pass" - _run([ - "advance", "--run", run_id, - "--stage", "2", "--role", "validator", "--round", "1", - "--judgment", "pass", - "--project-root", str(ws["project"]), - ]) - - # next → writeback stage 2 - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "writeback" - _run([ - "writeback", "--run", run_id, "--stage", "2", - "--project-root", str(ws["project"]), - ]) - - # next → done - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt["action"] == "done" - assert "summary" in nxt - finally: - _cleanup(ws) - - -def test_init_creates_active_run_pointer(): - ws = _setup_workspace() - try: - out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - pointer = ws["project"] / ".task-swarm" / "active-run" - assert pointer.exists() - assert pointer.read_text().strip() == out["run_id"] - finally: - _cleanup(ws) - - -def test_writeback_rejects_unconverged_stage(): - ws = _setup_workspace() - try: - out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", 
str(ws["project"]), - ]) - run_id = out["run_id"] - # No advance — stage 1 is still pending; writeback should error. - buf = io.StringIO() - with redirect_stdout(buf): - rc = TS.main([ - "writeback", "--run", run_id, "--stage", "1", - "--project-root", str(ws["project"]), - ]) - assert rc == 2 - body = json.loads(buf.getvalue()) - assert "尚未收敛" in body["error"] or "phase=pending" in body["error"] - finally: - _cleanup(ws) - - -# ---------- R5: schema-error retry ---------- - -def test_parse_schema_error_retries_and_clears_outbox(): - ws = _setup_workspace() - try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - workspace = Path(nxt["workspace"]) - outbox = workspace / "outbox" - - # Write a malformed result.md (missing 子任务状态 section + STATUS) - (outbox / "result.md").write_text("# 阶段 1 结果\n\n## 其他\n- ...\n", encoding="utf-8") - - parsed = _run([ - "parse", "--run", run_id, - "--stage", "1", "--role", "coder", "--round", "1", - "--project-root", str(ws["project"]), - ]) - assert parsed["judgment"] == "schema-error" - assert parsed.get("retry") is True - assert "result.md" in parsed["outbox_snapshot"] - # outbox cleared - assert not (outbox / "result.md").exists() - # advance_cmd absent so caller can't accidentally advance - assert "advance_cmd" not in parsed - - # next should reissue the same fork (in_flight was reset) - nxt2 = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - assert nxt2["action"] == "fork" - assert (nxt2["stage"], nxt2["role"], nxt2["round"]) == (1, "coder", 1) - finally: - _cleanup(ws) - - -# ---------- R6: reset-in-flight ---------- - -def test_reset_in_flight_clears_marker(): - ws = _setup_workspace() - try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - # Dispatch 
first fork — marks stage 1 in_flight - _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - - out = _run([ - "reset-in-flight", "--run", run_id, "--stage", "1", - "--project-root", str(ws["project"]), - ]) - assert out["count"] == 1 - assert out["cleared"][0]["stage"] == 1 - - # Second reset is a no-op - out2 = _run([ - "reset-in-flight", "--run", run_id, "--stage", "1", - "--project-root", str(ws["project"]), - ]) - assert out2["count"] == 0 - finally: - _cleanup(ws) - - -def test_reset_in_flight_all_stages_when_no_stage_arg(): - ws = _setup_workspace() - try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - out = _run([ - "reset-in-flight", "--run", run_id, - "--project-root", str(ws["project"]), - ]) - assert out["count"] >= 1 - finally: - _cleanup(ws) - - -# ---------- R9: state migration ---------- - -def test_load_state_migrates_unversioned_state(): - """A state.json with no `version` key (legacy) should load + migrate.""" - import task_swarm_state as S - ws = _setup_workspace() - try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - run_dir = ws["project"] / ".task-swarm" / "runs" / run_id - - # Strip version field to simulate pre-migration state.json - state_path = run_dir / "state.json" - raw = json.loads(state_path.read_text(encoding="utf-8")) - raw.pop("version", None) - state_path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8") - - # load_state should still succeed (migrate_state is a no-op at v1 today) - loaded = S.load_state(run_dir) - assert loaded["run_id"] == run_id - - - finally: - _cleanup(ws) - - -def test_writeback_annotates_reviewer_p0_advisory(): - """R3: reviewer P0 / advisory items end up as `> ⚠️` notes in tasks.md.""" - ws = _setup_workspace() - 
try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - - # Fork coder, write result, advance - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - coder_ws = Path(nxt["workspace"]) - (coder_ws / "outbox" / "result.md").write_text( - "# 阶段 1 结果\n\n## 子任务状态\n- 1.1 写 a: done — src/a.py\n\n## 关键变更\n- x\n\nSTATUS: ok\n", - encoding="utf-8", - ) - _run([ - "parse", "--run", run_id, "--stage", "1", "--role", "coder", "--round", "1", - "--project-root", str(ws["project"]), - ]) - _run([ - "advance", "--run", run_id, "--stage", "1", "--role", "coder", "--round", "1", - "--judgment", "ok", "--project-root", str(ws["project"]), - ]) - - # Reviewer reports P0 + advisory_p0 - nxt = _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - rev_ws = Path(nxt["workspace"]) - (rev_ws / "outbox" / "review.md").write_text( - "## 结论\nneeds-changes\n\n" - "## P0 — 阻塞,coder 必须修复(修完才能进 validator)\n" - "- src/a.py:10 [security] — 缺密码校验\n" - "- src/a.py:20 — 主观印象,未带标签\n\n" - "## P1 — 建议\n- ...\n\n## P2 — 可选\n- ...\n\n" - "STATUS: ok\n", - encoding="utf-8", - ) - _run([ - "parse", "--run", run_id, "--stage", "1", "--role", "reviewer", "--round", "1", - "--project-root", str(ws["project"]), - ]) - _run([ - "advance", "--run", run_id, "--stage", "1", "--role", "reviewer", "--round", "1", - "--judgment", "p0", "--project-root", str(ws["project"]), - ]) - - # Writeback — annotation should land in tasks.md - _run(["next", "--run", run_id, "--project-root", str(ws["project"])]) - _run([ - "writeback", "--run", run_id, "--stage", "1", - "--project-root", str(ws["project"]), - ]) - text = ws["tasks"].read_text(encoding="utf-8") - assert "评审建议" in text - assert "[security]" in text - # advisory item present with (adv) marker - assert "(adv)" in text - # stage flipped to [x] regardless of reviewer P0 (advisory) - assert "- [x] 1. 
实现 A" in text - finally: - _cleanup(ws) - - -def test_load_state_warns_on_future_version(): - import task_swarm_state as S - ws = _setup_workspace() - try: - init_out = _run([ - "init", - "--tasks", str(ws["tasks"]), - "--project-root", str(ws["project"]), - ]) - run_id = init_out["run_id"] - run_dir = ws["project"] / ".task-swarm" / "runs" / run_id - state_path = run_dir / "state.json" - raw = json.loads(state_path.read_text(encoding="utf-8")) - raw["version"] = 999 # newer than runtime - state_path.write_text(json.dumps(raw, ensure_ascii=False, indent=2), encoding="utf-8") - - loaded = S.load_state(run_dir) - assert any("newer than runtime" in w for w in loaded.get("warnings", [])) - finally: - _cleanup(ws) - - -def test_fork_description_r1_no_scope(): - """First round (round=1) gets plain '阶段 N role: title' without -rN or [scope].""" - desc = TS._fork_description(3, "coder", 1, None, "实现 A") - assert desc == "阶段 3 coder: 实现 A" - - -def test_fork_description_includes_scope_for_validator_fail_fix(): - """r2 coder triggered by validator fail must show [validator-fail-fix] so the - orchestrator can't mis-narrate it as a reviewer P0 fix loop.""" - desc = TS._fork_description(5, "coder", 2, "validator-fail-fix", "检查点 — Mascot 独立可控") - assert "-r2" in desc - assert "[validator-fail-fix]" in desc - assert "检查点 — Mascot 独立可控" in desc - assert "P0" not in desc - - -def test_fork_description_reviewer_advisory_scope(): - desc = TS._fork_description(1, "reviewer", 1, "advisory", "实现 A") - assert "reviewer" in desc - assert "[advisory]" in desc + +def test_coder_prompt_includes_project_root_from_spec_config(tmp_path, run_swarm): + """0.10.15+:spec_dir/.config.json.project_root 必须被 task-swarm prompt + 渲染为 `## 项目根目录与路径规约` 段,明确告知 subagent 写到 project_root + 不是 spec_dir。 + + note: 把 spec_dir 设为 tmp_path 本身(fixture monkeypatch.chdir 已切到 tmp_path), + 这样 _find_run_dir 能从 cwd 同级找到 run_dir。 + """ + # spec_dir == tmp_path(cwd),让 init 自动推断 + plan 能定位 run_dir + project_root = tmp_path / 
"my-app" + project_root.mkdir() + (tmp_path / ".config.json").write_text(json.dumps({ + "slug": "demo", + "phase": "tasks", + "project_root": str(project_root), + }), encoding="utf-8") + tasks_md = tmp_path / "tasks.md" + tasks_md.write_text( + "## 阶段 1: 阶段 1\n" + "- [ ] 1.1 任务 @writes:src/f1.py _需求:1.1_\n", + encoding="utf-8", + ) + + init = json.loads(run_swarm("init", "--tasks", str(tasks_md)).stdout) + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", init["run_id"]) + + coder_task_md = run_dir / "agents" / "coder-g1-s1-r1" / "task.md" + assert coder_task_md.exists(), f"coder task.md 不存在:{coder_task_md}" + content = coder_task_md.read_text(encoding="utf-8") + # project_root 出现在 context block + 路径规约段 + assert str(project_root) in content + assert "项目根目录与路径规约" in content + assert "spec_dir" in content + assert "严禁" in content # 禁止把代码写到 spec_dir + + +def test_writeback_handles_multi_line_reproduce_cmd(tmp_path, run_swarm): + """0.10.21+ 回归:validator pass 的 reproduce_cmd 含多行(cd ... 
+ node -e "...") + 不应让 writeback 报"line-safe 越界"。 + + 历史 bug(login-page 现场):_format_findings_block 把 multi-line cmd 拼成 + inline string,写入 tasks.md 后被 splitlines 拆出非 `>` 前缀的行, + _verify_line_safe 报"writeback 越界:line N 原 '## 阶段 5:' 新 '# 验证 ...'"。 + """ + # 自造 tasks.md(2 阶段,触发 stage 间插入注释) + p = tmp_path / "tasks.md" + p.write_text( + "## 阶段 1: 阶段 1\n" + "- [ ] 1.1 任务 @writes:src/f1.py _需求:1.1_\n" + "\n" + "## 阶段 2: 阶段 2\n" + "- [ ] 2.1 任务2 @writes:src/g2.py @depends-on:1 _需求:2.1_\n", + encoding="utf-8", + ) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + # 手工写 validator validation.md,含多行 reproduce_cmd + out_dir = run_dir / "agents" / "validator-g1-r1" / "outbox" + out_dir.mkdir(parents=True, exist_ok=True) + multi_line_cmd_body = ( + "# v\n" + "## 判定\npass\n" + "## 复现命令\n" + "```bash\n" + "cd /tmp/project\n" + "\n" + "# 验证 P0 修复\n" + "node -e \"console.log('ok')\"\n" + "```\n" + "## 按子任务的验证结果\n- [x] 1.1: pass\n\n" + "STATUS: ok\n" + ) + (out_dir / "validation.md").write_text(multi_line_cmd_body, encoding="utf-8") + run_swarm("advance", "--run", run_id, "--phase", "validation", "--round", "1") + # writeback 必须成功(不报越界) + cp = run_swarm("writeback", "--run", run_id, "--group", "1") + assert cp.returncode == 0, ( + f"writeback 失败:stderr={cp.stderr}\nstdout={cp.stdout}" + ) + tasks_after = p.read_text(encoding="utf-8") + # 多行命令应该以 ``` fenced 块的形式出现,每行带 > + assert "validator g1-r1 pass,复现命令" in tasks_after + assert "> ```" in tasks_after + assert "> cd /tmp/project" in tasks_after + assert "> # 验证 P0 修复" in tasks_after + + +def test_init_skip_validator_flag_persists_to_state(tmp_path, run_swarm): + """0.10.20+:init 
--skip-validator 把 skip_validator=true 写入 state.json。""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads( + run_swarm("init", "--tasks", str(p), "--skip-validator").stdout + ) + run_dir = Path(init["run_dir"]) + state = json.loads((run_dir / "state.json").read_text(encoding="utf-8")) + assert state["skip_validator"] is True + + +def test_init_without_flag_defaults_to_full_mode(tmp_path, run_swarm): + """0.10.20+:默认 skip_validator=false(兼容老行为)。""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads(run_swarm("init", "--tasks", str(p)).stdout) + run_dir = Path(init["run_dir"]) + state = json.loads((run_dir / "state.json").read_text(encoding="utf-8")) + assert state["skip_validator"] is False + + +def test_skip_validator_review_no_p0_skips_validation(tmp_path, run_swarm): + """0.10.20+:skip_validator=true + review 无 P0 → advance review 后直接进 writeback, + 跳过 validation。""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads( + run_swarm("init", "--tasks", str(p), "--skip-validator").stdout + ) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + cp = run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["phase"] == "writeback", ( + f"skip_validator 模式无 P0 → 应直接进 writeback,实际 phase={out['phase']}" + ) + + +def test_skip_validator_p0_fix_done_skips_validation(tmp_path, run_swarm): + """0.10.20+:skip_validator=true + p0-fix 完成 → 直接进 writeback。""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads( + run_swarm("init", "--tasks", str(p), "--skip-validator").stdout + ) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, 
"coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=True) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + # p0-fix coder 返回 + _write_coder_result(run_dir, "coder-p0fix-g1-r1-f0") + cp = run_swarm("advance", "--run", run_id, "--phase", "p0-fix", "--round", "1") + assert cp.returncode == 0, cp.stderr + out = json.loads(cp.stdout) + assert out["phase"] == "writeback", ( + f"skip_validator 模式 p0-fix 完 → 应直接进 writeback,实际 phase={out['phase']}" + ) + + +def test_skip_validator_writeback_writes_skipped_note(tmp_path, run_swarm): + """0.10.20+:skip_validator 模式 writeback 时 tasks.md 注释块写 + "⏭️ validator 已跳过(人工验收模式)"。""" + p = _write_tasks_md(tmp_path, num_stages=1) + init = json.loads( + run_swarm("init", "--tasks", str(p), "--skip-validator").stdout + ) + run_id = init["run_id"] + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", run_id) + _write_coder_result(run_dir, "coder-g1-s1-r1") + run_swarm("advance", "--run", run_id, "--phase", "coding", "--round", "1") + _write_reviewer(run_dir, 1, with_p0=False) + run_swarm("advance", "--run", run_id, "--phase", "review", "--round", "1") + cp = run_swarm("writeback", "--run", run_id, "--group", "1") + assert cp.returncode == 0, cp.stderr + tasks_after = p.read_text(encoding="utf-8") + assert "validator 已跳过" in tasks_after + assert "人工验收模式" in tasks_after + # 不应当出现 "✅ validator g1-rN pass" + assert "✅ validator" not in tasks_after + + +def test_coder_prompt_fallback_when_project_root_unset(tmp_path, run_swarm): + """老 spec 兼容:.config.json 没有 project_root → prompt 给出 fallback 文本, + 不阻断流程(保持 0.10.14 及之前的行为)。""" + (tmp_path / ".config.json").write_text(json.dumps({ + "slug": "legacy", + "phase": "tasks", + # project_root 字段缺失(模拟 pre-0.10.15 spec) + }), encoding="utf-8") + tasks_md = tmp_path / "tasks.md" + tasks_md.write_text( + "## 阶段 1: 阶段 1\n- [ ] 1.1 任务 @writes:src/f.py _需求:1.1_\n", + encoding="utf-8", + ) + 
init = json.loads(run_swarm("init", "--tasks", str(tasks_md)).stdout) + run_dir = Path(init["run_dir"]) + run_swarm("plan", "--run", init["run_id"]) + + coder_task_md = run_dir / "agents" / "coder-g1-s1-r1" / "task.md" + content = coder_task_md.read_text(encoding="utf-8") + # fallback 提示出现(未设置 project_root 时) + assert "未设置" in content diff --git a/plugins/specode/tests/test_task_swarm_guard.py b/plugins/specode/tests/test_task_swarm_guard.py deleted file mode 100644 index 461b1be..0000000 --- a/plugins/specode/tests/test_task_swarm_guard.py +++ /dev/null @@ -1,248 +0,0 @@ -"""Unit tests for task_swarm_guard (INV-7/8/9/10).""" -from __future__ import annotations - -import sys -import tempfile -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm_guard as G # noqa: E402 -import task_swarm_prompt as PR # noqa: E402 - - -# ---------- INV-7 ---------- - -def test_inv7_accepts_prefixed_types(): - for r in ("coder", "reviewer", "validator", "planner"): - decision, _ = G.check_inv7_subagent_type(f"specode:task-swarm-{r}") - assert decision == "ok" - - -def test_inv7_rejects_general_purpose(): - decision, msg = G.check_inv7_subagent_type("general-purpose") - assert decision == "deny" - assert "INV-7" in msg - assert "general-purpose" in msg - - -def test_inv7_rejects_missing_prefix(): - decision, msg = G.check_inv7_subagent_type("task-swarm-coder") - assert decision == "deny" - assert "specode:" in msg - - -def test_inv7_rejects_empty(): - decision, _ = G.check_inv7_subagent_type("") - assert decision == "deny" - - -# ---------- INV-8 ---------- - -def _make_subagent_ws(tmp: Path, writes: list[str]) -> tuple[Path, Path, Path]: - """Build a fake subagent workspace with task.md declaring writes. - - Returns (workspace, project_root, spec_dir). 
- """ - proj = tmp / "project" - proj.mkdir() - spec = tmp / "spec-dir" - spec.mkdir() - run_dir = proj / ".task-swarm" / "runs" / "run-1" - ws = run_dir / "agents" / "stage-1-coder" - (ws / "inbox").mkdir(parents=True) - (ws / "outbox").mkdir(parents=True) - writes_line = ", ".join(writes) if writes else "(本阶段无 @writes 声明文件)" - task_md = ( - "## 边界\n" - f"- 项目根: {proj}\n" - f"- @writes(你只能修改这些路径): {writes_line}\n" - ) - (ws / "task.md").write_text(task_md, encoding="utf-8") - return ws, proj, spec - - -def test_inv8_allows_writes_listed_file(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/a.py", "src/b.py"]) - target = proj / "src/a.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("x") - decision, _ = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "ok" - - -def test_inv8_rejects_unlisted_file(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/a.py"]) - target = proj / "src/other.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("x") - decision, msg = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "deny" - assert "INV-8" in msg - - -def test_inv8_rejects_spec_dir_writes(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/a.py"]) - target = spec / "requirements.md" - target.write_text("x") - decision, msg = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "deny" - assert "spec 文档" in msg - - -def test_inv8_allows_outbox_writes(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/a.py"]) - target = ws / "outbox" / "result.md" - decision, _ = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "ok" - - -# ---------- R4: directory-form @writes ---------- - -def 
test_inv8_dir_with_trailing_slash_allows_child(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/api/"]) - target = proj / "src/api/__init__.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("") - decision, _ = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "ok" - - -def test_inv8_dir_glob_allows_nested(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/api/**"]) - target = proj / "src/api/v1/users.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("") - decision, _ = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "ok" - - -def test_inv8_dir_does_not_match_sibling(): - with tempfile.TemporaryDirectory() as td: - tmp = Path(td) - ws, proj, spec = _make_subagent_ws(tmp, ["src/api/"]) - target = proj / "src/admin/users.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("") - decision, msg = G.check_inv8_writes_boundary(target, ws, proj, spec) - assert decision == "deny" - assert "INV-8" in msg - - -# ---------- INV-9 ---------- - -OLD_TASKS = """\ -- [ ] 1. 实现登录 - - [ ] 1.1 写 model - - 文件:src/m.py - - _需求:1.1_ - -- [ ] 2. 检查点 -""" - - -def test_inv9_allows_checkbox_swap(): - new = OLD_TASKS.replace("- [ ] 1.1", "- [x] 1.1") - decision, _ = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "ok" - - -def test_inv9_allows_annotation_insert(): - new = OLD_TASKS.replace( - "- [ ] 2. 检查点\n", - "- [ ] 2. 
检查点\n > ✔ task-swarm 收敛\n", - ) - decision, _ = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "ok" - - -def test_inv9_rejects_traceability_change(): - new = OLD_TASKS.replace("_需求:1.1_", "_需求:1.2_") - decision, msg = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "deny" - assert "INV-9" in msg - - -def test_inv9_rejects_file_metadata_change(): - new = OLD_TASKS.replace("文件:src/m.py", "文件:src/n.py") - decision, _ = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "deny" - - -def test_inv9_rejects_title_change(): - new = OLD_TASKS.replace("写 model", "写 model V2") - decision, _ = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "deny" - - -def test_inv9_rejects_arbitrary_line_insert(): - new = OLD_TASKS.replace( - "- [ ] 1.1 写 model\n", - "- [ ] 1.1 写 model\n - 新增的设计说明\n", - ) - decision, _ = G.check_inv9_tasks_md_diff(OLD_TASKS, new) - assert decision == "deny" - - -# ---------- INV-10 ---------- - -def test_inv10_passes_valid_outbox(): - with tempfile.TemporaryDirectory() as td: - outbox = Path(td) - (outbox / "result.md").write_text( - "## 子任务状态\n- 1.1 写 a: done — src/a.py\n\n## 关键变更\n- x\n\nSTATUS: ok\n" - ) - decision, _ = G.check_inv10_outbox_schema("coder", outbox) - assert decision == "ok" - - -def test_inv10_rejects_missing_subtask_section(): - with tempfile.TemporaryDirectory() as td: - outbox = Path(td) - (outbox / "result.md").write_text("## 关键变更\n- x\n\nSTATUS: ok\n") - decision, msg = G.check_inv10_outbox_schema("coder", outbox) - assert decision == "deny" - assert "INV-10" in msg - - -def test_inv10_rejects_missing_p0_section(): - with tempfile.TemporaryDirectory() as td: - outbox = Path(td) - (outbox / "review.md").write_text("## 结论\napproved\n\nSTATUS: ok\n") - decision, msg = G.check_inv10_outbox_schema("reviewer", outbox) - assert decision == "deny" - - -def test_inv10_rejects_validator_fail_without_guidance(): - with tempfile.TemporaryDirectory() as td: - outbox = Path(td) - (outbox / 
"validation.md").write_text( - "## 判定\nfail\n\n## 复现命令\n```bash\nx\n```\n\nSTATUS: ok\n" - ) - decision, msg = G.check_inv10_outbox_schema("validator", outbox) - assert decision == "deny" - - -# ---------- active-run discovery ---------- - -def test_active_run_detection(): - with tempfile.TemporaryDirectory() as td: - proj = Path(td) - assert G.is_task_swarm_active(proj) is False - (proj / ".task-swarm").mkdir() - (proj / ".task-swarm" / "active-run").write_text("run-x") - (proj / ".task-swarm" / "runs" / "run-x").mkdir(parents=True) - assert G.is_task_swarm_active(proj) is True diff --git a/plugins/specode/tests/test_task_swarm_hook.py b/plugins/specode/tests/test_task_swarm_hook.py new file mode 100644 index 0000000..1faffe7 --- /dev/null +++ b/plugins/specode/tests/test_task_swarm_hook.py @@ -0,0 +1,181 @@ +"""tests for hook_on_task_completed — 不在 run 期间 exit 0;在 run 期间注入正确文本。""" +from __future__ import annotations + +import json +import os +import subprocess +import sys +import uuid +from pathlib import Path + +import pytest + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" + + +@pytest.fixture +def fake_env(tmp_path, monkeypatch): + home = tmp_path / "_home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + monkeypatch.setenv("USERPROFILE", str(home)) + monkeypatch.delenv("SPECODE_GUARD", raising=False) + return home + + +def _run(script: str, *args: str, stdin: str = "", env_extra: dict = None, + cwd: Path = None) -> subprocess.CompletedProcess: + env = os.environ.copy() + env.setdefault("PYTHONUTF8", "1") + env.setdefault("PYTHONIOENCODING", "utf-8") + if env_extra: + env.update(env_extra) + return subprocess.run( + [sys.executable, str(SCRIPTS_DIR / script), *args], + capture_output=True, text=True, encoding="utf-8", errors="replace", + input=stdin, env=env, timeout=30, + cwd=str(cwd) if cwd else None, + ) + + +def _make_session(home: Path, sid: str, task_swarm_run_id: str = None, + mode: str = "active") -> Path: + sd = home / 
".specode" / "sessions" + sd.mkdir(parents=True, exist_ok=True) + p = sd / f"{sid}.json" + p.write_text(json.dumps({ + "session_id": sid, + "mode": mode, + "task_swarm_run_id": task_swarm_run_id, + }), encoding="utf-8") + return p + + +def _write_tasks_md(tmp_path: Path) -> Path: + p = tmp_path / "tasks.md" + p.write_text( + "## 阶段 1: A\n" + "- [ ] 1.1 t @writes:f.py _需求:1.1_\n", + encoding="utf-8", + ) + return p + + +def test_hook_no_session_id_exits_0(fake_env): + cp = _run("spec_session.py", "on-task-completed", stdin=json.dumps({})) + assert cp.returncode == 0 + assert cp.stdout == "" + + +def test_hook_unknown_session_exits_0(fake_env): + sid = str(uuid.uuid4()) + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + assert cp.stdout == "" + + +def test_hook_no_task_swarm_run_id_exits_0(fake_env): + sid = str(uuid.uuid4()) + _make_session(fake_env, sid, task_swarm_run_id=None) + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid})) + assert cp.returncode == 0 + # 没在 run 中 → 不注入 + assert cp.stdout == "" + + +def test_hook_guard_off_exits_0(fake_env): + sid = str(uuid.uuid4()) + _make_session(fake_env, sid) + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid}), + env_extra={"SPECODE_GUARD": "off"}) + assert cp.returncode == 0 + assert cp.stdout == "" + + +def test_hook_in_run_injects_plan_context(fake_env, tmp_path): + # 建一个真实 task-swarm run + sid = str(uuid.uuid4()) + _make_session(fake_env, sid) + tasks_md = _write_tasks_md(tmp_path) + init_cp = _run("task_swarm.py", "init", "--tasks", str(tasks_md), + "--session", sid, cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + assert init_cp.returncode == 0, init_cp.stderr + init_out = json.loads(init_cp.stdout) + # sessions/<sid>.json.task_swarm_run_id 现在应当被设置 + sess = json.loads((fake_env / ".specode" / "sessions" / 
f"{sid}.json").read_text(encoding="utf-8")) + assert sess["task_swarm_run_id"] == init_out["run_id"] + # 触发 hook + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid}), + cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + assert cp.returncode == 0 + assert cp.stdout, "hook 应注入 additionalContext" + payload = json.loads(cp.stdout) + assert "hookSpecificOutput" in payload + ctx = payload["hookSpecificOutput"]["additionalContext"] + assert "task-swarm 节点提醒" in ctx + assert "本提醒仅供参考" in ctx + + +def test_hook_trailer_text_always_appended(fake_env, tmp_path): + sid = str(uuid.uuid4()) + _make_session(fake_env, sid) + tasks_md = _write_tasks_md(tmp_path) + init_cp = _run("task_swarm.py", "init", "--tasks", str(tasks_md), + "--session", sid, cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid}), + cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + payload = json.loads(cp.stdout) + ctx = payload["hookSpecificOutput"]["additionalContext"] + assert "fork 谁、是否 fork、何时 writeback 仍由你判断" in ctx + + +def test_hook_payload_session_id_synonym(fake_env, tmp_path): + """payload 用 'sessionId'(camelCase)也应被识别。""" + sid = str(uuid.uuid4()) + _make_session(fake_env, sid, task_swarm_run_id=None) + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"sessionId": sid}), + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + assert cp.returncode == 0 + # 没在 run 中 → 不注入 + assert cp.stdout == "" + + +def test_hook_event_name_is_post_tool_use(fake_env, tmp_path): + sid = str(uuid.uuid4()) + _make_session(fake_env, sid) + tasks_md = _write_tasks_md(tmp_path) + _run("task_swarm.py", "init", "--tasks", str(tasks_md), + "--session", sid, cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + cp = _run("spec_session.py", 
"on-task-completed", + stdin=json.dumps({"session_id": sid}), + cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + payload = json.loads(cp.stdout) + assert payload["hookSpecificOutput"]["hookEventName"] == "PostToolUse" + + +def test_hook_when_plan_fails_falls_back(fake_env, tmp_path): + """sessions.task_swarm_run_id 指向不存在的 run → plan 失败但 hook 仍 exit 0 兜底注入。""" + sid = str(uuid.uuid4()) + _make_session(fake_env, sid, task_swarm_run_id="nonexistent-run-id") + cp = _run("spec_session.py", "on-task-completed", + stdin=json.dumps({"session_id": sid}), + cwd=tmp_path, + env_extra={"HOME": str(fake_env), "USERPROFILE": str(fake_env)}) + assert cp.returncode == 0 + # 兜底文本应包含 run_id 和 plan 调用失败提示 + if cp.stdout: + payload = json.loads(cp.stdout) + ctx = payload["hookSpecificOutput"]["additionalContext"] + assert "nonexistent-run-id" in ctx or "task-swarm" in ctx diff --git a/plugins/specode/tests/test_task_swarm_hook_integration.py b/plugins/specode/tests/test_task_swarm_hook_integration.py deleted file mode 100644 index b45e4fa..0000000 --- a/plugins/specode/tests/test_task_swarm_hook_integration.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Integration tests for task-swarm hook hooks inside spec_guard. - -Exercises the same dispatch path that hooks.json triggers, ensuring -INV-7/8/9 fire correctly when task-swarm is active. 
-""" -from __future__ import annotations - -import io -import json -import shutil -import sys -import tempfile -from contextlib import redirect_stderr, redirect_stdout -from pathlib import Path - -import pytest - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import spec_guard # noqa: E402 -import spec_state # noqa: E402 - - -def _call(sub: str, payload: dict) -> tuple[int, str, str]: - stdin = io.StringIO(json.dumps(payload)) - out = io.StringIO() - err = io.StringIO() - real_stdin = sys.stdin - sys.stdin = stdin - try: - with redirect_stdout(out), redirect_stderr(err): - rc = spec_guard.main(["spec_guard", sub]) - finally: - sys.stdin = real_stdin - return rc, out.getvalue(), err.getvalue() - - -@pytest.fixture -def swarm_workspace(): - """Build a project with an active task-swarm run, but NO active spec. - - Tests below exercise INV-7/8/9 in isolation — they don't need a real - spec session because find_active_spec is monkeypatched. - """ - tmp = Path(tempfile.mkdtemp(prefix="ts-hook-")) - proj = tmp / "project" - proj.mkdir() - spec = tmp / "spec-dir" - spec.mkdir() - (spec / "tasks.md").write_text( - "- [ ] 1. 
T\n - [ ] 1.1 a\n - 文件:src/a.py\n - _需求:1.1_\n", - encoding="utf-8", - ) - (spec / ".config.json").write_text(json.dumps({"specId": "test"})) - - run_id = "run-test" - run_dir = proj / ".task-swarm" / "runs" / run_id - (run_dir / "agents" / "stage-1-coder" / "inbox").mkdir(parents=True) - (run_dir / "agents" / "stage-1-coder" / "outbox").mkdir(parents=True) - (run_dir / "agents" / "stage-1-coder" / "task.md").write_text( - "## 边界\n- @writes(你只能修改这些路径): src/a.py\n", - encoding="utf-8", - ) - (proj / ".task-swarm" / "active-run").write_text(run_id, encoding="utf-8") - - original = spec_state.find_active_spec - - def fake_find(prefer_session_id=None): - return { - "spec_slug": "test-spec", - "spec_dir": str(spec), - "current_phase": "implementation", - "session_id": "test-sess", - "spec_id": "test", - "last_activity_at": "2026-05-15T00:00:00Z", - } - - spec_state.find_active_spec = fake_find - - yield { - "tmp": tmp, - "proj": proj, - "spec": spec, - "run_dir": run_dir, - "ws": run_dir / "agents" / "stage-1-coder", - } - - spec_state.find_active_spec = original - shutil.rmtree(tmp, ignore_errors=True) - - -# ---------- INV-7 ---------- - -def test_inv7_blocks_general_purpose_task(swarm_workspace): - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Task", - "tool_input": {"subagent_type": "general-purpose", "prompt": "x"}, - } - rc, _, err = _call("pre-tool-use", payload) - assert rc == 2 - assert "INV-7" in err - - -def test_inv7_allows_prefixed_subagent(swarm_workspace): - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Task", - "tool_input": {"subagent_type": "specode:task-swarm-coder", "prompt": "x"}, - } - rc, _, _ = _call("pre-tool-use", payload) - assert rc == 0 - - -def test_inv7_inert_when_no_active_run(swarm_workspace): - # Remove active-run pointer - (swarm_workspace["proj"] / ".task-swarm" / "active-run").unlink() - payload = { - "session_id": "test-sess", - 
"cwd": str(swarm_workspace["proj"]), - "tool_name": "Task", - "tool_input": {"subagent_type": "general-purpose", "prompt": "x"}, - } - rc, _, _ = _call("pre-tool-use", payload) - assert rc == 0 # no swarm active → no INV-7 - - -# ---------- INV-8 ---------- - -def test_inv8_blocks_edit_outside_writes(swarm_workspace): - target = swarm_workspace["proj"] / ".task-swarm" / "runs" / "run-test" / "agents" / "stage-1-coder" / "scratch.py" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text("x") - # Edit inside subagent ws but NOT in outbox AND not in @writes → deny - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Edit", - "tool_input": {"file_path": str(target), "old_string": "x", "new_string": "y"}, - } - rc, _, err = _call("pre-tool-use", payload) - # scratch.py is inside agent workspace but not in outbox and not in - # @writes (src/a.py) — INV-8 should deny. - assert rc == 2 - assert "INV-8" in err - - -def test_inv8_allows_outbox_writes(swarm_workspace): - target = swarm_workspace["ws"] / "outbox" / "result.md" - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Write", - "tool_input": {"file_path": str(target), "content": "STATUS: ok\n"}, - } - rc, _, _ = _call("pre-tool-use", payload) - assert rc == 0 - - -# ---------- INV-9 ---------- - -def test_inv9_blocks_traceability_change(swarm_workspace): - tasks = swarm_workspace["spec"] / "tasks.md" - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Edit", - "tool_input": { - "file_path": str(tasks), - "old_string": "_需求:1.1_", - "new_string": "_需求:1.2_", - }, - } - rc, _, err = _call("pre-tool-use", payload) - assert rc == 2 - assert "INV-9" in err - - -def test_inv9_allows_checkbox_swap(swarm_workspace): - tasks = swarm_workspace["spec"] / "tasks.md" - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Edit", - 
"tool_input": { - "file_path": str(tasks), - "old_string": "- [ ] 1.1 a", - "new_string": "- [x] 1.1 a", - }, - } - rc, _, err = _call("pre-tool-use", payload) - assert rc == 0, err - - -def test_user_prompt_submit_injects_swarm_block(swarm_workspace): - """When a swarm run is active, UserPromptSubmit block includes swarm summary.""" - # Init a minimal state.json so the block has data to render. - state_path = swarm_workspace["run_dir"] / "state.json" - state_path.write_text(json.dumps({ - "run_id": "run-test", - "config": {"parallel": 3, "max_rounds": 3}, - "stages": [ - {"num": 1, "phase": "running", "in_flight": {"role": "coder", "round": 2}}, - {"num": 2, "phase": "pending"}, - {"num": 3, "phase": "converged"}, - ], - }), encoding="utf-8") - payload = {"session_id": "test-sess", "cwd": str(swarm_workspace["proj"])} - rc, out, _ = _call("user-prompt-submit", payload) - assert rc == 0 - body = json.loads(out) - block = body["hookSpecificOutput"]["additionalContext"] - assert "task-swarm" in block - assert "run-test" in block - assert "stage 1 coder r2" in block - assert "next:" in block - - -def test_user_prompt_submit_no_swarm_block_when_no_run(swarm_workspace): - (swarm_workspace["proj"] / ".task-swarm" / "active-run").unlink() - payload = {"session_id": "test-sess", "cwd": str(swarm_workspace["proj"])} - rc, out, _ = _call("user-prompt-submit", payload) - assert rc == 0 - body = json.loads(out) - block = body["hookSpecificOutput"]["additionalContext"] - assert "task-swarm" not in block - - -def test_inv9_inert_when_no_swarm_active(swarm_workspace): - (swarm_workspace["proj"] / ".task-swarm" / "active-run").unlink() - tasks = swarm_workspace["spec"] / "tasks.md" - payload = { - "session_id": "test-sess", - "cwd": str(swarm_workspace["proj"]), - "tool_name": "Edit", - "tool_input": { - "file_path": str(tasks), - "old_string": "_需求:1.1_", - "new_string": "_需求:1.2_", - }, - } - rc, _, err = _call("pre-tool-use", payload) - # No swarm → INV-9 doesn't trigger. 
Note: classify_path will classify - # tasks.md as spec-doc → INV-3 lock check. With our fixture there's no - # real lock model so it returns "not_held" or similar (ok). Expect rc=0. - # However spec_session may raise SystemExit if .config.json is malformed - # — the check_verify_lock helper swallows it. - assert rc == 0 diff --git a/plugins/specode/tests/test_task_swarm_outbox.py b/plugins/specode/tests/test_task_swarm_outbox.py index d8effe5..173dd70 100644 --- a/plugins/specode/tests/test_task_swarm_outbox.py +++ b/plugins/specode/tests/test_task_swarm_outbox.py @@ -1,342 +1,233 @@ -"""Unit tests for task_swarm_outbox parsers.""" +"""tests for task_swarm_outbox.py — 3 类产物 schema 校验。""" from __future__ import annotations import sys -import tempfile from pathlib import Path -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) +import pytest -import task_swarm_outbox as O # noqa: E402 +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) +from task_swarm._outbox import ( # noqa: E402 + ParseError, parse_coder_result, parse_reviewer_review, parse_validator_validation, +) -# ---------- result.md ---------- -RESULT_OK = """\ -# 阶段 1 执行结果 +# ------------------------------------------------------------------------- +# coder +# ------------------------------------------------------------------------- -## 子任务状态 -- 1.1 写 user model: done — src/models/user.py -- 1.2 写 auth service: done — src/auth/service.py -- 1.3 写 controller: done — src/api/login.py +def _write(p: Path, text: str) -> Path: + p.write_text(text, encoding="utf-8") + return p -## 关键变更 -- 新增 User dataclass -STATUS: ok -""" - - -def test_parse_result_ok(): - v = O.parse_result(RESULT_OK) - assert v.judgment == "ok" - assert len(v.subtasks) == 3 - assert v.subtasks[0]["num"] == "1.1" - assert v.subtasks[0]["status"] == "done" - - -def test_parse_result_failed(): - text = 
RESULT_OK.replace("STATUS: ok", "STATUS: failed: ImportError 缺 init") - v = O.parse_result(text) - assert v.judgment == "failed" - assert "ImportError" in v.status_reason - - -def test_parse_result_missing_status(): - text = "## 子任务状态\n- 1.1 x: done — src/x.py\n" - v = O.parse_result(text) - assert v.judgment == "schema-error" - assert any("STATUS" in e for e in v.raw_errors) - - -def test_parse_result_missing_subtasks_section(): - text = "## 关键变更\n- 新增 X\n\nSTATUS: ok\n" - v = O.parse_result(text) - assert v.judgment == "schema-error" - assert any("子任务状态" in e for e in v.raw_errors) - - -def test_parse_result_empty_subtasks_section(): - text = "## 子任务状态\n\n## 关键变更\nx\n\nSTATUS: ok\n" - v = O.parse_result(text) - assert v.judgment == "schema-error" - - -# ---------- review.md ---------- - -REVIEW_APPROVED = """\ -## 结论 -approved-with-comments - -## P0 — 阻塞,coder 必须修复(修完才能进 validator) -(none) - -## P1 — 建议修复,不阻塞 -- src/models/user.py:12 — email 字段没做格式校验 - -## P2 — 可选改进 -- 命名 auth_svc 可改 - -STATUS: ok -""" - - -def test_parse_review_approved(): - v = O.parse_review(REVIEW_APPROVED) - assert v.judgment == "approved" - assert v.p0_count == 0 - assert v.loop_warning is False - - -REVIEW_P0 = """\ -## 结论 -needs-changes - -## P0 — 阻塞,coder 必须修复(修完才能进 validator) -- src/auth/service.py:34 [req:1.3] — login 失败没区分错误码 -- src/api/login.py:8 [security] — 缺 rate limit - -## P1 — 建议修复,不阻塞 -- src/models/user.py:12 — email - -## P2 — 可选改进 -- 命名 - -STATUS: ok -""" - - -def test_parse_review_p0_items(): - v = O.parse_review(REVIEW_P0) - assert v.judgment == "p0" - assert v.p0_count == 2 - assert "service.py" in v.p0_items[0] - - -def test_parse_review_loop_warning(): - text = "## 进入死循环风险\n连续 2 轮同 P0\n\n" + REVIEW_P0 - v = O.parse_review(text) - assert v.judgment == "loop" - assert v.loop_warning is True - - -# ---------- P0 evidence-tag rules (C) ---------- - -REVIEW_P0_NO_TAGS = """\ -## 结论 -needs-changes - -## P0 — 阻塞,coder 必须修复(修完才能进 validator) -- src/auth/service.py:34 — 
我觉得这里命名不够清晰 -- src/api/login.py:8 — 可以加点防御性校验 - -## P1 — 建议修复,不阻塞 -- ... - -## P2 — 可选改进 -- 命名 - -STATUS: ok -""" - - -def test_parse_review_no_evidence_tags_downgrades_to_advisory(): - """P0 lines without [req:..]/[security]/[contract] tag → advisory only.""" - v = O.parse_review(REVIEW_P0_NO_TAGS) - assert v.judgment == "approved" # no blocking P0 → approved - assert v.p0_count == 0 - assert v.advisory_p0_count == 2 - assert "service.py" in v.advisory_p0_items[0] - - -REVIEW_P0_MIXED = """\ -## 结论 -needs-changes - -## P0 — 阻塞,coder 必须修复(修完才能进 validator) -- src/auth/service.py:34 [req:1.3] — 违反 SHALL 1.3 -- src/api/login.py:8 — 我觉得这里可以更好(无证据) -- src/api/login.py:22 [contract] — 接口契约不一致 - -## P1 — 建议 -- ... - -## P2 — 可选 -- ... - -STATUS: ok -""" - - -def test_parse_review_mixed_tags_only_tagged_block(): - v = O.parse_review(REVIEW_P0_MIXED) - assert v.judgment == "p0" - assert v.p0_count == 2 # only the two tagged items - assert v.advisory_p0_count == 1 - assert "[req:1.3]" in v.p0_items[0] - assert "[contract]" in v.p0_items[1] - assert "无证据" in v.advisory_p0_items[0] - - -def test_parse_review_req_tag_with_dotted_id(): - text = REVIEW_P0_MIXED.replace("[req:1.3]", "[req:2.4.7]") - v = O.parse_review(text) - assert v.p0_count == 2 - assert "[req:2.4.7]" in v.p0_items[0] - - -def test_parse_review_tag_case_insensitive(): - text = REVIEW_P0_MIXED.replace("[security]", "[SECURITY]").replace("[contract]", "[Contract]") - v = O.parse_review(text) - # Two tagged items remain blocking (one with [req:1.3], one with [Contract]) - assert v.p0_count == 2 - - -def test_parse_review_missing_p0_section(): - text = "## 结论\napproved\n\nSTATUS: ok\n" - v = O.parse_review(text) - assert v.judgment == "schema-error" - assert any("P0" in e for e in v.raw_errors) - - -def test_parse_review_missing_status(): - text = "## 结论\napproved\n\n## P0 — 阻塞\n(none)\n" - v = O.parse_review(text) - assert v.judgment == "schema-error" - - -def test_parse_review_short_p0_heading(): - """Reviewer 
might emit `## P0` instead of the long form.""" - text = ( - "## 结论\nneeds-changes\n\n" - "## P0\n- src/x.py:1 [req:1.1] — bad\n\nSTATUS: ok\n" +def test_coder_result_ok(tmp_path): + p = _write(tmp_path / "r.md", + "# coder\n\n" + "## 上下文\n- specId: x\n\n" + "## 子任务状态\n- 1.1 user: done — src/u.py\n- 1.2 sess: failed — Imp\n\n" + "## 关键变更\n- 新增 model\n\n" + "STATUS: ok\n" ) - v = O.parse_review(text) - assert v.judgment == "p0" - assert v.p0_count == 1 - - -# ---------- validation.md ---------- - -VALIDATION_PASS = """\ -## 判定 -pass - -## 复现命令 -```bash -pytest tests/test_login.py -``` - -## 按子任务的验证结果 -- [x] 1.1 user model -- [x] 1.2 auth service - -STATUS: ok -""" + res = parse_coder_result(p) + assert res.status == "ok" + assert len(res.subtasks) == 2 + assert res.subtasks[0].status == "done" + assert res.subtasks[1].status == "failed" + assert "新增 model" in res.key_changes -def test_parse_validation_pass(): - v = O.parse_validation(VALIDATION_PASS) - assert v.judgment == "pass" - - -VALIDATION_FAIL = """\ -## 判定 -fail - -## 复现命令 -```bash -pytest tests/test_lockout.py -``` - -## 失败现场 -expected 423 got 401 - -## 按子任务的验证结果 -- [ ] 1.3 controller: fail - -## 给 coder 的修复指引(必填) -- 文件: src/api/login.py -- 位置: login() 失败分支 -- 问题: 没调 lockout +def test_coder_result_failed_status(tmp_path): + p = _write(tmp_path / "r.md", + "# coder\n\n## 上下文\n- a\n\n## 子任务状态\n## 关键变更\n\nSTATUS: failed: ImportError\n" + ) + res = parse_coder_result(p) + assert res.status == "failed" + assert "ImportError" in res.status_reason -STATUS: ok -""" +def test_coder_result_blocked(tmp_path): + p = _write(tmp_path / "r.md", + "# coder\n## 子任务状态\n## 关键变更\nSTATUS: blocked: 等上游\n" + ) + res = parse_coder_result(p) + assert res.status == "blocked" + assert "等上游" in res.status_reason -def test_parse_validation_fail_with_guidance(): - v = O.parse_validation(VALIDATION_FAIL) - assert v.judgment == "fail" - assert "src/api/login.py" in v.fix_files - assert "lockout" in v.fix_guidance +def 
test_coder_result_missing_status_raises(tmp_path): + p = _write(tmp_path / "r.md", + "# coder\n## 子任务状态\n- 1.1 a: done\n\n## 关键变更\n- x\n" + ) + with pytest.raises(ParseError) as ei: + parse_coder_result(p) + assert "STATUS" in str(ei.value) -def test_parse_validation_fail_without_guidance(): - text = VALIDATION_FAIL.replace("## 给 coder 的修复指引(必填)", "## 注释") - v = O.parse_validation(text) - assert v.judgment == "schema-error" - assert any("修复指引" in e for e in v.raw_errors) +def test_coder_result_nonexistent_file_raises(tmp_path): + with pytest.raises(ParseError) as ei: + parse_coder_result(tmp_path / "missing.md") + assert "不存在" in str(ei.value) -def test_parse_validation_loop(): - text = "## 进入死循环风险\n同一处 fail\n\n" + VALIDATION_FAIL - v = O.parse_validation(text) - assert v.judgment == "loop" +# ------------------------------------------------------------------------- +# reviewer +# ------------------------------------------------------------------------- -def test_parse_validation_missing_judgment_heading(): - text = ( - "## 复现命令\n```bash\nx\n```\n\n" - "## 给 coder 的修复指引\n- 文件: a.py\n\nSTATUS: ok\n" +def test_reviewer_with_p0_evidence(tmp_path): + p = _write(tmp_path / "r.md", + "# rev\n\n## 结论\nneeds-changes\n\n" + "## P0\n- src/a.py:34 [req:1.2] — login 失败未区分锁/密码错\n\n" + "## P1\n- src/b.py:5 — 缺校验\n\n" + "## P2\n- 命名建议\n\n" + "## 给使用者的提示\n- 注意\n\n" + "STATUS: ok\n" ) - v = O.parse_validation(text) - assert v.judgment == "schema-error" - - -def test_parse_validation_invalid_verdict_word(): - text = "## 判定\nmaybe\n\n## 复现命令\n```bash\nx\n```\n\nSTATUS: ok\n" - v = O.parse_validation(text) - assert v.judgment == "schema-error" + rev = parse_reviewer_review(p) + assert rev.verdict == "needs-changes" + assert len(rev.p0_items) == 1 + assert rev.p0_items[0].evidence_tags == ["req:1.2"] + assert len(rev.p1_items) == 1 + assert len(rev.p2_items) == 1 + assert rev.advisory_items == [] + + +def test_reviewer_p0_no_evidence_downgrades(tmp_path): + p = _write(tmp_path / 
"r.md", + "# rev\n\n## 结论\napproved-with-comments\n\n" + "## P0\n- src/a.py:34 — 没标签的 P0\n- src/b.py [security] — 有安全标签\n\n" + "## P1\n(none)\n\n## P2\n\nSTATUS: ok\n" + ) + rev = parse_reviewer_review(p) + assert len(rev.p0_items) == 1 + assert "security" in rev.p0_items[0].evidence_tags + assert len(rev.advisory_items) == 1 + assert rev.advisory_items[0].severity == "advisory" -# ---------- parse_outbox dispatch ---------- +def test_reviewer_approved_no_p0(tmp_path): + p = _write(tmp_path / "r.md", + "# rev\n\n## 结论\napproved\n\n## P0\n(none)\n\n## P1\n\n## P2\n\nSTATUS: ok\n" + ) + rev = parse_reviewer_review(p) + assert rev.verdict == "approved" + assert rev.p0_items == [] -def test_parse_outbox_dispatch(): - with tempfile.TemporaryDirectory() as td: - out = Path(td) - (out / "result.md").write_text(RESULT_OK) - (out / "review.md").write_text(REVIEW_P0) - (out / "validation.md").write_text(VALIDATION_PASS) - c = O.parse_outbox("coder", out) - assert c["judgment"] == "ok" - r = O.parse_outbox("reviewer", out) - assert r["judgment"] == "p0" - assert r["p0_count"] == 2 - v = O.parse_outbox("validator", out) - assert v["judgment"] == "pass" +def test_reviewer_missing_verdict_raises(tmp_path): + p = _write(tmp_path / "r.md", + "# rev\n\n## P0\n(none)\nSTATUS: ok\n" + ) + with pytest.raises(ParseError): + parse_reviewer_review(p) -def test_parse_outbox_missing_file(): - with tempfile.TemporaryDirectory() as td: - v = O.parse_outbox("coder", Path(td)) - assert v["judgment"] == "schema-error" - assert any("result.md" in e for e in v["errors"]) +def test_reviewer_bad_status_raises(tmp_path): + p = _write(tmp_path / "r.md", + "# rev\n## 结论\napproved\n## P0\n(none)\n\nSTATUS: failed\n" + ) + with pytest.raises(ParseError): + parse_reviewer_review(p) -# ---------- R10: STATUS must be the strict last non-empty line ---------- +# ------------------------------------------------------------------------- +# validator +# 
------------------------------------------------------------------------- -def test_status_in_middle_does_not_count(): - text = ( - "## 子任务状态\n" - "- 1.1 写 a: done — src/a.py\n" - "\n" +def test_validator_pass(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n\n## 判定\npass\n\n" + "## 复现命令\n```bash\npytest -v\n```\n\n" + "## 按子任务的验证结果\n- [x] 1.1 a: pass\n\n" + "STATUS: ok\n" + ) + val = parse_validator_validation(p) + assert val.verdict == "pass" + assert "pytest" in val.reproduce_cmd + assert len(val.subtask_results) == 1 + + +def test_validator_fail_with_fix_targets(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n\n## 判定\nfail\n\n" + "## 复现命令\n```bash\npytest -v\n```\n\n" + "## 按子任务的验证结果\n- [ ] 1.1 ctrl: fail — 5 次失败未锁\n\n" + "## 失败现场\n```\nFAILED tests/t.py::test_lockout\nAssertionError: expected 423\n```\n\n" + "## 给 coder 的修复指引\n" + "### 修复 1 — lockout 计数器\n" + "- 文件: src/login.py\n" + "- 位置: login() 失败分支\n" + "- 问题: 没计数\n" + "- 建议: 引入 lockout 模块\n" + "- _需求:1.3_\n\n" + "STATUS: ok\n" + ) + val = parse_validator_validation(p) + assert val.verdict == "fail" + assert val.failure_excerpt + assert len(val.fix_targets) == 1 + assert val.fix_targets[0].file_path == "src/login.py" + assert "1.3" in val.fix_targets[0].requirements + + +def test_validator_fail_signature_stable(tmp_path): + body = ( + "# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 按子任务的验证结果\n- [ ] 1.1 a: fail\n" + "## 失败现场\n```\nFAILED tests/t.py::test_a\nAssertionError: x\n```\n" + "## 给 coder 的修复指引\n### 修复 1\n- 文件: a.py\n" "STATUS: ok\n" - "\n" - "## 关键变更\n" - "- 后写的内容把 STATUS 推离末尾\n" ) - v = O.parse_result(text) - assert v.judgment == "schema-error" - assert any("STATUS" in e for e in v.raw_errors) + p1 = _write(tmp_path / "v1.md", body) + p2 = _write(tmp_path / "v2.md", body) + s1 = parse_validator_validation(p1).fail_signature() + s2 = parse_validator_validation(p2).fail_signature() + assert s1 == s2 + assert s1 # non-empty + + +def 
test_validator_fail_signature_differs_on_different_failure(tmp_path): + p1 = _write(tmp_path / "a.md", + "# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 失败现场\n```\nFAILED tests/t.py::test_a\nAssertionError: foo\n```\n" + "## 给 coder 的修复指引\n### 修复 1\n- 文件: a.py\nSTATUS: ok\n") + p2 = _write(tmp_path / "b.md", + "# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 失败现场\n```\nFAILED tests/t.py::test_b\nAssertionError: bar\n```\n" + "## 给 coder 的修复指引\n### 修复 1\n- 文件: a.py\nSTATUS: ok\n") + assert parse_validator_validation(p1).fail_signature() != parse_validator_validation(p2).fail_signature() + + +def test_validator_pass_no_signature(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n## 判定\npass\n## 复现命令\n```bash\npytest\n```\nSTATUS: ok\n") + val = parse_validator_validation(p) + assert val.fail_signature() == "" + + +def test_validator_fail_missing_failure_excerpt_raises(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 给 coder 的修复指引\n### 修复 1\n- 文件: a.py\n\nSTATUS: ok\n") + with pytest.raises(ParseError) as ei: + parse_validator_validation(p) + assert "失败现场" in str(ei.value) + + +def test_validator_fail_missing_fix_targets_raises(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n## 判定\nfail\n## 复现命令\n```bash\npytest\n```\n" + "## 失败现场\n```\nFAILED x\nAssertionError\n```\n\nSTATUS: ok\n") + with pytest.raises(ParseError) as ei: + parse_validator_validation(p) + assert "修复指引" in str(ei.value) + + +def test_validator_missing_verdict_raises(tmp_path): + p = _write(tmp_path / "v.md", + "# v\n## 复现命令\n```bash\npytest\n```\nSTATUS: ok\n") + with pytest.raises(ParseError): + parse_validator_validation(p) diff --git a/plugins/specode/tests/test_task_swarm_parse_md.py b/plugins/specode/tests/test_task_swarm_parse_md.py index cc8c935..c2450f6 100644 --- a/plugins/specode/tests/test_task_swarm_parse_md.py +++ b/plugins/specode/tests/test_task_swarm_parse_md.py @@ -1,223 +1,160 @@ -"""Unit tests for 
task_swarm_parse_md. - -Cover: stage/leaf identification, checkpoint linking, @swarm tag arbitration -(5-tier priority table from references/task-swarm.md), heuristic defaults, -file-union for parallelism check, invalid tags warning. -""" +"""tests for task_swarm_parse_md.py — tasks.md 解析 + group 切分。""" from __future__ import annotations import sys from pathlib import Path -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm_parse_md as M # noqa: E402 - - -SAMPLE = """\ -- [ ] 1. 实现登录核心流程 - - [ ] 1.1 写 User model - - 文件:src/models/user.py - - 验证:pytest tests/test_user.py - - _需求:1.1_ - - [ ] 1.2 写 auth service - - 文件:src/auth/service.py - - _需求:1.2_ - - [ ] 1.3 写 controller - - 文件:src/api/login.py - - _需求:1.3_ - -- [ ] 2. 检查点 — 登录跑通 - - 运行 pytest - -- [ ] 3. 实现登出 - - [ ] 3.1 logout service - - 文件:src/auth/logout.py - - _需求:2.1_ - -- [*] 5. 优化 - - [ ] 5.1 失败计数 @swarm:coder-only - - 文件:src/auth/lockout.py -""" - - -def test_parse_basic_stages_and_leaves(): - plan = M.parse_tasks_md(SAMPLE) - assert [s.num for s in plan.stages] == [1, 2, 3, 5] - assert [s.kind for s in plan.stages] == ["stage", "checkpoint", "stage", "stage"] - s1 = plan.stages[0] - assert [l.num for l in s1.leaves] == ["1.1", "1.2", "1.3"] - assert s1.leaves[0].files == ["src/models/user.py"] - assert s1.leaves[0].requirement == "1.1" - assert s1.leaves[0].verify == "pytest tests/test_user.py" - - -def test_checkpoint_links_to_prior_stage(): - plan = M.parse_tasks_md(SAMPLE) - cp = next(s for s in plan.stages if s.kind == "checkpoint") - assert cp.num == 2 - assert cp.checkpoint_for == 1 - assert cp.deps == [1] - - -def test_optional_marker_propagates(): - plan = M.parse_tasks_md(SAMPLE) - s5 = next(s for s in plan.stages if s.num == 5) - assert s5.optional is True - - -def test_files_union_drops_skipped_leaves(): - text = ( - "- [ ] 1. 
测试 union\n" - " - [ ] 1.1 a @swarm:skip\n" - " - 文件:src/a.py\n" - " - [ ] 1.2 b\n" - " - 文件:src/b.py\n" - ) - plan = M.parse_tasks_md(text) - assert plan.stages[0].files_union == ["src/b.py"] - +import pytest -# ---------- tag arbitration ---------- +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) -def test_tag_skip_wins_over_full(): - text = "- [ ] 1. T\n - [*] 1.1 x @swarm:full @swarm:skip\n" - plan = M.parse_tasks_md(text) - leaf = plan.stages[0].leaves[0] - assert leaf.policy == "skip" - assert any("skip" in w for w in plan.warnings) +from task_swarm._parse_md import ( # noqa: E402 + parse_tasks_md, group_by_file_conflict, Stage, StageItem, +) -def test_tag_full_beats_coder_only(): - text = "- [ ] 1. T\n - [ ] 1.1 x @swarm:full @swarm:coder-only\n" - plan = M.parse_tasks_md(text) - assert plan.stages[0].leaves[0].policy == "full" - assert any("full" in w and "coder-only" in w for w in plan.warnings) - - -def test_tag_coder_only_overrides_heuristic(): - text = ( - "- [ ] 1. T\n" - " - [ ] 1.1 x @swarm:coder-only\n" - " - 文件:src/x.py\n" - " - _需求:1.1_\n" - ) - plan = M.parse_tasks_md(text) - assert plan.stages[0].leaves[0].policy == "coder-only" +def _write(tmp_path: Path, text: str) -> Path: + p = tmp_path / "tasks.md" + p.write_text(text, encoding="utf-8") + return p -def test_tag_full_overrides_optional_heuristic(): - text = ( - "- [ ] 1. T\n" - " - [*] 1.1 x @swarm:full\n" - " - 文件:src/x.py\n" +def test_parse_basic_stage(tmp_path): + md = ( + "# tasks\n\n" + "## 阶段 1: 第一阶段\n" + "- [ ] 1.1 写 user @writes:src/user.py _需求:1.1_\n" + "- [ ] 1.2 写 session @writes:src/session.py _需求:1.2_\n" ) - plan = M.parse_tasks_md(text) - leaf = plan.stages[0].leaves[0] - assert leaf.policy == "full" - assert leaf.optional is True - - -def test_heuristic_optional_to_coder_only(): - text = ( - "- [ ] 1. 
T\n" - " - [*] 1.1 x\n" - " - 文件:src/x.py\n" - " - _需求:1.1_\n" + stages = parse_tasks_md(_write(tmp_path, md)) + assert len(stages) == 1 + assert stages[0].number == 1 + assert stages[0].title == "第一阶段" + assert len(stages[0].items) == 2 + assert stages[0].items[0].number == "1.1" + assert stages[0].items[0].writes == ["src/user.py"] + assert stages[0].items[0].requirements == ["1.1"] + + +def test_parse_multiple_stages_and_deps(tmp_path): + md = ( + "## 阶段 1: A\n" + "- [ ] 1.1 alpha @writes:a.py _需求:1.1_\n" + "## 阶段 2: B\n" + "- [ ] 2.1 beta @writes:b.py @depends-on:1 _需求:2.1_\n" ) - plan = M.parse_tasks_md(text) - assert plan.stages[0].leaves[0].policy == "coder-only" + stages = parse_tasks_md(_write(tmp_path, md)) + assert [s.number for s in stages] == [1, 2] + assert stages[1].items[0].depends_on == ["1"] + assert stages[1].depends_on == [1] -def test_heuristic_no_requirement_to_coder_only(): - text = ( - "- [ ] 1. T\n" - " - [ ] 1.1 x\n" - " - 文件:src/x.py\n" +def test_parse_recognises_chinese_colon(tmp_path): + md = ( + "## 阶段 1:中文冒号\n" + "- [ ] 1.1 任务 @writes:x.py _需求:1.1_\n" ) - plan = M.parse_tasks_md(text) - assert plan.stages[0].leaves[0].policy == "coder-only" + stages = parse_tasks_md(_write(tmp_path, md)) + assert len(stages) == 1 + assert stages[0].items[0].writes == ["x.py"] -def test_default_policy_for_tracked_leaf(): - text = ( - "- [ ] 1. T\n" - " - [ ] 1.1 x\n" - " - 文件:src/x.py\n" - " - _需求:1.1_\n" +def test_parse_ignores_non_item_lines(tmp_path): + md = ( + "## 阶段 1: A\n" + "这一段是介绍,不是任务。\n" + "- [ ] 1.1 真正任务 @writes:x.py _需求:1.1_\n" + "* [ ] 不被识别(用了 *)\n" ) - plan = M.parse_tasks_md(text) - assert plan.stages[0].leaves[0].policy == "default" - - -def test_invalid_tag_warning(): - text = "- [ ] 1. T\n - [ ] 1.1 x @swarm:strict\n - _需求:1.1_\n" - plan = M.parse_tasks_md(text) - # Invalid tag dropped; heuristic kicks in. Has requirement + not optional → default. 
- assert plan.stages[0].leaves[0].policy == "default" - assert any("strict" in w for w in plan.warnings) - - -# ---------- parallelism ---------- - -def test_parallelizable_disjoint_files(): - text = ( - "- [ ] 1. A\n" - " - [ ] 1.1 a\n" - " - 文件:src/a.py\n" - " - _需求:1.1_\n" - "- [ ] 3. B\n" - " - [ ] 3.1 b\n" - " - 文件:src/b.py\n" - " - _需求:3.1_\n" - ) - plan = M.parse_tasks_md(text) - a, b = plan.stages[0], plan.stages[1] - assert M.parallelizable(a, b) is True - - -def test_not_parallelizable_overlapping_files(): - text = ( - "- [ ] 1. A\n" - " - [ ] 1.1 a\n" - " - 文件:src/shared.py\n" - " - _需求:1.1_\n" - "- [ ] 3. B\n" - " - [ ] 3.1 b\n" - " - 文件:src/shared.py\n" - " - _需求:3.1_\n" - ) - plan = M.parse_tasks_md(text) - assert M.parallelizable(plan.stages[0], plan.stages[1]) is False + stages = parse_tasks_md(_write(tmp_path, md)) + assert len(stages[0].items) == 1 -def test_checkpoint_not_parallel_with_dep(): - text = ( - "- [ ] 1. A\n" - " - [ ] 1.1 a\n" - " - 文件:src/a.py\n" - " - _需求:1.1_\n" - "- [ ] 2. 
检查点\n" +def test_parse_checkbox_state(tmp_path): + md = ( + "## 阶段 1: A\n" + "- [x] 1.1 已完成 @writes:a.py _需求:1.1_\n" + "- [ ] 1.2 未完成 @writes:b.py _需求:1.2_\n" ) - plan = M.parse_tasks_md(text) - assert M.parallelizable(plan.stages[0], plan.stages[1]) is False - - -def test_stages_with_role(): - plan = M.parse_tasks_md(SAMPLE) - coder_stages = list(M.stages_with_role(plan, "coder")) - assert [s.num for s in coder_stages] == [1, 3, 5] + stages = parse_tasks_md(_write(tmp_path, md)) + assert stages[0].items[0].checkbox.lower() == "x" + assert stages[0].items[1].checkbox == " " + + +def test_group_single_stage_one_group(): + s = Stage(number=1, title="A", items=[StageItem(number="1.1", title="x", writes=["a.py"])]) + groups = group_by_file_conflict([s]) + assert len(groups) == 1 + assert groups[0][0].number == 1 + + +def test_group_two_stages_disjoint_files_one_group(): + s1 = Stage(number=1, title="A", items=[StageItem(number="1.1", title="x", writes=["a.py"])]) + s2 = Stage(number=2, title="B", items=[StageItem(number="2.1", title="y", writes=["b.py"])]) + groups = group_by_file_conflict([s1, s2]) + assert len(groups) == 1 + assert {s.number for s in groups[0]} == {1, 2} + + +def test_group_same_file_split_to_two_groups(): + s1 = Stage(number=1, title="A", items=[StageItem(number="1.1", title="x", writes=["a.py"])]) + s2 = Stage(number=2, title="B", items=[StageItem(number="2.1", title="y", writes=["a.py"])]) + groups = group_by_file_conflict([s1, s2]) + assert len(groups) == 2 + assert groups[0][0].number == 1 + assert groups[1][0].number == 2 + + +def test_group_depends_on_forces_serial(): + s1 = Stage(number=1, title="A", items=[StageItem(number="1.1", title="x", writes=["a.py"])]) + s2 = Stage(number=2, title="B", + items=[StageItem(number="2.1", title="y", writes=["b.py"], depends_on=["1"])]) + groups = group_by_file_conflict([s1, s2]) + assert len(groups) == 2 + assert groups[0][0].number == 1 + assert groups[1][0].number == 2 + + +def 
test_group_max_parallel_capacity(): + stages = [ + Stage(number=i, title=f"S{i}", + items=[StageItem(number=f"{i}.1", title=f"x{i}", writes=[f"f{i}.py"])]) + for i in range(1, 6) + ] + groups = group_by_file_conflict(stages, max_parallel=2) + # 5 stages with max=2 → 3 groups (2,2,1) + assert sum(len(g) for g in groups) == 5 + for g in groups: + assert len(g) <= 2 + + +def test_group_full_example_3_groups(tmp_path): + md = ( + "## 阶段 1: 数据层\n" + "- [ ] 1.1 user model @writes:src/models/user.py _需求:1.1_\n" + "- [ ] 1.2 session model @writes:src/models/session.py _需求:1.2_\n" + "## 阶段 2: 服务层\n" + "- [ ] 2.1 auth service @writes:src/auth/service.py @depends-on:1 _需求:2.1_\n" + "## 阶段 3: API\n" + "- [ ] 3.1 login @writes:src/api/login.py @depends-on:2 _需求:3.1_\n" + "- [ ] 3.2 user 扩展 @writes:src/models/user.py @depends-on:1 _需求:3.2_\n" + ) + stages = parse_tasks_md(_write(tmp_path, md)) + groups = group_by_file_conflict(stages, max_parallel=4) + # 阶段 3 同时与 1 文件冲突 + depends-on=2 → 必须晚于 1 与 2 + assert len(groups) == 3 + assert groups[0][0].number == 1 + assert groups[1][0].number == 2 + assert groups[2][0].number == 3 - reviewer_stages = list(M.stages_with_role(plan, "reviewer")) - # stage 5: 5.1 is coder-only (explicit tag) → no reviewer - assert [s.num for s in reviewer_stages] == [1, 3] - validator_stages = list(M.stages_with_role(plan, "validator")) - assert [s.num for s in validator_stages] == [2] +def test_parse_empty_file(tmp_path): + p = _write(tmp_path, "") + stages = parse_tasks_md(p) + assert stages == [] -def test_to_dict_is_json_safe(): - import json - plan = M.parse_tasks_md(SAMPLE) - json.dumps(plan.to_dict()) # must not raise +def test_group_empty_returns_empty(): + assert group_by_file_conflict([]) == [] diff --git a/plugins/specode/tests/test_task_swarm_prompt.py b/plugins/specode/tests/test_task_swarm_prompt.py deleted file mode 100644 index 8366cad..0000000 --- a/plugins/specode/tests/test_task_swarm_prompt.py +++ /dev/null @@ -1,115 +0,0 @@ -"""Unit 
tests for task_swarm_prompt rendering + workspace prep.""" -from __future__ import annotations - -import sys -import tempfile -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm_prompt as PR # noqa: E402 - - -def _ctx(tmp: Path, *, round_no=1, scope="", kind="stage") -> PR.StageContext: - ws = PR.prepare_workspace(tmp, 1, "coder", round_no) - return PR.StageContext( - stage_num=1, - stage_title="实现登录", - stage_kind=kind, - leaves=[ - {"num": "1.1", "title": "写 user model", "files": ["src/models/user.py"], "requirement": "1.1", "verify": "pytest", "policy": "default"}, - {"num": "1.2", "title": "写 service", "files": ["src/auth/service.py"], "requirement": "1.2", "verify": "", "policy": "default"}, - ], - spec_dir=tmp / "spec", - project_root=tmp / "proj", - workspace=ws, - round_no=round_no, - scope=scope, - ) - - -def test_workspace_name_first_round(): - assert PR.workspace_name(1, "coder", 1) == "stage-1-coder" - - -def test_workspace_name_fix_round(): - assert PR.workspace_name(3, "reviewer", 2) == "stage-3-reviewer-r2" - - -def test_prepare_workspace_creates_inbox_outbox(): - with tempfile.TemporaryDirectory() as td: - run = Path(td) - ws = PR.prepare_workspace(run, 1, "coder", 1) - assert (ws / "inbox").is_dir() - assert (ws / "outbox").is_dir() - - -def test_render_coder_initial_includes_writes_and_inbox(): - with tempfile.TemporaryDirectory() as td: - ctx = _ctx(Path(td)) - text = PR.render_coder_prompt(ctx) - assert "CODER" in text - assert "src/models/user.py" in text - assert "src/auth/service.py" in text - # No fix-round guardrail on initial run - assert "修复轮硬规则" not in text - # Output protocol always present - assert "STATUS: ok" in text - - -def test_render_coder_fix_round_includes_guardrail(): - with tempfile.TemporaryDirectory() as td: - # R3: reviewer no longer drives fix rounds; only validator-fail-fix. 
- ctx = _ctx(Path(td), round_no=2, scope="validator-fail-fix") - text = PR.render_coder_prompt(ctx) - assert "修复轮硬规则" in text - assert "validation.md" in text - assert "不要顺手优化" in text - - -def test_render_reviewer_no_edit_tools_warning(): - with tempfile.TemporaryDirectory() as td: - ctx = _ctx(Path(td)) - text = PR.render_reviewer_prompt(ctx) - assert "没有" in text and "Edit/Write" in text - assert "## P0" in text - - -def test_render_reviewer_is_advisory(): - """R3: reviewer prompt must declare advisory mode + non-blocking role.""" - with tempfile.TemporaryDirectory() as td: - ctx = _ctx(Path(td)) - text = PR.render_reviewer_prompt(ctx) - assert "advisory" in text - assert "不参与" in text or "不阻塞" in text - assert "tasks.md" in text # explains where the report ends up - - -def test_render_validator_checkpoint_marker(): - with tempfile.TemporaryDirectory() as td: - ctx = _ctx(Path(td), kind="checkpoint") - text = PR.render_validator_prompt(ctx) - assert "检查点" in text - assert "## 判定" in text - assert "修复指引" in text - - -def test_relay_inbox_copies_outbox(): - with tempfile.TemporaryDirectory() as td: - run = Path(td) - coder_ws = PR.prepare_workspace(run, 1, "coder", 1) - (coder_ws / "outbox" / "result.md").write_text("# result\n\nSTATUS: ok\n") - - reviewer_ws = PR.prepare_workspace(run, 1, "reviewer", 1) - copied = PR.relay_inbox(run, reviewer_ws, [(1, "coder", 1, "result.md")]) - assert "result.md" in copied - assert (reviewer_ws / "inbox" / "result.md").exists() - - -def test_write_task_file_persists_prompt(): - with tempfile.TemporaryDirectory() as td: - ctx = _ctx(Path(td)) - p = PR.write_task_file(ctx, "coder") - assert p.name == "task.md" - assert "CODER" in p.read_text() diff --git a/plugins/specode/tests/test_task_swarm_state.py b/plugins/specode/tests/test_task_swarm_state.py index 12815ba..2aef621 100644 --- a/plugins/specode/tests/test_task_swarm_state.py +++ b/plugins/specode/tests/test_task_swarm_state.py @@ -1,334 +1,196 @@ -"""Unit tests for 
task_swarm_state state machine. - -Covers three core scenarios from references/task-swarm.md: - 1. Two-stage sequential (stage 1 → checkpoint 2 → done) - 2. P0 reviewer loop (coder → reviewer P0 → fix-coder → reviewer approve → converged) - 3. Validator fail + loop-warning early termination -Plus: MAX_ROUNDS termination, parallelism, file-conflict serialization, -coder-only stage skips reviewer/validator. -""" +"""tests for task_swarm_state.py — phase 状态机 + 死循环检测。""" from __future__ import annotations import sys from pathlib import Path -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import task_swarm_parse_md as P # noqa: E402 -import task_swarm_state as S # noqa: E402 - - -def make_state( - text: str, - parallel: int = 3, - max_rounds: int = 3, - reviewer_rounds: int | None = None, - validator_rounds: int | None = None, -) -> dict: - plan = P.parse_tasks_md(text).to_dict() - return S.build_initial_state( - run_id="test-run", - tasks_path=Path("/tmp/tasks.md"), - spec_dir=Path("/tmp/spec"), - project_root=Path("/tmp/proj"), - plan=plan, - parallel=parallel, - max_rounds=max_rounds, - reviewer_max_rounds=reviewer_rounds, - validator_max_rounds=validator_rounds, +import pytest + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from task_swarm._state import ( # noqa: E402 + StateMachine, StageEntry, DEADLOOP_THRESHOLD, +) + + +def _make_sm(tmp_path: Path, num_stages: int = 2) -> StateMachine: + run_dir = tmp_path / "run-1" + run_dir.mkdir(parents=True, exist_ok=True) + stages = [ + StageEntry(number=i, title=f"S{i}", writes=[f"f{i}.py"], items=[ + {"number": f"{i}.1", "title": "t", "writes": [f"f{i}.py"], "reads": [], + "depends_on": [], "requirements": [str(i) + ".1"], "raw_line": "", + "checkbox": " ", "line_no": 0}, + ]) + for i in range(1, num_stages + 1) + ] + sm = StateMachine( + run_id="rid-1", 
tasks_md=str(tmp_path / "tasks.md"), + run_dir=str(run_dir), + groups=[[s] for s in stages], + group_status=["pending"] * num_stages, ) + sm.save() + return sm -# ---------- scenario 1: two-stage sequential ---------- - -TWO_STAGE = """\ -- [ ] 1. 实现登录 - - [ ] 1.1 写 model - - 文件:src/m.py - - _需求:1.1_ - -- [ ] 2. 检查点 - - 运行 pytest -""" - - -def test_two_stage_sequential_happy_path(): - state = make_state(TWO_STAGE) - # First action: fork stage 1 coder - a = S.next_action(state) - assert a.kind == "fork" - assert a.payload["stage"] == 1 - assert a.payload["role"] == "coder" - assert a.payload["round"] == 1 - S.mark_in_flight(state, 1, "coder", 1) - - # Coder finishes ok → reviewer - S.advance(state, 1, "coder", 1, "ok") - a = S.next_action(state) - assert a.kind == "fork" - assert (a.payload["stage"], a.payload["role"]) == (1, "reviewer") - S.mark_in_flight(state, 1, "reviewer", 1) - - # Reviewer approves → stage 1 converges → writeback before further forks - S.advance(state, 1, "reviewer", 1, "approved") - a = S.next_action(state) - assert a.kind == "writeback" - assert a.payload["stage"] == 1 - S.mark_written_back(state, 1) - - # Now stage 2 (checkpoint) can run, deps satisfied - a = S.next_action(state) - assert a.kind == "fork" - assert (a.payload["stage"], a.payload["role"]) == (2, "validator") - S.mark_in_flight(state, 2, "validator", 1) - - S.advance(state, 2, "validator", 1, "pass") - a = S.next_action(state) - assert a.kind == "writeback" - S.mark_written_back(state, 2) - - a = S.next_action(state) - assert a.kind == "done" - - -def test_checkpoint_blocked_until_upstream_writeback(): - state = make_state(TWO_STAGE) - # Try to dispatch stage 2 before stage 1 converges — should not be offered. 
- S.mark_in_flight(state, 1, "coder", 1) - S.advance(state, 1, "coder", 1, "ok") - # reviewer not done yet — checkpoint must wait - a = S.next_action(state) - assert a.payload.get("stage") == 1 # not 2 - - -# ---------- scenario 2: P0 reviewer loop ---------- - -SINGLE_STAGE = """\ -- [ ] 1. T - - [ ] 1.1 x - - 文件:src/x.py - - _需求:1.1_ -""" - - -# R3: reviewer is now advisory — it never fails a stage or loops back to coder. - -def test_reviewer_p0_is_advisory_stage_converges(): - """Even when reviewer reports P0, the stage converges; advisory annotation - is the only side effect. No coder fix round is scheduled.""" - state = make_state(SINGLE_STAGE) - S.mark_in_flight(state, 1, "coder", 1) - S.advance(state, 1, "coder", 1, "ok") - S.mark_in_flight(state, 1, "reviewer", 1) - S.advance(state, 1, "reviewer", 1, "p0", extra={"p0_count": 3}) - - stage = S.get_stage(state, 1) - assert stage["phase"] == "converged" - # And the next action is writeback (not another fork) - a = S.next_action(state) - assert a.kind == "writeback" - - -def test_reviewer_approved_converges_normal_stage(): - state = make_state(SINGLE_STAGE) - S.advance(state, 1, "coder", 1, "ok") - a = S.next_action(state) - assert (a.payload["role"], a.payload["round"]) == ("reviewer", 1) - S.advance(state, 1, "reviewer", 1, "approved") - assert S.get_stage(state, 1)["phase"] == "converged" - - -def test_reviewer_loop_does_not_fail_stage(): - """A reviewer loop verdict no longer fails the stage — advisory only.""" - state = make_state(SINGLE_STAGE) - S.advance(state, 1, "coder", 1, "ok") - S.advance(state, 1, "reviewer", 1, "loop") - stage = S.get_stage(state, 1) - assert stage["phase"] == "converged" - - -# ---------- scenario 3: validator fail loop ---------- - -CHECKPOINT_ONLY = """\ -- [ ] 1. 
检查点 - - 运行 pytest -""" - - -def test_validator_fail_and_loop_warning(): - state = make_state(CHECKPOINT_ONLY) - a = S.next_action(state) - assert (a.payload["role"], a.payload["round"]) == ("validator", 1) - S.mark_in_flight(state, 1, "validator", 1) - S.advance(state, 1, "validator", 1, "loop") - stage = S.get_stage(state, 1) - assert stage["phase"] == "failed" - - -def test_validator_max_rounds(): - state = make_state(CHECKPOINT_ONLY, max_rounds=2) - S.advance(state, 1, "validator", 1, "fail") - S.advance(state, 1, "coder", 2, "ok") - S.advance(state, 1, "validator", 2, "fail") - stage = S.get_stage(state, 1) - assert stage["phase"] == "failed" - - -def test_validator_recovers_within_budget(): - """R2: checkpoint must converge on validator pass, not reviewer. - coder-fix → validator re-run (no reviewer post-fix in between). - """ - state = make_state(CHECKPOINT_ONLY, max_rounds=3) - S.advance(state, 1, "validator", 1, "fail") - a = S.next_action(state) - assert (a.payload["role"], a.payload["round"], a.payload["scope"]) == ("coder", 2, "validator-fail-fix") - S.advance(state, 1, "coder", 2, "ok") - # checkpoint coder fix → validator re-run directly (no reviewer) - a = S.next_action(state) - assert a.payload["role"] == "validator" - assert a.payload["scope"] == "re-run" - S.advance(state, 1, "validator", a.payload["round"], "pass") - stage = S.get_stage(state, 1) - assert stage["phase"] == "converged" - - -# ---------- coder-only stage skips reviewer ---------- - -CODER_ONLY_STAGE = """\ -- [*] 5. 
优化 - - [ ] 5.1 x @swarm:coder-only - - 文件:src/x.py -""" - - -def test_coder_only_stage_no_reviewer(): - state = make_state(CODER_ONLY_STAGE) - a = S.next_action(state) - assert (a.payload["stage"], a.payload["role"]) == (5, "coder") - S.advance(state, 5, "coder", 1, "ok") - stage = S.get_stage(state, 5) - assert stage["phase"] == "converged" - a = S.next_action(state) - assert a.kind == "writeback" - assert a.payload["stage"] == 5 - - -# ---------- parallelism ---------- - -TWO_INDEP = """\ -- [ ] 1. A - - [ ] 1.1 a - - 文件:src/a.py - - _需求:1.1_ -- [ ] 3. B - - [ ] 3.1 b - - 文件:src/b.py - - _需求:3.1_ -""" - - -def test_parallel_dispatch_offers_both_stages(): - state = make_state(TWO_INDEP, parallel=2) - # First action: fork stage 1 - a = S.next_action(state) - assert a.payload["stage"] == 1 - S.mark_in_flight(state, 1, "coder", 1) - # Second action with stage 1 still in flight: should offer stage 3 - a = S.next_action(state) - assert a.kind == "fork" - assert a.payload["stage"] == 3 - - -def test_file_conflict_serializes(): - text = ( - "- [ ] 1. A\n - [ ] 1.1 a\n - 文件:src/shared.py\n - _需求:1.1_\n" - "- [ ] 3. B\n - [ ] 3.1 b\n - 文件:src/shared.py\n - _需求:3.1_\n" - ) - state = make_state(text, parallel=3) - a = S.next_action(state) - assert a.payload["stage"] == 1 - S.mark_in_flight(state, 1, "coder", 1) - # stage 3 shares src/shared.py with in-flight stage 1 → must wait - a = S.next_action(state) - assert a.kind == "wait" - - -def test_parallel_cap_respected(): - text = ( - "- [ ] 1. A\n - [ ] 1.1 a\n - 文件:src/a.py\n - _需求:1.1_\n" - "- [ ] 3. B\n - [ ] 3.1 b\n - 文件:src/b.py\n - _需求:3.1_\n" - "- [ ] 5. 
C\n - [ ] 5.1 c\n - 文件:src/c.py\n - _需求:5.1_\n" - ) - state = make_state(text, parallel=2) - a = S.next_action(state) - S.mark_in_flight(state, a.payload["stage"], "coder", 1) - a = S.next_action(state) - S.mark_in_flight(state, a.payload["stage"], "coder", 1) - # 2 in-flight, cap=2 → third stage blocked - a = S.next_action(state) - assert a.kind == "wait" - - -# ---------- skipped stages ---------- - -ALL_SKIP_STAGE = """\ -- [ ] 1. 全跳过 - - [ ] 1.1 a @swarm:skip - - 文件:src/a.py -""" - - -def test_all_skip_stage_marked_skipped(): - state = make_state(ALL_SKIP_STAGE) - stage = S.get_stage(state, 1) - assert stage["phase"] == "skipped" - a = S.next_action(state) - assert a.kind == "done" - +def test_save_and_load_roundtrip(tmp_path): + sm = _make_sm(tmp_path) + sm2 = StateMachine.load(Path(sm.run_dir)) + assert sm2.run_id == sm.run_id + assert len(sm2.groups) == len(sm.groups) + assert sm2.groups[0][0].number == 1 -# ---------- json safety ---------- -def test_state_is_json_serializable(): +def test_load_migrates_legacy_claude_session_id(tmp_path): + """老 state.json 字段名是 claude_session_id;StateMachine.load 应回填到 session_id 字段。""" import json - state = make_state(TWO_STAGE) - json.dumps(state) - - -# ---------- R3: reviewer is advisory; only validator_rounds matters ---------- - -def test_validator_rounds_cap_terminates_checkpoint(): - """validator fail at cap → failed (advisory reviewer no longer involved).""" - state = make_state(CHECKPOINT_ONLY, validator_rounds=2) - S.advance(state, 1, "validator", 1, "fail") - S.advance(state, 1, "coder", 2, "ok") - S.advance(state, 1, "validator", 2, "fail") - stage = S.get_stage(state, 1) - assert stage["phase"] == "failed" - assert "validator FAIL" in stage["fail_reason"] - - -def test_max_rounds_fallback_when_validator_cap_unspecified(): - """When only --max-rounds is given, validator uses it.""" - state = make_state(SINGLE_STAGE, max_rounds=2) - cfg = state["config"] - assert cfg["validator_max_rounds"] == 2 - - -def 
test_legacy_state_without_caps_falls_back(): - """state.json missing validator_max_rounds (legacy) falls back to max_rounds.""" - state = make_state(CHECKPOINT_ONLY, max_rounds=3) - del state["config"]["validator_max_rounds"] - state["config"].pop("reviewer_max_rounds", None) - S.advance(state, 1, "validator", 1, "fail") - a = S.next_action(state) - assert (a.payload["role"], a.payload["round"]) == ("coder", 2) - - -def test_reviewer_rounds_param_is_deprecated_no_op(): - """Passing --reviewer-rounds doesn't gate anything anymore (R3).""" - state = make_state(SINGLE_STAGE, reviewer_rounds=1, validator_rounds=3) - S.advance(state, 1, "coder", 1, "ok") - # reviewer with any judgment still converges the stage - S.advance(state, 1, "reviewer", 1, "p0") - assert S.get_stage(state, 1)["phase"] == "converged" + sm = _make_sm(tmp_path) + state_path = StateMachine.state_path(Path(sm.run_dir)) + data = json.loads(state_path.read_text(encoding="utf-8")) + # 模拟老 state.json:删除新 key,回退到老 key + data.pop("session_id", None) + data["claude_session_id"] = "legacy-sess-xyz" + state_path.write_text(json.dumps(data), encoding="utf-8") + sm2 = StateMachine.load(Path(sm.run_dir)) + assert sm2.session_id == "legacy-sess-xyz" + + +def test_begin_coding_sets_in_flight(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + assert sm.phase == "coding" + assert sm.round == 1 + assert sm.coder_in_flight == ["coder-g1-s1-r1"] + + +def test_mark_coder_done(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + assert sm.coder_in_flight == [] + assert sm.coder_done == ["coder-g1-s1-r1"] + assert sm.all_coders_returned() + + +def test_begin_review_then_validation(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + assert sm.phase == "review" + sm.mark_reviewer_done() + sm.begin_validation() + assert sm.phase == "validation" + + +def test_p0_fix_phase(tmp_path): + sm = _make_sm(tmp_path) + 
sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + sm.mark_reviewer_done() + pending = [{"text": "x", "evidence_tags": ["req:1.1"], "file_hint": "f1.py"}, + {"text": "y", "evidence_tags": ["security"], "file_hint": "f2.py"}] + sm.begin_p0_fix(pending) + assert sm.phase == "p0-fix" + assert len(sm.p0_in_flight) == 2 # 2 unique files + + +def test_v_fix_phase_round_increment(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + sm.mark_reviewer_done() + sm.begin_validation() + sm.mark_validator_done() + sm.record_round_signature("fail_sig_1") + fix_targets = [{"file_path": "f1.py"}] + initial_round = sm.round + sm.begin_v_fix(fix_targets) + assert sm.phase == "v-fix" + assert sm.round == initial_round + 1 + assert len(sm.vfix_in_flight) == 1 + + +def test_deadloop_threshold(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + sm.mark_reviewer_done() + # Simulate DEADLOOP_THRESHOLD identical fail rounds + for i in range(DEADLOOP_THRESHOLD): + sm.round = i + 1 + sm.record_round_signature("samesig") + assert sm.detect_deadloop() + + +def test_deadloop_not_triggered_with_different_sigs(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + sm.mark_reviewer_done() + for i, sig in enumerate(["a", "b", "c"]): + sm.round = i + 1 + sm.record_round_signature(sig) + assert not sm.detect_deadloop() + + +def test_deadloop_below_threshold(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.begin_review() + sm.mark_reviewer_done() + for i in range(DEADLOOP_THRESHOLD - 1): + sm.round = i + 1 + sm.record_round_signature("samesig") + assert not sm.detect_deadloop() + + +def test_finalize_group_advances_index(tmp_path): + sm = _make_sm(tmp_path, num_stages=2) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + 
sm.finalize_group("done") + assert sm.current_group_index == 1 + assert sm.group_status[0] == "done" + + +def test_finalize_last_group_marks_done(tmp_path): + sm = _make_sm(tmp_path, num_stages=1) + sm.begin_coding() + sm.mark_coder_done("coder-g1-s1-r1") + sm.finalize_group("done") + assert sm.phase == "done" + assert sm.failed_status == "done" + assert sm.completed_at is not None + + +def test_fail_group_deadloop_sets_failed_status(tmp_path): + sm = _make_sm(tmp_path) + sm.begin_coding() + sm.fail_group_deadloop() + assert sm.failed_status == "failed-deadloop" + assert sm.group_status[0] == "failed-deadloop" + assert sm.phase == "error" + + +def test_events_append_timestamps(tmp_path): + sm = _make_sm(tmp_path) + sm.events_append({"type": "test"}) + assert len(sm.events) == 1 + assert "at" in sm.events[0] + + +def test_state_save_atomic_then_load(tmp_path): + sm = _make_sm(tmp_path) + sm.round = 7 + sm.save() + sm2 = StateMachine.load(Path(sm.run_dir)) + assert sm2.round == 7 diff --git a/plugins/specode/tests/test_task_swarm_writeback.py b/plugins/specode/tests/test_task_swarm_writeback.py new file mode 100644 index 0000000..641e85c --- /dev/null +++ b/plugins/specode/tests/test_task_swarm_writeback.py @@ -0,0 +1,164 @@ +"""tests for task_swarm_writeback.py — line-safe diff + 越界拒绝。""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from task_swarm._writeback import ( # noqa: E402 + GroupFindings, StageFinding, WriteBackError, writeback_tasks_md, +) + + +def _make_tasks_md(tmp_path: Path) -> Path: + p = tmp_path / "tasks.md" + p.write_text( + "# tasks\n\n" + "## 阶段 1: 数据层\n" + "- [ ] 1.1 user model @writes:src/u.py _需求:1.1_\n" + "- [ ] 1.2 session @writes:src/s.py _需求:1.2_\n" + "\n" + "## 阶段 2: 服务层\n" + "- [ ] 2.1 auth @writes:src/auth.py @depends-on:1 _需求:2.1_\n", + 
encoding="utf-8", + ) + return p + + +def test_writeback_basic_checkbox_toggle(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings( + group_index=0, stages=[1], findings=[], + validator_history=[{"group": 1, "round": 1, "verdict": "pass"}], + final_verdict="pass", reproduce_cmd="pytest -v", + ) + res = writeback_tasks_md(p, gf) + new_text = p.read_text(encoding="utf-8") + assert "- [x] 1.1" in new_text + assert "- [x] 1.2" in new_text + # stage 2 unchanged + assert "- [ ] 2.1" in new_text + + +def test_writeback_appends_findings_block(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings( + group_index=0, stages=[1], + findings=[ + StageFinding(severity="p0", text="src/u.py:5 [req:1.1] — unique", + fix_status="已修复"), + StageFinding(severity="advisory", text="src/u.py:50 — style", + fix_status="未修复"), + StageFinding(severity="p1", text="src/s.py:10 — token len", + fix_status="未修复"), + ], + validator_history=[{"group": 1, "round": 1, "verdict": "pass"}], + final_verdict="pass", reproduce_cmd="pytest", + ) + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + assert "[P0 已修复]" in txt + assert "[adv 未修复]" in txt + assert "[P1 未修复]" in txt + assert "validator g1-r1 pass" in txt + + +def test_writeback_rejects_missing_stage(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings(group_index=0, stages=[99], findings=[]) + with pytest.raises(WriteBackError) as ei: + writeback_tasks_md(p, gf) + assert "99" in str(ei.value) + + +def test_writeback_preserves_writes_and_req_tags(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings(group_index=0, stages=[1], findings=[], + validator_history=[{"group": 1, "round": 1, "verdict": "pass"}], + final_verdict="pass") + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + assert "@writes:src/u.py" in txt + assert "_需求:1.1_" in txt + assert "@depends-on:1" in txt + + +def test_writeback_failed_deadloop_message(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings( 
+ group_index=0, stages=[1], findings=[], + validator_history=[ + {"group": 1, "round": 1, "verdict": "fail", "signature": "abc"}, + {"group": 1, "round": 2, "verdict": "fail", "signature": "abc"}, + {"group": 1, "round": 3, "verdict": "fail", "signature": "abc"}, + ], + final_verdict="failed-deadloop", + ) + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + assert "failed-deadloop" in txt + + +def test_writeback_validator_history_lines(tmp_path): + p = _make_tasks_md(tmp_path) + gf = GroupFindings( + group_index=0, stages=[1], findings=[], + validator_history=[ + {"group": 1, "round": 1, "verdict": "fail", "signature": "abc12345"}, + {"group": 1, "round": 2, "verdict": "pass"}, + ], + final_verdict="pass", reproduce_cmd="pytest tests/", + ) + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + assert "g1-r1: fail" in txt + assert "g1-r2: pass" in txt + + +def test_writeback_idempotent_already_checked(tmp_path): + p = _make_tasks_md(tmp_path) + # 第一次 writeback + gf = GroupFindings(group_index=0, stages=[1], findings=[], + validator_history=[{"group": 1, "round": 1, "verdict": "pass"}], + final_verdict="pass") + writeback_tasks_md(p, gf) + # 第二次(应该不出错,checkbox 保持 x) + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + assert "- [x] 1.1" in txt + + +def test_writeback_rejects_nonexistent_file(tmp_path): + with pytest.raises(WriteBackError): + writeback_tasks_md(tmp_path / "missing.md", GroupFindings(group_index=0, stages=[1])) + + +def test_writeback_multi_stage_group_only_appends_to_last(tmp_path): + p = tmp_path / "tasks.md" + p.write_text( + "## 阶段 1: A\n" + "- [ ] 1.1 a @writes:a.py _需求:1.1_\n" + "## 阶段 2: B\n" + "- [ ] 2.1 b @writes:b.py _需求:2.1_\n", + encoding="utf-8", + ) + gf = GroupFindings(group_index=0, stages=[1, 2], findings=[], + validator_history=[{"group": 1, "round": 1, "verdict": "pass"}], + final_verdict="pass") + writeback_tasks_md(p, gf) + txt = p.read_text(encoding="utf-8") + lines = txt.splitlines() 
+ # 注释块应在 stage 2 之后(文件末尾),而不是 stage 1 末尾 + # 也即:阶段 1 后面紧跟 ## 阶段 2: + idx_stage1_item = next(i for i, l in enumerate(lines) if "1.1" in l) + idx_stage2_header = next(i for i, l in enumerate(lines) if "## 阶段 2:" in l) + # 这两个之间不应有 `>` 注释 + for l in lines[idx_stage1_item + 1: idx_stage2_header]: + assert not l.startswith(">") + # 但末尾有 `>` 注释 + assert any(l.startswith(">") for l in lines[idx_stage2_header:]) diff --git a/plugins/specode/tests/test_telemetry.py b/plugins/specode/tests/test_telemetry.py deleted file mode 100644 index 0f9683f..0000000 --- a/plugins/specode/tests/test_telemetry.py +++ /dev/null @@ -1,235 +0,0 @@ -"""Tests for spec_telemetry: opt-in gate, emit, rotation, summary.""" -from __future__ import annotations - -import argparse -import io -import json -import sys -from contextlib import redirect_stdout, redirect_stderr -from pathlib import Path - -SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -import spec_state -import spec_telemetry - - -def _telemetry_path(tmp_path: Path, monkeypatch) -> Path: - p = tmp_path / "telemetry.jsonl" - monkeypatch.setenv(spec_telemetry._ENV_PATH, str(p)) - return p - - -def test_opt_out_is_noop(tmp_path, monkeypatch): - """Without SPECODE_TELEMETRY=on, emit writes nothing.""" - p = _telemetry_path(tmp_path, monkeypatch) - monkeypatch.delenv(spec_telemetry._ENV_FLAG, raising=False) - spec_telemetry.emit("test.event", k="v") - assert not p.exists() - - -def test_opt_in_emits_record(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - spec_telemetry.emit("spec.init", spec_slug="foo", workflow="requirements-first") - - assert p.exists() - lines = [json.loads(l) for l in p.read_text(encoding="utf-8").splitlines() if l.strip()] - assert len(lines) == 1 - rec = lines[0] - assert rec["event"] == "spec.init" - assert rec["spec_slug"] == "foo" - assert rec["workflow"] == "requirements-first" - assert 
"ts" in rec - - -def test_flag_truthy_values(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - for val in ["on", "1", "true", "YES", "Y"]: - monkeypatch.setenv(spec_telemetry._ENV_FLAG, val) - assert spec_telemetry.is_enabled(), f"{val!r} should enable telemetry" - for val in ["off", "0", "false", "no", ""]: - monkeypatch.setenv(spec_telemetry._ENV_FLAG, val) - assert not spec_telemetry.is_enabled(), f"{val!r} should keep telemetry off" - - -def test_rotation_when_over_cap(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - monkeypatch.setenv(spec_telemetry._ENV_MAX, "512") - - # Fill past the cap. - pad = "x" * 60 - for i in range(20): - spec_telemetry.emit("pad", i=i, junk=pad) - assert p.exists() - - # One more emit triggers rotation since file now > cap. - spec_telemetry.emit("spec.init", spec_slug="after-rotate") - - rotated = spec_telemetry._rotated_for(p) - assert rotated.exists(), "rotated .0 file should exist" - # New file contains only the post-rotation record. - post = [json.loads(l) for l in p.read_text(encoding="utf-8").splitlines() if l.strip()] - assert len(post) == 1 - assert post[0]["spec_slug"] == "after-rotate" - - -def test_rotation_overwrites_prior_rotated(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - rotated = spec_telemetry._rotated_for(p) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - monkeypatch.setenv(spec_telemetry._ENV_MAX, "256") - - # Prime an old .0 file with stale content. - rotated.write_text(json.dumps({"event": "stale"}) + "\n", encoding="utf-8") - pad = "x" * 60 - for i in range(20): - spec_telemetry.emit("pad", i=i, junk=pad) - spec_telemetry.emit("trigger.rotate") - - # The new .0 should have replaced the stale one. 
- rotated_records = [ - json.loads(l) for l in rotated.read_text(encoding="utf-8").splitlines() if l.strip() - ] - assert all(r.get("event") != "stale" for r in rotated_records) - - -def test_iter_records_reads_rotated_first(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - rotated = spec_telemetry._rotated_for(p) - rotated.write_text(json.dumps({"event": "old", "n": 1}) + "\n", encoding="utf-8") - p.write_text(json.dumps({"event": "new", "n": 2}) + "\n", encoding="utf-8") - seq = [r["event"] for r in spec_telemetry.iter_records(p)] - assert seq == ["old", "new"] - - -def test_emit_swallows_io_errors(tmp_path, monkeypatch): - """If the target path can't be written, emit must not raise.""" - bogus = tmp_path / "nonexistent" / "blocker" / "telemetry.jsonl" - monkeypatch.setenv(spec_telemetry._ENV_PATH, str(bogus)) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - # Create a regular file where a dir is expected — parent.mkdir will fail. - (tmp_path / "nonexistent").write_text("not a dir") - spec_telemetry.emit("event.that.fails") - # No assertion needed beyond "did not raise". 
- - -def test_iter_records_skips_malformed_lines(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - p.write_text( - json.dumps({"event": "ok1"}) + "\n" - + "not-json-at-all\n" - + json.dumps({"event": "ok2"}) + "\n", - encoding="utf-8", - ) - seq = [r["event"] for r in spec_telemetry.iter_records(p)] - assert seq == ["ok1", "ok2"] - - -def _run_summary(tmp_path, monkeypatch, **kwargs) -> tuple[str, str, int]: - p = _telemetry_path(tmp_path, monkeypatch) - if "records" in kwargs: - for rec in kwargs["records"]: - p.parent.mkdir(parents=True, exist_ok=True) - with p.open("a", encoding="utf-8") as f: - f.write(json.dumps(rec) + "\n") - ns = argparse.Namespace( - days=kwargs.get("days", 0), - json=kwargs.get("json", False), - force=kwargs.get("force", True), - ) - out = io.StringIO() - err = io.StringIO() - with redirect_stdout(out), redirect_stderr(err): - rc = spec_state._cmd_telemetry_summary(ns) - return out.getvalue(), err.getvalue(), rc - - -def test_summary_text_output(tmp_path, monkeypatch): - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - records = [ - {"ts": "2026-05-01T00:00:00+00:00", "event": "spec.init", "spec_slug": "alpha"}, - {"ts": "2026-05-01T01:00:00+00:00", "event": "spec.phase_transition", - "spec_slug": "alpha", "from_phase": "intake", "to_phase": "requirements"}, - {"ts": "2026-05-01T02:00:00+00:00", "event": "inv.violation", - "inv": "INV-1", "spec_slug": "alpha"}, - {"ts": "2026-05-01T03:00:00+00:00", "event": "inv.violation", - "inv": "INV-2", "spec_slug": "alpha"}, - {"ts": "2026-05-01T04:00:00+00:00", "event": "inv.violation", - "inv": "INV-1", "spec_slug": "beta"}, - {"ts": "2026-05-01T05:00:00+00:00", "event": "swarm.run_start", "run_id": "r1"}, - {"ts": "2026-05-01T06:00:00+00:00", "event": "swarm.stage_done", - "run_id": "r1", "stage": 1, "rounds": {"coder": 2, "reviewer": 2, "validator": 1}}, - {"ts": "2026-05-01T07:00:00+00:00", "event": "swarm.run_end", - "run_id": "r1", "converged": 1, "failed": 0}, - ] - 
out, err, rc = _run_summary(tmp_path, monkeypatch, records=records) - assert rc == 0 - assert "8 record" in out - assert "INV-1" in out and "INV-2" in out - assert "alpha" in out and "beta" in out - assert "task-swarm: 1 run" in out - # 2+2+1 = 5 rounds across one stage - assert "5.00" in out - - -def test_summary_json_output(tmp_path, monkeypatch): - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - records = [ - {"ts": "2026-05-01T00:00:00+00:00", "event": "inv.violation", "inv": "INV-1", "spec_slug": "x"}, - {"ts": "2026-05-01T01:00:00+00:00", "event": "inv.violation", "inv": "INV-1", "spec_slug": "x"}, - {"ts": "2026-05-01T02:00:00+00:00", "event": "inv.violation", "inv": "INV-2", "spec_slug": "y"}, - ] - out, _, rc = _run_summary(tmp_path, monkeypatch, records=records, json=True) - assert rc == 0 - data = json.loads(out) - assert data["total_records"] == 3 - assert data["by_inv"] == {"INV-1": 2, "INV-2": 1} - assert data["inv_violations_by_spec"] == {"x": 2, "y": 1} - - -def test_summary_warns_when_disabled(tmp_path, monkeypatch): - p = _telemetry_path(tmp_path, monkeypatch) - p.write_text(json.dumps({"ts": "2026-05-01T00:00:00+00:00", "event": "x"}) + "\n", encoding="utf-8") - monkeypatch.delenv(spec_telemetry._ENV_FLAG, raising=False) - ns = argparse.Namespace(days=0, json=False, force=False) - out = io.StringIO() - err = io.StringIO() - with redirect_stdout(out), redirect_stderr(err): - rc = spec_state._cmd_telemetry_summary(ns) - assert rc == 0 - assert "telemetry is disabled" in err.getvalue() - # Still prints the existing file's contents. 
- assert "1 record" in out.getvalue() - - -def test_summary_no_file(tmp_path, monkeypatch): - _telemetry_path(tmp_path, monkeypatch) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - ns = argparse.Namespace(days=0, json=False, force=False) - out = io.StringIO() - err = io.StringIO() - with redirect_stdout(out), redirect_stderr(err): - rc = spec_state._cmd_telemetry_summary(ns) - assert rc == 0 - assert "no telemetry file" in err.getvalue() - - -def test_emit_integration_from_spec_init(tmp_path, monkeypatch): - """spec_init emits a spec.init record (smoke-level integration).""" - p = _telemetry_path(tmp_path, monkeypatch) - monkeypatch.setenv(spec_telemetry._ENV_FLAG, "on") - spec_telemetry.emit( - "spec.init", - spec_slug="demo", - spec_dir=str(tmp_path / "demo"), - workflow="requirements-first", - spec_type="feature", - persistent=False, - initial_phase=None, - created_count=4, - ) - records = [r for r in spec_telemetry.iter_records(p)] - assert any(r["event"] == "spec.init" and r["spec_slug"] == "demo" for r in records)