From 0c46a929d2b2f6b7b94e9e27b89bf55977632442 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:25:46 +0000
Subject: [PATCH 1/4] chore(deps): bump pytest from 8.3.5 to 9.0.3

Bumps [pytest](https://github.com/pytest-dev/pytest) from 8.3.5 to 9.0.3.
- [Release notes](https://github.com/pytest-dev/pytest/releases)
- [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest/compare/8.3.5...9.0.3)

---
updated-dependencies:
- dependency-name: pytest
  dependency-version: 9.0.3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pyproject.toml | 2 +-
 uv.lock        | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 07cb87c..56165d2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ Documentation = "https://github.com/nullhack/python-project-template/tree/main/d
 [project.optional-dependencies]
 dev = [
     "pdoc>=14.0",
-    "pytest>=8.3.5",
+    "pytest>=9.0.3",
     "pytest-cov>=6.1.1",
     "pytest-html>=4.1.1",
     "pytest-mock>=3.14.0",
diff --git a/uv.lock b/uv.lock
index e6e3da4..219fdd9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -644,17 +644,18 @@ wheels = [
 
 [[package]]
 name = "pytest"
-version = "8.3.5"
+version = "9.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
     { name = "iniconfig" },
     { name = "packaging" },
     { name = "pluggy" },
+    { name = "pygments" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
 ]
 
 [[package]]
@@ -754,7 +755,7 @@ requires-dist = [
     { name = "hypothesis", marker = "extra == 'dev'", specifier = ">=6.148.4" },
     { name = "pdoc", marker = "extra == 'dev'", specifier = ">=14.0" },
     { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.407" },
-    { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.5" },
+    { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" },
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" },
     { name = "pytest-html", marker = "extra == 'dev'", specifier = ">=4.1.1" },
     { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" },

From f12e5281203b407ca9fa010d6c43579fdbc34f19 Mon Sep 17 00:00:00 2001
From: nullhack <nullhack@users.noreply.github.com>
Date: Tue, 14 Apr 2026 13:54:15 -0400
Subject: [PATCH 2/4] chore(conventions): overhaul workflow standards, add
 extend-criteria skill

- Rename test functions to test_<short_title> (drop _should_ pattern)
- UUID-only first line in test docstrings (no description after UUID)
- Add mandatory Source: field to acceptance criteria format
- Add extend-criteria skill for gap/defect handling mid-flight
- Replace bare 'task' with 'uv run task' across all skill/agent files
- Update to flat tests/ layout (no unit/ integration/ subdirs)
- Split compound acceptance criterion into two single-outcome criteria
- Remove Test strategy from PO scope (developer decision)
- Add [~] and [-] TODO.md status markers
- Move mv command ownership to developer Step 2
- Add --doctest-modules note to implementation skill
- Align pytest.skip prohibition: allowed with written justification
- Remove stale auto-publish-docs.md from in-progress
- Add display-version.md as completed reference feature
- Update README with uv sync --all-extras and uv run task commands
---
 .opencode/agents/developer.md                 |  39 +++--
 .opencode/agents/product-owner.md             |  21 ++-
 .opencode/agents/reviewer.md                  |  18 +--
 .opencode/skills/extend-criteria/SKILL.md     | 102 ++++++++++++
 .opencode/skills/implementation/SKILL.md      |  22 +--
 .opencode/skills/scope/SKILL.md               |  53 +++---
 .opencode/skills/tdd/SKILL.md                 |  44 +++--
 .opencode/skills/verify/SKILL.md              |  27 ++--
 AGENTS.md                                     |  59 ++++---
 README.md                                     |  32 ++--
 docs/features/completed/display-version.md    |  67 ++++++++
 .../features/in-progress/auto-publish-docs.md |  43 -----
 tests/version_test.py                         | 153 ++++++++++--------
 13 files changed, 434 insertions(+), 246 deletions(-)
 create mode 100644 .opencode/skills/extend-criteria/SKILL.md
 create mode 100644 docs/features/completed/display-version.md
 delete mode 100644 docs/features/in-progress/auto-publish-docs.md

diff --git a/.opencode/agents/developer.md b/.opencode/agents/developer.md
index 74b7148..b7f157f 100644
--- a/.opencode/agents/developer.md
+++ b/.opencode/agents/developer.md
@@ -34,10 +34,16 @@ You build everything: architecture, tests, code, and releases. You own technical
 Every session: load `skill session-workflow` first. Read TODO.md to find current step and feature.
 
 ### Step 2 — BOOTSTRAP + ARCHITECTURE
-When a new feature lands in `docs/features/in-progress/`:
-
-1. Read the feature doc. Understand all acceptance criteria and their UUIDs.
-2. Add an `## Architecture` section to the feature doc:
+When a new feature is ready in `docs/features/backlog/`:
+
+1. Move the feature doc to in-progress:
+   ```bash
+   mv docs/features/backlog/<feature-name>.md docs/features/in-progress/<feature-name>.md
+   git add -A
+   git commit -m "chore(workflow): start <feature-name>"
+   ```
+2. Read the feature doc. Understand all acceptance criteria and their UUIDs.
+3. Add an `## Architecture` section to the feature doc:
    - Module structure (which files you will create/modify)
    - Key decisions — write an ADR for any non-obvious choice:
      ```
@@ -47,10 +53,10 @@ When a new feature lands in `docs/features/in-progress/`:
      Alternatives considered: <what you rejected and why>
      ```
    - Build changes that need PO approval: new runtime deps, new packages, changed entry points
-3. If build changes need PO approval, ask before proceeding. Tooling changes (coverage, lint rules, test config) are your autonomy.
-4. Update `pyproject.toml` and project structure as needed.
-5. Run `task test` — must still pass.
-6. Commit: `feat(bootstrap): configure build for <feature-name>`
+4. If build changes need PO approval, ask before proceeding. Tooling changes (coverage, lint rules, test config) are your autonomy.
+5. Update `pyproject.toml` and project structure as needed.
+6. Run `uv run task test` — must still pass.
+7. Commit: `feat(bootstrap): configure build for <feature-name>`
 
 ### Step 3 — TEST FIRST
 Load `skill tdd`. Write failing tests mapped 1:1 to each UUID acceptance criterion.
@@ -59,7 +65,8 @@ Commit: `test(<feature-name>): add failing tests for all acceptance criteria`
 ### Step 4 — IMPLEMENT
 Load `skill implementation`. Make tests green one at a time.
 Commit after each test goes green: `feat(<feature-name>): implement <component>`
-Self-verify: run `task test` and `timeout 10s task run` after each commit.
+Self-verify after each commit: run all four commands in the Self-Verification block below.
+If you discover a missing behavior during implementation, load `skill extend-criteria`.
 
 ### After reviewer approves (Step 5)
 Load `skill pr-management` and `skill git-release` as needed.
@@ -100,10 +107,10 @@ When making a non-obvious architecture decision, write a brief ADR in the featur
 
 Before declaring any step complete and before requesting reviewer verification, run:
 ```bash
-task lint                # must exit 0
-task static-check        # must exit 0, 0 errors
-task test                # must exit 0, all tests pass
-timeout 10s task run     # must exit 0 or 124; exit 124 = timeout (infinite loop) = fix it
+uv run task lint                # must exit 0
+uv run task static-check        # must exit 0, 0 errors
+uv run task test                # must exit 0, all tests pass
+timeout 10s uv run task run     # must exit non-124; exit 124 = timeout (infinite loop) = fix it
 ```
 
 Do not hand off broken work to the reviewer.
@@ -112,9 +119,8 @@ Do not hand off broken work to the reviewer.
 
 ```
 <package>/             # production code (named after the project)
-tests/
-  unit/                # @pytest.mark.unit — isolated, one function/class
-  integration/         # @pytest.mark.integration — multiple components
+tests/                 # flat layout — no unit/ or integration/ subdirectories
+  <name>_test.py       # marker (@pytest.mark.unit/integration) determines category
 pyproject.toml         # version, deps, tasks, test config
 ```
 
@@ -127,6 +133,7 @@ pyproject.toml         # version, deps, tasks, test config
 - `session-workflow` — read/update TODO.md at session boundaries
 - `tdd` — write failing tests with UUID traceability (Step 3)
 - `implementation` — Red-Green-Refactor cycle (Step 4)
+- `extend-criteria` — add gap criteria discovered during implementation or review
 - `code-quality` — ruff, pyright, coverage standards
 - `pr-management` — create PRs with conventional commits
 - `git-release` — calver versioning and themed release naming
diff --git a/.opencode/agents/product-owner.md b/.opencode/agents/product-owner.md
index 1a1b81f..6a19821 100644
--- a/.opencode/agents/product-owner.md
+++ b/.opencode/agents/product-owner.md
@@ -53,28 +53,43 @@ Every criterion must have a UUID (generate with `python -c "import uuid; print(u
 
 ```markdown
 - `<uuid>`: <Short description>.
+  Source: <stakeholder | po | developer | reviewer | bug>
+
   Given: <precondition>
   When: <action>
   Then: <expected outcome>
-  Test strategy: unit | integration
 ```
 
 All UUIDs must be unique. Every story must have at least one criterion. Every criterion must be independently testable.
 
+**Source field** (mandatory): records who originated this criterion.
+- `stakeholder` — an external stakeholder gave this requirement to the PO
+- `po` — the PO originated this criterion independently
+- `developer` — a gap found during Step 4 implementation
+- `reviewer` — a gap found during Step 5 verification
+- `bug` — a post-merge regression; the feature doc was reopened
+
+When adding criteria discovered after initial scope, load `skill extend-criteria`.
+
 ## Feature Document Structure
 
+Filename: `<verb>-<object>.md` — imperative verb first, kebab-case, 2–4 words.
+Examples: `display-version.md`, `authenticate-user.md`, `export-metrics-csv.md`
+Title matches: `# Feature: <Verb> <Object>` in Title Case.
+
 ```markdown
-# Feature: <Name>
+# Feature: <Verb> <Object>
 
 ## User Stories
 - As a <role>, I want <goal> so that <benefit>
 
 ## Acceptance Criteria
 - `<uuid>`: <Short description>.
+  Source: <stakeholder | po>
+
   Given: ...
   When: ...
   Then: ...
-  Test strategy: unit | integration
 
 ## Notes
 <constraints, risks, out-of-scope items>
diff --git a/.opencode/agents/reviewer.md b/.opencode/agents/reviewer.md
index d867f81..0038dfe 100644
--- a/.opencode/agents/reviewer.md
+++ b/.opencode/agents/reviewer.md
@@ -55,10 +55,10 @@ Load `skill verify`. Run all commands, check all criteria, produce a written rep
 Run these in order. If any fails, stop and report — do not continue to the next:
 
 ```bash
-task lint                # must exit 0
-task static-check        # must exit 0, 0 errors
-task test                # must exit 0, 0 failures, coverage >= 100%
-timeout 10s task run     # must exit 0 or 124; exit 124 = timeout (infinite loop) = FAIL
+uv run task lint                # must exit 0
+uv run task static-check        # must exit 0, 0 errors
+uv run task test                # must exit 0, 0 failures, coverage >= 100%
+timeout 10s uv run task run     # must exit non-124; exit 124 = timeout (infinite loop) = FAIL
 ```
 
 ## Code Review Checklist
@@ -95,7 +95,7 @@ After all commands pass, review source code for:
 9. No getters/setters (tell, don't ask)
 
 **Tests**
-- [ ] Every test has UUID docstring with Given/When/Then
+- [ ] Every test docstring has a UUID-only first line, then a blank line, then Given/When/Then
 - [ ] Tests assert behavior, not structure
 - [ ] Every acceptance criterion has a mapped test
 - [ ] No test verifies isinstance, type(), or internal attributes
@@ -111,10 +111,10 @@ After all commands pass, review source code for:
 ## Step 5 Verification Report
 
 ### Commands
-- task lint: PASS | FAIL — <output if fail>
-- task static-check: PASS | FAIL — <errors if fail>
-- task test: PASS | FAIL — <failures/coverage if fail>
-- timeout 10s task run: PASS | FAIL | TIMEOUT — <error or "process did not exit within 10s" if fail>
+- uv run task lint: PASS | FAIL — <output if fail>
+- uv run task static-check: PASS | FAIL | NOT RUN — <errors if fail, or "stopped after previous failure">
+- uv run task test: PASS | FAIL | NOT RUN — <failures/coverage if fail, or "stopped after previous failure">
+- timeout 10s uv run task run: PASS | FAIL | TIMEOUT | NOT RUN — <error or "process did not exit within 10s" if fail, or "stopped after previous failure">
 
 ### Code Review
 - PASS | FAIL: <finding with file:line reference>
diff --git a/.opencode/skills/extend-criteria/SKILL.md b/.opencode/skills/extend-criteria/SKILL.md
new file mode 100644
index 0000000..8bf6e06
--- /dev/null
+++ b/.opencode/skills/extend-criteria/SKILL.md
@@ -0,0 +1,102 @@
+---
+name: extend-criteria
+description: Add acceptance criteria discovered after scope is written — gaps found during implementation or review, and post-merge defects
+version: "1.0"
+author: any
+audience: developer, reviewer, product-owner
+workflow: feature-lifecycle
+---
+
+# Extend Criteria
+
+This skill is loaded when any agent discovers a missing behavior that is not covered by the existing acceptance criteria. It provides the decision rule, UUID assignment, and commit protocol for adding new criteria mid-flight or post-merge.
+
+## When to Use
+
+- **Developer (Step 4)**: implementation reveals an untested behavior
+- **Reviewer (Step 5)**: code review reveals an observable behavior with no acceptance criterion
+- **Post-merge**: a defect is found in production and a regression criterion must be added
+
+Do not use this skill to scope new features. New observable behaviors that go beyond the current feature's user stories must be escalated to the PO.
+
+## Decision Rule: Is This a Gap or a New Feature?
+
+Ask: "Does this behavior fall within the intent of the current user stories?"
+
+| Situation | Action |
+|---|---|
+| Edge case or error path within approved scope | Add criterion with `Source: developer` or `Source: reviewer` |
+| New observable behavior users did not ask for | Escalate to PO; do not add criterion unilaterally |
+| Post-merge regression (the feature was accepted and broke later) | Reopen feature doc; add criterion with `Source: bug` |
+| Behavior already present but criterion was never written | Add criterion with appropriate `Source:` |
+
+When in doubt, ask the PO before adding.
+
+## Criterion Format
+
+All criteria use this format (mandatory `Source:` field):
+
+```markdown
+- `<uuid>`: <Short description>.
+  Source: <source>
+
+  Given: <precondition>
+  When: <action>
+  Then: <single observable outcome>
+```
+
+**Source values** (choose exactly one):
+- `stakeholder` — an external stakeholder gave this requirement to the PO
+- `po` — the PO originated this criterion independently
+- `developer` — a gap found during Step 4 implementation
+- `reviewer` — a gap found during Step 5 verification
+- `bug` — a post-merge regression; the feature doc was reopened
+
+**Rules**:
+- UUID must be unique across the entire project
+- Generate: `python -c "import uuid; print(uuid.uuid4())"`
+- `Then` must be a single observable, measurable outcome — no "and"
+- Do not add `Source:` retroactively to criteria that predate this field
+
+## Procedure by Role
+
+### Developer (Step 4)
+
+1. Determine whether this is a gap within scope or a new feature (use the decision table above)
+2. If it is within scope:
+   a. Add the criterion to the feature doc with `Source: developer`
+   b. Write the failing test for it (load `skill tdd`)
+   c. Make it green (continue Red-Green-Refactor)
+   d. Commit: `test(<feature-name>): add gap criterion <uuid>`
+3. If it is out of scope: write a note in TODO.md under `## Next` and flag it for the PO after Step 5
+
+### Reviewer (Step 5)
+
+1. Determine whether this is a gap within scope or a new feature
+2. If it is within scope:
+   - Add the criterion to the feature doc with `Source: reviewer`
+   - Record in the REJECTED report: "Added criterion `<uuid>` — developer must implement before resubmitting"
+3. If it is out of scope:
+   - Do not add the criterion
+   - Note it in the report as a future backlog item
+
+### Post-merge Defect
+
+1. Move the feature doc back to in-progress:
+   ```bash
+   mv docs/features/completed/<name>.md docs/features/in-progress/<name>.md
+   git add -A
+   git commit -m "chore(workflow): reopen <name> for bug fix"
+   ```
+2. Add the new criterion with `Source: bug`
+3. Return to Step 3 (write failing test), then Step 4 (implement), then Step 5 (verify), then Step 6 (accept)
+4. Update TODO.md to reflect the reopened feature at the correct step
+
+## Checklist
+
+Before committing a new criterion:
+- [ ] UUID is unique (search: `grep -r "<uuid>" docs/features/` and `grep -r "<uuid>" tests/`)
+- [ ] `Source:` value is one of the five valid values
+- [ ] `Then` is a single, observable outcome (no "and")
+- [ ] Blank line between `Source:` line and `Given:`
+- [ ] A corresponding test will be written (or already exists)
diff --git a/.opencode/skills/implementation/SKILL.md b/.opencode/skills/implementation/SKILL.md
index cbf1b35..833af9d 100644
--- a/.opencode/skills/implementation/SKILL.md
+++ b/.opencode/skills/implementation/SKILL.md
@@ -102,7 +102,7 @@ def register_user(email: EmailAddress, repo: UserRepository) -> "User":
 ## RED — Confirm the Test Fails
 
 ```bash
-pytest tests/unit/<file>_test.py::test_<name> -v
+uv run pytest tests/<file>_test.py::test_<name> -v
 ```
 
 Expected: `FAILED` or `ERROR`. If it passes before you've written code, the test is wrong — fix it.
@@ -116,8 +116,8 @@ Write the least code that makes the test pass. Apply during GREEN:
 Do NOT apply during GREEN: DRY, SOLID, Object Calisthenics — those come in refactor.
 
 ```bash
-pytest tests/unit/<file>_test.py::test_<name> -v   # must be PASSED
-task test                                            # must all still pass
+uv run pytest tests/<file>_test.py::test_<name> -v   # must be PASSED
+uv run task test                                      # must all still pass
 ```
 
 ## REFACTOR — Apply Principles (in priority order)
@@ -137,10 +137,12 @@ task test                                            # must all still pass
 4. **Type hints**: add/fix type annotations on all public functions and classes
 5. **Docstrings**: Google-style on all public functions and classes
 
+> **Note**: `uv run task test` runs pytest with `--doctest-modules`, which executes code examples embedded in source docstrings. Keep `Examples:` blocks in Google-style docstrings valid and executable. If an example should not be run, mark it with `# doctest: +SKIP`.
+
 ```bash
-task test          # must still pass
-task lint          # must exit 0
-task static-check  # must exit 0
+uv run task test          # must still pass
+uv run task lint          # must exit 0
+uv run task static-check  # must exit 0
 ```
 
 ## COMMIT
@@ -157,10 +159,10 @@ Then move to the next failing test.
 After all tests are green, before telling the reviewer you are ready:
 
 ```bash
-task lint                # exit 0
-task static-check        # exit 0, 0 errors
-task test                # exit 0, all pass, coverage 100%
-timeout 10s task run     # exit 0 or non-124; exit 124 = hung process = fix it
+uv run task lint                # exit 0
+uv run task static-check        # exit 0, 0 errors
+uv run task test                # exit 0, all pass, coverage 100%
+timeout 10s uv run task run     # exit non-124; exit 124 = hung process = fix it
 ```
 
 All four must pass. Do not hand off broken work.
diff --git a/.opencode/skills/scope/SKILL.md b/.opencode/skills/scope/SKILL.md
index bcc7eb4..afc439d 100644
--- a/.opencode/skills/scope/SKILL.md
+++ b/.opencode/skills/scope/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: scope
-description: Step 1 — define user stories, acceptance criteria with UUID traceability, and test strategy
+description: Step 1 — define user stories and acceptance criteria with UUID traceability
 version: "1.0"
 author: product-owner
 audience: product-owner
@@ -19,10 +19,11 @@ When the PO is starting a new feature. The output is a feature document in `docs
 
 ### 1. Create the Feature Document
 
-Create `docs/features/backlog/<feature-name>.md`. Use kebab-case for the filename.
+Create `docs/features/backlog/<verb>-<object>.md`. Filename must be kebab-case, imperative verb first, 2–4 words.
+Examples: `display-version.md`, `authenticate-user.md`, `export-metrics-csv.md`
 
 ```markdown
-# Feature: <Name>
+# Feature: <Verb> <Object>
 
 ## User Stories
 - As a <role>, I want <goal> so that <benefit>
@@ -30,10 +31,11 @@ Create `docs/features/backlog/<feature-name>.md`. Use kebab-case for the filenam
 ## Acceptance Criteria
 
 - `<uuid>`: <Short description>.
+  Source: <stakeholder | po | developer | reviewer | bug>
+
   Given: <precondition>
   When: <action>
   Then: <expected outcome>
-  Test strategy: unit | integration
 
 ## Notes
 <constraints, risks, out-of-scope items, dependencies>
@@ -65,17 +67,24 @@ python -c "import uuid; print(uuid.uuid4())"
 **Format** (mandatory — exactly this structure):
 ```markdown
 - `a1b2c3d4-e5f6-7890-abcd-ef1234567890`: Ball bounces off top wall.
+  Source: stakeholder
+
   Given: A ball moving upward reaches y=0
   When: The physics engine processes the next frame
   Then: The ball velocity y-component becomes positive
-  Test strategy: unit
 ```
 
+**Source values** (choose exactly one):
+- `stakeholder` — an external stakeholder gave this requirement to the PO
+- `po` — the PO originated this criterion independently
+- `developer` — a gap found during Step 4 implementation
+- `reviewer` — a gap found during Step 5 verification
+- `bug` — a post-merge regression; the feature doc was reopened
+
 **Rules**:
 - UUID must be unique across the entire project, not just this feature
 - First line: UUID + colon + short description ending with a period
-- Given/When/Then on separate indented lines
-- Test strategy is `unit` (isolated) or `integration` (multiple components)
+- `Source:` on the next line, followed by a blank line, then Given/When/Then
 - Use plain English, not technical jargon in Given/When/Then
 - "Then" must be a single observable, measurable outcome — no "and"
 
@@ -85,42 +94,28 @@ python -c "import uuid; print(uuid.uuid4())"
 - Multiple behaviors in one criterion (split them)
 - Criteria that test implementation details ("Then: the Strategy pattern is used")
 
-### 4. Identify Test Strategy Per Criterion
-
-For each criterion, decide:
-
-| Strategy | Use When |
-|---|---|
-| `unit` | One function or class in isolation; no external dependencies |
-| `integration` | Multiple components working together; external state (DB, filesystem, network) |
-
-When in doubt, start with `unit`. The developer may upgrade to `integration` if the implementation requires it.
-
-### 5. Review Checklist
+### 4. Review Checklist
 
 Before committing:
+- [ ] Filename is `<verb>-<object>.md`, imperative verb first, 2–4 words
+- [ ] Title matches filename: `# Feature: <Verb> <Object>` in Title Case
 - [ ] Every user story has at least one acceptance criterion
 - [ ] Every UUID is unique (check existing feature docs)
-- [ ] Every criterion has Given/When/Then and a test strategy
+- [ ] Every criterion has a `Source:` field with one of the five valid values
+- [ ] Every criterion has Given/When/Then
+- [ ] Blank line between `Source:` and `Given:`
 - [ ] "Then" is a single, observable, measurable outcome
 - [ ] No criterion tests implementation details
 - [ ] Out-of-scope items are explicitly listed in Notes
 
-### 6. Commit and Notify Developer
+### 5. Commit and Notify Developer
 
 ```bash
 git add docs/features/backlog/<feature-name>.md
 git commit -m "feat(scope): define <feature-name> acceptance criteria"
 ```
 
-Then move the feature to in-progress when ready to start:
-```bash
-mv docs/features/backlog/<feature-name>.md docs/features/in-progress/<feature-name>.md
-git add -A
-git commit -m "chore(workflow): start <feature-name>"
-```
-
-Update TODO.md to reflect the new current feature.
+The developer moves the feature from `backlog/` to `in-progress/` as the first act of Step 2.
 
 ## MoSCoW Prioritization
 
diff --git a/.opencode/skills/tdd/SKILL.md b/.opencode/skills/tdd/SKILL.md
index 7f3321f..e493814 100644
--- a/.opencode/skills/tdd/SKILL.md
+++ b/.opencode/skills/tdd/SKILL.md
@@ -23,30 +23,30 @@ Write tests before writing any production code. Every test must fail when first
 
 ## Test File Structure
 
-Mirror the source tree. File naming: `<descriptive-name>_test.py` — never `test_<name>.py`.
+File naming: `<descriptive-name>_test.py` — never `test_<name>.py`. All test files live directly in `tests/` (flat layout, no subdirectories).
 
 | Source | Test |
 |---|---|
-| `<package>/module.py` | `tests/unit/module_test.py` |
-| `<package>/domain/service.py` | `tests/unit/domain/service_test.py` |
-| `<package>/api/routes.py` | `tests/integration/api/routes_test.py` |
+| `<package>/module.py` | `tests/module_test.py` |
+| `<package>/domain/service.py` | `tests/service_test.py` |
+| `<package>/api/routes.py` | `tests/routes_test.py` |
 
 ## Test Function Naming
 
 ```
-test_<condition>_should_<outcome>
+test_<short_title>
 ```
 
 Examples:
-- `test_ball_hitting_top_wall_should_reverse_vertical_velocity`
-- `test_user_with_invalid_email_should_raise_validation_error`
-- `test_empty_cart_should_return_zero_total`
+- `test_ball_bounces_off_top_wall`
+- `test_email_requires_at_symbol`
+- `test_empty_cart_returns_zero_total`
 
 ## Docstring Format (mandatory)
 
 ```python
-def test_ball_hitting_top_wall_should_reverse_vertical_velocity():
-    """a1b2c3d4-e5f6-7890-abcd-ef1234567890: Ball bounces off top wall.
+def test_ball_bounces_off_top_wall():
+    """a1b2c3d4-e5f6-7890-abcd-ef1234567890
 
     Given: A ball moving upward reaches y=0
     When: The physics engine processes the next frame
@@ -61,11 +61,11 @@ def test_ball_hitting_top_wall_should_reverse_vertical_velocity():
 ```
 
 **Rules**:
-- First line: `<uuid>: <short description ending with a period>`
-- Mandatory blank line between first line and Given
+- First line: `<uuid>` only — no description
+- Mandatory blank line between UUID and Given
 - Given/When/Then on separate indented lines
 - `# Given`, `# When`, `# Then` comments in the test body mirror the docstring
-- UUID must exactly match the acceptance criterion UUID in the feature doc
+- UUID must exactly match the UUID on the criterion's first line in the feature doc
 
 ## Markers
 
@@ -77,15 +77,25 @@ Slow tests additionally get `@pytest.mark.slow` (anything > 50ms: DB, network, H
 
 ```python
 @pytest.mark.unit
-def test_ball_hitting_top_wall_should_reverse_vertical_velocity():
+def test_ball_bounces_off_top_wall():
     ...
 
 @pytest.mark.integration
 @pytest.mark.slow
-def test_checkout_flow_should_persist_order_to_database():
+def test_checkout_persists_order_to_database():
     ...
 ```
 
+### Choosing a Marker
+
+| Marker | Use When |
+|---|---|
+| `unit` | One function or class in isolation; no external dependencies |
+| `integration` | Multiple components working together; external state (DB, filesystem, network) |
+| `slow` | Test takes > 50ms — add alongside `unit` or `integration`, never alone |
+
+When in doubt, start with `unit`. Upgrade to `integration` if the implementation requires external state.
+
 ## Hypothesis Tests
 
 Use `@given` with `@example` for known edge cases and `assume` for precondition filtering. Configure via `@settings`, not markers.
@@ -100,8 +110,8 @@ from hypothesis import strategies as st
 @example(x=0.0)
 @example(x=-100.0)
 @settings(max_examples=200)
-def test_compute_distance_should_always_return_non_negative(x: float) -> None:
-    """b2c3d4e5-f6a7-8901-bcde-f12345678901: Distance is always non-negative.
+def test_compute_distance_always_non_negative(x: float) -> None:
+    """b2c3d4e5-f6a7-8901-bcde-f12345678901
 
     Given: Any floating point input value
     When: compute_distance is called
diff --git a/.opencode/skills/verify/SKILL.md b/.opencode/skills/verify/SKILL.md
index 1ce8fe6..ed36319 100644
--- a/.opencode/skills/verify/SKILL.md
+++ b/.opencode/skills/verify/SKILL.md
@@ -21,7 +21,6 @@ After the developer signals Step 4 is complete. Do not start verification until
 
 Read `docs/features/in-progress/<feature-name>.md`. Extract:
 - All UUIDs and their descriptions
-- The test strategy for each criterion (unit/integration)
 
 ### 2. Check Commit History
 
@@ -39,22 +38,22 @@ Verify:
 Run each command. Record the exact exit code and output summary.
 
 ```bash
-task lint
+uv run task lint
 ```
 Expected: exit 0, no issues. If ruff makes auto-fixes, that is a FAIL (developer should have run lint before handing off).
 
 ```bash
-task static-check
+uv run task static-check
 ```
 Expected: exit 0, `0 errors, 0 warnings` from pyright.
 
 ```bash
-task test
+uv run task test
 ```
 Expected: exit 0, all tests pass, coverage ≥ 100%.
 
 ```bash
-timeout 10s task run
+timeout 10s uv run task run
 ```
 Expected: exit 0 (app completes) or any non-124 exit. **Exit code 124 means the process was killed by timeout — the app hung or is an infinite loop. This is a FAIL.** For interactive/long-running apps, check that startup completes without error before the timeout.
 
@@ -64,11 +63,13 @@ Expected: exit 0 (app completes) or any non-124 exit. **Exit code 124 means the
 
 For each acceptance criterion UUID in the feature doc:
 - Find the corresponding test function using `grep -r "<uuid>" tests/`
-- Verify the test function name follows `test_<condition>_should_<outcome>`
-- Verify the test docstring contains the UUID on the first line
+- Verify the test function name follows `test_<short_title>`
+- Verify the test docstring contains only the UUID on the first line (no description)
 
 Flag any UUID with no corresponding test as UNCOVERED.
 
+If you identify a missing behavior that has no acceptance criterion, load `skill extend-criteria` to determine whether it is a gap within scope (add criterion with `Source: reviewer`) or a new feature to escalate to the PO.
+
 ### 5. Code Review
 
 Read the source files changed in this feature. Check:
@@ -103,7 +104,7 @@ Read the source files changed in this feature. Check:
 9. No getters/setters; use commands and queries
 
 **Tests**
-- Every test has UUID docstring: `<uuid>: <description>.` on the first line, blank line, then Given/When/Then
+- Every test has a UUID docstring: `<uuid>` alone on the first line, then a blank line, then Given/When/Then
 - Tests assert behavior, not structure (no `isinstance`, no `type()`, no internal attribute access)
 - `# Given`, `# When`, `# Then` comments in test body
 - No `pytest.skip`, no `pytest.mark.xfail` without explicit justification
@@ -123,15 +124,15 @@ Read the source files changed in this feature. Check:
 ### Commands
 | Command | Result | Notes |
 |---------|--------|-------|
-| task lint | PASS / FAIL | <details if fail> |
-| task static-check | PASS / FAIL | <errors if fail> |
-| task test | PASS / FAIL | <failures or coverage% if fail> |
-| timeout 10s task run | PASS / FAIL / TIMEOUT | <error or timeout if fail> |
+| uv run task lint | PASS / FAIL | <details if fail> |
+| uv run task static-check | PASS / FAIL | <errors if fail> |
+| uv run task test | PASS / FAIL | <failures or coverage% if fail> |
+| timeout 10s uv run task run | PASS / FAIL / TIMEOUT | <error or timeout if fail> |
 
 ### UUID Traceability
 | UUID | Description | Test | Status |
 |------|-------------|------|--------|
-| `<uuid>` | <description> | `tests/unit/<file>:<function>` | COVERED / NOT COVERED |
+| `<uuid>` | <description> | `tests/<file>:<function>` | COVERED / NOT COVERED |
 
 ### Code Review Findings
 - PASS: <aspect>
diff --git a/AGENTS.md b/AGENTS.md
index f91ac86..3f4ba53 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -39,38 +39,39 @@ STEP 6: ACCEPT         (product-owner)  → demo, validate, merge, tag
 | `code-quality` | developer | pre-handoff |
 | `pr-management` | developer | 6 |
 | `git-release` | developer | 6 |
+| `extend-criteria` | any agent | when a gap is found |
 | `create-skill` | developer | meta |
 
 ## Development Commands
 
 ```bash
 # Install dependencies
-uv venv && uv pip install '.[dev]'
+uv sync --all-extras
 
 # Run the application (for humans)
-task run
+uv run task run
 
 # Run the application with timeout (for agents — prevents hanging on infinite loops)
 # Exit code 124 means the process was killed; treat as FAIL
-timeout 10s task run
+timeout 10s uv run task run
 
 # Run tests (fast, no coverage)
-task test-fast
+uv run task test-fast
 
 # Run full test suite with coverage
-task test
+uv run task test
 
 # Run slow tests only
-task test-slow
+uv run task test-slow
 
 # Lint and format
-task lint
+uv run task lint
 
 # Type checking
-task static-check
+uv run task static-check
 
 # Serve documentation
-task doc-serve
+uv run task doc-serve
 ```
 
 ## Test Conventions
@@ -85,13 +86,13 @@ Every test gets exactly one of `unit` or `integration`. Slow tests additionally
 ### File and Function Naming
 ```
 <descriptive-group-name>_test.py         # file name
-test_<condition>_should_<outcome>        # function name
+test_<short_title>                       # function name
 ```
 
 ### Docstring Format (mandatory)
 ```python
-def test_user_with_invalid_email_should_raise_validation_error():
-    """a1b2c3d4-e5f6-7890-abcd-ef1234567890: Email validation rejects invalid input.
+def test_email_requires_at_symbol():
+    """a1b2c3d4-e5f6-7890-abcd-ef1234567890
 
     Given: An email address without an @ symbol
     When: EmailAddress is constructed
@@ -99,17 +100,19 @@ def test_user_with_invalid_email_should_raise_validation_error():
     """
     # Given
     invalid = "not-an-email"
-    # When / Then
+    # When
+    # Then
     with pytest.raises(ValueError):
         EmailAddress(invalid)
 ```
 
 Rules:
-- First line: `<uuid>: <short description ending with a period>`
-- Mandatory blank line between first line and Given
+- First line: `<uuid>` only — no description
+- Mandatory blank line between UUID and Given
 - `# Given`, `# When`, `# Then` comments in the test body
 - Assert behavior, not structure — no `isinstance()`, `type()`, or internal attributes
-- Never use `noqa`, `pytest.skip`, or `type: ignore`
+- Never use `noqa` or `type: ignore`
+- Never use `pytest.skip` or `pytest.mark.xfail` without written justification in the docstring
 
 ## Code Quality Standards
 
@@ -126,18 +129,23 @@ Rules:
 
 One file per feature, lives in `docs/features/`. PO writes the top sections; developer adds `## Architecture`.
 
+**Naming:** `<verb>-<object>.md` — imperative verb first, kebab-case, 2–4 words.
+Examples: `display-version.md`, `authenticate-user.md`, `export-metrics-csv.md`
+Title matches: `# Feature: <Verb> <Object>` in Title Case.
+
 ```markdown
-# Feature: <Name>
+# Feature: <Verb> <Object>
 
 ## User Stories
 - As a <role>, I want <goal> so that <benefit>
 
 ## Acceptance Criteria
 - `<uuid>`: <Short description ending with a period>.
+  Source: <stakeholder | po | developer | reviewer | bug>
+
   Given: <precondition>
   When: <action>
   Then: <single observable outcome>
-  Test strategy: unit | integration
 
 ## Notes
 <constraints, risks, out-of-scope items>
@@ -148,6 +156,15 @@ One file per feature, lives in `docs/features/`. PO writes the top sections; dev
 ### Build Changes (needs PO approval: yes/no)
 ```
 
+**Source field values:**
+- `stakeholder` — an external stakeholder gave this requirement to the PO
+- `po` — the PO originated this criterion independently
+- `developer` — a gap found during Step 4 implementation
+- `reviewer` — a gap found during Step 5 verification
+- `bug` — a post-merge regression; the feature doc was reopened
+
+**Gaps and Defects:** When any agent finds a missing behavior, load `skill extend-criteria`. It provides the decision rule (gap within scope vs. new feature), UUID assignment, and commit protocol. For post-merge defects, the feature doc moves from `completed/` back to `in-progress/`.
+
 ## Release Management
 
 Version format: `v{major}.{minor}.{YYYYMMDD}`
@@ -171,8 +188,10 @@ Step: <1-6> (<step name>)
 Source: docs/features/in-progress/<name>.md
 
 ## Progress
-- [x] `<uuid>`: <description>
-- [ ] `<uuid>`: <description>  ← next
+- [x] `<uuid>`: <description>          ← done
+- [~] `<uuid>`: <description>          ← in progress
+- [ ] `<uuid>`: <description>          ← next
+- [-] `<uuid>`: <description>          ← cancelled
 
 ## Next
 <One actionable sentence>
diff --git a/README.md b/README.md
index 0d4318d..317963d 100644
--- a/README.md
+++ b/README.md
@@ -23,13 +23,13 @@ cd python-project-template
 curl -LsSf https://astral.sh/uv/install.sh | sh
 
 # 3. Set up the development environment
-uv venv && uv pip install -e '.[dev]'
+uv sync --all-extras
 
 # 4. Customize template placeholders for your project
 opencode && @setup-project
 
 # 5. Validate everything works
-task test && task lint && task static-check && timeout 10s task run
+uv run task test && uv run task lint && uv run task static-check && timeout 10s uv run task run
 ```
 
 ## What This Template Provides
@@ -81,16 +81,16 @@ docs/features/completed/     ← accepted and shipped features
 ## Development Commands
 
 ```bash
-task run              # Run the application (humans)
-timeout 10s task run  # Run with timeout (agents — exit 124 = hung = FAIL)
-task test             # Full test suite with coverage report
-task test-fast        # Tests without coverage (faster iteration)
-task test-slow        # Only slow tests
-task lint             # ruff check + format
-task static-check     # pyright type checking
-task doc-build        # Generate API docs + coverage + test reports
-task doc-publish      # Publish unified docs site to GitHub Pages
-task doc-serve        # Live API doc server at localhost:8080
+uv run task run              # Run the application (humans)
+timeout 10s uv run task run  # Run with timeout (agents — exit 124 = hung = FAIL)
+uv run task test             # Full test suite with coverage report
+uv run task test-fast        # Tests without coverage (faster iteration)
+uv run task test-slow        # Only slow tests
+uv run task lint             # ruff check + format
+uv run task static-check     # pyright type checking
+uv run task doc-build        # Generate API docs + coverage + test reports
+uv run task doc-publish      # Publish unified docs site to GitHub Pages
+uv run task doc-serve        # Live API doc server at localhost:8080
 ```
 
 ## Code Quality Standards
@@ -108,8 +108,8 @@ task doc-serve        # Live API doc server at localhost:8080
 ## Test Conventions
 
 ```python
-def test_<condition>_should_<outcome>():
-    """a1b2c3d4-e5f6-7890-abcd-ef1234567890: Short description ending with a period.
+def test_<short_title>():
+    """a1b2c3d4-e5f6-7890-abcd-ef1234567890
 
     Given: precondition
     When: action
@@ -155,8 +155,8 @@ Each release gets a unique **adjective-animal** name generated from the commit/P
 
 ```bash
 git clone https://github.com/nullhack/python-project-template
-uv venv && uv pip install -e '.[dev]'
-task test && task lint
+uv sync --all-extras
+uv run task test && uv run task lint
 ```
 
 ## License
diff --git a/docs/features/completed/display-version.md b/docs/features/completed/display-version.md
new file mode 100644
index 0000000..99c70cf
--- /dev/null
+++ b/docs/features/completed/display-version.md
@@ -0,0 +1,67 @@
+# Feature: Display Version
+
+## User Stories
+- As a developer, I want to retrieve the application version programmatically so that I can display or log it at runtime.
+- As a developer, I want to control log verbosity via a parameter so that I can tune output for different environments.
+
+## Acceptance Criteria
+- `3f2a1b4c-d5e6-7890-abcd-ef1234567890`: Version string is read from pyproject.toml.
+  Source: po
+
+  Given: pyproject.toml exists with a version field
+  When: version() is called
+  Then: The returned string matches the version in pyproject.toml
+
+- `7a8b9c0d-e1f2-3456-bcde-f12345678901`: Version call emits a log message.
+  Source: po
+
+  Given: pyproject.toml exists with a version field
+  When: version() is called
+  Then: An INFO log message in the format "Version: <version>" is emitted
+
+- `a1b2c3d4-e5f6-7890-abcd-ef1234567890`: Version appears in logs at DEBUG and INFO verbosity.
+  Source: po
+
+  Given: A verbosity level of DEBUG or INFO is passed to main()
+  When: main() is called
+  Then: The version string appears in the log output
+
+- `b2c3d4e5-f6a7-8901-bcde-f12345678901`: Version is absent from logs at WARNING and above.
+  Source: po
+
+  Given: A verbosity level of WARNING, ERROR, or CRITICAL is passed to main()
+  When: main() is called
+  Then: The version string does not appear in the log output
+
+- `e5f6a7b8-c9d0-1234-defa-012345678903`: Invalid verbosity raises a descriptive error.
+  Source: po
+
+  Given: An invalid verbosity string is passed to main()
+  When: main() is called
+  Then: A ValueError is raised with the invalid value and valid options listed
+
+## Notes
+- This is the template example feature shipped with the project skeleton.
+- Tests live in `tests/version_test.py`.
+- No out-of-scope items; this feature is complete and serves as a reference implementation.
+
+## Architecture
+
+### Module Structure
+- `app/version.py` — `version()` function; reads `pyproject.toml` via `tomllib`
+- `main.py` — `main(verbosity)` entry point; configures logging, calls `version()`
+
+### Key Decisions (ADRs)
+
+ADR-001: Read version from pyproject.toml at runtime
+Decision: Use `tomllib` to read the version field from `pyproject.toml` at runtime
+Reason: Avoids duplicating the version between `pyproject.toml` and a `__version__` constant
+Alternatives considered: Hardcoded `__version__` in `app/__init__.py` — rejected to keep a single source of truth
+
+ADR-002: Enforce verbosity via Literal type alias
+Decision: Define `ValidVerbosity` as a `Literal` type alias for the five standard log level strings
+Reason: Catches invalid verbosity values at the type-checker level before runtime
+Alternatives considered: Accepting a plain `str` and validating at runtime only — rejected because it defers errors that the type checker can catch earlier
+
+### Build Changes (needs PO approval: yes/no)
+no
diff --git a/docs/features/in-progress/auto-publish-docs.md b/docs/features/in-progress/auto-publish-docs.md
deleted file mode 100644
index 34b4ca1..0000000
--- a/docs/features/in-progress/auto-publish-docs.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# Feature: Auto-Publish Documentation on Merge to Main
-
-## User Stories
-- As a maintainer, I want documentation to be automatically built and published to GitHub Pages every time a PR is merged to main, so that the published docs are always in sync with the latest code.
-
-## Acceptance Criteria
-
-- `e9b6be8c-c786-4113-9920-f098a954869d`: Docs publish job only runs on push to main.
-  Given: A workflow event is triggered
-  When: The event is a pull_request (not a merge to main)
-  Then: The docs publish job is skipped
-  Test strategy: integration
-
-- `75619a7d-7eb8-45ac-901c-b86486c30690`: Docs are built before publishing.
-  Given: A commit is merged to main
-  When: The publish job runs
-  Then: `task doc-build` runs successfully and produces output in `docs/api/` and `docs/coverage/`
-  Test strategy: integration
-
-- `23cf4a4e-3960-458d-994d-efea5d854895`: Docs are published to GitHub Pages.
-  Given: The doc-build step succeeded
-  When: The publish step runs
-  Then: `ghp-import` (or equivalent) deploys the `docs/` directory to the `gh-pages` branch
-  Test strategy: integration
-
-- `c580dcb9-00c7-4124-9762-70ff1192bd09`: Publish job runs only after quality and tests pass.
-  Given: The quality or test job fails
-  When: The CI workflow runs on main
-  Then: The publish job does not execute
-  Test strategy: integration
-
-- `c942990a-a69c-473e-a820-8ce51e337262`: Workflow has least-privilege permissions for Pages deployment.
-  Given: The publish job needs to write to GitHub Pages
-  When: The job is defined
-  Then: It has `contents: write` (or `pages: write` + `id-token: write`) and no broader permissions
-  Test strategy: integration
-
-## Notes
-- GitHub Pages must be configured on the repo (source: `gh-pages` branch or GitHub Actions deployment)
-- The existing `task doc-publish` already runs `ghp-import -n -p -f docs` — reuse it or inline the equivalent steps
-- Out of scope: publishing on tags/releases (covered by git-release skill), PR preview deployments
-- Out of scope: changing the docs toolchain (pdoc stays)
-- Priority: Must
diff --git a/tests/version_test.py b/tests/version_test.py
index 755b198..d6447fe 100644
--- a/tests/version_test.py
+++ b/tests/version_test.py
@@ -2,12 +2,13 @@
 
 import logging
 import tomllib
+from io import StringIO
 from pathlib import Path
 from typing import cast
 from unittest.mock import patch
 
 import pytest
-from hypothesis import assume, example, given
+from hypothesis import example, given
 from hypothesis import strategies as st
 
 from app import version as m
@@ -15,123 +16,135 @@
 
 
 @pytest.mark.unit
-def test_version_called_should_return_correct_string() -> None:
-    """
-    Given: pyproject.toml exists with version
+def test_version_returns_string_from_pyproject() -> None:
+    """3f2a1b4c-d5e6-7890-abcd-ef1234567890
+
+    Given: pyproject.toml exists with a version field
     When: version() is called
-    Then: Should return version string from pyproject.toml
+    Then: The returned string matches the version in pyproject.toml
     """
-    # Read expected version from same source
+    # Given
     pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
     with Path(pyproject_path).open("rb") as f:
         expected = tomllib.load(f)["project"]["version"]
-
+    # When
     result = m.version()
+    # Then
     assert result == expected
 
 
 @pytest.mark.unit
-def test_version_called_should_log_correct_message(caplog) -> None:
-    """
-    Given: pyproject.toml exists with version
+def test_version_logs_correct_message(caplog) -> None:
+    """7a8b9c0d-e1f2-3456-bcde-f12345678901
+
+    Given: pyproject.toml exists with a version field
     When: version() is called
-    Then: Should log the exact version message format
+    Then: An INFO log message in the format "Version: <version>" is emitted
     """
-    # Read expected version from same source
+    # Given
     pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
     with Path(pyproject_path).open("rb") as f:
         expected_version = tomllib.load(f)["project"]["version"]
-
-    # Capture logs at INFO level
+    # When
     with caplog.at_level(logging.INFO):
-        result = m.version()
-
-    # Verify the exact log message format
+        m.version()
+    # Then
     assert f"Version: {expected_version}" in caplog.text
-    assert result == expected_version
 
 
 @pytest.mark.integration
+@pytest.mark.slow
 @example(verbosity="DEBUG")
 @example(verbosity="INFO")
-@given(verbosity=st.sampled_from(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]))
-def test_main_with_verbosity_level_should_control_version_output(
+@given(verbosity=st.sampled_from(["DEBUG", "INFO"]))
+def test_version_appears_in_logs_at_debug_and_info(
     verbosity: str,
 ) -> None:
-    """
-    Given: Different verbosity levels
-    When: main() is called with that verbosity
-    Then: Version should appear in logs for DEBUG and INFO levels,
-          but not for WARNING and above
-    """
-    assume(verbosity != "CRITICAL")
+    """a1b2c3d4-e5f6-7890-abcd-ef1234567890
 
-    # Read expected version dynamically
+    Given: A verbosity level of DEBUG or INFO is passed to main()
+    When: main() is called
+    Then: The version string appears in the log output
+    """
+    # Given
     pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
     with Path(pyproject_path).open("rb") as f:
         expected_version = tomllib.load(f)["project"]["version"]
-
     expected_level = getattr(logging, verbosity.upper())
-
-    # Create a custom logger handler to capture messages instead of using caplog
-    from io import StringIO
-
     log_stream = StringIO()
     handler = logging.StreamHandler(log_stream)
     handler.setLevel(expected_level)
 
-    # Mock logging.basicConfig to use our custom handler instead
     def mock_basic_config(**kwargs):
-        # Set up the logger with our custom handler for testing
         logger = logging.getLogger("app")
         logger.handlers.clear()
         logger.addHandler(handler)
         logger.setLevel(kwargs.get("level", logging.INFO))
 
-    with patch(
-        "main.logging.basicConfig", side_effect=mock_basic_config
-    ) as mock_basic_config:
-        # Call main() directly with the verbosity level (cast to satisfy type checker)
+    # When
+    with patch("main.logging.basicConfig", side_effect=mock_basic_config):
         main(cast(ValidVerbosity, verbosity))
+    # Then
+    log_output = log_stream.getvalue()
+    assert f"Version: {expected_version}" in log_output, (
+        f"Expected version message at {verbosity} level, but got output: {log_output!r}"
+    )
 
-        # Verify that logging.basicConfig was called with the correct level
-        mock_basic_config.assert_called_once()
-        _args, kwargs = mock_basic_config.call_args
-        assert kwargs["level"] == expected_level
 
-    # Check the captured log output
-    log_output = log_stream.getvalue()
+@pytest.mark.integration
+@pytest.mark.slow
+@example(verbosity="WARNING")
+@example(verbosity="ERROR")
+@given(verbosity=st.sampled_from(["WARNING", "ERROR", "CRITICAL"]))
+def test_version_absent_from_logs_at_warning_and_above(
+    verbosity: str,
+) -> None:
+    """b2c3d4e5-f6a7-8901-bcde-f12345678901
+
+    Given: A verbosity level of WARNING, ERROR, or CRITICAL is passed to main()
+    When: main() is called
+    Then: The version string does not appear in the log output
+    """
+    # Given
+    pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
+    with Path(pyproject_path).open("rb") as f:
+        expected_version = tomllib.load(f)["project"]["version"]
+    expected_level = getattr(logging, verbosity.upper())
+    log_stream = StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(expected_level)
 
-    # Standard logging behavior: DEBUG and INFO levels should show INFO messages
-    # WARNING, ERROR, CRITICAL levels should NOT show INFO messages
-    if verbosity in ["WARNING", "ERROR", "CRITICAL"]:
-        # These levels should NOT show INFO messages since INFO < WARNING/ERROR/CRITICAL
-        assert f"Version: {expected_version}" not in log_output, (
-            f"Expected no version messages at {verbosity} level, "
-            f"but got output: {log_output!r}"
-        )
-    else:
-        # DEBUG and INFO levels should show INFO messages
-        # since INFO >= DEBUG and INFO >= INFO
-        assert f"Version: {expected_version}" in log_output, (
-            f"Expected version message at {verbosity} level, "
-            f"but got output: {log_output!r}"
-        )
+    def mock_basic_config(**kwargs):
+        logger = logging.getLogger("app")
+        logger.handlers.clear()
+        logger.addHandler(handler)
+        logger.setLevel(kwargs.get("level", logging.INFO))
+
+    # When
+    with patch("main.logging.basicConfig", side_effect=mock_basic_config):
+        main(cast(ValidVerbosity, verbosity))
+    # Then
+    log_output = log_stream.getvalue()
+    assert f"Version: {expected_version}" not in log_output, (
+        f"Expected no version messages at {verbosity} level, "
+        f"but got output: {log_output!r}"
+    )
 
 
 @pytest.mark.unit
-def test_main_with_invalid_verbosity_should_raise_value_error() -> None:
-    """
-    Given: An invalid verbosity level
-    When: main() is called with invalid verbosity
-    Then: Should raise ValueError with helpful message
+def test_invalid_verbosity_raises_value_error() -> None:
+    """e5f6a7b8-c9d0-1234-defa-012345678903
+
+    Given: An invalid verbosity string is passed to main()
+    When: main() is called
+    Then: A ValueError is raised with the invalid value and valid options listed
     """
-    # Test that calling main() with invalid verbosity raises ValueError
-    # Use cast to bypass type checking for this intentionally invalid test
+    # Given
+    invalid_verbosity = "INVALID_LEVEL"
+    # When
     with pytest.raises(ValueError, match=r"Invalid verbosity level") as exc_info:
-        main(cast(ValidVerbosity, "INVALID_LEVEL"))
-
-    # Verify the error message contains expected details
+        main(cast(ValidVerbosity, invalid_verbosity))
+    # Then
     error_message = str(exc_info.value)
     assert "Invalid verbosity level 'INVALID_LEVEL'" in error_message
     assert "Valid options: DEBUG, INFO, WARNING, ERROR, CRITICAL" in error_message

From 37a8d40dedbeb6b129bb44e107b28d2dc0438c55 Mon Sep 17 00:00:00 2001
From: nullhack <nullhack@users.noreply.github.com>
Date: Tue, 14 Apr 2026 13:55:55 -0400
Subject: [PATCH 3/4] chore(release): bump version to v3.1.20260414 - Tidal
 Capybara

---
 CHANGELOG.md   | 28 ++++++++++++++++++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d80cc5f..961941d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,34 @@
 
 All notable changes to this template will be documented in this file.
 
+## [v3.1.20260414] - Tidal Capybara - 2026-04-14
+
+### Added
+- **extend-criteria skill**: New skill for any agent to add acceptance criteria discovered mid-flight or post-merge, with decision rule (gap within scope vs. new feature), per-role procedures, and commit protocol
+- **Source: field on acceptance criteria**: Mandatory traceability field on every criterion (`stakeholder | po | developer | reviewer | bug`) — records who originated the requirement
+
+### Changed
+- **Test function naming**: `test_<short_title>` replaces `test_<condition>_should_<outcome>`
+- **Test docstring first line**: UUID only (no trailing description) — `"""<uuid>\n\nGiven: ...`
+- **development commands**: All skill and agent files now use `uv run task` consistently (not bare `task`)
+- **tests/ layout**: Documented as flat (no unit/ or integration/ subdirectories)
+- **pytest.skip / pytest.mark.xfail rule**: Aligned across files — allowed only with written justification in the docstring
+- **Marker decision table**: Moved to tdd/SKILL.md only (developer's decision, not PO's)
+- **mv to in-progress**: Ownership reassigned to developer Step 2 (not PO scope step)
+- **TODO.md status markers**: Added `[~]` (in progress) and `[-]` (cancelled) to documented legend
+- **--doctest-modules**: Documented in implementation/SKILL.md (`uv run task test` also runs module doctests)
+- **verify/SKILL.md**: Report template uses flat `tests/<file>:<function>` path format
+- **exit code wording**: `exit non-124` (was ambiguous `exit 0 or 124`) in developer.md
+- **README.md**: `uv sync --all-extras` and `uv run task` commands throughout
+
+### Fixed
+- Removed stale `docs/features/in-progress/auto-publish-docs.md`
+- Split compound acceptance criterion (two outcomes in one Then) into two single-outcome criteria
+- Added `@pytest.mark.slow` to Hypothesis tests in reference implementation
+- Added `# Given / # When / # Then` body comments to all reference tests
+- Removed duplicate assertion from `test_version_logs_correct_message`
+- Moved `StringIO` import from test body to module-level imports
+
 ## [v3.0.20260414] - Drifting Axolotl - 2026-04-14
 
 ### Breaking Changes
diff --git a/pyproject.toml b/pyproject.toml
index 56165d2..4eebc1d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "python-project-template"
-version = "3.0.20260414"
+version = "3.1.20260414"
 description = "Python template with some awesome tools to quickstart any Python project"
 readme = "README.md"
 requires-python = ">=3.13"

From f1297f61bfddeb67c3b497e9c9a7d866812dab95 Mon Sep 17 00:00:00 2001
From: nullhack <nullhack@users.noreply.github.com>
Date: Tue, 14 Apr 2026 14:12:24 -0400
Subject: [PATCH 4/4] chore: update uv.lock for v3.1.20260414

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 219fdd9..8131d68 100644
--- a/uv.lock
+++ b/uv.lock
@@ -723,7 +723,7 @@ wheels = [
 
 [[package]]
 name = "python-project-template"
-version = "3.0.20260414"
+version = "3.1.20260414"
 source = { virtual = "." }
 dependencies = [
     { name = "fire" },