Unsupervisedcom · nhorton · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -0,0 +1,5 @@
+{
+  "enabledPlugins": {
+    "deepwork@deepwork-plugins": true
+  }
+}
diff --git a/.deepreview b/.deepreview
@@ -0,0 +1,249 @@
+prompt_best_practices:
+  description: "Review prompt/instruction files for Anthropic prompt engineering best practices."
+  match:
+    include:
+      - "**/CLAUDE.md"
+      - "**/AGENTS.md"
+      - ".claude/**/*.md"
+      - "prompts/*.txt"
+      - "prompts/*.md"
+      - ".deepwork/review/*.md"
+      - ".deepwork/jobs/**/*.md"
+  review:
+    strategy: individual
+    instructions: |
+      Review this file as a prompt or instruction file, evaluating it
+      against Anthropic's prompt engineering best practices.
+
+      For each issue found, report:
+      1. Location (section or line)
+      2. Severity (Critical / High / Medium / Low)
+      3. Best practice violated
+      4. Description of the issue
+      5. Suggested improvement
+
+      Check for:
+      - Clarity and specificity (concrete criteria vs vague language like
+        "do a good job", "be thorough")
+      - Structure and formatting (XML tags, headers, numbered lists for
+        distinct sections; logical separation of context, instructions,
+        constraints, output format)
+      - Role and context (enough context for the AI, explicit assumptions)
+      - Examples for complex/nuanced tasks (few-shot, edge cases)
+      - Output format specification (JSON, markdown, length constraints)
+      - Prompt anti-patterns (contradictions, instruction overload without
+        ranking, critical instructions buried in walls of text, relying on
+        the AI to infer important constraints)
+      - Variable/placeholder clarity (only when the prompt parameterizes
+        dynamic inputs)
+
+      Use judgment proportional to the file's complexity. A short, focused
+      instruction for a simple task does not need few-shot examples or XML
+      tags. Do not flag issues for best practices that are irrelevant to the
+      file's purpose.
+
+      Note: prompts/review.txt is the production prompt that this action
+      ships to Claude Code in CI. It is a CRITICAL file for this repo —
+      review it strictly, especially the "Automation Rules" section, the
+      commit-and-push instructions, and the tracking-comment output rules.
+
+update_action_surface_docs:
+  description: "Keep README.md and CLAUDE.md in sync with action.yml, the production prompt, and the example workflow."
+  match:
+    include:
+      - "action.yml"
+      - "prompts/review.txt"
+      - "examples/deepwork-review.yml"
+      - "README.md"
+      - "CLAUDE.md"
+  review:
+    strategy: matches_together
+    instructions: |
+      When source files change, check whether the following documentation
+      files need updating:
+      - README.md
+      - CLAUDE.md
+
+      Read each documentation file and compare its content against the
+      changed source files. Flag any sections that are now outdated or
+      inaccurate. If a documentation file itself was changed, verify the
+      updates are correct and consistent with the source files.
+
+      ## High-risk drift points in README.md
+
+      1. The "Inputs" table — every input listed must exist in action.yml
+         with the documented default. Defaults to verify: model, max_turns,
+         commit_message. If action.yml adds, removes, or renames an input,
+         the table is wrong.
+
+      2. The "How It Works" numbered list — must mirror action.yml's
+         actual behavior. action.yml currently delegates the review,
+         auto-commit, and tracking-comment updates to
+         `anthropics/claude-code-action@v1`; if that underlying action is
+         swapped out or its inputs change (`plugins`,
+         `plugin_marketplaces`, `track_progress`, `use_commit_signing`,
+         `bot_name`, `claude_args`), this list is wrong.
+
+      3. The "Usage" section's example workflow — must stay aligned with
+         examples/deepwork-review.yml. The concurrency group, the
+         permissions block (including `id-token: write`), the checkout
+         config, and the action invocation must all match. If the example
+         workflow is updated, README's snippet must be updated to match
+         (or vice versa).
+
+      4. The "Security" section — claims about which underlying action is
+         used (`anthropics/claude-code-action@v1`) and the
+         `deepwork-action[bot]` identity. These must match the current
+         action.yml's `uses:` line and `bot_name:` input.
+
+      5. The "Caching" section — claims caching of `.deepwork/tmp` keyed on
+         PR number. If action.yml's cache step changes its path or key
+         strategy, this section is wrong.
+
+      6. The "Review Comments" section — describes a SINGLE tracking
+         comment posted via `track_progress: true` (no per-line inline
+         comments — the upstream `claude-code-action@v1` system prompt
+         forbids creating new comments on `pull_request` events). If we
+         ever switch to a different output mechanism, or the upstream
+         action changes its policy and we start posting inline comments
+         again, this section is wrong.
+
+      7. The "no self-trigger guard needed" claim — relies on GitHub's
+         built-in rule that GITHUB_TOKEN-pushed commits do not retrigger
+         workflows. If action.yml starts pushing via a PAT or GitHub App
+         token instead, the README claim becomes false and the example
+         workflow will need a guard added back.
+
+      ## High-risk drift points in CLAUDE.md
+
+      8. The "End-to-end flow" numbered list must mirror action.yml's
+         actual composite steps in order. CLAUDE.md names specific inputs
+         passed to `anthropics/claude-code-action@v1`
+         (`plugin_marketplaces`, `plugins`, `track_progress`,
+         `use_commit_signing`, `bot_name`, `claude_args`); verify each is
+         still set in action.yml.
+
+      9. The "What used to be here and isn't anymore" section — describes
+         what was deleted in the rewrite (the 7-step composite,
+         `scripts/post-review-comments.py`, `/tmp/deepwork_changes.json`,
+         `claude-code-base-action@beta`). This section should stay stable
+         unless someone adds those things back. If any of those deleted
+         artifacts reappear in the repo, the section is a LIE and must be
+         updated.
+
+      10. The "Self-trigger guard" section asserts that GITHUB_TOKEN pushes
+          don't retrigger. If action.yml's push path ever switches to a
+          non-GITHUB_TOKEN credential (PAT, GitHub App token), this
+          assertion becomes false.
+
+      11. The "Repository layout" bullet list must reflect the actual
+          top-level files and directories. Verify that action.yml,
+          prompts/review.txt, examples/deepwork-review.yml, .deepwork/
+          (and in particular .deepwork/review/ but NOT .deepwork/tmp/),
+          and .deepreview are all still present and still described
+          correctly. If `scripts/` reappears, or a new top-level directory
+          is added, this list is incomplete.
+
+      12. The "prompt contract" section enumerates 5 essential guarantees
+          about prompts/review.txt. Verify that review.txt still
+          (a) starts with `/review`, (b) enforces CI-mode rules including
+          never using AskUserQuestion, (c) tells Claude to commit and
+          push its own edits using `git add`, `git commit`, and the
+          upstream action's `git-push.sh` helper script,
+          (d) directs Claude's output to the upstream tracking comment
+          via `mcp__github_comment__update_claude_comment` and forbids
+          creating new PR comments, (e) prefers correctness over style
+          when findings conflict. If review.txt diverges from any of
+          these, the CLAUDE.md section is wrong. Also verify that
+          review.txt does NOT contain any of the now-removed instructions
+          (the old "NEVER run git write commands" rule, or the old
+          `mcp__github_inline_comment__create_inline_comment` posting
+          instructions) — if it does, those are stale leftovers.
+    additional_context:
+      unchanged_matching_files: true
+
+python_code_review:
+  description: "Review Python files against project conventions, plus DRY and comment-accuracy checks."
+  match:
+    include:
+      - "**/*.py"
+    exclude:
+      - "**/__pycache__/**"
+      - "**/.venv/**"
+  review:
+    strategy: individual
+    instructions: |
+      Review this Python file against the project's conventions documented in
+      `.deepwork/review/python_conventions.md`. Read that file first.
+
+      Check for:
+      - Module structure: shebang, module docstring, `from __future__ import
+        annotations`, import ordering (stdlib only when possible), section
+        banner comments for logical separation in single-file scripts.
+      - Naming and types: snake_case, built-in generic type hints
+        (`list[str]`, not `typing.List`), explicit return types on every
+        function, sparing use of `Any`.
+      - Functions and structure: small named top-level functions, `main()`
+        orchestration entry point, no gratuitous classes or dataclasses.
+      - I/O and shell commands: `subprocess.run(..., capture_output=True,
+        text=True)` via a `run()` wrapper, `pathlib.Path` for file I/O,
+        `os.environ.get("NAME", default)` not raw indexing for inputs that
+        might be missing.
+      - Error handling: narrow exception types (never bare `except:` or
+        `except Exception:`), warnings printed to `sys.stderr`. Non-fatal
+        failures should `sys.exit(0)` to avoid failing the calling CI step;
+        use `sys.exit(1)` only for actually broken state.
+      - Strings: f-strings only, parenthesized multi-line concatenation.
+      - Docstrings: triple-quoted, one-line summaries unless the function
+        does something subtle that justifies more.
+
+      Additionally, ALWAYS check:
+
+      - **DRY violations**: Is there duplicated logic or repeated patterns
+        that should be extracted into a shared function or helper? In a
+        single-file script the bar for extraction is higher than in a
+        library, but three near-identical blocks is still too many.
+
+      - **Comment accuracy**: Are all comments, docstrings, and inline
+        documentation still accurate after the changes? Flag any comment
+        that describes behavior that no longer matches the code. This is
+        especially important for module-level docstrings that describe
+        what files the script reads/writes — those drift silently when
+        the file paths change.
+
+      Output: PASS if the file is consistent with these conventions and no
+      DRY or comment-accuracy issues are found. Otherwise FAIL with a
+      bulleted list of specific issues, each tied to a line number.
+
+suggest_new_reviews:
+  description: "Analyze all changes and suggest new review rules that would catch issues going forward."
+  match:
+    include:
+      - "**/*"
+    exclude:
+      - ".github/**"
+      - ".deepwork/tmp/**"
+  review:
+    strategy: matches_together
+    instructions: |
+      Analyze the changeset to determine whether any new DeepWork review rules
+      should be added.
+
+      1. Call get_configured_reviews to see all currently configured review
+         rules. Also call get_named_schemas to see existing DeepSchemas.
+         Understand what's already covered. If these tools are not available,
+         read the `.deepreview` file(s) directly instead.
+      2. For each change, consider:
+         - Did this change introduce a type of issue a review rule could catch?
+         - Is there a pattern likely to recur?
+         - Would an existing rule benefit from a small scope expansion (e.g.
+           adding a glob to an existing include list)?
+      3. Be extremely conservative. Only suggest rules that are:
+         - Extremely narrow (targets 1 specific file or small bounded set), OR
+         - A slight addition to an existing rule (no new agent spawned), OR
+         - Catches an issue likely to recur and worth the ongoing cost
+      4. Write new rules directly to the appropriate `.deepreview` file. If a
+         rule needs a dedicated instruction file, create it in
+         `.deepwork/review/`.
+      5. If no rules are warranted, say so. An empty suggestion list is valid.
+         Do not invent rules just to have output.
diff --git a/.deepwork/review/python_conventions.md b/.deepwork/review/python_conventions.md
@@ -0,0 +1,74 @@
+# Python Conventions
+
+Conventions for Python code in this repository. Keep this short and
+actionable — it's a reference for reviewers, not an exhaustive style
+guide. (Originally derived from `scripts/post-review-comments.py`, which
+has since been deleted. The conventions remain valid for any future Python
+files added to the repo.)
+
+## Module structure
+
+- Start with `#!/usr/bin/env python3` shebang for executable scripts.
+- Module-level docstring (triple-quoted) immediately after the shebang,
+  describing what the script does, what it reads, what it writes, and how
+  it's invoked. Multi-line is fine.
+- `from __future__ import annotations` near the top so type hints don't
+  evaluate at runtime.
+- Imports: stdlib only when possible (this repo runs in CI with no extra
+  pip installs). Order: `__future__` → stdlib → third-party → local.
+- Use `# ---------------------------------------------------------------------------`
+  banners with a `# Section Title` line to separate logical sections inside
+  a single-file script. This makes a flat script readable without splitting
+  it into modules.
+
+## Naming and types
+
+- `snake_case` for functions and variables; `PascalCase` for classes (none
+  in current code).
+- Type-hint every function signature, including return types. Use the
+  built-in generic syntax (`list[str]`, `dict[str, Any]`) — not the
+  `typing.List` / `typing.Dict` legacy aliases. `from __future__ import
+  annotations` makes this work on older Pythons.
+- Use `Any` (`from typing import Any`) sparingly — only when the structure
+  is genuinely dynamic (e.g., a JSON-decoded payload).
+
+## Functions and structure
+
+- Prefer small, named top-level functions over inline blocks. Each
+  logical step should be its own function.
+- Keep `main()` as the orchestration entry point. Wire it via
+  `if __name__ == "__main__": main()`.
+- Don't introduce dataclasses or classes unless there's actual state to
+  hold. The script style this repo uses is dict-based.
+
+## I/O and external commands
+
+- For shell commands, use `subprocess.run(cmd, capture_output=True, text=True)`
+  via a small `run()` wrapper rather than `os.system` or `subprocess.call`.
+- Do file I/O through `pathlib.Path` (e.g. `Path(p).read_text()`), not `open(string)`.
+- Read environment variables with `os.environ.get("NAME", default)` — never
+  raw `os.environ["NAME"]` for inputs that might be missing.
+- For non-fatal failures, print a warning to `sys.stderr` and
+  `sys.exit(0)` so the calling CI step doesn't fail. Hard-fail with
+  `sys.exit(1)` only for actual broken state.
+
+## Strings and formatting
+
+- f-strings for interpolation. No `%` formatting, no `.format()`.
+- Multi-line string concatenation with parentheses, not `+`.
+
+## Error handling
+
+- Catch specific exception classes, not bare `except:` or `except Exception:`.
+  Keep exception lists narrow (e.g., `except (json.JSONDecodeError, OSError) as exc:`).
+- Print warnings with the exception value: `print(f"Warning: ... {exc}",
+  file=sys.stderr)`.
+
+## Comments and docstrings
+
+- Function docstrings are triple-quoted, one-line summaries unless the
+  function does something subtle that justifies a multi-line docstring.
+- Inline comments explain *why*, not *what*. Let function names carry the
+  meaning; keep inline comments sparse.
+- Section banners (`# ----` blocks) are the exception: they're structural,
+  not explanatory.
diff --git a/.github/workflows/self-review.yml b/.github/workflows/self-review.yml
@@ -0,0 +1,36 @@
+name: DeepWork Self-Review
+
+# This repo dogfoods its own action on every PR. Unlike examples/deepwork-review.yml
+# (the copy-pasteable reference for consumers, which pins @v1), this workflow uses
+# `uses: ./` so it exercises the action.yml from the PR branch itself — otherwise
+# PRs could never test changes to the action before they are tagged as v1.x.y.
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+
+concurrency:
+  group: deepwork-self-review-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  deepwork-review:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write        # push auto-fix commits to the PR branch
+      pull-requests: write   # update the single progress-tracking comment on the PR
+      id-token: write        # OIDC for anthropics/claude-code-action
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ github.event.pull_request.head.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Run DeepWork Review (from PR branch)
+        uses: ./
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -1 +1,5 @@
-scripts/__pycache__/
+__pycache__/
+
+# DeepWork review state — restored from GitHub Actions cache at runtime,
+# regenerated by every /review run. Not source.
+.deepwork/tmp/