diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml
index 1f75c83..1e8913c 100644
--- a/.github/workflows/example-usage.yml
+++ b/.github/workflows/example-usage.yml
@@ -1,126 +1,23 @@
-name: Example Usage of CodeBoarding Action
+name: Architecture diff
+# Reference example only (the README shows the real pull_request usage). Manual
+# trigger so it doesn't run the published @v1 against this repo's own PRs.
on:
workflow_dispatch:
- inputs:
- repository_url:
- description: 'Repository URL to test with'
- required: false
- default: 'https://github.com/microsoft/markitdown'
- type: string
- source_branch:
- description: 'Source branch for comparison'
- required: false
- default: 'main'
- type: string
- target_branch:
- description: 'Target branch for comparison'
- required: false
- default: 'develop'
- type: string
- output_format:
- description: 'Output format for documentation'
- required: false
- default: '.md'
- type: choice
- options:
- - '.md'
- - '.rst'
-
- pull_request:
- branches: [ main, master ]
- types: [opened, synchronize, reopened]
-
- schedule:
- # Run daily at 2 AM UTC
- - cron: '0 2 * * *'
+
+# Only a PR comment is posted — no image is pushed — so contents:write is not needed.
+permissions:
+ pull-requests: write
jobs:
- update-docs-action-usage:
+ architecture-diff:
runs-on: ubuntu-latest
- permissions:
- contents: write
- pull-requests: write
-
+ if: github.event.pull_request.draft == false
+ timeout-minutes: 60
steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- with:
- token: ${{ secrets.GITHUB_TOKEN }}
- fetch-depth: 0 # Required to access branch history
-
- # Determine branches based on context
- - name: Set branch variables
- id: set-branches
- run: |
- if [ "${{ github.event_name }}" = "pull_request" ]; then
- echo "source_branch=${{ github.head_ref }}" >> $GITHUB_OUTPUT
- echo "target_branch=${{ github.base_ref }}" >> $GITHUB_OUTPUT
- elif [ "${{ github.event.inputs.source_branch }}" != "" ] && [ "${{ github.event.inputs.target_branch }}" != "" ]; then
- echo "source_branch=${{ github.event.inputs.source_branch }}" >> $GITHUB_OUTPUT
- echo "target_branch=${{ github.event.inputs.target_branch }}" >> $GITHUB_OUTPUT
- else
- # Default to current branch and main
- echo "source_branch=${{ github.ref_name }}" >> $GITHUB_OUTPUT
- echo "target_branch=main" >> $GITHUB_OUTPUT
- fi
-
- - name: Fetch CodeBoarding Documentation
- id: codeboarding
- uses: ./
- with:
- repository_url: ${{ github.event.inputs.repository_url }}
- source_branch: ${{ steps.set-branches.outputs.source_branch }}
- target_branch: ${{ steps.set-branches.outputs.target_branch }}
- output_directory: 'docs'
- output_format: ${{ github.event.inputs.output_format || '.md' }}
-
- - name: Display Action Results
- run: |
- echo "Documentation files created: ${{ steps.codeboarding.outputs.markdown_files_created }}"
- echo "JSON files created: ${{ steps.codeboarding.outputs.json_files_created }}"
- echo "Documentation directory: ${{ steps.codeboarding.outputs.output_directory }}"
- echo "JSON directory: ${{ steps.codeboarding.outputs.json_directory }}"
- echo "Has changes: ${{ steps.codeboarding.outputs.has_changes }}"
-
- # Check if we have any changes to commit
- - name: Check for changes
- id: git-changes
- run: |
- if [ -n "$(git status --porcelain)" ]; then
- echo "has_git_changes=true" >> $GITHUB_OUTPUT
- else
- echo "has_git_changes=false" >> $GITHUB_OUTPUT
- fi
-
- - name: Create Pull Request
- if: steps.git-changes.outputs.has_git_changes == 'true' && steps.codeboarding.outputs.has_changes == 'true'
- uses: peter-evans/create-pull-request@v5
+ - uses: codeboarding/codeboarding-action@v1
with:
- token: ${{ secrets.GITHUB_TOKEN }}
- commit-message: "docs: update codeboarding documentation"
- title: "๐ CodeBoarding Documentation Update"
- body: |
- ## ๐ Documentation Update
-
- This PR contains updated documentation files fetched from the CodeBoarding service.
-
- ### ๐ Summary
- - **Documentation files created/updated**: ${{ steps.codeboarding.outputs.markdown_files_created }}
- - **JSON files created/updated**: ${{ steps.codeboarding.outputs.json_files_created }}
- - **Documentation directory**: `${{ steps.codeboarding.outputs.output_directory }}/`
- - **JSON directory**: `${{ steps.codeboarding.outputs.json_directory }}/`
- - **Source branch**: `${{ steps.set-branches.outputs.source_branch }}`
- - **Target branch**: `${{ steps.set-branches.outputs.target_branch }}`
- - **Output format**: `${{ github.event.inputs.output_format || '.md' }}`
- - **Repository analyzed**: `${{ steps.codeboarding.outputs.repo_url }}`
-
- ### ๐ Changes
- Files have been updated with fresh documentation content based on code changes between branches.
-
- ---
-
- ๐ค This PR was automatically generated by the CodeBoarding documentation update workflow.
- branch: docs/codeboarding-update
- base: ${{ steps.set-branches.outputs.target_branch }}
- delete-branch: true
+ llm_api_key: ${{ secrets.OPENROUTER_API_KEY }}
+ # depth_level: '1' # 1-3, higher = more detail
+ # diagram_direction: 'LR' # LR | TD | TB | RL | BT
+ # changed_only: 'false' # 'true' to draw only changed components
diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml
new file mode 100644
index 0000000..302e014
--- /dev/null
+++ b/.github/workflows/test-self.yml
@@ -0,0 +1,24 @@
+name: Self-test architecture diff
+
+# Exercises THIS branch's action (uses: ./) against the action repo itself, so a
+# PR posts a Mermaid architecture-diff comment. Remove before merge.
+
+on:
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+permissions:
+ pull-requests: write
+
+jobs:
+ diagram:
+ runs-on: ubuntu-latest
+ timeout-minutes: 60
+ steps:
+ - uses: actions/checkout@v4
+ - uses: ./
+ with:
+ llm_api_key: ${{ secrets.OPENROUTER_API_KEY }}
+ agent_model: ${{ secrets.AGENT_MODEL }}
+ parsing_model: ${{ secrets.PARSING_MODEL }}
+ cta_base_url: https://codeboarding.pontux-inc.workers.dev
diff --git a/.gitignore b/.gitignore
index 865fddd..ea8864f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,9 @@
test_response.json
test_codeboarding/
+# Local test harness output (scripts/run_local.sh)
+.cb-local/
+
# Environment files
.env
diff --git a/README.md b/README.md
index 043c743..3ba9387 100644
--- a/README.md
+++ b/README.md
@@ -1,111 +1,134 @@

-
- # CodeBoarding [Diagram-First Documentation]
-
- [](https://github.com/marketplace/actions/codeboarding-diagram-first-documentation)
+
+ # CodeBoarding Architecture Diff (Mermaid)
+
+ Posts a PR comment with a **Mermaid** architecture diagram showing which components changed — **green** added, **yellow** modified, **red** deleted — for both nodes and arrows.
-Generates diagram-first visualizations of your codebase using static analysis and large language models.
+## What it does
+
+On every pull request, this action:
+
+1. Resolves a **base ("before") analysis**: it reads the `.codeboarding/analysis.json` committed at the PR base commit if one exists; otherwise it runs a full CodeBoarding analysis on the base commit to produce one.
+2. Runs an **incremental analysis on the PR head**, seeded from the base analysis — only LLM-calling the components whose code actually changed, so a typical PR costs a handful of LLM calls.
+3. **Diffs the two analyses** and renders the architecture graph as a Mermaid block with changed components and relations colored:
+ - **green** — added
+ - **yellow** — modified
+ - **red** (dashed) — deleted
+4. Posts a sticky PR comment containing the Mermaid block. **GitHub renders the diagram inline** — no image, no Playwright, no extra branch.
## Usage
```yaml
-name: Generate Documentation
+name: Architecture diff
on:
- push:
- branches: [ main ]
pull_request:
- branches: [ main ]
- types: [opened, synchronize, reopened]
+ types: [opened, synchronize, reopened, ready_for_review]
+
+permissions:
+ pull-requests: write # the only permission needed — nothing is pushed
jobs:
- documentation:
+ diagram:
runs-on: ubuntu-latest
+ if: github.event.pull_request.draft == false
+ timeout-minutes: 60
steps:
- - name: Checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0 # Required to access branch history
-
- - name: Generate Documentation
- uses: codeboarding/codeboarding-ghaction@v1
+ - uses: codeboarding/codeboarding-action@v1
with:
- repository_url: ${{ github.server_url }}/${{ github.repository }}
- source_branch: ${{ github.head_ref || github.ref_name }}
- target_branch: ${{ github.base_ref || 'main' }}
- output_directory: 'docs'
- output_format: '.md'
-
- - name: Upload Documentation
- uses: actions/upload-artifact@v4
- with:
- name: documentation
- path: |
- docs/
- .codeboarding/
+ llm_api_key: ${{ secrets.OPENROUTER_API_KEY }}
```
+You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer.
+
## Inputs
-| Input | Description | Required | Default |
-|-------|-------------|----------|---------|
-| `repository_url` | Repository URL for which documentation will be generated | Yes | - |
-| `source_branch` | Source branch for comparison (typically the PR branch) | Yes | - |
-| `target_branch` | Target branch for comparison (typically the base branch) | Yes | - |
-| `output_directory` | Directory where documentation files will be saved | No | `docs` |
-| `output_format` | Format for documentation files (either `.md` or `.rst`) | No | `.md` |
+| Input | Default | Description |
+|---|---|---|
+| `llm_api_key` | (required) | LLM API key. Currently OpenRouter (`OPENROUTER_API_KEY`). |
+| `github_token` | `${{ github.token }}` | Token used to post the comment. |
+| `engine_ref` | `main` | Git ref of `CodeBoarding/CodeBoarding`. Pin in production. |
+| `depth_level` | `1` | Diagram depth (1–3). Higher = slower + more detail. |
+| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for analysis. |
+| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for parsing. |
+| `comment_header` | `Architecture review` | Header line of the PR comment. |
+| `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. |
+| `changed_only` | `false` | Draw only changed components and their incident edges. |
+| `nested` | `false` | Draw depth>1 sub-components as nested subgraphs (pair with `depth_level >= 2`). |
## Outputs
| Output | Description |
-|--------|-------------|
-| `markdown_files_created` | Number of documentation files created |
-| `json_files_created` | Number of JSON files created |
-| `output_directory` | Directory where documentation files were saved |
-| `json_directory` | Directory where JSON files were saved (always `.codeboarding`) |
-| `has_changes` | Whether any files were created or changed |
+|---|---|
+| `diagram_md` | Path to the rendered ```` ```mermaid ```` block in the runner workspace. |
+| `n_changed` | Number of top-level components added/modified/deleted. |
+| `truncated` | `true` if the diagram was reduced to changed-only to fit GitHub's Mermaid limit. |
+
+## How the diff is colored
+
+Nodes are styled with Mermaid `classDef` / `class`; arrows are styled with positional `linkStyle`. A relation counts as **modified** when its endpoints are unchanged but its label text changed. Example of the emitted block:
+
+```mermaid
+graph LR
+ Api["API Gateway"]
+ Auth["Auth Service"]
+ Cache["Cache"]
+ Api -- "routes to" --> Auth
+ Auth -- "reads/writes" --> Cache
+ classDef added fill:#1f883d,stroke:#0b5d23,color:#ffffff;
+ classDef modified fill:#bf8700,stroke:#7d4e00,color:#ffffff;
+ classDef deleted fill:#cf222e,stroke:#82071e,color:#ffffff,stroke-dasharray:5 3;
+ class Cache added;
+ class Auth modified;
+ class Api deleted;
+ linkStyle 0 stroke:#cf222e,stroke-width:2px,stroke-dasharray:5 3;
+ linkStyle 1 stroke:#1f883d,stroke-width:2px;
+```
-## How It Works
+## No baseline required
-The action works by:
+If `.codeboarding/analysis.json` isn't committed at the PR base commit, the action **generates the baseline itself** by running a full analysis on the base commit, then diffs the head against it. Committing a baseline on your default branch makes runs cheaper (the base run is skipped) and the diff more stable, but it is not required.
-1. Analyzing the differences introduced in the source branch and putting the results in the target branch
-2. Generating documentation files based on the latest version of the source branch
-3. Outputting two types of files:
- - Documentation files (Markdown or RST) in the specified output directory
- - Metadata files in the `.codeboarding` directory
+## Fork PRs
-## License
+Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. A maintainer can re-run from the Actions tab, or use `pull_request_target` if you understand its security implications.
-MIT License - see [LICENSE](LICENSE) file for details.
+## Limitations
-# CodeBoarding GitHub Action
+- **GitHub Mermaid caps.** Inline Mermaid in comments is capped (≈500 edges / 50 000 chars). The action stays under this by auto-falling-back to a changed-only graph; if even that overflows it posts a text summary instead of a broken diagram.
+- **Nesting.** By default only the top-level component graph is drawn (matching the engine's default `graph LR`). Set `nested: true` with `depth_level >= 2` to draw sub-components as nested subgraphs — leaf nodes filled, parent containers outlined, both colored by status. Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary.
+- **Renames show as remove + add.** Components are matched across the two analyses by name (the stable join), so a renamed component appears as a red removal plus a green addition rather than a single yellow change.
+- **No click-through.** GitHub renders Mermaid in strict security mode, so node hyperlinks are disabled.
-## Important: Timeout Configuration
+## Local testing
-For large repositories, the analysis can take 15-45 minutes. Make sure to configure appropriate timeouts in your workflow:
+A GitHub run is slow (engine install + two analyses). To iterate locally, use `scripts/run_local.sh`. It mirrors `action.yml` and writes `.cb-local/diagram.md` plus a `.cb-local/preview.html` you open in a browser (rendered with mermaid.js in GitHub's strict mode, so it looks like the comment will).
-```yaml
-jobs:
- generate-docs:
- runs-on: ubuntu-latest
- timeout-minutes: 60 # Set to 60+ minutes for large repositories
- steps:
- - uses: actions/checkout@v4
- - uses: your-username/codeboarding-ghaction@v1
- with:
- # your inputs here
+**Fast — no LLM, instant.** Diff two existing `analysis.json` files. Great for iterating on colors/layout. For a realistic pair, pull two revisions of a committed analysis:
+
+```bash
+git show <base-ref>:.codeboarding/analysis.json > /tmp/base.json
+git show <head-ref>:.codeboarding/analysis.json > /tmp/head.json
+scripts/run_local.sh --base-json /tmp/base.json --head-json /tmp/head.json
```
-## Timeout Guidelines
+**Full pipeline — needs an LLM key.** Runs the engine on two refs of a local repo exactly like the action (committed-or-generated base, then incremental head):
+
+```bash
+export OPENROUTER_API_KEY=sk-or-...
+scripts/run_local.sh --repo /path/to/repo --base <base-ref> --head <head-ref> \
+ --engine /path/to/CodeBoarding # defaults to ../CodeBoarding
+```
-- **Small repositories** (<1k files): 10-15 minutes
-- **Medium repositories** (1k-5k files): 20-30 minutes
-- **Large repositories** (5k+ files): 30-60 minutes
-- **Very large repositories** (10k+ files): 45-90 minutes
+Flags: `--depth N`, `--direction LR|TD|…`, `--nested`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`.
+
+The diagram step alone is also directly runnable:
+
+```bash
+python3 scripts/diff_to_mermaid.py --base base/analysis.json --head head/analysis.json --out diagram.md
+```
+
+## License
-If your workflow consistently times out, consider:
-1. Increasing `timeout-minutes` to 90 or higher
-2. Running the action on a schedule during off-peak hours
-3. Analyzing specific branches with smaller diffs
+MIT — see [LICENSE](LICENSE).
diff --git a/action.yml b/action.yml
index 6f17d1d..e37c80e 100644
--- a/action.yml
+++ b/action.yml
@@ -1,447 +1,451 @@
-name: 'CodeBoarding [Diagram-First Documentation]'
-description: 'Generates diagram-first visualizations of your codebase using static analysis and large language models.'
+name: 'CodeBoarding Architecture Diff (Mermaid)'
+description: 'Posts a PR comment with a Mermaid architecture diagram showing which components changed (green added / yellow modified / red deleted) — nodes and arrows.'
author: 'CodeBoarding'
branding:
- icon: 'book-open' # or 'layers', 'git-branch', 'book-open', 'target'
+ icon: 'git-pull-request'
color: 'blue'
inputs:
- output_directory:
- description: 'Directory where documentation files will be saved'
- required: false
- default: 'docs'
- repository_url:
- description: 'Repository URL to fetch documentation for (defaults to current repository)'
- required: true
- source_branch:
- description: 'Source branch for comparison'
- required: true
- target_branch:
- description: 'Target branch for comparison'
+ llm_api_key:
+ description: 'LLM API key (OpenRouter by default). Required.'
required: true
- output_format:
- description: 'Output format for documentation files (.md, .mdx, .rst, or .html)'
+ github_token:
+ description: 'GITHUB_TOKEN used to post the PR comment. Defaults to the workflow token.'
+ required: false
+ default: ${{ github.token }}
+ engine_ref:
+ description: 'Git ref of CodeBoarding/CodeBoarding to use as the analysis engine.'
+ required: false
+ default: 'main'
+ depth_level:
+ description: 'Diagram depth (1-3). Higher is slower and more detailed.'
+ required: false
+ default: '1'
+ agent_model:
+ description: 'LLM model identifier used for analysis (AGENT_MODEL env var).'
+ required: false
+ default: 'openrouter/anthropic/claude-sonnet-4'
+ parsing_model:
+ description: 'LLM model identifier used for parsing (PARSING_MODEL env var).'
+ required: false
+ default: 'openrouter/anthropic/claude-sonnet-4'
+ comment_header:
+ description: 'Header line used inside the sticky PR comment.'
+ required: false
+ default: 'Architecture review'
+ diagram_direction:
+ description: 'Mermaid layout direction: LR, TD, TB, RL, or BT.'
required: false
- default: '.md'
+ default: 'LR'
+ changed_only:
+ description: 'Render only changed components and their incident edges (also auto-applied when the full graph exceeds GitHub''s Mermaid limit).'
+ required: false
+ default: 'false'
+ nested:
+ description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).'
+ required: false
+ default: 'false'
+ cta_base_url:
+ description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.'
+ required: false
+ default: ''
outputs:
- markdown_files_created:
- description: 'Number of Markdown files created'
- value: ${{ steps.process-docs.outputs.markdown_files_created }}
- json_files_created:
- description: 'Number of JSON files created'
- value: ${{ steps.process-docs.outputs.json_files_created }}
- output_directory:
- description: 'Directory where Markdown files were saved'
- value: ${{ steps.process-docs.outputs.output_directory }}
- json_directory:
- description: 'Directory where JSON files were saved (.codeboarding)'
- value: ${{ steps.process-docs.outputs.json_directory }}
- has_changes:
- description: 'Whether any files were created or changed'
- value: ${{ steps.process-docs.outputs.has_changes }}
- repo_url:
- description: 'Repository URL that was analyzed'
- value: ${{ steps.repo-url.outputs.repo_url }}
+ diagram_md:
+ description: 'Path to the rendered ```mermaid block (in the runner workspace).'
+ value: ${{ steps.diagram.outputs.diagram_md }}
+ n_changed:
+ description: 'Number of top-level components added/modified/deleted.'
+ value: ${{ steps.diagram.outputs.n_changed }}
+ truncated:
+ description: 'True if the diagram was reduced to changed-only to fit GitHub''s Mermaid limit.'
+ value: ${{ steps.diagram.outputs.truncated }}
runs:
using: 'composite'
steps:
- - name: Determine repository URL
- id: repo-url
+ - name: Guard — PR event only
+ id: guard
shell: bash
run: |
- # Use the provided repository URL if it's not empty
- if [ -n "${{ inputs.repository_url }}" ]; then
- REPO_URL="${{ inputs.repository_url }}"
- echo "Using provided repository URL: $REPO_URL"
- # Otherwise try to determine from git if we're in a git repository
- elif git config --get remote.origin.url > /dev/null 2>&1; then
- REPO_URL=$(git config --get remote.origin.url)
- # Convert SSH URL to HTTPS if needed
- if [[ $REPO_URL == git@* ]]; then
- REPO_URL=$(echo $REPO_URL | sed 's|git@github.com:|https://github.com/|')
- fi
- echo "Using git remote URL: $REPO_URL"
+ if [ -z "${{ github.event.pull_request.number }}" ]; then
+ echo "::warning::CodeBoarding Architecture Diff only runs on pull_request events. Skipping."
+ echo "skip=true" >> $GITHUB_OUTPUT
else
- REPO_URL="${{ github.server_url }}/${{ github.repository }}"
- echo "Using GitHub context URL: $REPO_URL"
+ echo "skip=false" >> $GITHUB_OUTPUT
+ echo "base_sha=${{ github.event.pull_request.base.sha }}" >> $GITHUB_OUTPUT
+ echo "head_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT
+ echo "pr_number=${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT
fi
- echo "repo_url=$REPO_URL" >> $GITHUB_OUTPUT
- - name: Create and poll documentation job
- id: fetch-docs
+ - name: Checkout CodeBoarding engine
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/checkout@v4
+ with:
+ repository: CodeBoarding/CodeBoarding
+ ref: ${{ inputs.engine_ref }}
+ path: codeboarding-engine
+
+ - name: Checkout target repository (PR head)
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/checkout@v4
+ with:
+ path: target-repo
+ fetch-depth: 0
+ ref: ${{ steps.guard.outputs.head_sha }}
+
+ - name: Ensure PR base commit is fetched
+ if: steps.guard.outputs.skip != 'true'
+ shell: bash
+ working-directory: target-repo
+ run: |
+ git fetch origin "${{ steps.guard.outputs.base_sha }}" --depth=1 || true
+ git cat-file -e "${{ steps.guard.outputs.base_sha }}" && echo "Base commit reachable." || \
+ (echo "::error::Base commit ${{ steps.guard.outputs.base_sha }} is not reachable." && exit 1)
+
+ - name: Set up Python 3.13
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.13'
+
+ - name: Set up Node.js 20
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+
+ - name: Install uv
+ if: steps.guard.outputs.skip != 'true'
+ uses: astral-sh/setup-uv@v4
+
+ - name: Cache uv venv (engine)
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/cache@v4
+ with:
+ path: codeboarding-engine/.venv
+ key: cb-uv-${{ runner.os }}-${{ hashFiles('codeboarding-engine/pyproject.toml', 'codeboarding-engine/uv.lock') }}
+ restore-keys: |
+ cb-uv-${{ runner.os }}-
+
+ - name: Cache LSP servers
+ if: steps.guard.outputs.skip != 'true'
+ uses: actions/cache@v4
+ with:
+ path: |
+ codeboarding-engine/static_analyzer/servers/node_modules
+ codeboarding-engine/static_analyzer/servers/bin
+ key: cb-lsp-${{ runner.os }}-v1
+ restore-keys: |
+ cb-lsp-${{ runner.os }}-
+
+ - name: Install Python dependencies
+ if: steps.guard.outputs.skip != 'true'
+ shell: bash
+ working-directory: codeboarding-engine
+ run: |
+ uv venv --clear
+ uv pip install -e .
+
+ - name: Install LSP servers
+ if: steps.guard.outputs.skip != 'true'
+ shell: bash
+ working-directory: codeboarding-engine
+ run: |
+ uv run python install.py --auto-install-npm
+
+ - name: Prepare & verify LLM key
+ if: steps.guard.outputs.skip != 'true'
shell: bash
+ env:
+ RAW_KEY: ${{ inputs.llm_api_key }}
+ RAW_AGENT_MODEL: ${{ inputs.agent_model }}
+ RAW_PARSING_MODEL: ${{ inputs.parsing_model }}
run: |
- CREATE_JOB_URL="https://server.codeboarding.org/github_action/jobs"
- REPO_URL="${{ steps.repo-url.outputs.repo_url }}"
- SOURCE_BRANCH="${{ inputs.source_branch }}"
- TARGET_BRANCH="${{ inputs.target_branch }}"
- OUTPUT_DIRECTORY="${{ inputs.output_directory }}"
- OUTPUT_FORMAT="${{ inputs.output_format }}"
-
- echo "๐ Creating CodeBoarding analysis job...$CREATE_JOB_URL"
- echo "๐ Repository: $REPO_URL"
- echo "๐ฟ Source branch: $SOURCE_BRANCH"
- echo "๐ฏ Target branch: $TARGET_BRANCH"
- echo "๐ Output format: $OUTPUT_FORMAT"
-
- # Create JSON payload
- JSON_PAYLOAD=$(jq -n \
- --arg url "$REPO_URL" \
- --arg source_branch "$SOURCE_BRANCH" \
- --arg target_branch "$TARGET_BRANCH" \
- --arg output_directory "$OUTPUT_DIRECTORY" \
- --arg extension "$OUTPUT_FORMAT" \
- '{
- url: $url,
- source_branch: $source_branch,
- target_branch: $target_branch,
- output_directory: $output_directory,
- extension: $extension
- }')
-
- echo "๐ Request payload:"
- echo "$JSON_PAYLOAD"
-
- # Create temporary file for response
- TEMP_FILE=$(mktemp)
-
- echo "๐ Making API request to create job..."
-
- # Make the API call to create job
- response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \
- -X POST \
- -H "Content-Type: application/json" \
- -d "$JSON_PAYLOAD" \
- --max-time 60 \
- --connect-timeout 30 \
- "$CREATE_JOB_URL")
- curl_exit_code=$?
-
- http_code=${response: -3}
-
- echo "✅ Job creation request completed!"
- echo "๐ Response status code: $http_code"
- echo "๐ง Curl exit code: $curl_exit_code"
-
- # Handle curl errors
- if [ $curl_exit_code -ne 0 ]; then
- echo "โ Error: Curl failed with exit code $curl_exit_code"
- case $curl_exit_code in
- 6) echo "๐ Couldn't resolve host - check network connectivity" ;;
- 7) echo "๐ Failed to connect to host - server might be down" ;;
- 28) echo "โฐ Request timed out - server might be busy" ;;
- *) echo "โ Unknown curl error - check network and server status" ;;
- esac
- rm -f "$TEMP_FILE"
+ if [ -z "$RAW_KEY" ]; then
+ echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub — a maintainer must re-run, or use pull_request_target."
exit 1
fi
-
- if [ "$http_code" != "202" ]; then
- echo "โ Error: Job creation failed with status code $http_code"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
-
- # Try to parse as JSON for better error message
- if jq -e '.detail' "$TEMP_FILE" > /dev/null 2>&1; then
- echo "๐ Error details: $(jq -r '.detail' "$TEMP_FILE")"
- fi
-
- rm -f "$TEMP_FILE"
+ # Pasting a key into the secret UI often picks up trailing newlines,
+ # wrapping quotes, or a whole `KEY=value` line. Normalize all of that.
+ _strip() { printf '%s' "$1" | tr -d '[:space:]' | sed -e 's/^"//;s/"$//' -e "s/^'//;s/'\$//"; }
+ KEY="$(_strip "$RAW_KEY")"
+ case "$KEY" in
+ OPENROUTER_API_KEY=*) KEY="${KEY#OPENROUTER_API_KEY=}";;
+ openrouter_api_key=*) KEY="${KEY#openrouter_api_key=}";;
+ esac
+ KEY="$(_strip "$KEY")"
+ AGENT_MODEL="$(_strip "$RAW_AGENT_MODEL")"
+ PARSING_MODEL="$(_strip "$RAW_PARSING_MODEL")"
+
+ # Mask the cleaned value (it may differ from the registered secret).
+ echo "::add-mask::$KEY"
+
+ case "$KEY" in sk-or-v1-*) PFX=1 ;; *) PFX=0 ;; esac
+ echo "OPENROUTER_API_KEY length: ${#KEY}; looks-like-OpenRouter: $PFX"
+ STATUS=$(curl -sS -o /tmp/openrouter-auth.json -w "%{http_code}" \
+ -H "Authorization: Bearer $KEY" --max-time 10 \
+ https://openrouter.ai/api/v1/auth/key || echo "curl-fail")
+ echo "OpenRouter /auth/key response: HTTP $STATUS"
+ if [ "$STATUS" != "200" ]; then
+ echo "::error::OpenRouter rejected the API key (HTTP $STATUS). Verify the OPENROUTER_API_KEY secret."
+ cat /tmp/openrouter-auth.json 2>/dev/null || true
exit 1
fi
-
- # Check if response is valid JSON
- if ! jq empty "$TEMP_FILE" 2>/dev/null; then
- echo "โ Error: Invalid JSON response"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
- rm -f "$TEMP_FILE"
- exit 1
+
+ # Export (masked) for the analysis steps.
+ {
+ echo "OPENROUTER_API_KEY=$KEY"
+ echo "AGENT_MODEL=$AGENT_MODEL"
+ echo "PARSING_MODEL=$PARSING_MODEL"
+ } >> "$GITHUB_ENV"
+
+ - name: Resolve base analysis (committed baseline)
+ if: steps.guard.outputs.skip != 'true'
+ id: base
+ shell: bash
+ working-directory: target-repo
+ run: |
+ BASE_SHA="${{ steps.guard.outputs.base_sha }}"
+ BASE_DIR="${RUNNER_TEMP}/cb-base"
+ HEAD_DIR="${RUNNER_TEMP}/cb-head"
+ mkdir -p "$BASE_DIR" "$HEAD_DIR"
+ echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT
+ echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT
+ if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then
+ git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \
+ && echo "Seeded committed static_analysis.pkl from base." \
+ || rm -f "${BASE_DIR}/static_analysis.pkl"
+ echo "committed=true" >> $GITHUB_OUTPUT
+ echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}."
+ else
+ rm -f "${BASE_DIR}/analysis.json"
+ echo "committed=false" >> $GITHUB_OUTPUT
+ echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit."
fi
-
- # Extract job_id from response
- JOB_ID=$(jq -r '.job_id' "$TEMP_FILE")
-
- if [ "$JOB_ID" = "null" ] || [ -z "$JOB_ID" ]; then
- echo "โ Error: No job_id found in response"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
- rm -f "$TEMP_FILE"
+
+ - name: Generate base analysis (no committed baseline)
+ if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false'
+ shell: bash
+ working-directory: codeboarding-engine
+ env:
+ STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml
+ PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine
+ DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }}
+ CACHING_DOCUMENTATION: 'false'
+ ENABLE_MONITORING: 'false'
+ run: |
+ BASE_SRC="${RUNNER_TEMP}/base-src"
+ rm -rf "$BASE_SRC"
+ git -C "${{ github.workspace }}/target-repo" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}"
+ uv run python -c "
+ from pathlib import Path
+ from codeboarding_workflows.analysis import run_full
+ res = run_full(
+ repo_name='${{ github.event.repository.name }}',
+ repo_path=Path('$BASE_SRC'),
+ output_dir=Path('${{ steps.base.outputs.base_dir }}'),
+ run_id='${{ github.run_id }}-${{ github.run_attempt }}-base',
+ log_path='/tmp/cb-base.log',
+ depth_level=int('${{ inputs.depth_level }}'),
+ source_sha='${{ steps.guard.outputs.base_sha }}',
+ )
+ print(f'Base analysis written: {res}')
+ "
+ if [ ! -f "${{ steps.base.outputs.base_dir }}/analysis.json" ]; then
+ echo "::error::Base full analysis ran but analysis.json is missing."
exit 1
fi
-
- echo "✅ Job created successfully!"
- echo "๐ Job ID: $JOB_ID"
-
- # Start polling job status
- STATUS_URL="https://server.codeboarding.org/github_action/jobs/$JOB_ID"
-
- echo "๐ Starting job status polling..."
- echo "โฐ This may take 15-45 minutes for large repositories..."
- echo "๐ก If your workflow times out, increase 'timeout-minutes' in your job configuration"
-
- # Polling loop
- POLL_COUNT=0
- MAX_POLLS=90 # 90 minutes max (90 * 1 minute intervals)
-
- while [ $POLL_COUNT -lt $MAX_POLLS ]; do
- POLL_COUNT=$((POLL_COUNT + 1))
-
- echo "๐ Polling attempt $POLL_COUNT of $MAX_POLLS ($(date '+%H:%M:%S'))"
-
- # Make status check API call
- response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \
- --max-time 30 \
- --connect-timeout 10 \
- "$STATUS_URL")
-
- curl_exit_code=$?
- http_code=${response: -3}
-
- # Handle curl errors
- if [ $curl_exit_code -ne 0 ]; then
- echo "โ ๏ธ Warning: Status check failed with curl exit code $curl_exit_code"
- echo "๐ Retrying in 30 seconds..."
- sleep 30
- continue
- fi
-
- if [ "$http_code" != "200" ]; then
- echo "โ ๏ธ Warning: Status check failed with HTTP code $http_code"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
- echo "๐ Retrying in 30 seconds..."
- sleep 30
- continue
- fi
-
- # Check if response is valid JSON
- if ! jq empty "$TEMP_FILE" 2>/dev/null; then
- echo "โ ๏ธ Warning: Invalid JSON response"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
- echo "๐ Retrying in 30 seconds..."
- sleep 30
- continue
- fi
-
- # Extract status from response
- STATUS=$(jq -r '.status' "$TEMP_FILE")
-
- echo "๐ Current job status: $STATUS"
-
- if [ "$STATUS" = "COMPLETED" ]; then
- echo "โ
Job completed successfully!"
-
- # Check if result field exists and contains files
- if jq -e '.result' "$TEMP_FILE" > /dev/null; then
- echo "๐ฆ Result field found, preparing output..."
-
- # Check if result is a JSON string or already a JSON object
- RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE")
-
- if [ "$RESULT_TYPE" = "string" ]; then
- echo "๐ง Result is a JSON string, parsing it..."
- # Parse the JSON string in the result field
- jq -r '.result' "$TEMP_FILE" | jq '.' > "${TEMP_FILE}_result"
- else
- echo "๐ง Result is already a JSON object, extracting it..."
- # Extract the result object directly
- jq '.result' "$TEMP_FILE" > "${TEMP_FILE}_result"
- fi
-
- # Verify the extracted result
- if jq -e '.files' "${TEMP_FILE}_result" > /dev/null; then
- echo "โ
Files extracted successfully"
- mv "${TEMP_FILE}_result" "$TEMP_FILE"
- echo "response_file=$TEMP_FILE" >> $GITHUB_OUTPUT
- exit 0 # Successfully extracted files, exit with success
- else
- echo "โ Error: Extracted result is missing files structure"
- echo "๐ Extracted content:"
- cat "${TEMP_FILE}_result"
- rm -f "${TEMP_FILE}_result" "$TEMP_FILE"
- exit 1
- fi
- else
- echo "โ Error: Job completed but no result or result.files found in response"
- echo "๐ Response structure:"
- jq '.' "$TEMP_FILE"
-
- # If result exists, show what it contains
- if jq -e '.result' "$TEMP_FILE" > /dev/null; then
- echo "๐ Result field content:"
- RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE")
- echo "Result type: $RESULT_TYPE"
-
- if [ "$RESULT_TYPE" = "string" ]; then
- echo "Result string content:"
- jq -r '.result' "$TEMP_FILE"
- else
- echo "Result object content:"
- jq '.result' "$TEMP_FILE"
- fi
- fi
-
- rm -f "$TEMP_FILE"
- exit 1
- fi
- elif [ "$STATUS" = "FAILED" ] || [ "$STATUS" = "ERROR" ]; then
- echo "โ Job failed with status: $STATUS"
- echo "๐ Response content:"
- cat "$TEMP_FILE"
- rm -f "$TEMP_FILE"
- exit 1
- else
- # Job still in progress
- echo "โณ Job in progress (status: $STATUS)..."
-
- # Show additional progress information if available
- if jq -e '.updated_at' "$TEMP_FILE" > /dev/null; then
- UPDATED_AT=$(jq -r '.updated_at' "$TEMP_FILE")
- echo "๐ Last updated: $UPDATED_AT"
- fi
-
- echo "๐ค Waiting 15 seconds before next check..."
- sleep 15
- fi
- done
-
- # Only reach here if we've exceeded max polls without completion
- echo "โ Error: Job polling timed out after $MAX_POLLS attempts"
- echo "๐๏ธ The repository analysis is taking longer than expected."
- echo "๐ This might be due to:"
- echo " โข Very large repository size (>10k files)"
- echo " โข Complex codebase requiring extensive analysis"
- echo " โข Server load or processing delays"
- echo ""
- echo "๐ก Suggestions:"
- echo " โข Try again later when server load might be lower"
- echo " โข Consider analyzing smaller branches or specific directories"
- echo " โข Increase your GitHub Actions job timeout-minutes to 120+"
- echo " โข Contact support if the issue persists"
-
- rm -f "$TEMP_FILE"
- exit 1
- - name: Process documentation files
- id: process-docs
+ - name: Analyze PR head (incremental from base)
+ if: steps.guard.outputs.skip != 'true'
+ id: analyze
shell: bash
+ working-directory: codeboarding-engine
+ env:
+ STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml
+ PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine
+ DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }}
+ CACHING_DOCUMENTATION: 'false'
+ ENABLE_MONITORING: 'false'
run: |
- RESPONSE_FILE="${{ steps.fetch-docs.outputs.response_file }}"
- MD_OUTPUT_DIR="${{ inputs.output_directory }}"
- JSON_OUTPUT_DIR=".codeboarding"
- OUTPUT_FORMAT="${{ inputs.output_format }}"
-
- # Validate output format
- if [[ "$OUTPUT_FORMAT" != ".md" && "$OUTPUT_FORMAT" != ".mdx" && "$OUTPUT_FORMAT" != ".rst" && "$OUTPUT_FORMAT" != ".html" ]]; then
- echo "Error: Invalid output format '$OUTPUT_FORMAT'. Must be either '.md', '.mdx', '.rst', or '.html'"
+ BASE_DIR="${{ steps.base.outputs.base_dir }}"
+ HEAD_DIR="${{ steps.base.outputs.head_dir }}"
+ # Seed the head dir from the base analysis so incremental stitches
+ # component ids from the baseline (stable diff). Base dir is left
+ # untouched as the "before" snapshot for the diff.
+ cp -a "$BASE_DIR"/. "$HEAD_DIR"/ 2>/dev/null || true
+ uv run python -c "
+ from pathlib import Path
+ from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError
+ from diagram_analysis.exceptions import IncrementalCacheMissingError
+ base_sha='${{ steps.guard.outputs.base_sha }}'
+ head_sha='${{ steps.guard.outputs.head_sha }}'
+ repo=Path('${{ github.workspace }}/target-repo')
+ out=Path('$HEAD_DIR')
+ name='${{ github.event.repository.name }}'
+ rid='${{ github.run_id }}-${{ github.run_attempt }}-head'
+ try:
+ res = run_incremental(
+ repo_path=repo, output_dir=out, project_name=name, run_id=rid,
+ log_path='/tmp/cb-head.log', base_ref=base_sha, target_ref=head_sha,
+ source_sha=head_sha,
+ )
+ except (IncrementalCacheMissingError, BaselineUnavailableError) as exc:
+ print(f'Incremental unavailable ({exc}); running full analysis on head.')
+ for p in out.glob('*'):
+ if p.is_file():
+ p.unlink()
+ res = run_full(
+ repo_name=name, repo_path=repo, output_dir=out, run_id=rid,
+ log_path='/tmp/cb-head.log', depth_level=int('${{ inputs.depth_level }}'),
+ source_sha=head_sha,
+ )
+ print(f'Head analysis written: {res}')
+ "
+ if [ ! -f "$HEAD_DIR/analysis.json" ]; then
+ echo "::error::Head analysis ran but analysis.json is missing."
exit 1
fi
-
- # Clean and create the output directories
- mkdir -p "$MD_OUTPUT_DIR"
-
- # Remove existing .codeboarding files before adding new ones
- if [ -d "$JSON_OUTPUT_DIR" ]; then
- echo "Cleaning existing JSON files from $JSON_OUTPUT_DIR"
- rm -rf "$JSON_OUTPUT_DIR"
- fi
- mkdir -p "$JSON_OUTPUT_DIR"
-
- # Initialize counters
- MARKDOWN_FILES_CREATED=0
- JSON_FILES_CREATED=0
-
- echo "=== Processing Documentation Files ==="
- echo "Response JSON structure:"
- jq . "$RESPONSE_FILE"
- echo "Using output format: $OUTPUT_FORMAT"
- # Parse JSON response and create files using keys as filenames
- if jq -e '.files' "$RESPONSE_FILE" > /dev/null; then
- echo "Files key found, proceeding to create files..."
-
- # Check if files object is empty
- FILES_COUNT=$(jq '.files | length' "$RESPONSE_FILE")
- if [ "$FILES_COUNT" -eq 0 ]; then
- echo "โน๏ธ No documentation files were generated for this repository/branch combination."
- echo "๐ This might be because:"
- echo " โข No changes were detected between the source and target branches"
- echo " โข The repository or branches don't exist or are not accessible"
- echo " โข No analyzable code files were found"
- echo " โข The branches are identical (no diff to analyze)"
- else
- # Get each key from files object and create a file with that name
- while IFS= read -r filename; do
- echo "Processing file: $filename"
-
- # Get the content for this filename
- content=$(jq -r ".files[\"$filename\"]" "$RESPONSE_FILE")
-
- # Determine file type and destination
- if [[ "$filename" == *.json ]]; then
- # JSON file
- output_dir="$JSON_OUTPUT_DIR"
- output_filename="$filename"
- echo "$content" > "$output_dir/$output_filename"
- echo "Created JSON file: $output_dir/$output_filename"
- JSON_FILES_CREATED=$((JSON_FILES_CREATED + 1))
- else
- # Documentation file - add appropriate extension if not present
- output_dir="$MD_OUTPUT_DIR"
-
- # Check if filename has an extension
- if [[ "$filename" == *.* ]]; then
- # Extract basename without extension
- basename="${filename%.*}"
- else
- basename="$filename"
- fi
-
- # Add the selected output format extension
- output_filename="${basename}${OUTPUT_FORMAT}"
-
- echo "$content" > "$output_dir/$output_filename"
- echo "Created documentation file: $output_dir/$output_filename"
- MARKDOWN_FILES_CREATED=$((MARKDOWN_FILES_CREATED + 1))
- fi
- done < <(jq -r '.files | keys[]' "$RESPONSE_FILE")
- fi
- else
- echo "No 'files' key found in response JSON - checking if job completed with no results"
- fi
-
- # Clean up temporary file
- rm -f "$RESPONSE_FILE"
-
- # Check if any files were created
- TOTAL_FILES=$((MARKDOWN_FILES_CREATED + JSON_FILES_CREATED))
- if [ "$TOTAL_FILES" -gt 0 ]; then
- HAS_CHANGES="true"
- echo "Created $MARKDOWN_FILES_CREATED Markdown files in $MD_OUTPUT_DIR"
- echo "Created $JSON_FILES_CREATED JSON files in $JSON_OUTPUT_DIR"
-
- # List created files
- if [ "$MARKDOWN_FILES_CREATED" -gt 0 ]; then
- echo "Markdown files created:"
- ls -la "$MD_OUTPUT_DIR"
+ echo "base_analysis=$BASE_DIR/analysis.json" >> $GITHUB_OUTPUT
+ echo "head_analysis=$HEAD_DIR/analysis.json" >> $GITHUB_OUTPUT
+
+    # Best-effort step: counts architecture health findings for the PR comment
+    # banner. continue-on-error keeps a failure here from failing the action.
+    - name: Architecture health check (best-effort)
+      if: steps.guard.outputs.skip != 'true'
+      id: health
+      continue-on-error: true
+      shell: bash
+      working-directory: codeboarding-engine
+      env:
+        STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml
+        PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine
+      run: |
+        HEAD_DIR="${{ steps.base.outputs.head_dir }}"
+        TARGET="${{ github.workspace }}/target-repo"
+        # Hand the count from Python back to bash through a per-job file under
+        # RUNNER_TEMP (as the other steps do) rather than a fixed /tmp path, and
+        # clear it first so a leftover value is never reported as this run's.
+        ISSUES_FILE="${RUNNER_TEMP}/cb-issues.txt"
+        rm -f "$ISSUES_FILE"
+        # Count real WARNING/CRITICAL findings from the head analysis. Never fails
+        # the run — a missing/old health module just yields 0 issues (no banner).
+        uv run python -c "
+        import json
+        from pathlib import Path
+        issues = 0
+        try:
+            from static_analyzer.analysis_cache import StaticAnalysisCache
+            from health.runner import run_health_checks
+            from health.models import Severity
+            cache = StaticAnalysisCache(artifact_dir=Path('$HEAD_DIR'), repo_root=Path('$TARGET'))
+            sa = cache.get()
+            if sa is not None:
+                report = run_health_checks(sa, repo_name='${{ github.event.repository.name }}', repo_path=Path('$TARGET'))
+                if report is not None:
+                    for cs in report.check_summaries:
+                        for fg in getattr(cs, 'finding_groups', []):
+                            if getattr(fg, 'severity', None) in (Severity.WARNING, Severity.CRITICAL):
+                                issues += len(fg.entities)
+            print(f'Architecture issues found: {issues}')
+        except Exception as exc:
+            print(f'Health check skipped ({exc}).')
+        Path('$ISSUES_FILE').write_text(str(issues))
+        "
+        N=$(cat "$ISSUES_FILE" 2>/dev/null || echo 0)
+        echo "issues=$N" >> "$GITHUB_OUTPUT"
+        echo "Architecture issues: $N"
+
+
+    # Runs scripts/diff_to_mermaid.py on the two analysis files and exposes its
+    # JSON summary (n_changed / rendered / truncated) as step outputs.
+    - name: Diff analyses → Mermaid
+      if: steps.guard.outputs.skip != 'true'
+      id: diagram
+      shell: bash
+      run: |
+        # Optional renderer flags. $FLAG is expanded unquoted below on purpose so
+        # that each flag becomes its own argument.
+        FLAG=""
+        [ "${{ inputs.changed_only }}" = "true" ] && FLAG="$FLAG --changed-only"
+        [ "${{ inputs.nested }}" = "true" ] && FLAG="$FLAG --nested"
+        # The script path is quoted: the action's checkout path may contain spaces.
+        META=$(python3 "${{ github.action_path }}/scripts/diff_to_mermaid.py" \
+          --base "${{ steps.analyze.outputs.base_analysis }}" \
+          --head "${{ steps.analyze.outputs.head_analysis }}" \
+          --out "${RUNNER_TEMP}/diagram.md" \
+          --direction "${{ inputs.diagram_direction }}" $FLAG)
+        echo "$META" > "${RUNNER_TEMP}/diagram_meta.json"
+        echo "diff meta: $META"
+        # Split the three summary fields out of the JSON; -r keeps read from
+        # interpreting backslashes.
+        read -r N RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d['rendered']).lower(), str(d['truncated']).lower())")
+        echo "n_changed=$N" >> "$GITHUB_OUTPUT"
+        echo "rendered=$RENDERED" >> "$GITHUB_OUTPUT"
+        echo "truncated=$TRUNC" >> "$GITHUB_OUTPUT"
+        echo "diagram_md=${RUNNER_TEMP}/diagram.md" >> "$GITHUB_OUTPUT"
+
+    # Assembles the markdown body of the PR comment (headline, diagram or text
+    # fallback, call-to-action links, footer) into a temp file for the next step.
+    - name: Build PR comment body
+      if: steps.guard.outputs.skip != 'true'
+      id: body
+      shell: bash
+      env:
+        # Free-form input/event text is passed through the environment instead of
+        # being pasted into the script, so quotes, backticks or $(...) inside it
+        # are treated as data and can never run as shell code.
+        HEADER: ${{ inputs.comment_header }}
+        BASE_REF: ${{ github.event.pull_request.base.ref }}
+        CTA_BASE: ${{ inputs.cta_base_url }}
+      run: |
+        N="${{ steps.diagram.outputs.n_changed }}"
+        RENDERED="${{ steps.diagram.outputs.rendered }}"
+        TRUNC="${{ steps.diagram.outputs.truncated }}"
+        BODY_FILE=$(mktemp)
+
+        OWNER_REPO="${{ github.repository }}"
+        OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}"
+        PR="${{ steps.guard.outputs.pr_number }}"
+        ISSUES="${{ steps.health.outputs.issues }}"
+
+        # Headline fragment for a given changed-component count.
+        headline() {
+          if [ "$1" = "0" ]; then echo "no architectural changes";
+          elif [ "$1" = "1" ]; then echo "1 component changed";
+          else echo "$1 components changed"; fi
+        }
+
+        # Call-to-action: links open the live workspace (github.dev-equivalent) and
+        # the VS Code extension via the click proxy, with owner/repo/pr for tracking.
+        # The warning banner is shown only when real health findings exist.
+        # Prints nothing when no CTA base URL is configured.
+        cta() {
+          [ -z "$CTA_BASE" ] && return
+          local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}"
+          local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}"
+          echo ""
+          echo "---"
+          if [ -n "$ISSUES" ] && [ "$ISSUES" != "0" ]; then
+            local noun="issue"; [ "$ISSUES" != "1" ] && noun="issues"
+            echo "⚠️ **${ISSUES} architecture ${noun} found.** [**See live in your browser →**](${ws})"
+            echo ""
           fi
-
- if [ "$JSON_FILES_CREATED" -gt 0 ]; then
- echo "JSON files created:"
- ls -la "$JSON_OUTPUT_DIR"
+          # NOTE(review): the leading emoji of the next line was lost to encoding
+          # damage and could not be recovered; 🔍 is a stand-in — confirm upstream.
+          echo "🔍 This is the flattened map. [**Explore this diff live in your browser →**](${ws})"
+          echo ""
+          echo "💡 Prefer your editor? [**Get the CodeBoarding VS Code extension →**](${mp})"
+        }
+
+        {
+          echo "### ${HEADER} · $(headline "$N")"
+          echo ""
+          if [ "$N" = "0" ]; then
+            echo "No architectural changes detected versus \`${BASE_REF}\`."
+          elif [ "$RENDERED" = "true" ]; then
+            cat "${{ steps.diagram.outputs.diagram_md }}"
+            echo ""
+            echo ""
+            echo "Colours indicate components that have been 🟩 added · 🟨 modified · 🟥 removed — versus \`${BASE_REF}\`."
+            if [ "$TRUNC" = "true" ]; then
+              echo ""
+              # "truncated" is also reported when changed_only was requested
+              # explicitly, so GitHub's size limit is only blamed when the
+              # filtering was not asked for.
+              if [ "${{ inputs.changed_only }}" = "true" ]; then
+                echo "Showing changed components only."
+              else
+                echo "Showing changed components only — the full graph exceeds GitHub's inline Mermaid limit."
+              fi
+            fi
+          else
+            echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 edges)."
           fi
- else
- HAS_CHANGES="false"
- echo "No files were created"
- fi
-
- # Set outputs
- echo "markdown_files_created=$MARKDOWN_FILES_CREATED" >> $GITHUB_OUTPUT
- echo "json_files_created=$JSON_FILES_CREATED" >> $GITHUB_OUTPUT
- echo "output_directory=$MD_OUTPUT_DIR" >> $GITHUB_OUTPUT
- echo "json_directory=$JSON_OUTPUT_DIR" >> $GITHUB_OUTPUT
- echo "has_changes=$HAS_CHANGES" >> $GITHUB_OUTPUT
\ No newline at end of file
+          cta
+          echo ""
+          echo "codeboarding-action · run ${{ github.run_id }}"
+        } > "$BODY_FILE"
+
+        echo "body_file=$BODY_FILE" >> "$GITHUB_OUTPUT"
+        echo "--- comment preview ---"
+        cat "$BODY_FILE"
+        echo "--- end preview ---"
+
+ # Posts the body file written by the "body" step as a PR comment.
+ # NOTE(review): third-party action referenced by a mutable tag (@v2) — consider
+ # pinning it to a full commit SHA.
+ - name: Post sticky PR comment
+ if: steps.guard.outputs.skip != 'true'
+ uses: marocchino/sticky-pull-request-comment@v2
+ with:
+ # Fixed header key — presumably what lets a re-run update the same comment
+ # instead of adding a new one; confirm against the action's documentation.
+ header: codeboarding-architecture-diff
+ path: ${{ steps.body.outputs.body_file }}
+ GITHUB_TOKEN: ${{ inputs.github_token }}
diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py
new file mode 100644
index 0000000..35a6cd4
--- /dev/null
+++ b/scripts/diff_to_mermaid.py
@@ -0,0 +1,452 @@
+"""Diff two CodeBoarding analysis.json files and render the delta as a colored Mermaid graph.
+
+Reads a *base* (before) and *head* (after) ``analysis.json`` — both already
+materialized on disk by the engine โ computes a component/relation diff, and
+emits a GitHub-renderable ```mermaid block where:
+
+ * nodes are colored green=added / yellow=modified / red=deleted (deleted dashed)
+ * arrows are colored the same way (red dashed for deleted)
+
+GitHub renders ```mermaid fenced blocks natively inside PR/issue comments, so the
+output goes straight into the sticky comment — no image, no Playwright.
+
+The diff set-arithmetic is a port of the action's ``compute_diff.py``, with two
+differences for this use case: both sides are read from plain file paths (not
+``git show``), and a relation whose ``(src, dst)`` is unchanged but whose label
+text changed is reported as ``modified`` (the original only did added/deleted).
+
+Self-contained stdlib.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+
+# GitHub's mermaid config caps (config.schema.yaml defaults; NOT raisable on
+# GitHub). Exceeding either renders a red error box with no diagram, so we stay
+# comfortably under and degrade to a changed-only / text fallback instead.
+MAX_EDGES = 480 # hard cap 500
+MAX_TEXT = 45_000 # hard cap 50000 chars
+
+# Primer-ish fills that read on both light and dark GitHub backgrounds. White
+# label text is set explicitly so it survives dark mode.
+COLORS = {
+ "added": {"fill": "#1f883d", "stroke": "#0b5d23"},
+ "modified": {"fill": "#bf8700", "stroke": "#7d4e00"},
+ "deleted": {"fill": "#cf222e", "stroke": "#82071e"},
+}
+CHANGED = ("added", "modified", "deleted")
+_EDGE_LABEL_MAX = 48
+
+
+# --------------------------------------------------------------------------- #
+# load
+# --------------------------------------------------------------------------- #
+def load_analysis(path: Path) -> dict:
+    """Load one ``analysis.json`` from disk, or abort the run with an error annotation.
+
+    The file is always decoded as UTF-8 (the encoding this script writes its
+    own output in) rather than the platform default. Any read, decode or JSON
+    syntax failure ends the process through ``sys.exit`` with a ``::error::``
+    message instead of a traceback.
+    """
+    try:
+        return json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:  # ValueError covers JSONDecodeError and UnicodeDecodeError
+        sys.exit(f"::error::Could not read analysis JSON at {path}: {exc}")
+
+
+# --------------------------------------------------------------------------- #
+# diff (ported from compute_diff.py; relation diff extended with 'modified')
+# --------------------------------------------------------------------------- #
+def _comp_id(c: dict) -> str:
+    """Return the component's ``component_id``, or its ``name`` when the id is missing or empty."""
+    cid = c.get("component_id")
+    return cid if cid else c.get("name", "")
+
+
+def _comp_name(c: dict) -> str:
+    """Return the component's ``name`` value, or ``""`` when the key is absent."""
+    return c["name"] if "name" in c else ""
+
+
+def _file_methods(c: dict) -> list:
+    """Return the component's ``file_methods`` entries; a missing or empty value yields ``[]``."""
+    entries = c.get("file_methods")
+    return entries if entries else []
+
+
+def _methods_by_file(c: dict) -> dict:
+    """Group a component's method names into one set per file path.
+
+    Only ``str`` items of each entry's ``methods`` list are counted. An entry
+    that contributes no string names creates no key; a missing ``file_path``
+    is grouped under ``""``.
+    """
+    grouped: dict = {}
+    for entry in _file_methods(c):
+        found = {item for item in (entry.get("methods") or []) if isinstance(item, str)}
+        if not found:
+            continue
+        grouped.setdefault(entry.get("file_path") or "", set()).update(found)
+    return grouped
+
+
+def _has_structural_changes(base: dict, current: dict) -> bool:
+    """Report whether two versions of a component differ structurally.
+
+    Structural means: a different set of ``file_path`` values across
+    ``file_methods``, or a different number of direct sub-components.
+    """
+
+    def paths(comp: dict) -> set:
+        return {entry.get("file_path", "") for entry in _file_methods(comp)}
+
+    def child_count(comp: dict) -> int:
+        return len(comp.get("components") or [])
+
+    return paths(base) != paths(current) or child_count(base) != child_count(current)
+
+
+def _diff_methods(base: dict, current: dict) -> dict:
+    """Compare the per-file method names of two versions of a component.
+
+    Returns ``{"added": {...}, "removed": {...}}``; each inner dict maps a file
+    path to the sorted method names gained (or lost) in ``current`` relative
+    to ``base``. Files with no difference on a side are left out of that side.
+    """
+    before, after = _methods_by_file(base), _methods_by_file(current)
+    added: dict = {}
+    removed: dict = {}
+    for file_path in set(before) | set(after):
+        old_names = before.get(file_path, set())
+        new_names = after.get(file_path, set())
+        gained, lost = sorted(new_names - old_names), sorted(old_names - new_names)
+        if gained:
+            added[file_path] = gained
+        if lost:
+            removed[file_path] = lost
+    return {"added": added, "removed": removed}
+
+
+def _rel_key(r: dict) -> tuple:
+    """Return the ``(source, destination)`` join key of a relation.
+
+    Each endpoint uses its name when present and falls back to its id, then to
+    ``""``. Names are preferred because they are the stable join across two
+    independent analyses; component ids are positional and can be reshuffled
+    on a full re-run.
+    """
+    return tuple(r.get(f"{side}_name") or r.get(f"{side}_id") or "" for side in ("src", "dst"))
+
+
+def _diff_relations(base_rels: list, current_rels: list) -> list:
+    """Annotate one level's relations with a ``diff_status``.
+
+    Relations are joined on ``_rel_key``. Every current relation comes back as
+    a copy marked ``added`` (key absent from base), ``modified`` (same key,
+    different ``relation`` text) or ``unchanged``; base-only relations follow,
+    marked ``deleted``.
+    """
+    before = {_rel_key(rel): rel for rel in (base_rels or [])}
+    after = {_rel_key(rel): rel for rel in (current_rels or [])}
+
+    def status_of(key: tuple, rel: dict) -> str:
+        if key not in before:
+            return "added"
+        same_label = (before[key].get("relation") or "") == (rel.get("relation") or "")
+        return "unchanged" if same_label else "modified"
+
+    annotated: list = [{**rel, "diff_status": status_of(key, rel)} for key, rel in after.items()]
+    annotated.extend({**rel, "diff_status": "deleted"} for key, rel in before.items() if key not in after)
+    return annotated
+
+
+def _diff_components(base_components: list, current_components: list) -> list:
+ """Annotate one nesting level of components with a ``diff_status``.
+
+ Components are matched between base and current by name. The result lists
+ annotated copies of the current components first ("added", "modified" or
+ "unchanged"), followed by "deleted" ghosts for base components that no
+ current component matched. Matched components also carry a ``method_diff``
+ and have their sub-components / sub-relations diffed recursively; an
+ "added" component is copied as-is, so its subtree is not annotated.
+ """
+ base = base_components or []
+ current = current_components or []
+ base_by_name = {_comp_name(c): c for c in base} # name is the stable cross-analysis join
+ matched_names: set = set()
+ result: list = []
+
+ for comp in current:
+ base_match = base_by_name.get(_comp_name(comp))
+ if base_match is None:
+ # No base component with this name: new in head.
+ result.append({**comp, "diff_status": "added"})
+ continue
+ matched_names.add(_comp_name(base_match))
+ structural = _has_structural_changes(base_match, comp)
+ method_diff = _diff_methods(base_match, comp)
+ has_method_changes = bool(method_diff["added"] or method_diff["removed"])
+ diff_status = "modified" if (structural or has_method_changes) else "unchanged"
+
+ annotated = {**comp, "diff_status": diff_status, "method_diff": method_diff}
+
+ # Recurse only when at least one side has children, so leaf components
+ # do not gain an empty "components" key.
+ base_subs = base_match.get("components") or []
+ current_subs = comp.get("components") or []
+ if base_subs or current_subs:
+ annotated["components"] = _diff_components(base_subs, current_subs)
+
+ base_sub_rels = base_match.get("components_relations") or []
+ current_sub_rels = comp.get("components_relations") or []
+ if base_sub_rels or current_sub_rels:
+ annotated["components_relations"] = _diff_relations(base_sub_rels, current_sub_rels)
+
+ result.append(annotated)
+
+ # Base components never matched above were removed; keep them as flat
+ # "deleted" ghosts without their subtree or expand flag.
+ for comp in base:
+ if _comp_name(comp) not in matched_names:
+ ghost = {k: v for k, v in comp.items() if k not in ("components", "components_relations", "can_expand")}
+ ghost["diff_status"] = "deleted"
+ result.append(ghost)
+
+ return result
+
+
+def build_diff(base: dict, head: dict) -> dict:
+ return {
+ "components": _diff_components(base.get("components") or [], head.get("components") or []),
+ "components_relations": _diff_relations(
+ base.get("components_relations") or [],
+ head.get("components_relations") or [],
+ ),
+ }
+
+
+# --------------------------------------------------------------------------- #
+# mermaid emit
+# --------------------------------------------------------------------------- #
+def _sanitize(name: str) -> str:
+    """Collapse every run of non-word characters into a single ``_`` to form a node id.
+
+    NOTE(review): meant to mirror the engine's ``utils.sanitize`` — that helper
+    is not visible here, so confirm the two stay in sync.
+    """
+    source = name if name else ""
+    return re.compile(r"\W+").sub("_", source)
+
+
+def _esc(text: str) -> str:
+    """Make arbitrary text safe inside a double-quoted Mermaid label.
+
+    Line breaks become spaces and the result is stripped. ``#`` is encoded
+    before ``"`` so that the ``#`` introduced by ``#quot;`` is not itself
+    re-encoded.
+    """
+    flat = (text or "").translate({ord("\n"): " ", ord("\r"): " "}).strip()
+    return flat.replace("#", "#35;").replace('"', "#quot;")
+
+
+def _truncate(text: str, limit: int = _EDGE_LABEL_MAX) -> str:
+    """Strip ``text`` and shorten it to at most ``limit`` characters, ending in ``…`` when cut.
+
+    ``limit`` is clamped to at least 1: a zero or negative limit would turn
+    ``text[: limit - 1]`` into a slice counted from the end and return almost
+    the whole string instead of a shortened one.
+    """
+    text = (text or "").strip()
+    limit = max(limit, 1)
+    return text if len(text) <= limit else text[: limit - 1].rstrip() + "…"
+
+
+class _Scope:
+ """Per-level name/id -> mermaid key resolver for one nesting level.
+
+ Deleted ghosts get a separate ``del_`` key namespace from present nodes so a
+ reused id/name can't merge an added node onto a deleted one. Keys are made
+ globally unique via the shared ``used`` set. Resolution is name-first (the
+ stable cross-analysis join); present edges resolve head-first, deleted edges
+ ghost-first. ``force`` overrides the per-component diff_status (the renderer
+ passes it so a wholly-added parent colors its whole subtree).
+ """
+
+ def __init__(self, components: list, used: set, force: str | None = None):
+ # Assigns every component of this level a unique mermaid key and records
+ # it in ``entries`` plus the name/id lookup maps. ``used`` is mutated: it
+ # is the caller's set of keys already taken across all levels.
+ self.entries: list = [] # (key, label, status, component)
+ self.head_by_id: dict = {}
+ self.head_by_name: dict = {}
+ self.del_by_id: dict = {}
+ self.del_by_name: dict = {}
+ for comp in components:
+ status = force or comp.get("diff_status", "unchanged")
+ present = status != "deleted"
+ cid, cname = _comp_id(comp), _comp_name(comp)
+ base = ("n_" if present else "del_") + _sanitize(cname or cid or "node")
+ # On a key collision, append _2, _3, ... until the key is free.
+ key, n = base, 1
+ while key in used:
+ n += 1
+ key = f"{base}_{n}"
+ used.add(key)
+ self.entries.append((key, cname or cid or "(unnamed)", status, comp))
+ by_id = self.head_by_id if present else self.del_by_id
+ by_name = self.head_by_name if present else self.del_by_name
+ # Empty names/ids are not indexed; a later duplicate overwrites an earlier one.
+ if cname:
+ by_name[cname] = key
+ if cid:
+ by_id[cid] = key
+
+ def resolve(self, rid: str, rname: str, present: bool) -> str | None:
+ # Maps a relation endpoint (id and/or name) to the mermaid key of a node
+ # drawn at this level. Present edges search the head maps first, deleted
+ # edges the ghost maps first. Returns None when no map knows the endpoint.
+ maps = [(self.head_by_id, self.head_by_name), (self.del_by_id, self.del_by_name)]
+ if not present:
+ maps.reverse()
+ for by_id, by_name in maps:
+ if rname and rname in by_name: # name-first: stable cross-analysis join
+ return by_name[rname]
+ if rid and rid in by_id:
+ return by_id[rid]
+ return None
+
+
+def _filter_changed(components: list, relations: list) -> tuple:
+    """Keep changed components, the endpoints of changed edges, and edges among the kept — the size fallback.
+
+    Blank or missing ids/names are never used as match keys. Previously a
+    relation without ids (or a component without a name) put ``""`` into the
+    keep sets, and that ``""`` then matched every other blank id/name, so
+    id-only or name-only data could keep the whole graph and defeat the
+    fallback.
+
+    Returns ``(kept_components, kept_relations)``; inputs are not modified.
+    """
+    changed_rels = [r for r in relations if r.get("diff_status") in CHANGED]
+    keep_ids = {_comp_id(c) for c in components if c.get("diff_status") in CHANGED}
+    keep_names = {_comp_name(c) for c in components if c.get("diff_status") in CHANGED}
+    for r in changed_rels:  # so a changed edge between two unchanged nodes still draws its endpoints
+        keep_ids.update((r.get("src_id"), r.get("dst_id")))
+        keep_names.update((r.get("src_name"), r.get("dst_name")))
+    keep_ids -= {"", None}
+    keep_names -= {"", None}
+
+    # Changed components are kept by status, so one without any id or name
+    # survives even though its blank key was dropped above.
+    kept = [
+        c
+        for c in components
+        if c.get("diff_status") in CHANGED or _comp_id(c) in keep_ids or _comp_name(c) in keep_names
+    ]
+    kept_ids = {_comp_id(c) for c in kept} - {"", None}
+    kept_names = {_comp_name(c) for c in kept} - {"", None}
+
+    def touches(r: dict, side_id: str, side_name: str) -> bool:
+        return r.get(side_id) in kept_ids or r.get(side_name) in kept_names
+
+    rels = [
+        r
+        for r in relations
+        if r.get("diff_status") in CHANGED
+        or (touches(r, "src_id", "src_name") and touches(r, "dst_id", "dst_name"))
+    ]
+    return kept, rels
+
+
+def _init_directive(font_size, node_padding, node_spacing, rank_spacing) -> str | None:
+    """Build the optional Mermaid ``%%{init}%%`` directive for node size and spacing.
+
+    Every argument left as ``None`` is omitted. The three spacing values go
+    under the ``flowchart`` config key (``padding``, ``nodeSpacing``,
+    ``rankSpacing``) and ``font_size`` becomes ``themeVariables.fontSize`` in
+    px. Returns ``None`` when no option is set.
+    """
+    spacing = (("padding", node_padding), ("nodeSpacing", node_spacing), ("rankSpacing", rank_spacing))
+    flowchart = {key: value for key, value in spacing if value is not None}
+    cfg: dict = {"flowchart": flowchart} if flowchart else {}
+    if font_size is not None:
+        cfg["themeVariables"] = {"fontSize": f"{font_size}px"}
+    if not cfg:
+        return None
+    return "%%{init: " + json.dumps(cfg) + "}%%"
+
+
+def render_mermaid(
+ diff: dict,
+ direction: str = "LR",
+ changed_only: bool = False,
+ edge_labels: bool = True,
+ nested: bool = False,
+ font_size: int | None = None,
+ node_padding: int | None = None,
+ node_spacing: int | None = None,
+ rank_spacing: int | None = None,
+ max_label: int = _EDGE_LABEL_MAX,
+) -> tuple:
+ """Return (mermaid_text, meta). ``mermaid_text`` is None when there's nothing to draw.
+
+ With ``nested`` the depth>1 sub-components are drawn as Mermaid subgraphs —
+ leaf nodes get a filled class, parent containers a stroke-only ``*Box``
+ class. A wholly-added parent forces ``added`` onto its subtree (the engine
+ only diff-annotates surviving branches; an added subtree arrives raw).
+
+ ``mermaid_text`` is also None when the rendered block would exceed
+ ``MAX_TEXT`` characters or ``MAX_EDGES`` edges. ``meta`` holds ``n_changed``
+ (changed top-level components only), ``n_nodes``, ``n_edges`` and
+ ``truncated`` (true when ``changed_only`` was requested, when the top-level
+ relation count exceeded ``MAX_EDGES``, or when the size check rejected the
+ output).
+ """
+ components = diff.get("components") or []
+ relations = diff.get("components_relations") or []
+ # Counted before any filtering, and only at the top level.
+ n_changed = sum(1 for c in components if c.get("diff_status") in CHANGED)
+
+ if changed_only or len(relations) > MAX_EDGES:
+ components, relations = _filter_changed(components, relations)
+
+ used: set = set()
+ body: list = []
+ node_classes: dict = {"added": [], "modified": [], "deleted": []}
+ box_classes: dict = {"added": [], "modified": [], "deleted": []}
+ edge_styles: dict = {"added": [], "modified": [], "deleted": []}
+ counters = {"edges": 0, "nodes": 0}
+
+ def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None:
+ for rel in rels:
+ status = force or rel.get("diff_status", "unchanged")
+ present = status != "deleted"
+ src = scope.resolve(rel.get("src_id", ""), rel.get("src_name", ""), present)
+ dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present)
+ if src is None or dst is None:
+ continue # endpoint not drawn — skip, don't consume an edge index
+ label = _esc(_truncate(rel.get("relation", ""), max_label)) if edge_labels else ""
+ body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}")
+ # The running edge count doubles as the index used by linkStyle below,
+ # so it must advance once per emitted edge, in emission order.
+ if status in edge_styles:
+ edge_styles[status].append(counters["edges"])
+ counters["edges"] += 1
+
+ def emit_level(comps: list, rels: list, indent: int, force: str | None) -> None:
+ pad = " " * indent
+ scope = _Scope(comps, used, force)
+ for key, label, status, comp in scope.entries:
+ children = comp.get("components") if nested else None
+ if children:
+ body.append(f'{pad}subgraph {key}["{_esc(label)}"]')
+ if status in box_classes:
+ box_classes[status].append(key)
+ child_force = force or (status if status == "added" else None)
+ emit_level(children, comp.get("components_relations") or [], indent + 1, child_force)
+ body.append(f"{pad}end")
+ else:
+ body.append(f'{pad}{key}["{_esc(label)}"]')
+ if status in node_classes:
+ node_classes[status].append(key)
+ counters["nodes"] += 1
+ # This level's edges come after all of its nodes and nested subgraphs.
+ emit_edges(rels, scope, pad, force)
+
+ emit_level(components, relations, 1, None)
+ # Only leaf nodes are counted; with none drawn there is no diagram to return.
+ if counters["nodes"] == 0:
+ return None, {"n_changed": n_changed, "n_nodes": 0, "n_edges": 0, "truncated": False}
+
+ style: list = [
+ f' classDef added fill:{COLORS["added"]["fill"]},stroke:{COLORS["added"]["stroke"]},color:#ffffff;',
+ f' classDef modified fill:{COLORS["modified"]["fill"]},stroke:{COLORS["modified"]["stroke"]},color:#ffffff;',
+ f' classDef deleted fill:{COLORS["deleted"]["fill"]},stroke:{COLORS["deleted"]["stroke"]},'
+ f"color:#ffffff,stroke-dasharray:5 3;",
+ ]
+ if any(box_classes.values()): # stroke-only containers so big parents aren't solid blocks
+ for st in CHANGED:
+ dash = ",stroke-dasharray:5 3" if st == "deleted" else ""
+ style.append(f' classDef {st}Box stroke:{COLORS[st]["stroke"]},stroke-width:2px,fill:none{dash};')
+ for status in CHANGED:
+ if node_classes[status]:
+ style.append(f' class {",".join(node_classes[status])} {status};')
+ if box_classes[status]:
+ style.append(f' class {",".join(box_classes[status])} {status}Box;')
+ for status in CHANGED:
+ idxs = edge_styles[status]
+ if not idxs:
+ continue
+ s = f'stroke:{COLORS[status]["stroke"]},stroke-width:2px'
+ if status == "deleted":
+ s += ",stroke-dasharray:5 3"
+ style.append(f' linkStyle {",".join(str(i) for i in idxs)} {s};')
+
+ directive = _init_directive(font_size, node_padding, node_spacing, rank_spacing)
+ head = ["```mermaid"] + ([directive] if directive else []) + [f"graph {direction}"]
+ text = "\n".join(head + body + style + ["```"])
+ meta = {
+ "n_changed": n_changed,
+ "n_nodes": counters["nodes"],
+ "n_edges": counters["edges"],
+ "truncated": bool(changed_only or len(diff.get("components_relations") or []) > MAX_EDGES),
+ }
+ if len(text) > MAX_TEXT or counters["edges"] > MAX_EDGES: # never trip GitHub's red error box
+ meta["truncated"] = True
+ return None, meta
+ return text, meta
+
+
+# --------------------------------------------------------------------------- #
+# cli
+# --------------------------------------------------------------------------- #
+def main() -> int:
+    """Command-line entry point: diff two analysis files and write the Mermaid block.
+
+    Writes the rendered block to ``--out`` (an empty file when nothing could be
+    rendered) and prints the JSON summary — the renderer's meta plus a
+    ``rendered`` flag — on stdout for the action to consume. Returns 0.
+    """
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--base", required=True, type=Path, help="Path to the base (before) analysis.json")
+    p.add_argument("--head", required=True, type=Path, help="Path to the head (after) analysis.json")
+    p.add_argument("--out", required=True, type=Path, help="Where to write the ```mermaid block")
+    p.add_argument("--direction", default="LR", choices=["LR", "TD", "TB", "RL", "BT"])
+    p.add_argument("--changed-only", action="store_true", help="Render only changed components + incident edges")
+    p.add_argument("--no-edge-labels", dest="edge_labels", action="store_false", help="Draw arrows without relation labels")
+    p.add_argument("--nested", action="store_true", help="Draw depth>1 sub-components as subgraphs")
+    p.add_argument("--font-size", type=int, default=None, help="Node label font size in px (bigger label → bigger node)")
+    p.add_argument("--node-padding", type=int, default=None, help="Interior padding around each node label")
+    p.add_argument("--node-spacing", type=int, default=None, help="Space between nodes in the same rank")
+    p.add_argument("--rank-spacing", type=int, default=None, help="Space between ranks")
+    p.add_argument("--max-label", type=int, default=_EDGE_LABEL_MAX, help="Max characters in an edge label before truncation")
+    args = p.parse_args()
+
+    diff = build_diff(load_analysis(args.base), load_analysis(args.head))
+    mermaid, meta = render_mermaid(
+        diff,
+        direction=args.direction,
+        changed_only=args.changed_only,
+        edge_labels=args.edge_labels,
+        nested=args.nested,
+        font_size=args.font_size,
+        node_padding=args.node_padding,
+        node_spacing=args.node_spacing,
+        rank_spacing=args.rank_spacing,
+        max_label=args.max_label,
+    )
+
+    # An empty output file marks "nothing rendered"; the flag in meta says the same.
+    args.out.write_text(mermaid if mermaid is not None else "", encoding="utf-8")
+    meta["rendered"] = mermaid is not None
+    # Machine-readable summary on stdout for the action to consume.
+    print(json.dumps(meta))
+    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/run_local.sh b/scripts/run_local.sh
new file mode 100755
index 0000000..8a6d7d7
--- /dev/null
+++ b/scripts/run_local.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+#
+# Local test harness for the CodeBoarding Mermaid architecture-diff action.
+# Mirrors action.yml so you can iterate without waiting on a GitHub runner.
+#
+# Two modes:
+#
+# FAST (no LLM, instant) — diff two existing analysis.json files and preview:
+# scripts/run_local.sh --base-json BASE.json --head-json HEAD.json
+#
+# FULL pipeline (needs OPENROUTER_API_KEY) — run the engine on two refs of a
+# local repo, exactly like the action (committed-or-generated base, then
+# incremental head), then diff + preview:
+# export OPENROUTER_API_KEY=sk-or-...
+# scripts/run_local.sh --repo /path/to/repo --base <base-ref> --head <head-ref> [options]
+#
+# Outputs (default ./.cb-local):
+# diagram.md the ```mermaid block (what the action posts)
+# preview.html opens in a browser and renders the colored diagram via mermaid.js
+#
+set -euo pipefail
+
+# Defaults. ENGINE, AGENT_MODEL and PARSING_MODEL honour a value already present
+# in the environment; everything else is changed only through the flags below.
+ACTION_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+ENGINE="${ENGINE:-$ACTION_DIR/../CodeBoarding}"
+OUT="$ACTION_DIR/.cb-local"
+DEPTH="1"
+DIRECTION="LR"
+# These three hold the literal flag to forward to diff_to_mermaid.py, or stay
+# empty when the corresponding option was not given.
+CHANGED_ONLY=""
+NO_EDGE_LABELS=""
+NESTED=""
+EXTRA=""
+# Anything other than "no" lets the script try to open the preview at the end.
+OPEN="auto"
+REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON=""
+AGENT_MODEL="${AGENT_MODEL:-openrouter/anthropic/claude-sonnet-4}"
+PARSING_MODEL="${PARSING_MODEL:-openrouter/anthropic/claude-sonnet-4}"
+
+while [ $# -gt 0 ]; do
+  # Value-taking flags need a following argument. Without this guard, `set -u`
+  # turns the missing "$2" into an opaque "unbound variable" abort instead of
+  # a usage error.
+  case "$1" in
+    --repo|--base|--head|--base-json|--head-json|--engine|--out|--depth|--direction|--extra)
+      [ $# -ge 2 ] || { echo "Missing value for $1" >&2; exit 2; };;
+  esac
+  case "$1" in
+    --repo) REPO="$2"; shift 2;;
+    --base) BASE_REF="$2"; shift 2;;
+    --head) HEAD_REF="$2"; shift 2;;
+    --base-json) BASE_JSON="$2"; shift 2;;
+    --head-json) HEAD_JSON="$2"; shift 2;;
+    --engine) ENGINE="$2"; shift 2;;
+    --out) OUT="$2"; shift 2;;
+    --depth) DEPTH="$2"; shift 2;;
+    --direction) DIRECTION="$2"; shift 2;;
+    --changed-only) CHANGED_ONLY="--changed-only"; shift;;
+    --no-edge-labels) NO_EDGE_LABELS="--no-edge-labels"; shift;;
+    --nested) NESTED="--nested"; shift;;
+    --extra) EXTRA="$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. --extra "--font-size 20 --node-padding 16"
+    --no-open) OPEN="no"; shift;;
+    # Help prints lines 2-30 of this script.
+    -h|--help) sed -n '2,30p' "${BASH_SOURCE[0]}"; exit 0;;
+    *) echo "Unknown arg: $1" >&2; exit 2;;
+  esac
+done
+
+# All outputs (and the full pipeline's scratch directories) live under $OUT.
+mkdir -p "$OUT"
+
+run_engine() { # $1 = uv-runnable python source
+ ( cd "$ENGINE" && \
+ STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \
+ PROJECT_ROOT="$ENGINE" \
+ DIAGRAM_DEPTH_LEVEL="$DEPTH" \
+ CACHING_DOCUMENTATION="false" \
+ ENABLE_MONITORING="false" \
+ OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" \
+ AGENT_MODEL="$AGENT_MODEL" \
+ PARSING_MODEL="$PARSING_MODEL" \
+ uv run python -c "$1" )
+}
+
+# Pick the two analysis.json files to diff. Fast mode uses the files given on
+# the command line; otherwise the engine is run to produce them under $OUT.
+if [ -n "$BASE_JSON" ] && [ -n "$HEAD_JSON" ]; then
+  echo "== Fast mode: diffing existing analyses (no engine run) =="
+  BASE_ANALYSIS="$BASE_JSON"
+  HEAD_ANALYSIS="$HEAD_JSON"
+else
+  # Preconditions for the full pipeline; each failure is a usage error (exit 2).
+  [ -n "$REPO" ] && [ -n "$BASE_REF" ] && [ -n "$HEAD_REF" ] || {
+    echo "Need either --base-json/--head-json, or --repo/--base/--head." >&2; exit 2; }
+  [ -d "$ENGINE" ] || { echo "Engine not found at $ENGINE (set --engine or \$ENGINE)." >&2; exit 2; }
+  [ -n "${OPENROUTER_API_KEY:-}" ] || { echo "Export OPENROUTER_API_KEY for the full pipeline." >&2; exit 2; }
+  # Make REPO absolute: run_engine changes directory before the snippet runs.
+  REPO="$(cd "$REPO" && pwd)"
+  BASE_DIR="$OUT/base"; HEAD_DIR="$OUT/head"
+  rm -rf "$BASE_DIR" "$HEAD_DIR"; mkdir -p "$BASE_DIR" "$HEAD_DIR"
+
+  echo "== Resolving base analysis at $BASE_REF =="
+  if git -C "$REPO" show "$BASE_REF:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null; then
+    # A committed analysis exists at the base ref; also take the static-analysis
+    # pickle when it is committed, and drop the empty file when it is not.
+    git -C "$REPO" show "$BASE_REF:.codeboarding/static_analysis.pkl" > "$BASE_DIR/static_analysis.pkl" 2>/dev/null \
+      && echo " using committed baseline (+ static_analysis.pkl)" || { rm -f "$BASE_DIR/static_analysis.pkl"; echo " using committed baseline"; }
+  else
+    # The failed redirect above leaves an empty analysis.json behind.
+    rm -f "$BASE_DIR/analysis.json"
+    echo " no committed baseline; running FULL analysis on base (LLM)..."
+    BASE_SRC="$OUT/base-src"; rm -rf "$BASE_SRC"
+    # --force: a run that died mid-analysis leaves this path registered with git
+    # even after the directory is deleted, and a plain `worktree add` refuses a
+    # "missing but already registered" path.
+    git -C "$REPO" worktree add --force --detach "$BASE_SRC" "$BASE_REF" >/dev/null
+    # `set -e` aborts the script if the engine fails, so also remove the
+    # worktree from an EXIT trap rather than only on the success path.
+    trap 'git -C "$REPO" worktree remove --force "$BASE_SRC" >/dev/null 2>&1 || true' EXIT
+    run_engine "
+from pathlib import Path
+from codeboarding_workflows.analysis import run_full
+print(run_full(repo_name='$(basename "$REPO")', repo_path=Path('$BASE_SRC'), output_dir=Path('$BASE_DIR'),
+    run_id='local-base', log_path='/tmp/cb-local-base.log', depth_level=int('$DEPTH'), source_sha='$BASE_REF'))
+"
+    git -C "$REPO" worktree remove --force "$BASE_SRC" >/dev/null 2>&1 || true
+    trap - EXIT
+  fi
+
+  echo "== Analyzing head at $HEAD_REF (incremental from base) =="
+  # Seed the head output directory with the base artefacts (best effort).
+  cp -a "$BASE_DIR"/. "$HEAD_DIR"/ 2>/dev/null || true
+  # Try an incremental run first; when the engine reports that the cache or
+  # baseline is unavailable, clear the seeded files and run a full analysis.
+  run_engine "
+from pathlib import Path
+from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError
+from diagram_analysis.exceptions import IncrementalCacheMissingError
+repo=Path('$REPO'); out=Path('$HEAD_DIR'); name='$(basename "$REPO")'
+try:
+    print(run_incremental(repo_path=repo, output_dir=out, project_name=name, run_id='local-head',
+        log_path='/tmp/cb-local-head.log', base_ref='$BASE_REF', target_ref='$HEAD_REF', source_sha='$HEAD_REF'))
+except (IncrementalCacheMissingError, BaselineUnavailableError) as exc:
+    print(f'Incremental unavailable ({exc}); full analysis on head.')
+    for p in out.glob('*'):
+        if p.is_file(): p.unlink()
+    print(run_full(repo_name=name, repo_path=repo, output_dir=out, run_id='local-head',
+        log_path='/tmp/cb-local-head.log', depth_level=int('$DEPTH'), source_sha='$HEAD_REF'))
+"
+  BASE_ANALYSIS="$BASE_DIR/analysis.json"
+  HEAD_ANALYSIS="$HEAD_DIR/analysis.json"
+fi
+
+echo "== Diff -> Mermaid =="
+META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \
+ --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \
+ --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $NESTED $EXTRA)"
+echo " $META"
+
+# Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode
+# to match GitHub. HTML-escape the body so labels with < > & stay valid.
+# The heredoc delimiter is quoted, so the shell expands nothing inside it.
+python3 - "$OUT/diagram.md" "$OUT/preview.html" <<'PY'
+import html, sys
+
+src, dst = sys.argv[1], sys.argv[2]
+with open(src, encoding="utf-8") as fh:
+    body = fh.read().strip()
+
+# Drop the opening and closing ``` fences so only the graph definition remains.
+lines = body.splitlines()
+if lines and lines[0].startswith("```"):
+    lines = lines[1:]
+if lines and lines[-1].startswith("```"):
+    lines = lines[:-1]
+graph = html.escape("\n".join(lines))
+
+# diagram.md is empty when the renderer produced no diagram; show a note
+# instead of handing mermaid an empty definition.
+if graph.strip():
+    diagram = f'<pre class="mermaid">\n{graph}\n</pre>'
+else:
+    diagram = "<p>No architecture changes to display.</p>"
+
+# NOTE(review): the legend swatch colours are indicative only; confirm they
+# match the colours diff_to_mermaid.py assigns to added/modified/deleted nodes.
+# Literal braces in the JavaScript are doubled because this is an f-string.
+page = f"""<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>CodeBoarding architecture diff</title>
+<script type="module">
+import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs";
+mermaid.initialize({{ startOnLoad: true, securityLevel: "strict" }});
+</script>
+</head>
+<body>
+<h1>Architecture diff preview</h1>
+<p>
+  <span style="color:#2da44e">■</span> added
+  <span style="color:#bf8700">■</span> modified
+  <span style="color:#cf222e">■</span> deleted
+</p>
+{diagram}
+</body>
+</html>
+"""
+with open(dst, "w", encoding="utf-8") as fh:
+    fh.write(page)
+print(f" wrote {dst}")
+PY
+
+# Report where the artefacts landed, then try to open the preview unless
+# --no-open was given.
+echo
+echo "diagram : $OUT/diagram.md"
+echo "preview : $OUT/preview.html"
+if [ "$OPEN" != "no" ]; then
+  # Use the first launcher found on PATH: `open`, then `xdg-open`.
+  OPENER=""
+  for candidate in open xdg-open; do
+    if command -v "$candidate" >/dev/null 2>&1; then
+      OPENER="$candidate"
+      break
+    fi
+  done
+  if [ -n "$OPENER" ]; then
+    "$OPENER" "$OUT/preview.html"
+  else
+    echo "(open $OUT/preview.html in your browser)"
+  fi
+fi