From f9762d8e4adb7c6e4dda75cfa60dc67faeed1bcb Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 15:24:38 +0200 Subject: [PATCH 1/6] feat: Mermaid PR architecture-diff action (level-1 default, nested opt-in) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the webview/Playwright PNG approach with an inline Mermaid diagram that GitHub renders natively in the PR comment — no image, no orphan branch, no contents:write, and fork-friendly. How it works: - Resolve a base ("before") analysis: use the committed .codeboarding/analysis.json at the PR base if present, else generate one via a full engine run on the base commit. - Analyze the PR head incrementally, seeded from the base (stable component ids), falling back to a full run on cache miss. - scripts/diff_to_mermaid.py diffs the two analyses (name-based matching; relation label change => modified) and emits a graph LR with nodes colored via classDef/class and arrows via positional linkStyle: green added, yellow modified, red dashed deleted. Escaping, deleted-namespace keying, and a size guard (GitHub's ~500-edge / 50k-char cap -> changed-only or text fallback). Rendering: - Level 1 (flat, top-level) is the default — readable inline, never trips the size cap. - nested: true draws depth>1 sub-components as subgraphs (leaf nodes filled, parent containers outlined). Optional --font-size/--node-padding/spacing emit an %%{init}%% directive to enlarge nodes. scripts/run_local.sh mirrors the action for local iteration (fast diff-only or full pipeline) and writes a browser HTML preview rendered with mermaid.js. 
--- .github/workflows/example-usage.yml | 132 +---- .gitignore | 3 + README.md | 173 ++++--- action.yml | 758 +++++++++++++--------------- scripts/diff_to_mermaid.py | 449 ++++++++++++++++ scripts/run_local.sh | 169 +++++++ 6 files changed, 1079 insertions(+), 605 deletions(-) create mode 100644 scripts/diff_to_mermaid.py create mode 100755 scripts/run_local.sh diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml index 1f75c83..6fb9f78 100644 --- a/.github/workflows/example-usage.yml +++ b/.github/workflows/example-usage.yml @@ -1,126 +1,22 @@ -name: Example Usage of CodeBoarding Action +name: Architecture diff on: - workflow_dispatch: - inputs: - repository_url: - description: 'Repository URL to test with' - required: false - default: 'https://github.com/microsoft/markitdown' - type: string - source_branch: - description: 'Source branch for comparison' - required: false - default: 'main' - type: string - target_branch: - description: 'Target branch for comparison' - required: false - default: 'develop' - type: string - output_format: - description: 'Output format for documentation' - required: false - default: '.md' - type: choice - options: - - '.md' - - '.rst' - pull_request: - branches: [ main, master ] - types: [opened, synchronize, reopened] - - schedule: - # Run daily at 2 AM UTC - - cron: '0 2 * * *' + types: [opened, synchronize, reopened, ready_for_review] + +# Only a PR comment is posted โ€” no image is pushed โ€” so contents:write is not needed. 
+permissions: + pull-requests: write jobs: - update-docs-action-usage: + architecture-diff: runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - + if: github.event.pull_request.draft == false + timeout-minutes: 60 steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - fetch-depth: 0 # Required to access branch history - - # Determine branches based on context - - name: Set branch variables - id: set-branches - run: | - if [ "${{ github.event_name }}" = "pull_request" ]; then - echo "source_branch=${{ github.head_ref }}" >> $GITHUB_OUTPUT - echo "target_branch=${{ github.base_ref }}" >> $GITHUB_OUTPUT - elif [ "${{ github.event.inputs.source_branch }}" != "" ] && [ "${{ github.event.inputs.target_branch }}" != "" ]; then - echo "source_branch=${{ github.event.inputs.source_branch }}" >> $GITHUB_OUTPUT - echo "target_branch=${{ github.event.inputs.target_branch }}" >> $GITHUB_OUTPUT - else - # Default to current branch and main - echo "source_branch=${{ github.ref_name }}" >> $GITHUB_OUTPUT - echo "target_branch=main" >> $GITHUB_OUTPUT - fi - - - name: Fetch CodeBoarding Documentation - id: codeboarding - uses: ./ - with: - repository_url: ${{ github.event.inputs.repository_url }} - source_branch: ${{ steps.set-branches.outputs.source_branch }} - target_branch: ${{ steps.set-branches.outputs.target_branch }} - output_directory: 'docs' - output_format: ${{ github.event.inputs.output_format || '.md' }} - - - name: Display Action Results - run: | - echo "Documentation files created: ${{ steps.codeboarding.outputs.markdown_files_created }}" - echo "JSON files created: ${{ steps.codeboarding.outputs.json_files_created }}" - echo "Documentation directory: ${{ steps.codeboarding.outputs.output_directory }}" - echo "JSON directory: ${{ steps.codeboarding.outputs.json_directory }}" - echo "Has changes: ${{ steps.codeboarding.outputs.has_changes }}" - - # Check if we have any changes to commit 
- - name: Check for changes - id: git-changes - run: | - if [ -n "$(git status --porcelain)" ]; then - echo "has_git_changes=true" >> $GITHUB_OUTPUT - else - echo "has_git_changes=false" >> $GITHUB_OUTPUT - fi - - - name: Create Pull Request - if: steps.git-changes.outputs.has_git_changes == 'true' && steps.codeboarding.outputs.has_changes == 'true' - uses: peter-evans/create-pull-request@v5 + - uses: codeboarding/codeboarding-action@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "docs: update codeboarding documentation" - title: "๐Ÿ“š CodeBoarding Documentation Update" - body: | - ## ๐Ÿ“š Documentation Update - - This PR contains updated documentation files fetched from the CodeBoarding service. - - ### ๐Ÿ“Š Summary - - **Documentation files created/updated**: ${{ steps.codeboarding.outputs.markdown_files_created }} - - **JSON files created/updated**: ${{ steps.codeboarding.outputs.json_files_created }} - - **Documentation directory**: `${{ steps.codeboarding.outputs.output_directory }}/` - - **JSON directory**: `${{ steps.codeboarding.outputs.json_directory }}/` - - **Source branch**: `${{ steps.set-branches.outputs.source_branch }}` - - **Target branch**: `${{ steps.set-branches.outputs.target_branch }}` - - **Output format**: `${{ github.event.inputs.output_format || '.md' }}` - - **Repository analyzed**: `${{ steps.codeboarding.outputs.repo_url }}` - - ### ๐Ÿ” Changes - Files have been updated with fresh documentation content based on code changes between branches. - - --- - - ๐Ÿค– This PR was automatically generated by the CodeBoarding documentation update workflow. 
- branch: docs/codeboarding-update - base: ${{ steps.set-branches.outputs.target_branch }} - delete-branch: true + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} + # depth_level: '1' # 1-3, higher = more detail + # diagram_direction: 'LR' # LR | TD | TB | RL | BT + # changed_only: 'false' # 'true' to draw only changed components diff --git a/.gitignore b/.gitignore index 865fddd..ea8864f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ test_response.json test_codeboarding/ +# Local test harness output (scripts/run_local.sh) +.cb-local/ + # Environment files .env diff --git a/README.md b/README.md index 043c743..3ba9387 100644 --- a/README.md +++ b/README.md @@ -1,111 +1,134 @@
CodeBoarding Logo - - # CodeBoarding [Diagram-First Documentation] - - [![GitHub Action](https://img.shields.io/badge/GitHub-Action-blue?logo=github-actions)](https://github.com/marketplace/actions/codeboarding-diagram-first-documentation) + + # CodeBoarding Architecture Diff (Mermaid) + + Posts a PR comment with a **Mermaid** architecture diagram showing which components changed — **green** added, **yellow** modified, **red** deleted — for both nodes and arrows.
-Generates diagram-first visualizations of your codebase using static analysis and large language models. +## What it does + +On every pull request, this action: + +1. Resolves a **base ("before") analysis**: it reads the `.codeboarding/analysis.json` committed at the PR base commit if one exists; otherwise it runs a full CodeBoarding analysis on the base commit to produce one. +2. Runs an **incremental analysis on the PR head**, seeded from the base analysis — only LLM-calling the components whose code actually changed, so a typical PR costs a handful of LLM calls. +3. **Diffs the two analyses** and renders the architecture graph as a Mermaid block with changed components and relations colored: + - **green** — added + - **yellow** — modified + - **red** (dashed) — deleted +4. Posts a sticky PR comment containing the Mermaid block. **GitHub renders the diagram inline** — no image, no Playwright, no extra branch. ## Usage ```yaml -name: Generate Documentation +name: Architecture diff on: - push: - branches: [ main ] pull_request: - branches: [ main ] - types: [opened, synchronize, reopened] + types: [opened, synchronize, reopened, ready_for_review] + +permissions: + pull-requests: write # the only permission needed — nothing is pushed jobs: - documentation: + diagram: runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + timeout-minutes: 60 steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Required to access branch history - - - name: Generate Documentation - uses: codeboarding/codeboarding-ghaction@v1 + - uses: codeboarding/codeboarding-action@v1 with: - repository_url: ${{ github.server_url }}/${{ github.repository }} - source_branch: ${{ github.head_ref || github.ref_name }} - target_branch: ${{ github.base_ref || 'main' }} - output_directory: 'docs' - output_format: '.md' - - - name: Upload Documentation - uses: actions/upload-artifact@v4 - with: - name: documentation - path: | - docs/ - .codeboarding/ 
+ llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} ``` +You need **one secret**: an LLM API key. OpenRouter is the default; pass your own model via the `agent_model` / `parsing_model` inputs if you prefer. + ## Inputs -| Input | Description | Required | Default | -|-------|-------------|----------|---------| -| `repository_url` | Repository URL for which documentation will be generated | Yes | - | -| `source_branch` | Source branch for comparison (typically the PR branch) | Yes | - | -| `target_branch` | Target branch for comparison (typically the base branch) | Yes | - | -| `output_directory` | Directory where documentation files will be saved | No | `docs` | -| `output_format` | Format for documentation files (either `.md` or `.rst`) | No | `.md` | +| Input | Default | Description | +|---|---|---| +| `llm_api_key` | (required) | LLM API key. Currently OpenRouter (`OPENROUTER_API_KEY`). | +| `github_token` | `${{ github.token }}` | Token used to post the comment. | +| `engine_ref` | `main` | Git ref of `CodeBoarding/CodeBoarding`. Pin in production. | +| `depth_level` | `1` | Diagram depth (1–3). Higher = slower + more detail. | +| `agent_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for analysis. | +| `parsing_model` | `openrouter/anthropic/claude-sonnet-4` | LLM for parsing. | +| `comment_header` | `Architecture review` | Header line of the PR comment. | +| `diagram_direction` | `LR` | Mermaid layout direction: `LR`, `TD`, `TB`, `RL`, or `BT`. | +| `changed_only` | `false` | Draw only changed components and their incident edges. | +| `nested` | `false` | Draw depth>1 sub-components as nested subgraphs (pair with `depth_level >= 2`). 
| ## Outputs | Output | Description | -|--------|-------------| -| `markdown_files_created` | Number of documentation files created | -| `json_files_created` | Number of JSON files created | -| `output_directory` | Directory where documentation files were saved | -| `json_directory` | Directory where JSON files were saved (always `.codeboarding`) | -| `has_changes` | Whether any files were created or changed | +|---|---| +| `diagram_md` | Path to the rendered ```` ```mermaid ```` block in the runner workspace. | +| `n_changed` | Number of top-level components added/modified/deleted. | +| `truncated` | `true` if the diagram was reduced to changed-only to fit GitHub's Mermaid limit. | + +## How the diff is colored + +Nodes are styled with Mermaid `classDef` / `class`; arrows are styled with positional `linkStyle`. A relation counts as **modified** when its endpoints are unchanged but its label text changed. Example of the emitted block: + +```mermaid +graph LR + Api["API Gateway"] + Auth["Auth Service"] + Cache["Cache"] + Api -- "routes to" --> Auth + Auth -- "reads/writes" --> Cache + classDef added fill:#1f883d,stroke:#0b5d23,color:#ffffff; + classDef modified fill:#bf8700,stroke:#7d4e00,color:#ffffff; + classDef deleted fill:#cf222e,stroke:#82071e,color:#ffffff,stroke-dasharray:5 3; + class Cache added; + class Auth modified; + class Api deleted; + linkStyle 0 stroke:#cf222e,stroke-width:2px,stroke-dasharray:5 3; + linkStyle 1 stroke:#1f883d,stroke-width:2px; +``` -## How It Works +## No baseline required -The action works by: +If `.codeboarding/analysis.json` isn't committed at the PR base commit, the action **generates the baseline itself** by running a full analysis on the base commit, then diffs the head against it. Committing a baseline on your default branch makes runs cheaper (the base run is skipped) and the diff more stable, but it is not required. -1. 
Analyzing the differences introduced in the source branch and putting the results in the target branch -2. Generating documentation files based on the latest version of the source branch -3. Outputting two types of files: - - Documentation files (Markdown or RST) in the specified output directory - - Metadata files in the `.codeboarding` directory +## Fork PRs -## License +Because nothing is pushed (the diagram is inline Mermaid), there is no image step to skip on forks. The one caveat is GitHub's own policy: **secrets are withheld from `pull_request`-triggered runs on forks**, so the LLM key is unavailable and the run fails early with a clear message. A maintainer can re-run from the Actions tab, or use `pull_request_target` if you understand its security implications. -MIT License - see [LICENSE](LICENSE) file for details. +## Limitations -# CodeBoarding GitHub Action +- **GitHub Mermaid caps.** Inline Mermaid in comments is capped (≈500 edges / 50 000 chars). The action stays under this by auto-falling-back to a changed-only graph; if even that overflows it posts a text summary instead of a broken diagram. +- **Nesting.** By default only the top-level component graph is drawn (matching the engine's default `graph LR`). Set `nested: true` with `depth_level >= 2` to draw sub-components as nested subgraphs — leaf nodes filled, parent containers outlined, both colored by status. Large nested graphs are more likely to hit GitHub's Mermaid caps (above), in which case the action degrades to changed-only or a text summary. +- **Renames show as remove + add.** Components are matched across the two analyses by name (the stable join), so a renamed component appears as a red removal plus a green addition rather than a single yellow change. +- **No click-through.** GitHub renders Mermaid in strict security mode, so node hyperlinks are disabled. -## Important: Timeout Configuration +## Local testing -For large repositories, the analysis can take 15-45 minutes. 
Make sure to configure appropriate timeouts in your workflow: +A GitHub run is slow (engine install + two analyses). To iterate locally, use `scripts/run_local.sh`. It mirrors `action.yml` and writes `.cb-local/diagram.md` plus a `.cb-local/preview.html` you open in a browser (rendered with mermaid.js in GitHub's strict mode, so it looks like the comment will). -```yaml -jobs: - generate-docs: - runs-on: ubuntu-latest - timeout-minutes: 60 # Set to 60+ minutes for large repositories - steps: - - uses: actions/checkout@v4 - - uses: your-username/codeboarding-ghaction@v1 - with: - # your inputs here +**Fast — no LLM, instant.** Diff two existing `analysis.json` files. Great for iterating on colors/layout. For a realistic pair, pull two revisions of a committed analysis: + +```bash +git show <base-ref>:.codeboarding/analysis.json > /tmp/base.json +git show <head-ref>:.codeboarding/analysis.json > /tmp/head.json +scripts/run_local.sh --base-json /tmp/base.json --head-json /tmp/head.json ``` -## Timeout Guidelines +**Full pipeline — needs an LLM key.** Runs the engine on two refs of a local repo exactly like the action (committed-or-generated base, then incremental head): + +```bash +export OPENROUTER_API_KEY=sk-or-... +scripts/run_local.sh --repo /path/to/repo --base <base-ref> --head <head-ref> \ + --engine /path/to/CodeBoarding # defaults to ../CodeBoarding +``` -- **Small repositories** (<1k files): 10-15 minutes -- **Medium repositories** (1k-5k files): 20-30 minutes -- **Large repositories** (5k+ files): 30-60 minutes -- **Very large repositories** (10k+ files): 45-90 minutes +Flags: `--depth N`, `--direction LR|TD|…`, `--nested`, `--changed-only`, `--no-edge-labels`, `--out DIR`, `--no-open`. + +The diagram step alone is also directly runnable: + +```bash +python3 scripts/diff_to_mermaid.py --base base/analysis.json --head head/analysis.json --out diagram.md +``` + +## License -If your workflow consistently times out, consider: -1. Increasing `timeout-minutes` to 90 or higher -2. 
Running the action on a schedule during off-peak hours -3. Analyzing specific branches with smaller diffs +MIT โ€” see [LICENSE](LICENSE). diff --git a/action.yml b/action.yml index 6f17d1d..3c3c031 100644 --- a/action.yml +++ b/action.yml @@ -1,447 +1,381 @@ -name: 'CodeBoarding [Diagram-First Documentation]' -description: 'Generates diagram-first visualizations of your codebase using static analysis and large language models.' +name: 'CodeBoarding Architecture Diff (Mermaid)' +description: 'Posts a PR comment with a Mermaid architecture diagram showing which components changed (green added / yellow modified / red deleted) โ€” nodes and arrows.' author: 'CodeBoarding' branding: - icon: 'book-open' # or 'layers', 'git-branch', 'book-open', 'target' + icon: 'git-pull-request' color: 'blue' inputs: - output_directory: - description: 'Directory where documentation files will be saved' - required: false - default: 'docs' - repository_url: - description: 'Repository URL to fetch documentation for (defaults to current repository)' - required: true - source_branch: - description: 'Source branch for comparison' - required: true - target_branch: - description: 'Target branch for comparison' + llm_api_key: + description: 'LLM API key (OpenRouter by default). Required.' required: true - output_format: - description: 'Output format for documentation files (.md, .mdx, .rst, or .html)' + github_token: + description: 'GITHUB_TOKEN used to post the PR comment. Defaults to the workflow token.' + required: false + default: ${{ github.token }} + engine_ref: + description: 'Git ref of CodeBoarding/CodeBoarding to use as the analysis engine.' + required: false + default: 'main' + depth_level: + description: 'Diagram depth (1-3). Higher is slower and more detailed.' + required: false + default: '1' + agent_model: + description: 'LLM model identifier used for analysis (AGENT_MODEL env var).' 
+ required: false + default: 'openrouter/anthropic/claude-sonnet-4' + parsing_model: + description: 'LLM model identifier used for parsing (PARSING_MODEL env var).' + required: false + default: 'openrouter/anthropic/claude-sonnet-4' + comment_header: + description: 'Header line used inside the sticky PR comment.' + required: false + default: 'Architecture review' + diagram_direction: + description: 'Mermaid layout direction: LR, TD, TB, RL, or BT.' required: false - default: '.md' + default: 'LR' + changed_only: + description: 'Render only changed components and their incident edges (also auto-applied when the full graph exceeds GitHub''s Mermaid limit).' + required: false + default: 'false' + nested: + description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).' + required: false + default: 'false' outputs: - markdown_files_created: - description: 'Number of Markdown files created' - value: ${{ steps.process-docs.outputs.markdown_files_created }} - json_files_created: - description: 'Number of JSON files created' - value: ${{ steps.process-docs.outputs.json_files_created }} - output_directory: - description: 'Directory where Markdown files were saved' - value: ${{ steps.process-docs.outputs.output_directory }} - json_directory: - description: 'Directory where JSON files were saved (.codeboarding)' - value: ${{ steps.process-docs.outputs.json_directory }} - has_changes: - description: 'Whether any files were created or changed' - value: ${{ steps.process-docs.outputs.has_changes }} - repo_url: - description: 'Repository URL that was analyzed' - value: ${{ steps.repo-url.outputs.repo_url }} + diagram_md: + description: 'Path to the rendered ```mermaid block (in the runner workspace).' + value: ${{ steps.diagram.outputs.diagram_md }} + n_changed: + description: 'Number of top-level components added/modified/deleted.' 
+ value: ${{ steps.diagram.outputs.n_changed }} + truncated: + description: 'True if the diagram was reduced to changed-only to fit GitHub''s Mermaid limit.' + value: ${{ steps.diagram.outputs.truncated }} runs: using: 'composite' steps: - - name: Determine repository URL - id: repo-url + - name: Guard โ€” PR event only + id: guard shell: bash run: | - # Use the provided repository URL if it's not empty - if [ -n "${{ inputs.repository_url }}" ]; then - REPO_URL="${{ inputs.repository_url }}" - echo "Using provided repository URL: $REPO_URL" - # Otherwise try to determine from git if we're in a git repository - elif git config --get remote.origin.url > /dev/null 2>&1; then - REPO_URL=$(git config --get remote.origin.url) - # Convert SSH URL to HTTPS if needed - if [[ $REPO_URL == git@* ]]; then - REPO_URL=$(echo $REPO_URL | sed 's|git@github.com:|https://github.com/|') - fi - echo "Using git remote URL: $REPO_URL" + if [ -z "${{ github.event.pull_request.number }}" ]; then + echo "::warning::CodeBoarding Architecture Diff only runs on pull_request events. Skipping." 
+ echo "skip=true" >> $GITHUB_OUTPUT else - REPO_URL="${{ github.server_url }}/${{ github.repository }}" - echo "Using GitHub context URL: $REPO_URL" + echo "skip=false" >> $GITHUB_OUTPUT + echo "base_sha=${{ github.event.pull_request.base.sha }}" >> $GITHUB_OUTPUT + echo "head_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT + echo "pr_number=${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT fi - echo "repo_url=$REPO_URL" >> $GITHUB_OUTPUT - - name: Create and poll documentation job - id: fetch-docs + - name: Checkout CodeBoarding engine + if: steps.guard.outputs.skip != 'true' + uses: actions/checkout@v4 + with: + repository: CodeBoarding/CodeBoarding + ref: ${{ inputs.engine_ref }} + path: codeboarding-engine + + - name: Checkout target repository (PR head) + if: steps.guard.outputs.skip != 'true' + uses: actions/checkout@v4 + with: + path: target-repo + fetch-depth: 0 + ref: ${{ steps.guard.outputs.head_sha }} + + - name: Ensure PR base commit is fetched + if: steps.guard.outputs.skip != 'true' shell: bash + working-directory: target-repo run: | - CREATE_JOB_URL="https://server.codeboarding.org/github_action/jobs" - REPO_URL="${{ steps.repo-url.outputs.repo_url }}" - SOURCE_BRANCH="${{ inputs.source_branch }}" - TARGET_BRANCH="${{ inputs.target_branch }}" - OUTPUT_DIRECTORY="${{ inputs.output_directory }}" - OUTPUT_FORMAT="${{ inputs.output_format }}" - - echo "๐Ÿš€ Creating CodeBoarding analysis job...$CREATE_JOB_URL" - echo "๐Ÿ“Š Repository: $REPO_URL" - echo "๐ŸŒฟ Source branch: $SOURCE_BRANCH" - echo "๐ŸŽฏ Target branch: $TARGET_BRANCH" - echo "๐Ÿ“„ Output format: $OUTPUT_FORMAT" - - # Create JSON payload - JSON_PAYLOAD=$(jq -n \ - --arg url "$REPO_URL" \ - --arg source_branch "$SOURCE_BRANCH" \ - --arg target_branch "$TARGET_BRANCH" \ - --arg output_directory "$OUTPUT_DIRECTORY" \ - --arg extension "$OUTPUT_FORMAT" \ - '{ - url: $url, - source_branch: $source_branch, - target_branch: $target_branch, - output_directory: 
$output_directory, - extension: $extension - }') - - echo "๐Ÿ“‹ Request payload:" - echo "$JSON_PAYLOAD" - - # Create temporary file for response - TEMP_FILE=$(mktemp) - - echo "๐ŸŒ Making API request to create job..." - - # Make the API call to create job - response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \ - -X POST \ - -H "Content-Type: application/json" \ - -d "$JSON_PAYLOAD" \ - --max-time 60 \ - --connect-timeout 30 \ - "$CREATE_JOB_URL") - curl_exit_code=$? - - http_code=${response: -3} - - echo "โœ… Job creation request completed!" - echo "๐Ÿ“‹ Response status code: $http_code" - echo "๐Ÿ”ง Curl exit code: $curl_exit_code" - - # Handle curl errors - if [ $curl_exit_code -ne 0 ]; then - echo "โŒ Error: Curl failed with exit code $curl_exit_code" - case $curl_exit_code in - 6) echo "๐ŸŒ Couldn't resolve host - check network connectivity" ;; - 7) echo "๐Ÿ”Œ Failed to connect to host - server might be down" ;; - 28) echo "โฐ Request timed out - server might be busy" ;; - *) echo "โ“ Unknown curl error - check network and server status" ;; - esac - rm -f "$TEMP_FILE" + git fetch origin "${{ steps.guard.outputs.base_sha }}" --depth=1 || true + git cat-file -e "${{ steps.guard.outputs.base_sha }}" && echo "Base commit reachable." || \ + (echo "::error::Base commit ${{ steps.guard.outputs.base_sha }} is not reachable." 
&& exit 1) + + - name: Set up Python 3.13 + if: steps.guard.outputs.skip != 'true' + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Set up Node.js 20 + if: steps.guard.outputs.skip != 'true' + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install uv + if: steps.guard.outputs.skip != 'true' + uses: astral-sh/setup-uv@v4 + + - name: Cache uv venv (engine) + if: steps.guard.outputs.skip != 'true' + uses: actions/cache@v4 + with: + path: codeboarding-engine/.venv + key: cb-uv-${{ runner.os }}-${{ hashFiles('codeboarding-engine/pyproject.toml', 'codeboarding-engine/uv.lock') }} + restore-keys: | + cb-uv-${{ runner.os }}- + + - name: Cache LSP servers + if: steps.guard.outputs.skip != 'true' + uses: actions/cache@v4 + with: + path: | + codeboarding-engine/static_analyzer/servers/node_modules + codeboarding-engine/static_analyzer/servers/bin + key: cb-lsp-${{ runner.os }}-v1 + restore-keys: | + cb-lsp-${{ runner.os }}- + + - name: Install Python dependencies + if: steps.guard.outputs.skip != 'true' + shell: bash + working-directory: codeboarding-engine + run: | + uv venv --clear + uv pip install -e . + + - name: Install LSP servers + if: steps.guard.outputs.skip != 'true' + shell: bash + working-directory: codeboarding-engine + run: | + uv run python install.py --auto-install-npm + + - name: Prepare & verify LLM key + if: steps.guard.outputs.skip != 'true' + shell: bash + env: + RAW_KEY: ${{ inputs.llm_api_key }} + RAW_AGENT_MODEL: ${{ inputs.agent_model }} + RAW_PARSING_MODEL: ${{ inputs.parsing_model }} + run: | + if [ -z "$RAW_KEY" ]; then + echo "::error::llm_api_key is empty. On fork PRs, repo secrets are withheld by GitHub โ€” a maintainer must re-run, or use pull_request_target." 
exit 1 fi - - if [ "$http_code" != "202" ]; then - echo "โŒ Error: Job creation failed with status code $http_code" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - - # Try to parse as JSON for better error message - if jq -e '.detail' "$TEMP_FILE" > /dev/null 2>&1; then - echo "๐Ÿ” Error details: $(jq -r '.detail' "$TEMP_FILE")" - fi - - rm -f "$TEMP_FILE" + # Pasting a key into the secret UI often picks up trailing newlines, + # wrapping quotes, or a whole `KEY=value` line. Normalize all of that. + _strip() { printf '%s' "$1" | tr -d '[:space:]' | sed -e 's/^"//;s/"$//' -e "s/^'//;s/'\$//"; } + KEY="$(_strip "$RAW_KEY")" + case "$KEY" in + OPENROUTER_API_KEY=*) KEY="${KEY#OPENROUTER_API_KEY=}";; + openrouter_api_key=*) KEY="${KEY#openrouter_api_key=}";; + esac + KEY="$(_strip "$KEY")" + AGENT_MODEL="$(_strip "$RAW_AGENT_MODEL")" + PARSING_MODEL="$(_strip "$RAW_PARSING_MODEL")" + + # Mask the cleaned value (it may differ from the registered secret). + echo "::add-mask::$KEY" + + case "$KEY" in sk-or-v1-*) PFX=1 ;; *) PFX=0 ;; esac + echo "OPENROUTER_API_KEY length: ${#KEY}; looks-like-OpenRouter: $PFX" + STATUS=$(curl -sS -o /tmp/openrouter-auth.json -w "%{http_code}" \ + -H "Authorization: Bearer $KEY" --max-time 10 \ + https://openrouter.ai/api/v1/auth/key || echo "curl-fail") + echo "OpenRouter /auth/key response: HTTP $STATUS" + if [ "$STATUS" != "200" ]; then + echo "::error::OpenRouter rejected the API key (HTTP $STATUS). Verify the OPENROUTER_API_KEY secret." + cat /tmp/openrouter-auth.json 2>/dev/null || true exit 1 fi - - # Check if response is valid JSON - if ! jq empty "$TEMP_FILE" 2>/dev/null; then - echo "โŒ Error: Invalid JSON response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" - exit 1 + + # Export (masked) for the analysis steps. 
+ { + echo "OPENROUTER_API_KEY=$KEY" + echo "AGENT_MODEL=$AGENT_MODEL" + echo "PARSING_MODEL=$PARSING_MODEL" + } >> "$GITHUB_ENV" + + - name: Resolve base analysis (committed baseline) + if: steps.guard.outputs.skip != 'true' + id: base + shell: bash + working-directory: target-repo + run: | + BASE_SHA="${{ steps.guard.outputs.base_sha }}" + BASE_DIR="${RUNNER_TEMP}/cb-base" + HEAD_DIR="${RUNNER_TEMP}/cb-head" + mkdir -p "$BASE_DIR" "$HEAD_DIR" + echo "base_dir=$BASE_DIR" >> $GITHUB_OUTPUT + echo "head_dir=$HEAD_DIR" >> $GITHUB_OUTPUT + if git show "${BASE_SHA}:.codeboarding/analysis.json" > "${BASE_DIR}/analysis.json" 2>/dev/null; then + git show "${BASE_SHA}:.codeboarding/static_analysis.pkl" > "${BASE_DIR}/static_analysis.pkl" 2>/dev/null \ + && echo "Seeded committed static_analysis.pkl from base." \ + || rm -f "${BASE_DIR}/static_analysis.pkl" + echo "committed=true" >> $GITHUB_OUTPUT + echo "Using committed .codeboarding/analysis.json at ${BASE_SHA}." + else + rm -f "${BASE_DIR}/analysis.json" + echo "committed=false" >> $GITHUB_OUTPUT + echo "No committed baseline at ${BASE_SHA}; will generate one via a full analysis on the base commit." 
fi - - # Extract job_id from response - JOB_ID=$(jq -r '.job_id' "$TEMP_FILE") - - if [ "$JOB_ID" = "null" ] || [ -z "$JOB_ID" ]; then - echo "โŒ Error: No job_id found in response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" + + - name: Generate base analysis (no committed baseline) + if: steps.guard.outputs.skip != 'true' && steps.base.outputs.committed == 'false' + shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} + CACHING_DOCUMENTATION: 'false' + ENABLE_MONITORING: 'false' + run: | + BASE_SRC="${RUNNER_TEMP}/base-src" + rm -rf "$BASE_SRC" + git -C "${{ github.workspace }}/target-repo" worktree add --detach "$BASE_SRC" "${{ steps.guard.outputs.base_sha }}" + uv run python -c " + from pathlib import Path + from codeboarding_workflows.analysis import run_full + res = run_full( + repo_name='${{ github.event.repository.name }}', + repo_path=Path('$BASE_SRC'), + output_dir=Path('${{ steps.base.outputs.base_dir }}'), + run_id='${{ github.run_id }}-${{ github.run_attempt }}-base', + log_path='/tmp/cb-base.log', + depth_level=int('${{ inputs.depth_level }}'), + source_sha='${{ steps.guard.outputs.base_sha }}', + ) + print(f'Base analysis written: {res}') + " + if [ ! -f "${{ steps.base.outputs.base_dir }}/analysis.json" ]; then + echo "::error::Base full analysis ran but analysis.json is missing." exit 1 fi - - echo "โœ… Job created successfully!" - echo "๐Ÿ†” Job ID: $JOB_ID" - - # Start polling job status - STATUS_URL="https://server.codeboarding.org/github_action/jobs/$JOB_ID" - - echo "๐Ÿ“Š Starting job status polling..." - echo "โฐ This may take 15-45 minutes for large repositories..." 
- echo "๐Ÿ’ก If your workflow times out, increase 'timeout-minutes' in your job configuration" - - # Polling loop - POLL_COUNT=0 - MAX_POLLS=90 # 90 minutes max (90 * 1 minute intervals) - - while [ $POLL_COUNT -lt $MAX_POLLS ]; do - POLL_COUNT=$((POLL_COUNT + 1)) - - echo "๐Ÿ” Polling attempt $POLL_COUNT of $MAX_POLLS ($(date '+%H:%M:%S'))" - - # Make status check API call - response=$(curl -s -w "%{http_code}" -o "$TEMP_FILE" \ - --max-time 30 \ - --connect-timeout 10 \ - "$STATUS_URL") - - curl_exit_code=$? - http_code=${response: -3} - - # Handle curl errors - if [ $curl_exit_code -ne 0 ]; then - echo "โš ๏ธ Warning: Status check failed with curl exit code $curl_exit_code" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - if [ "$http_code" != "200" ]; then - echo "โš ๏ธ Warning: Status check failed with HTTP code $http_code" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - # Check if response is valid JSON - if ! jq empty "$TEMP_FILE" 2>/dev/null; then - echo "โš ๏ธ Warning: Invalid JSON response" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - echo "๐Ÿ”„ Retrying in 30 seconds..." - sleep 30 - continue - fi - - # Extract status from response - STATUS=$(jq -r '.status' "$TEMP_FILE") - - echo "๐Ÿ“Š Current job status: $STATUS" - - if [ "$STATUS" = "COMPLETED" ]; then - echo "โœ… Job completed successfully!" - - # Check if result field exists and contains files - if jq -e '.result' "$TEMP_FILE" > /dev/null; then - echo "๐Ÿ“ฆ Result field found, preparing output..." - - # Check if result is a JSON string or already a JSON object - RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE") - - if [ "$RESULT_TYPE" = "string" ]; then - echo "๐Ÿ”ง Result is a JSON string, parsing it..." - # Parse the JSON string in the result field - jq -r '.result' "$TEMP_FILE" | jq '.' > "${TEMP_FILE}_result" - else - echo "๐Ÿ”ง Result is already a JSON object, extracting it..." 
- # Extract the result object directly - jq '.result' "$TEMP_FILE" > "${TEMP_FILE}_result" - fi - - # Verify the extracted result - if jq -e '.files' "${TEMP_FILE}_result" > /dev/null; then - echo "โœ… Files extracted successfully" - mv "${TEMP_FILE}_result" "$TEMP_FILE" - echo "response_file=$TEMP_FILE" >> $GITHUB_OUTPUT - exit 0 # Successfully extracted files, exit with success - else - echo "โŒ Error: Extracted result is missing files structure" - echo "๐Ÿ“„ Extracted content:" - cat "${TEMP_FILE}_result" - rm -f "${TEMP_FILE}_result" "$TEMP_FILE" - exit 1 - fi - else - echo "โŒ Error: Job completed but no result or result.files found in response" - echo "๐Ÿ“„ Response structure:" - jq '.' "$TEMP_FILE" - - # If result exists, show what it contains - if jq -e '.result' "$TEMP_FILE" > /dev/null; then - echo "๐Ÿ“„ Result field content:" - RESULT_TYPE=$(jq -r '.result | type' "$TEMP_FILE") - echo "Result type: $RESULT_TYPE" - - if [ "$RESULT_TYPE" = "string" ]; then - echo "Result string content:" - jq -r '.result' "$TEMP_FILE" - else - echo "Result object content:" - jq '.result' "$TEMP_FILE" - fi - fi - - rm -f "$TEMP_FILE" - exit 1 - fi - elif [ "$STATUS" = "FAILED" ] || [ "$STATUS" = "ERROR" ]; then - echo "โŒ Job failed with status: $STATUS" - echo "๐Ÿ“„ Response content:" - cat "$TEMP_FILE" - rm -f "$TEMP_FILE" - exit 1 - else - # Job still in progress - echo "โณ Job in progress (status: $STATUS)..." - - # Show additional progress information if available - if jq -e '.updated_at' "$TEMP_FILE" > /dev/null; then - UPDATED_AT=$(jq -r '.updated_at' "$TEMP_FILE") - echo "๐Ÿ• Last updated: $UPDATED_AT" - fi - - echo "๐Ÿ’ค Waiting 15 seconds before next check..." - sleep 15 - fi - done - - # Only reach here if we've exceeded max polls without completion - echo "โŒ Error: Job polling timed out after $MAX_POLLS attempts" - echo "๐Ÿ—๏ธ The repository analysis is taking longer than expected." 
- echo "๐Ÿ“Š This might be due to:" - echo " โ€ข Very large repository size (>10k files)" - echo " โ€ข Complex codebase requiring extensive analysis" - echo " โ€ข Server load or processing delays" - echo "" - echo "๐Ÿ’ก Suggestions:" - echo " โ€ข Try again later when server load might be lower" - echo " โ€ข Consider analyzing smaller branches or specific directories" - echo " โ€ข Increase your GitHub Actions job timeout-minutes to 120+" - echo " โ€ข Contact support if the issue persists" - - rm -f "$TEMP_FILE" - exit 1 - - name: Process documentation files - id: process-docs + - name: Analyze PR head (incremental from base) + if: steps.guard.outputs.skip != 'true' + id: analyze shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + DIAGRAM_DEPTH_LEVEL: ${{ inputs.depth_level }} + CACHING_DOCUMENTATION: 'false' + ENABLE_MONITORING: 'false' run: | - RESPONSE_FILE="${{ steps.fetch-docs.outputs.response_file }}" - MD_OUTPUT_DIR="${{ inputs.output_directory }}" - JSON_OUTPUT_DIR=".codeboarding" - OUTPUT_FORMAT="${{ inputs.output_format }}" - - # Validate output format - if [[ "$OUTPUT_FORMAT" != ".md" && "$OUTPUT_FORMAT" != ".mdx" && "$OUTPUT_FORMAT" != ".rst" && "$OUTPUT_FORMAT" != ".html" ]]; then - echo "Error: Invalid output format '$OUTPUT_FORMAT'. Must be either '.md', '.mdx', '.rst', or '.html'" + BASE_DIR="${{ steps.base.outputs.base_dir }}" + HEAD_DIR="${{ steps.base.outputs.head_dir }}" + # Seed the head dir from the base analysis so incremental stitches + # component ids from the baseline (stable diff). Base dir is left + # untouched as the "before" snapshot for the diff. + cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true + uv run python -c " + from pathlib import Path + from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError + from diagram_analysis.exceptions import IncrementalCacheMissingError + base_sha='${{ steps.guard.outputs.base_sha }}' + head_sha='${{ steps.guard.outputs.head_sha }}' + repo=Path('${{ github.workspace }}/target-repo') + out=Path('$HEAD_DIR') + name='${{ github.event.repository.name }}' + rid='${{ github.run_id }}-${{ github.run_attempt }}-head' + try: + res = run_incremental( + repo_path=repo, output_dir=out, project_name=name, run_id=rid, + log_path='/tmp/cb-head.log', base_ref=base_sha, target_ref=head_sha, + source_sha=head_sha, + ) + except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: + print(f'Incremental unavailable ({exc}); running full analysis on head.') + for p in out.glob('*'): + if p.is_file(): + p.unlink() + res = run_full( + repo_name=name, repo_path=repo, output_dir=out, run_id=rid, + log_path='/tmp/cb-head.log', depth_level=int('${{ inputs.depth_level }}'), + source_sha=head_sha, + ) + print(f'Head analysis written: {res}') + " + if [ ! -f "$HEAD_DIR/analysis.json" ]; then + echo "::error::Head analysis ran but analysis.json is missing." exit 1 fi - - # Clean and create the output directories - mkdir -p "$MD_OUTPUT_DIR" - - # Remove existing .codeboarding files before adding new ones - if [ -d "$JSON_OUTPUT_DIR" ]; then - echo "Cleaning existing JSON files from $JSON_OUTPUT_DIR" - rm -rf "$JSON_OUTPUT_DIR" - fi - mkdir -p "$JSON_OUTPUT_DIR" - - # Initialize counters - MARKDOWN_FILES_CREATED=0 - JSON_FILES_CREATED=0 - - echo "=== Processing Documentation Files ===" - echo "Response JSON structure:" - jq . "$RESPONSE_FILE" - echo "Using output format: $OUTPUT_FORMAT" - # Parse JSON response and create files using keys as filenames - if jq -e '.files' "$RESPONSE_FILE" > /dev/null; then - echo "Files key found, proceeding to create files..." 
- - # Check if files object is empty - FILES_COUNT=$(jq '.files | length' "$RESPONSE_FILE") - if [ "$FILES_COUNT" -eq 0 ]; then - echo "โ„น๏ธ No documentation files were generated for this repository/branch combination." - echo "๐Ÿ“ This might be because:" - echo " โ€ข No changes were detected between the source and target branches" - echo " โ€ข The repository or branches don't exist or are not accessible" - echo " โ€ข No analyzable code files were found" - echo " โ€ข The branches are identical (no diff to analyze)" + echo "base_analysis=$BASE_DIR/analysis.json" >> $GITHUB_OUTPUT + echo "head_analysis=$HEAD_DIR/analysis.json" >> $GITHUB_OUTPUT + + - name: Diff analyses โ†’ Mermaid + if: steps.guard.outputs.skip != 'true' + id: diagram + shell: bash + run: | + FLAG="" + [ "${{ inputs.changed_only }}" = "true" ] && FLAG="$FLAG --changed-only" + [ "${{ inputs.nested }}" = "true" ] && FLAG="$FLAG --nested" + META=$(python3 ${{ github.action_path }}/scripts/diff_to_mermaid.py \ + --base "${{ steps.analyze.outputs.base_analysis }}" \ + --head "${{ steps.analyze.outputs.head_analysis }}" \ + --out "${RUNNER_TEMP}/diagram.md" \ + --direction "${{ inputs.diagram_direction }}" $FLAG) + echo "$META" > "${RUNNER_TEMP}/diagram_meta.json" + echo "diff meta: $META" + read N RENDERED TRUNC < <(python3 -c "import json;d=json.load(open('${RUNNER_TEMP}/diagram_meta.json'));print(d['n_changed'], str(d['rendered']).lower(), str(d['truncated']).lower())") + echo "n_changed=$N" >> $GITHUB_OUTPUT + echo "rendered=$RENDERED" >> $GITHUB_OUTPUT + echo "truncated=$TRUNC" >> $GITHUB_OUTPUT + echo "diagram_md=${RUNNER_TEMP}/diagram.md" >> $GITHUB_OUTPUT + + - name: Build PR comment body + if: steps.guard.outputs.skip != 'true' + id: body + shell: bash + run: | + HEADER="${{ inputs.comment_header }}" + BASE_REF="${{ github.event.pull_request.base.ref }}" + N="${{ steps.diagram.outputs.n_changed }}" + RENDERED="${{ steps.diagram.outputs.rendered }}" + TRUNC="${{ steps.diagram.outputs.truncated 
}}" + BODY_FILE=$(mktemp) + + headline() { + if [ "$1" = "0" ]; then echo "no architectural changes"; + elif [ "$1" = "1" ]; then echo "1 component changed"; + else echo "$1 components changed"; fi + } + + { + echo "### ${HEADER} ยท $(headline "$N")" + echo "" + if [ "$N" = "0" ]; then + echo "No architectural changes detected versus \`${BASE_REF}\`." + elif [ "$RENDERED" = "true" ]; then + cat "${{ steps.diagram.outputs.diagram_md }}" + echo "" + echo "" + echo "๐ŸŸฉ added ยท ๐ŸŸจ modified ยท ๐ŸŸฅ deleted โ€” compared against \`${BASE_REF}\`." + if [ "$TRUNC" = "true" ]; then + echo "" + echo "Showing changed components only โ€” the full graph exceeds GitHub's inline Mermaid limit." + fi else - # Get each key from files object and create a file with that name - while IFS= read -r filename; do - echo "Processing file: $filename" - - # Get the content for this filename - content=$(jq -r ".files[\"$filename\"]" "$RESPONSE_FILE") - - # Determine file type and destination - if [[ "$filename" == *.json ]]; then - # JSON file - output_dir="$JSON_OUTPUT_DIR" - output_filename="$filename" - echo "$content" > "$output_dir/$output_filename" - echo "Created JSON file: $output_dir/$output_filename" - JSON_FILES_CREATED=$((JSON_FILES_CREATED + 1)) - else - # Documentation file - add appropriate extension if not present - output_dir="$MD_OUTPUT_DIR" - - # Check if filename has an extension - if [[ "$filename" == *.* ]]; then - # Extract basename without extension - basename="${filename%.*}" - else - basename="$filename" - fi - - # Add the selected output format extension - output_filename="${basename}${OUTPUT_FORMAT}" - - echo "$content" > "$output_dir/$output_filename" - echo "Created documentation file: $output_dir/$output_filename" - MARKDOWN_FILES_CREATED=$((MARKDOWN_FILES_CREATED + 1)) - fi - done < <(jq -r '.files | keys[]' "$RESPONSE_FILE") + echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 
edges)." fi - else - echo "No 'files' key found in response JSON - checking if job completed with no results" - fi - - # Clean up temporary file - rm -f "$RESPONSE_FILE" - - # Check if any files were created - TOTAL_FILES=$((MARKDOWN_FILES_CREATED + JSON_FILES_CREATED)) - if [ "$TOTAL_FILES" -gt 0 ]; then - HAS_CHANGES="true" - echo "Created $MARKDOWN_FILES_CREATED Markdown files in $MD_OUTPUT_DIR" - echo "Created $JSON_FILES_CREATED JSON files in $JSON_OUTPUT_DIR" - - # List created files - if [ "$MARKDOWN_FILES_CREATED" -gt 0 ]; then - echo "Markdown files created:" - ls -la "$MD_OUTPUT_DIR" - fi - - if [ "$JSON_FILES_CREATED" -gt 0 ]; then - echo "JSON files created:" - ls -la "$JSON_OUTPUT_DIR" - fi - else - HAS_CHANGES="false" - echo "No files were created" - fi - - # Set outputs - echo "markdown_files_created=$MARKDOWN_FILES_CREATED" >> $GITHUB_OUTPUT - echo "json_files_created=$JSON_FILES_CREATED" >> $GITHUB_OUTPUT - echo "output_directory=$MD_OUTPUT_DIR" >> $GITHUB_OUTPUT - echo "json_directory=$JSON_OUTPUT_DIR" >> $GITHUB_OUTPUT - echo "has_changes=$HAS_CHANGES" >> $GITHUB_OUTPUT \ No newline at end of file + echo "" + echo "codeboarding-action ยท run ${{ github.run_id }}" + } > "$BODY_FILE" + + echo "body_file=$BODY_FILE" >> $GITHUB_OUTPUT + echo "--- comment preview ---" + cat "$BODY_FILE" + echo "--- end preview ---" + + - name: Post sticky PR comment + if: steps.guard.outputs.skip != 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: codeboarding-architecture-diff + path: ${{ steps.body.outputs.body_file }} + GITHUB_TOKEN: ${{ inputs.github_token }} diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py new file mode 100644 index 0000000..6d9f6fd --- /dev/null +++ b/scripts/diff_to_mermaid.py @@ -0,0 +1,449 @@ +"""Diff two CodeBoarding analysis.json files and render the delta as a colored Mermaid graph. 
+ +Reads a *base* (before) and *head* (after) ``analysis.json`` — both already +materialized on disk by the engine — computes a component/relation diff, and +emits a GitHub-renderable ```mermaid block where: + + * nodes are colored green=added / yellow=modified / red=deleted (deleted dashed) + * arrows are colored the same way (red dashed for deleted) + +GitHub renders ```mermaid fenced blocks natively inside PR/issue comments, so the +output goes straight into the sticky comment — no image, no Playwright. + +The diff set-arithmetic is a port of the action's ``compute_diff.py``, with two +differences for this use case: both sides are read from plain file paths (not +``git show``), and a relation whose ``(src, dst)`` is unchanged but whose label +text changed is reported as ``modified`` (the original only did added/deleted). + +Self-contained stdlib. +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path + +# GitHub's mermaid config caps (config.schema.yaml defaults; NOT raisable on +# GitHub). Exceeding either renders a red error box with no diagram, so we stay +# comfortably under and degrade to a changed-only / text fallback instead. +MAX_EDGES = 480 # hard cap 500 +MAX_TEXT = 45_000 # hard cap 50000 chars + +# Primer-ish fills that read on both light and dark GitHub backgrounds. White +# label text is set explicitly so it survives dark mode. 
COLORS = {
    "added": {"fill": "#1f883d", "stroke": "#0b5d23"},
    "modified": {"fill": "#bf8700", "stroke": "#7d4e00"},
    "deleted": {"fill": "#cf222e", "stroke": "#82071e"},
}
# Every diff status that counts as a change (i.e. everything except "unchanged").
CHANGED = ("added", "modified", "deleted")
# Longest relation label drawn on an arrow before it is cut with an ellipsis.
_EDGE_LABEL_MAX = 48


# --------------------------------------------------------------------------- #
# load
# --------------------------------------------------------------------------- #
def load_analysis(path: Path) -> dict:
    """Read and parse one ``analysis.json`` file.

    The file is always decoded as UTF-8 (independent of the runner's locale).

    Args:
        path: Location of the analysis file.

    Returns:
        The parsed top-level JSON object.

    Raises:
        SystemExit: With a ``::error::`` message when the file is unreadable,
            is not valid UTF-8, is not valid JSON, or its top-level value is
            not an object.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        sys.exit(f"::error::Could not read analysis JSON at {path}: {exc}")
    if not isinstance(data, dict):
        # build_diff() calls .get() on the result; fail here with a clear message.
        sys.exit(f"::error::Could not read analysis JSON at {path}: top-level value is not an object")
    return data


# --------------------------------------------------------------------------- #
# diff (ported from compute_diff.py; relation diff extended with 'modified')
# --------------------------------------------------------------------------- #
def _comp_id(c: dict) -> str:
    """Return the component id, falling back to its name ('' when neither is set)."""
    return c.get("component_id") or c.get("name") or ""


def _comp_name(c: dict) -> str:
    """Return the component name ('' when missing or null)."""
    return c.get("name") or ""


def _file_methods(c: dict) -> list:
    """Return the component's ``file_methods`` entries (empty list when absent)."""
    return c.get("file_methods") or []


def _methods_by_file(c: dict) -> dict:
    """Map file path -> set of method names, keeping only string entries."""
    by_file: dict = {}
    for fm in _file_methods(c):
        fp = fm.get("file_path") or ""
        names = {m for m in (fm.get("methods") or []) if isinstance(m, str)}
        if names:
            by_file.setdefault(fp, set()).update(names)
    return by_file


def _has_structural_changes(base: dict, current: dict) -> bool:
    """Return True when the set of files or the number of direct sub-components differs."""
    # Normalize a null/missing file_path the same way _methods_by_file does.
    base_files = {fm.get("file_path") or "" for fm in _file_methods(base)}
    current_files = {fm.get("file_path") or "" for fm in _file_methods(current)}
    if base_files != current_files:
        return True
    return len(base.get("components") or []) != len(current.get("components") or [])


def _diff_methods(base: dict, current: dict) -> dict:
    """Return ``{"added": {file: [names]}, "removed": {file: [names]}}`` between two components."""
    base_by_file = _methods_by_file(base)
    current_by_file = _methods_by_file(current)
    added: dict = {}
    removed: dict = {}
    for file_path in set(base_by_file) | set(current_by_file):
        before = base_by_file.get(file_path, set())
        after = current_by_file.get(file_path, set())
        gained = sorted(after - before)
        lost = sorted(before - after)
        if gained:
            added[file_path] = gained
        if lost:
            removed[file_path] = lost
    return {"added": added, "removed": removed}


def _rel_key(r: dict) -> tuple:
    """Return the ``(source, destination)`` key used to match a relation across analyses."""
    # Name is the stable join across two independent analyses; component ids are
    # positional and can be reshuffled on a full re-run, so prefer names.
    return (r.get("src_name") or r.get("src_id") or "", r.get("dst_name") or r.get("dst_id") or "")


def _diff_relations(base_rels: list, current_rels: list) -> list:
    """Annotate relations with ``diff_status``.

    Current relations come first (added / modified / unchanged), followed by
    base-only relations marked deleted. A relation whose endpoints match but
    whose ``relation`` label text differs is ``modified``. Relations sharing
    the same endpoint pair collapse to the last one seen.
    """
    base_edges = {_rel_key(r): r for r in (base_rels or [])}
    current_edges = {_rel_key(r): r for r in (current_rels or [])}
    result: list = []
    for key, rel in current_edges.items():
        if key not in base_edges:
            status = "added"
        elif (base_edges[key].get("relation") or "") != (rel.get("relation") or ""):
            status = "modified"
        else:
            status = "unchanged"
        result.append({**rel, "diff_status": status})
    result.extend({**rel, "diff_status": "deleted"} for key, rel in base_edges.items() if key not in current_edges)
    return result


def _diff_components(base_components: list, current_components: list) -> list:
    """Annotate components (recursively) with ``diff_status`` and ``method_diff``.

    Components are matched by name. Matched components are ``modified`` when
    their files, direct sub-component count, or methods differ. Base-only
    components are appended as ``deleted`` ghosts stripped of their subtree.
    """
    base = base_components or []
    current = current_components or []
    base_by_name = {_comp_name(c): c for c in base}  # name is the stable cross-analysis join
    matched_names: set = set()
    result: list = []

    for comp in current:
        base_match = base_by_name.get(_comp_name(comp))
        if base_match is None:
            result.append({**comp, "diff_status": "added"})
            continue
        matched_names.add(_comp_name(base_match))
        structural = _has_structural_changes(base_match, comp)
        method_diff = _diff_methods(base_match, comp)
        has_method_changes = bool(method_diff["added"] or method_diff["removed"])
        diff_status = "modified" if (structural or has_method_changes) else "unchanged"

        annotated = {**comp, "diff_status": diff_status, "method_diff": method_diff}

        base_subs = base_match.get("components") or []
        current_subs = comp.get("components") or []
        if base_subs or current_subs:
            annotated["components"] = _diff_components(base_subs, current_subs)

        base_sub_rels = base_match.get("components_relations") or []
        current_sub_rels = comp.get("components_relations") or []
        if base_sub_rels or current_sub_rels:
            annotated["components_relations"] = _diff_relations(base_sub_rels, current_sub_rels)

        result.append(annotated)

    dropped_keys = ("components", "components_relations", "can_expand")
    for comp in base:
        if _comp_name(comp) not in matched_names:
            ghost = {k: v for k, v in comp.items() if k not in dropped_keys}
            ghost["diff_status"] = "deleted"
            result.append(ghost)

    return result


def build_diff(base: dict, head: dict) -> dict:
    """Diff two parsed analyses into annotated ``components`` and ``components_relations``."""
    return {
        "components": _diff_components(base.get("components") or [], head.get("components") or []),
        "components_relations": _diff_relations(
            base.get("components_relations") or [],
            head.get("components_relations") or [],
        ),
    }


# --------------------------------------------------------------------------- #
# mermaid emit
# --------------------------------------------------------------------------- #
def _sanitize(name: str) -> str:
    """Match the engine's node-id sanitization (utils.sanitize)."""
    return re.sub(r"\W+", "_", name or "")


def _esc(text: str) -> str:
    """Escape arbitrary text for a mermaid label under GitHub's strict security.

    ``#`` first (so the entities we inject are not re-escaped), then ``"``.
    """
    out = (text or "").replace("\n", " ").replace("\r", " ").strip()
    return out.replace("#", "#35;").replace('"', "#quot;")


def _truncate(text: str, limit: int = _EDGE_LABEL_MAX) -> str:
    """Cut ``text`` to at most ``limit`` characters, ending with an ellipsis when shortened."""
    text = (text or "").strip()
    if len(text) <= limit:
        return text
    return text[: limit - 1].rstrip() + "\u2026"


class _Scope:
    """Per-level name/id -> mermaid key resolver for one nesting level.

    Deleted ghosts get a separate ``del_`` key namespace from present nodes so a
    reused id/name can't merge an added node onto a deleted one. Keys are made
    globally unique via the shared ``used`` set. Resolution is name-first (the
    stable cross-analysis join); present edges resolve head-first, deleted edges
    ghost-first. ``force`` overrides the per-component diff_status (used when a
    wholly-added/deleted parent colors its whole subtree).
    """

    def __init__(self, components: list, used: set, force: str | None = None):
        self.entries: list = []  # (key, label, status, component)
        self.head_by_id: dict = {}
        self.head_by_name: dict = {}
        self.del_by_id: dict = {}
        self.del_by_name: dict = {}
        for comp in components:
            status = force or comp.get("diff_status", "unchanged")
            present = status != "deleted"
            cid, cname = _comp_id(comp), _comp_name(comp)
            stem = ("n_" if present else "del_") + _sanitize(cname or cid or "node")
            key, n = stem, 1
            while key in used:  # suffix _2, _3, ... until the key is unused anywhere
                n += 1
                key = f"{stem}_{n}"
            used.add(key)
            self.entries.append((key, cname or cid or "(unnamed)", status, comp))
            by_id = self.head_by_id if present else self.del_by_id
            by_name = self.head_by_name if present else self.del_by_name
            if cname:
                by_name[cname] = key
            if cid:
                by_id[cid] = key

    def resolve(self, rid: str, rname: str, present: bool) -> str | None:
        """Return the mermaid key for a relation endpoint, or None when it is not drawn at this level."""
        maps = [(self.head_by_id, self.head_by_name), (self.del_by_id, self.del_by_name)]
        if not present:
            maps.reverse()
        for by_id, by_name in maps:
            if rname and rname in by_name:  # name-first: stable cross-analysis join
                return by_name[rname]
            if rid and rid in by_id:
                return by_id[rid]
        return None


def _filter_changed(components: list, relations: list) -> tuple:
    """Keep changed components, the endpoints of changed edges, and edges among the kept.

    This is the size fallback. Missing or empty ids/names on a relation or
    component are never used as match keys, so an edge without name fields
    cannot drag unrelated unnamed components into the reduced view.

    Returns:
        ``(kept_components, kept_relations)``.
    """
    changed_rels = [r for r in relations if r.get("diff_status") in CHANGED]
    keep_ids: set = set()
    keep_names: set = set()
    for comp in components:
        if comp.get("diff_status") in CHANGED:
            keep_ids.add(_comp_id(comp))
            keep_names.add(_comp_name(comp))
    for rel in changed_rels:  # so a changed edge between two unchanged nodes still draws its endpoints
        keep_ids.update((rel.get("src_id"), rel.get("dst_id")))
        keep_names.update((rel.get("src_name"), rel.get("dst_name")))
    keep_ids.difference_update(("", None))
    keep_names.difference_update(("", None))

    kept = [
        c
        for c in components
        if c.get("diff_status") in CHANGED or _comp_id(c) in keep_ids or _comp_name(c) in keep_names
    ]
    kept_ids = {_comp_id(c) for c in kept} - {""}
    kept_names = {_comp_name(c) for c in kept} - {""}

    def touches(rel: dict, id_field: str, name_field: str) -> bool:
        return rel.get(id_field) in kept_ids or rel.get(name_field) in kept_names

    rels = [
        r
        for r in relations
        if r.get("diff_status") in CHANGED
        or (touches(r, "src_id", "src_name") and touches(r, "dst_id", "dst_name"))
    ]
    return kept, rels


def _init_directive(font_size, node_padding, node_spacing, rank_spacing) -> str | None:
    """Build a Mermaid ``%%{init}%%`` directive to enlarge nodes / spacing.

    Nodes auto-size to their label, so the effective levers are font size and
    interior padding (bigger nodes) plus node/rank spacing (less cramped). These
    config keys are honored by GitHub's strict renderer.

    Returns None when every argument is None.
    """
    flowchart: dict = {}
    if node_padding is not None:
        flowchart["padding"] = node_padding
    if node_spacing is not None:
        flowchart["nodeSpacing"] = node_spacing
    if rank_spacing is not None:
        flowchart["rankSpacing"] = rank_spacing
    cfg: dict = {}
    if flowchart:
        cfg["flowchart"] = flowchart
    if font_size is not None:
        cfg["themeVariables"] = {"fontSize": f"{font_size}px"}
    return "%%{init: " + json.dumps(cfg) + "}%%" if cfg else None


def _emit_graph(
    components: list,
    relations: list,
    direction: str,
    edge_labels: bool,
    nested: bool,
    directive: str | None,
) -> tuple:
    """Render one fenced mermaid block; return ``(text, n_nodes, n_edges)``.

    ``n_nodes`` counts leaf nodes only (subgraph containers are not counted);
    ``n_edges`` counts drawn arrows, which is also the index space used by the
    positional ``linkStyle`` lines.
    """
    used: set = set()
    body: list = []
    node_classes: dict = {status: [] for status in CHANGED}
    box_classes: dict = {status: [] for status in CHANGED}
    edge_styles: dict = {status: [] for status in CHANGED}
    counters = {"edges": 0, "nodes": 0}

    def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None:
        for rel in rels:
            status = force or rel.get("diff_status", "unchanged")
            present = status != "deleted"
            src = scope.resolve(rel.get("src_id", ""), rel.get("src_name", ""), present)
            dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present)
            if src is None or dst is None:
                continue  # endpoint not drawn: skip, don't consume an edge index
            label = _esc(_truncate(rel.get("relation", ""))) if edge_labels else ""
            body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}")
            if status in edge_styles:
                edge_styles[status].append(counters["edges"])
            counters["edges"] += 1

    def emit_level(comps: list, rels: list, indent: int, force: str | None) -> None:
        pad = "    " * indent
        scope = _Scope(comps, used, force)
        for key, label, status, comp in scope.entries:
            children = comp.get("components") if nested else None
            if children:
                body.append(f'{pad}subgraph {key}["{_esc(label)}"]')
                if status in box_classes:
                    box_classes[status].append(key)
                # An added parent's subtree arrives without diff annotations, so force it.
                child_force = force or (status if status == "added" else None)
                emit_level(children, comp.get("components_relations") or [], indent + 1, child_force)
                body.append(f"{pad}end")
            else:
                body.append(f'{pad}{key}["{_esc(label)}"]')
                if status in node_classes:
                    node_classes[status].append(key)
                counters["nodes"] += 1
        emit_edges(rels, scope, pad, force)

    emit_level(components, relations, 1, None)

    style: list = [
        f'    classDef added fill:{COLORS["added"]["fill"]},stroke:{COLORS["added"]["stroke"]},color:#ffffff;',
        f'    classDef modified fill:{COLORS["modified"]["fill"]},stroke:{COLORS["modified"]["stroke"]},color:#ffffff;',
        f'    classDef deleted fill:{COLORS["deleted"]["fill"]},stroke:{COLORS["deleted"]["stroke"]},'
        f"color:#ffffff,stroke-dasharray:5 3;",
    ]
    if any(box_classes.values()):  # stroke-only containers so big parents aren't solid blocks
        for st in CHANGED:
            dash = ",stroke-dasharray:5 3" if st == "deleted" else ""
            style.append(f'    classDef {st}Box stroke:{COLORS[st]["stroke"]},stroke-width:2px,fill:none{dash};')
    for status in CHANGED:
        if node_classes[status]:
            style.append(f'    class {",".join(node_classes[status])} {status};')
        if box_classes[status]:
            style.append(f'    class {",".join(box_classes[status])} {status}Box;')
    for status in CHANGED:
        idxs = edge_styles[status]
        if not idxs:
            continue
        s = f'stroke:{COLORS[status]["stroke"]},stroke-width:2px'
        if status == "deleted":
            s += ",stroke-dasharray:5 3"
        style.append(f'    linkStyle {",".join(str(i) for i in idxs)} {s};')

    head = ["```mermaid"] + ([directive] if directive else []) + [f"graph {direction}"]
    text = "\n".join(head + body + style + ["```"])
    return text, counters["nodes"], counters["edges"]


def render_mermaid(
    diff: dict,
    direction: str = "LR",
    changed_only: bool = False,
    edge_labels: bool = True,
    nested: bool = False,
    font_size: int | None = None,
    node_padding: int | None = None,
    node_spacing: int | None = None,
    rank_spacing: int | None = None,
) -> tuple:
    """Return (mermaid_text, meta). ``mermaid_text`` is None when there's nothing to draw.

    With ``nested`` the depth>1 sub-components are drawn as Mermaid subgraphs:
    leaf nodes get a filled class, parent containers a stroke-only ``*Box``
    class. A wholly-added parent forces ``added`` onto its subtree (the engine
    only diff-annotates surviving branches; an added subtree arrives raw).

    Size handling: the changed-only view is used when ``changed_only`` is set
    or the top-level relation count exceeds ``MAX_EDGES``. If the full graph
    still exceeds ``MAX_TEXT`` / ``MAX_EDGES`` once rendered (long labels,
    nested edges), the changed-only view is tried before giving up; only when
    that is also too large is ``None`` returned.

    Returns:
        ``(text_or_None, meta)`` where ``meta`` has ``n_changed``, ``n_nodes``,
        ``n_edges`` and ``truncated`` (True when the changed-only view was used
        or the diagram was dropped for size).
    """
    all_components = diff.get("components") or []
    all_relations = diff.get("components_relations") or []
    # NOTE(review): counts top-level components only; a relation-only or
    # nested-only change leaves this at 0. Confirm that is the intended headline.
    n_changed = sum(1 for c in all_components if c.get("diff_status") in CHANGED)
    directive = _init_directive(font_size, node_padding, node_spacing, rank_spacing)

    def too_big(text: str, n_edges: int) -> bool:
        return len(text) > MAX_TEXT or n_edges > MAX_EDGES

    filtered = bool(changed_only or len(all_relations) > MAX_EDGES)
    if filtered:
        components, relations = _filter_changed(all_components, all_relations)
    else:
        components, relations = all_components, all_relations

    text, n_nodes, n_edges = _emit_graph(components, relations, direction, edge_labels, nested, directive)
    if n_nodes == 0:
        return None, {"n_changed": n_changed, "n_nodes": 0, "n_edges": 0, "truncated": False}

    if not filtered and too_big(text, n_edges):
        # Degrade to the changed-only view before falling back to text.
        slim_components, slim_relations = _filter_changed(all_components, all_relations)
        slim_text, slim_nodes, slim_edges = _emit_graph(
            slim_components, slim_relations, direction, edge_labels, nested, directive
        )
        if slim_nodes > 0:
            text, n_nodes, n_edges = slim_text, slim_nodes, slim_edges
            filtered = True

    meta = {
        "n_changed": n_changed,
        "n_nodes": n_nodes,
        "n_edges": n_edges,
        "truncated": filtered,
    }
    if too_big(text, n_edges):  # never trip GitHub's red error box
        meta["truncated"] = True
        return None, meta
    return text, meta


# --------------------------------------------------------------------------- #
+# cli +# --------------------------------------------------------------------------- # +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--base", required=True, type=Path, help="Path to the base (before) analysis.json") + p.add_argument("--head", required=True, type=Path, help="Path to the head (after) analysis.json") + p.add_argument("--out", required=True, type=Path, help="Where to write the ```mermaid block") + p.add_argument("--direction", default="LR", choices=["LR", "TD", "TB", "RL", "BT"]) + p.add_argument("--changed-only", action="store_true", help="Render only changed components + incident edges") + p.add_argument("--no-edge-labels", dest="edge_labels", action="store_false", help="Draw arrows without relation labels") + p.add_argument("--nested", action="store_true", help="Draw depth>1 sub-components as subgraphs") + p.add_argument("--font-size", type=int, default=None, help="Node label font size in px (bigger label โ‡’ bigger node)") + p.add_argument("--node-padding", type=int, default=None, help="Interior padding around each node label") + p.add_argument("--node-spacing", type=int, default=None, help="Space between nodes in the same rank") + p.add_argument("--rank-spacing", type=int, default=None, help="Space between ranks") + args = p.parse_args() + + diff = build_diff(load_analysis(args.base), load_analysis(args.head)) + mermaid, meta = render_mermaid( + diff, + direction=args.direction, + changed_only=args.changed_only, + edge_labels=args.edge_labels, + nested=args.nested, + font_size=args.font_size, + node_padding=args.node_padding, + node_spacing=args.node_spacing, + rank_spacing=args.rank_spacing, + ) + + args.out.write_text(mermaid if mermaid is not None else "", encoding="utf-8") + meta["rendered"] = mermaid is not None + # Machine-readable summary on stdout for the action to consume. 
+ print(json.dumps(meta)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run_local.sh b/scripts/run_local.sh new file mode 100755 index 0000000..8a6d7d7 --- /dev/null +++ b/scripts/run_local.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# +# Local test harness for the CodeBoarding Mermaid architecture-diff action. +# Mirrors action.yml so you can iterate without waiting on a GitHub runner. +# +# Two modes: +# +# FAST (no LLM, instant) โ€” diff two existing analysis.json files and preview: +# scripts/run_local.sh --base-json BASE.json --head-json HEAD.json +# +# FULL pipeline (needs OPENROUTER_API_KEY) โ€” run the engine on two refs of a +# local repo, exactly like the action (committed-or-generated base, then +# incremental head), then diff + preview: +# export OPENROUTER_API_KEY=sk-or-... +# scripts/run_local.sh --repo /path/to/repo --base --head +# +# Outputs (default ./.cb-local): +# diagram.md the ```mermaid block (what the action posts) +# preview.html opens in a browser and renders the colored diagram via mermaid.js +# +set -euo pipefail + +ACTION_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +ENGINE="${ENGINE:-$ACTION_DIR/../CodeBoarding}" +OUT="$ACTION_DIR/.cb-local" +DEPTH="1" +DIRECTION="LR" +CHANGED_ONLY="" +NO_EDGE_LABELS="" +NESTED="" +EXTRA="" +OPEN="auto" +REPO="" BASE_REF="" HEAD_REF="" BASE_JSON="" HEAD_JSON="" +AGENT_MODEL="${AGENT_MODEL:-openrouter/anthropic/claude-sonnet-4}" +PARSING_MODEL="${PARSING_MODEL:-openrouter/anthropic/claude-sonnet-4}" + +while [ $# -gt 0 ]; do + case "$1" in + --repo) REPO="$2"; shift 2;; + --base) BASE_REF="$2"; shift 2;; + --head) HEAD_REF="$2"; shift 2;; + --base-json) BASE_JSON="$2"; shift 2;; + --head-json) HEAD_JSON="$2"; shift 2;; + --engine) ENGINE="$2"; shift 2;; + --out) OUT="$2"; shift 2;; + --depth) DEPTH="$2"; shift 2;; + --direction) DIRECTION="$2"; shift 2;; + --changed-only) CHANGED_ONLY="--changed-only"; shift;; + --no-edge-labels) NO_EDGE_LABELS="--no-edge-labels"; shift;; + --nested) NESTED="--nested"; shift;; + --extra) EXTRA="$2"; shift 2;; # raw args forwarded to diff_to_mermaid.py, e.g. --extra "--font-size 20 --node-padding 16" + --no-open) OPEN="no"; shift;; + -h|--help) sed -n '2,30p' "${BASH_SOURCE[0]}"; exit 0;; + *) echo "Unknown arg: $1" >&2; exit 2;; + esac +done + +mkdir -p "$OUT" + +run_engine() { # $1 = uv-runnable python source + ( cd "$ENGINE" && \ + STATIC_ANALYSIS_CONFIG="$ENGINE/static_analysis_config.yml" \ + PROJECT_ROOT="$ENGINE" \ + DIAGRAM_DEPTH_LEVEL="$DEPTH" \ + CACHING_DOCUMENTATION="false" \ + ENABLE_MONITORING="false" \ + OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-}" \ + AGENT_MODEL="$AGENT_MODEL" \ + PARSING_MODEL="$PARSING_MODEL" \ + uv run python -c "$1" ) +} + +if [ -n "$BASE_JSON" ] && [ -n "$HEAD_JSON" ]; then + echo "== Fast mode: diffing existing analyses (no engine run) ==" + BASE_ANALYSIS="$BASE_JSON" + HEAD_ANALYSIS="$HEAD_JSON" +else + [ -n "$REPO" ] && [ -n "$BASE_REF" ] && [ -n "$HEAD_REF" ] || { + echo "Need either --base-json/--head-json, or --repo/--base/--head." 
>&2; exit 2; } + [ -d "$ENGINE" ] || { echo "Engine not found at $ENGINE (set --engine or \$ENGINE)." >&2; exit 2; } + [ -n "${OPENROUTER_API_KEY:-}" ] || { echo "Export OPENROUTER_API_KEY for the full pipeline." >&2; exit 2; } + REPO="$(cd "$REPO" && pwd)" + BASE_DIR="$OUT/base"; HEAD_DIR="$OUT/head" + rm -rf "$BASE_DIR" "$HEAD_DIR"; mkdir -p "$BASE_DIR" "$HEAD_DIR" + + echo "== Resolving base analysis at $BASE_REF ==" + if git -C "$REPO" show "$BASE_REF:.codeboarding/analysis.json" > "$BASE_DIR/analysis.json" 2>/dev/null; then + git -C "$REPO" show "$BASE_REF:.codeboarding/static_analysis.pkl" > "$BASE_DIR/static_analysis.pkl" 2>/dev/null \ + && echo " using committed baseline (+ static_analysis.pkl)" || { rm -f "$BASE_DIR/static_analysis.pkl"; echo " using committed baseline"; } + else + rm -f "$BASE_DIR/analysis.json" + echo " no committed baseline; running FULL analysis on base (LLM)..." + BASE_SRC="$OUT/base-src"; rm -rf "$BASE_SRC" + git -C "$REPO" worktree add --detach "$BASE_SRC" "$BASE_REF" >/dev/null + run_engine " +from pathlib import Path +from codeboarding_workflows.analysis import run_full +print(run_full(repo_name='$(basename "$REPO")', repo_path=Path('$BASE_SRC'), output_dir=Path('$BASE_DIR'), + run_id='local-base', log_path='/tmp/cb-local-base.log', depth_level=int('$DEPTH'), source_sha='$BASE_REF')) +" + git -C "$REPO" worktree remove --force "$BASE_SRC" >/dev/null 2>&1 || true + fi + + echo "== Analyzing head at $HEAD_REF (incremental from base) ==" + cp -a "$BASE_DIR"/. 
"$HEAD_DIR"/ 2>/dev/null || true + run_engine " +from pathlib import Path +from codeboarding_workflows.analysis import run_incremental, run_full, BaselineUnavailableError +from diagram_analysis.exceptions import IncrementalCacheMissingError +repo=Path('$REPO'); out=Path('$HEAD_DIR'); name='$(basename "$REPO")' +try: + print(run_incremental(repo_path=repo, output_dir=out, project_name=name, run_id='local-head', + log_path='/tmp/cb-local-head.log', base_ref='$BASE_REF', target_ref='$HEAD_REF', source_sha='$HEAD_REF')) +except (IncrementalCacheMissingError, BaselineUnavailableError) as exc: + print(f'Incremental unavailable ({exc}); full analysis on head.') + for p in out.glob('*'): + if p.is_file(): p.unlink() + print(run_full(repo_name=name, repo_path=repo, output_dir=out, run_id='local-head', + log_path='/tmp/cb-local-head.log', depth_level=int('$DEPTH'), source_sha='$HEAD_REF')) +" + BASE_ANALYSIS="$BASE_DIR/analysis.json" + HEAD_ANALYSIS="$HEAD_DIR/analysis.json" +fi + +echo "== Diff -> Mermaid ==" +META="$(python3 "$ACTION_DIR/scripts/diff_to_mermaid.py" \ + --base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" \ + --out "$OUT/diagram.md" --direction "$DIRECTION" $CHANGED_ONLY $NO_EDGE_LABELS $NESTED $EXTRA)" +echo " $META" + +# Browser preview: render the (fence-stripped) mermaid via mermaid.js, strict mode +# to match GitHub. HTML-escape the body so labels with < > & stay valid. +python3 - "$OUT/diagram.md" "$OUT/preview.html" <<'PY' +import html, sys +src, dst = sys.argv[1], sys.argv[2] +body = open(src, encoding="utf-8").read().strip() +lines = body.splitlines() +if lines and lines[0].startswith("```"): lines = lines[1:] +if lines and lines[-1].startswith("```"): lines = lines[:-1] +graph = html.escape("\n".join(lines)) +open(dst, "w", encoding="utf-8").write(f""" +CodeBoarding architecture diff + +

Architecture diff preview

+
+ ■ added + ■ modified + ■ deleted +
+
+{graph}
+
+""") +print(f" wrote {dst}") +PY + +echo +echo "diagram : $OUT/diagram.md" +echo "preview : $OUT/preview.html" +if [ "$OPEN" != "no" ]; then + if command -v open >/dev/null 2>&1; then open "$OUT/preview.html"; + elif command -v xdg-open >/dev/null 2>&1; then xdg-open "$OUT/preview.html"; + else echo "(open $OUT/preview.html in your browser)"; fi +fi From e00323a498b45b8c71f440b8776f0679dd73abfe Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 15:36:43 +0200 Subject: [PATCH 2/6] test: self-test workflow to run the action on a PR (remove before merge) --- .github/workflows/test-self.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/test-self.yml diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml new file mode 100644 index 0000000..5440157 --- /dev/null +++ b/.github/workflows/test-self.yml @@ -0,0 +1,23 @@ +name: Self-test architecture diff + +# Exercises THIS branch's action (uses: ./) against the action repo itself, so a +# PR posts a Mermaid architecture-diff comment. Remove before merge. 
+ +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + pull-requests: write + +jobs: + diagram: + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@v4 + - uses: ./ + with: + llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} + agent_model: ${{ secrets.AGENT_MODEL }} + parsing_model: ${{ secrets.PARSING_MODEL }} From 9802ce93374cd9e5de8a7e39fac18d38ea23c8dc Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 16:37:44 +0200 Subject: [PATCH 3/6] feat(comment): clarify color legend (file-level changes) + add workspace/extension CTA via click proxy --- .github/workflows/example-usage.yml | 5 +++-- .github/workflows/test-self.yml | 1 + action.yml | 25 ++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml index 6fb9f78..1e8913c 100644 --- a/.github/workflows/example-usage.yml +++ b/.github/workflows/example-usage.yml @@ -1,8 +1,9 @@ name: Architecture diff +# Reference example only (the README shows the real pull_request usage). Manual +# trigger so it doesn't run the published @v1 against this repo's own PRs. on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] + workflow_dispatch: # Only a PR comment is posted — no image is pushed — so contents:write is not needed. 
permissions: diff --git a/.github/workflows/test-self.yml b/.github/workflows/test-self.yml index 5440157..302e014 100644 --- a/.github/workflows/test-self.yml +++ b/.github/workflows/test-self.yml @@ -21,3 +21,4 @@ jobs: llm_api_key: ${{ secrets.OPENROUTER_API_KEY }} agent_model: ${{ secrets.AGENT_MODEL }} parsing_model: ${{ secrets.PARSING_MODEL }} + cta_base_url: https://codeboarding.pontux-inc.workers.dev diff --git a/action.yml b/action.yml index 3c3c031..5ce42aa 100644 --- a/action.yml +++ b/action.yml @@ -46,6 +46,10 @@ inputs: description: 'Draw depth>1 sub-components as nested subgraphs (pair with depth_level >= 2).' required: false default: 'false' + cta_base_url: + description: 'Base URL of the click proxy (e.g. https://go.codeboarding.org). When set, the comment adds "open in workspace" / "get the extension" links with owner/repo/pr appended. Empty disables the CTA.' + required: false + default: '' outputs: diagram_md: @@ -340,12 +344,30 @@ runs: TRUNC="${{ steps.diagram.outputs.truncated }}" BODY_FILE=$(mktemp) + OWNER_REPO="${{ github.repository }}" + OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}" + PR="${{ steps.guard.outputs.pr_number }}" + CTA_BASE="${{ inputs.cta_base_url }}" + headline() { if [ "$1" = "0" ]; then echo "no architectural changes"; elif [ "$1" = "1" ]; then echo "1 component changed"; else echo "$1 components changed"; fi } + # Call-to-action: links open the live workspace (github.dev-equivalent) and + # the extension via the click proxy, with owner/repo/pr appended for tracking. + cta() { + [ -z "$CTA_BASE" ] && return + local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}" + local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" + echo "" + echo "---" + echo "🔍 **This is the flattened map.** [**Explore it live in your browser →**](${ws}) — expand each component, follow every dependency, and click straight through to the code that changed. 
No install; it opens right here on this PR." + echo "" + echo "💡 Want this on every PR? [**Add the CodeBoarding extension →**](${mp})" + } + { echo "### ${HEADER} · $(headline "$N")" echo "" @@ -355,7 +377,7 @@ runs: cat "${{ steps.diagram.outputs.diagram_md }}" echo "" echo "" - echo "🟩 added · 🟨 modified · 🟥 deleted — compared against \`${BASE_REF}\`." + echo "Components are tinted by the files that changed inside them — 🟩 added · 🟨 modified · 🟥 removed — versus \`${BASE_REF}\` (not whole subsystems being added or dropped)." if [ "$TRUNC" = "true" ]; then echo "" echo "Showing changed components only — the full graph exceeds GitHub's inline Mermaid limit." @@ -363,6 +385,7 @@ runs: else echo "**$(headline "$N")** versus \`${BASE_REF}\`, but the diagram is too large to render inline (GitHub caps inline Mermaid at 500 edges)." fi + cta echo "" echo "codeboarding-action · run ${{ github.run_id }}" } > "$BODY_FILE" From f5d3bbac23c77145e99295a5237b51a5e0b0117c Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 16:50:35 +0200 Subject: [PATCH 4/6] fix(comment): extension CTA = explore diffs in VS Code (not 'on every PR') --- action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/action.yml b/action.yml index 5ce42aa..6ac9456 100644 --- a/action.yml +++ b/action.yml @@ -363,9 +363,9 @@ runs: local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" echo "" echo "---" - echo "🔍 **This is the flattened map.** [**Explore it live in your browser →**](${ws}) — expand each component, follow every dependency, and click straight through to the code that changed. No install; it opens right here on this PR." + echo "🔍 **This is the flattened map.** [**Explore this diff live in your browser →**](${ws}) — expand every component, follow each dependency, and click straight through to the changed code. Opens in your browser, no install." 
echo "" - echo "💡 Want this on every PR? [**Add the CodeBoarding extension →**](${mp})" + echo "💡 Prefer your editor? [**Get the CodeBoarding VS Code extension →**](${mp}) to explore these architecture diffs right inside VS Code." } { From 951a729f9d77a2777ce0fb0f00497c01f845c00c Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 17:10:56 +0200 Subject: [PATCH 5/6] feat(comment): real health-check warning hook + tighter legend/CTA copy --- action.yml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/action.yml b/action.yml index 6ac9456..e37c80e 100644 --- a/action.yml +++ b/action.yml @@ -311,6 +311,46 @@ runs: echo "base_analysis=$BASE_DIR/analysis.json" >> $GITHUB_OUTPUT echo "head_analysis=$HEAD_DIR/analysis.json" >> $GITHUB_OUTPUT + - name: Architecture health check (best-effort) + if: steps.guard.outputs.skip != 'true' + id: health + continue-on-error: true + shell: bash + working-directory: codeboarding-engine + env: + STATIC_ANALYSIS_CONFIG: ${{ github.workspace }}/codeboarding-engine/static_analysis_config.yml + PROJECT_ROOT: ${{ github.workspace }}/codeboarding-engine + run: | + HEAD_DIR="${{ steps.base.outputs.head_dir }}" + TARGET="${{ github.workspace }}/target-repo" + # Count real WARNING/CRITICAL findings from the head analysis. Never fails + # the run — a missing/old health module just yields 0 issues (no banner). 
+ uv run python -c " + import json + from pathlib import Path + issues = 0 + try: + from static_analyzer.analysis_cache import StaticAnalysisCache + from health.runner import run_health_checks + from health.models import Severity + cache = StaticAnalysisCache(artifact_dir=Path('$HEAD_DIR'), repo_root=Path('$TARGET')) + sa = cache.get() + if sa is not None: + report = run_health_checks(sa, repo_name='${{ github.event.repository.name }}', repo_path=Path('$TARGET')) + if report is not None: + for cs in report.check_summaries: + for fg in getattr(cs, 'finding_groups', []): + if getattr(fg, 'severity', None) in (Severity.WARNING, Severity.CRITICAL): + issues += len(fg.entities) + print(f'Architecture issues found: {issues}') + except Exception as exc: + print(f'Health check skipped ({exc}).') + Path('/tmp/cb-issues.txt').write_text(str(issues)) + " + N=$(cat /tmp/cb-issues.txt 2>/dev/null || echo 0) + echo "issues=$N" >> $GITHUB_OUTPUT + echo "Architecture issues: $N" + - name: Diff analyses → Mermaid if: steps.guard.outputs.skip != 'true' id: diagram @@ -348,6 +388,7 @@ runs: OWNER="${OWNER_REPO%%/*}"; REPO="${OWNER_REPO##*/}" PR="${{ steps.guard.outputs.pr_number }}" CTA_BASE="${{ inputs.cta_base_url }}" + ISSUES="${{ steps.health.outputs.issues }}" headline() { if [ "$1" = "0" ]; then echo "no architectural changes"; @@ -356,16 +397,22 @@ runs: } # Call-to-action: links open the live workspace (github.dev-equivalent) and - # the extension via the click proxy, with owner/repo/pr appended for tracking. + # the VS Code extension via the click proxy, with owner/repo/pr for tracking. + # The warning banner is shown only when real health findings exist. 
cta() { [ -z "$CTA_BASE" ] && return local ws="${CTA_BASE}/use-workspace?owner=${OWNER}&repo=${REPO}&pr=${PR}" local mp="${CTA_BASE}/use-marketplace?owner=${OWNER}&repo=${REPO}&pr=${PR}" echo "" echo "---" - echo "🔍 **This is the flattened map.** [**Explore this diff live in your browser →**](${ws}) — expand every component, follow each dependency, and click straight through to the changed code. Opens in your browser, no install." + if [ -n "$ISSUES" ] && [ "$ISSUES" != "0" ]; then + local noun="issue"; [ "$ISSUES" != "1" ] && noun="issues" + echo "⚠️ **${ISSUES} architecture ${noun} found.** [**See live in your browser →**](${ws})" + echo "" + fi + echo "🔍 This is the flattened map. [**Explore this diff live in your browser →**](${ws})" echo "" - echo "💡 Prefer your editor? [**Get the CodeBoarding VS Code extension →**](${mp}) to explore these architecture diffs right inside VS Code." + echo "💡 Prefer your editor? [**Get the CodeBoarding VS Code extension →**](${mp})" } { @@ -377,7 +424,7 @@ runs: cat "${{ steps.diagram.outputs.diagram_md }}" echo "" echo "" - echo "Components are tinted by the files that changed inside them — 🟩 added · 🟨 modified · 🟥 removed — versus \`${BASE_REF}\` (not whole subsystems being added or dropped)." + echo "Colours indicate components that have been 🟩 added · 🟨 modified · 🟥 removed — versus \`${BASE_REF}\`." if [ "$TRUNC" = "true" ]; then echo "" echo "Showing changed components only — the full graph exceeds GitHub's inline Mermaid limit." 
From e9db8c3e249398f92d54037f44147a29d8a1c77a Mon Sep 17 00:00:00 2001 From: brovatten Date: Wed, 3 Jun 2026 17:15:59 +0200 Subject: [PATCH 6/6] feat(diff): expose --max-label to control edge-label truncation length --- scripts/diff_to_mermaid.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/diff_to_mermaid.py b/scripts/diff_to_mermaid.py index 6d9f6fd..35a6cd4 100644 --- a/scripts/diff_to_mermaid.py +++ b/scripts/diff_to_mermaid.py @@ -309,6 +309,7 @@ def render_mermaid( node_padding: int | None = None, node_spacing: int | None = None, rank_spacing: int | None = None, + max_label: int = _EDGE_LABEL_MAX, ) -> tuple: """Return (mermaid_text, meta). ``mermaid_text`` is None when there's nothing to draw. @@ -339,7 +340,7 @@ def emit_edges(rels: list, scope: _Scope, pad: str, force: str | None) -> None: dst = scope.resolve(rel.get("dst_id", ""), rel.get("dst_name", ""), present) if src is None or dst is None: continue # endpoint not drawn — skip, don't consume an edge index - label = _esc(_truncate(rel.get("relation", ""))) if edge_labels else "" + label = _esc(_truncate(rel.get("relation", ""), max_label)) if edge_labels else "" body.append(f'{pad}{src} -- "{label}" --> {dst}' if label else f"{pad}{src} --> {dst}") if status in edge_styles: edge_styles[status].append(counters["edges"]) @@ -423,6 +424,7 @@ def main() -> int: p.add_argument("--node-padding", type=int, default=None, help="Interior padding around each node label") p.add_argument("--node-spacing", type=int, default=None, help="Space between nodes in the same rank") p.add_argument("--rank-spacing", type=int, default=None, help="Space between ranks") + p.add_argument("--max-label", type=int, default=_EDGE_LABEL_MAX, help="Max characters in an edge label before truncation") args = p.parse_args() diff = build_diff(load_analysis(args.base), load_analysis(args.head)) @@ -436,6 +438,7 @@ def main() -> int: node_padding=args.node_padding, node_spacing=args.node_spacing, 
rank_spacing=args.rank_spacing, + max_label=args.max_label, ) args.out.write_text(mermaid if mermaid is not None else "", encoding="utf-8")