From 49210e78cf9ef2b12bdfb906077fd1a99cf6a88e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Dinis=20Ferreira?= Date: Sat, 30 May 2026 22:58:31 +0200 Subject: [PATCH 1/3] ci: fast static analysis with early-fail lint + inline SARIF Replace the pmd/checkstyle jobs with a parallel shape that gives early, inline feedback and stops re-running analysis inside the build: - lint: compile + pmd:pmd + checkstyle:checkstyle (SARIF) + pmd:cpd-check, -T 2C, --fail-never; gates by counting the merged SARIF (+ cpd.xml grep). Fails in ~3-5 min on its own check, independent of the build. - spotbugs: compile + spotbugs:spotbugs (SARIF), -Xmx4g, own parallel lane (the slow analysis). - maven-verify: build + tests only; the redundant checkstyle/pmd/spotbugs goals are dropped (now owned by lint/spotbugs). - line-endings: unchanged. All three emit SARIF 2.1.0, merged per tool and uploaded to Code Scanning (security-events: write) for inline annotations on the PR diff + Security tab. No custom Python annotator. Count-gate rather than the *:check goals: the check goals @Execute-fork a second analysis and cannot emit SARIF, and without the full compile classpath they false-positive on type-resolving rules. Each report goal runs once (full-reactor compile -> correct + SARIF) and the gate counts the result. Rationale + tables in docs/ci-static-analysis-design.md; measurement protocol in docs/ci-measurement-protocol.md. CPD gating is wired but inert until #1339 lowers the token threshold. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/verify.yml | 167 ++++++++++++++++++++++++++++-- docs/ci-measurement-protocol.md | 65 ++++++++++++ docs/ci-static-analysis-design.md | 108 +++++++++++++++++++ 3 files changed, 331 insertions(+), 9 deletions(-) create mode 100644 docs/ci-measurement-protocol.md create mode 100644 docs/ci-static-analysis-design.md diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index 07e922619..69e62f186 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -1,8 +1,27 @@ name: verify on: pull_request: + +# Code Scanning needs write access to upload SARIF results for inline annotations. +permissions: + contents: read + security-events: write + jobs: - pmd: + # Fast, early-fail lint lane: PMD + Checkstyle (+ CPD). Turns red in a few + # minutes on any violation, independent of the long build below, so a stray + # PMD/Checkstyle issue is reported immediately rather than after `verify`. + # + # `compile` is in the same invocation as the analysis goals: PMD's + # type-resolving rules (e.g. InvalidLogMessageFormat on the SLF4J + # trailing-Throwable idiom) need Tycho's aux-classpath, which a fresh `mvn` + # does not inherit from a prior step's target/classes. + # + # `--fail-never` lets every module produce its report (no Maven cascade-skip), + # so the uploaded SARIF — and therefore the inline annotations — are complete. + # The gate is a jq count over the merged SARIFs; compile errors still halt + # (MojoExecutionException is not suppressed by --fail-never). + lint: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -12,10 +31,90 @@ jobs: java-version: '21' - name: Set up Workspace Environment Variable run: echo "WORKSPACE=${{ github.workspace }}" >> $GITHUB_ENV - - name: PMD Check - run: mvn pmd:pmd pmd:cpd pmd:check pmd:cpd-check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end - checkstyle: + - name: Cache Maven dependencies + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: /home/runner/.m2/repository + key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- + + - name: PMD + Checkstyle reports (SARIF) + # PMD: SarifRenderer FQCN — emits pmd.sarif.json AND keeps pmd.xml. + # Checkstyle: output.format=sarif — SARIF content in checkstyle-result.xml. + # CPD is excluded here: the global -Dformat flag uses PMD's Renderer + # hierarchy and would ClassCastException CPD's CPDReportRenderer. + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + pmd:pmd checkstyle:checkstyle \ + -Dformat=net.sourceforge.pmd.renderers.SarifRenderer \ + -Dcheckstyle.output.format=sarif + + - name: CPD report (separate invocation — no SARIF support) + # CPD has no SARIF renderer; emits cpd.xml only. Run standalone so the + # PMD -Dformat flag isn't in scope. + # NOTE: project CPD token threshold is currently very high (issue #1339), + # which effectively disables detection; re-tune once #1339 lands. + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + pmd:cpd-check + + - name: Merge per-module SARIFs (PMD + Checkstyle) + if: always() + # Code Scanning accepts one run per category per upload; each analyzer + # writes one SARIF per module, so concatenate each analyzer's results + # into a single run under .sarif-merged/. + run: | + mkdir -p .sarif-merged + # Merge per-module SARIFs into one run. Filter to JSON-parseable files: + # the ddk-parent aggregator writes a plain-XML checkstyle-result.xml that + # would break jq, and modules with no findings may emit non-SARIF stubs. + merge() { # $1 = find-glob, $2 = output + local f valid=() + while IFS= read -r f; do + jq -e . "$f" >/dev/null 2>&1 && valid+=("$f") + done < <(find . -path "$1") + if [ ${#valid[@]} -gt 0 ]; then + jq -s '{ + "$schema": .[0]."$schema", version: .[0].version, + runs: [{ tool: .[0].runs[0].tool, + results: [.[].runs[].results[]?], + invocations: [.[].runs[].invocations[]?] }] + }' "${valid[@]}" > "$2" + fi + } + merge '*/target/pmd.sarif.json' .sarif-merged/pmd.sarif + merge '*/target/checkstyle-result.xml' .sarif-merged/checkstyle.sarif + + - name: Gate on PMD / CPD / Checkstyle violations + run: | + set -eu + sarif_total=$(jq '[.runs[].results[]?] | length' \ + .sarif-merged/pmd.sarif .sarif-merged/checkstyle.sarif 2>/dev/null \ + | awk '{s+=$1} END {print s+0}') + cpd_total=$(find . -name 'cpd.xml' -path '*/target/*' -exec grep -c '/dev/null \ + | awk -F: '{s+=$2} END {print s+0}') + echo "PMD/Checkstyle SARIF violations: $sarif_total" + echo "CPD duplications: $cpd_total" + if [ "$sarif_total" != "0" ] || [ "$cpd_total" != "0" ]; then + echo "::error::Static analysis found violations (PMD/CPD/Checkstyle)." + exit 1 + fi + + - name: Upload PMD/Checkstyle SARIF to Code Scanning + if: always() + uses: github/codeql-action/upload-sarif@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3.35.4 + with: + sarif_file: .sarif-merged + category: lint + + # SpotBugs is the slow critical-path analysis (the experiments' durable + # finding), so it runs in its own parallel lane and never delays `lint`. + spotbugs: runs-on: ubuntu-24.04 + env: + MAVEN_OPTS: -Xmx4g steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 @@ -24,14 +123,66 @@ jobs: java-version: '21' - name: Set up Workspace Environment Variable run: echo "WORKSPACE=${{ github.workspace }}" >> $GITHUB_ENV - - name: Checkstyle Check - run: mvn checkstyle:checkstyle checkstyle:check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end + - name: Cache Maven dependencies + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: /home/runner/.m2/repository + key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- + + - name: SpotBugs report (SARIF) + # sarifOutput=true emits spotbugsSarif.json (also writes spotbugsXml.xml). + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + spotbugs:spotbugs \ + -Dspotbugs.sarifOutput=true + + - name: Merge per-module SpotBugs SARIFs + if: always() + run: | + mkdir -p .sarif-merged + valid=() + while IFS= read -r f; do + jq -e . "$f" >/dev/null 2>&1 && valid+=("$f") + done < <(find . -path '*/target/spotbugsSarif.json') + if [ ${#valid[@]} -gt 0 ]; then + jq -s '{ + "$schema": .[0]."$schema", version: .[0].version, + runs: [{ tool: .[0].runs[0].tool, + results: [.[].runs[].results[]?], + invocations: [.[].runs[].invocations[]?] }] + }' "${valid[@]}" > .sarif-merged/spotbugs.sarif + fi + + - name: Gate on SpotBugs violations + run: | + set -eu + sb_total=$(jq '[.runs[].results[]?] | length' .sarif-merged/spotbugs.sarif 2>/dev/null || echo 0) + echo "SpotBugs SARIF violations: $sb_total" + if [ "$sb_total" != "0" ]; then + echo "::error::SpotBugs found violations." + exit 1 + fi + + - name: Upload SpotBugs SARIF to Code Scanning + if: always() + uses: github/codeql-action/upload-sarif@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3.35.4 + with: + sarif_file: .sarif-merged + category: spotbugs + line-endings: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Check LF line endings run: bash .github/scripts/check-line-endings.sh + + # Build + tests only. Static analysis now lives in the `lint` and `spotbugs` + # jobs, so the redundant checkstyle/pmd/spotbugs goals are dropped from here — + # this is the wall-clock long pole and no longer re-runs analysis. + # No `-T 2C`: tests are not known to pass reliably under reactor parallelism. maven-verify: runs-on: ubuntu-24.04 steps: @@ -51,9 +202,7 @@ jobs: path: /home/runner/.m2/repository key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} - name: Build with Maven within a virtual X Server Environment - # Run pmd:pmd and pmd:cpd first to generate reports for all modules, then run pmd:check and pmd:cpd-check - # This ensures all violations are collected and reported before the build fails - run: xvfb-run mvn clean verify checkstyle:check pmd:pmd pmd:cpd pmd:check pmd:cpd-check spotbugs:check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end + run: xvfb-run mvn clean verify -f ./ddk-parent/pom.xml --batch-mode --fail-at-end - name: Fail on missing surefire reports if: always() run: bash .github/scripts/check-surefire-reports.sh diff --git a/docs/ci-measurement-protocol.md b/docs/ci-measurement-protocol.md new file mode 100644 index 000000000..470392f48 --- /dev/null +++ b/docs/ci-measurement-protocol.md @@ -0,0 +1,65 @@ +# CI timing-measurement protocol + +How to get **trustworthy** numbers for a CI-shape change. Written because the +earlier experiment program (2026-05) produced numbers that can't be trusted: +single samples, taken during an Eclipse p2 mirror-flaky window, against a CI +shape that has since changed. Don't repeat that. + +## What we're measuring + +- **Wall-clock** per run = the slowest job (what a developer waits for). +- **Per-job duration** = the analysis bottleneck (settles e.g. spotbugs-vs-maven-verify). +- **Failure latency** = time-to-red on a *planted* lint violation — the metric the + early-fail goal actually cares about. Measure it separately. + +Wall-clock is the headline, but per-job + failure-latency are what tell you *why*. + +## Noise floor (observed on this repo's `verify` runs) + +Per-job spread across recent runs — any change must beat this to be real signal: + +| Job | median | spread (±) | +|---|---|---| +| line-endings | ~5s | ±3s | +| checkstyle (old shape) | ~115s | ±~104s (≈90%!) | +| pmd (old shape) | ~191s | ±63s | +| maven-verify | ~869s (~14.5m) | ±~93s (≈11%) | + +**Decision rule:** accept candidate B as faster than A only if +`median(B) < median(A) − 2 × IQR(A)`. For maven-verify a real win must exceed ~100s; +for checkstyle, ~200s. Anything smaller is noise. + +## Protocol + +1. **Pre-flight — confirm mirrors are healthy.** Run one throwaway build; if + target-platform resolution stalls or errors, **stop** — the window is bad + (this is what voided the 2026-05-09 numbers). Measure only on a clean window. +2. **Warm the caches.** Run 3–5 discard builds first so `~/.m2` + `.cache/tycho` + are populated; cold-cache runs have a different (network-bound) profile. +3. **Sample.** N = 15–20 `workflow_dispatch` runs per candidate, back-to-back in a + ≤30-minute window. Run A and B **interleaved within ≤10 minutes** of each other + so they see the same mirror weather and runner-pool load. +4. **Report medians + IQR**, per job *and* total wall-clock. Never a single sample, + never the mean. +5. **Pin `ubuntu-24.04`** (not a matrix) for consistent runner hardware. +6. **Record per-sample metadata:** cache hit/miss, run start time, headSha. +7. **Failure latency:** plant a synthetic PMD/Checkstyle violation, measure how long + until the `lint` check goes red (independent of the build job). + +## Why not just trust the old experiment numbers + +- Single sample each (no repetition). +- 2026-05-09 mirror-flaky window → resolution time is contaminated and varies per job + even within one dispatch. +- Numbers disagree across rounds (sequential measured 21m one round, 33m another). +- `#1369` reshaped master's CI after the experiments ran, so their baseline is stale. + +## What is *not* a timing lever (validated) + +- **Local p2 mirror**: with a warm `~/.m2`/`.cache/tycho`, offline resolution is + ~equal to online (measured 5s vs 6s) — a mirror's steady-state speedup is ~0. + Its only value is cold-cache + flaky-mirror insurance; deferred per the rare-flake + rule. +- **`-Dtycho.mode=maven` on a gate pass**: marginal on a warm cache (resolution is + already seconds); and it *breaks* PMD type-resolving rules (strips the classpath → + false positives). Not used. diff --git a/docs/ci-static-analysis-design.md b/docs/ci-static-analysis-design.md new file mode 100644 index 000000000..acbee5eec --- /dev/null +++ b/docs/ci-static-analysis-design.md @@ -0,0 +1,108 @@ +# Static-analysis CI design: SARIF inline display + count-gate + +This documents *why* the static-analysis CI is shaped the way it is, so the +mechanics (verified against the plugin source) don't have to be re-discovered. + +## Goal + +Fast, complete, **early** PMD + Checkstyle feedback — a violation should fail CI +in minutes, on its own check, not after the ~15-minute build. SpotBugs (the slow +analysis) runs in its own parallel lane so it never delays the fast checks. All +of PMD, Checkstyle, and SpotBugs surface **inline** on the PR via SARIF + GitHub +Code Scanning (no custom annotator). + +## Job shape (`verify.yml`) + +Four independent parallel jobs (no `needs:`); wall-clock = the slowest job. + +| Job | Runs | Threads | Gate | +|---|---|---|---| +| `lint` | `compile` + `pmd:pmd` + `checkstyle:checkstyle` (SARIF) + `pmd:cpd-check` | `-T 2C` | jq count of SARIF results (+ CPD `` grep) | +| `spotbugs` | `compile` + `spotbugs:spotbugs` (SARIF), `-Xmx4g` | `-T 2C` | jq count of SARIF results | +| `maven-verify` | `clean verify` (build + tests only) | none | Tycho-surefire | +| `line-endings` | `git ls-files` check | n/a | shell | + +`maven-verify` **no longer re-runs** the analysis goals (now owned by `lint` / +`spotbugs`). Analysis jobs use `-T 2C`; the test job does not (tests are not known +reliable under reactor parallelism). + +## Report goals vs. check goals (code-verified at each plugin version tag) + +| Goal | Kind | `@Execute` fork | Runs analysis? | Writes | Fails build? | SARIF-capable? | +|---|---|---|---|---|---|---| +| `pmd:pmd` | report | — | yes | `pmd.xml` (+ `pmd.sarif.json` w/ `-Dformat=…SarifRenderer`) | no | **yes** | +| `pmd:check` | check | `@Execute(goal=pmd)` | yes (forked) | `pmd.xml` | yes (`> maxAllowedViolations`, dflt 0; `failurePriority` dflt 5 = all) | no (wants `pmd.xml`) | +| `pmd:cpd` | report | — | yes | `cpd.xml` | no | no (no CPD SARIF renderer) | +| `pmd:cpd-check` | check | `@Execute(goal=cpd)` | yes (forked) | `cpd.xml` | yes | no | +| `checkstyle:checkstyle` | report | — | yes | `checkstyle-result.xml` (XML, or SARIF w/ `-Dcheckstyle.output.format=sarif`) | no | **yes** | +| `checkstyle:check` | check | **none** (self-contained) | yes (internal, source-based) | `checkstyle-result.xml` (XML) | yes (severity ≥ `violationSeverity`) | no | +| `spotbugs:spotbugs` | report | — | yes (bytecode) | `spotbugsXml.xml` (+ `spotbugsSarif.json` w/ `-Dspotbugs.sarifOutput=true`) | no | **yes** | +| `spotbugs:check` | check | `@Execute(goal=spotbugs)` | yes (forked) | `spotbugsXml.xml` | yes (`bugCount > 0`) | no | + +The split is **observe vs. enforce**: report goals produce output (for the Maven +site / dashboards / SARIF consumers) and never fail the build; check goals bind to +`verify` and exist to fail the build. The `@Execute(goal=…)` on the PMD/CPD/SpotBugs +checks forks the report goal first (so `mvn pmd:check` is self-contained) — which +means they **re-run the analysis every time**. `checkstyle:check` is the lone +exception: no `@Execute`, runs Checkstyle internally in one pass. + +## Maven reactor failure flags + +| Flag | Reactor behavior | Multi-module report completeness | Suppresses violation failure? | +|---|---|---|---| +| `--fail-fast` (default) | halt at first failing module | downstream skipped → reports missing | no | +| `--fail-at-end` | build independent modules, fail at end | modules downstream of a failure still cascade-skipped → incomplete | no | +| `--fail-never` | never fail on `MojoFailureException` (a real compile error / `MojoExecutionException` still halts) | **all** modules run → **complete** reports/SARIF | **yes** — violation failures swallowed | + +## Why `report goal + --fail-never + jq count` (the count-gate) + +We want **both** SARIF (for inline display) **and** a build gate, from **one** +analysis pass. The plugin design forces a choice: + +| Combination | Result | +|---|---| +| check goal + `--fail-fast` | gates, but first violation hides later modules' findings | +| check goal + `--fail-at-end` | gates, but cascade-skip drops downstream findings | +| check goal + `--fail-never` | doesn't gate (failure swallowed) — pointless | +| **report goal + `--fail-never` + jq count** | **all** modules analyzed → complete SARIF (full inline display); the jq count does the gating | + +So each tool runs its report goal **once** (`compile` + report, full classpath → +correct + SARIF), and a jq count over the merged SARIF fails the job. + +### Why not the `:check` goals +- They `@Execute`-fork a **second** analysis on top of the `pmd:pmd` we already run + for SARIF — pure waste. +- The forked analysis is only correct with the full `compile` + classpath; run cheaply + (`-Dtycho.mode=maven` / no compile) it loses the classpath and **false-positives** + (e.g. PMD `InvalidLogMessageFormat` flags the SLF4J trailing-`Throwable` idiom because + it can't resolve the last arg as a `Throwable`). +- `pmd:check` is **incompatible with `-Dformat=sarif`**: its fork writes `pmd.sarif.json`, + but the check looks for `pmd.xml` → "unable to find report." + +### count-gate == `:check` fidelity (for this project's config) +Counting **all** SARIF results equals what `:check` would fail on, because: PMD has no +`failurePriority` override (default 5 = all priorities), Checkstyle is globally +`severity=warning` with no info-level results, and SpotBugs uses `threshold=Low` with no +`failThreshold`/`maxAllowedViolations` override. Three config-drift caveats would make +the count-gate *stricter* than `:check` (over-fail, never under-fail), each guard-worthy: +1. PMD `pmd.failurePriority` set below 5. +2. A Checkstyle rule emitting `info`/`note` severity. +3. SpotBugs `failThreshold` or non-zero `maxAllowedViolations`. + +## Two operational rules + +- **`compile` must be full-reactor** (`-f ddk-parent/pom.xml`, no `-pl`). PMD's + type-resolving rules need the complete aux-classpath; a `-pl` subset produces + false positives (the trailing-`Throwable` case). +- **Merge SARIFs from SARIF files only.** Code Scanning accepts one run per category, + so per-module SARIFs are merged (jq) before upload. The `ddk-parent` aggregator emits + plain-XML `checkstyle-result.xml`; the merge must filter to JSON-parseable files. + +## SARIF mechanics + +- PMD: `-Dformat=net.sourceforge.pmd.renderers.SarifRenderer` → `pmd.sarif.json`. +- Checkstyle: `-Dcheckstyle.output.format=sarif` → SARIF content in `checkstyle-result.xml`. +- SpotBugs: `-Dspotbugs.sarifOutput=true` → `spotbugsSarif.json`. +- All emit SARIF 2.1.0. Merge per tool → `github/codeql-action/upload-sarif` + (`permissions: security-events: write`). CPD has no SARIF renderer → XML + grep gate + (no inline display for CPD; acceptable, duplications are rare). From 2ffb917f3c911ef28d3c80f74c0197616e57f646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Dinis=20Ferreira?= Date: Sun, 31 May 2026 14:37:32 +0200 Subject: [PATCH 2/3] ci: add restore-keys to the maven-verify Maven cache The lint and spotbugs jobs fall back to a restore-key prefix on a pom.xml change; maven-verify lacked it, so any pom change gave it a hard cache miss and a full ~/.m2 re-download. Add the same restore-keys for parity across all three Maven jobs. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/verify.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index 69e62f186..220ba97f1 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -201,6 +201,7 @@ jobs: with: path: /home/runner/.m2/repository key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- - name: Build with Maven within a virtual X Server Environment run: xvfb-run mvn clean verify -f ./ddk-parent/pom.xml --batch-mode --fail-at-end - name: Fail on missing surefire reports From 0b31263e880249bdfcc0ce0c8c02d5432325869b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Dinis=20Ferreira?= Date: Sun, 31 May 2026 12:02:13 +0200 Subject: [PATCH 3/3] ci: scope SpotBugs to the PR's changed modules (per-module skip) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SpotBugs' per-module analysis is the spotbugs job's long pole. A PR only needs its changed modules scanned, so a pre-step injects true> into every unchanged reactor module's pom — the plugin then skips the goal, and the per-module JVM fork, for them. The full-reactor compile is kept (a changed module keeps its complete aux-classpath); a build/config change falls back to a full scan. pull_request only — master/snapshot run a full scan. -Dspotbugs.onlyAnalyze was the cleaner-looking alternative but screens too late (after the per-module fork), ~17% vs ~88% measured; the script header documents the migration if an upstream SpotBugs early-exit ever lands. - .github/scripts/compute-spotbugs-skip.sh: diff -> changed modules -> inject skip into the unchanged ones (idempotent; build/config change -> full scan). - verify.yml spotbugs job: fetch-depth 0 + a scope step before compile; -Djgit.dirtyWorkingTree=ignore because the scope step dirties poms on purpose and this job releases nothing (releases/maven-verify keep =error); SARIF upload guarded so an empty scan set (no module scanned) doesn't fail the upload. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/scripts/compute-spotbugs-skip.sh | 78 ++++++++++++++++++++++++ .github/workflows/verify.yml | 20 +++++- 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100755 .github/scripts/compute-spotbugs-skip.sh diff --git a/.github/scripts/compute-spotbugs-skip.sh b/.github/scripts/compute-spotbugs-skip.sh new file mode 100755 index 000000000..3fa43826b --- /dev/null +++ b/.github/scripts/compute-spotbugs-skip.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# +# Scope SpotBugs to a pull request's changed modules. +# +# Default is RUN (analyze). On a PR this injects true +# into every UNCHANGED reactor module's pom, so spotbugs-maven-plugin skips the goal — +# and therefore the per-module JVM fork (SpotBugsMojo gates on `skip` before forking) — +# for those modules. The full-reactor compile is left intact (a changed module is still +# analysed with its complete aux-classpath). Master/snapshot builds run a full scan; +# this script is invoked on pull_request only. +# +# Why this and not -Dspotbugs.onlyAnalyze: onlyAnalyze is one clean flag, but SpotBugs +# applies its class screener too late (after the per-module fork + class scan), so it +# only trimmed ~17% of the goal vs ~88% for this per-module skip (measured on this +# reactor). A small upstream SpotBugs early-exit (skip the run when no application class +# matches the screener) would make onlyAnalyze competitive; if that ever lands, switch +# to onlyAnalyze and delete this script. +# +# Run from the repository root. Usage: compute-spotbugs-skip.sh +set -euo pipefail +base="${1:?base sha required}" + +changed=$(git diff --name-only --diff-filter=ACMR "${base}...HEAD") + +# 1) A change to shared build/config can affect any module -> full scan (skip nothing). +# Fail safe: the worst case here is "analyse everything", never "analyse nothing". +while IFS= read -r f; do + [ -n "$f" ] || continue + case "$f" in + pom.xml | ddk-parent/* | .mvn/* | *.target | .github/* | *[Ss]pot[Bb]ugs*[Ee]xclude*) + echo "Build/config change ($f) -> full SpotBugs scan (no skips)." + exit 0 + ;; + esac +done < (strip the leading ../). +# ddk-parent is NOT in its own , so it can never be skip-injected — which +# is what prevents an accidental inherited (global) skip. +module_dirs=$(grep -oE '\.\./[^<]+' ddk-parent/pom.xml \ + | sed -E 's#.*\.\./([^<]+)#\1#') + +# 4) Idempotently inject the skip property; handle poms with and without . +# sed -i.bak + rm is portable across GNU (CI) and BSD (local) sed. +inject_skip() { + local pom="$1/pom.xml" + [ -f "$pom" ] || return 0 + if grep -q '' "$pom"; then return 0; fi + if grep -q '' "$pom"; then + sed -i.bak 's##\n true#' "$pom" + else + sed -i.bak 's## \n true\n \n#' "$pom" + fi + rm -f "$pom.bak" +} + +# 5) Skip every reactor module that was not touched by this PR. +kept=0 +skipped=0 +while IFS= read -r mod; do + [ -n "$mod" ] || continue + if printf '%s\n' "${changed_mods}" | grep -qx "$mod"; then + kept=$((kept + 1)) + else + inject_skip "$mod" + skipped=$((skipped + 1)) + fi +done <}" diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index 220ba97f1..0a88c4dad 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -117,6 +117,8 @@ jobs: MAVEN_OPTS: -Xmx4g steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 0 # need the PR base commit to diff the changed modules - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 with: distribution: 'temurin' @@ -130,13 +132,25 @@ jobs: key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-maven-0- + - name: Scope SpotBugs to the PR's changed modules + # Injects true> into unchanged module poms so the per-module + # SpotBugs fork is skipped for them (the lever that actually scopes the cost). + # Full compile is preserved (correct aux-classpath); a build/config change -> + # full scan. pull_request only; master/snapshot run a full scan. + run: bash .github/scripts/compute-spotbugs-skip.sh "${{ github.event.pull_request.base.sha }}" + - name: SpotBugs report (SARIF) # sarifOutput=true emits spotbugsSarif.json (also writes spotbugsXml.xml). + # jgit.dirtyWorkingTree=ignore: the scope step intentionally edits poms, so the + # working tree is dirty here; this job releases nothing, so we tell Tycho's jgit + # build-qualifier to use the last commit's timestamp instead of failing (the + # repo keeps jgit.dirtyWorkingTree=error for maven-verify / releases). run: | mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ compile \ spotbugs:spotbugs \ - -Dspotbugs.sarifOutput=true + -Dspotbugs.sarifOutput=true \ + -Djgit.dirtyWorkingTree=ignore - name: Merge per-module SpotBugs SARIFs if: always() @@ -166,7 +180,9 @@ jobs: fi - name: Upload SpotBugs SARIF to Code Scanning - if: always() + # Skip when nothing was scanned (e.g. a docs-only PR -> all modules skipped): + # an empty .sarif-merged would otherwise fail upload-sarif ("No SARIF files"). + if: ${{ always() && hashFiles('.sarif-merged/spotbugs.sarif') != '' }} uses: github/codeql-action/upload-sarif@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3.35.4 with: sarif_file: .sarif-merged