diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index 07e922619..220ba97f1 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -1,8 +1,27 @@ name: verify on: pull_request: + +# Code Scanning needs write access to upload SARIF results for inline annotations. +permissions: + contents: read + security-events: write + jobs: - pmd: + # Fast, early-fail lint lane: PMD + Checkstyle (+ CPD). Turns red in a few + # minutes on any violation, independent of the long build below, so a stray + # PMD/Checkstyle issue is reported immediately rather than after `verify`. + # + # `compile` is in the same invocation as the analysis goals: PMD's + # type-resolving rules (e.g. InvalidLogMessageFormat on the SLF4J + # trailing-Throwable idiom) need Tycho's aux-classpath, which a fresh `mvn` + # does not inherit from a prior step's target/classes. + # + # `--fail-never` lets every module produce its report (no Maven cascade-skip), + # so the uploaded SARIF — and therefore the inline annotations — are complete. + # The gate is a jq count over the merged SARIFs; compile errors still halt + # (MojoExecutionException is not suppressed by --fail-never). + lint: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -12,10 +31,90 @@ jobs: java-version: '21' - name: Set up Workspace Environment Variable run: echo "WORKSPACE=${{ github.workspace }}" >> $GITHUB_ENV - - name: PMD Check - run: mvn pmd:pmd pmd:cpd pmd:check pmd:cpd-check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end - checkstyle: + - name: Cache Maven dependencies + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: /home/runner/.m2/repository + key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- + + - name: PMD + Checkstyle reports (SARIF) + # PMD: SarifRenderer FQCN — emits pmd.sarif.json AND keeps pmd.xml. + # Checkstyle: output.format=sarif — SARIF content in checkstyle-result.xml. + # CPD is excluded here: the global -Dformat flag uses PMD's Renderer + # hierarchy and would ClassCastException CPD's CPDReportRenderer. + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + pmd:pmd checkstyle:checkstyle \ + -Dformat=net.sourceforge.pmd.renderers.SarifRenderer \ + -Dcheckstyle.output.format=sarif + + - name: CPD report (separate invocation — no SARIF support) + # CPD has no SARIF renderer; emits cpd.xml only. Run standalone so the + # PMD -Dformat flag isn't in scope. + # NOTE: project CPD token threshold is currently very high (issue #1339), + # which effectively disables detection; re-tune once #1339 lands. + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + pmd:cpd-check + + - name: Merge per-module SARIFs (PMD + Checkstyle) + if: always() + # Code Scanning accepts one run per category per upload; each analyzer + # writes one SARIF per module, so concatenate each analyzer's results + # into a single run under .sarif-merged/. + run: | + mkdir -p .sarif-merged + # Merge per-module SARIFs into one run. Filter to JSON-parseable files: + # the ddk-parent aggregator writes a plain-XML checkstyle-result.xml that + # would break jq, and modules with no findings may emit non-SARIF stubs. + merge() { # $1 = find-glob, $2 = output + local f valid=() + while IFS= read -r f; do + jq -e . "$f" >/dev/null 2>&1 && valid+=("$f") + done < <(find . -path "$1") + if [ ${#valid[@]} -gt 0 ]; then + jq -s '{ + "$schema": .[0]."$schema", version: .[0].version, + runs: [{ tool: .[0].runs[0].tool, + results: [.[].runs[].results[]?], + invocations: [.[].runs[].invocations[]?] }] + }' "${valid[@]}" > "$2" + fi + } + merge '*/target/pmd.sarif.json' .sarif-merged/pmd.sarif + merge '*/target/checkstyle-result.xml' .sarif-merged/checkstyle.sarif + + - name: Gate on PMD / CPD / Checkstyle violations + run: | + set -eu + sarif_total=$(jq '[.runs[].results[]?] | length' \ + .sarif-merged/pmd.sarif .sarif-merged/checkstyle.sarif 2>/dev/null \ + | awk '{s+=$1} END {print s+0}') + cpd_total=$(find . -name 'cpd.xml' -path '*/target/*' -exec grep -c '/dev/null \ + | awk -F: '{s+=$2} END {print s+0}') + echo "PMD/Checkstyle SARIF violations: $sarif_total" + echo "CPD duplications: $cpd_total" + if [ "$sarif_total" != "0" ] || [ "$cpd_total" != "0" ]; then + echo "::error::Static analysis found violations (PMD/CPD/Checkstyle)." + exit 1 + fi + + - name: Upload PMD/Checkstyle SARIF to Code Scanning + if: always() + uses: github/codeql-action/upload-sarif@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3.35.4 + with: + sarif_file: .sarif-merged + category: lint + + # SpotBugs is the slow critical-path analysis (the experiments' durable + # finding), so it runs in its own parallel lane and never delays `lint`. + spotbugs: runs-on: ubuntu-24.04 + env: + MAVEN_OPTS: -Xmx4g steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 @@ -24,14 +123,66 @@ jobs: java-version: '21' - name: Set up Workspace Environment Variable run: echo "WORKSPACE=${{ github.workspace }}" >> $GITHUB_ENV - - name: Checkstyle Check - run: mvn checkstyle:checkstyle checkstyle:check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end + - name: Cache Maven dependencies + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: /home/runner/.m2/repository + key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- + + - name: SpotBugs report (SARIF) + # sarifOutput=true emits spotbugsSarif.json (also writes spotbugsXml.xml). + run: | + mvn -T 2C -f ./ddk-parent/pom.xml --batch-mode --fail-never \ + compile \ + spotbugs:spotbugs \ + -Dspotbugs.sarifOutput=true + + - name: Merge per-module SpotBugs SARIFs + if: always() + run: | + mkdir -p .sarif-merged + valid=() + while IFS= read -r f; do + jq -e . "$f" >/dev/null 2>&1 && valid+=("$f") + done < <(find . -path '*/target/spotbugsSarif.json') + if [ ${#valid[@]} -gt 0 ]; then + jq -s '{ + "$schema": .[0]."$schema", version: .[0].version, + runs: [{ tool: .[0].runs[0].tool, + results: [.[].runs[].results[]?], + invocations: [.[].runs[].invocations[]?] }] + }' "${valid[@]}" > .sarif-merged/spotbugs.sarif + fi + + - name: Gate on SpotBugs violations + run: | + set -eu + sb_total=$(jq '[.runs[].results[]?] | length' .sarif-merged/spotbugs.sarif 2>/dev/null || echo 0) + echo "SpotBugs SARIF violations: $sb_total" + if [ "$sb_total" != "0" ]; then + echo "::error::SpotBugs found violations." + exit 1 + fi + + - name: Upload SpotBugs SARIF to Code Scanning + if: always() + uses: github/codeql-action/upload-sarif@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3.35.4 + with: + sarif_file: .sarif-merged + category: spotbugs + line-endings: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Check LF line endings run: bash .github/scripts/check-line-endings.sh + + # Build + tests only. Static analysis now lives in the `lint` and `spotbugs` + # jobs, so the redundant checkstyle/pmd/spotbugs goals are dropped from here — + # this is the wall-clock long pole and no longer re-runs analysis. + # No `-T 2C`: tests are not known to pass reliably under reactor parallelism. maven-verify: runs-on: ubuntu-24.04 steps: @@ -50,10 +201,9 @@ jobs: with: path: /home/runner/.m2/repository key: ${{ runner.os }}-maven-0-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-maven-0- - name: Build with Maven within a virtual X Server Environment - # Run pmd:pmd and pmd:cpd first to generate reports for all modules, then run pmd:check and pmd:cpd-check - # This ensures all violations are collected and reported before the build fails - run: xvfb-run mvn clean verify checkstyle:check pmd:pmd pmd:cpd pmd:check pmd:cpd-check spotbugs:check -f ./ddk-parent/pom.xml --batch-mode --fail-at-end + run: xvfb-run mvn clean verify -f ./ddk-parent/pom.xml --batch-mode --fail-at-end - name: Fail on missing surefire reports if: always() run: bash .github/scripts/check-surefire-reports.sh diff --git a/docs/ci-measurement-protocol.md b/docs/ci-measurement-protocol.md new file mode 100644 index 000000000..470392f48 --- /dev/null +++ b/docs/ci-measurement-protocol.md @@ -0,0 +1,65 @@ +# CI timing-measurement protocol + +How to get **trustworthy** numbers for a CI-shape change. Written because the +earlier experiment program (2026-05) produced numbers that can't be trusted: +single samples, taken during an Eclipse p2 mirror-flaky window, against a CI +shape that has since changed. Don't repeat that. + +## What we're measuring + +- **Wall-clock** per run = the slowest job (what a developer waits for). +- **Per-job duration** = the analysis bottleneck (settles e.g. spotbugs-vs-maven-verify). +- **Failure latency** = time-to-red on a *planted* lint violation — the metric the + early-fail goal actually cares about. Measure it separately. + +Wall-clock is the headline, but per-job + failure-latency are what tell you *why*. + +## Noise floor (observed on this repo's `verify` runs) + +Per-job spread across recent runs — any change must beat this to be real signal: + +| Job | median | spread (±) | +|---|---|---| +| line-endings | ~5s | ±3s | +| checkstyle (old shape) | ~115s | ±~104s (≈90%!) | +| pmd (old shape) | ~191s | ±63s | +| maven-verify | ~869s (~14.5m) | ±~93s (≈11%) | + +**Decision rule:** accept candidate B as faster than A only if +`median(B) < median(A) − 2 × IQR(A)`. For maven-verify a real win must exceed ~100s; +for checkstyle, ~200s. Anything smaller is noise. + +## Protocol + +1. **Pre-flight — confirm mirrors are healthy.** Run one throwaway build; if + target-platform resolution stalls or errors, **stop** — the window is bad + (this is what voided the 2026-05-09 numbers). Measure only on a clean window. +2. **Warm the caches.** Run 3–5 discard builds first so `~/.m2` + `.cache/tycho` + are populated; cold-cache runs have a different (network-bound) profile. +3. **Sample.** N = 15–20 `workflow_dispatch` runs per candidate, back-to-back in a + ≤30-minute window. Run A and B **interleaved within ≤10 minutes** of each other + so they see the same mirror weather and runner-pool load. +4. **Report medians + IQR**, per job *and* total wall-clock. Never a single sample, + never the mean. +5. **Pin `ubuntu-24.04`** (not a matrix) for consistent runner hardware. +6. **Record per-sample metadata:** cache hit/miss, run start time, headSha. +7. **Failure latency:** plant a synthetic PMD/Checkstyle violation, measure how long + until the `lint` check goes red (independent of the build job). + +## Why not just trust the old experiment numbers + +- Single sample each (no repetition). +- 2026-05-09 mirror-flaky window → resolution time is contaminated and varies per job + even within one dispatch. +- Numbers disagree across rounds (sequential measured 21m one round, 33m another). +- `#1369` reshaped master's CI after the experiments ran, so their baseline is stale. + +## What is *not* a timing lever (validated) + +- **Local p2 mirror**: with a warm `~/.m2`/`.cache/tycho`, offline resolution is + ~equal to online (measured 5s vs 6s) — a mirror's steady-state speedup is ~0. + Its only value is cold-cache + flaky-mirror insurance; deferred per the rare-flake + rule. +- **`-Dtycho.mode=maven` on a gate pass**: marginal on a warm cache (resolution is + already seconds); and it *breaks* PMD type-resolving rules (strips the classpath → + false positives). Not used. diff --git a/docs/ci-static-analysis-design.md b/docs/ci-static-analysis-design.md new file mode 100644 index 000000000..acbee5eec --- /dev/null +++ b/docs/ci-static-analysis-design.md @@ -0,0 +1,108 @@ +# Static-analysis CI design: SARIF inline display + count-gate + +This documents *why* the static-analysis CI is shaped the way it is, so the +mechanics (verified against the plugin source) don't have to be re-discovered. + +## Goal + +Fast, complete, **early** PMD + Checkstyle feedback — a violation should fail CI +in minutes, on its own check, not after the ~15-minute build. SpotBugs (the slow +analysis) runs in its own parallel lane so it never delays the fast checks. All +of PMD, Checkstyle, and SpotBugs surface **inline** on the PR via SARIF + GitHub +Code Scanning (no custom annotator). + +## Job shape (`verify.yml`) + +Four independent parallel jobs (no `needs:`); wall-clock = the slowest job. + +| Job | Runs | Threads | Gate | +|---|---|---|---| +| `lint` | `compile` + `pmd:pmd` + `checkstyle:checkstyle` (SARIF) + `pmd:cpd-check` | `-T 2C` | jq count of SARIF results (+ CPD `` grep) | +| `spotbugs` | `compile` + `spotbugs:spotbugs` (SARIF), `-Xmx4g` | `-T 2C` | jq count of SARIF results | +| `maven-verify` | `clean verify` (build + tests only) | none | Tycho-surefire | +| `line-endings` | `git ls-files` check | n/a | shell | + +`maven-verify` **no longer re-runs** the analysis goals (now owned by `lint` / +`spotbugs`). Analysis jobs use `-T 2C`; the test job does not (tests are not known +reliable under reactor parallelism). + +## Report goals vs. check goals (code-verified at each plugin version tag) + +| Goal | Kind | `@Execute` fork | Runs analysis? | Writes | Fails build? | SARIF-capable? | +|---|---|---|---|---|---|---| +| `pmd:pmd` | report | — | yes | `pmd.xml` (+ `pmd.sarif.json` w/ `-Dformat=…SarifRenderer`) | no | **yes** | +| `pmd:check` | check | `@Execute(goal=pmd)` | yes (forked) | `pmd.xml` | yes (`> maxAllowedViolations`, dflt 0; `failurePriority` dflt 5 = all) | no (wants `pmd.xml`) | +| `pmd:cpd` | report | — | yes | `cpd.xml` | no | no (no CPD SARIF renderer) | +| `pmd:cpd-check` | check | `@Execute(goal=cpd)` | yes (forked) | `cpd.xml` | yes | no | +| `checkstyle:checkstyle` | report | — | yes | `checkstyle-result.xml` (XML, or SARIF w/ `-Dcheckstyle.output.format=sarif`) | no | **yes** | +| `checkstyle:check` | check | **none** (self-contained) | yes (internal, source-based) | `checkstyle-result.xml` (XML) | yes (severity ≥ `violationSeverity`) | no | +| `spotbugs:spotbugs` | report | — | yes (bytecode) | `spotbugsXml.xml` (+ `spotbugsSarif.json` w/ `-Dspotbugs.sarifOutput=true`) | no | **yes** | +| `spotbugs:check` | check | `@Execute(goal=spotbugs)` | yes (forked) | `spotbugsXml.xml` | yes (`bugCount > 0`) | no | + +The split is **observe vs. enforce**: report goals produce output (for the Maven +site / dashboards / SARIF consumers) and never fail the build; check goals bind to +`verify` and exist to fail the build. The `@Execute(goal=…)` on the PMD/CPD/SpotBugs +checks forks the report goal first (so `mvn pmd:check` is self-contained) — which +means they **re-run the analysis every time**. `checkstyle:check` is the lone +exception: no `@Execute`, runs Checkstyle internally in one pass. + +## Maven reactor failure flags + +| Flag | Reactor behavior | Multi-module report completeness | Suppresses violation failure? | +|---|---|---|---| +| `--fail-fast` (default) | halt at first failing module | downstream skipped → reports missing | no | +| `--fail-at-end` | build independent modules, fail at end | modules downstream of a failure still cascade-skipped → incomplete | no | +| `--fail-never` | never fail on `MojoFailureException` (a real compile error / `MojoExecutionException` still halts) | **all** modules run → **complete** reports/SARIF | **yes** — violation failures swallowed | + +## Why `report goal + --fail-never + jq count` (the count-gate) + +We want **both** SARIF (for inline display) **and** a build gate, from **one** +analysis pass. The plugin design forces a choice: + +| Combination | Result | +|---|---| +| check goal + `--fail-fast` | gates, but first violation hides later modules' findings | +| check goal + `--fail-at-end` | gates, but cascade-skip drops downstream findings | +| check goal + `--fail-never` | doesn't gate (failure swallowed) — pointless | +| **report goal + `--fail-never` + jq count** | **all** modules analyzed → complete SARIF (full inline display); the jq count does the gating | + +So each tool runs its report goal **once** (`compile` + report, full classpath → +correct + SARIF), and a jq count over the merged SARIF fails the job. + +### Why not the `:check` goals +- They `@Execute`-fork a **second** analysis on top of the `pmd:pmd` we already run + for SARIF — pure waste. +- The forked analysis is only correct with the full `compile` + classpath; run cheaply + (`-Dtycho.mode=maven` / no compile) it loses the classpath and **false-positives** + (e.g. PMD `InvalidLogMessageFormat` flags the SLF4J trailing-`Throwable` idiom because + it can't resolve the last arg as a `Throwable`). +- `pmd:check` is **incompatible with `-Dformat=sarif`**: its fork writes `pmd.sarif.json`, + but the check looks for `pmd.xml` → "unable to find report." + +### count-gate == `:check` fidelity (for this project's config) +Counting **all** SARIF results equals what `:check` would fail on, because: PMD has no +`failurePriority` override (default 5 = all priorities), Checkstyle is globally +`severity=warning` with no info-level results, and SpotBugs uses `threshold=Low` with no +`failThreshold`/`maxAllowedViolations` override. Three config-drift caveats would make +the count-gate *stricter* than `:check` (over-fail, never under-fail), each guard-worthy: +1. PMD `pmd.failurePriority` set below 5. +2. A Checkstyle rule emitting `info`/`note` severity. +3. SpotBugs `failThreshold` or non-zero `maxAllowedViolations`. + +## Two operational rules + +- **`compile` must be full-reactor** (`-f ddk-parent/pom.xml`, no `-pl`). PMD's + type-resolving rules need the complete aux-classpath; a `-pl` subset produces + false positives (the trailing-`Throwable` case). +- **Merge SARIFs from SARIF files only.** Code Scanning accepts one run per category, + so per-module SARIFs are merged (jq) before upload. The `ddk-parent` aggregator emits + plain-XML `checkstyle-result.xml`; the merge must filter to JSON-parseable files. + +## SARIF mechanics + +- PMD: `-Dformat=net.sourceforge.pmd.renderers.SarifRenderer` → `pmd.sarif.json`. +- Checkstyle: `-Dcheckstyle.output.format=sarif` → SARIF content in `checkstyle-result.xml`. +- SpotBugs: `-Dspotbugs.sarifOutput=true` → `spotbugsSarif.json`. +- All emit SARIF 2.1.0. Merge per tool → `github/codeql-action/upload-sarif` + (`permissions: security-events: write`). CPD has no SARIF renderer → XML + grep gate + (no inline display for CPD; acceptable, duplications are rare).