From 2cc98db9c9dfba4139b602e7a146a9cdffb8b0ea Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Fri, 8 May 2026 11:38:57 +0200 Subject: [PATCH 1/6] chore(ai): Add check-code-attribution Claude Code skill (JAVA-499) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a check-code-attribution skill that verifies license headers + THIRD_PARTY_NOTICES.md entries for code copied or adapted from third parties. Reports any invalid headers and entries in the branch diff, along with suggestions for their correction. Implementation notes: - Executes via [Warden](https://warden.sentry.dev/) on every PR (can also be run locally). - Blocks merging via a "Requires Changes" comment for high-severity issues (viz., licensing). - Generates non-blocking PR comments for medium-severity issues (e.g., missing fields in license headers). - Atm workflow is *not* required on GitHub (i.e., the "Requires Changes" comment is the only blocking mechanism). Current configs: ┌─────────────────┬─────────────────────────────┬───────────────────────────────────────────────────┐ │ Setting │ Value │ Effect │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ model │ anthropic/claude-sonnet-4-6 │ Model used for analysis │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ maxTurns │ 15 │ Max tool calls per chunk │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ skill │ alt2-check-code-attribution │ Per-file vendored code attribution check │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ failOn │ high │ Exit code 1 on license violations │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ reportOn │ low │ Show findings at low+ via PR comment │ 
├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ requestChanges │ true │ REQUEST_CHANGES review when high finding exists │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ failCheck │ true │ Red X on check run when high finding exists │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ triggers │ pull_request + local │ Runs on PR open/sync and local warden invocations │ ├─────────────────┼─────────────────────────────┼───────────────────────────────────────────────────┤ │ reportOnSuccess │ false (default) │ No comment when everything is clean │ └─────────────────┴─────────────────────────────┴───────────────────────────────────────────────────┘ ignorePaths: ┌─────────────────────┬─────────────────────────────────────────────────────────────────────────────────┐ │ Category │ Patterns │ ├─────────────────────┼─────────────────────────────────────────────────────────────────────────────────┤ │ Infrastructure dirs │ .claude/, .github/, .gradle/, .idea/, .mvn/, buildSrc/, build-logic/, gradle/ │ ├─────────────────────┼─────────────────────────────────────────────────────────────────────────────────┤ │ Generated dirs │ **/generated/**, **/ksp/** │ ├─────────────────────┼─────────────────────────────────────────────────────────────────────────────────┤ │ Generated files │ *.aidl, *.api, *.interp, *.tokens, *.g.kt, *.pb.java, *Binding.java, *Grpc.java │ ├─────────────────────┼─────────────────────────────────────────────────────────────────────────────────┤ │ Build scripts │ build.gradle(.kts), settings.gradle(.kts), gradlew, gradlew.bat │ └─────────────────────┴─────────────────────────────────────────────────────────────────────────────────┘ --- .claude/skills/.gitignore | 2 + .../skills/check-code-attribution/SKILL.md | 217 ++++++++++++++++++ .gitignore | 3 + warden.toml | 87 +++++++ 4 files changed, 309 
insertions(+) create mode 100644 .claude/skills/check-code-attribution/SKILL.md create mode 100644 warden.toml diff --git a/.claude/skills/.gitignore b/.claude/skills/.gitignore index 229f4495ee3..2dd55eba801 100644 --- a/.claude/skills/.gitignore +++ b/.claude/skills/.gitignore @@ -8,3 +8,5 @@ !test/** !btrace-perfetto/ !btrace-perfetto/** +!check-code-attribution/ +!check-code-attribution/** diff --git a/.claude/skills/check-code-attribution/SKILL.md b/.claude/skills/check-code-attribution/SKILL.md new file mode 100644 index 00000000000..1ba1e15e1c3 --- /dev/null +++ b/.claude/skills/check-code-attribution/SKILL.md @@ -0,0 +1,217 @@ +--- +name: check-code-attribution +description: Per-file check of vendored code attribution in the current branch diff, including license headers, THIRD_PARTY_NOTICES.md entries, and compatibility with Sentry's licensing policy +allowed-tools: Bash Read Grep Glob +--- + +# Check Code Attribution + +You are reviewing changed files for third-party code attribution compliance in **sentry-java**, an MIT-licensed repository. + +## Local runs — discover changed files first + +When running locally (not via Warden), determine which files changed on this branch: + +```bash +MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) +git diff --name-only "${MB}"..HEAD +``` + +Then run the Quick triage and subsequent checks on **every** file in that list. Warden's `ignorePaths` in `warden.toml` lists the paths to skip — apply the same exclusions locally. + +### Warden CLI (optional local parity check) + +Warden does **not** use Cursor auth. Before running Warden locally, configure a provider (same model family as `warden.toml`, or override with `-m`): + +```bash +# Option A: Anthropic API key (matches CI model in warden.toml) +export WARDEN_ANTHROPIC_API_KEY=sk-ant-... # or: export ANTHROPIC_API_KEY=sk-ant-... 
+ +# Option B: Pi OAuth / API key store (~/.pi/agent/auth.json) +npx pi # then run /login and pick Anthropic (or another provider) + +# Option C: Different provider for a one-off run +export WARDEN_OPENAI_API_KEY=sk-... +npx @sentry/warden origin/main..HEAD --skill check-code-attribution -m openai/gpt-5.5 -vv +``` + +```bash +npx @sentry/warden origin/main..HEAD --skill check-code-attribution -vv +``` + +If you only need attribution review in the IDE, `/check-code-attribution` in Cursor does not require Warden credentials. + +When running via Warden, the changed file is already provided — skip branch-wide discovery, but follow **Warden execution** below. + +## Warden execution + +Warden analyzes one changed file per run (whole-file mode). Complete every Quick triage step — the diff alone is not sufficient. + +**Mandatory on every run (do not skip):** + +1. `Read` the first 20 lines of the changed file. +2. `Grep` `THIRD_PARTY_NOTICES.md` for the class name (filename without extension, e.g. `ANRWatchDog` for `ANRWatchDog.java`). +3. When Bash is available, compare the merge-base header: + ```bash + MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) + git show "${MB}:<path/to/file>" | head -50 + ``` + +**Do not dismiss findings because:** + +- A `THIRD_PARTY_NOTICES.md` entry exists — file headers are still required; NOTICES does not replace them. +- The diff only removes a header comment block — removed `-` lines with `Copyright`, `Licensed under`, license disclaimers, or vendoring language ("adapted from", etc.) mean attribution was stripped. +- The header says "Adapted from …" but omits copyright holder or license name — flag missing header fields. + +For `THIRD_PARTY_NOTICES.md` runs: for every **removed** entry in the diff, use `Read` or `Glob` to confirm whether Scope files still exist with attribution headers. If they do, the entry must not be removed. 
+ +## Quick triage + +Sentry's own files carry **no** copyright headers — any copyright/license line indicates third-party code. Every file that reaches this skill is in scope — do not skip files based on extension. + +If this file is `THIRD_PARTY_NOTICES.md`, go to the THIRD_PARTY_NOTICES section below. + +For all other files, perform these checks **before** deciding whether to proceed: + +1. **Read the file header** — use the Read tool to read the first 20 lines of the file. Look for vendored-code signals: `Copyright`, `Licensed under`, `SPDX-License-Identifier`, or vendoring language ("adapted from", "backported from", "based on", "copied from", "derived from", "inspired by", "ported from", "translated from", "vendored"). +2. **Check THIRD_PARTY_NOTICES.md** — use Grep to search `THIRD_PARTY_NOTICES.md` for the file name without extension (e.g., search for `ANRWatchDog` when reviewing `ANRWatchDog.java`). A match means this is a known vendored file. +3. **Scan the diff** — check for vendored-code signals on both added (`+`) and **removed (`-`)** lines. Removed copyright/license lines ARE signals — they mean attribution is being stripped. + +**A signal in ANY of these three sources means this is vendored code — proceed to the vendored source file section.** + +A file referenced in THIRD_PARTY_NOTICES.md is ALWAYS vendored, even if its current header has no attribution. + +**If none of the three sources have signals, report no findings and stop.** + +--- + +## If this file is `THIRD_PARTY_NOTICES.md` + +Validate the changed entries using the diff context: + +1. For each added or modified entry, verify it has all required fields: **Source URL**, **License name**, **Copyright**, **Scope** (file paths), and **full license text** in a fenced code block. +2. For each Scope path, verify the file(s) exist (use Glob or Read). +3. Flag new license types — especially copyleft or AGPL. +4. Flag orphaned entries whose Scope files no longer exist. +5. 
For **removed** entries (lines prefixed with `-` in the diff), use Read to check whether the Scope files still exist and still have attribution headers. If they do, the entry must not be removed. +6. Check **copyright consistency** — the Copyright field must match the copyright line inside the embedded license text. Flag mismatches. + +--- + +## If this is a vendored file + +### 1. Check attribution header + +The file must have a license header near the top (before the `package` statement in Java/Kotlin files) with: +- Library name or origin +- Copyright year and holder +- License name +- Source URL + +Exact wording and comment style may vary. Only flag **missing fields**, not formatting. + +Compare the current header (from the Read in Quick triage) against the THIRD_PARTY_NOTICES.md entry. For example, if the NOTICES entry says this file is MIT-licensed by "Salomon BRYS" but the current header has no copyright or license mention, the header was stripped. + +When Bash is available (local runs), also compare against the merge-base version for additional context: +```bash +MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) +git show "${MB}:<path/to/file>" | head -50 +``` + +Flag these issues: +- **Header stripped** — file is in NOTICES but current header has no attribution +- **Header truncated** — header is present but missing required fields (e.g., copyright line removed, license disclaimer removed) +- **Header inconsistent** — header contradicts what the NOTICES entry says +- **Diff removes attribution lines** — `Copyright`, `Licensed under`, etc. appear on removed lines in the diff + +### 2. Check THIRD_PARTY_NOTICES.md entry + +From the Grep in Quick triage: if no matching entry exists, flag it as missing. A valid entry needs: Source URL, License name, Copyright, Scope, full license text. + +### 3. 
Check license compatibility + +Classify the license per Sentry's Open Source Legal Policy (https://open.sentry.io/licensing/): + +| Tier | Examples | Severity | +|-----------------|-------------------------------------------------|------------------------------------------| +| Permissive | MIT, BSD, Apache 2.0, ISC, CC0, Unlicense, Zlib | Allowed | +| Weak copyleft | LGPL, MPL, EPL, CDDL | **high** — requires review | +| Strong copyleft | GPL, QPL, Sleepycat, OSL | **high** — requires legal review | +| AGPL | — | **high** — absolute ban, must be removed | +| No license | — | **high** — assume no permission | + +--- + +## If this is a deleted vendored file + +If the diff deletes a file and the removed lines contained attribution headers, check whether `THIRD_PARTY_NOTICES.md` still references it — the entry should be updated or removed. + +--- + +## Severity guide + +| Level | Use for | +|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------| +| **high** | 🚨 License violations: AGPL, copyleft, unlicensed, no-license code | +| **medium** | ⚠️ Missing attribution header fields, stripped headers, missing/inconsistent NOTICES entries, deleted/renamed vendored files needing NOTICES update | +| **low** | 👀 Attribution present but could be improved | + +Warden relies on these severity levels when deciding whether to comment on PRs or require changes. Put the severity emoji **only on the finding title** (see Output) so reviewers can triage at a glance. + +## Output + +**No issues → empty response (say nothing).** + +Otherwise, report each finding ordered by severity (most severe first). + +### Emoji placement (required) + +Use the emoji from the severity guide (🚨, ⚠️, or 👀) — not the word `high`, `medium`, or `low`. + +| Field | Emoji? 
| Example | +|-------------------|--------------------------|----------------------------------------------------------------------------------------------------| +| **Title** | Yes — once, at the start | `⚠️ Copyright line stripped from vendored file header` | +| **Description** | **No** | `**io.sentry.cache.tape.FileObjectQueue** — The Copyright (C) 2010 Square, Inc. line was removed…` | +| **Verification** | **No** | Evidence steps only | +| **Suggested fix** | **No** | Fix text only | + +**Good (Warden PR comment):** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: **io.sentry.cache.tape.FileObjectQueue** — The `Copyright (C) 2010 Square, Inc.` line was removed from this vendored file's header. Please restore the copyright line. +``` + +**Bad — emoji repeated in the description:** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: ⚠️ The `Copyright (C) 2010 Square, Inc.` line was removed… +``` + +### Warden runs + +For each finding, set: + +- **title** — ` ` (imperative, no class name). Warden bolds this as the PR comment heading. +- **description** — One or two sentences: `**** — `. Do **not** start with an emoji. +- **verification** — Optional evidence steps. No emoji. + +Use fully qualified Java class names in the description (e.g. `io.sentry.CircularFifoQueue`), not file paths. For license issues, include the policy link in the description. + +### Local / IDE runs + +Use this numbered format — same title vs description split as above: + +``` +1\. <emoji> **<title>** + **<FQCN>** — <what is wrong and how to fix it> + +2\. <emoji> **<title>** + **<FQCN>** — <what is wrong and how to fix it> +``` + +Rules: + +- **Escape the period** after the number (`1\.` not `1.`) so markdown does not collapse entries into a tight list. +- Leave an empty line between each numbered finding. diff --git a/.gitignore b/.gitignore index a7899736a86..f252087a5ab 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,6 @@ spy.log # Auto-generated by dotagents — do not commit these files. 
agents.lock .agents/.gitignore + +# Warden local run logs +.warden/logs/ diff --git a/warden.toml b/warden.toml new file mode 100644 index 00000000000..53571504e25 --- /dev/null +++ b/warden.toml @@ -0,0 +1,87 @@ +version = 1 + +[defaults] +model = "anthropic/claude-opus-4-6" +maxTurns = 30 + +# Attribution findings are policy checks; a second verifier pass often rejects valid +# header/NOTICES mismatches (e.g. "NOTICES still documents it"). +[defaults.verification] +enabled = false + +# Skip generated/binary files from analysis +[[defaults.chunking.filePatterns]] +pattern = "**/*.api" +mode = "skip" + +[[defaults.chunking.filePatterns]] +pattern = "**/gradlew" +mode = "skip" + +[[defaults.chunking.filePatterns]] +pattern = "**/gradlew.bat" +mode = "skip" + +# Whole-file mode: attribution needs file header + NOTICES cross-check, not isolated hunks +[[defaults.chunking.filePatterns]] +pattern = "**/*.java" +mode = "whole-file" + +[[defaults.chunking.filePatterns]] +pattern = "**/*.kt" +mode = "whole-file" + +[[defaults.chunking.filePatterns]] +pattern = "THIRD_PARTY_NOTICES.md" +mode = "whole-file" + +# Coalesce hunks aggressively for any remaining per-hunk files +[defaults.chunking.coalesce] +enabled = true +maxGapLines = 100 +maxChunkSize = 16000 + +[[skills]] +name = "check-code-attribution" +failOn = "high" +reportOn = "low" +maxTurns = 30 +ignorePaths = [ + # Infrastructure directories + ".claude/**", + ".cursor/**", + ".github/**", + ".gradle/**", + ".idea/**", + ".mvn/**", + "gradle/**", + # Generated files + "**/generated/**", + "**/ksp/**", + "**/*.aidl", + "**/*.api", + "**/*.interp", + "**/*.tokens", + "**/*.g.kt", + "**/*.pb.java", + "**/databinding/*Binding.java", + "**/grpc/*Grpc.java", + "**/gradlew", + "**/gradlew.bat", + "**/mvnw", + "**/mvnw.cmd", + # Binary files + "**/*.jar", + # Warden infrastructure + ".warden/**", + "warden.toml", +] + +[[skills.triggers]] +type = "pull_request" +actions = ["opened", "synchronize"] +requestChanges = false # 
Non-blocking COMMENT reviews only — no "Request Changes" PR reviews. +failCheck = false # Advisory only — do not fail the GitHub check. + +[[skills.triggers]] +type = "local" From 772fdc9b07854c064dd2e5184d23dfb10c5a3079 Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Mon, 18 May 2026 16:37:17 +0200 Subject: [PATCH 2/6] More --- .../skills/check-code-attribution/SKILL.md | 24 ++++++++++--------- AGENTS.md | 2 ++ warden.toml | 23 +++++++++++------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/.claude/skills/check-code-attribution/SKILL.md b/.claude/skills/check-code-attribution/SKILL.md index 1ba1e15e1c3..f3ce36a9a9c 100644 --- a/.claude/skills/check-code-attribution/SKILL.md +++ b/.claude/skills/check-code-attribution/SKILL.md @@ -49,8 +49,8 @@ Warden analyzes one changed file per run (whole-file mode). Complete every Quick **Mandatory on every run (do not skip):** -1. `Read` the first 20 lines of the changed file. -2. `Grep` `THIRD_PARTY_NOTICES.md` for the class name (filename without extension, e.g. `ANRWatchDog` for `ANRWatchDog.java`). +1. `Read` the first 50 lines of the changed file. +2. `Grep` `THIRD_PARTY_NOTICES.md` for the class name (filename without extension, e.g. `ANRWatchDog` for `ANRWatchDog.java`). On renames, also grep the old basename and read Scope sections (see Quick triage). 3. When Bash is available, compare the merge-base header: ```bash MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) @@ -73,8 +73,8 @@ If this file is `THIRD_PARTY_NOTICES.md`, go to the THIRD_PARTY_NOTICES section For all other files, perform these checks **before** deciding whether to proceed: -1. **Read the file header** — use the Read tool to read the first 20 lines of the file. 
Look for vendored-code signals: `Copyright`, `Licensed under`, `SPDX-License-Identifier`, or vendoring language ("adapted from", "backported from", "based on", "copied from", "derived from", "inspired by", "ported from", "translated from", "vendored"). -2. **Check THIRD_PARTY_NOTICES.md** — use Grep to search `THIRD_PARTY_NOTICES.md` for the file name without extension (e.g., search for `ANRWatchDog` when reviewing `ANRWatchDog.java`). A match means this is a known vendored file. +1. **Read the file header** — use the Read tool to read the first 50 lines of the file. Look for vendored-code signals: `Copyright`, `Licensed under`, `SPDX-License-Identifier`, or vendoring language ("adapted from", "backported from", "based on", "copied from", "derived from", "inspired by", "ported from", "translated from", "vendored"). +2. **Check THIRD_PARTY_NOTICES.md** — use Grep to search `THIRD_PARTY_NOTICES.md` for the file name without extension (e.g., search for `ANRWatchDog` when reviewing `ANRWatchDog.java`). A match means this is a known vendored file. **Renames:** if the diff is a rename (`similarity index` / `rename from` in the diff, or a delete of one path and add of another with the same content), also Grep for the **old** basename and read **Scope** sections in matching entries — NOTICES may still reference the previous class or path name. 3. **Scan the diff** — check for vendored-code signals on both added (`+`) and **removed (`-`)** lines. Removed copyright/license lines ARE signals — they mean attribution is being stripped. **A signal in ANY of these three sources means this is vendored code — proceed to the vendored source file section.** @@ -132,13 +132,15 @@ From the Grep in Quick triage: if no matching entry exists, flag it as missing. 
Classify the license per Sentry's Open Source Legal Policy (https://open.sentry.io/licensing/): -| Tier | Examples | Severity | -|-----------------|-------------------------------------------------|------------------------------------------| -| Permissive | MIT, BSD, Apache 2.0, ISC, CC0, Unlicense, Zlib | Allowed | -| Weak copyleft | LGPL, MPL, EPL, CDDL | **high** — requires review | -| Strong copyleft | GPL, QPL, Sleepycat, OSL | **high** — requires legal review | -| AGPL | — | **high** — absolute ban, must be removed | -| No license | — | **high** — assume no permission | +| Tier | Examples | Finding | +|-----------------|-------------------------------------------------|---------------------------------------------| +| Permissive | MIT, BSD, Apache 2.0, ISC, CC0, Unlicense, Zlib | None — license is compatible | +| Weak copyleft | LGPL, MPL, EPL, CDDL | 🚨 **high** — requires review | +| Strong copyleft | GPL, QPL, Sleepycat, OSL | 🚨 **high** — requires legal review | +| AGPL | — | 🚨 **high** — absolute ban, must be removed | +| No license | — | 🚨 **high** — assume no permission | + +**Permissive licenses:** do not report a finding solely because the license is MIT/BSD/Apache/etc. Only flag attribution problems (missing or stripped header fields, missing/inconsistent `THIRD_PARTY_NOTICES.md` entry). Copyleft and unlicensed code still get 🚨 findings per the table. --- diff --git a/AGENTS.md b/AGENTS.md index 1784e4f950e..ff50727c662 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,6 +154,8 @@ When adapting code from third-party libraries: ``` 2. Add a full attribution entry to `THIRD_PARTY_NOTICES.md` following the existing format (Source, License, Copyright, Scope, full license text) +3. Run the `check-code-attribution` skill locally or wait for it to be auto-run against your PR to check for required fields and verify new licenses against [Sentry's Open Source Legal Policy](https://open.sentry.io/licensing/). 
+ ### Getting PR Information Use `gh pr view` to get PR details from the current branch. This is needed when adding changelog entries, which require the PR number. diff --git a/warden.toml b/warden.toml index 53571504e25..ddd3da96039 100644 --- a/warden.toml +++ b/warden.toml @@ -31,6 +31,10 @@ mode = "whole-file" pattern = "**/*.kt" mode = "whole-file" +[[defaults.chunking.filePatterns]] +pattern = "**/*.kts" +mode = "whole-file" + [[defaults.chunking.filePatterns]] pattern = "THIRD_PARTY_NOTICES.md" mode = "whole-file" @@ -45,7 +49,6 @@ maxChunkSize = 16000 name = "check-code-attribution" failOn = "high" reportOn = "low" -maxTurns = 30 ignorePaths = [ # Infrastructure directories ".claude/**", @@ -56,22 +59,26 @@ ignorePaths = [ ".mvn/**", "gradle/**", # Generated files - "**/generated/**", - "**/ksp/**", "**/*.aidl", "**/*.api", - "**/*.interp", - "**/*.tokens", "**/*.g.kt", + "**/*.interp", "**/*.pb.java", + "**/*.tokens", "**/databinding/*Binding.java", - "**/grpc/*Grpc.java", + "**/generated/**", "**/gradlew", "**/gradlew.bat", + "**/grpc/*Grpc.java", + "**/ksp/**", "**/mvnw", "**/mvnw.cmd", # Binary files "**/*.jar", + # Repo docs (attribution examples in prose, not vendored code) + "AGENTS.md", + "CHANGELOG.md", + "CLAUDE.md", # Warden infrastructure ".warden/**", "warden.toml", @@ -80,8 +87,8 @@ ignorePaths = [ [[skills.triggers]] type = "pull_request" actions = ["opened", "synchronize"] -requestChanges = false # Non-blocking COMMENT reviews only — no "Request Changes" PR reviews. -failCheck = false # Advisory only — do not fail the GitHub check. 
+requestChanges = false +failCheck = false [[skills.triggers]] type = "local" From 9b8d2bdbc96b575d1ad859d392223be0c9f233a9 Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Mon, 18 May 2026 17:04:10 +0200 Subject: [PATCH 3/6] refine --- .../skills/check-code-attribution/SKILL.md | 83 +++++++++++++------ agents.toml | 4 + 2 files changed, 63 insertions(+), 24 deletions(-) diff --git a/.claude/skills/check-code-attribution/SKILL.md b/.claude/skills/check-code-attribution/SKILL.md index f3ce36a9a9c..c351c5719c9 100644 --- a/.claude/skills/check-code-attribution/SKILL.md +++ b/.claude/skills/check-code-attribution/SKILL.md @@ -60,7 +60,7 @@ Warden analyzes one changed file per run (whole-file mode). Complete every Quick **Do not dismiss findings because:** - A `THIRD_PARTY_NOTICES.md` entry exists — file headers are still required; NOTICES does not replace them. -- The diff only removes a header comment block — removed `-` lines with `Copyright`, `Licensed under`, license disclaimers, or vendoring language ("adapted from", etc.) mean attribution was stripped. +- The diff only removes a header comment block — if removed `-` lines include a **required field** (see below) or vendoring language ("adapted from", etc.), attribution was stripped. Removing boilerplate alone is not stripping. - The header says "Adapted from …" but omits copyright holder or license name — flag missing header fields. For `THIRD_PARTY_NOTICES.md` runs: for every **removed** entry in the diff, use `Read` or `Glob` to confirm whether Scope files still exist with attribution headers. If they do, the entry must not be removed. @@ -75,7 +75,7 @@ For all other files, perform these checks **before** deciding whether to proceed 1. **Read the file header** — use the Read tool to read the first 50 lines of the file. 
Look for vendored-code signals: `Copyright`, `Licensed under`, `SPDX-License-Identifier`, or vendoring language ("adapted from", "backported from", "based on", "copied from", "derived from", "inspired by", "ported from", "translated from", "vendored"). 2. **Check THIRD_PARTY_NOTICES.md** — use Grep to search `THIRD_PARTY_NOTICES.md` for the file name without extension (e.g., search for `ANRWatchDog` when reviewing `ANRWatchDog.java`). A match means this is a known vendored file. **Renames:** if the diff is a rename (`similarity index` / `rename from` in the diff, or a delete of one path and add of another with the same content), also Grep for the **old** basename and read **Scope** sections in matching entries — NOTICES may still reference the previous class or path name. -3. **Scan the diff** — check for vendored-code signals on both added (`+`) and **removed (`-`)** lines. Removed copyright/license lines ARE signals — they mean attribution is being stripped. +3. **Scan the diff** — check for vendored-code signals on both added (`+`) and **removed (`-`)** lines. Removed lines that drop a **required field** (copyright, license name, source URL, vendoring origin) ARE signals. Removed disclaimer/boilerplate lines alone are not. **A signal in ANY of these three sources means this is vendored code — proceed to the vendored source file section.** @@ -102,15 +102,24 @@ Validate the changed entries using the diff context: ### 1. Check attribution header -The file must have a license header near the top (before the `package` statement in Java/Kotlin files) with: -- Library name or origin -- Copyright year and holder -- License name -- Source URL +**Required fields only** — the file header (before `package` in Java/Kotlin) must include all four. This matches `AGENTS.md`: -Exact wording and comment style may vary. Only flag **missing fields**, not formatting. 
+| # | Required field | Examples | +|---|------------------------|---------------------------------------------------------------------| +| 1 | Library name or origin | `Adapted from …`, `Based on …`, source library name | +| 2 | Copyright | `Copyright (c) 2016 …`, `Copyright 2010 Square, Inc.` | +| 3 | License name | `Licensed under the Apache License, Version 2.0`, `The MIT License` | +| 4 | Source URL | `https://github.com/…` | -Compare the current header (from the Read in Quick triage) against the THIRD_PARTY_NOTICES.md entry. For example, if the NOTICES entry says this file is MIT-licensed by "Salomon BRYS" but the current header has no copyright or license mention, the header was stripped. +Exact wording and comment style may vary. **Do not flag** missing or changed content that is not one of these four fields. + +**Not required in the file header** (full text belongs in `THIRD_PARTY_NOTICES.md`, not in every source file): + +- Full license boilerplate (MIT permission paragraph, Apache "Unless required by applicable law…" disclaimer, ASF contributor grant preamble) +- Wording differences vs the NOTICES embedded license text (e.g. shortened Apache header vs canonical ASF phrasing) +- Comment style (`//` vs `/* */`), line wrapping, or extra Sentry modification notes + +Compare the current header against the NOTICES entry **only for the four required fields** — e.g. if NOTICES says MIT by "Salomon BRYS" but the header has no copyright or license name, flag it. If both have copyright + license name but the header omits the Apache disclaimer while NOTICES still has the full text, **do not flag**. 
When Bash is available (local runs), also compare against the merge-base version for additional context: ```bash @@ -119,10 +128,16 @@ git show "${MB}:" | head -50 ``` Flag these issues: -- **Header stripped** — file is in NOTICES but current header has no attribution -- **Header truncated** — header is present but missing required fields (e.g., copyright line removed, license disclaimer removed) -- **Header inconsistent** — header contradicts what the NOTICES entry says -- **Diff removes attribution lines** — `Copyright`, `Licensed under`, etc. appear on removed lines in the diff +- **Header stripped** — file is in NOTICES but current header has none of the four required fields +- **Header truncated** — one or more **required** fields were removed (e.g. copyright line or `Licensed under …` removed) while the file remains vendored +- **Header inconsistent** — a **required** field contradicts NOTICES (wrong copyright holder/year, wrong license name) — not boilerplate or phrasing differences +- **Diff removes required attribution** — removed `-` lines drop a required field or vendoring origin (`Adapted from`, etc.); removing disclaimer/boilerplate lines alone is **not** this + +**Do not report** (no finding, or at most 👀 **low** if you mention NOTICES sync voluntarily — prefer silence): + +- Apache/MIT disclaimer or permission paragraphs removed but all four required fields remain +- Header reworded to a shorter permissive-license form with the same copyright holder and license name +- Header and NOTICES differ only in full license body text ### 2. Check THIRD_PARTY_NOTICES.md entry @@ -140,7 +155,7 @@ Classify the license per Sentry's Open Source Legal Policy (https://open.sentry. | AGPL | — | 🚨 **high** — absolute ban, must be removed | | No license | — | 🚨 **high** — assume no permission | -**Permissive licenses:** do not report a finding solely because the license is MIT/BSD/Apache/etc. 
Only flag attribution problems (missing or stripped header fields, missing/inconsistent `THIRD_PARTY_NOTICES.md` entry). Copyleft and unlicensed code still get 🚨 findings per the table. +**Permissive licenses:** do not report a finding solely because the license is MIT/BSD/Apache/etc. Only flag missing or stripped **required** header fields, or missing/inconsistent `THIRD_PARTY_NOTICES.md` entry. Do not flag disclaimer/boilerplate-only diffs. Copyleft and unlicensed code still get 🚨 findings per the table. --- @@ -152,11 +167,11 @@ If the diff deletes a file and the removed lines contained attribution headers, ## Severity guide -| Level | Use for | -|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------| -| **high** | 🚨 License violations: AGPL, copyleft, unlicensed, no-license code | -| **medium** | ⚠️ Missing attribution header fields, stripped headers, missing/inconsistent NOTICES entries, deleted/renamed vendored files needing NOTICES update | -| **low** | 👀 Attribution present but could be improved | +| Level | Use for | +|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **high** | 🚨 License violations: AGPL, copyleft, unlicensed, no-license code | +| **medium** | ⚠️ Missing **required** header fields, stripped required fields, missing/inconsistent NOTICES entries, deleted/renamed vendored files needing NOTICES update | +| **low** | 👀 Optional nits only — never use for boilerplate/disclaimer-only header changes when all four required fields are present | Warden relies on these severity levels when deciding whether to comment on PRs or require changes. Put the severity emoji **only on the finding title** (see Output) so reviewers can triage at a glance. 
@@ -184,23 +199,41 @@ Title: ⚠️ Copyright line stripped from vendored file header Description: **io.sentry.cache.tape.FileObjectQueue** — The `Copyright (C) 2010 Square, Inc.` line was removed from this vendored file's header. Please restore the copyright line. ``` -**Bad — emoji repeated in the description:** +**Bad — emoji in the description (never do this):** ``` Title: ⚠️ Copyright line stripped from vendored file header Description: ⚠️ The `Copyright (C) 2010 Square, Inc.` line was removed… ``` +**Bad — emoji before the class name:** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: ⚠️ **io.sentry.cache.tape.FileObjectQueue** — The copyright line was removed… +``` + ### Warden runs -For each finding, set: +For each finding, set these fields exactly: -- **title** — ` ` (imperative, no class name). Warden bolds this as the PR comment heading. -- **description** — One or two sentences: `**** — `. Do **not** start with an emoji. -- **verification** — Optional evidence steps. No emoji. +| Field | Value | +|------------------|-----------------------------------------------------------------------------------------------------------------| +| **severity** | `high`, `medium`, or `low` — **never** put emoji here; Warden maps severity from this field, not from the title | +| **title** | ` ` — emoji allowed **only** here (imperative, no class name) | +| **description** | `**** — ` — **plain text only**; see rules below | +| **verification** | Optional evidence steps — plain text only | + +**Description rules (Warden):** + +- **Must** start with `**` + fully qualified class name + `** —` (e.g. `**io.sentry.CircularFifoQueue** —`). +- **Must not** contain 🚨, ⚠️, 👀, or the words `high`, `medium`, or `low` as severity labels. +- **Must not** repeat the title or paraphrase it with an emoji prefix. Use fully qualified Java class names in the description (e.g. `io.sentry.CircularFifoQueue`), not file paths. 
For license issues, include the policy link in the description. +**Before submitting findings:** For every finding, confirm `description` does not match `[🚨⚠️👀]` and starts with `**io.` or `**com.` (or the correct FQCN). If it contains any emoji, rewrite the description without it. + ### Local / IDE runs Use this numbered format — same title vs description split as above: @@ -215,5 +248,7 @@ Use this numbered format — same title vs description split as above: Rules: +- Put the severity emoji **only** on the title line (`1\. ⚠️ **…**`), never on the description line. +- The description line starts with `**** —` and must not contain 🚨, ⚠️, or 👀. - **Escape the period** after the number (`1\.` not `1.`) so markdown does not collapse entries into a tight list. - Leave an empty line between each numbered finding. diff --git a/agents.toml b/agents.toml index b4c9e091b70..d9770ee7df5 100644 --- a/agents.toml +++ b/agents.toml @@ -35,3 +35,7 @@ source = "path:.agents/skills/test" [[skills]] name = "btrace-perfetto" source = "path:.agents/skills/btrace-perfetto" + +[[skills]] +name = "check-code-attribution" +source = "path:.agents/skills/check-code-attribution" From 94fdf254d83646401c3509106c3df866cb558cb1 Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Wed, 13 May 2026 16:01:00 +0200 Subject: [PATCH 4/6] TRIAL: Add test fixtures for check-code-attribution skill validation Reproduces the 11 manual test scenarios from PR #5401 so the updated skill can be exercised against known attribution issues. 
Co-Authored-By: Claude Opus 4.6 --- THIRD_PARTY_NOTICES.md | 80 ++++++++++++------- .../io/sentry/android/core/ANRWatchDog.java | 25 +----- .../java/io/sentry/CircularFifoQueue.java | 12 +-- .../sentry/transport/ReusableCountLatch.java | 2 - .../io/sentry/util/CompactJsonWriter.java | 57 +++++++++++++ .../io/sentry/util/ConcurrentLruCache.java | 37 +++++++++ .../main/java/io/sentry/util/LeakyBucket.java | 37 +++++++++ .../java/io/sentry/util/SlidingWindow.java | 35 ++++++++ .../main/java/io/sentry/util/TokenBucket.java | 40 ++++++++++ 9 files changed, 260 insertions(+), 65 deletions(-) create mode 100644 sentry/src/main/java/io/sentry/util/CompactJsonWriter.java create mode 100644 sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java create mode 100644 sentry/src/main/java/io/sentry/util/LeakyBucket.java create mode 100644 sentry/src/main/java/io/sentry/util/SlidingWindow.java create mode 100644 sentry/src/main/java/io/sentry/util/TokenBucket.java diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md index 5a48d567fac..8f64706eef8 100644 --- a/THIRD_PARTY_NOTICES.md +++ b/THIRD_PARTY_NOTICES.md @@ -94,7 +94,7 @@ limitations under the License. **Source:** https://github.com/square/tape (Commit: 445cd3fd0a7b3ec48c9ea3e0e86663fe6d3735d8)
**License:** Apache License 2.0
-**Copyright:** Copyright (C) 2010 Square, Inc. +**Copyright:** Copyright (C) 2015 Square, Inc. ### Scope @@ -118,34 +118,6 @@ limitations under the License. --- -## Square — Seismic (Apache 2.0) - -**Source:** https://github.com/square/seismic
-**License:** Apache License 2.0
-**Copyright:** Copyright 2010 Square, Inc. - -### Scope - -The Sentry Java SDK includes an adapted version of Square's Seismic shake detection algorithm. The rolling sample window approach and `SampleQueue`/`SamplePool` data structures in `io.sentry.android.core.SentryShakeDetector` are based on Seismic's `ShakeDetector`. - -``` -Copyright 2010 Square, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` - ---- - ## Square — Curtains (Apache 2.0) **Source:** https://github.com/square/curtains (v1.2.5)
@@ -484,3 +456,53 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` + +--- + +## Resilience4j — RateLimiter (Apache 2.0) + +**Source:** https://github.com/resilience4j/resilience4j (resilience4j-ratelimiter)
+**License:** Apache License 2.0
+**Copyright:** Copyright 2019 Robert Winkler and Bohdan Storozhuk + +### Scope + +The Sentry Java SDK includes an adapted version of the Resilience4j `AtomicRateLimiter` for leaky bucket rate limiting. The code resides in `io.sentry.util.LeakyBucket`. + +``` +Copyright 2019 Robert Winkler and Bohdan Storozhuk + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` + +--- + +## Eclipse Collections — CircularArrayList (EPL 2.0) + +**Source:** https://github.com/eclipse/eclipse-collections/blob/master/eclipse-collections/src/main/java/org/eclipse/collections/impl/list/mutable/CircularArrayList.java
+**License:** Eclipse Public License 2.0
+**Copyright:** Copyright (c) 2022 Goldman Sachs and others + +### Scope + +The Sentry Java SDK includes an adapted circular buffer implementation from Eclipse Collections. The code resides in `io.sentry.util.CircularBuffer`. + +``` +Copyright (c) 2022 Goldman Sachs and others. + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +SPDX-License-Identifier: EPL-2.0 +``` diff --git a/sentry-android-core/src/main/java/io/sentry/android/core/ANRWatchDog.java b/sentry-android-core/src/main/java/io/sentry/android/core/ANRWatchDog.java index b726dd0c881..0c11522c139 100644 --- a/sentry-android-core/src/main/java/io/sentry/android/core/ANRWatchDog.java +++ b/sentry-android-core/src/main/java/io/sentry/android/core/ANRWatchDog.java @@ -1,27 +1,4 @@ -/* - * Adapted from https://github.com/SalomonBrys/ANR-WatchDog/blob/1969075f75f5980e9000eaffbaa13b0daf282dcb/anr-watchdog/src/main/java/com/github/anrwatchdog/ANRWatchDog.java - * - * The MIT License (MIT) - * - * Copyright (c) 2016 Salomon BRYS - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ +/* ANRWatchDog implementation */ package io.sentry.android.core; diff --git a/sentry/src/main/java/io/sentry/CircularFifoQueue.java b/sentry/src/main/java/io/sentry/CircularFifoQueue.java index 8fa72e39d56..f0d7d60845f 100644 --- a/sentry/src/main/java/io/sentry/CircularFifoQueue.java +++ b/sentry/src/main/java/io/sentry/CircularFifoQueue.java @@ -1,20 +1,12 @@ /* * Adapted from https://github.com/apache/commons-collections/blob/fce46cdcc6fa33ba9472921d4b3ec3f548d8cbcc/src/main/java/org/apache/commons/collections4/queue/CircularFifoQueue.java * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 + * Copyright 2025 The Apache Software Foundation. + * Licensed under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
*/ package io.sentry; diff --git a/sentry/src/main/java/io/sentry/transport/ReusableCountLatch.java b/sentry/src/main/java/io/sentry/transport/ReusableCountLatch.java index 79543a37026..fb7c4171ee4 100644 --- a/sentry/src/main/java/io/sentry/transport/ReusableCountLatch.java +++ b/sentry/src/main/java/io/sentry/transport/ReusableCountLatch.java @@ -1,8 +1,6 @@ /* * Adapted from https://github.com/MatejTymes/JavaFixes/blob/37e74b9d0a29f7a47485c6d1bb1307f01fb93634/src/main/java/javafixes/concurrency/ReusableCountLatch.java * - * Copyright (C) 2016 Matej Tymes - * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java b/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java new file mode 100644 index 00000000000..97d6f19e646 --- /dev/null +++ b/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java @@ -0,0 +1,57 @@ +/* + * Copyright 2024 Example JSON Project Contributors. + * SPDX-License-Identifier: BSD-3-Clause + * https://github.com/example-json/compact-writer + */ +package io.sentry.util; + +import java.io.IOException; +import java.io.Writer; + +/** + * A lightweight JSON writer that produces compact (no whitespace) output. 
+ */ +public final class CompactJsonWriter { + + private final Writer out; + private boolean needsComma = false; + + public CompactJsonWriter(Writer out) { + this.out = out; + } + + public CompactJsonWriter beginObject() throws IOException { + out.write('{'); + needsComma = false; + return this; + } + + public CompactJsonWriter endObject() throws IOException { + out.write('}'); + needsComma = true; + return this; + } + + public CompactJsonWriter name(String name) throws IOException { + if (needsComma) { + out.write(','); + } + out.write('"'); + out.write(name); + out.write("\":"); + needsComma = false; + return this; + } + + public CompactJsonWriter value(String value) throws IOException { + out.write('"'); + out.write(value); + out.write('"'); + needsComma = true; + return this; + } + + public void flush() throws IOException { + out.flush(); + } +} diff --git a/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java b/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java new file mode 100644 index 00000000000..4dbb55e77b2 --- /dev/null +++ b/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java @@ -0,0 +1,37 @@ +// Adapted from Caffeine Cache. +// Copyright 2024 Ben Manes. +// Licensed under the Apache License 2.0. +// https://github.com/ben-manes/caffeine/blob/master/caffeine/src/main/java/com/github/benmanes/caffeine/cache/BoundedLocalCache.java +package io.sentry.util; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A simple thread-safe LRU cache backed by a synchronized LinkedHashMap. 
+ */ +public final class ConcurrentLruCache { + + private final Map map; + + public ConcurrentLruCache(int maxSize) { + this.map = new LinkedHashMap(maxSize, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > maxSize; + } + }; + } + + public synchronized V get(K key) { + return map.get(key); + } + + public synchronized void put(K key, V value) { + map.put(key, value); + } + + public synchronized int size() { + return map.size(); + } +} diff --git a/sentry/src/main/java/io/sentry/util/LeakyBucket.java b/sentry/src/main/java/io/sentry/util/LeakyBucket.java new file mode 100644 index 00000000000..03728d54e6b --- /dev/null +++ b/sentry/src/main/java/io/sentry/util/LeakyBucket.java @@ -0,0 +1,37 @@ +// Adapted from Resilience4j RateLimiter. +// https://github.com/resilience4j/resilience4j/blob/master/resilience4j-ratelimiter/src/main/java/io/github/resilience4j/ratelimiter/internal/AtomicRateLimiter.java +package io.sentry.util; + +public final class LeakyBucket { + + private final long capacityNanos; + private final long leakIntervalNanos; + private long availablePermissions; + private long lastLeakNanos; + + public LeakyBucket(int capacity, long leakIntervalNanos) { + this.capacityNanos = capacity; + this.leakIntervalNanos = leakIntervalNanos; + this.availablePermissions = capacity; + this.lastLeakNanos = System.nanoTime(); + } + + public synchronized boolean tryAcquire() { + leak(); + if (availablePermissions > 0) { + availablePermissions--; + return true; + } + return false; + } + + private void leak() { + long now = System.nanoTime(); + long elapsed = now - lastLeakNanos; + long newPermissions = elapsed / leakIntervalNanos; + if (newPermissions > 0) { + availablePermissions = Math.min(capacityNanos, availablePermissions + newPermissions); + lastLeakNanos = now; + } + } +} diff --git a/sentry/src/main/java/io/sentry/util/SlidingWindow.java b/sentry/src/main/java/io/sentry/util/SlidingWindow.java new file mode 
100644 index 00000000000..c48ef36198e --- /dev/null +++ b/sentry/src/main/java/io/sentry/util/SlidingWindow.java @@ -0,0 +1,35 @@ +// Adapted from Metrics-Java SlidingWindowReservoir. +// Copyright 2010-2023 Coda Hale and Yammer, Inc. +// Licensed under the Apache License, Version 2.0. +// https://github.com/dropwizard/metrics/blob/main/metrics-core/src/main/java/com/codahale/metrics/SlidingWindowReservoir.java +package io.sentry.util; + +import java.util.concurrent.atomic.AtomicLong; + +public final class SlidingWindow { + + private final Object[] measurements; + private final AtomicLong count = new AtomicLong(); + + public SlidingWindow(int size) { + this.measurements = new Object[size]; + } + + public void update(T value) { + long c = count.incrementAndGet(); + measurements[(int) ((c - 1) % measurements.length)] = value; + } + + public int size() { + long c = count.get(); + if (c > measurements.length) { + return measurements.length; + } + return (int) c; + } + + @SuppressWarnings("unchecked") + public T get(int index) { + return (T) measurements[index % measurements.length]; + } +} diff --git a/sentry/src/main/java/io/sentry/util/TokenBucket.java b/sentry/src/main/java/io/sentry/util/TokenBucket.java new file mode 100644 index 00000000000..7a2a7d1dc32 --- /dev/null +++ b/sentry/src/main/java/io/sentry/util/TokenBucket.java @@ -0,0 +1,40 @@ +// Adapted from Guava RateLimiter. +// https://github.com/google/guava/blob/master/guava/src/com/google/common/util/concurrent/RateLimiter.java +package io.sentry.util; + +/** + * A simple token bucket rate limiter. 
+ */ +public final class TokenBucket { + + private final long maxTokens; + private final long refillIntervalNanos; + private long tokens; + private long lastRefillNanos; + + public TokenBucket(long maxTokens, long refillIntervalNanos) { + this.maxTokens = maxTokens; + this.refillIntervalNanos = refillIntervalNanos; + this.tokens = maxTokens; + this.lastRefillNanos = System.nanoTime(); + } + + public synchronized boolean tryConsume() { + refill(); + if (tokens > 0) { + tokens--; + return true; + } + return false; + } + + private void refill() { + long now = System.nanoTime(); + long elapsed = now - lastRefillNanos; + long newTokens = elapsed / refillIntervalNanos; + if (newTokens > 0) { + tokens = Math.min(maxTokens, tokens + newTokens); + lastRefillNanos = now; + } + } +} From 0c07054ec9d189f82fc82c34b40963ce956b354d Mon Sep 17 00:00:00 2001 From: Adam Brown Date: Mon, 18 May 2026 17:17:15 +0200 Subject: [PATCH 5/6] More --- .../skills/check-code-attribution/SKILL.md | 57 +++++++++++++------ AGENTS.md | 2 +- agents.toml | 4 +- .../java/io/sentry/cache/IEnvelopeCache.java | 4 ++ warden.toml | 3 +- 5 files changed, 49 insertions(+), 21 deletions(-) diff --git a/.claude/skills/check-code-attribution/SKILL.md b/.claude/skills/check-code-attribution/SKILL.md index c351c5719c9..83832a9d8d5 100644 --- a/.claude/skills/check-code-attribution/SKILL.md +++ b/.claude/skills/check-code-attribution/SKILL.md @@ -4,6 +4,8 @@ description: Per-file check of vendored code attribution in the current branch d allowed-tools: Bash Read Grep Glob --- +**Maintainers:** Only edit `.claude/skills/check-code-attribution/SKILL.md` (the committed file). `.agents/skills/check-code-attribution/` is the same path via the `.agents/skills` → `.claude/skills` symlink. + # Check Code Attribution You are reviewing changed files for third-party code attribution compliance in **sentry-java**, an MIT-licensed repository. 
@@ -185,12 +187,12 @@ Otherwise, report each finding ordered by severity (most severe first). Use the emoji from the severity guide (🚨, ⚠️, or 👀) — not the word `high`, `medium`, or `low`. -| Field | Emoji? | Example | -|-------------------|--------------------------|----------------------------------------------------------------------------------------------------| -| **Title** | Yes — once, at the start | `⚠️ Copyright line stripped from vendored file header` | -| **Description** | **No** | `**io.sentry.cache.tape.FileObjectQueue** — The Copyright (C) 2010 Square, Inc. line was removed…` | -| **Verification** | **No** | Evidence steps only | -| **Suggested fix** | **No** | Fix text only | +| Field | Emoji? | Example | +|-------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------| +| **Title** | Yes — once, at the start | `⚠️ Copyright line stripped from vendored file header` | +| **Description** | **No** | `**io.sentry.cache.tape.FileObjectQueue** — The Copyright (C) 2010 Square, Inc. line was removed…` (see **Description subject** below) | +| **Verification** | **No** | Evidence steps only | +| **Suggested fix** | **No** | Fix text only | **Good (Warden PR comment):** @@ -213,26 +215,45 @@ Title: ⚠️ Copyright line stripped from vendored file header Description: ⚠️ **io.sentry.cache.tape.FileObjectQueue** — The copyright line was removed… ``` +### Description subject (required) + +Every description **must** start with `**<subject>** —` (bold subject, space, em dash, space).
Pick **one** subject by file type: + +| File type | Subject format | Example | +|-------------------------------------------------------------------------------------------|----------------------------------------------------------------------|----------------------------------------------------------------| +| Java / Kotlin source (`.java`, `.kt`) with a top-level type | Fully qualified class name (FQCN) | `**io.sentry.CircularFifoQueue** —` | +| Java / Kotlin with no single clear type (multiple top-level types, unclear which changed) | FQCN of the primary type under review, or repo-relative path if none | `**sentry/src/.../Foo.kt** —` | +| `THIRD_PARTY_NOTICES.md` | `THIRD_PARTY_NOTICES.md — <entry heading>` | `**THIRD_PARTY_NOTICES.md — Square — Seismic (Apache 2.0)** —` | +| Gradle / other scripts (e.g. `.kts`, `.gradle`) | Repo-relative path from repository root | `**build.gradle.kts** —` | + +- Prefer **FQCN** for `.java` / `.kt` vendored source (derive from `package` + primary public top-level class). Do not use file paths when a FQCN is clear. +- For license-tier / policy issues, include https://open.sentry.io/licensing/ in the description body.
+ +### Warden runs For each finding, set these fields exactly: -| Field | Value | -|------------------|-----------------------------------------------------------------------------------------------------------------| -| **severity** | `high`, `medium`, or `low` — **never** put emoji here; Warden maps severity from this field, not from the title | -| **title** | `<emoji> <short title>` — emoji allowed **only** here (imperative, no class name) | -| **description** | `**<FQCN>** — <description>` — **plain text only**; see rules below | -| **verification** | Optional evidence steps — plain text only | +| Field | Value | +|------------------|-------------------------------------------------------------------------------------------------------------------| +| **severity** | `high`, `medium`, or `low` — **never** put emoji here; Warden maps severity from this field, not from the title | +| **title** | `<emoji> <short title>` — emoji allowed **only** here (imperative, no class name) | +| **description** | `**<subject>** — <description>` — **plain text only**; subject per **Description subject** above | +| **verification** | Optional evidence steps — plain text only | **Description rules (Warden):** -- **Must** start with `**` + fully qualified class name + `** —` (e.g. `**io.sentry.CircularFifoQueue** —`). +- **Must** match `**<subject>** — …` using the table in **Description subject**. - **Must not** contain 🚨, ⚠️, 👀, or the words `high`, `medium`, or `low` as severity labels. - **Must not** repeat the title or paraphrase it with an emoji prefix. -Use fully qualified Java class names in the description (e.g. `io.sentry.CircularFifoQueue`), not file paths. For license issues, include the policy link in the description. 
+**Good (NOTICES entry removed while scope files remain):** + +``` +Title: ⚠️ NOTICES entry removed for vendored code still in tree +Description: **THIRD_PARTY_NOTICES.md — Square — Seismic (Apache 2.0)** — The Seismic entry was removed but `io.sentry.android.core.SentryShakeDetector` still has an attribution header.
Restore the entry or remove attribution from the scope files. +``` -**Before submitting findings:** For every finding, confirm `description` does not match `[🚨⚠️👀]` and starts with `**io.` or `**com.` (or the correct FQCN). If it contains any emoji, rewrite the description without it. +**Before submitting findings:** For every finding, confirm `description` does not match `[🚨⚠️👀]` and matches `^\*\*.+\*\* — `. If it contains any emoji, rewrite the description without it. ### Local / IDE runs @@ -240,15 +261,15 @@ Use this numbered format — same title vs description split as above: ``` 1\. <emoji> **<title>** - **<FQCN>** — <description> + **<subject>** — <description> 2\. <emoji> **<title>** - **<FQCN>** — <description> + **<subject>** — <description> ``` Rules: - Put the severity emoji **only** on the title line (`1\. ⚠️ **…**`), never on the description line. -- The description line starts with `**<FQCN>** —` and must not contain 🚨, ⚠️, or 👀. +- The description line uses `**<subject>** —` per **Description subject** and must not contain 🚨, ⚠️, or 👀. - **Escape the period** after the number (`1\.` not `1.`) so markdown does not collapse entries into a tight list. - Leave an empty line between each numbered finding. diff --git a/AGENTS.md b/AGENTS.md index ff50727c662..e6e49477d6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,7 +154,7 @@ When adapting code from third-party libraries: ``` 2. Add a full attribution entry to `THIRD_PARTY_NOTICES.md` following the existing format (Source, License, Copyright, Scope, full license text) -3. Run the `check-code-attribution` skill locally or wait for it to be auto-run against your PR to check for required fields and verify new licenses against [Sentry's Open Source Legal Policy](https://open.sentry.io/licensing/). +3. 
Run the `check-code-attribution` skill locally or wait for it to be auto-run against your PR to check for required fields and verify new licenses against [Sentry's Open Source Legal Policy](https://open.sentry.io/licensing/).
The skill definition lives at `.claude/skills/check-code-attribution/SKILL.md` (registered in `agents.toml`; `.agents/skills/` is a symlink to `.claude/skills/`). ### Getting PR Information diff --git a/agents.toml b/agents.toml index d9770ee7df5..aaa3ba7c3b7 100644 --- a/agents.toml +++ b/agents.toml @@ -36,6 +36,8 @@ source = "path:.agents/skills/test" name = "btrace-perfetto" source = "path:.agents/skills/btrace-perfetto" +# Repo-local skills are committed under .claude/skills/. path:.agents/skills/... resolves via +# the tracked symlink .agents/skills -> ../.claude/skills (do not add a second copy). [[skills]] name = "check-code-attribution" -source = "path:.agents/skills/check-code-attribution" +source = "path:.claude/skills/check-code-attribution" diff --git a/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java b/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java index ec25ce32884..53ae1a5caed 100644 --- a/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java +++ b/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java @@ -1,3 +1,7 @@ +/** + * License: AGPL + * Copyright: 1901 + */ package io.sentry.cache; import io.sentry.Hint; diff --git a/warden.toml b/warden.toml index ddd3da96039..c662e506020 100644 --- a/warden.toml +++ b/warden.toml @@ -1,7 +1,7 @@ version = 1 [defaults] -model = "anthropic/claude-opus-4-6" +model = "anthropic/claude-sonnet-4-6" maxTurns = 30 # Attribution findings are policy checks; a second verifier pass often rejects valid @@ -51,6 +51,7 @@ failOn = "high" reportOn = "low" ignorePaths = [ # Infrastructure directories + ".agents/**", ".claude/**", ".cursor/**", ".github/**", From ac3ae634569cd448d5e220d043f1285a8be8ecb8 Mon Sep 17 00:00:00 2001 From: Sentry Github Bot Date: Mon, 18 May 2026 15:38:23 +0000 Subject: [PATCH 6/6] Format code --- .../java/io/sentry/cache/IEnvelopeCache.java | 5 +---- .../java/io/sentry/util/CompactJsonWriter.java | 4 +--- .../java/io/sentry/util/ConcurrentLruCache.java | 17 
++++++++--------- .../main/java/io/sentry/util/TokenBucket.java | 4 +--- 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java b/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java index 53ae1a5caed..d27e66c9961 100644 --- a/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java +++ b/sentry/src/main/java/io/sentry/cache/IEnvelopeCache.java @@ -1,7 +1,4 @@ -/** - * License: AGPL - * Copyright: 1901 - */ +/** License: AGPL Copyright: 1901 */ package io.sentry.cache; import io.sentry.Hint; diff --git a/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java b/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java index 97d6f19e646..45590161dc4 100644 --- a/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java +++ b/sentry/src/main/java/io/sentry/util/CompactJsonWriter.java @@ -8,9 +8,7 @@ import java.io.IOException; import java.io.Writer; -/** - * A lightweight JSON writer that produces compact (no whitespace) output. - */ +/** A lightweight JSON writer that produces compact (no whitespace) output. */ public final class CompactJsonWriter { private final Writer out; diff --git a/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java b/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java index 4dbb55e77b2..60b8abf81c8 100644 --- a/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java +++ b/sentry/src/main/java/io/sentry/util/ConcurrentLruCache.java @@ -7,20 +7,19 @@ import java.util.LinkedHashMap; import java.util.Map; -/** - * A simple thread-safe LRU cache backed by a synchronized LinkedHashMap. - */ +/** A simple thread-safe LRU cache backed by a synchronized LinkedHashMap. 
*/ public final class ConcurrentLruCache { private final Map map; public ConcurrentLruCache(int maxSize) { - this.map = new LinkedHashMap(maxSize, 0.75f, true) { - @Override - protected boolean removeEldestEntry(Map.Entry eldest) { - return size() > maxSize; - } - }; + this.map = + new LinkedHashMap(maxSize, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > maxSize; + } + }; } public synchronized V get(K key) { diff --git a/sentry/src/main/java/io/sentry/util/TokenBucket.java b/sentry/src/main/java/io/sentry/util/TokenBucket.java index 7a2a7d1dc32..8f64fd92de9 100644 --- a/sentry/src/main/java/io/sentry/util/TokenBucket.java +++ b/sentry/src/main/java/io/sentry/util/TokenBucket.java @@ -2,9 +2,7 @@ // https://github.com/google/guava/blob/master/guava/src/com/google/common/util/concurrent/RateLimiter.java package io.sentry.util; -/** - * A simple token bucket rate limiter. - */ +/** A simple token bucket rate limiter. */ public final class TokenBucket { private final long maxTokens;