diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..8bc1f24 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://json.schemastore.org/claude-code-settings.json", + "hooks": { + "Stop": [ + { + "hooks": [ + { "type": "command", "command": "cd $CLAUDE_PROJECT_DIR && ./harness post-edit" } + ] + } + ] + } +} diff --git a/.github/workflows/dogfood-check-cve.yml b/.github/workflows/dogfood-check-cve.yml new file mode 100644 index 0000000..a20cf4b --- /dev/null +++ b/.github/workflows/dogfood-check-cve.yml @@ -0,0 +1,70 @@ +name: Dogfood — corgea deps --check-cve + +on: + push: + branches: [main] + pull_request: + paths: + - 'src/**' + - 'fixtures/deps/**' + - 'Cargo.lock' + - '.github/workflows/dogfood-check-cve.yml' + +env: + CORGEA_TOKEN: ci-stub-token + CORGEA_NPM_REGISTRY: http://127.0.0.1:1 + STUB_PORT: "9876" + +jobs: + vulnerable-fixture-blocks-build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo build --release --bin corgea --bin vuln-api-stub + - name: Start vuln-api stub + run: | + ./target/release/vuln-api-stub \ + --fixtures fixtures/deps/vuln-api-stub.json \ + --port "$STUB_PORT" & + echo "CORGEA_VULN_API_URL=http://127.0.0.1:${STUB_PORT}" >> "$GITHUB_ENV" + for _ in $(seq 1 50); do + if (echo > /dev/tcp/127.0.0.1/"$STUB_PORT") 2>/dev/null; then + exit 0 + fi + sleep 0.1 + done + echo "vuln-api stub did not start on port $STUB_PORT" + exit 1 + - name: Run against vulnerable fixture; expect exit 1 + run: | + set +e + ./target/release/corgea deps --check-cve --fail-cve --path fixtures/deps/npm + rc=$? 
+ if [ "$rc" -ne 1 ]; then + echo "expected exit 1, got $rc" + exit 1 + fi + + clean-fixture-passes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo build --release --bin corgea --bin vuln-api-stub + - name: Start vuln-api stub + run: | + ./target/release/vuln-api-stub \ + --fixtures fixtures/deps/vuln-api-stub.json \ + --port "$STUB_PORT" & + echo "CORGEA_VULN_API_URL=http://127.0.0.1:${STUB_PORT}" >> "$GITHUB_ENV" + for _ in $(seq 1 50); do + if (echo > /dev/tcp/127.0.0.1/"$STUB_PORT") 2>/dev/null; then + exit 0 + fi + sleep 0.1 + done + echo "vuln-api stub did not start on port $STUB_PORT" + exit 1 + - name: Run against clean fixture; expect exit 0 + run: ./target/release/corgea deps --check-cve --fail-cve --path fixtures/deps/npm-clean diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b1248a7..d63857b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,10 +17,17 @@ jobs: - name: Setup Rust uses: dtolnay/rust-toolchain@stable + with: + components: llvm-tools-preview + + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + + - name: Install cargo-audit + uses: taiki-e/install-action@cargo-audit - name: Cache cargo uses: Swatinem/rust-cache@v2 - - - name: Run unit tests - run: cargo test + - name: CI gate + run: ./harness ci diff --git a/.worktreeinclude b/.worktreeinclude new file mode 100644 index 0000000..2b91ae3 --- /dev/null +++ b/.worktreeinclude @@ -0,0 +1,2 @@ +.humanlayer/ +.env diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9521313 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,57 @@ +# CLAUDE + +Corgea developer CLI — Rust binary shipped via `maturin` to npm + pip. +Repo-root `CLAUDE.md` covers cross-codebase +conventions; this file covers cli-only specifics. 
+ +## Commands + +- After edits: `./harness check` — clippy fix, format, tests, suppression report +- Pre-commit: `./harness pre-commit` — staged Rust files only (auto via git hook) +- CI: `./harness ci` — strict clippy (`-D warnings`), format check, dep audit, tests + coverage gate (min 41%) +- Audit: `./harness audit` — `cargo audit` for known dep vulnerabilities +- Coverage: `./harness coverage [--min=N]` — cargo-llvm-cov; HTML report under `target/llvm-cov/`; fails if line coverage < N (default 41) +- Lint: `./harness lint` — clippy + format check, no fixes +- Test: `./harness test` — `cargo test` +- Fix: `./harness fix` — clippy fix + format +- Setup: `./harness setup-hooks` — install `.git/hooks/pre-commit` +- Install: `./harness install` — `cargo install --path .` to `~/.cargo/bin/corgea` +- Auto-format: `./harness post-edit` runs via Claude Code Stop hook + +Add `--verbose` to stream raw command output instead of the quiet summary. + +## Source map + +CLI entry is `src/main.rs` — clap-derived `Commands` enum dispatches to one module per subcommand. 
+ +| Path | Role | +|---|---| +| `authorize.rs` / `cicd.rs` | OAuth device flow + CI/CD token detection for `login` | +| `scanners/{blast,fortify,parsers}` | `scan` subcommand — blast (default), semgrep, snyk, Fortify FPR parsing | +| `scan.rs` / `wait.rs` / `list.rs` / `inspect.rs` | Upload, poll, list, inspect scans and issues against Corgea API | +| `verify_deps/` | `deps` subcommand — registry freshness + optional CVE check (npm + Python) | +| `precheck/` | `npm` / `yarn` / `pnpm` / `pip` / `uv` install wrappers | +| `vuln_api/` | Client for `vuln-api.corgea.app` (advisories); opt-in via `--check-cve` | +| `utils/{api,generic,terminal}` | HTTP, env helpers, TTY/color output | +| `config.rs` | `~/.corgea/config.toml` — url, token, optional `vuln_api_url` | + +## Env vars + +- `CORGEA_TOKEN`, `CORGEA_URL`, `CORGEA_DEBUG` — auth + endpoint override +- `CORGEA_VULN_API_URL` — override vuln-api host (default `https://vuln-api.corgea.app`) +- `CORGEA_NPM_REGISTRY`, `CORGEA_PYPI_REGISTRY` — alternate registries for `deps` and install wrappers + +## Adding a subcommand + +1. New module under `src/` (or `src/<name>/mod.rs` if multi-file). +2. Add a variant to `Commands` in `src/main.rs` with clap `#[arg]` help text — this is the user-facing doc. +3. Dispatch in the `match &cli.command` block; call `verify_token_and_exit_when_fail(&corgea_config)` only if the command hits the Corgea API. +4. Exit codes: `1` = expected failure (findings, auth, validation), `2` = bad CLI input. + +## Dogfood fixtures + +`fixtures/deps/` holds minimal npm/yarn/pnpm/pip/poetry/uv projects with pinned, advisory-backed manifests. Used by `cargo test deps_dogfood` (offline) and manual runs — see `fixtures/deps/README.md`. **Do not bump pins** — versions are chosen intentionally. + +## Layer 2 (behavior contract) + +Not wired. Commits, pushes, and arch-config edits are NOT gated by hooks in this subproject — follow the conventions in the repo-root CLAUDE.md. 
diff --git a/Cargo.lock b/Cargo.lock index 225b82d..b9e8077 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -357,6 +357,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1695,6 +1696,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index 608ffbd..46d1827 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,8 +5,20 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "corgea" +path = "src/lib.rs" + +[[bin]] +name = "corgea" +path = "src/main.rs" + +[[bin]] +name = "vuln-api-stub" +path = "src/bin/vuln-api-stub.rs" + [dependencies] -clap = { version = "4.4.13", features = ["derive"] } +clap = { version = "4.4.13", features = ["derive", "env"] } dirs = "5.0.1" reqwest = { version = "0.12.23", default-features = false, features = [ "blocking", @@ -39,6 +51,7 @@ http-body-util = "0.1" url = "2.5" open = "5.0" urlencoding = "2.1" +semver = "1" [target.'cfg(not(target_os = "windows"))'.dependencies] openssl = { version = "0.10", features = ["vendored"] } diff --git a/README.md b/README.md index b242ebe..17ebd2b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # Corgea CLI + +[![Dependency security](https://github.com/Corgea/cli/actions/workflows/dogfood-check-cve.yml/badge.svg)](https://github.com/Corgea/cli/actions/workflows/dogfood-check-cve.yml) + Corgea CLI is a powerful developer tool that helps you find and fix security vulnerabilities in your code. Using our AI-powered scanner (blast) and our platform, Corgea identifies complex security issues like business logic flaws, authentication vulnerabilities, and other hard-to-find bugs. 
The CLI provides commands to scan your codebase, inspect findings, interact with fixes, and much more - all designed with a great developer experience in mind. @@ -26,6 +29,28 @@ Once the binary is installed, login with your token from the Corgea app. corgea login ``` +## Dependency Security + +`corgea deps` is a supply-chain tripwire for pinned npm and Python dependencies. It supports two independent modes: **freshness** (flag recently published versions) and **CVE detection** (query known advisories). + +Freshness gate — block builds that pull in a recently published dependency: + +```bash +corgea deps --threshold 2d --fail +``` + +CVE gate — requires `corgea login` (or `CORGEA_TOKEN`): + +```bash +corgea deps --check-cve --fail-cve + +# Fail only on critical (or critical+high) CVEs; lower-severity +# findings still render but do not block. +corgea deps --check-cve --fail-cve --severity critical +corgea deps --check-cve --fail-cve --severity critical,high +``` + +See [Dependency Scanning (CLI)](https://docs.corgea.app/cli/deps) for flags, exit codes, CI integration, and self-hosted vuln-api setup. ## Development Setup diff --git a/fixtures/deps/README.md b/fixtures/deps/README.md new file mode 100644 index 0000000..f50089c --- /dev/null +++ b/fixtures/deps/README.md @@ -0,0 +1,78 @@ +# Deps dogfood fixtures + +Sample apps for manually testing `corgea deps` and install wrappers (`corgea npm`, etc.) the way a customer would. Each subdirectory is a minimal project with pinned dependency manifests and lockfiles. + +**Do not bump dependency versions** — pins are intentional and advisory-backed. 
+ +## Fixtures + +| Directory | Ecosystem | Lockfile | Primary test | +|---|---|---|---| +| `npm/` | npm | `package-lock.json` | CVE scan (`--check-cve`), `corgea npm` | +| `npm-clean/` | npm | `package-lock.json` | CVE clean control (`lodash@4.17.21`, patched) | +| `npm-unpinned/` | npm | *(none)* | `--fail-unpinned` | +| `yarn/` | npm/yarn | `yarn.lock` | Yarn lockfile parser | +| `pnpm/` | npm/pnpm | `pnpm-lock.yaml` | pnpm lockfile parser | +| `python-requirements/` | Python | `requirements.txt` | `==`-pinned requirements | +| `python-poetry/` | Python | `poetry.lock` | Poetry lock discovery | +| `python-uv/` | Python | `uv.lock` | uv lock discovery | + +## vuln-api e2e stub + +Offline dogfood and GitHub Actions use [`vuln-api-stub.json`](vuln-api-stub.json) with the `vuln-api-stub` binary: + +```bash +cargo build --release --bin vuln-api-stub --bin corgea +./target/release/vuln-api-stub --fixtures fixtures/deps/vuln-api-stub.json --print-url & +export CORGEA_VULN_API_URL=http://127.0.0.1:<port> +export CORGEA_TOKEN=ci-stub-token +export CORGEA_NPM_REGISTRY=http://127.0.0.1:1 + +./target/release/corgea deps --check-cve --fail-cve --path fixtures/deps/npm # expect exit 1 +./target/release/corgea deps --check-cve --fail-cve --path fixtures/deps/npm-clean # expect exit 0 +``` + +Unlisted `(ecosystem, name, version)` keys in the fixture file default to **clean** responses. 
+ +## Manual dogfood + +```bash +cd cli +cargo build --release +BIN=./target/release/corgea + +# Baseline freshness scan +$BIN deps --path fixtures/deps/npm --threshold 2d + +# Pinning enforcement (expect exit 1) +$BIN deps --path fixtures/deps/npm-unpinned --fail-unpinned + +# CVE scan (needs CORGEA_VULN_API_URL + Corgea token) +$BIN deps --path fixtures/deps/npm --check-cve +$BIN deps --path fixtures/deps/python-requirements --ecosystem python --check-cve + +# CI-gate shape +$BIN deps --path fixtures/deps/npm --threshold 2d --fail --fail-unpinned --check-cve + +# JSON output +$BIN deps --path fixtures/deps/npm --check-cve --json + +# Install wrapper (install-time tripwire) +cd fixtures/deps/npm +$BIN npm install --check-only --threshold 2d + +cd ../python-uv +$BIN uv sync --check-only --threshold 2d +``` + +## Automated tests + +```bash +cargo test deps_dogfood +``` + +Runs fixture discovery and stub-server CVE tests offline (no live registry or vuln-api required). + +## Pin sources + +npm pins adapted from `devex-testing-grounds/insecure-js`. Python pins adapted from `devex-testing-grounds/insecure-app/requirements.txt`. 
diff --git a/fixtures/deps/npm-clean/package-lock.json b/fixtures/deps/npm-clean/package-lock.json new file mode 100644 index 0000000..3279dbc --- /dev/null +++ b/fixtures/deps/npm-clean/package-lock.json @@ -0,0 +1,21 @@ +{ + "name": "deps-fixture-npm-clean", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "deps-fixture-npm-clean", + "version": "0.1.0", + "dependencies": { + "lodash": "4.17.21" + } + }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "license": "MIT" + } + } +} diff --git a/fixtures/deps/npm-clean/package.json b/fixtures/deps/npm-clean/package.json new file mode 100644 index 0000000..0c0984f --- /dev/null +++ b/fixtures/deps/npm-clean/package.json @@ -0,0 +1,9 @@ +{ + "name": "deps-fixture-npm-clean", + "version": "0.1.0", + "private": true, + "description": "Clean CVE control fixture — lodash@4.17.21 (patched). Do not bump.", + "dependencies": { + "lodash": "4.17.21" + } +} diff --git a/fixtures/deps/npm-unpinned/package.json b/fixtures/deps/npm-unpinned/package.json new file mode 100644 index 0000000..09c20df --- /dev/null +++ b/fixtures/deps/npm-unpinned/package.json @@ -0,0 +1,11 @@ +{ + "name": "deps-fixture-npm-unpinned", + "version": "1.0.0", + "private": true, + "description": "Sample app for corgea deps --fail-unpinned testing. 
No lockfile on purpose.", + "dependencies": { + "json5": "2.2.1", + "lodash": "4.17.20", + "semver": "5.4.1" + } +} diff --git a/fixtures/deps/npm/package-lock.json b/fixtures/deps/npm/package-lock.json new file mode 100644 index 0000000..d6eddaf --- /dev/null +++ b/fixtures/deps/npm/package-lock.json @@ -0,0 +1,32 @@ +{ + "name": "deps-fixture-npm", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "deps-fixture-npm", + "version": "1.0.0", + "dependencies": { + "json5": "2.2.1", + "lodash": "4.17.20", + "semver": "5.4.1" + } + }, + "node_modules/json5": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", + "integrity": "sha512-1hqXHTj2W5V8UxeYl9W7D4W9W6n9qW7D4W9W6n9qW7D4=" + }, + "node_modules/lodash": { + "version": "4.17.20", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz", + "integrity": "sha512-1hqXHTj2W5V8UxeYl9W7D4W9W6n9qW7D4W9W6n9qW7D4=" + }, + "node_modules/semver": { + "version": "5.4.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.4.1.tgz", + "integrity": "sha512-1hqXHTj2W5V8UxeYl9W7D4W9W6n9qW7D4W9W6n9qW7D4=" + } + } +} diff --git a/fixtures/deps/npm/package.json b/fixtures/deps/npm/package.json new file mode 100644 index 0000000..8687e73 --- /dev/null +++ b/fixtures/deps/npm/package.json @@ -0,0 +1,11 @@ +{ + "name": "deps-fixture-npm", + "version": "1.0.0", + "private": true, + "description": "Sample app for corgea deps dogfood. Intentional old pins — do not bump.", + "dependencies": { + "json5": "2.2.1", + "lodash": "4.17.20", + "semver": "5.4.1" + } +} diff --git a/fixtures/deps/pnpm/package.json b/fixtures/deps/pnpm/package.json new file mode 100644 index 0000000..9372366 --- /dev/null +++ b/fixtures/deps/pnpm/package.json @@ -0,0 +1,11 @@ +{ + "name": "deps-fixture-pnpm", + "version": "1.0.0", + "private": true, + "description": "Sample app for corgea deps pnpm-lock.yaml dogfood. 
Intentional old pins — do not bump.", + "dependencies": { + "json5": "2.2.1", + "lodash": "4.17.20", + "semver": "5.4.1" + } +} diff --git a/fixtures/deps/pnpm/pnpm-lock.yaml b/fixtures/deps/pnpm/pnpm-lock.yaml new file mode 100644 index 0000000..7ee7979 --- /dev/null +++ b/fixtures/deps/pnpm/pnpm-lock.yaml @@ -0,0 +1,31 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + .: + dependencies: + json5: + specifier: 2.2.1 + version: 2.2.1 + lodash: + specifier: 4.17.20 + version: 4.17.20 + semver: + specifier: 5.4.1 + version: 5.4.1 + +packages: + json5@2.2.1: + resolution: {integrity: sha512-x} + engines: {node: '>=6'} + + lodash@4.17.20: + resolution: {integrity: sha512-y} + engines: {node: '>=4'} + + semver@5.4.1: + resolution: {integrity: sha512-z} + engines: {node: '*'} diff --git a/fixtures/deps/python-poetry/poetry.lock b/fixtures/deps/python-poetry/poetry.lock new file mode 100644 index 0000000..6cbe30d --- /dev/null +++ b/fixtures/deps/python-poetry/poetry.lock @@ -0,0 +1,22 @@ +# Intentional old pins for corgea deps dogfood — do not bump. + +[[package]] +name = "django" +version = "2.2.0" +description = "" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyyaml" +version = "5.1" +description = "" +category = "main" +optional = false +python-versions = "*" + +[metadata] +lock-version = "2.0" +python-versions = "^3.8" +content-hash = "fixture" diff --git a/fixtures/deps/python-poetry/pyproject.toml b/fixtures/deps/python-poetry/pyproject.toml new file mode 100644 index 0000000..a9535a9 --- /dev/null +++ b/fixtures/deps/python-poetry/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "deps-fixture-poetry" +version = "0.1.0" +description = "Sample app for corgea deps poetry.lock dogfood." 
+requires-python = ">=3.8" +dependencies = [ + "django==2.2.0", + "pyyaml==5.1", +] + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/fixtures/deps/python-requirements/requirements.txt b/fixtures/deps/python-requirements/requirements.txt new file mode 100644 index 0000000..76fa5fe --- /dev/null +++ b/fixtures/deps/python-requirements/requirements.txt @@ -0,0 +1,5 @@ +# Intentional old pins for corgea deps dogfood — do not bump. +django==2.2.0 +pyyaml==5.1 +urllib3==1.25.8 +pillow==8.1.0 diff --git a/fixtures/deps/python-uv/pyproject.toml b/fixtures/deps/python-uv/pyproject.toml new file mode 100644 index 0000000..73db6ca --- /dev/null +++ b/fixtures/deps/python-uv/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "deps-fixture-uv" +version = "0.1.0" +description = "Sample app for corgea deps uv.lock dogfood." +requires-python = ">=3.8" +dependencies = [ + "django==2.2.0", + "urllib3==1.25.8", +] diff --git a/fixtures/deps/python-uv/uv.lock b/fixtures/deps/python-uv/uv.lock new file mode 100644 index 0000000..427adeb --- /dev/null +++ b/fixtures/deps/python-uv/uv.lock @@ -0,0 +1,18 @@ +version = 1 +requires-python = ">=3.8" + +[[package]] +name = "django" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "urllib3" +version = "1.25.8" +source = { registry = "https://pypi.org/simple" } + +[package.metadata] +requires-dist = [ + { name = "django", specifier = "==2.2.0" }, + { name = "urllib3", specifier = "==1.25.8" }, +] diff --git a/fixtures/deps/vuln-api-stub.json b/fixtures/deps/vuln-api-stub.json new file mode 100644 index 0000000..423c03a --- /dev/null +++ b/fixtures/deps/vuln-api-stub.json @@ -0,0 +1,25 @@ +{ + "package_checks": { + "npm/lodash/4.17.20": { + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "CVE-2019-10744", + "severity_level": "high", + "tier": 1, + 
"vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + } + ] + } + }, + "advisories": { + "CVE-2019-10744": { + "id": "CVE-2019-10744", + "source_url": "https://corgea.app/advisories/CVE-2019-10744" + } + } +} diff --git a/fixtures/deps/yarn/package.json b/fixtures/deps/yarn/package.json new file mode 100644 index 0000000..1afa65d --- /dev/null +++ b/fixtures/deps/yarn/package.json @@ -0,0 +1,11 @@ +{ + "name": "deps-fixture-yarn", + "version": "1.0.0", + "private": true, + "description": "Sample app for corgea deps yarn.lock dogfood. Intentional old pins — do not bump.", + "dependencies": { + "json5": "2.2.1", + "lodash": "4.17.20", + "semver": "5.4.1" + } +} diff --git a/fixtures/deps/yarn/yarn.lock b/fixtures/deps/yarn/yarn.lock new file mode 100644 index 0000000..e9c4e40 --- /dev/null +++ b/fixtures/deps/yarn/yarn.lock @@ -0,0 +1,14 @@ +# THIS IS AN AUTOGENERATED FILE. +# yarn lockfile v1 + +"json5@2.2.1": + version "2.2.1" + resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.1.tgz" + +"lodash@4.17.20": + version "4.17.20" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.20.tgz" + +"semver@5.4.1": + version "5.4.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-5.4.1.tgz" diff --git a/harness b/harness new file mode 100755 index 0000000..c147468 --- /dev/null +++ b/harness @@ -0,0 +1,306 @@ +#!/usr/bin/env bash +# Project development tasks. Bash + cargo + git only. 
+# Usage: ./harness [command] [--verbose] [--min=N] +# +# Commands: check, fix, lint, test, audit, coverage, pre-commit, ci, +# post-edit, setup-hooks, suppressions, install + +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$ROOT" + +VERBOSE=0 +COVERAGE_MIN=41 +for arg in "$@"; do + case "$arg" in + --verbose) VERBOSE=1 ;; + --min=*) COVERAGE_MIN="${arg#--min=}" ;; + esac +done + +if [ -t 1 ]; then + GREEN=$'\033[32m'; RED=$'\033[31m'; BLUE=$'\033[34m'; DIM=$'\033[2m'; RESET=$'\033[0m' +else + GREEN=""; RED=""; BLUE=""; DIM=""; RESET="" +fi + +# ── Runner ────────────────────────────────────────────────────────── +# run DESC NO_EXIT -- COMMAND [ARGS...] +# Quiet by default: captures stdout+stderr, prints only on failure. +# --verbose streams raw output. +# no_exit=1 lets the caller aggregate failures (e.g. cmd_check). + +LAST_RC=0 +LAST_OUTPUT="" + +run() { + local desc="$1"; shift + local no_exit="$1"; shift + [ "$1" = "--" ] && shift + + if [ "$VERBOSE" -eq 1 ]; then + printf " %s→ %s%s\n" "$DIM" "$*" "$RESET" + "$@" + LAST_RC=$? + LAST_OUTPUT="" + if [ "$LAST_RC" -eq 0 ]; then + printf " %s✓%s %s\n" "$GREEN" "$RESET" "$desc" + return 0 + fi + printf " %s✗%s %s\n" "$RED" "$RESET" "$desc" + [ "$no_exit" = "0" ] && exit "$LAST_RC" + return "$LAST_RC" + fi + + local tmp; tmp="$(mktemp)" + "$@" >"$tmp" 2>&1 + LAST_RC=$? + LAST_OUTPUT="$(cat "$tmp")" + rm -f "$tmp" + if [ "$LAST_RC" -eq 0 ]; then + printf " %s✓%s %s\n" "$GREEN" "$RESET" "$desc" + return 0 + fi + printf " %s✗%s %s\n" "$RED" "$RESET" "$desc" + [ -n "$LAST_OUTPUT" ] && printf "%s\n" "$LAST_OUTPUT" + [ "$no_exit" = "0" ] && exit "$LAST_RC" + return "$LAST_RC" +} + +run_with_summary() { + local desc="$1"; shift + local no_exit="$1"; shift + [ "$1" = "--" ] && shift + + run "$desc" "$no_exit" -- "$@" + local rc=$? + [ $rc -ne 0 ] && return $rc + + # Reprint last line with test summary suffix (cargo test). + local passed total_passed=0 duration=0 + while IFS= read -r line; do + passed="$(printf "%s" "$line" | sed -nE 's/.*ok\. 
([0-9]+) passed.*/\1/p')" + [ -n "$passed" ] && total_passed=$(( total_passed + passed )) + local d + d="$(printf "%s" "$line" | sed -nE 's/.*finished in ([0-9.]+)s.*/\1/p')" + if [ -n "$d" ]; then + awk_cmp=$(awk -v a="$d" -v b="$duration" 'BEGIN{print (a>b)?1:0}') + [ "$awk_cmp" = "1" ] && duration="$d" + fi + done <<<"$LAST_OUTPUT" + if [ "$total_passed" -gt 0 ]; then + # Overwrite previous OK line with summary detail. + printf "\033[1A\033[2K %s✓%s %s %s(%s passed, %ss)%s\n" \ + "$GREEN" "$RESET" "$desc" "$DIM" "$total_passed" "$duration" "$RESET" + fi + return 0 +} + +# ── Git helpers ───────────────────────────────────────────────────── + +staged_rs_files() { + git diff --cached --name-only --diff-filter=d --relative 2>/dev/null \ + | grep -E '\.rs$' || true +} + +changed_rs_files() { + git status --porcelain 2>/dev/null \ + | sed -E 's/^...//' \ + | grep -E '\.rs$' || true +} + +# ── Suppressions (report-only) ────────────────────────────────────── + +cmd_suppressions() { + printf "\n=== Suppressions ===\n\n" + local total=0 line_total=0 crate_total=0 + local file + local tmp; tmp="$(mktemp)" + while IFS= read -r -d '' file; do + grep -oE '#!?\[allow\([^)]*\)\]' "$file" 2>/dev/null >>"$tmp" || true + done < <(find src -type f -name '*.rs' -print0 2>/dev/null) + [ -d tests ] && while IFS= read -r -d '' file; do + grep -oE '#!?\[allow\([^)]*\)\]' "$file" 2>/dev/null >>"$tmp" || true + done < <(find tests -type f -name '*.rs' -print0 2>/dev/null) + + line_total=$(awk '/^#\[allow/ {n++} END{print n+0}' "$tmp") + crate_total=$(awk '/^#!\[allow/ {n++} END{print n+0}' "$tmp") + total=$(( line_total + crate_total )) + + printf "Suppressions: %d total\n" "$total" + [ "$total" -eq 0 ] && { rm -f "$tmp"; return 0; } + [ "$line_total" -gt 0 ] && printf " allow: %d\n" "$line_total" + [ "$crate_total" -gt 0 ] && printf " allow_crate: %d\n" "$crate_total" + + # Top 10 rules across both kinds. 
+ sed -E 's/#!?\[allow\(([^)]*)\)\]/\1/' "$tmp" \ + | tr ',' '\n' \ + | sed -E 's/^[[:space:]]+|[[:space:]]+$//g' \ + | grep -v '^$' \ + | sort | uniq -c | sort -rn | head -10 \ + | awk '{ rule=$2; for (i=3;i<=NF;i++) rule=rule" "$i; printf " %s: %d\n", rule, $1 }' + rm -f "$tmp" + return 0 +} + +# ── Commands ──────────────────────────────────────────────────────── + +cmd_fix() { + run "Clippy fix" 0 -- cargo clippy --fix --allow-dirty --allow-staged + run "Format" 0 -- cargo fmt +} + +cmd_lint() { + run "Clippy" 0 -- cargo clippy + run "Format check" 0 -- cargo fmt --check +} + +cmd_test() { + run_with_summary "Tests" 0 -- cargo test +} + +cmd_audit() { + _cmd_audit_inner 0 +} + +_cmd_audit_inner() { + local strict="$1" + if cargo audit --version >/dev/null 2>&1; then + run "Dep audit" 0 -- cargo audit + return + fi + if [ "$strict" = "1" ]; then + printf " %s✗%s Dep audit (cargo-audit not installed)\n" "$RED" "$RESET" + exit 1 + fi + printf " %s⊘ Dep audit skipped (install: cargo install cargo-audit)%s\n" "$DIM" "$RESET" +} + +cmd_coverage() { + printf "\n%s[coverage]%s min=%s%%\n\n" "$BLUE" "$RESET" "$COVERAGE_MIN" + if ! cargo llvm-cov --version >/dev/null 2>&1; then + printf " %s✗%s Coverage (cargo-llvm-cov not installed)\n" "$RED" "$RESET" + printf " %sInstall:%s cargo install cargo-llvm-cov\n" "$DIM" "$RESET" + exit 1 + fi + run "Coverage (min ${COVERAGE_MIN}%)" 0 -- \ + cargo llvm-cov --summary-only --fail-under-lines "$COVERAGE_MIN" + run "HTML report" 0 -- cargo llvm-cov report --html + printf " %sHTML:%s %s/target/llvm-cov/html/index.html\n" \ + "$DIM" "$RESET" "$ROOT" +} + +cmd_post_edit() { + local changed; changed="$(changed_rs_files)" + [ -z "$changed" ] && return 0 + # Never fail the Stop hook. 
+ run "Format" 1 -- cargo fmt || true + return 0 +} + +cmd_pre_commit() { + local staged; staged="$(staged_rs_files)" + if [ -z "$staged" ]; then + printf "No staged Rust files — skipping checks\n" + return 0 + fi + printf "\n%s[pre-commit]%s\n\n" "$BLUE" "$RESET" + cmd_fix + cmd_test +} + +cmd_check() { + local start; start=$(date +%s) + printf "\n%s[check]%s Running pre-flight checks...\n\n" "$BLUE" "$RESET" + + local passed=0 failed=0 + run "Clippy fix" 1 -- cargo clippy --fix --allow-dirty --allow-staged + [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) + run "Format" 1 -- cargo fmt + [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) + run "Clippy (strict)" 1 -- cargo clippy -- -D warnings + [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) + run_with_summary "Tests" 1 -- cargo test + [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) + + cmd_suppressions + + local elapsed=$(( $(date +%s) - start )) + printf "\n" + if [ "$failed" -gt 0 ]; then + printf "%sFAIL%s %d passed, %d failed %s(%ds)%s\n" \ + "$RED" "$RESET" "$passed" "$failed" "$DIM" "$elapsed" "$RESET" + exit 1 + fi + printf "%sOK%s %d passed %s(%ds)%s\n" \ + "$GREEN" "$RESET" "$passed" "$DIM" "$elapsed" "$RESET" +} + +cmd_ci() { + printf "\n%s[ci]%s\n\n" "$BLUE" "$RESET" + run "Clippy (strict)" 0 -- cargo clippy -- -D warnings + run "Format check" 0 -- cargo fmt --check + _cmd_audit_inner 1 + if ! cargo llvm-cov --version >/dev/null 2>&1; then + printf " %s✗%s Coverage (cargo-llvm-cov not installed)\n" "$RED" "$RESET" + printf " %sInstall:%s cargo install cargo-llvm-cov\n" "$DIM" "$RESET" + exit 1 + fi + run_with_summary "Tests + coverage (min ${COVERAGE_MIN}%)" 0 -- \ + cargo llvm-cov --summary-only --fail-under-lines "$COVERAGE_MIN" +} + +cmd_install() { + printf "\n%s[install]%s Building and installing corgea to ~/.cargo/bin\n\n" "$BLUE" "$RESET" + run "cargo install" 0 -- cargo install --path . 
--force + local bin="$HOME/.cargo/bin/corgea" + if [ -x "$bin" ]; then + local ver; ver="$("$bin" --version 2>/dev/null || echo unknown)" + printf "\n%sInstalled%s %s %s(%s)%s\n" "$GREEN" "$RESET" "$bin" "$DIM" "$ver" "$RESET" + local resolved; resolved="$(command -v corgea 2>/dev/null || true)" + if [ -n "$resolved" ] && [ "$resolved" != "$bin" ]; then + printf "%sNote:%s 'corgea' on PATH resolves to %s — re-order PATH to prefer ~/.cargo/bin\n" \ + "$DIM" "$RESET" "$resolved" + fi + fi +} + +cmd_setup_hooks() { + local hook_dir="$ROOT/.git/hooks" + local hook="$hook_dir/pre-commit" + mkdir -p "$hook_dir" + cat >"$hook" <<'EOF' +#!/bin/sh +exec "$(git rev-parse --show-toplevel)/harness" pre-commit +EOF + chmod +x "$hook" + printf "Installed pre-commit hook at %s\n" "$hook" +} + +# ── Dispatch ──────────────────────────────────────────────────────── + +cmd="${1:-check}" +case "$cmd" in + check) cmd_check ;; + fix) cmd_fix ;; + lint) cmd_lint ;; + test) cmd_test ;; + audit) cmd_audit ;; + coverage) cmd_coverage ;; + pre-commit) cmd_pre_commit ;; + ci) cmd_ci ;; + post-edit) cmd_post_edit ;; + setup-hooks) cmd_setup_hooks ;; + suppressions) cmd_suppressions ;; + install) cmd_install ;; + -h|--help|help) + printf "Usage: ./harness [--verbose] [--min=N]\n\n" + printf "Commands: check, fix, lint, test, audit, coverage, pre-commit,\n" + printf " ci, post-edit, setup-hooks, suppressions, install\n" + ;; + *) + printf "Unknown command: %s\n" "$cmd" >&2 + exit 1 + ;; +esac diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 2429d9c..c4df9b5 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -109,6 +109,144 @@ corgea setup-hooks --default-config # Default: secrets + PII, fail on Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypass with `git commit --no-verify`. 
+### Deps — `corgea deps` + +Supply-chain tripwire: looks up every pinned dependency in the project against the public registry (npm or PyPI) and flags anything whose installed version was published within a configurable recency window. Useful for catching very-recent malicious version pushes before they get baked into a build. + +```bash +corgea deps # 2-day window, prod deps, both ecosystems +corgea deps --threshold 7d # widen the window to 7 days +corgea deps --threshold 48h --fail # exit 1 if any recent dep is found (CI gate) +corgea deps --fail-unpinned # exit 1 if any dep can't be verified because it isn't pinned +corgea deps --ecosystem npm # only check npm deps +corgea deps --ecosystem python --include-dev # python only, include dev deps +corgea deps --path ./services/api # check a different project +corgea deps --json # machine-readable output +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--ecosystem` | `-e` | `npm`, `python`, or `all` (default) | +| `--threshold` | `-t` | Recency window: `2d`, `48h`, `30m`, `1w`, etc. (default `2d`) | +| `--include-dev` | | Include development dependencies | +| `--fail` | `-f` | Exit non-zero if any recent dep is detected | +| `--fail-unpinned` | | Exit non-zero if any dep is unpinned (manifest with no lockfile, or unpinned `requirements.txt` line) | +| `--json` | | JSON output instead of human text | +| `--path` | `-p` | Project directory (default: `.`) | +| `--check-cve` | | Query Corgea vulnerability database for known CVEs/advisories (requires login) | +| `--fail-cve` | | Exit non-zero if any known CVE is found (requires `--check-cve`) | +| `--severity` | | Minimum severity to trip `--fail-cve` (`critical\|high\|medium\|low\|info`, comma list for exact set, or `any` for default). Requires `--fail-cve`. | + +### CVE detection + +Pass `--check-cve` to query the Corgea vulnerability database for known CVEs and advisories on every pinned dependency. 
Requires `corgea login` first (or `CORGEA_TOKEN` set). Without a token, the command refuses to start and exits **2** with no report printed. + +```bash +# Local: see what would fail +corgea deps --check-cve + +# CI: fail the build on any known CVE +corgea deps --check-cve --fail-cve + +# CI: fail only on critical CVEs (high/medium/low still render). +corgea deps --check-cve --fail-cve --severity critical +``` + +Example finding: + +```text +✗ npm lodash@4.17.20: GHSA-xxxx-yyyy-zzzz [TOP-FIX] (severity: high) + → upgrade to 4.17.21 + https://corgea.app/advisories/GHSA-xxxx-yyyy-zzzz +``` + +With `--json`, each dependency in `results[]` includes a `cves[]` array and `cve_status` label. Top-level `cve_summary` reports counts (`checked`, `vulnerable`, `clean`, `errors`, `unpinned_not_checked`). CVE fields are omitted when `--check-cve` is not passed. + +`cve_summary` also carries `severity_floor` (the rendered `--severity` value, default `"any"`) and `vulnerable_above_floor` (count of findings that meet the floor; equals `vulnerable` when floor is `any`). 
+ +| Override | Where | Default | +|----------|-------|---------| +| Token | `corgea login` or `CORGEA_TOKEN` env | (required) | +| Vuln-api URL | `CORGEA_VULN_API_URL` env, or `vuln_api_url` in `~/.corgea/config.toml` | `https://vuln-api.corgea.app` | + +**Exit codes — CVE CI gating:** + +| Exit | Condition | +|------|-----------| +| 0 | No vulnerable deps found, or `--check-cve` not passed, or findings present but no `--fail-cve` | +| 1 | Known CVE found **and** `--fail-cve` passed | +| 2 | `--check-cve` without token; `--fail-cve` without `--check-cve`; parse/validation errors | + +**All deps gates (independent flags):** + +| Flag | Exit 1 when | +|------|-------------| +| `--fail` | Recent publish, registry error, CVE finding, **or CVE lookup error** | +| `--fail-unpinned` | Unpinned dep detected | +| `--fail-cve` | CVE finding only (lookup errors do **not** trigger) | + +#### GitHub Actions + +```yaml +name: Dependency security +on: + pull_request: + push: + branches: [main] + +jobs: + corgea-deps: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - run: npm install -g @corgea/cli + - name: Check dependencies for known CVEs + env: + CORGEA_TOKEN: ${{ secrets.CORGEA_TOKEN }} + run: corgea deps --check-cve --fail-cve +``` + +Python install, self-hosted vuln-api, and strict-mode variants: https://docs.corgea.app/cli/deps#ci-integration + +Full reference: https://docs.corgea.app/cli/deps + +Supported lockfiles (preferred → fallback): npm: `package-lock.json`, `npm-shrinkwrap.json`, `pnpm-lock.yaml` (v5/v6/v9), `yarn.lock`. Python: `poetry.lock`, `Pipfile.lock`, `uv.lock`, `requirements.txt` (only `==`-pinned lines). 
+ +### Install wrappers — `corgea npm` / `yarn` / `pnpm` / `pip` / `uv` + +Wraps install commands (`npm install`, `yarn add`, `pnpm add`, `pip install`), resolves what the package manager *would* install against the public registry, and refuses to run the install when a resolved version was published within `--threshold`. Use as a thin replacement for the bare command in CI scripts or interactive shells. + +```bash +corgea npm install axios@^1.0.0 --save-dev +corgea pnpm add @types/node@latest +corgea yarn add lodash +corgea pip install requests==2.31.0 +corgea pip install -r requirements.txt +corgea uv add django +corgea uv pip install requests==2.31.0 +corgea uv sync # verifies uv.lock / other Python lockfiles +corgea npm install # bare install — verifies the lockfile +``` + +| Flag | Description | +|------|-------------| +| `--threshold <duration>` (`-t`) | Recency window (`2d`, `48h`, `30m`, `1w`). Default `2d`. | +| `--no-fail` | Demote a recent finding from a hard block to a warning (install runs anyway). | +| `--check-only` | Run the verification but never exec the install. | +| `--fail-unpinned` | Also fail on unverifiable specs (URL/git/file/editable) and unpinned `requirements.txt` lines pulled in by `-r`. | +| `--json` | Machine-readable output. | + +Spec resolution: + +* **npm / yarn / pnpm** — `pkg`, `pkg@latest`, `pkg@1.2.3`, `pkg@^1.0.0`, `pkg@>=1.0.0 <2.0.0`, `pkg@next` (any dist-tag), and scoped names (`@types/node@...`). Ranges are resolved against the registry's full version list using `semver` semantics. +* **pip / `uv pip install` / `uv add`** — `pkg`, `pkg==1.2.3`, `pkg>=1,<2`, `pkg~=1.4`, `pkg[extras]==X`. Exact `==` pins are honoured precisely; other PEP 440 specifiers are resolved against PyPI's release list with a best-effort comparison. `uv sync` with no package args verifies the project lockfile (`uv.lock`, etc.) then runs sync. 
+* **Skipped (warning, not blocked)** — `git+...`, `file:...`, `./local`, `http(s)://...`, `npm:alias@...`, `workspace:*`, `pip -e`. These are explicit out-of-band sources we can't verify against a registry. + +Subcommands other than `install` / `add` / `i` are forwarded straight through to the package manager unchanged, so `corgea npm view ...` and similar just work. + ## Common Workflows ### Scan full project @@ -148,6 +286,37 @@ corgea scan --fail-on CR --out-format sarif --out-file results.sarif corgea upload report.json --project-name my-app ``` +### Block builds that pull in a freshly-published dependency + +```bash +corgea deps --threshold 2d --fail +``` + +### Require pinned, lockfile-resolved dependencies + +```bash +corgea deps --fail-unpinned +``` + +Use this together with `--fail` to gate both freshness and pinning in one CI step: + +```bash +corgea deps --threshold 2d --fail --fail-unpinned +``` + +### Block CI on known CVEs + +See [GitHub Actions](#github-actions) under CVE detection for the full workflow. Local dry-run first: `corgea deps --check-cve` (no `--fail-cve`) to inspect findings without failing. + +### Pre-check an install before letting it run + +```bash +corgea npm install axios@^1.0.0 +corgea pip install -r requirements.txt --fail-unpinned +``` + +Ecosystem commands resolve the actual version a package manager would install, block if it was published within the threshold, and otherwise transparently run the install (preserving the package manager's exit code). 
+ ### Export results ```bash diff --git a/src/authorize.rs b/src/authorize.rs index 39b5df3..686c042 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -1,17 +1,19 @@ -use crate::{config::Config, utils::{terminal, api}}; +use crate::{ + config::Config, + utils::{api, terminal}, +}; +use http_body_util::Full; +use hyper::body::Bytes; use hyper::body::Incoming; use hyper::service::service_fn; use hyper::{Request, Response, StatusCode}; use hyper_util::rt::TokioIo; -use http_body_util::Full; -use hyper::body::Bytes; use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::thread; use std::time::Duration; use tokio::net::TcpListener; - const DEFAULT_PORT: u16 = 9876; pub fn run(scope: Option, url: Option) -> Result<(), Box> { @@ -24,60 +26,62 @@ pub fn run(scope: Option, url: Option) -> Result<(), Box "https://www.corgea.app".to_string(), }; - + // Find available port starting from default let port = find_available_port(DEFAULT_PORT)?; let callback_url = format!("http://localhost:{}", port); - let auth_url = format!("{}/authorize?callback={}", base_domain, - urlencoding::encode(&callback_url)); - + let auth_url = format!( + "{}/authorize?callback={}", + base_domain, + urlencoding::encode(&callback_url) + ); + println!("Opening browser to authorize Corgea CLI..."); println!("Authorization URL: {}", auth_url); - + // Open browser if let Err(e) = open::that(&auth_url) { eprintln!("Failed to open browser automatically: {}", e); println!("Please manually open the following URL in your browser:"); println!("{}", auth_url); } - + // Set up shared state for the authorization code let auth_code = Arc::new(Mutex::new(None::)); let auth_code_clone = auth_code.clone(); - + // Set up loading message let stop_signal = Arc::new(Mutex::new(false)); let stop_signal_clone = stop_signal.clone(); - + // Start loading spinner in a separate thread let loading_handle = thread::spawn(move || { terminal::show_loading_message("Waiting for authorization...", stop_signal_clone); 
}); - + // Start the HTTP server to listen for the callback let rt = tokio::runtime::Runtime::new()?; - let result = rt.block_on(async { - start_callback_server(port, auth_code_clone).await - }); - + let result = rt.block_on(async { start_callback_server(port, auth_code_clone).await }); + // Stop the loading spinner *stop_signal.lock().unwrap() = true; loading_handle.join().unwrap(); - + match result { Ok(code) => { - // Exchange the code for a user token let user_token = api::exchange_code_for_token(&base_domain, &code)?; - + // Save the user token to config let mut config = Config::load().expect("Failed to load config"); - config.set_token(user_token).expect("Failed to save user token"); + config + .set_token(user_token) + .expect("Failed to save user token"); config.set_url(base_domain).expect("Failed to save URL"); - + println!("\r🎉 Successfully authenticated to Corgea!"); println!("You can now use other Corgea CLI commands."); - + Ok(()) } Err(e) => { @@ -95,7 +99,7 @@ fn find_available_port(start_port: u16) -> Result Result Result> { let addr = format!("127.0.0.1:{}", port); let listener = match TcpListener::bind(&addr).await { - Ok(listener) => { - listener - } + Ok(listener) => listener, Err(e) => { return Err(format!("Failed to bind to {}: {}", addr, e).into()); } }; - + loop { tokio::select! 
{ accept_result = listener.accept() => { @@ -175,17 +177,17 @@ async fn handle_callback( auth_code: Arc>>, ) -> Result>, hyper::Error> { let uri = req.uri(); - + // Parse query parameters if let Some(query) = uri.query() { let params = parse_query_params(query); - + if let Some(code) = params.get("code") { // Store the authorization code if let Ok(mut code_guard) = auth_code.lock() { *code_guard = Some(code.clone()); } - + // Return success page let success_html = r#" @@ -357,20 +359,20 @@ async fn handle_callback( "#; - + return Ok(Response::builder() .status(StatusCode::OK) .header("Content-Type", "text/html") .body(Full::new(Bytes::from(success_html))) .unwrap()); } - + if let Some(error) = params.get("error") { let default_error = "Unknown error occurred".to_string(); - let error_description = params.get("error_description") - .unwrap_or(&default_error); - - let error_html = format!(r#" + let error_description = params.get("error_description").unwrap_or(&default_error); + + let error_html = format!( + r#" @@ -432,8 +434,10 @@ async fn handle_callback( - "#, error, error_description); - + "#, + error, error_description + ); + return Ok(Response::builder() .status(StatusCode::BAD_REQUEST) .header("Content-Type", "text/html") @@ -441,7 +445,7 @@ async fn handle_callback( .unwrap()); } } - + // Default response for other requests let response_html = r#" @@ -500,7 +504,7 @@ async fn handle_callback( "#; - + Ok(Response::builder() .status(StatusCode::OK) .header("Content-Type", "text/html") @@ -514,20 +518,16 @@ fn parse_query_params(query: &str) -> HashMap { .filter_map(|param| { let mut parts = param.splitn(2, '='); match (parts.next(), parts.next()) { - (Some(key), Some(value)) => { - Some(( - urlencoding::decode(key).ok()?.into_owned(), - urlencoding::decode(value).ok()?.into_owned(), - )) - } + (Some(key), Some(value)) => Some(( + urlencoding::decode(key).ok()?.into_owned(), + urlencoding::decode(value).ok()?.into_owned(), + )), _ => None, } }) .collect() } - - 
#[cfg(test)] mod tests { use super::*; @@ -541,7 +541,10 @@ mod tests { fn reserve_ephemeral_port() -> u16 { let listener = StdTcpListener::bind("127.0.0.1:0").expect("failed to bind ephemeral port"); - listener.local_addr().expect("failed to get local addr").port() + listener + .local_addr() + .expect("failed to get local addr") + .port() } fn spawn_callback_server( @@ -604,7 +607,10 @@ mod tests { let params = parse_query_params("code=a%20b&error_description=needs%2Blogin"); assert_eq!(params.get("code"), Some(&"a b".to_string())); - assert_eq!(params.get("error_description"), Some(&"needs+login".to_string())); + assert_eq!( + params.get("error_description"), + Some(&"needs+login".to_string()) + ); } #[test] diff --git a/src/bin/vuln-api-stub.rs b/src/bin/vuln-api-stub.rs new file mode 100644 index 0000000..cf47d08 --- /dev/null +++ b/src/bin/vuln-api-stub.rs @@ -0,0 +1,40 @@ +//! Standalone vuln-api stub for e2e dogfood and local development. + +use clap::Parser; +use corgea::vuln_api_stub; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command( + name = "vuln-api-stub", + about = "Minimal TCP stub for vuln-api package-check routes" +)] +struct Args { + /// JSON fixture file (`package_checks` + optional `advisories`). + #[arg(long)] + fixtures: PathBuf, + + /// TCP port to bind (`0` = ephemeral). + #[arg(long, default_value = "0")] + port: u16, + + /// Print base URL to stdout and keep serving until SIGTERM. 
+ #[arg(long)] + print_url: bool, +} + +fn main() { + let args = Args::parse(); + let stub = if args.port == 0 { + vuln_api_stub::spawn_from_file(&args.fixtures) + } else { + let fixtures = vuln_api_stub::load_from_file(&args.fixtures) + .unwrap_or_else(|e| panic!("failed to load {}: {e}", args.fixtures.display())); + vuln_api_stub::spawn_on_port(fixtures, args.port) + }; + if args.print_url { + println!("{}", stub.base_url); + } + eprintln!("vuln-api stub listening on {}", stub.base_url); + stub.block(); +} diff --git a/src/cicd.rs b/src/cicd.rs index 7743784..40e075e 100644 --- a/src/cicd.rs +++ b/src/cicd.rs @@ -1,20 +1,19 @@ - pub fn running_in_ci() -> bool { // this will need to be updated to include other CI systems std::env::var("CI").is_ok() && std::env::var("GITHUB_ACTIONS").is_ok() } pub fn which_ci() -> String { - return if std::env::var("GITHUB_ACTIONS").is_ok() { + if std::env::var("GITHUB_ACTIONS").is_ok() { "github".to_string() } else { "unknown".to_string() } } - pub fn get_github_env_vars() -> std::collections::HashMap { - let mut github_env_vars: std::collections::HashMap = std::collections::HashMap::new(); + let mut github_env_vars: std::collections::HashMap = + std::collections::HashMap::new(); for (key, value) in std::env::vars() { if key.starts_with("GITHUB_") { diff --git a/src/config.rs b/src/config.rs index 8976c61..10b31d9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,22 +1,25 @@ -use dirs; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::{env, fs, io}; -use toml; + +/// Production vuln-api host. Used when neither `CORGEA_VULN_API_URL` +/// nor `vuln_api_url` in `~/.corgea/config.toml` is set. Self-hosted +/// or staging deployments override via env or config. 
+const DEFAULT_VULN_API_URL: &str = "https://vuln-api.corgea.app"; #[derive(Serialize, Deserialize, Clone)] pub struct Config { pub(crate) url: String, pub(crate) debug: i8, pub(crate) token: String, + #[serde(default)] + pub(crate) vuln_api_url: Option, } impl Config { fn config_path() -> io::Result { - let mut dir_path = dirs::home_dir().ok_or(io::Error::new( - io::ErrorKind::Other, - "Unable to get home directory", - ))?; + let mut dir_path = + dirs::home_dir().ok_or(io::Error::other("Unable to get home directory"))?; dir_path.push(".corgea"); @@ -38,6 +41,7 @@ impl Config { url: "https://www.corgea.app".to_string(), debug: 0, token: "".to_string(), + vuln_api_url: None, }; let toml = toml::to_string(&config).expect("Failed to serialize config"); @@ -95,13 +99,25 @@ impl Config { return corgea_token; } - return self.token.clone(); + self.token.clone() } pub fn get_debug(&self) -> i8 { if let Ok(corgea_debug) = env::var("CORGEA_DEBUG") { return corgea_debug.parse::().unwrap_or(0); } - return self.debug; + self.debug + } + + pub fn get_vuln_api_url(&self) -> String { + let raw = crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .or_else(|| self.vuln_api_url.clone()) + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()); + let trimmed = raw.trim().trim_end_matches('/'); + if trimmed.is_empty() { + DEFAULT_VULN_API_URL.trim_end_matches('/').to_string() + } else { + trimmed.to_string() + } } } diff --git a/src/inspect.rs b/src/inspect.rs index 0933d0c..89fe21e 100644 --- a/src/inspect.rs +++ b/src/inspect.rs @@ -1,16 +1,15 @@ -use crate::utils; use crate::config::Config; -use std::time::SystemTime; use crate::scanners; +use crate::utils; +use std::time::SystemTime; pub fn run( - config: &Config, - issues: &bool, - json: &bool, - summary: &bool, - fix_explanation: &bool, - fix_diff: &bool, + config: &Config, + issues: &bool, + json: &bool, + summary: &bool, + fix_explanation: &bool, + fix_diff: &bool, id: &String, - ) { fn print_section(title: 
&str, value: impl ToString) { println!("{:<15}: {}", title, value.to_string()); @@ -22,7 +21,10 @@ pub fn run( let issue_details = match utils::api::get_issue(&config.get_url(), id) { Ok(issue) => issue, Err(e) => { - eprintln!("Failed to fetch issue details for issue ID {} with error:\n{}", id, e); + eprintln!( + "Failed to fetch issue details for issue ID {} with error:\n{}", + id, e + ); if e.to_string().contains("404") { println!("If you're trying to inspect a scan make sure to remove the --issue argument"); } @@ -38,33 +40,45 @@ pub fn run( print_section("Urgency", &issue_details.issue.urgency); print_section("Category", &issue_details.issue.classification.name); print_section("File Path", &issue_details.issue.location.file.path); - print_section("Line Num", issue_details.issue.location.line_number.to_string()); - print_section("Status", utils::generic::get_status(&issue_details.issue.status)); + print_section( + "Line Num", + issue_details.issue.location.line_number.to_string(), + ); + print_section( + "Status", + utils::generic::get_status(&issue_details.issue.status), + ); } if let Some(ref details) = issue_details.issue.details { if let Some(ref explanation) = details.explanation { if *summary || show_everything { - println!("Explanation:\n\n{}\n-------------------------", utils::terminal::format_code(explanation)) + println!( + "Explanation:\n\n{}\n-------------------------", + utils::terminal::format_code(explanation) + ) } } - } + } if let Some(auto_fix_suggestion) = issue_details.issue.auto_fix_suggestion { if *fix_explanation || show_everything { if show_everything { - utils::terminal::prompt_to_continue_or_exit(Some("\nTo continue to viewing the fix explanation please press enter, otherwise Ctrl+C to exit.\n".into())); + utils::terminal::prompt_to_continue_or_exit(Some("\nTo continue to viewing the fix explanation please press enter, otherwise Ctrl+C to exit.\n")); } if let Some(ref patch) = &auto_fix_suggestion.patch { 
utils::terminal::print_with_pagination(&format!( - "Fix Explanation:\n\n{}\n-------------------------", utils::terminal::format_code(&patch.explanation) + "Fix Explanation:\n\n{}\n-------------------------", + utils::terminal::format_code(&patch.explanation) )); } } - if *fix_diff || show_everything { + if *fix_diff || show_everything { if show_everything { - utils::terminal::prompt_to_continue_or_exit(Some("\nTo continue to viewing the diff of the fix please press enter, otherwise Ctrl+C to exit.\n".into())); + utils::terminal::prompt_to_continue_or_exit(Some("\nTo continue to viewing the diff of the fix please press enter, otherwise Ctrl+C to exit.\n")); } if let Some(ref patch) = &auto_fix_suggestion.patch { - utils::terminal::print_with_pagination(&utils::terminal::format_diff(&patch.diff)); + utils::terminal::print_with_pagination(&utils::terminal::format_diff( + &patch.diff, + )); } } } @@ -74,7 +88,9 @@ pub fn run( Err(e) => { eprintln!("Failed to fetch scan details for scan ID {}: {}", id, e); if e.to_string().contains("404") { - println!("If you're trying to inspect an issues make sure to pass --issue argument"); + println!( + "If you're trying to inspect an issues make sure to pass --issue argument" + ); } std::process::exit(1); } @@ -90,21 +106,21 @@ pub fn run( print_section("Status", scan_details.status); print_section("Project", &scan_details.project); print_section("Engine", &scan_details.engine); - let created_at = chrono::DateTime::::from(SystemTime::now()).format("%Y-%m-%d %H:%M:%S").to_string(); + let created_at = chrono::DateTime::::from(SystemTime::now()) + .format("%Y-%m-%d %H:%M:%S") + .to_string(); print_section("Created At", &created_at); - match scanners::blast::fetch_and_group_scan_issues(&config.get_url(), &scan_details.project) { - Ok(counts) => { - let total_issues = counts.values().sum::(); - let order = vec!["CR", "HI", "ME", "LO"]; - for urgency in order { - if let Some(count) = counts.get(urgency) { - print_section(&format!("{} 
Issues", urgency), &count.to_string()); - } + if let Ok(counts) = + scanners::blast::fetch_and_group_scan_issues(&config.get_url(), &scan_details.project) + { + let total_issues = counts.values().sum::(); + let order = vec!["CR", "HI", "ME", "LO"]; + for urgency in order { + if let Some(count) = counts.get(urgency) { + print_section(&format!("{} Issues", urgency), count.to_string()); } - print_section("Total Issues", &total_issues); - }, - Err(_) => { } + } + print_section("Total Issues", total_issues); }; - } } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..2c531e6 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod vuln_api_stub; diff --git a/src/list.rs b/src/list.rs index afacc31..571559b 100644 --- a/src/list.rs +++ b/src/list.rs @@ -1,17 +1,31 @@ -use crate::utils; use crate::config::Config; -use std::path::Path; -use serde_json::json; use crate::log::debug; +use crate::utils; +use serde_json::json; +use std::path::Path; -pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: &Option, page_size: &Option, scan_id: &Option) { - let project_name = utils::generic::get_current_working_directory().unwrap_or("unknown".to_string()); - println!(""); +pub fn run( + config: &Config, + issues: &bool, + sca_issues: &bool, + json: &bool, + page: &Option, + page_size: &Option, + scan_id: &Option, +) { + let project_name = + utils::generic::get_current_working_directory().unwrap_or("unknown".to_string()); + println!(); if *sca_issues { - let sca_issues_response = match utils::api::get_sca_issues(&config.get_url(), Some((*page).unwrap_or(1)), *page_size, scan_id.clone()) { + let sca_issues_response = match utils::api::get_sca_issues( + &config.get_url(), + Some((*page).unwrap_or(1)), + *page_size, + scan_id.clone(), + ) { Ok(response) => response, Err(e) => { - debug(&format!("Error Sending Request: {}", e.to_string())); + debug(&format!("Error Sending Request: {}", e)); if e.to_string().contains("404") { if 
scan_id.is_some() { eprintln!("Scan with ID '{}' doesn't exist or has no SCA issues. Please run 'corgea scan' to create a new scan for this project.", scan_id.as_ref().unwrap()); @@ -42,18 +56,16 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: return; } - let mut table = vec![ - vec![ - "Issue ID".to_string(), - "Package".to_string(), - "Version".to_string(), - "Fix Version".to_string(), - "Severity".to_string(), - "CVE".to_string(), - "Ecosystem".to_string(), - "File Path".to_string(), - ], - ]; + let mut table = vec![vec![ + "Issue ID".to_string(), + "Package".to_string(), + "Version".to_string(), + "Fix Version".to_string(), + "Severity".to_string(), + "CVE".to_string(), + "Ecosystem".to_string(), + "File Path".to_string(), + ]]; for issue in &sca_issues_response.issues { let path = Path::new(&issue.location.path); @@ -77,7 +89,11 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: issue.id.clone(), issue.package.name.clone(), issue.package.version.clone(), - issue.package.fix_version.clone().unwrap_or("N/A".to_string()), + issue + .package + .fix_version + .clone() + .unwrap_or("N/A".to_string()), issue.severity.clone().unwrap_or("N/A".to_string()), issue.cve.clone().unwrap_or("N/A".to_string()), issue.package.ecosystem.clone(), @@ -85,12 +101,22 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: ]); } - utils::terminal::print_table(table, Some(sca_issues_response.page), Some(sca_issues_response.total_pages)); + utils::terminal::print_table( + table, + Some(sca_issues_response.page), + Some(sca_issues_response.total_pages), + ); } else if *issues { - let issues_response = match utils::api::get_scan_issues(&config.get_url(), &project_name, Some((*page).unwrap_or(1)), *page_size, scan_id.clone()) { + let issues_response = match utils::api::get_scan_issues( + &config.get_url(), + &project_name, + Some((*page).unwrap_or(1)), + *page_size, + scan_id.clone(), + ) { 
Ok(response) => response, Err(e) => { - debug(&format!("Error Sending Request: {}", e.to_string())); + debug(&format!("Error Sending Request: {}", e)); if e.to_string().contains("404") { if scan_id.is_some() { eprintln!("Scan with ID '{}' doesn't exist. Please run 'corgea scan' to create a new scan for this project.", scan_id.as_ref().unwrap()); @@ -110,12 +136,17 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: } }; let mut render_blocking_rules = false; - let mut blocking_rules: std::collections::HashMap = std::collections::HashMap::new(); + let mut blocking_rules: std::collections::HashMap = + std::collections::HashMap::new(); if scan_id.is_some() { let mut page: u32 = 1; loop { - match utils::api::check_blocking_rules(&config.get_url(), scan_id.as_ref().unwrap(), Some(page)) { + match utils::api::check_blocking_rules( + &config.get_url(), + scan_id.as_ref().unwrap(), + Some(page), + ) { Ok(rules) => { if rules.block { render_blocking_rules = true; @@ -138,7 +169,6 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: } } - if *json { let mut json = serde_json::json!({ "page": issues_response.page, @@ -146,30 +176,31 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: "results": &issues_response.issues }); if render_blocking_rules { - json["results"] = serde_json::json!( - issues_response.issues.unwrap_or_default().iter().map(|issue| { - serde_json::json!( - utils::api::IssueWithBlockingRules { - id: issue.id.clone(), - scan_id: issue.scan_id.clone(), - status: issue.status.clone(), - urgency: issue.urgency.clone(), - created_at: issue.created_at.clone(), - classification: issue.classification.clone(), - location: issue.location.clone(), - details: issue.details.clone(), - auto_triage: issue.auto_triage.clone(), - auto_fix_suggestion: issue.auto_fix_suggestion.clone(), - blocked: blocking_rules.contains_key(&issue.id), - blocking_rules: if 
blocking_rules.contains_key(&issue.id) { - Some(vec![blocking_rules.get(&issue.id).unwrap().clone()]) - } else { - None - } + json["results"] = serde_json::json!(issues_response + .issues + .unwrap_or_default() + .iter() + .map(|issue| { + serde_json::json!(utils::api::IssueWithBlockingRules { + id: issue.id.clone(), + scan_id: issue.scan_id.clone(), + status: issue.status.clone(), + urgency: issue.urgency.clone(), + created_at: issue.created_at.clone(), + classification: issue.classification.clone(), + location: issue.location.clone(), + details: issue.details.clone(), + auto_triage: issue.auto_triage.clone(), + auto_fix_suggestion: issue.auto_fix_suggestion.clone(), + blocked: blocking_rules.contains_key(&issue.id), + blocking_rules: if blocking_rules.contains_key(&issue.id) { + Some(vec![blocking_rules.get(&issue.id).unwrap().clone()]) + } else { + None } - ) - }).collect::>() - ); + }) + }) + .collect::>()); } let output = json!(json); println!("{}", serde_json::to_string_pretty(&output).unwrap()); @@ -186,9 +217,7 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: table_header.push("Blocking".to_string()); table_header.push("Rule ID".to_string()); } - let mut table = vec![ - table_header - ]; + let mut table = vec![table_header]; for issue in &issues_response.issues.unwrap_or_default() { let classification_display = issue.classification.id.clone(); @@ -216,23 +245,36 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: issue.location.line_number.to_string(), ]; if render_blocking_rules { - row.push(blocking_rules.get(&issue.id).is_some().to_string()); - row.push(blocking_rules.get(&issue.id).unwrap_or(&"".to_string()).to_string()); + row.push(blocking_rules.contains_key(&issue.id).to_string()); + row.push( + blocking_rules + .get(&issue.id) + .unwrap_or(&"".to_string()) + .to_string(), + ); } table.push(row); } utils::terminal::print_table(table, issues_response.page, issues_response.total_pages); 
} else { - let (scans, page, total_pages) = match utils::api::query_scan_list(&config.get_url(), Some(&project_name), *page, *page_size) { + let (scans, page, total_pages) = match utils::api::query_scan_list( + &config.get_url(), + Some(&project_name), + *page, + *page_size, + ) { Ok(scans) => { let page = scans.page; let total_pages = scans.total_pages; - let filtered_scans: Vec = scans.scans.unwrap_or_default().into_iter() + let filtered_scans: Vec = scans + .scans + .unwrap_or_default() + .into_iter() .filter(|scan| scan.project == project_name) .collect(); (filtered_scans, page, total_pages) - }, + } Err(e) => { if e.to_string().contains("404") { eprintln!("Project with name '{}' doesn't exist. Please run 'corgea scan' to create a new scan for this project.", project_name); @@ -256,20 +298,18 @@ pub fn run(config: &Config, issues: &bool, sca_issues: &bool, json: &bool, page: println!("{}", serde_json::to_string_pretty(&output).unwrap()); return; } - let mut table = vec![ - vec![ - "Scan ID".to_string(), - "Project".to_string(), - "Status".to_string(), - "Repo".to_string(), - "Branch".to_string(), - ], - ]; + let mut table = vec![vec![ + "Scan ID".to_string(), + "Project".to_string(), + "Status".to_string(), + "Repo".to_string(), + "Branch".to_string(), + ]]; for scan in &scans { let formatted_repo = scan.repo.clone().unwrap_or("N/A".to_string()); let formatted_repo = if formatted_repo != "N/A" { - if let Some(repo_name) = formatted_repo.split('/').last() { + if let Some(repo_name) = formatted_repo.split('/').next_back() { let owner = formatted_repo.split('/').nth(3).unwrap_or("unknown"); let repo_name = repo_name.strip_suffix(".git").unwrap_or(repo_name); format!("{}/{}", owner, repo_name) diff --git a/src/log.rs b/src/log.rs index daf745a..7f193fe 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,8 +1,8 @@ -use crate::config::Config; +use crate::config::Config; pub fn debug(input: &str) { let config = Config::load().expect("Failed to load config"); if 
config.get_debug() == 1 { println!("DEBUG: {}\n", input); } -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index 5da00f9..7cbc51e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,28 +1,31 @@ +mod authorize; +mod cicd; mod config; -mod scan; -mod wait; -mod list; mod inspect; -mod cicd; +mod list; mod log; +mod precheck; +mod scan; mod setup_hooks; -mod authorize; +mod verify_deps; +mod vuln_api; +mod wait; mod scanners { - pub mod fortify; pub mod blast; + pub mod fortify; pub mod parsers; } mod utils { - pub mod terminal; - pub mod generic; pub mod api; + pub mod generic; + pub mod terminal; } mod targets; -use std::str::FromStr; -use clap::{Parser, Subcommand, CommandFactory}; +use clap::{Args, CommandFactory, Parser, Subcommand}; use config::Config; use scanners::fortify::parse as fortify_parse; +use std::str::FromStr; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -32,20 +35,26 @@ struct Cli { command: Option, #[arg(required = false)] - args: Vec, + args: Vec, } #[derive(Subcommand, Debug)] enum Commands { /// Authenticate to Corgea - Login { + Login { #[arg(help = "API token (if not provided, will use OAuth flow)")] token: Option, - #[arg(long, help = "The url of the corgea instance to use. defaults to https://www.corgea.app")] + #[arg( + long, + help = "The url of the corgea instance to use. defaults to https://www.corgea.app" + )] url: Option, - #[arg(long, help = "Scope to use for custom domain (e.g., 'ikea' for ikea.corgea.app). Only used with OAuth flow")] + #[arg( + long, + help = "Scope to use for custom domain (e.g., 'ikea' for ikea.corgea.app). Only used with OAuth flow" + )] scope: Option, }, /// Upload a scan report to Corgea via STDIN or a file @@ -65,13 +74,20 @@ enum Commands { #[arg(default_value = "blast")] scanner: Scanner, - #[arg(long, help = "Fail on (exits with error code 1) a specific severity level . 
Valid options are CR, HI, ME, LO.")] + #[arg( + long, + help = "Fail on (exits with error code 1) a specific severity level . Valid options are CR, HI, ME, LO." + )] fail_on: Option, #[arg(long, help = "Only scan uncommitted changes.")] only_uncommitted: bool, - #[arg(short, long, help = "Fail on (exits with error code 1) based on blocking rules defined in the web app.")] + #[arg( + short, + long, + help = "Fail on (exits with error code 1) based on blocking rules defined in the web app." + )] fail: bool, #[arg( @@ -88,10 +104,17 @@ enum Commands { )] scan_type: Option, - #[arg(long, help = "Output the result to a file in a specific format. Valid options are json, html, sarif, markdown.")] + #[arg( + long, + help = "Output the result to a file in a specific format. Valid options are json, html, sarif, markdown." + )] out_format: Option, - #[arg(short, long, help = "Output the result to a file. you can use the out_format option to specify the format of the output file.")] + #[arg( + short, + long, + help = "Output the result to a file. you can use the out_format option to specify the format of the output file." 
+ )] out_file: Option, #[arg( @@ -107,16 +130,18 @@ enum Commands { project_name: Option, }, /// Wait for the latest in progress scan - Wait { - scan_id: Option, - }, + Wait { scan_id: Option }, /// List something, by default it lists the scans #[command(alias = "ls")] List { #[arg(short, long, help = "List issues instead of scans")] issues: bool, - #[arg(long, short = 'c', help = "List SCA (Software Composition Analysis) issues instead of regular issues")] + #[arg( + long, + short = 'c', + help = "List SCA (Software Composition Analysis) issues instead of regular issues" + )] sca_issues: bool, #[arg(short, long, help = "Specify the scan id to list issues for.")] @@ -129,7 +154,7 @@ enum Commands { json: bool, #[arg(long, value_parser = clap::value_parser!(u16), help = "Number of items per page")] - page_size: Option + page_size: Option, }, /// Inspect something, by default it will inspect a scan Inspect { @@ -140,22 +165,192 @@ enum Commands { #[arg(long, help = "Output the result in JSON format.")] json: bool, - #[arg(long, short, help = "Display a summary only of the issue in the output (only if --issue is true).")] + #[arg( + long, + short, + help = "Display a summary only of the issue in the output (only if --issue is true)." + )] summary: bool, - #[arg(long, short, help = "Display the fix explanations only in the output (only if --issue is true).")] + #[arg( + long, + short, + help = "Display the fix explanations only in the output (only if --issue is true)." + )] fix: bool, - #[arg(long, short, help = "Display the diff of the fix only in the output (only if --issue is true).")] + #[arg( + long, + short, + help = "Display the diff of the fix only in the output (only if --issue is true)." 
+ )] diff: bool, id: String, }, /// Setup a git hook, currently only pre-commit is supported SetupHooks { - #[arg(long, short, help = "Include default config (scan types are pii, secrets and fail on levels are CR, HI, ME, LO).")] + #[arg( + long, + short, + help = "Include default config (scan types are pii, secrets and fail on levels are CR, HI, ME, LO)." + )] default_config: bool, }, + /// Verify installed dependencies against the registry to flag recently published versions. + /// Useful as a supply-chain tripwire: any dep whose installed version was published within + /// the configured threshold will be reported. Currently supports npm and Python. + /// Pass --check-cve to query the Corgea vulnerability database for known advisories (requires login). + Deps { + #[arg( + long, + short = 'e', + default_value = "all", + help = "Which ecosystem(s) to verify. Valid options are 'npm', 'python', or 'all' (default)." + )] + ecosystem: String, + + #[arg( + long, + short = 't', + default_value = "2d", + help = "Recency threshold. Any dependency published within this window is flagged. Examples: '2d' (default), '48h', '30m', '1w'. Bare numbers are interpreted as days." + )] + threshold: String, + + #[arg( + long, + help = "Include development dependencies (default: production only)." + )] + include_dev: bool, + + #[arg( + long, + short = 'f', + help = "Exit with a non-zero status code if any recently published dependency is found." + )] + fail: bool, + + #[arg( + long, + help = "Exit with a non-zero status code if any dependency is unpinned (e.g. package.json without a lockfile, pyproject.toml/Pipfile without a matching lockfile, or unpinned `requirements.txt` lines). Independent of --fail." + )] + fail_unpinned: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + #[arg( + long, + short = 'p', + help = "Path to the project to verify. Defaults to the current directory." 
+ )] + path: Option, + + #[arg( + long, + help = "Check each dependency against the Corgea vulnerability database for known CVEs/advisories. Requires corgea login (or CORGEA_TOKEN). See https://docs.corgea.app/cli/deps#check-cve." + )] + check_cve: bool, + + #[arg( + long, + env = "CORGEA_CVE_CONCURRENCY", + default_value = "8", + value_parser = clap::value_parser!(u8).range(1..=32), + help = "Max in-flight vuln-api requests when --check-cve is set (1..32). Tune down for slow networks or vuln-api rate limits." + )] + cve_concurrency: u8, + + #[arg( + long, + requires = "check_cve", + help = "Exit with a non-zero status code if any known CVE is found. Requires --check-cve. Independent of --fail and --fail-unpinned. See https://docs.corgea.app/cli/deps#check-cve." + )] + fail_cve: bool, + + #[arg( + long, + default_value = "any", + value_parser = verify_deps::parse_severity_floor_arg, + help = "Minimum severity required to trip --fail-cve. Single value (critical|high|medium|low|info) matches that level and above; comma-separated list (e.g. critical,high) matches exactly those levels; 'any' (default) matches everything. Requires --fail-cve when set to a non-'any' value. See https://docs.corgea.app/cli/deps#severity." + )] + severity: verify_deps::SeverityFloor, + }, + /// Wrap `npm` install/add commands: verify registry publish times, then run npm. + /// + /// Examples: + /// corgea npm install axios@^1.0.0 --save-dev + /// corgea npm install + Npm(InstallWrapArgs), + /// Wrap `yarn` add/install commands: verify registry publish times, then run yarn. + /// + /// Examples: + /// corgea yarn add lodash + /// corgea yarn install + Yarn(InstallWrapArgs), + /// Wrap `pnpm` add/install commands: verify registry publish times, then run pnpm. + /// + /// Examples: + /// corgea pnpm add @types/node@latest + /// corgea pnpm install + Pnpm(InstallWrapArgs), + /// Wrap `pip install`: verify registry publish times, then run pip. 
+ /// + /// Examples: + /// corgea pip install requests==2.31.0 + /// corgea pip install -r requirements.txt + Pip(InstallWrapArgs), + /// Wrap `uv` install commands: verify registry publish times, then run uv. + /// + /// Examples: + /// corgea uv add requests + /// corgea uv pip install django==5.0.1 + /// corgea uv sync + Uv(InstallWrapArgs), +} + +/// Shared flags for `corgea npm` / `yarn` / `pnpm` / `pip` / `uv`. +#[derive(Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + help = "Recency threshold. Resolved versions younger than this are flagged. Same syntax as `deps --threshold`." + )] + threshold: String, + + #[arg( + long, + help = "Demote a recent finding from a hard block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Run the verification but never exec the install command." + )] + check_only: bool, + + #[arg( + long, + help = "Also fail when an unpinned/unverifiable spec (URL, git, file:, editable) is in the install command." + )] + fail_unpinned: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). 
+ #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -165,6 +360,33 @@ enum Scanner { Blast, } +fn parse_threshold_or_exit(threshold: &str) -> std::time::Duration { + match verify_deps::parse_threshold(threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("Invalid --threshold: {}", e); + std::process::exit(2); + } + } +} + +fn install_wrap_options(args: &InstallWrapArgs) -> precheck::PrecheckOptions { + precheck::PrecheckOptions { + threshold: parse_threshold_or_exit(&args.threshold), + no_fail: args.no_fail, + check_only: args.check_only, + fail_unpinned: args.fail_unpinned, + json: args.json, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn run_install_wrap_command(manager: precheck::PackageManager, args: &InstallWrapArgs) { + let exit_code = precheck::run_install(manager, &args.cmd, install_wrap_options(args)); + std::process::exit(exit_code); +} + impl FromStr for Scanner { type Err = &'static str; @@ -181,20 +403,18 @@ impl FromStr for Scanner { fn main() { let cli = Cli::parse(); let mut corgea_config = Config::load().expect("Failed to load config"); - fn verify_token_and_exit_when_fail (config: &Config) { + fn verify_token_and_exit_when_fail(config: &Config) { if config.get_token().is_empty() { eprintln!("No token set.\nPlease run 'corgea login' to authenticate.\nFor more info checkout our docs at Check out our docs at https://docs.corgea.app/install_cli#login-with-the-cli"); std::process::exit(1); } utils::api::set_auth_token(&config.get_token()); match utils::api::verify_token(config.get_url().as_str()) { - Ok(true) => { - return; - } + Ok(true) => {} Ok(false) => { println!("Invalid token provided.\nPlease run 'corgea login' to authenticate.\nFor more info checkout our docs at Check out our docs at https://docs.corgea.app/install_cli#login-with-the-cli"); 
std::process::exit(1); - }, + } Err(e) => { eprintln!("Error occurred: {}", e); std::process::exit(1); @@ -203,19 +423,34 @@ fn main() { } match &cli.command { Some(Commands::Login { token, url, scope }) => { - let effective_token = token.clone().or_else(|| utils::generic::get_env_var_if_exists("CORGEA_TOKEN")); - + let effective_token = token + .clone() + .or_else(|| utils::generic::get_env_var_if_exists("CORGEA_TOKEN")); + match effective_token { Some(token_value) => { - let token_source = if token.is_some() { "parameter" } else { "CORGEA_TOKEN environment variable" }; + let token_source = if token.is_some() { + "parameter" + } else { + "CORGEA_TOKEN environment variable" + }; utils::api::set_auth_token(&token_value); - match utils::api::verify_token(url.as_deref().unwrap_or(corgea_config.get_url().as_str())) { + match utils::api::verify_token( + url.as_deref().unwrap_or(corgea_config.get_url().as_str()), + ) { Ok(true) => { - corgea_config.set_token(token_value.clone()).expect("Failed to set token"); + corgea_config + .set_token(token_value.clone()) + .expect("Failed to set token"); if let Some(url) = url { - corgea_config.set_url(url.clone()).expect("Failed to set url"); + corgea_config + .set_url(url.clone()) + .expect("Failed to set url"); } - println!("Successfully authenticated to Corgea using token from {}.", token_source) + println!( + "Successfully authenticated to Corgea using token from {}.", + token_source + ) } Ok(false) => println!("Invalid token provided from {}.", token_source), Err(e) => { @@ -225,7 +460,7 @@ fn main() { } eprintln!("Error occurred: {}", e); std::process::exit(1); - }, + } } } // No token available - use OAuth flow @@ -233,9 +468,9 @@ fn main() { if url.is_some() && scope.is_some() { eprintln!("Warning: --url option is ignored when using OAuth flow with --scope. 
The scope determines the domain."); } - + match authorize::run(scope.clone(), url.clone()) { - Ok(()) => {}, + Ok(()) => {} Err(e) => { eprintln!("Authorization failed: {}", e); std::process::exit(1); @@ -244,7 +479,10 @@ fn main() { } } } - Some(Commands::Upload { report, project_name }) => { + Some(Commands::Upload { + report, + project_name, + }) => { verify_token_and_exit_when_fail(&corgea_config); match report { Some(report) => { @@ -259,7 +497,18 @@ fn main() { } } } - Some(Commands::Scan { scanner , fail_on, fail, only_uncommitted, scan_type, policy, out_format, out_file, target, project_name }) => { + Some(Commands::Scan { + scanner, + fail_on, + fail, + only_uncommitted, + scan_type, + policy, + out_format, + out_file, + target, + project_name, + }) => { verify_token_and_exit_when_fail(&corgea_config); if let Some(level) = fail_on { if *scanner != Scanner::Blast { @@ -292,7 +541,9 @@ fn main() { std::process::exit(1); } - if out_file.is_some() && !out_format.is_some() || !out_file.is_some() && out_format.is_some() { + if out_file.is_some() && !out_format.is_some() + || !out_file.is_some() && out_format.is_some() + { eprintln!("out_file and out_format must be used together."); std::process::exit(1); } @@ -342,14 +593,32 @@ fn main() { match scanner { Scanner::Snyk => scan::run_snyk(&corgea_config, project_name.clone()), Scanner::Semgrep => scan::run_semgrep(&corgea_config, project_name.clone()), - Scanner::Blast => scanners::blast::run(&corgea_config, fail_on.clone(), fail, only_uncommitted, scan_type.clone(), policy.clone(), out_format.clone(), out_file.clone(), target.clone(), project_name.clone()) + Scanner::Blast => scanners::blast::run( + &corgea_config, + fail_on.clone(), + fail, + only_uncommitted, + scan_type.clone(), + policy.clone(), + out_format.clone(), + out_file.clone(), + target.clone(), + project_name.clone(), + ), } } Some(Commands::Wait { scan_id }) => { verify_token_and_exit_when_fail(&corgea_config); wait::run(&corgea_config, 
scan_id.clone(), None); } - Some(Commands::List { issues , json, page, page_size, scan_id, sca_issues}) => { + Some(Commands::List { + issues, + json, + page, + page_size, + scan_id, + sca_issues, + }) => { verify_token_and_exit_when_fail(&corgea_config); if *issues && *sca_issues { eprintln!("Cannot use both --issues and --sca-issues at the same time."); @@ -359,15 +628,146 @@ fn main() { println!("scan_id option is only supported for issues list command."); std::process::exit(1); } - list::run(&corgea_config, issues, sca_issues, json, page, page_size, scan_id); + list::run( + &corgea_config, + issues, + sca_issues, + json, + page, + page_size, + scan_id, + ); } - Some(Commands::Inspect { issue, json, id, summary, fix, diff }) => { + Some(Commands::Inspect { + issue, + json, + id, + summary, + fix, + diff, + }) => { verify_token_and_exit_when_fail(&corgea_config); inspect::run(&corgea_config, issue, json, summary, fix, diff, id) } Some(Commands::SetupHooks { default_config }) => { setup_hooks::setup_pre_commit_hook(*default_config); } + Some(Commands::Deps { + ecosystem, + threshold, + include_dev, + fail, + fail_unpinned, + json, + path, + check_cve, + fail_cve, + cve_concurrency, + severity, + }) => { + // Runtime validation: a non-`Any` --severity is meaningful only + // when --fail-cve is set (it gates the exit code). Explicit + // `--severity any` is a no-op and is accepted without + // --fail-cve so CI matrices can pass the flag unconditionally. 
+ if !matches!(severity, verify_deps::SeverityFloor::Any) && !*fail_cve { + eprintln!("error: --severity requires --fail-cve."); + eprintln!(" See https://docs.corgea.app/cli/deps#severity"); + std::process::exit(2); + } + + let parsed_ecosystem = match verify_deps::Ecosystem::parse(ecosystem) { + Ok(e) => e, + Err(e) => { + eprintln!("{}", e); + std::process::exit(2); + } + }; + let parsed_threshold = match verify_deps::parse_threshold(threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("Invalid --threshold: {}", e); + std::process::exit(2); + } + }; + + let project_path = + std::path::PathBuf::from(path.clone().unwrap_or_else(|| ".".to_string())); + + let (vuln_api_url, vuln_api_token) = if *check_cve { + let trimmed_token = corgea_config.get_token().trim().to_string(); + if trimmed_token.is_empty() { + eprintln!("error: --check-cve requires a Corgea token."); + eprintln!(" Run `corgea login` or set CORGEA_TOKEN."); + eprintln!(" See https://docs.corgea.app/cli/deps#check-cve"); + std::process::exit(2); + } + (Some(corgea_config.get_vuln_api_url()), Some(trimmed_token)) + } else { + (None, None) + }; + + let opts = verify_deps::VerifyOptions { + ecosystem: parsed_ecosystem, + threshold: parsed_threshold, + include_dev: *include_dev, + fail: *fail, + fail_unpinned: *fail_unpinned, + fail_cve: *fail_cve, + json: *json, + path: project_path, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + check_cve: *check_cve, + vuln_api_url, + vuln_api_token, + cve_concurrency: *cve_concurrency as usize, + severity_floor: severity.clone(), + }; + + match verify_deps::run(&opts) { + Ok(report) => { + if opts.json { + verify_deps::report::print_json(&report); + } else { + verify_deps::report::print_text(&report); + } + let recent = !report.recent().is_empty(); + let errors = !report.errors().is_empty(); + let unpinned = report.has_unpinned(); + let cve_vulnerable_any = 
!report.cve_findings().is_empty(); + let cve_vulnerable_above_floor = !report.cve_findings_above_floor().is_empty(); + let cve_errored = !report.cve_errors().is_empty(); + if (recent || errors || cve_vulnerable_any || cve_errored) && opts.fail { + std::process::exit(1); + } + if unpinned && opts.fail_unpinned { + std::process::exit(1); + } + if cve_vulnerable_above_floor && opts.fail_cve { + std::process::exit(1); + } + } + Err(e) => { + eprintln!("deps failed: {}", e); + std::process::exit(2); + } + } + } + Some(Commands::Npm(args)) => { + run_install_wrap_command(precheck::PackageManager::Npm, args); + } + Some(Commands::Yarn(args)) => { + run_install_wrap_command(precheck::PackageManager::Yarn, args); + } + Some(Commands::Pnpm(args)) => { + run_install_wrap_command(precheck::PackageManager::Pnpm, args); + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(precheck::PackageManager::Pip, args); + } + Some(Commands::Uv(args)) => { + run_install_wrap_command(precheck::PackageManager::Uv, args); + } None => { utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..015cb92 --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,862 @@ +//! Install wrappers: `corgea npm`, `corgea yarn`, `corgea pnpm`, `corgea pip`, `corgea uv`. +//! +//! Wraps an install command from a supported package manager, resolves what +//! the package manager *would* install against the public registry, and either +//! blocks the install or runs it transparently. +//! +//! Verification rule: a package is rejected if the resolved version +//! was published within `--threshold` (default `2d`). This mirrors +//! the `deps` flow but applies to the install-time set of +//! packages instead of the already-locked set. +//! +//! By default a "recent" finding makes the wrapper exit with status 1 +//! *without* running the install. Use `--no-fail` to demote this to a +//! 
warning (the install runs anyway), or `--check-only` to skip the +//! install regardless of verification result. + +pub mod parse; + +use std::ffi::OsString; +use std::process::Command; +use std::time::Duration; + +use chrono::Utc; + +use crate::utils::terminal::{set_text_color, TerminalColor}; +use crate::verify_deps; + +/// Supported package managers. Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Yarn, + Pnpm, + Pip, + Uv, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Yarn => "yarn", + PackageManager::Pnpm => "pnpm", + PackageManager::Pip => "pip", + PackageManager::Uv => "uv", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add"), + PackageManager::Yarn => matches!(sub, "add" | "install"), + PackageManager::Pnpm => matches!(sub, "add" | "install" | "i"), + PackageManager::Pip => matches!(sub, "install"), + PackageManager::Uv => false, + } + } + + fn lockfile_mode(self) -> LockfileMode { + match self { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => LockfileMode::Npm, + PackageManager::Pip | PackageManager::Uv => LockfileMode::Python, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum LockfileMode { + Npm, + Python, +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never exec the underlying install command. 
+ pub check_only: bool, + /// If true, also fail on unpinned-style warnings (URL specs, + /// unparseable specs, missing `requirements.txt` reference). + pub fail_unpinned: bool, + pub json: bool, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. + pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly, version is older than the threshold. + Ok { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + }, + /// Resolved cleanly but version was published within the threshold. + Recent { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, +} + +impl PrecheckReport { + pub fn recent_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Recent { .. 
})) + .count() + } + pub fn error_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Error { .. })) + .count() + } + pub fn skipped_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Skipped { .. })) + .count() + } + pub fn ok_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Ok { .. })) + .count() + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. +pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if manager == PackageManager::Uv { + return run_uv(cmd, opts); + } + + if cmd.is_empty() { + return exec_install(manager, &[], opts.check_only); + } + + let subcommand = &cmd[0]; + let rest = &cmd[1..]; + + if !manager.is_install_subcommand(subcommand) { + return exec_install_with_args(manager, subcommand, rest, opts.check_only); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + let check_only = opts.check_only; + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec_install_with_args(manager, subcommand, rest, check_only), + opts, + manager.lockfile_mode(), + ) +} + +fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { + if cmd.is_empty() { + return exec_uv(cmd, opts.check_only); + } + + let check_only = opts.check_only; + let exec = || exec_uv(cmd, check_only); + + match parse::classify_uv_command(cmd) { + parse::UvCommand::Passthrough => exec_uv(cmd, opts.check_only), + parse::UvCommand::PipInstall { install_args } => { + let parsed = match parse::parse_pip_install_args(install_args) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", 
e); + return 2; + } + }; + run_parsed_install( + PackageManager::Uv, + "pip install", + install_args, + parsed, + exec, + opts, + LockfileMode::Python, + ) + } + parse::UvCommand::Add { add_args } => run_parsed_install( + PackageManager::Uv, + "add", + add_args, + parse::parse_pypi_positionals_args(add_args), + exec, + opts, + LockfileMode::Python, + ), + parse::UvCommand::Sync { sync_args } => run_parsed_install( + PackageManager::Uv, + "sync", + sync_args, + parse::parse_pypi_positionals_args(sync_args), + exec, + opts, + LockfileMode::Python, + ), + } +} + +/// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. +fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, + lockfile_mode: LockfileMode, +) -> i32 { + if !parsed.requirements_files.is_empty() { + let code = verify_lockfile_or_requirements(&opts, &parsed.requirements_files); + if code != 0 && !opts.no_fail { + return code; + } + } + + if parsed.targets.is_empty() && !parsed.bare_install { + return exec(); + } + + if parsed.bare_install { + let exit_from_lockfile = verify_project_lockfile(&opts, lockfile_mode); + if exit_from_lockfile != 0 && !opts.no_fail { + return exit_from_lockfile; + } + return exec(); + } + + let now = Utc::now(); + let threshold = + chrono::Duration::from_std(opts.threshold).expect("threshold validated before run_install"); + + let outcomes: Vec<_> = parsed + .targets + .iter() + .map(|target| verify_one(target, &opts, &now, threshold)) + .collect(); + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + }; + + if opts.json { + print_json(&report); + } else { + print_text(&report); + } + + if should_block_install(&report, &opts) { + if !opts.json { + eprintln!( + "{}", + set_text_color( + "Refusing to run install. 
Pass --no-fail to proceed anyway.", + TerminalColor::Red, + ) + ); + } + return 1; + } + + exec() +} + +fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + if opts.no_fail { + return false; + } + report.recent_count() > 0 || (report.error_count() > 0 && opts.fail_unpinned) +} + +fn exec_uv(args: &[String], check_only: bool) -> i32 { + if check_only { + return 0; + } + exec_command("uv", args) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, + threshold: chrono::Duration, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + let age_chrono = now.signed_duration_since(resolved.published_at); + let age = age_chrono + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + if age_chrono < threshold { + TargetOutcome::Recent { + target: target.clone(), + resolved, + age, + } + } else { + TargetOutcome::Ok { + target: target.clone(), + resolved, + age, + } + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +fn verify_project_lockfile(opts: &PrecheckOptions, mode: LockfileMode) -> i32 { + let ecosystem = match mode { + LockfileMode::Npm => verify_deps::Ecosystem::Npm, + LockfileMode::Python => verify_deps::Ecosystem::Python, + }; + delegate_to_verify_deps(install_wrap_verify_opts( + opts, + ecosystem, + std::path::PathBuf::from("."), + )) +} + +fn install_wrap_verify_opts( + opts: &PrecheckOptions, + ecosystem: verify_deps::Ecosystem, + path: std::path::PathBuf, +) -> verify_deps::VerifyOptions { + 
verify_deps::VerifyOptions::for_install_wrap( + ecosystem, + path, + opts.threshold, + !opts.no_fail, + opts.fail_unpinned, + opts.json, + opts.npm_registry.clone(), + opts.pypi_registry.clone(), + ) +} + +fn verify_lockfile_or_requirements( + opts: &PrecheckOptions, + requirements_files: &[std::path::PathBuf], +) -> i32 { + if requirements_files.is_empty() { + return verify_project_lockfile(opts, LockfileMode::Python); + } + + let mut overall: i32 = 0; + for req in requirements_files { + let parent = req + .parent() + .filter(|p| !p.as_os_str().is_empty()) + .map(std::path::Path::to_path_buf) + .unwrap_or_else(|| std::path::PathBuf::from(".")); + // `deps` only discovers a file named `requirements.txt`; other + // `-r` paths are parsed and checked directly. + let file_name = req + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(); + if file_name != "requirements.txt" { + // Parse the file ourselves and run the registry checks. + let code = verify_arbitrary_requirements(req, opts); + if code != 0 { + overall = code; + } + continue; + } + let code = delegate_to_verify_deps(install_wrap_verify_opts( + opts, + verify_deps::Ecosystem::Python, + parent, + )); + if code != 0 { + overall = code; + } + } + overall +} + +/// Read a requirements file at an arbitrary path, parse it, and run +/// the same registry verification we'd run for a project's +/// `requirements.txt`. Used when the user passes +/// `pip install -r dev-reqs.txt` (a non-default name). 
/// Check every pinned dependency in the requirements file at `req_path`
/// against the PyPI registry and print one text line per dependency.
///
/// Exit codes:
/// * `2` — the file could not be read, or `opts.threshold` could not be
///   converted to a `chrono::Duration`;
/// * `0` — the file contains neither pinned dependencies nor unpinned lines,
///   or none of the failure conditions below applies;
/// * `1` — a dependency was published within the threshold and
///   `opts.no_fail` is not set, or `opts.fail_unpinned` is set and there is
///   at least one unpinned line or registry lookup error.
///
/// NOTE(review): `opts.json` is not consulted here, so output is always text —
/// confirm that is intended for `--json` runs.
fn verify_arbitrary_requirements(req_path: &std::path::Path, opts: &PrecheckOptions) -> i32 {
    let content = match std::fs::read_to_string(req_path) {
        Ok(c) => c,
        Err(e) => {
            eprintln!("deps: failed to read {}: {}", req_path.display(), e);
            return 2;
        }
    };
    let (deps, unpinned) = crate::verify_deps::python::parse_requirements_with_warnings(&content);

    // Nothing to verify and nothing to warn about.
    if deps.is_empty() && unpinned.is_empty() {
        return 0;
    }

    let now = chrono::Utc::now();
    let threshold = match chrono::Duration::from_std(opts.threshold) {
        Ok(t) => t,
        Err(e) => {
            eprintln!("invalid threshold: {}", e);
            return 2;
        }
    };

    let mut recent_count: usize = 0;
    let mut error_count: usize = 0;
    println!(
        "Pre-checking {} (threshold {})",
        req_path.display(),
        verify_deps::format_duration(opts.threshold)
    );
    for dep in &deps {
        match crate::verify_deps::registry::pypi_publish_time(
            &dep.name,
            &dep.version,
            opts.pypi_registry.as_deref(),
        ) {
            Ok(published_at) => {
                let age_chrono = now.signed_duration_since(published_at);
                // `to_std` fails for a negative age (publish time in the
                // future); display such an age as zero.
                let age = age_chrono
                    .to_std()
                    .unwrap_or_else(|_| Duration::from_secs(0));
                // The comparison uses the signed age, so a negative age
                // also counts as "recent".
                if age_chrono < threshold {
                    println!(
                        " {} {}@{} published {} ago at {} (within threshold)",
                        set_text_color("⚠", TerminalColor::Yellow),
                        dep.name,
                        dep.version,
                        set_text_color(&verify_deps::format_duration(age), TerminalColor::Yellow,),
                        published_at.format("%Y-%m-%d %H:%M:%S UTC"),
                    );
                    recent_count += 1;
                } else {
                    println!(
                        " {} {}@{} published {} ago",
                        set_text_color("✓", TerminalColor::Green),
                        dep.name,
                        dep.version,
                        verify_deps::format_duration(age),
                    );
                }
            }
            Err(e) => {
                println!(
                    " {} {}@{}: {}",
                    set_text_color("✗", TerminalColor::Red),
                    dep.name,
                    dep.version,
                    e
                );
                error_count += 1;
            }
        }
    }
    if !unpinned.is_empty() {
        println!(
            "{}",
            set_text_color(
                "Unpinned lines (cannot be verified):",
                TerminalColor::Yellow,
            )
        );
        for line in &unpinned {
            println!(" {} {}", set_text_color("?", TerminalColor::Yellow), line);
        }
    }
    // Exit-code policy: recent packages fail unless `no_fail` is set.
    if recent_count > 0 && !opts.no_fail {
        return 1;
    }
    if !unpinned.is_empty() && opts.fail_unpinned {
        return 1;
    }
    // NOTE(review): lookup errors only fail the run under `fail_unpinned`,
    // i.e. they are treated like unverifiable lines — confirm intended.
    if error_count > 0 && opts.fail_unpinned {
        return 1;
    }
    0
}
Make sure the package manager is installed.", + binary, e + ); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + match Command::new(&resolved).args(&os_args).status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + eprintln!("failed to exec {}: {}", binary, e); + 1 + } + } +} + +fn print_text(report: &PrecheckReport) { + let label = report.manager.binary_name(); + let display: Vec<&str> = report.original_args.iter().map(String::as_str).collect(); + println!( + "Pre-checking `{} {} {}` (threshold {})", + label, + report.subcommand, + display.join(" "), + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + report.skipped_count(), + report.error_count(), + ); + + for o in &report.outcomes { + match o { + TargetOutcome::Ok { + target, + resolved, + age, + } => { + println!( + " {} {} → {}@{} published {} ago", + set_text_color("✓", TerminalColor::Green), + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + TargetOutcome::Recent { + target, + resolved, + age, + } => { + println!( + " {} {} → {}@{} published {} ago at {} (within threshold)", + set_text_color("⚠", TerminalColor::Yellow), + target.display, + resolved.name, + resolved.version, + set_text_color(&verify_deps::format_duration(*age), TerminalColor::Yellow), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } + TargetOutcome::Skipped { target, reason } => { + println!( + " {} {}: {}", + set_text_color("?", TerminalColor::Yellow), + target.display, + reason, + ); + } + TargetOutcome::Error { target, error } => { + println!( + " {} {}: {}", + set_text_color("✗", TerminalColor::Red), + target.display, + error, + ); + } + } + } +} + +fn 
print_json(report: &PrecheckReport) { + use serde_json::json; + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + TargetOutcome::Ok { + target, + resolved, + age, + } => json!({ + "status": "ok", + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + TargetOutcome::Recent { + target, + resolved, + age, + } => json!({ + "status": "recent", + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + TargetOutcome::Skipped { target, reason } => json!({ + "status": "skipped", + "spec": target.display, + "name": target.name, + "reason": reason, + }), + TargetOutcome::Error { target, error } => json!({ + "status": "error", + "spec": target.display, + "name": target.name, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "manager": report.manager.binary_name(), + "subcommand": report.subcommand, + "args": report.original_args, + "threshold_seconds": report.threshold.as_secs(), + "summary": { + "ok": report.ok_count(), + "recent": report.recent_count(), + "skipped": report.skipped_count(), + "errors": report.error_count(), + }, + "results": outcomes, + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn install_subcommand_recognition() { + assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Yarn.is_install_subcommand("add")); + assert!(PackageManager::Yarn.is_install_subcommand("install")); + + assert!(PackageManager::Pnpm.is_install_subcommand("add")); + 
assert!(PackageManager::Pnpm.is_install_subcommand("install")); + assert!(PackageManager::Pnpm.is_install_subcommand("i")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + #[test] + fn run_install_passthrough_non_install_subcommand() { + let opts = PrecheckOptions { + threshold: Duration::from_secs(86400), + no_fail: true, + check_only: true, + fail_unpinned: false, + json: false, + npm_registry: None, + pypi_registry: None, + }; + // `view` is not an install subcommand — should return 0 in check_only mode + // without needing network or npm on PATH for resolution. + let code = run_install( + PackageManager::Npm, + &[ + "view".to_string(), + "lodash".to_string(), + "version".to_string(), + ], + opts, + ); + assert_eq!(code, 0); + } + + #[test] + fn run_uv_passthrough_check_only() { + let opts = PrecheckOptions { + threshold: Duration::from_secs(86400), + no_fail: true, + check_only: true, + fail_unpinned: false, + json: false, + npm_registry: None, + pypi_registry: None, + }; + assert_eq!( + run_install( + PackageManager::Uv, + &["run".to_string(), "pytest".to_string()], + opts + ), + 0 + ); + } + + #[test] + fn run_install_empty_cmd_check_only() { + let opts = PrecheckOptions { + threshold: Duration::from_secs(86400), + no_fail: false, + check_only: true, + fail_unpinned: false, + json: false, + npm_registry: None, + pypi_registry: None, + }; + assert_eq!(run_install(PackageManager::Npm, &[], opts), 0); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..bf993cb --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,631 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). 
+ +use std::path::PathBuf; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — the requirements files we should + /// load and verify in lieu of standalone targets. + pub requirements_files: Vec, + /// True if the user invoked the bare install (`npm install` / + /// `pip install` with no positional specs and no `-r`). + pub bare_install: bool, +} + +/// `uv pip install` argument list (everything after `pip install`). +pub fn parse_pip_install_args(args: &[String]) -> Result { + build_parsed_install(extract_pip_positionals(args)?, true) +} + +/// `uv add` argument list (everything after `add`). +pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { + build_parsed_install(extract_node_positionals(args), false) + .expect("node positionals never fail") +} + +fn build_parsed_install(positionals: PositionalSplit, pypi: bool) -> Result { + let mut parsed = ParsedInstall::default(); + for raw in &positionals.specs { + let target = if pypi { + parse_pypi_spec(raw) + } else { + parse_npm_spec(raw) + }; + parsed.targets.push(target); + } + parsed.requirements_files = positionals.requirements_files; + if parsed.targets.is_empty() && parsed.requirements_files.is_empty() { + parsed.bare_install = true; + } + Ok(parsed) +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => parse_pip_install_args(args), + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + build_parsed_install(extract_node_positionals(args), false) + } + PackageManager::Uv => unreachable!("uv uses classify_uv_command"), + } +} + +/// Install-shaped `uv` invocations we know how to verify. 
/// The shapes of `uv` invocation the pre-check tells apart.
///
/// Every variant except `Passthrough` borrows the arguments that follow the
/// recognised subcommand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UvCommand<'a> {
    Passthrough,
    PipInstall { install_args: &'a [String] },
    Add { add_args: &'a [String] },
    Sync { sync_args: &'a [String] },
}

/// Classify a `uv` argument list (everything after the `uv` binary name).
///
/// `pip install …` / `pip i …`, `add …` and `sync …` are recognised; anything
/// else — including an empty list or a bare `pip` — is `Passthrough`.
pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> {
    let (head, tail) = match cmd.split_first() {
        Some(parts) => parts,
        None => return UvCommand::Passthrough,
    };

    match head.as_str() {
        "add" => UvCommand::Add { add_args: tail },
        "sync" => UvCommand::Sync { sync_args: tail },
        // `pip` only counts when its own subcommand is an install.
        "pip" => match tail.split_first() {
            Some((sub, install_args)) if matches!(sub.as_str(), "install" | "i") => {
                UvCommand::PipInstall { install_args }
            }
            _ => UvCommand::Passthrough,
        },
        _ => UvCommand::Passthrough,
    }
}
Skip the next token if it looks like a value. + if let Some(eq_idx) = a.find('=') { + // `--flag=value` already self-contained. + let _ = eq_idx; + i += 1; + continue; + } + // Heuristic: peek at the next arg. If it doesn't look + // like a package spec (i.e. contains `://` or starts with + // `/` or `.`) skip it; otherwise leave it alone for the + // next iteration. + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. + let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Long-form `--requirement=foo.txt`. 
+ if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + // Unknown flag — apply the same value-skipping heuristic + // as in node land. + if a.contains('=') { + i += 1; + continue; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. +pub(crate) fn parse_npm_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "git:", "git@", "ssh://", "http://", "https://", "file:", "./", "../", "/", "~/", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a URL/git/filesystem reference — registry verification skipped" + .to_string(), + }, + }; + } + if trimmed.starts_with("npm:") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "npm: aliased dependency — registry verification skipped".to_string(), + }, + }; + } + if trimmed.starts_with("workspace:") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "workspace: dependency — registry verification skipped".to_string(), + }, + }; + } + + // Find the version separator. 
/// Loose check for "this version spec is an npm range".
///
/// Only the first character is inspected: a range metacharacter (`^`, `~`,
/// `>`, `<`, `=`, `*`) or an ASCII digit (so `1.x`, `1.2.x` and bare ranges
/// qualify). An empty string is not a range. The spec is not validated any
/// further here.
fn looks_like_npm_range(s: &str) -> bool {
    match s.chars().next() {
        Some(first) => first.is_ascii_digit() || "^~><=*".contains(first),
        None => false,
    }
}
+fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars() + .next() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +pub(crate) fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + // Find the first specifier operator (`==`, `>=`, `<=`, `!=`, `~=`, + // `>`, `<`). PEP 440 also allows `===` (arbitrary equality). + // Find the leftmost specifier operator. On ties, prefer the + // longer operator (e.g. `==` over `=`). + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let mut split_at: Option = None; + for sep in &separators { + if let Some(idx) = trimmed.find(sep) { + split_at = match split_at { + Some(prev) if prev <= idx => Some(prev), + _ => Some(idx), + }; + } + } + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&trimmed[..idx], &trimmed[idx..]), + None => (trimmed, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); + + // Strip env markers: `package; python_version >= "3.7"`. 
+ let spec_no_marker = spec_part.split(';').next().unwrap_or(spec_part).trim(); + + let kind = if spec_no_marker.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec_no_marker.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else if let Some(rest) = spec_no_marker.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec_no_marker.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn npm_kind(t: &InstallTarget) -> &TargetKind { + &t.kind + } + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ( + "axios@>=1.0.0 
<2.0.0", + NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + ), + ("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input); + match (npm_kind(&target), &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node").name, "@types/node"); + assert_eq!(parse_npm_spec("@types/node@20.10.5").name, "@types/node"); + assert_eq!(parse_npm_spec("axios@1.2.3").name, "axios"); + assert_eq!(parse_npm_spec("axios").name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + ]; + for u in unverifiable { + let t = parse_npm_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. 
}), + "for '{}'", + u + ); + } + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ( + "requests[security]==2.31.0", + PypiSpec::Exact("2.31.0".to_string()), + ), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!( + parse_pypi_spec("requests[security]==2.31.0").name, + "requests" + ); + assert_eq!( + parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").name, + "requests" + ); + match parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").kind { + TargetKind::Pypi(PypiSpec::Exact(v)) => assert_eq!(v, "2.31.0"), + _ => panic!("expected exact spec"), + } + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + } + + #[test] + fn classify_uv_command_recognizes_install_shapes() { + assert!(matches!( + classify_uv_command(&[ + "pip".to_string(), + "install".to_string(), + "requests".to_string(), + ]), + UvCommand::PipInstall { .. } + )); + assert!(matches!( + classify_uv_command(&["pip".to_string(), "i".to_string()]), + UvCommand::PipInstall { .. 
} + )); + assert!(matches!( + classify_uv_command(&["add".to_string(), "django".to_string()]), + UvCommand::Add { .. } + )); + assert!(matches!( + classify_uv_command(&["sync".to_string()]), + UvCommand::Sync { .. } + )); + assert_eq!( + classify_uv_command(&["run".to_string(), "pytest".to_string()]), + UvCommand::Passthrough + ); + assert_eq!( + classify_uv_command(&["lock".to_string()]), + UvCommand::Passthrough + ); + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + } +} diff --git a/src/scan.rs b/src/scan.rs index 184dbdd..d657bc5 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -1,14 +1,14 @@ +use crate::cicd::*; +use crate::log::debug; +use crate::scanners::parsers::ScanParserFactory; +use crate::{utils, Config}; +use reqwest::header; +use serde_json::Value; use std::collections::HashSet; use std::io::{self, Read}; -use crate::{utils, Config}; -use uuid::Uuid; use std::path::Path; use std::process::Command; -use crate::cicd::{*}; -use crate::log::debug; -use reqwest::header; -use crate::scanners::parsers::ScanParserFactory; -use serde_json::Value; +use uuid::Uuid; pub fn run_command(base_cmd: &String, mut command: Command) -> String { match which::which(base_cmd) { @@ -30,7 +30,7 @@ pub fn run_command(base_cmd: &String, mut command: Command) -> String { std::process::exit(1); } - return stdout; + stdout } else { let stderr = String::from_utf8(output.stderr).expect("Failed to parse stderr"); let stdout = String::from_utf8(output.stdout).expect("Failed to parse stdout"); @@ -55,7 +55,11 @@ pub fn 
run_semgrep(config: &Config, project_name: Option) { println!("Scanning with semgrep..."); let base_command = "semgrep"; let mut command = std::process::Command::new(base_command); - command.arg("scan").arg("--config").arg("auto").arg("--json"); + command + .arg("scan") + .arg("--config") + .arg("auto") + .arg("--json"); println!("Running \"semgrep scan --config auto --json\""); @@ -100,7 +104,12 @@ pub fn read_file_report(config: &Config, file_path: &str, project_name: Option) -> Option { +pub fn parse_scan( + config: &Config, + input: String, + save_to_file: bool, + project_name: Option, +) -> Option { debug("Parsing the scan report"); // Remove BOM (Byte Order Mark) if present @@ -115,7 +124,14 @@ pub fn parse_scan(config: &Config, input: String, save_to_file: bool, project_na std::process::exit(0); } - return upload_scan(config, parse_result.paths, parse_result.scanner, cleaned_input.to_string(), save_to_file, project_name); + upload_scan( + config, + parse_result.paths, + parse_result.scanner, + cleaned_input.to_string(), + save_to_file, + project_name, + ) } Err(error_message) => { @@ -125,7 +141,14 @@ pub fn parse_scan(config: &Config, input: String, save_to_file: bool, project_na } } -pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: String, save_to_file: bool, project_name: Option) -> Option { +pub fn upload_scan( + config: &Config, + paths: Vec, + scanner: String, + input: String, + save_to_file: bool, + project_name: Option, +) -> Option { let in_ci = running_in_ci(); let ci_platform = which_ci(); let github_env_vars = get_github_env_vars(); @@ -133,30 +156,38 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: let run_id = Uuid::new_v4().to_string(); let base_url = config.get_url(); let api_base = "/api/v1"; - let project; - if in_ci { + let project = if in_ci { debug("Running in CI"); - project = format!("{}-{}", - github_env_vars.get("GITHUB_REPOSITORY").expect("Failed to get 
GITHUB_REPOSITORY").to_string(), - github_env_vars.get("GITHUB_PR").expect("Failed to get GITHUB_REPOSITORY").to_string()) + format!( + "{}-{}", + github_env_vars + .get("GITHUB_REPOSITORY") + .expect("Failed to get GITHUB_REPOSITORY"), + github_env_vars + .get("GITHUB_PR") + .expect("Failed to get GITHUB_REPOSITORY") + ) } else { - project = utils::generic::determine_project_name(project_name.as_deref()); - } + utils::generic::determine_project_name(project_name.as_deref()) + }; let repo_data = std::env::var("REPO_DATA").unwrap_or_else(|_| "".to_string()); let scan_upload_url = if repo_data.is_empty() { format!( - "{}{}/scan-upload?engine={}&run_id={}&project={}&ci={}&ci_platform={}", base_url, api_base, scanner, run_id, project, in_ci, ci_platform + "{}{}/scan-upload?engine={}&run_id={}&project={}&ci={}&ci_platform={}", + base_url, api_base, scanner, run_id, project, in_ci, ci_platform ) } else { format!( - "{}{}/scan-upload?engine={}&run_id={}&project={}&ci={}&ci_platform={}&repo_data={}", base_url, api_base, scanner, run_id, project, in_ci, ci_platform, repo_data + "{}{}/scan-upload?engine={}&run_id={}&project={}&ci={}&ci_platform={}&repo_data={}", + base_url, api_base, scanner, run_id, project, in_ci, ci_platform, repo_data ) }; let git_config_upload_url = format!( - "{}{}/git-config-upload?run_id={}", base_url, api_base, run_id + "{}{}/git-config-upload?run_id={}", + base_url, api_base, run_id ); let client = utils::api::http_client(); @@ -168,7 +199,10 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: for path in &paths { if !Path::new(&path).exists() { - eprintln!("Required file {} not found which is required for the scan, exiting.", path); + eprintln!( + "Required file {} not found which is required for the scan, exiting.", + path + ); std::process::exit(1); } @@ -177,7 +211,8 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: } let src_upload_url = format!( - "{}{}/code-upload?run_id={}&path={}", base_url, 
api_base, run_id, path + "{}{}/code-upload?run_id={}&path={}", + base_url, api_base, run_id, path ); debug(&format!("Uploading file: {}", path)); let fp = Path::new(&path); @@ -191,16 +226,19 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: .expect("Failed to read file"); debug(&format!("POST: {}", src_upload_url)); - let res = client.post(&src_upload_url) - .multipart(form) - .send(); + let res = client.post(&src_upload_url).multipart(form).send(); match res { Ok(response) => { if !response.status().is_success() { let status = response.status(); - let body = response.text().unwrap_or_else(|_| "Unable to read response body".to_string()); - debug(&format!("Code upload failed with status: {}. Response body: {}", status, body)); + let body = response + .text() + .unwrap_or_else(|_| "Unable to read response body".to_string()); + debug(&format!( + "Code upload failed with status: {}. Response body: {}", + status, body + )); eprintln!("Failed to upload file {} {}... retrying", status, path); std::thread::sleep(std::time::Duration::from_secs(1)); attempts += 1; @@ -219,7 +257,10 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: if attempts == 3 && !success { upload_error_count += 1; - eprintln!("Failed to upload file: {} after 3 attempts. skipping...", path); + eprintln!( + "Failed to upload file: {} after 3 attempts. 
skipping...", + path + ); } } @@ -235,30 +276,34 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: let input_size = input_bytes.len(); let max_upload_size = 50 * 1024 * 1024; // 50mb let chunk_size = match std::env::var("DEBUG_CORGEA_OVERRIDE_REPORT_CHUNK_SIZE") { - Ok(val) => { - match val.parse::() { - Ok(mb) if mb > 0 => { - debug(&format!("Overriding report chunk size to {} MB", mb)); - mb * 1024 * 1024 - } - _ => { - eprintln!("Invalid DEBUG_CORGEA_OVERRIDE_REPORT_CHUNK_SIZE value '{}', using default 1 MB", val); - 1024 * 1024 - } + Ok(val) => match val.parse::() { + Ok(mb) if mb > 0 => { + debug(&format!("Overriding report chunk size to {} MB", mb)); + mb * 1024 * 1024 } - } + _ => { + eprintln!("Invalid DEBUG_CORGEA_OVERRIDE_REPORT_CHUNK_SIZE value '{}', using default 1 MB", val); + 1024 * 1024 + } + }, Err(_) => 1024 * 1024, // default 1mb }; let is_chunked = input_size > max_upload_size; let res = if is_chunked { - let total_chunks = (input_size + chunk_size - 1) / chunk_size; + let total_chunks = input_size.div_ceil(chunk_size); debug(&format!("Uploading scan in {} chunks", total_chunks)); let mut offset = 0usize; let mut last_response = None; for (index, chunk) in input_bytes.chunks(chunk_size).enumerate() { - debug(&format!("POST: {} (chunk {}/{})", scan_upload_url, index + 1, total_chunks)); - let response = client.post(&scan_upload_url) + debug(&format!( + "POST: {} (chunk {}/{})", + scan_upload_url, + index + 1, + total_chunks + )); + let response = client + .post(&scan_upload_url) .header(header::CONTENT_TYPE, "application/json") .header("Upload-Offset", offset.to_string()) .header("Upload-Length", input_size.to_string()) @@ -295,7 +340,7 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: false } } - }, + } Err(_) => true, }; last_response = Some(response); @@ -308,7 +353,8 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: last_response.expect("Failed to upload scan.") } else 
{ debug(&format!("POST: {}", scan_upload_url)); - client.post(&scan_upload_url) + client + .post(&scan_upload_url) .header(header::CONTENT_TYPE, "application/json") .body(input.clone()) .send() @@ -365,8 +411,13 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: } else { upload_failed = true; let status = response.status(); - let body = response.text().unwrap_or_else(|_| "Unable to read response body".to_string()); - debug(&format!("Scan upload failed with status: {}. Response body: {}", status, body)); + let body = response + .text() + .unwrap_or_else(|_| "Unable to read response body".to_string()); + debug(&format!( + "Scan upload failed with status: {}. Response body: {}", + status, body + )); eprintln!("Failed to upload scan: {}", status); } } @@ -376,7 +427,6 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: } } - let git_config_path = Path::new(".git/config"); if git_config_path.exists() { @@ -386,9 +436,7 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: .expect("Failed to read file"); debug(&format!("POST: {}", git_config_upload_url)); - let res = client.post(&git_config_upload_url) - .multipart(form) - .send(); + let res = client.post(&git_config_upload_url).multipart(form).send(); match res { Ok(response) => { @@ -404,7 +452,8 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: if in_ci { let ci_data_upload_url = format!( - "{}{}/ci-data-upload?run_id={}&platform={}", base_url, api_base, run_id, ci_platform + "{}{}/ci-data-upload?run_id={}&platform={}", + base_url, api_base, run_id, ci_platform ); let mut github_env_vars_json = serde_json::Map::new(); @@ -421,7 +470,8 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: }; debug(&format!("POST: {}", ci_data_upload_url)); - let _res = client.post(ci_data_upload_url) + let _res = client + .post(ci_data_upload_url) .header(header::CONTENT_TYPE, "application/json") 
.body(github_env_vars_json_string) .send(); @@ -433,7 +483,7 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: match std::fs::write(&file_path, input.clone()) { Ok(_) => println!("Successfully saved scan to {}", file_path.display()), - Err(e) => eprintln!("Failed to save scan to {}: {}", file_path.display(), e) + Err(e) => eprintln!("Failed to save scan to {}: {}", file_path.display(), e), } } @@ -441,13 +491,22 @@ pub fn upload_scan(config: &Config, paths: Vec, scanner: String, input: std::process::exit(1); } - println!("Successfully scanned using {} and uploaded to Corgea.", scanner); + println!( + "Successfully scanned using {} and uploaded to Corgea.", + scanner + ); if upload_error_count > 0 { - println!("Failed to upload {} files, you may not see all fixes in Corgea.", upload_error_count); + println!( + "Failed to upload {} files, you may not see all fixes in Corgea.", + upload_error_count + ); } println!("Go to {base_url} to see results."); - sast_scan_id.map(|scan_id| ScanUploadResult { scan_id, project_id }) + sast_scan_id.map(|scan_id| ScanUploadResult { + scan_id, + project_id, + }) } diff --git a/src/scanners/blast.rs b/src/scanners/blast.rs index d530ed8..e712ddb 100644 --- a/src/scanners/blast.rs +++ b/src/scanners/blast.rs @@ -1,20 +1,19 @@ -use crate::utils; use crate::config::Config; use crate::targets; +use crate::utils; use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -use std::error::Error; -use std::thread; use std::env; +use std::error::Error; use std::fs; +use std::sync::{Arc, Mutex}; +use std::thread; use uuid::Uuid; - - +#[allow(clippy::too_many_arguments)] pub fn run( - config: &Config, - fail_on: Option, - fail: &bool, + config: &Config, + fail_on: Option, + fail: &bool, only_uncommitted: &bool, scan_type: Option, policy: Option, @@ -34,37 +33,33 @@ pub fn run( Ok(false) => { eprintln!("This is not a git repository. 
Without a git repository Corgea CLI can't determine which files have been modified or added thus only a full scan is possible."); std::process::exit(1); - }, + } Err(e) => { eprintln!("Error checking git repository information: {}. Without a git repository Corgea CLI can't determine which files have been modified or added thus only a full scan is possible.", e); std::process::exit(1); - }, + } Ok(true) => { // Continue with the git repo logic } } } - println!( - "\nScanning with BLAST 🚀🚀🚀" - ); + println!("\nScanning with BLAST 🚀🚀🚀"); if let Some(scan_type) = &scan_type { println!("Running Scan Type: {}", scan_type); } if let Some(policy) = &policy { - println!("Including only specified policies for policy scan: {}", policy); + println!( + "Including only specified policies for policy scan: {}", + policy + ); } println!("\n\n"); let temp_dir = env::temp_dir().join(format!("corgea/tmp/{}", Uuid::new_v4())); fs::create_dir_all(&temp_dir).expect("Failed to create temp directory"); let project_name = utils::generic::determine_project_name(project_name.as_deref()); let zip_path = format!("{}/{}.zip", temp_dir.display(), project_name); - let repo_info = match utils::generic::get_repo_info("./") { - Ok(info) => info, - Err(_) => { - None - } - }; + let repo_info = utils::generic::get_repo_info("./").unwrap_or_default(); match utils::generic::create_path_if_not_exists(&temp_dir) { Ok(_) => (), Err(e) => { @@ -79,7 +74,10 @@ pub fn run( let stop_signal = Arc::new(Mutex::new(false)); let stop_signal_clone = Arc::clone(&stop_signal); let packaging_thread = thread::spawn(move || { - utils::terminal::show_loading_message("Packaging your project... ([T]s)", stop_signal_clone); + utils::terminal::show_loading_message( + "Packaging your project... 
([T]s)", + stop_signal_clone, + ); }); let target_str: Option<&str> = if *only_uncommitted { @@ -94,7 +92,10 @@ pub fn run( if result.files.is_empty() { *stop_signal.lock().unwrap() = true; let _ = packaging_thread.join(); - print!("\r{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset)); + print!( + "\r{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset) + ); eprintln!("\n\nError: target resolved to zero files.\n"); eprintln!("Target value: {}\n", target_value); eprintln!("Segment results:"); @@ -102,7 +103,10 @@ pub fn run( if let Some(ref error) = segment_result.error { eprintln!(" {}: ERROR - {}", segment_result.segment, error); } else { - eprintln!(" {}: {} matches", segment_result.segment, segment_result.matches); + eprintln!( + " {}: {} matches", + segment_result.segment, segment_result.matches + ); } } eprintln!("\nPlease check your target specification and try again.\n"); @@ -113,7 +117,9 @@ pub fn run( if *only_uncommitted { println!("\rFiles to be submitted for partial scan:\n"); for (index, file) in result.files.iter().enumerate() { - if let Ok(relative) = file.strip_prefix(std::env::current_dir().unwrap_or_default()) { + if let Ok(relative) = + file.strip_prefix(std::env::current_dir().unwrap_or_default()) + { println!("{}: {}", index + 1, relative.display()); } else { println!("{}: {}", index + 1, file.display()); @@ -122,10 +128,12 @@ pub fn run( println!(); } else { println!("Scanning {} files (target mode)", file_count); - + let display_count = std::cmp::min(20, file_count); for file in result.files.iter().take(display_count) { - if let Ok(relative) = file.strip_prefix(std::env::current_dir().unwrap_or_default()) { + if let Ok(relative) = + file.strip_prefix(std::env::current_dir().unwrap_or_default()) + { println!(" {}", relative.display()); } else { println!(" {}", file.display()); @@ -140,7 +148,10 @@ pub fn run( Err(e) => { *stop_signal.lock().unwrap() = true; let _ = 
packaging_thread.join(); - print!("\r{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset)); + print!( + "\r{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset) + ); eprintln!("\n\nError resolving targets: {}\n", e); std::process::exit(1); } @@ -152,23 +163,27 @@ pub fn run( if added_files.is_empty() { *stop_signal.lock().unwrap() = true; let _ = packaging_thread.join(); - print!("\r{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset)); + print!( + "\r{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset) + ); if *only_uncommitted { eprintln!( "\n\nOops! It seems there are no scannable uncommitted changes in your project.\nYou may have uncommitted changes, but none match the types of files we can scan.\n\n" ); } else { - eprintln!( - "\n\nOops! No valid files found to scan after filtering.\n\n" - ); + eprintln!("\n\nOops! No valid files found to scan after filtering.\n\n"); } std::process::exit(1); } - }, + } Err(e) => { *stop_signal.lock().unwrap() = true; let _ = packaging_thread.join(); - print!("\r{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset)); + print!( + "\r{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset) + ); eprintln!( "\n\nUh-oh! 
We couldn't package your project at '{}'.\nThis might be due to insufficient permissions, invalid file paths, or a file system error.\nPlease check the directory and try again.\nError details:\n{}\n\n", zip_path, e @@ -178,9 +193,19 @@ pub fn run( } *stop_signal.lock().unwrap() = true; let _ = packaging_thread.join(); - print!("\r{}Project packaged successfully.\n", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Green)); + print!( + "\r{}Project packaged successfully.\n", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Green) + ); println!("\n\nSubmitting scan to Corgea:"); - let upload_result = match utils::api::upload_zip(&zip_path, &config.get_url(), &project_name, repo_info, scan_type, policy) { + let upload_result = match utils::api::upload_zip( + &zip_path, + &config.get_url(), + &project_name, + repo_info, + scan_type, + policy, + ) { Ok(result) => result, Err(e) => { eprintln!("\n\nOh no! We encountered an issue while uploading the zip file '{}' to the server.\nPlease ensure that: @@ -197,13 +222,18 @@ pub fn run( e ); std::process::exit(1); - }, + } }; let scan_id = upload_result.scan_id; let scan_url = match &upload_result.project_id { Some(pid) => format!("{}/project/{}/?scan_id={}", config.get_url(), pid, scan_id), - None => format!("{}/project/{}?scan_id={}", config.get_url(), project_name, scan_id), + None => format!( + "{}/project/{}?scan_id={}", + config.get_url(), + project_name, + scan_id + ), }; let _ = utils::generic::delete_directory(&temp_dir); @@ -222,7 +252,10 @@ pub fn run( let stop_signal = Arc::new(Mutex::new(false)); let stop_signal_clone = Arc::clone(&stop_signal); let results_thread = thread::spawn(move || { - utils::terminal::show_loading_message("Collecting scan results... ([T]s)", stop_signal_clone); + utils::terminal::show_loading_message( + "Collecting scan results... 
([T]s)", + stop_signal_clone, + ); }); let classifications = match report_scan_status(&config.get_url(), &project_name) { @@ -234,7 +267,7 @@ pub fn run( utils::terminal::set_text_color(&scan_url, utils::terminal::TerminalColor::Green) ); issues_classes - }, + } Err(e) => { *stop_signal.lock().unwrap() = true; let _ = results_thread.join(); @@ -247,7 +280,10 @@ pub fn run( - Error details: {}\n", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset), utils::terminal::set_text_color( - &format!("Failed to report the scan status for project: '{}'.", project_name), + &format!( + "Failed to report the scan status for project: '{}'.", + project_name + ), utils::terminal::TerminalColor::Red ), utils::terminal::set_text_color(&scan_url, utils::terminal::TerminalColor::Blue), @@ -258,13 +294,14 @@ pub fn run( } }; if *fail { - let blocking_rules = match utils::api::check_blocking_rules(&config.get_url(), &scan_id, None) { - Ok(rules) => rules, - Err(e) => { - eprintln!("Failed to check blocking rules: {}", e); - std::process::exit(1); - } - }; + let blocking_rules = + match utils::api::check_blocking_rules(&config.get_url(), &scan_id, None) { + Ok(rules) => rules, + Err(e) => { + eprintln!("Failed to check blocking rules: {}", e); + std::process::exit(1); + } + }; if blocking_rules.block { println!("\nExiting with error code 1 due to some issues violating some blocking rules defined for this project.\nfor more details, please check the scan results at the link: {}\nAlternatively, you can run {} to view the issues list on your local machine.", utils::terminal::set_text_color(&scan_url, utils::terminal::TerminalColor::Green), @@ -282,18 +319,29 @@ pub fn run( let stop_signal = Arc::new(Mutex::new(false)); let stop_signal_clone = Arc::clone(&stop_signal); let results_thread = thread::spawn(move || { - utils::terminal::show_loading_message("Generating scan report... 
([T]s)", stop_signal_clone); + utils::terminal::show_loading_message( + "Generating scan report... ([T]s)", + stop_signal_clone, + ); }); if out_format == "json" { - let issues = match utils::api::get_all_issues(&config.get_url(), &project_name, Some(scan_id.clone())) { + let issues = match utils::api::get_all_issues( + &config.get_url(), + &project_name, + Some(scan_id.clone()), + ) { Ok(issues) => issues, Err(e) => { eprintln!("\n\nFailed to fetch issues: {}\n\n", e); std::process::exit(1); } }; - let sca_issues = match utils::api::get_all_sca_issues(&config.get_url(), &project_name, Some(scan_id.clone())) { + let sca_issues = match utils::api::get_all_sca_issues( + &config.get_url(), + &project_name, + Some(scan_id.clone()), + ) { Ok(issues) => issues, Err(e) => { eprintln!("\n\nFailed to fetch SCA issues: {}\n\n", e); @@ -302,15 +350,17 @@ pub fn run( }; let json = serde_json::to_string_pretty(&issues).unwrap(); let sca_json = serde_json::to_string_pretty(&sca_issues).unwrap(); - let report_json= serde_json::to_string_pretty(&classifications).unwrap(); - let results_json = format!("{{\"issues\": {}, \"sca_issues\": {}, \"report\": {}}}", json, sca_json, report_json); + let report_json = serde_json::to_string_pretty(&classifications).unwrap(); + let results_json = format!( + "{{\"issues\": {}, \"sca_issues\": {}, \"report\": {}}}", + json, sca_json, report_json + ); *stop_signal.lock().unwrap() = true; let _ = results_thread.join(); fs::write(out_file.clone(), results_json).expect("Failed to write JSON file, check if the file path is valid and you have the necessary permissions to write to it."); utils::terminal::clear_previous_line(); println!("\n\nScan results written to: {}\n\n", out_file.clone()); - } - else if out_format == "html" { + } else if out_format == "html" { let report = match utils::api::get_scan_report(&config.get_url(), &scan_id, None) { Ok(html) => html, Err(e) => { @@ -323,23 +373,26 @@ pub fn run( fs::write(out_file.clone(), 
report).expect("\n\nFailed to write HTML file, check if the file path is valid and you have the necessary permissions to write to it."); utils::terminal::clear_previous_line(); println!("\n\nScan report written to: {}\n\n", out_file.clone()); - } - else if out_format == "sarif" { - let report = match utils::api::get_scan_report(&config.get_url(), &scan_id, Some("sarif")) { - Ok(sarif) => sarif, - Err(e) => { - eprintln!("\n\nFailed to fetch SARIF report: {}\n\n", e); - std::process::exit(1); - } - }; + } else if out_format == "sarif" { + let report = + match utils::api::get_scan_report(&config.get_url(), &scan_id, Some("sarif")) { + Ok(sarif) => sarif, + Err(e) => { + eprintln!("\n\nFailed to fetch SARIF report: {}\n\n", e); + std::process::exit(1); + } + }; *stop_signal.lock().unwrap() = true; let _ = results_thread.join(); fs::write(out_file.clone(), report).expect("\n\nFailed to write SARIF file, check if the file path is valid and you have the necessary permissions to write to it."); utils::terminal::clear_previous_line(); println!("\n\nScan report written to: {}\n\n", out_file.clone()); - } - else if out_format == "markdown" { - let report = match utils::api::get_scan_report(&config.get_url(), &scan_id, Some("markdown")) { + } else if out_format == "markdown" { + let report = match utils::api::get_scan_report( + &config.get_url(), + &scan_id, + Some("markdown"), + ) { Ok(markdown) => markdown, Err(e) => { eprintln!("\n\nFailed to fetch Markdown report: {}\n\n", e); @@ -359,100 +412,96 @@ pub fn run( if let Some(fail_on) = fail_on { match fail_on.as_str() { - "LO" => { - if classifications.values().any(|&count| count > 0) { - std::process::exit(1); - } - }, - "ME" => { - if classifications.get("ME").map_or(false, |&count| count > 0) || - classifications.get("HI").map_or(false, |&count| count > 0) { - std::process::exit(1); - } - }, - "HI" => { - if classifications.get("CR").map_or(false, |&count| count > 0) || - classifications.get("HI").map_or(false, |&count| 
count > 0) { - std::process::exit(1); - } - }, + "LO" if classifications.values().any(|&count| count > 0) => { + std::process::exit(1); + } + "ME" if (classifications.get("ME").is_some_and(|&count| count > 0) + || classifications.get("HI").is_some_and(|&count| count > 0)) => + { + std::process::exit(1); + } + "HI" if (classifications.get("CR").is_some_and(|&count| count > 0) + || classifications.get("HI").is_some_and(|&count| count > 0)) => + { + std::process::exit(1); + } "CR" => { if let Some(cr_count) = classifications.get("CR") { if *cr_count > 0 { std::process::exit(1); } } - }, + } _ => (), } } - - } pub fn wait_for_scan(config: &Config, scan_id: &str) { - // Create loading animation - let stop_signal = Arc::new(Mutex::new(false)); + // Create loading animation + let stop_signal = Arc::new(Mutex::new(false)); - // Spawn a new thread for the spinner animation - let stop_signal_clone = Arc::clone(&stop_signal); - thread::spawn(move || { - utils::terminal::show_loading_message("Scanning... The Hunt Is On! ([T]s)", stop_signal_clone); - }); - - loop { - std::thread::sleep(std::time::Duration::from_secs(1)); - match check_scan_status(&scan_id, &config.get_url()) { - Ok(true) => { - *stop_signal.lock().unwrap() = true; - break; - }, - Ok(false) => { }, - Err(e) => { - eprintln!( - "\n\nUnable to check the scan status for scan ID '{}'.\nPlease verify that: + // Spawn a new thread for the spinner animation + let stop_signal_clone = Arc::clone(&stop_signal); + thread::spawn(move || { + utils::terminal::show_loading_message( + "Scanning... The Hunt Is On! ([T]s)", + stop_signal_clone, + ); + }); + + loop { + std::thread::sleep(std::time::Duration::from_secs(1)); + match check_scan_status(scan_id, &config.get_url()) { + Ok(true) => { + *stop_signal.lock().unwrap() = true; + break; + } + Ok(false) => {} + Err(e) => { + eprintln!( + "\n\nUnable to check the scan status for scan ID '{}'.\nPlease verify that: - The server URL '{}' is reachable. 
- Your authentication token is valid. - The scan ID '{}' exists and is correct. Check out our docs at https://docs.corgea.app/install_cli#login-with-the-cli - Error details:\n{}", - scan_id, - config.get_url(), - scan_id, - e - ); - std::process::exit(1); - } + Error details:\n{}", + scan_id, + config.get_url(), + scan_id, + e + ); + std::process::exit(1); } } - print!("{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset)); - println!( - "\r╭────────────────────────────────────────────╮\n\ + } + print!( + "{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset) + ); + println!( + "\r╭────────────────────────────────────────────╮\n\ │ {: <42} │\n\ │ 🎉🎉 Scan Completed Successfully! 🎉🎉 │\n\ │ {: <42} │\n\ ╰────────────────────────────────────────────╯\n", - " ", - " " - ); - - - - + " ", " " + ); } - pub fn check_scan_status(scan_id: &str, url: &str) -> Result> { match utils::api::get_scan(url, scan_id) { Ok(scan) => Ok(scan.status == "complete"), - Err(e) => Err(e) + Err(e) => Err(e), } } - -pub fn fetch_and_group_scan_issues(url: &str, project: &str) -> Result, Box> { +pub fn fetch_and_group_scan_issues( + url: &str, + project: &str, +) -> Result, Box> { let issues = match utils::api::get_all_issues(url, project, None) { Ok(issues) => issues, Err(err) => { @@ -462,13 +511,18 @@ pub fn fetch_and_group_scan_issues(url: &str, project: &str) -> Result = HashMap::new(); if !issues.is_empty() { for issue in &issues { - *classification_counts.entry(issue.urgency.clone()).or_insert(0) += 1; + *classification_counts + .entry(issue.urgency.clone()) + .or_insert(0) += 1; } } Ok(classification_counts) } -pub fn report_scan_status(url: &str, project: &str) -> Result, Box>{ +pub fn report_scan_status( + url: &str, + project: &str, +) -> Result, Box> { let classification_counts = match fetch_and_group_scan_issues(url, project) { Ok(counts) => counts, Err(e) => { @@ -479,8 +533,8 @@ pub fn report_scan_status(url: &str, project: 
&str) -> Result(); utils::terminal::clear_previous_line(); println!("\rScan Results:-\n"); - println!("{:<20} | {}", "Classification", "Count"); - println!("{:-<20} | {}", "", ""); + println!("{:<20} | Count", "Classification"); + println!("{:-<20} | ", ""); let order = vec!["CR", "HI", "ME", "LO"]; for classification in order { @@ -491,8 +545,7 @@ pub fn report_scan_status(url: &str, project: &str) -> Result) { let temp_dir = match TempDir::new() { @@ -48,7 +48,14 @@ pub fn parse(config: &Config, file_path: &str, project_name: Option) { } let (scan_data, paths) = extract_file_path(outpath); - let _scan_id = upload_scan(config, paths, "fortify".to_string(), scan_data, false, project_name); + let _scan_id = upload_scan( + config, + paths, + "fortify".to_string(), + scan_data, + false, + project_name, + ); } else { println!("File 'audit.fvdl' not found in the archive"); }; @@ -61,7 +68,9 @@ fn extract_file_path(scan_file: PathBuf) -> (String, Vec) { let mut reader = BufReader::new(file); let mut contents = String::new(); - reader.read_to_string(&mut contents).expect("Unable to read file"); + reader + .read_to_string(&mut contents) + .expect("Unable to read file"); let mut xml_reader = Reader::from_str(&contents); xml_reader.config_mut().trim_text(true); diff --git a/src/scanners/parsers/checkmarx.rs b/src/scanners/parsers/checkmarx.rs index f8da40f..4fda0f2 100644 --- a/src/scanners/parsers/checkmarx.rs +++ b/src/scanners/parsers/checkmarx.rs @@ -1,8 +1,8 @@ -use serde_json::Value; +use super::{ParseResult, ScanParser}; use crate::log::debug; -use super::{ScanParser, ParseResult}; -use quick_xml::Reader; use quick_xml::events::Event; +use quick_xml::Reader; +use serde_json::Value; pub struct CheckmarxCliParser; @@ -79,13 +79,22 @@ impl ScanParser for CheckmarxWebParser { for language in languages { if let Some(queries) = language.get("queries").and_then(|v| v.as_array()) { for query in queries { - if let Some(vulns) = query.get("vulnerabilities").and_then(|v| 
v.as_array()) { + if let Some(vulns) = + query.get("vulnerabilities").and_then(|v| v.as_array()) + { for vuln in vulns { - if let Some(nodes) = vuln.get("nodes").and_then(|v| v.as_array()) { + if let Some(nodes) = + vuln.get("nodes").and_then(|v| v.as_array()) + { for node in nodes { if let Some(path) = node.get("fileName") { if let Some(truncated_path) = path.as_str() { - paths.push(truncated_path.get(1..).unwrap_or("").to_string()); + paths.push( + truncated_path + .get(1..) + .unwrap_or("") + .to_string(), + ); } } } @@ -124,14 +133,13 @@ impl CheckmarxXmlParser { match reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { if e.name().as_ref() == b"Result" { - for attr in e.attributes() { - if let Ok(attr) = attr { - if attr.key.as_ref() == b"FileName" { - if let Ok(file_name) = std::str::from_utf8(&attr.value) { - let clean_path = file_name.trim_start_matches('/').trim_start_matches('\\'); - if !clean_path.is_empty() { - paths.push(clean_path.to_string()); - } + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"FileName" { + if let Ok(file_name) = std::str::from_utf8(&attr.value) { + let clean_path = + file_name.trim_start_matches('/').trim_start_matches('\\'); + if !clean_path.is_empty() { + paths.push(clean_path.to_string()); } } } @@ -139,7 +147,8 @@ impl CheckmarxXmlParser { } else if e.name().as_ref() == b"FileName" { if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) { if let Ok(file_name) = std::str::from_utf8(text.as_ref()) { - let clean_path = file_name.trim_start_matches('/').trim_start_matches('\\'); + let clean_path = + file_name.trim_start_matches('/').trim_start_matches('\\'); if !clean_path.is_empty() { paths.push(clean_path.to_string()); } diff --git a/src/scanners/parsers/coverity.rs b/src/scanners/parsers/coverity.rs index 1d3f5d7..80c7109 100644 --- a/src/scanners/parsers/coverity.rs +++ b/src/scanners/parsers/coverity.rs @@ -23,17 +23,13 @@ impl ScanParser for CoverityParser { 
let is_merged_defect = e.name().as_ref() == b"cov:mergedDefect" || e.name().as_ref() == b"mergedDefect"; if is_merged_defect { - for attr in e.attributes() { - if let Ok(attr) = attr { - if attr.key.as_ref() == b"file" { - if let Ok(file_path) = std::str::from_utf8(attr.value.as_ref()) - { - let clean_path = file_path - .trim_start_matches('/') - .trim_start_matches('\\'); - if !clean_path.is_empty() { - paths.push(clean_path.to_string()); - } + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"file" { + if let Ok(file_path) = std::str::from_utf8(attr.value.as_ref()) { + let clean_path = + file_path.trim_start_matches('/').trim_start_matches('\\'); + if !clean_path.is_empty() { + paths.push(clean_path.to_string()); } } } diff --git a/src/scanners/parsers/mod.rs b/src/scanners/parsers/mod.rs index 8311935..cae9ae6 100644 --- a/src/scanners/parsers/mod.rs +++ b/src/scanners/parsers/mod.rs @@ -1,5 +1,3 @@ - - #[derive(Debug)] pub struct ParseResult { pub paths: Vec, @@ -34,8 +32,11 @@ impl ScanParserFactory { } #[allow(dead_code)] - pub fn find_parser(&self, input: &str) -> Option<&Box> { - self.parsers.iter().find(|parser| parser.detect(input)) + pub fn find_parser(&self, input: &str) -> Option<&dyn ScanParser> { + self.parsers + .iter() + .find(|parser| parser.detect(input)) + .map(|b| b.as_ref()) } pub fn parse_scan_data(&self, input: &str) -> Result { @@ -53,7 +54,7 @@ impl ScanParserFactory { } } -pub mod semgrep; -pub mod sarif; pub mod checkmarx; pub mod coverity; +pub mod sarif; +pub mod semgrep; diff --git a/src/scanners/parsers/sarif.rs b/src/scanners/parsers/sarif.rs index d9b1956..4781bda 100644 --- a/src/scanners/parsers/sarif.rs +++ b/src/scanners/parsers/sarif.rs @@ -1,29 +1,38 @@ -use serde_json::Value; +use super::{ParseResult, ScanParser}; use crate::log::debug; -use super::{ScanParser, ParseResult}; +use serde_json::Value; pub struct SarifParser; impl ScanParser for SarifParser { fn detect(&self, input: &str) -> bool { if let 
Ok(data) = serde_json::from_str::(input) { - let schema = data.get("$schema").and_then(|v| v.as_str()).unwrap_or("unknown"); + let schema = data + .get("$schema") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); schema.contains("sarif") } else { false } } - + fn parse(&self, input: &str) -> Option { debug("Detected sarif schema"); - + let data: Value = match serde_json::from_str(input) { Ok(data) => data, Err(_) => return None, }; - - let run = data.get("runs").and_then(|v| v.as_array()).and_then(|v| v.get(0)); - let driver = run.and_then(|v| v.get("tool")).and_then(|v| v.get("driver")).and_then(|v| v.get("name")); + + let run = data + .get("runs") + .and_then(|v| v.as_array()) + .and_then(|v| v.first()); + let driver = run + .and_then(|v| v.get("tool")) + .and_then(|v| v.get("driver")) + .and_then(|v| v.get("name")); let tool = driver.and_then(|v| v.as_str()).unwrap_or("unknown"); let scanner = match tool { @@ -46,12 +55,15 @@ impl ScanParser for SarifParser { for run in runs { if let Some(results) = run.get("results").and_then(|v| v.as_array()) { for result in results { - if let Some(locations) = result.get("locations").and_then(|v| v.as_array()) { + if let Some(locations) = result.get("locations").and_then(|v| v.as_array()) + { for location in locations { - if let Some(uri) = location.get("physicalLocation") + if let Some(uri) = location + .get("physicalLocation") .and_then(|v| v.get("artifactLocation")) .and_then(|v| v.get("uri")) - .and_then(|v| v.as_str()) { + .and_then(|v| v.as_str()) + { paths.push(uri.to_string()); } } @@ -60,10 +72,10 @@ impl ScanParser for SarifParser { } } } - + Some(ParseResult { paths, scanner }) } - + fn scanner_name(&self) -> &str { "sarif" } diff --git a/src/scanners/parsers/semgrep.rs b/src/scanners/parsers/semgrep.rs index db70bb6..f00548b 100644 --- a/src/scanners/parsers/semgrep.rs +++ b/src/scanners/parsers/semgrep.rs @@ -1,6 +1,6 @@ -use serde_json::Value; +use super::{ParseResult, ScanParser}; use crate::log::debug; -use 
super::{ScanParser, ParseResult}; +use serde_json::Value; pub struct SemgrepParser; @@ -8,15 +8,15 @@ impl ScanParser for SemgrepParser { fn detect(&self, input: &str) -> bool { input.contains("semgrep.dev") } - + fn parse(&self, input: &str) -> Option { debug("Detected semgrep schema"); - + let data: Value = match serde_json::from_str(input) { Ok(data) => data, Err(_) => return None, }; - + let mut paths = Vec::new(); if let Some(results) = data.get("results").and_then(|v| v.as_array()) { for result in results { @@ -25,13 +25,13 @@ impl ScanParser for SemgrepParser { } } } - + Some(ParseResult { paths, scanner: "semgrep".to_string(), }) } - + fn scanner_name(&self) -> &str { "semgrep" } diff --git a/src/setup_hooks.rs b/src/setup_hooks.rs index c90a78e..44febd8 100644 --- a/src/setup_hooks.rs +++ b/src/setup_hooks.rs @@ -29,11 +29,14 @@ pub fn setup_pre_commit_hook(include_default_scan_types: bool) { }); // Check if pre-commit hook already exists - if std::path::Path::new(&pre_commit_path).exists() { - if !terminal::ask_yes_no("Pre-commit hook already exists. Do you want to overwrite it?", false) { - println!("Skipping pre-commit hook setup."); - return; - } + if std::path::Path::new(&pre_commit_path).exists() + && !terminal::ask_yes_no( + "Pre-commit hook already exists. 
Do you want to overwrite it?", + false, + ) + { + println!("Skipping pre-commit hook setup."); + return; } // Determine scan types to include @@ -62,10 +65,13 @@ pub fn setup_pre_commit_hook(include_default_scan_types: bool) { // Determine fail-on severity levels to include // Create pre-commit hook content - let hook_content = format!(r#"#!/bin/sh + let hook_content = format!( + r#"#!/bin/sh # Corgea pre-commit hook corgea scan blast --only-uncommitted --fail-on LO --scan-type {} -"#, scan_types.join(",")); +"#, + scan_types.join(",") + ); // Write pre-commit hook std::fs::write(&pre_commit_path, hook_content).unwrap_or_else(|e| { @@ -74,11 +80,14 @@ corgea scan blast --only-uncommitted --fail-on LO --scan-type {} }); #[cfg(unix)] - std::fs::set_permissions(&pre_commit_path, std::os::unix::fs::PermissionsExt::from_mode(0o755)) - .unwrap_or_else(|e| { - eprintln!("Failed to set pre-commit hook permissions: {}", e); - std::process::exit(1); - }); + std::fs::set_permissions( + &pre_commit_path, + std::os::unix::fs::PermissionsExt::from_mode(0o755), + ) + .unwrap_or_else(|e| { + eprintln!("Failed to set pre-commit hook permissions: {}", e); + std::process::exit(1); + }); println!("Successfully installed pre-commit hook!"); } diff --git a/src/targets.rs b/src/targets.rs index 81f2d47..96efe65 100644 --- a/src/targets.rs +++ b/src/targets.rs @@ -1,9 +1,9 @@ +use git2::{Delta, Repository, StatusOptions}; +use globset::{Glob, GlobSetBuilder}; +use ignore::WalkBuilder; use std::collections::HashSet; use std::io::{self, BufRead, Read}; use std::path::{Path, PathBuf}; -use globset::{Glob, GlobSetBuilder}; -use ignore::WalkBuilder; -use git2::{Repository, StatusOptions, Delta}; #[derive(Debug)] pub struct TargetResolutionResult { @@ -66,7 +66,11 @@ pub fn resolve_targets(target_value: &str) -> Result Result Result, Stri } let path = Path::new(segment); - + let full_path = if path.is_absolute() { path.to_path_buf() } else { @@ -140,10 +141,11 @@ fn 
read_stdin_files(nul_delimited: bool) -> Result, String> { if nul_delimited { let mut buffer = Vec::new(); - stdin.lock().read_to_end(&mut buffer).map_err(|e| { - format!("Failed to read from stdin: {}", e) - })?; - + stdin + .lock() + .read_to_end(&mut buffer) + .map_err(|e| format!("Failed to read from stdin: {}", e))?; + for part in buffer.split(|&b| b == 0) { if part.is_empty() { continue; @@ -216,26 +218,25 @@ fn resolve_git_selector(selector: &str, repo_root: &Path) -> Result } fn get_git_staged_files(repo_root: &Path) -> Result, String> { - let repo = Repository::open(repo_root) - .map_err(|e| format!("Failed to open git repository: {}", e))?; + let repo = + Repository::open(repo_root).map_err(|e| format!("Failed to open git repository: {}", e))?; - let mut index = repo.index() + let mut index = repo + .index() .map_err(|e| format!("Failed to get index: {}", e))?; - let head_tree = repo.head() - .ok() - .and_then(|head| head.peel_to_tree().ok()); + let head_tree = repo.head().ok().and_then(|head| head.peel_to_tree().ok()); - let index_tree_id = index.write_tree() + let index_tree_id = index + .write_tree() .map_err(|e| format!("Failed to write index tree: {}", e))?; - let index_tree = repo.find_tree(index_tree_id) + let index_tree = repo + .find_tree(index_tree_id) .map_err(|e| format!("Failed to find index tree: {}", e))?; - let diff = repo.diff_tree_to_tree( - head_tree.as_ref(), - Some(&index_tree), - None - ).map_err(|e| format!("Failed to create diff: {}", e))?; + let diff = repo + .diff_tree_to_tree(head_tree.as_ref(), Some(&index_tree), None) + .map_err(|e| format!("Failed to create diff: {}", e))?; let mut files = Vec::new(); diff.foreach( @@ -253,21 +254,23 @@ fn get_git_staged_files(repo_root: &Path) -> Result, String> { None, None, None, - ).map_err(|e| format!("Failed to iterate diff: {}", e))?; + ) + .map_err(|e| format!("Failed to iterate diff: {}", e))?; Ok(files) } fn get_git_untracked_files(repo_root: &Path) -> Result, String> { - let repo = 
Repository::open(repo_root) - .map_err(|e| format!("Failed to open git repository: {}", e))?; + let repo = + Repository::open(repo_root).map_err(|e| format!("Failed to open git repository: {}", e))?; let mut opts = StatusOptions::new(); opts.include_untracked(true); opts.exclude_submodules(true); opts.include_ignored(false); - let statuses = repo.statuses(Some(&mut opts)) + let statuses = repo + .statuses(Some(&mut opts)) .map_err(|e| format!("Failed to get statuses: {}", e))?; let mut files = Vec::new(); @@ -284,17 +287,14 @@ fn get_git_untracked_files(repo_root: &Path) -> Result, String> { } fn get_git_modified_files(repo_root: &Path) -> Result, String> { - let repo = Repository::open(repo_root) - .map_err(|e| format!("Failed to open git repository: {}", e))?; + let repo = + Repository::open(repo_root).map_err(|e| format!("Failed to open git repository: {}", e))?; - let head_tree = repo.head() - .ok() - .and_then(|head| head.peel_to_tree().ok()); + let head_tree = repo.head().ok().and_then(|head| head.peel_to_tree().ok()); - let diff = repo.diff_tree_to_workdir( - head_tree.as_ref(), - None - ).map_err(|e| format!("Failed to create diff: {}", e))?; + let diff = repo + .diff_tree_to_workdir(head_tree.as_ref(), None) + .map_err(|e| format!("Failed to create diff: {}", e))?; let mut files = Vec::new(); diff.foreach( @@ -312,14 +312,15 @@ fn get_git_modified_files(repo_root: &Path) -> Result, String> { None, None, None, - ).map_err(|e| format!("Failed to iterate diff: {}", e))?; + ) + .map_err(|e| format!("Failed to iterate diff: {}", e))?; Ok(files) } fn get_git_diff_files(repo_root: &Path, range: &str) -> Result, String> { - let repo = Repository::open(repo_root) - .map_err(|e| format!("Failed to open git repository: {}", e))?; + let repo = + Repository::open(repo_root).map_err(|e| format!("Failed to open git repository: {}", e))?; let parts: Vec<&str> = range.split("...").collect(); let (old_ref, new_ref) = if parts.len() == 2 { @@ -329,23 +330,28 @@ fn 
get_git_diff_files(repo_root: &Path, range: &str) -> Result, Str if parts.len() == 2 { (parts[0].trim(), parts[1].trim()) } else { - return Err(format!("Invalid diff range format: {}. Expected format: 'old..new' or 'old...new'", range)); + return Err(format!( + "Invalid diff range format: {}. Expected format: 'old..new' or 'old...new'", + range + )); } }; let old_commit = if old_ref.is_empty() { None } else { - Some(repo.revparse_single(old_ref) - .map_err(|e| format!("Failed to resolve reference '{}': {}", old_ref, e))? - .id()) + Some( + repo.revparse_single(old_ref) + .map_err(|e| format!("Failed to resolve reference '{}': {}", old_ref, e))? + .id(), + ) }; let new_commit = if new_ref.is_empty() { repo.head() .map_err(|e| format!("Failed to get HEAD: {}", e))? .target() - .ok_or_else(|| format!("HEAD is not a direct reference"))? + .ok_or_else(|| "HEAD is not a direct reference".to_string())? } else { repo.revparse_single(new_ref) .map_err(|e| format!("Failed to resolve reference '{}': {}", new_ref, e))? @@ -353,24 +359,25 @@ fn get_git_diff_files(repo_root: &Path, range: &str) -> Result, Str }; let old_tree = if let Some(old_id) = old_commit { - Some(repo.find_commit(old_id) - .map_err(|e| format!("Failed to find commit: {}", e))? - .tree() - .map_err(|e| format!("Failed to get tree: {}", e))?) + Some( + repo.find_commit(old_id) + .map_err(|e| format!("Failed to find commit: {}", e))? + .tree() + .map_err(|e| format!("Failed to get tree: {}", e))?, + ) } else { None }; - let new_tree = repo.find_commit(new_commit) + let new_tree = repo + .find_commit(new_commit) .map_err(|e| format!("Failed to find commit: {}", e))? 
.tree() .map_err(|e| format!("Failed to get tree: {}", e))?; - let diff = repo.diff_tree_to_tree( - old_tree.as_ref(), - Some(&new_tree), - None - ).map_err(|e| format!("Failed to create diff: {}", e))?; + let diff = repo + .diff_tree_to_tree(old_tree.as_ref(), Some(&new_tree), None) + .map_err(|e| format!("Failed to create diff: {}", e))?; let mut files = Vec::new(); diff.foreach( @@ -388,22 +395,21 @@ fn get_git_diff_files(repo_root: &Path, range: &str) -> Result, Str None, None, None, - ).map_err(|e| format!("Failed to iterate diff: {}", e))?; + ) + .map_err(|e| format!("Failed to iterate diff: {}", e))?; Ok(files) } fn resolve_directory(dir: &Path, _repo_root: &Path) -> Result, String> { let mut files = Vec::new(); - - let walker = WalkBuilder::new(dir) - .standard_filters(true) - .build(); + + let walker = WalkBuilder::new(dir).standard_filters(true).build(); for result in walker { let entry = result.map_err(|e| format!("Error walking directory: {}", e))?; let path = entry.path(); - + if path.is_file() { files.push(path.to_path_buf()); } @@ -413,24 +419,23 @@ fn resolve_directory(dir: &Path, _repo_root: &Path) -> Result, Stri } fn resolve_glob(pattern: &str, repo_root: &Path) -> Result, String> { - let glob = Glob::new(pattern) - .map_err(|e| format!("Invalid glob pattern '{}': {}", pattern, e))?; + let glob = + Glob::new(pattern).map_err(|e| format!("Invalid glob pattern '{}': {}", pattern, e))?; let mut glob_builder = GlobSetBuilder::new(); glob_builder.add(glob); - let glob_set = glob_builder.build() + let glob_set = glob_builder + .build() .map_err(|e| format!("Failed to build glob set: {}", e))?; let mut files = Vec::new(); - - let walker = WalkBuilder::new(repo_root) - .standard_filters(true) - .build(); + + let walker = WalkBuilder::new(repo_root).standard_filters(true).build(); for result in walker { let entry = result.map_err(|e| format!("Error walking directory: {}", e))?; let path = entry.path(); - + if path.is_file() { // Get relative path from 
repo root if let Ok(relative) = path.strip_prefix(repo_root) { @@ -459,23 +464,19 @@ fn normalize_path(path: &Path, _repo_root: &Path) -> Result { } fn find_repo_root() -> Result { - let current_dir = std::env::current_dir() - .map_err(|e| format!("Failed to get current directory: {}", e))?; + let current_dir = + std::env::current_dir().map_err(|e| format!("Failed to get current directory: {}", e))?; match Repository::discover(¤t_dir) { - Ok(repo) => { - repo.workdir() - .map(|p| p.to_path_buf()) - .or_else(|| repo.path().parent().map(|p| p.to_path_buf())) - .ok_or_else(|| "Failed to determine repository root".to_string()) - } - Err(_) => { - Ok(current_dir) - } + Ok(repo) => repo + .workdir() + .map(|p| p.to_path_buf()) + .or_else(|| repo.path().parent().map(|p| p.to_path_buf())) + .ok_or_else(|| "Failed to determine repository root".to_string()), + Err(_) => Ok(current_dir), } } fn is_git_repo(dir: &Path) -> bool { Repository::discover(dir).is_ok() } - diff --git a/src/utils/api.rs b/src/utils/api.rs index f0e8a59..b37c00e 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,16 +1,19 @@ +use crate::log::debug; use crate::utils; -use serde_json::json; -use std::collections::HashMap; use reqwest::header::HeaderMap; -use serde::{Deserialize, Serialize}; use reqwest::StatusCode; -use std::fs::File; +use reqwest::{ + blocking::multipart, + blocking::multipart::{Form, Part}, +}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use serde_json::Value; +use std::collections::HashMap; use std::error::Error; +use std::fs::File; use std::io::Read; use std::path::Path; -use reqwest::{blocking::multipart, blocking::multipart::{Form, Part}}; -use serde_json::Value; -use crate::log::debug; const CHUNK_SIZE: usize = 50 * 1024 * 1024; // 50 MB const API_BASE: &str = "/api/v1"; @@ -58,7 +61,7 @@ static SHARED_CLIENT: std::sync::LazyLock = debug(&format!("https_proxy detected: {}", https_proxy)); if std::env::var("CORGEA_ACCEPT_CERT").is_ok() { - 
debug(&format!("Skipping CA cert validation")); + debug("Skipping CA cert validation"); builder = builder.danger_accept_invalid_certs(true); } } @@ -77,15 +80,24 @@ pub struct DebugRequestBuilder { impl HttpClient { pub fn get(&self, url: U) -> DebugRequestBuilder { - DebugRequestBuilder { client: self.inner.clone(), inner: self.inner.get(url) } + DebugRequestBuilder { + client: self.inner.clone(), + inner: self.inner.get(url), + } } pub fn post(&self, url: U) -> DebugRequestBuilder { - DebugRequestBuilder { client: self.inner.clone(), inner: self.inner.post(url) } + DebugRequestBuilder { + client: self.inner.clone(), + inner: self.inner.post(url), + } } pub fn patch(&self, url: U) -> DebugRequestBuilder { - DebugRequestBuilder { client: self.inner.clone(), inner: self.inner.patch(url) } + DebugRequestBuilder { + client: self.inner.clone(), + inner: self.inner.patch(url), + } } } @@ -97,19 +109,31 @@ impl DebugRequestBuilder { reqwest::header::HeaderValue: TryFrom, >::Error: Into, { - Self { inner: self.inner.header(key, value), client: self.client } + Self { + inner: self.inner.header(key, value), + client: self.client, + } } pub fn query(self, query: &T) -> Self { - Self { inner: self.inner.query(query), client: self.client } + Self { + inner: self.inner.query(query), + client: self.client, + } } pub fn multipart(self, form: reqwest::blocking::multipart::Form) -> Self { - Self { inner: self.inner.multipart(form), client: self.client } + Self { + inner: self.inner.multipart(form), + client: self.client, + } } pub fn body>(self, body: T) -> Self { - Self { inner: self.inner.body(body), client: self.client } + Self { + inner: self.inner.body(body), + client: self.client, + } } pub fn send(self) -> reqwest::Result { @@ -127,7 +151,10 @@ impl DebugRequestBuilder { debug(&format!("→ {} {}", request.method(), request.url())); debug(&format!(" Request headers: {:?}", request.headers())); match COOKIE_JAR.cookies(request.url()) { - Some(cookies) => debug(&format!(" 
Cookie: {}", cookies.to_str().unwrap_or(""))), + Some(cookies) => debug(&format!( + " Cookie: {}", + cookies.to_str().unwrap_or("") + )), None => debug(" Cookie: (none in jar for this URL)"), } @@ -141,10 +168,12 @@ impl DebugRequestBuilder { } pub fn http_client() -> HttpClient { - HttpClient { inner: SHARED_CLIENT.clone() } + HttpClient { + inner: SHARED_CLIENT.clone(), + } } -fn check_for_warnings(headers: &HeaderMap, status: StatusCode) { +pub(crate) fn check_for_warnings(headers: &HeaderMap, status: StatusCode) { if let Some(warning) = headers.get("warning") { let warnings = warning.to_str().unwrap().split(','); for warning in warnings { @@ -171,68 +200,81 @@ pub fn upload_zip( project_name: &str, repo_info: Option, scan_type: Option, - policy: Option + policy: Option, ) -> Result> { let client = http_client(); let file_size = std::fs::metadata(file_path)?.len(); - let file_name = Path::new(file_path) - .file_name() - .unwrap() - .to_str() - .unwrap(); + let file_name = Path::new(file_path).file_name().unwrap().to_str().unwrap(); let json_object = json!({ "file_name": file_name, "file_size": file_size }); let form = reqwest::blocking::multipart::Form::new() - .part("files", reqwest::blocking::multipart::Part::bytes(Vec::new()) - .file_name(file_name.to_string())) + .part( + "files", + reqwest::blocking::multipart::Part::bytes(Vec::new()).file_name(file_name.to_string()), + ) .text("json", json_object.to_string()); let response_object = client .post(format!("{}{}/start-scan", url, API_BASE)) - .query(&[ - ("scan_type", "blast"), - ]) + .query(&[("scan_type", "blast")]) .multipart(form) .send(); let response_object = match response_object { Ok(response) => { check_for_warnings(response.headers(), response.status()); response - }, - Err(err) => return Err(format!("Network error: Unable to reach the server. Please try again later. Error: {}", err).into()), + } + Err(err) => { + return Err(format!( + "Network error: Unable to reach the server. 
Please try again later. Error: {}", + err + ) + .into()) + } }; let response_status = response_object.status(); let response_text = response_object.text()?; - + if response_status != StatusCode::OK { - debug(&format!("Initial scan request failed with status: {}. Response body: {}", response_status, response_text)); - + debug(&format!( + "Initial scan request failed with status: {}. Response body: {}", + response_status, response_text + )); + if response_status == StatusCode::BAD_REQUEST { - if let Ok(error_response) = serde_json::from_str::>(&response_text) { + if let Ok(error_response) = + serde_json::from_str::>(&response_text) + { if let Some(message) = error_response.get("message").and_then(Value::as_str) { return Err(format!("Request failed: {}", message).into()); } } return Err(format!("Request failed (400): {}", response_text).into()); } - + return Err("Error getting server response, Please try again later.".into()); } - + let response: HashMap = match serde_json::from_str(&response_text) { Ok(json) => json, Err(_) => { - debug(&format!("Failed to parse initial scan response as JSON. Response body: {}", response_text)); + debug(&format!( + "Failed to parse initial scan response as JSON. Response body: {}", + response_text + )); return Err("Error getting server response, Please try again later.".into()); - }, + } }; let transfer_id = match response["transfer_id"].as_str() { Some(transfer_id) => transfer_id, - None => return Err("Failed to retrieve transfer ID. Please check the request parameters and try again.".into()), + None => return Err( + "Failed to retrieve transfer ID. Please check the request parameters and try again." 
+ .into(), + ), }; let mut file = File::open(file_path)?; let mut buffer = vec![0; CHUNK_SIZE]; @@ -247,14 +289,17 @@ pub fn upload_zip( let chunk = &buffer[..bytes_read]; let mut form = Form::new() - .part( - "chunk_data", - Part::bytes(chunk.to_vec()) - .file_name(file_name.to_string()) - .mime_str("application/octet-stream")?, - ) - .part("project_name", multipart::Part::text(project_name.to_string())) - .part("file_size", multipart::Part::text(file_size.to_string())); + .part( + "chunk_data", + Part::bytes(chunk.to_vec()) + .file_name(file_name.to_string()) + .mime_str("application/octet-stream")?, + ) + .part( + "project_name", + multipart::Part::text(project_name.to_string()), + ) + .part("file_size", multipart::Part::text(file_size.to_string())); if let Some(ref info) = repo_info { if let Some(branch) = &info.branch { form = form.part("branch", multipart::Part::text(branch.to_string())); @@ -279,58 +324,69 @@ pub fn upload_zip( } let response = match client - .patch(format!("{}{}/start-scan/{}/", url, API_BASE, transfer_id)) - .header("Upload-Offset", offset.to_string()) - .header("Upload-Length", file_size.to_string()) - .header("Upload-Name", file_name) - .query(&[ - ("scan_type", "blast") - ]) - .multipart(form) - .send() { + .patch(format!("{}{}/start-scan/{}/", url, API_BASE, transfer_id)) + .header("Upload-Offset", offset.to_string()) + .header("Upload-Length", file_size.to_string()) + .header("Upload-Name", file_name) + .query(&[("scan_type", "blast")]) + .multipart(form) + .send() + { Ok(response) => { check_for_warnings(response.headers(), response.status()); response - }, + } Err(e) => { return Err(format!("Failed to send request: {}", e).into()); } }; if !response.status().is_success() { let status_code = response.status(); - let response_text = response.text().unwrap_or_else(|_| "Unable to read response body".to_string()); - debug(&format!("Chunk upload failed with status: {}. 
Response body: {}", status_code, response_text)); - + let response_text = response + .text() + .unwrap_or_else(|_| "Unable to read response body".to_string()); + debug(&format!( + "Chunk upload failed with status: {}. Response body: {}", + status_code, response_text + )); + if status_code.is_client_error() && response_text.contains("Invalid policy ids") { - return Err("Invalid policy ids passed. Please check the policy ids and try again.".into()); + return Err( + "Invalid policy ids passed. Please check the policy ids and try again.".into(), + ); } - + if status_code == StatusCode::BAD_REQUEST { - if let Ok(error_response) = serde_json::from_str::>(&response_text) { + if let Ok(error_response) = + serde_json::from_str::>(&response_text) + { if let Some(message) = error_response.get("message").and_then(Value::as_str) { return Err(format!("Upload failed: {}", message).into()); } } return Err(format!("Upload failed (400): {}", response_text).into()); } - - return Err(format!("Failed to upload file: {}", status_code).into()); + return Err(format!("Failed to upload file: {}", status_code).into()); } utils::terminal::show_progress_bar(offset as f32 / file_size as f32); offset += bytes_read as u64; if bytes_read < CHUNK_SIZE { utils::terminal::show_progress_bar(1.0); - print!("\n"); + println!(); let body: HashMap = response.json()?; if let Some(scan_id_value) = body.get("scan_id") { let scan_id = scan_id_value.as_str().unwrap().to_string(); let project_id = body.get("project_id").and_then(|v| { - v.as_str().map(|s| s.to_string()) + v.as_str() + .map(|s| s.to_string()) .or_else(|| v.as_i64().map(|n| n.to_string())) }); - return Ok(UploadZipResult { scan_id, project_id }); + return Ok(UploadZipResult { + scan_id, + project_id, + }); } else { return Err("Failed to get scan_id from response".into()); } @@ -340,14 +396,24 @@ pub fn upload_zip( Err("Failed to upload file".into()) } -pub fn get_all_issues(url: &str, project: &str, scan_id: Option) -> Result, Box> { +pub fn 
get_all_issues( + url: &str, + project: &str, + scan_id: Option, +) -> Result, Box> { let mut all_issues = Vec::new(); let mut current_page: u32 = 1; loop { - let response = match get_scan_issues(url, project, Some(current_page as u16), Some(30), scan_id.clone()) { + let response = match get_scan_issues( + url, + project, + Some(current_page as u16), + Some(30), + scan_id.clone(), + ) { Ok(response) => response, - Err(e) => return Err(format!("Failed to get scan issues: {}", e).into()) + Err(e) => return Err(format!("Failed to get scan issues: {}", e).into()), }; if let Some(mut issues) = response.issues { @@ -374,19 +440,14 @@ pub fn get_scan_issues( project: &str, page: Option, page_size: Option, - scan_id: Option -) -> Result> { + scan_id: Option, +) -> Result> { let mut seperator = "?"; let mut url = match scan_id { Some(scan_id) => format!("{}{}/scan/{}/issues", url, API_BASE, scan_id), None => { seperator = "&"; - format!( - "{}{}/issues?project={}", - url, - API_BASE, - project - ) + format!("{}{}/issues?project={}", url, API_BASE, project) } }; if let Some(p) = page { @@ -405,14 +466,18 @@ pub fn get_scan_issues( Ok(res) => { check_for_warnings(res.headers(), res.status()); res - }, + } Err(e) => return Err(format!("Failed to send request: {}", e).into()), }; let response_text = response.text()?; - let project_issues_response: ProjectIssuesResponse = serde_json::from_str(&response_text).map_err(|e| { - debug(&format!("Failed to parse response: {}. Response body: {}", e, response_text)); - format!("Failed to parse response: {}", e) - })?; + let project_issues_response: ProjectIssuesResponse = serde_json::from_str(&response_text) + .map_err(|e| { + debug(&format!( + "Failed to parse response: {}. 
Response body: {}", + e, response_text + )); + format!("Failed to parse response: {}", e) + })?; if project_issues_response.status == "ok" { Ok(project_issues_response) @@ -423,7 +488,7 @@ pub fn get_scan_issues( } } -pub fn get_scan(url: &str, scan_id: &str) -> Result> { +pub fn get_scan(url: &str, scan_id: &str) -> Result> { let url = format!("{}{}/scan/{}", url, API_BASE, scan_id); let client = http_client(); @@ -438,16 +503,27 @@ pub fn get_scan(url: &str, scan_id: &str) -> Result) -> Result> { +pub fn get_scan_report( + url: &str, + scan_id: &str, + format: Option<&str>, +) -> Result> { let url = if let Some(fmt) = format { format!("{}{}/scan/{}/report?format={}", url, API_BASE, scan_id, fmt) } else { @@ -468,43 +544,43 @@ pub fn get_scan_report(url: &str, scan_id: &str, format: Option<&str>) -> Result if response.status().is_success() { Ok(response.text()?) } else { - Err(format!("Error: Unable to fetch scan report. Status code: {}", response.status()).into()) + Err(format!( + "Error: Unable to fetch scan report. Status code: {}", + response.status() + ) + .into()) } } pub fn get_issue(url: &str, issue: &str) -> Result> { - let url = format!( - "{}{}/issue/{}", - url, - API_BASE, - issue, - ); + let url = format!("{}{}/issue/{}", url, API_BASE, issue,); let client = http_client(); debug(&format!("Sending request to URL: {}", url)); let response = match client.get(&url).send() { Ok(res) => { check_for_warnings(res.headers(), res.status()); res - }, + } Err(e) => return Err(format!("Failed to send request: {}", e).into()), }; let response_text = response.text()?; - return match serde_json::from_str::(&response_text) { + match serde_json::from_str::(&response_text) { Ok(body) => Ok(body), Err(e) => { - debug(&format!("Failed to parse response: {}. Response body: {}", e, response_text)); + debug(&format!( + "Failed to parse response: {}. 
Response body: {}", + e, response_text + )); Err(format!("Failed to parse response: {}", e).into()) - }, - }; + } + } } - - pub fn query_scan_list( url: &str, project: Option<&str>, page: Option, - page_size: Option + page_size: Option, ) -> Result> { let url = format!("{}{}/scans", url, API_BASE); let page = page.unwrap_or(1); @@ -518,60 +594,57 @@ pub fn query_scan_list( query_params.push(("project", project.to_string())); } - let client = http_client(); debug(&format!("Sending request to URL: {}", url)); - let response = match client - .get(url) - .query(&query_params) - .send() { - Ok(res) => { - check_for_warnings(res.headers(), res.status()); - res - }, - Err(e) => return Err(format!("API request failed: {}", e).into()), - }; - if response.status().is_success() { - let response_text = response.text()?; - let api_response: ScansResponse = serde_json::from_str(&response_text).map_err(|e| { - debug(&format!("Failed to parse response: {}. Response body: {}", e, response_text)); - format!("Failed to parse response: {}", e) - })?; - Ok(api_response) - } else { - Err(format!( - "API request failed with status: {}", - response.status() - ).into()) + let response = match client.get(url).query(&query_params).send() { + Ok(res) => { + check_for_warnings(res.headers(), res.status()); + res } + Err(e) => return Err(format!("API request failed: {}", e).into()), + }; + if response.status().is_success() { + let response_text = response.text()?; + let api_response: ScansResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse response: {}. 
Response body: {}", + e, response_text + )); + format!("Failed to parse response: {}", e) + })?; + Ok(api_response) + } else { + Err(format!("API request failed with status: {}", response.status()).into()) + } } - pub fn exchange_code_for_token( base_url: &str, code: &str, ) -> Result> { let client = reqwest::blocking::Client::new(); let exchange_url = format!("{}{}/authorize", base_url, API_BASE); - + let response = client .get(&exchange_url) .header("CORGEA-SOURCE", get_source()) .query(&[("code", code)]) .send()?; - + if response.status().is_success() { let response_json: HashMap = response.json()?; - + if let Some(user_token) = response_json.get("user_token") { if let Some(user_token_str) = user_token.as_str() { return Ok(user_token_str.to_string()); } } - + Err("User token not found in response".into()) } else { - let error_text = response.text().unwrap_or_else(|_| "Unknown error".to_string()); + let error_text = response + .text() + .unwrap_or_else(|_| "Unknown error".to_string()); Err(format!("Failed to exchange code for user token: {}", error_text).into()) } } @@ -581,9 +654,7 @@ pub fn verify_token(corgea_url: &str) -> Result> { let client = http_client(); debug(&format!("Sending request to URL: {}", url)); - let response = client - .get(&url) - .send()?; + let response = client.get(&url).send()?; check_for_warnings(response.headers(), response.status()); @@ -592,8 +663,11 @@ pub fn verify_token(corgea_url: &str) -> Result> { let body: HashMap = match serde_json::from_str(&body_text) { Ok(json) => json, Err(e) => { - debug(&format!("Failed to parse response as JSON: {}. Response body: {}", e, body_text)); - return Err(format!("Failed to parse response").into()); + debug(&format!( + "Failed to parse response as JSON: {}. 
Response body: {}", + e, body_text + )); + return Err("Failed to parse response".to_string().into()); } }; @@ -606,9 +680,12 @@ pub fn verify_token(corgea_url: &str) -> Result> { pub fn check_blocking_rules( url: &str, sast_scan_id: &str, - page: Option + page: Option, ) -> Result> { - let url = format!("{}{}/scan/{}/check_blocking_rules", url, API_BASE, sast_scan_id); + let url = format!( + "{}{}/scan/{}/check_blocking_rules", + url, API_BASE, sast_scan_id + ); let page = page.unwrap_or(1); let query_params = vec![("page", page.to_string())]; @@ -616,43 +693,40 @@ pub fn check_blocking_rules( debug(&format!("Sending request to URL: {}", url)); debug(&format!("Query params: {:?}", query_params)); - let response = match client - .get(url) - .query(&query_params) - .send() { - Ok(res) => { - check_for_warnings(res.headers(), res.status()); - debug(&format!("Response status: {}", res.status())); - debug(&format!("Response headers: {:?}", res.headers())); - res - }, - Err(e) => return Err(format!("API request failed: {}", e).into()), - }; + let response = match client.get(url).query(&query_params).send() { + Ok(res) => { + check_for_warnings(res.headers(), res.status()); + debug(&format!("Response status: {}", res.status())); + debug(&format!("Response headers: {:?}", res.headers())); + res + } + Err(e) => return Err(format!("API request failed: {}", e).into()), + }; if response.status().is_success() { let response_text = response.text()?; - let api_response: BlockingRuleResponse = serde_json::from_str(&response_text).map_err(|e| { - debug(&format!("Failed to parse response: {}. Response body: {}", e, response_text)); - format!("Failed to parse response: {}", e) - })?; + let api_response: BlockingRuleResponse = + serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse response: {}. 
Response body: {}", + e, response_text + )); + format!("Failed to parse response: {}", e) + })?; Ok(api_response) } else { let status = response.status(); let response_text = response.text()?; debug(&format!("Response body: {}", response_text)); - Err(format!( - "API request failed with status: {}", - status - ).into()) + Err(format!("API request failed with status: {}", status).into()) } } - pub fn get_sca_issues( url: &str, page: Option, page_size: Option, - scan_id: Option + scan_id: Option, ) -> Result> { let client = http_client(); let mut query_params = vec![]; @@ -672,10 +746,7 @@ pub fn get_sca_issues( debug(&format!("Sending request to URL: {}", endpoint)); debug(&format!("Query params: {:?}", query_params)); - let response = client - .get(&endpoint) - .query(&query_params) - .send(); + let response = client.get(&endpoint).query(&query_params).send(); let response = match response { Ok(response) => { @@ -683,14 +754,23 @@ pub fn get_sca_issues( debug(&format!("Response status: {}", response.status())); debug(&format!("Response headers: {:?}", response.headers())); response - }, - Err(err) => return Err(format!("Network error: Unable to reach the server. Please try again later. Error: {}", err).into()), + } + Err(err) => { + return Err(format!( + "Network error: Unable to reach the server. Please try again later. Error: {}", + err + ) + .into()) + } }; let status = response.status(); if !status.is_success() { if status == StatusCode::NOT_FOUND { - return Err("SCA issues not found. Please check the scan ID or ensure the scan has SCA issues.".into()); + return Err( + "SCA issues not found. Please check the scan ID or ensure the scan has SCA issues." + .into(), + ); } return Err(format!("Request failed with status: {}", status).into()); } @@ -699,9 +779,12 @@ pub fn get_sca_issues( let response_data: SCAIssuesResponse = match serde_json::from_str(&response_text) { Ok(json) => json, Err(e) => { - debug(&format!("Failed to parse response: {}. 
Response body: {}", e, response_text)); - return Err("Error parsing server response. Please try again later.".into()) - }, + debug(&format!( + "Failed to parse response: {}. Response body: {}", + e, response_text + )); + return Err("Error parsing server response. Please try again later.".into()); + } }; Ok(response_data) @@ -710,16 +793,17 @@ pub fn get_sca_issues( pub fn get_all_sca_issues( url: &str, _project: &str, - scan_id: Option + scan_id: Option, ) -> Result, Box> { let mut all_issues = Vec::new(); let mut current_page: u32 = 1; loop { - let response = match get_sca_issues(url, Some(current_page as u16), Some(30), scan_id.clone()) { - Ok(response) => response, - Err(e) => return Err(format!("Failed to get SCA issues: {}", e).into()) - }; + let response = + match get_sca_issues(url, Some(current_page as u16), Some(30), scan_id.clone()) { + Ok(response) => response, + Err(e) => return Err(format!("Failed to get SCA issues: {}", e).into()), + }; if response.issues.is_empty() { break; @@ -737,7 +821,7 @@ pub fn get_all_sca_issues( } #[derive(Deserialize, Serialize, Debug)] -pub struct ScanResponse { +pub struct ScanResponse { pub id: String, pub project: String, pub repo: Option, @@ -753,10 +837,9 @@ pub struct ProjectIssuesResponse { pub issues: Option>, pub page: Option, pub total_pages: Option, - pub total_issues: Option + pub total_issues: Option, } - #[derive(Serialize, Deserialize, Debug)] pub struct ScansResponse { pub status: String, @@ -765,7 +848,6 @@ pub struct ScansResponse { pub scans: Option>, } - #[derive(Serialize, Deserialize, Debug)] pub struct FullIssueResponse { pub status: String, @@ -802,7 +884,6 @@ pub struct IssueWithBlockingRules { pub blocking_rules: Option>, } - #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Classification { pub id: String, @@ -877,7 +958,7 @@ pub struct BlockingRuleResponse { #[derive(Deserialize, Debug, Clone)] pub struct BlockingIssue { pub id: String, - pub triggered_by_rules: Vec + pub 
triggered_by_rules: Vec, } #[derive(Deserialize, Serialize, Debug)] @@ -913,3 +994,80 @@ pub struct SCAIssuesResponse { pub total_pages: u32, pub total_issues: u32, } + +#[cfg(test)] +mod tests { + use super::*; + use reqwest::header::{HeaderMap, HeaderValue}; + + #[test] + fn is_jwt_accepts_three_dot_separated_non_empty_parts() { + assert!(is_jwt("aaa.bbb.ccc")); + assert!(is_jwt("header.payload.signature")); + } + + #[test] + fn is_jwt_rejects_wrong_part_count() { + assert!(!is_jwt("aaa.bbb")); + assert!(!is_jwt("aaa.bbb.ccc.ddd")); + assert!(!is_jwt("plainstring")); + assert!(!is_jwt("")); + } + + #[test] + fn is_jwt_rejects_when_any_part_is_empty() { + assert!(!is_jwt("aaa..ccc")); + assert!(!is_jwt(".bbb.ccc")); + assert!(!is_jwt("aaa.bbb.")); + } + + #[test] + fn auth_headers_uses_bearer_for_jwt_tokens() { + let headers = auth_headers("aaa.bbb.ccc"); + + assert_eq!( + headers.get("Authorization").map(|v| v.to_str().unwrap()), + Some("Bearer aaa.bbb.ccc") + ); + assert!(headers.get("CORGEA-TOKEN").is_none()); + assert!(headers.get("CORGEA-SOURCE").is_some()); + } + + #[test] + fn auth_headers_uses_corgea_token_header_for_opaque_tokens() { + let headers = auth_headers("opaque-token-xyz"); + + assert_eq!( + headers.get("CORGEA-TOKEN").map(|v| v.to_str().unwrap()), + Some("opaque-token-xyz") + ); + assert!(headers.get("Authorization").is_none()); + assert!(headers.get("CORGEA-SOURCE").is_some()); + } + + #[test] + fn check_for_warnings_is_noop_when_no_warning_header_and_status_ok() { + let headers = HeaderMap::new(); + check_for_warnings(&headers, StatusCode::OK); + } + + #[test] + fn check_for_warnings_is_noop_for_non_299_codes() { + let mut headers = HeaderMap::new(); + headers.insert( + "warning", + HeaderValue::from_static("199 - \"misc warning\""), + ); + check_for_warnings(&headers, StatusCode::OK); + } + + #[test] + fn check_for_warnings_tolerates_multiple_comma_separated_warnings() { + let mut headers = HeaderMap::new(); + headers.insert( + "warning", + 
HeaderValue::from_static("199 host \"first\", 299 host \"deprecated\""), + ); + check_for_warnings(&headers, StatusCode::OK); + } +} diff --git a/src/utils/generic.rs b/src/utils/generic.rs index 627ddda..7cfad56 100644 --- a/src/utils/generic.rs +++ b/src/utils/generic.rs @@ -1,12 +1,12 @@ +use crate::utils::terminal::{set_text_color, TerminalColor}; +use git2::Repository; +use globset::{Glob, GlobSetBuilder}; +use ignore::WalkBuilder; +use std::env; +use std::fs::{self, File}; use std::io; use std::path::{Path, PathBuf}; use zip::{write::FileOptions, ZipWriter}; -use ignore::WalkBuilder; -use globset::{GlobSetBuilder, Glob}; -use std::fs::{self, File}; -use std::env; -use git2::Repository; -use crate::utils::terminal::{set_text_color, TerminalColor}; // Global exclude globs used across multiple functions const DEFAULT_EXCLUDE_GLOBS: &[&str] = &[ @@ -32,7 +32,7 @@ const DEFAULT_EXCLUDE_GLOBS: &[&str] = &[ ]; /// Create a zip file from a target specification or full repository scan. -/// +/// /// - If `target` is `None`, performs a full repository scan (equivalent to scanning all files). /// - If `target` is `Some(target_str)`, resolves the target using the targets module and creates zip from those files. /// The target string can be a comma-separated list of files, directories, globs, or git selectors. 
@@ -53,8 +53,9 @@ pub fn create_zip_from_target>( let current_dir = env::current_dir()?; let result = crate::targets::resolve_targets(target_str) .map_err(|e| format!("Failed to resolve targets: {}", e))?; - - result.files + + result + .files .iter() .filter_map(|file| { if !file.exists() || !file.is_file() { @@ -62,17 +63,13 @@ pub fn create_zip_from_target>( } match file.strip_prefix(¤t_dir) { Ok(relative) => Some((file.clone(), relative.to_path_buf())), - Err(_) => { - Some((file.clone(), file.clone())) - } + Err(_) => Some((file.clone(), file.clone())), } }) .collect() } else { let directory = Path::new("."); - let walker = WalkBuilder::new(directory) - .standard_filters(true) - .build(); + let walker = WalkBuilder::new(directory).standard_filters(true).build(); let mut files = Vec::new(); for result in walker { @@ -99,7 +96,7 @@ pub fn create_zip_from_target>( for (path, relative_path) in files_to_zip { let is_excluded = glob_set.is_match(&path); - + if (path.is_file() || path.is_dir()) && !is_excluded { if path.is_file() { zip.start_file(relative_path.to_string_lossy(), options)?; @@ -152,13 +149,12 @@ pub fn create_path_if_not_exists>(path: P) -> io::Result<()> { Ok(()) } - pub fn is_git_repo(dir: &str) -> Result { let git_path = Path::new(dir).join(".git"); if git_path.exists() { return Ok(true); } - + // Fall back to the more expensive discover method for cases like: // - We're in a subdirectory of a git repo // - .git is a file (worktrees, submodules) @@ -183,9 +179,10 @@ pub fn delete_directory>(path: P) -> io::Result<()> { } pub fn get_current_working_directory() -> Option { - env::current_dir() - .ok() - .and_then(|path| path.file_name().map(|name| name.to_string_lossy().to_string())) + env::current_dir().ok().and_then(|path| { + path.file_name() + .map(|name| name.to_string_lossy().to_string()) + }) } /// Determine the project name with fallback logic: @@ -227,25 +224,25 @@ fn extract_repo_name_from_url(url: &str) -> Option { // - 
git@github.com:user/repo.git // - https://github.com/user/repo // - git@github.com:user/repo - + let url = url.trim(); - + let url = url.strip_suffix(".git").unwrap_or(url); - - if let Some(name) = url.split('/').last() { + + if let Some(name) = url.split('/').next_back() { let name = name.trim(); if !name.is_empty() { return Some(name.to_string()); } } - - if let Some(name) = url.split(':').last() { + + if let Some(name) = url.split(':').next_back() { let name = name.trim(); if !name.is_empty() { return Some(name.to_string()); } } - + None } @@ -271,12 +268,23 @@ pub fn get_repo_info(dir: &str) -> Result, git2::Error> { }); // Get the latest commit SHA - let sha = repo.head().ok().and_then(|head| head.peel_to_commit().ok().map(|commit| commit.id().to_string())); + let sha = repo.head().ok().and_then(|head| { + head.peel_to_commit() + .ok() + .map(|commit| commit.id().to_string()) + }); // Get the remote URL (assuming "origin") - let repo_url = repo.find_remote("origin").ok().and_then(|remote| remote.url().map(|url| url.to_string())); + let repo_url = repo + .find_remote("origin") + .ok() + .and_then(|remote| remote.url().map(|url| url.to_string())); - Ok(Some(RepoInfo { branch, repo_url, sha })) + Ok(Some(RepoInfo { + branch, + repo_url, + sha, + })) } pub fn get_status(status: &str) -> &str { @@ -300,4 +308,3 @@ pub struct RepoInfo { pub repo_url: Option, pub sha: Option, } - diff --git a/src/utils/terminal.rs b/src/utils/terminal.rs index 4c726eb..1bb4c4c 100644 --- a/src/utils/terminal.rs +++ b/src/utils/terminal.rs @@ -1,11 +1,11 @@ -use std::io::{self, Write}; -use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; -use std::{thread, time}; -use std::sync::{Arc, Mutex}; use crate::utils; use regex::Regex; +use std::io::{self, Write}; +use std::sync::{Arc, Mutex}; +use std::{thread, time}; +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; -pub fn show_progress_bar(progress: f32) -> () { +pub fn 
show_progress_bar(progress: f32) { let total_bar_length = 50; if progress == -1.0 { print!("\r{}", " ".repeat(50)); @@ -27,17 +27,28 @@ pub fn show_progress_bar(progress: f32) -> () { } pub fn show_loading_message(message: &str, stop_signal: Arc>) { - let spinner = vec!["⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"]; - let spinner_colors = vec![Color::Cyan, Color::Magenta, Color::Yellow, Color::Green]; + let spinner = ["⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"]; + let spinner_colors = [Color::Cyan, Color::Magenta, Color::Yellow, Color::Green]; let start_time = time::Instant::now(); let mut i = 0; let mut stdout = StandardStream::stdout(ColorChoice::Always); print!("{} ", message); io::stdout().flush().unwrap(); loop { - stdout.set_color(ColorSpec::new().set_fg(Some(spinner_colors[i % spinner_colors.len()])).set_bg(Some(Color::Black))).unwrap(); + stdout + .set_color( + ColorSpec::new() + .set_fg(Some(spinner_colors[i % spinner_colors.len()])) + .set_bg(Some(Color::Black)), + ) + .unwrap(); let message = message.replace("[T]", &format!("{:.0}", start_time.elapsed().as_secs())); - print!("\r[{}] {}{}", spinner[i % spinner.len()], message, set_text_color("", TerminalColor::Reset)); + print!( + "\r[{}] {}{}", + spinner[i % spinner.len()], + message, + set_text_color("", TerminalColor::Reset) + ); io::stdout().flush().unwrap(); // Sleep for a bit before updating the spinner thread::sleep(time::Duration::from_millis(100)); @@ -53,8 +64,6 @@ pub fn show_loading_message(message: &str, stop_signal: Arc>) { stdout.reset().unwrap(); } - - pub fn set_text_color(txt: &str, color: TerminalColor) -> String { let color_code = match color { TerminalColor::Red => "\x1b[31m", @@ -63,7 +72,7 @@ pub fn set_text_color(txt: &str, color: TerminalColor) -> String { TerminalColor::Yellow => "\x1b[33m", TerminalColor::Reset => "\x1b[0m", }; - return format!("{}{}{}", color_code, txt, "\x1b[0m"); + format!("{}{}{}", color_code, txt, "\x1b[0m") } pub fn show_welcome_message() { @@ -79,7 +88,7 @@ pub fn 
show_welcome_message() { "#; println!("{}", set_text_color(dog_art, TerminalColor::Green)); -} +} pub fn format_code(code: &str) -> String { let mut formatted_code = String::new(); @@ -89,7 +98,13 @@ pub fn format_code(code: &str) -> String { for capture in regex.captures_iter(code) { if let Some(matched) = capture.get(1) { formatted_code.push_str(&code[last_end..capture.get(0).unwrap().start()]); - formatted_code.push_str(&format!("`{}`", utils::terminal::set_text_color(matched.as_str(), utils::terminal::TerminalColor::Green))); + formatted_code.push_str(&format!( + "`{}`", + utils::terminal::set_text_color( + matched.as_str(), + utils::terminal::TerminalColor::Green + ) + )); last_end = capture.get(0).unwrap().end(); } } @@ -113,9 +128,9 @@ pub fn format_diff(diff: &str) -> String { format!("{}\n", set_text_color(line, TerminalColor::Green)) } else if line.starts_with("@@") { let formatted_text = regex.replace_all(line, |caps: ®ex::Captures| { - set_text_color(&caps[0], TerminalColor::Blue) + set_text_color(&caps[0], TerminalColor::Blue) }); - format!("{}\n", formatted_text) + format!("{}\n", formatted_text) } else if line.starts_with("-") { format!("{}\n", set_text_color(line, TerminalColor::Red)) } else if line.starts_with("+") { @@ -135,7 +150,11 @@ pub fn clear_line(length: usize) { } pub fn clear_previous_line() { - print!("\r{}{}", utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset), " ".repeat(100)); + print!( + "\r{}{}", + utils::terminal::set_text_color("", utils::terminal::TerminalColor::Reset), + " ".repeat(100) + ); } pub fn print_with_pagination(str: &str) { @@ -143,7 +162,7 @@ pub fn print_with_pagination(str: &str) { let mut lines = str.lines(); let mut buffer = String::new(); let stdin = io::stdin(); - let message ="-- More -- (Press Enter to continue, Ctrl+C to exit)"; + let message = "-- More -- (Press Enter to continue, Ctrl+C to exit)"; loop { clear_line(message.len()); @@ -154,7 +173,6 @@ pub fn 
print_with_pagination(str: &str) { clear_line(message.len()); return; } - } print!("{}", message); @@ -163,7 +181,6 @@ pub fn print_with_pagination(str: &str) { buffer.clear(); stdin.read_line(&mut buffer).unwrap(); - print!("\x1B[2K\x1B[1A"); stdout.flush().unwrap(); } @@ -182,30 +199,44 @@ pub fn ask_yes_no(question: &str, should_default: bool) -> bool { loop { print!("{} (y/n): ", question); io::stdout().flush().unwrap(); - + let mut input = String::new(); io::stdin().read_line(&mut input).unwrap(); - + match input.trim().to_lowercase().as_str() { "y" | "yes" => return true, "n" | "no" => return false, - _ => if should_default { - return true; - } else { - println!("Please answer with yes/y or no/n"); + _ => { + if should_default { + return true; + } else { + println!("Please answer with yes/y or no/n"); + } } } } } pub fn print_table(table: Vec>, page: Option, total_pages: Option) { - let columns = table.iter().enumerate().fold(vec![vec![]; table[0].len()], |mut acc, (_i, row)| { - for (j, cell) in row.iter().enumerate() { - acc[j].push(cell.clone()); - } - acc - }); - let column_lengths = columns.iter().map(|col| col.iter().map(|cell| cell.len()).max_by(|a, b| a.cmp(b)).unwrap_or(0)).collect::>(); + let columns = + table + .iter() + .enumerate() + .fold(vec![vec![]; table[0].len()], |mut acc, (_i, row)| { + for (j, cell) in row.iter().enumerate() { + acc[j].push(cell.clone()); + } + acc + }); + let column_lengths = columns + .iter() + .map(|col| { + col.iter() + .map(|cell| cell.len()) + .max_by(|a, b| a.cmp(b)) + .unwrap_or(0) + }) + .collect::>(); for (j, row) in table.iter().enumerate() { for (i, cell) in row.iter().enumerate() { print!("{:>, page: Option, total_pages: Opti } } - pub enum TerminalColor { Reset, Red, Green, Blue, Yellow, -} \ No newline at end of file +} diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..a4d551f --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,1833 @@ +//! 
Dependency freshness verification. +//! +//! Discovers installed dependencies from a project (npm and/or Python), +//! looks up publish times from the public registries (npmjs.org / pypi.org), +//! and flags any package whose installed version was published within a +//! configurable recency threshold. This is intended to act as a fast +//! supply-chain tripwire against very recently published versions of +//! dependencies (a common malware-injection pattern). + +pub mod npm; +pub mod python; +pub mod registry; +pub mod report; +pub mod severity; + +pub use severity::{parse_severity_floor_arg, SeverityFloor, SeverityLevel}; + +use std::io::IsTerminal; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering}; +use std::sync::Mutex; +use std::time::Duration; + +use chrono::{DateTime, Utc}; + +use crate::utils::terminal::{set_text_color, TerminalColor}; +use crate::vuln_api; + +/// Which ecosystem(s) to scan. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Ecosystem { + Npm, + Python, + All, +} + +impl Ecosystem { + pub fn parse(s: &str) -> Result { + match s.to_lowercase().as_str() { + "npm" | "node" | "javascript" | "js" => Ok(Ecosystem::Npm), + "python" | "py" | "pypi" => Ok(Ecosystem::Python), + "all" | "auto" => Ok(Ecosystem::All), + other => Err(format!( + "Unknown ecosystem '{}'. Valid options are: npm, python, all.", + other + )), + } + } +} + +/// A single resolved dependency that we want to verify. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Dependency { + pub name: String, + pub version: String, + pub ecosystem: DependencyEcosystem, + /// Where in the project we discovered this dependency (e.g. lockfile path). + pub source: String, + /// Whether the dependency is a development-only dependency. 
+ pub dev: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DependencyEcosystem { + Npm, + Python, +} + +impl DependencyEcosystem { + pub fn label(self) -> &'static str { + match self { + DependencyEcosystem::Npm => "npm", + DependencyEcosystem::Python => "python", + } + } + + fn vuln_api_ecosystem(self) -> &'static str { + match self { + DependencyEcosystem::Npm => "npm", + DependencyEcosystem::Python => "PyPI", + } + } +} + +/// One verification finding: the dep was published within the threshold. +#[derive(Debug, Clone)] +pub struct Finding { + pub dep: Dependency, + pub published_at: DateTime, + pub age: Duration, +} + +/// Outcome categories for individual dependency lookups. +#[derive(Debug, Clone)] +pub enum LookupOutcome { + /// The dep is older than the threshold — safe. + Ok { + dep: Dependency, + published_at: DateTime, + age: Duration, + }, + /// The dep was published within the threshold window. + Recent(Finding), + /// We could not retrieve metadata for this dep. + Error { dep: Dependency, error: String }, +} + +/// Outcome of a vuln-api CVE lookup for a single dependency. +#[derive(Debug, Clone)] +pub enum CveLookupOutcome { + Clean { dep: Dependency }, + Vulnerable(CveFinding), + Error { dep: Dependency, error: String }, +} + +#[derive(Debug, Clone)] +pub struct CveFinding { + pub dep: Dependency, + pub matches: Vec, + /// Best-effort enrichment from `/v1/advisories/:id`. Index-aligned + /// with `matches`; `None` for entries whose detail lookup failed + /// (404, network, parse, or the cache previously recorded a + /// failure). The CVE line still renders without the advisory URL + /// when this is `None`. 
+ pub advisory_details: Vec>, +} + +#[derive(Debug, Clone)] +pub struct VerifyOptions { + pub ecosystem: Ecosystem, + pub threshold: Duration, + pub include_dev: bool, + pub fail: bool, + /// When true, treat any unpinned dependency or missing-lockfile + /// situation (`package.json` without a lockfile, unpinned + /// `requirements.txt` lines, `pyproject.toml`/`Pipfile` without a + /// matching lockfile) as a hard failure. + pub fail_unpinned: bool, + /// When true, exit non-zero if any dependency has known CVEs. + /// Requires `check_cve`. Independent of `fail` and `fail_unpinned`. + pub fail_cve: bool, + pub json: bool, + pub path: PathBuf, + /// Optional registry overrides (used in tests). + pub npm_registry: Option, + pub pypi_registry: Option, + /// When true, query vuln-api for known CVEs/advisories per dependency. + pub check_cve: bool, + /// Base URL for vuln-api (resolved from env/config in main.rs). + pub vuln_api_url: Option, + /// Token sent to vuln-api as `Authorization: Bearer …` (JWT) or + /// `CORGEA-TOKEN: …` (legacy). Required and non-empty when + /// `check_cve = true`. Preflight in `main.rs` guarantees this before + /// `run()` is called. + pub vuln_api_token: Option, + /// Max in-flight vuln-api package-check requests when `check_cve` is true. + /// Ignored when `check_cve` is false. Default 8, clamped 1..32 by clap. + pub cve_concurrency: usize, + /// Minimum severity required to trip `--fail-cve`. Defaults to + /// `SeverityFloor::Any` (chunk-02 behavior: fail on any finding). + /// Ignored when `check_cve` is false. 
+ pub severity_floor: SeverityFloor, +} + +impl Default for VerifyOptions { + fn default() -> Self { + Self { + ecosystem: Ecosystem::All, + threshold: Duration::from_secs(2 * 24 * 60 * 60), + include_dev: false, + fail: false, + fail_unpinned: false, + fail_cve: false, + json: false, + path: PathBuf::from("."), + npm_registry: None, + pypi_registry: None, + check_cve: false, + vuln_api_url: None, + vuln_api_token: None, + cve_concurrency: 8, + severity_floor: SeverityFloor::Any, + } + } +} + +impl VerifyOptions { + /// Lockfile scan used by install wrappers (`corgea npm`, `pip`, `uv`, …). + #[allow(clippy::too_many_arguments)] + pub fn for_install_wrap( + ecosystem: Ecosystem, + path: PathBuf, + threshold: Duration, + fail: bool, + fail_unpinned: bool, + json: bool, + npm_registry: Option, + pypi_registry: Option, + ) -> Self { + Self { + ecosystem, + threshold, + include_dev: false, + fail, + fail_unpinned, + fail_cve: false, + json, + path, + npm_registry, + pypi_registry, + check_cve: false, + vuln_api_url: None, + vuln_api_token: None, + cve_concurrency: 8, + severity_floor: SeverityFloor::Any, + } + } +} + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. 
+pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + Ok(Duration::from_secs_f64(secs)) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). +pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +/// Top-level entry: discover deps and verify them. +/// +/// Returns `Ok(true)` if any recently-published deps were detected, +/// `Ok(false)` otherwise. Fails (`Err`) only on hard discovery errors. 
+pub fn run(opts: &VerifyOptions) -> Result { + let path = opts.path.as_path(); + if !path.exists() { + return Err(format!("path does not exist: {}", path.display())); + } + + let mut deps: Vec = Vec::new(); + let mut sources: Vec = Vec::new(); + let mut unpinned_warnings: Vec = Vec::new(); + + if matches!(opts.ecosystem, Ecosystem::Npm | Ecosystem::All) { + match npm::discover(path, opts.include_dev) { + Ok(mut found) => { + unpinned_warnings.append(&mut found.warnings); + if !found.deps.is_empty() { + sources.push(found.source.clone()); + deps.append(&mut found.deps); + } + } + Err(e) => { + if opts.ecosystem == Ecosystem::Npm { + return Err(format!("npm discovery failed: {}", e)); + } else { + eprintln!( + "{}", + set_text_color( + &format!("note: skipping npm — {}", e), + TerminalColor::Yellow + ) + ); + } + } + } + } + + if matches!(opts.ecosystem, Ecosystem::Python | Ecosystem::All) { + match python::discover(path, opts.include_dev) { + Ok(mut found) => { + unpinned_warnings.append(&mut found.warnings); + if !found.deps.is_empty() { + sources.push(found.source.clone()); + deps.append(&mut found.deps); + } + } + Err(e) => { + if opts.ecosystem == Ecosystem::Python { + return Err(format!("python discovery failed: {}", e)); + } else { + eprintln!( + "{}", + set_text_color( + &format!("note: skipping python — {}", e), + TerminalColor::Yellow + ) + ); + } + } + } + } + + if deps.is_empty() && unpinned_warnings.is_empty() { + return Err(format!( + "no supported dependency manifests found in {}. 
Expected one of: \ + package-lock.json, npm-shrinkwrap.json, pnpm-lock.yaml, yarn.lock, \ + requirements.txt, Pipfile.lock, poetry.lock, uv.lock.", + path.display() + )); + } + + deps.sort_by(|a, b| { + a.ecosystem + .label() + .cmp(b.ecosystem.label()) + .then_with(|| a.name.cmp(&b.name)) + .then_with(|| a.version.cmp(&b.version)) + }); + deps.dedup_by(|a, b| a.name == b.name && a.version == b.version && a.ecosystem == b.ecosystem); + + let now = Utc::now(); + let threshold = chrono::Duration::from_std(opts.threshold) + .map_err(|e| format!("invalid threshold: {}", e))?; + + let mut outcomes: Vec = Vec::with_capacity(deps.len()); + let mut cve_outcomes: Vec = Vec::new(); + + let cve_base_url = opts + .vuln_api_url + .as_deref() + .map(str::trim) + .unwrap_or_default(); + let cve_token = opts + .vuln_api_token + .as_deref() + .map(str::trim) + .unwrap_or_default(); + + for dep in &deps { + let published = match dep.ecosystem { + DependencyEcosystem::Npm => { + registry::npm_publish_time(&dep.name, &dep.version, opts.npm_registry.as_deref()) + } + DependencyEcosystem::Python => { + registry::pypi_publish_time(&dep.name, &dep.version, opts.pypi_registry.as_deref()) + } + }; + + match published { + Ok(published_at) => { + let age_chrono = now.signed_duration_since(published_at); + let age = age_chrono + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + if age_chrono < threshold { + outcomes.push(LookupOutcome::Recent(Finding { + dep: dep.clone(), + published_at, + age, + })); + } else { + outcomes.push(LookupOutcome::Ok { + dep: dep.clone(), + published_at, + age, + }); + } + } + Err(e) => { + outcomes.push(LookupOutcome::Error { + dep: dep.clone(), + error: e.to_string(), + }); + } + } + } + + if opts.check_cve { + let client = crate::vuln_api::http_client()?; + cve_outcomes = run_cve_pass(&client, opts, &deps, cve_base_url, cve_token); + } + + Ok(VerifyReport { + sources, + outcomes, + unpinned_warnings, + threshold: opts.threshold, + scanned_at: now, + 
check_cve: opts.check_cve, + cve_outcomes, + severity_floor: opts.severity_floor.clone(), + }) +} + +/// Aggregated result of a verification run. +#[derive(Debug, Clone)] +pub struct VerifyReport { + pub sources: Vec, + pub outcomes: Vec, + pub unpinned_warnings: Vec, + pub threshold: Duration, + pub scanned_at: DateTime, + pub check_cve: bool, + pub cve_outcomes: Vec, + /// Copy of `VerifyOptions::severity_floor` so renderers can produce + /// the floor-aware summary without `main.rs` having to thread it in. + pub severity_floor: SeverityFloor, +} + +impl VerifyReport { + pub fn recent(&self) -> Vec<&Finding> { + self.outcomes + .iter() + .filter_map(|o| match o { + LookupOutcome::Recent(f) => Some(f), + _ => None, + }) + .collect() + } + + pub fn errors(&self) -> Vec<(&Dependency, &str)> { + self.outcomes + .iter() + .filter_map(|o| match o { + LookupOutcome::Error { dep, error } => Some((dep, error.as_str())), + _ => None, + }) + .collect() + } + + pub fn ok_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, LookupOutcome::Ok { .. })) + .count() + } + + pub fn has_unpinned(&self) -> bool { + !self.unpinned_warnings.is_empty() + } + + pub fn cve_findings(&self) -> Vec<&CveFinding> { + self.cve_outcomes + .iter() + .filter_map(|o| match o { + CveLookupOutcome::Vulnerable(f) => Some(f), + _ => None, + }) + .collect() + } + + pub fn cve_errors(&self) -> Vec<(&Dependency, &str)> { + self.cve_outcomes + .iter() + .filter_map(|o| match o { + CveLookupOutcome::Error { dep, error } => Some((dep, error.as_str())), + _ => None, + }) + .collect() + } + + /// Findings whose worst-severity match meets `self.severity_floor`. + /// Uses `SeverityLevel::parse_lossy` so unknown server strings collapse + /// to `Info` and remain catchable by `Any` / low floors. 
+ pub fn cve_findings_above_floor(&self) -> Vec<&CveFinding> { + self.cve_findings() + .into_iter() + .filter(|f| { + f.matches.iter().any(|m| { + self.severity_floor + .includes(SeverityLevel::parse_lossy(&m.severity_level)) + }) + }) + .collect() + } + + /// Count of findings filtered out by the floor (i.e. `cve_findings - + /// cve_findings_above_floor`). A finding is counted iff none of its + /// matches meet the floor. Pinned by tests for downstream tooling; the + /// text/JSON rendering uses match-level granularity via + /// [`Self::cve_below_floor_matches_count`]. + #[allow(dead_code)] + pub fn cve_findings_below_floor_count(&self) -> usize { + self.cve_findings().len() - self.cve_findings_above_floor().len() + } + + /// Count of individual advisory matches whose severity is below the + /// floor. Counts across all findings — so a single finding with a + /// critical match + a high match contributes 1 to this count when + /// the floor is `AtLeast(Critical)`. Used by `print_text` / + /// `print_json` to surface the "N findings below --severity floor" + /// note (granularity is matches, since the user sees one rendered + /// line per match). + pub fn cve_below_floor_matches_count(&self) -> usize { + self.cve_findings() + .iter() + .flat_map(|f| f.matches.iter()) + .filter(|m| { + !self + .severity_floor + .includes(SeverityLevel::parse_lossy(&m.severity_level)) + }) + .count() + } +} + +/// Helper used by lockfile parsers to bundle their result. +/// +/// `source` is empty when the discoverer could not find a lockfile; +/// in that case `warnings` typically explains why (e.g. a manifest +/// was found but no lockfile to resolve it against). +#[derive(Debug, Clone, Default)] +pub struct DiscoverResult { + pub deps: Vec, + pub source: String, + pub warnings: Vec, +} + +/// A diagnostic about a dependency we *could not* verify because it +/// isn't pinned to an exact version. 
Examples: +/// +/// * `package.json` is present but no `package-lock.json` / +/// `pnpm-lock.yaml` / `yarn.lock` exists. +/// * `pyproject.toml` or `Pipfile` is present without a matching +/// lockfile. +/// * A `requirements.txt` line is not `==`-pinned (e.g. `requests>=2.0`). +/// +/// These are surfaced in the regular report and, with +/// `--fail-unpinned`, cause a non-zero exit. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UnpinnedWarning { + pub ecosystem: DependencyEcosystem, + /// Which manifest the warning is about (relative path or filename). + pub manifest: String, + /// Human-readable description of why the dep can't be verified. + pub reason: String, +} + +/// Read the file at `path` into a String, returning an informative error. +pub(crate) fn read_to_string(path: &Path) -> Result { + std::fs::read_to_string(path).map_err(|e| format!("failed to read {}: {}", path.display(), e)) +} + +/// Pick the highest `fixed_version` candidate (lexically as semver) from +/// the matches that returned one. Python `fixed_version` strings are +/// piped through `registry::normalize_for_semver` first (PEP 440 → +/// semver). Falls back to the first candidate string if none parse — +/// preserves chunk-01 behaviour for exotic version strings. +pub(super) fn pick_highest_fixed( + eco: DependencyEcosystem, + candidates: &[String], +) -> Option { + if candidates.is_empty() { + return None; + } + let mut best: Option<(semver::Version, String)> = None; + for raw in candidates { + let normalised = match eco { + DependencyEcosystem::Npm => raw.clone(), + DependencyEcosystem::Python => registry::normalize_for_semver(raw), + }; + if let Ok(v) = semver::Version::parse(&normalised) { + if best.as_ref().map(|(b, _)| v > *b).unwrap_or(true) { + best = Some((v, raw.clone())); + } + } + } + best.map(|(_, raw)| raw) + .or_else(|| candidates.first().cloned()) +} + +/// Best-effort fetch of advisory detail for every match in `matches`, +/// memoised in `cache`. 
Returns a `Vec>` +/// index-aligned with the input; `None` for misses (404, network, parse, +/// or a previously-recorded failure). If either `base_url` or `token` +/// is empty, returns all-`None` without making any HTTP calls. +fn collect_advisory_details( + client: &reqwest::blocking::Client, + cache: &mut std::collections::HashMap>, + base_url: &str, + token: &str, + matches: &[vuln_api::VulnMatch], +) -> Vec> { + if base_url.is_empty() || token.is_empty() { + return vec![None; matches.len()]; + } + matches + .iter() + .map(|m| { + let id = m.advisory_id.clone(); + if let Some(entry) = cache.get(&id) { + return entry.as_ref().ok().cloned(); + } + let entry = vuln_api::get_advisory(client, base_url, token, &id).map_err(|_| ()); + let result = entry.as_ref().ok().cloned(); + cache.insert(id, entry); + result + }) + .collect() +} + +fn report_cve_progress(done: usize, total: usize, json: bool, last_milestone: &AtomicU8) { + if json || total < 20 { + return; + } + if std::io::stderr().is_terminal() { + eprint!("\r[CVE check] {}/{}", done, total); + } else { + let pct = ((done as u64 * 100) / total as u64) as u8; + for threshold in [25u8, 50, 75, 100] { + if pct >= threshold { + let prev = last_milestone.load(Ordering::Relaxed); + if prev < threshold + && last_milestone + .compare_exchange(prev, threshold, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + eprintln!("[CVE check] {}/{}", done, total); + } + } + } + } +} + +// Advisory GETs from vulnerable deps may briefly exceed `cve_concurrency` +// in-flight package-check slots; volume is ≪ package checks (accepted). 
+fn run_cve_pass( + client: &reqwest::blocking::Client, + opts: &VerifyOptions, + deps: &[Dependency], + cve_base_url: &str, + cve_token: &str, +) -> Vec { + if deps.is_empty() { + return Vec::new(); + } + + let concurrency = opts.cve_concurrency.max(1); + let total = deps.len(); + let next = AtomicUsize::new(0); + let results: Mutex>> = + Mutex::new((0..total).map(|_| None).collect()); + let advisory_cache: Mutex< + std::collections::HashMap>, + > = Mutex::new(std::collections::HashMap::new()); + let progress = AtomicUsize::new(0); + let last_milestone = AtomicU8::new(0); + + std::thread::scope(|s| { + for _ in 0..concurrency { + s.spawn(|| loop { + let i = next.fetch_add(1, Ordering::Relaxed); + if i >= total { + break; + } + let dep = &deps[i]; + let outcome = match crate::vuln_api::check_package_version( + client, + cve_base_url, + cve_token, + dep.ecosystem.vuln_api_ecosystem(), + &dep.name, + &dep.version, + ) { + Ok(response) if response.is_vulnerable => { + let advisory_details = { + let mut cache = advisory_cache.lock().unwrap(); + collect_advisory_details( + client, + &mut cache, + cve_base_url, + cve_token, + &response.matches, + ) + }; + CveLookupOutcome::Vulnerable(CveFinding { + dep: dep.clone(), + matches: response.matches, + advisory_details, + }) + } + Ok(_) => CveLookupOutcome::Clean { dep: dep.clone() }, + Err(e) => CveLookupOutcome::Error { + dep: dep.clone(), + error: e.to_string(), + }, + }; + results.lock().unwrap()[i] = Some(outcome); + let done = progress.fetch_add(1, Ordering::Relaxed) + 1; + report_cve_progress(done, total, opts.json, &last_milestone); + }); + } + }); + + if !opts.json && total >= 20 && std::io::stderr().is_terminal() { + eprintln!(); + } + + results + .into_inner() + .unwrap() + .into_iter() + .map(|o| o.expect("every dep index assigned exactly once")) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::io::{Read, Write}; + use std::net::TcpListener; + use 
std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + struct VulnApiStub { + base_url: String, + seen_auth: Arc>>, + advisory_hits: Arc>>, + _handle: thread::JoinHandle<()>, + } + + fn spawn_vuln_api_stub_with_advisories( + fixtures: HashMap<(String, String, String), crate::vuln_api::VulnCheckResponse>, + advisory_fixtures: HashMap, + ) -> VulnApiStub { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let port = listener.local_addr().unwrap().port(); + let base_url = format!("http://127.0.0.1:{}", port); + let fixtures = Arc::new(Mutex::new(fixtures)); + let advisory_fixtures = Arc::new(Mutex::new(advisory_fixtures)); + let seen_auth: Arc>> = Arc::new(Mutex::new(Vec::new())); + let advisory_hits: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + let seen_auth_thread = seen_auth.clone(); + let advisory_hits_thread = advisory_hits.clone(); + + let handle = thread::spawn(move || { + for stream in listener.incoming().take(32) { + let Ok(mut stream) = stream else { + continue; + }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + for line in req.lines() { + let lower = line.to_ascii_lowercase(); + if lower.starts_with("authorization:") || lower.starts_with("corgea-token:") { + seen_auth_thread.lock().unwrap().push(line.to_string()); + } + } + + let (status_code, status_text, response_body): (u16, &str, String) = if let Some( + path, + ) = + req.lines().next().and_then(|l| l.split_whitespace().nth(1)) + { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let eco = parts[2].to_string(); + let name = urlencoding::decode(parts[3]) + 
.unwrap_or_default() + .into_owned(); + let ver = urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(); + let body = fixtures + .lock() + .unwrap() + .get(&(eco.clone(), name.clone(), ver.clone())) + .map(|r| serde_json::to_string(r).unwrap()) + .unwrap_or_else(|| { + serde_json::to_string(&crate::vuln_api::VulnCheckResponse { + ecosystem: eco, + package_name: name, + version: ver, + is_vulnerable: false, + matches: vec![], + }) + .unwrap() + }); + (200, "OK", body) + } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { + let id = urlencoding::decode(parts[2]) + .unwrap_or_default() + .into_owned(); + *advisory_hits_thread + .lock() + .unwrap() + .entry(id.clone()) + .or_insert(0) += 1; + match advisory_fixtures.lock().unwrap().get(&id) { + Some(r) => (200, "OK", serde_json::to_string(r).unwrap()), + None => (404, "Not Found", r#"{"error":"not found"}"#.to_string()), + } + } else { + (200, "OK", r#"{"error":"not found"}"#.to_string()) + } + } else { + (200, "OK", r#"{"error":"bad request"}"#.to_string()) + }; + + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + status_code, + status_text, + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + + thread::sleep(Duration::from_millis(50)); + + VulnApiStub { + base_url, + seen_auth, + advisory_hits, + _handle: handle, + } + } + + #[test] + fn pick_highest_fixed_npm_picks_highest() { + let got = pick_highest_fixed( + DependencyEcosystem::Npm, + &["1.0.0".into(), "1.2.0".into(), "1.1.0".into()], + ); + assert_eq!(got, Some("1.2.0".into())); + } + + #[test] + fn pick_highest_fixed_python_via_normalize() { + // "1.0" normalises to "1.0.0", "1.0.1" stays as-is. 
+ let got = pick_highest_fixed(DependencyEcosystem::Python, &["1.0".into(), "1.0.1".into()]); + assert_eq!(got, Some("1.0.1".into())); + } + + #[test] + fn pick_highest_fixed_unparseable_falls_back_to_first() { + // Both PEP 440 prerelease — normalize_for_semver leaves them alone, + // semver parsing fails, helper falls back to candidates.first(). + let got = pick_highest_fixed( + DependencyEcosystem::Python, + &["1.0a1".into(), "1.0rc1".into()], + ); + assert_eq!(got, Some("1.0a1".into())); + } + + #[test] + fn pick_highest_fixed_empty_returns_none() { + let got = pick_highest_fixed(DependencyEcosystem::Npm, &[]); + assert_eq!(got, None); + } + + #[test] + fn vuln_api_stub_serves_advisory_fixture() { + // Wire-shape fixture: `id`, `source_url`, no `remediation`. + // Exercises the rename mapping in `AdvisoryResponse`. + let mut advisory_fixtures = HashMap::new(); + advisory_fixtures.insert( + "GHSA-foo".to_string(), + serde_json::json!({ + "id": "GHSA-foo", + "aliases": ["CVE-2026-0001"], + "title": "test advisory", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-foo", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(HashMap::new(), advisory_fixtures); + + let client = crate::vuln_api::http_client().unwrap(); + let resp = crate::vuln_api::get_advisory(&client, &stub.base_url, "test-token", "GHSA-foo") + .expect("ok"); + assert_eq!(resp.advisory_id, "GHSA-foo"); + assert_eq!( + resp.url.as_deref(), + Some("https://github.com/advisories/GHSA-foo") + ); + + let hits = stub.advisory_hits.lock().unwrap().clone(); + assert_eq!(hits.get("GHSA-foo").copied(), Some(1)); + } + + #[test] + fn vuln_api_stub_returns_404_for_missing_advisory() { + let stub = spawn_vuln_api_stub_with_advisories(HashMap::new(), HashMap::new()); + let client = crate::vuln_api::http_client().unwrap(); + let err = + crate::vuln_api::get_advisory(&client, &stub.base_url, "test-token", "GHSA-missing") + .unwrap_err(); + let msg = format!("{}", 
err); + assert!(msg.contains("404"), "expected 404 in error, got: {}", msg); + + // The /check route still works against the same stub. + let resp = crate::vuln_api::check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + .expect("clean fallback"); + assert!(!resp.is_vulnerable); + } + + #[test] + fn parse_threshold_units() { + assert_eq!( + parse_threshold("2d").unwrap(), + Duration::from_secs(2 * 86400) + ); + assert_eq!( + parse_threshold("48h").unwrap(), + Duration::from_secs(48 * 3600) + ); + assert_eq!( + parse_threshold("30m").unwrap(), + Duration::from_secs(30 * 60) + ); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!( + parse_threshold("1w").unwrap(), + Duration::from_secs(7 * 86400) + ); + assert_eq!( + parse_threshold("3").unwrap(), + Duration::from_secs(3 * 86400) + ); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } + + #[test] + fn ecosystem_parse_aliases() { + assert_eq!(Ecosystem::parse("npm").unwrap(), Ecosystem::Npm); + assert_eq!(Ecosystem::parse("Python").unwrap(), Ecosystem::Python); + assert_eq!(Ecosystem::parse("all").unwrap(), Ecosystem::All); + assert!(Ecosystem::parse("ruby").is_err()); + } + + #[test] + fn verify_options_default_fail_cve_is_false() { + let opts = 
VerifyOptions::default(); + assert!(!opts.fail_cve); + } + + #[test] + fn run_without_check_cve_has_empty_cve_outcomes() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.21" } + } + }"#, + ) + .unwrap(); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: false, + vuln_api_url: None, + ..Default::default() + }; + + let report = run(&opts).expect("run should succeed"); + assert!(!report.check_cve); + assert!(report.cve_outcomes.is_empty()); + } + + #[test] + fn check_cve_reports_vulnerabilities_from_stub() { + use crate::verify_deps::report::format_cve_finding; + + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: "lodash".into(), + version: "4.17.20".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-integration-test".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<4.17.21".into()), + fixed_version: Some("4.17.21".into()), + }], + }, + ); + + let stub = spawn_vuln_api_stub_with_advisories(fixtures, HashMap::new()); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.20" } + } + }"#, + ) + .unwrap(); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: 
Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + + let report = run(&opts).expect("run should succeed"); + assert_eq!(report.cve_findings().len(), 1); + assert_eq!( + report.cve_findings()[0].matches[0].advisory_id, + "GHSA-integration-test" + ); + let text_line = format_cve_finding(report.cve_findings()[0]); + assert!(text_line.contains("GHSA-integration-test")); + assert!( + text_line.contains("→ upgrade to 4.17.21"), + "expected fix continuation line, got: {}", + text_line + ); + assert!( + text_line.contains("[TOP-FIX]"), + "expected [TOP-FIX] badge on tier-1 line, got: {}", + text_line + ); + assert!( + !text_line.contains("tier: "), + "tier: substring leaked into text output: {}", + text_line + ); + + // Auth header must have been attached. + let auth = stub.seen_auth.lock().unwrap().clone(); + assert!( + auth.iter() + .any(|h| h.to_ascii_lowercase().contains("corgea-token: test-token")), + "expected CORGEA-TOKEN header, got: {:?}", + auth + ); + + let opts_off = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: false, + vuln_api_url: None, + vuln_api_token: None, + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + let report_off = run(&opts_off).expect("run should succeed"); + assert!(!report_off.check_cve); + assert!(report_off.cve_outcomes.is_empty()); + } + + #[test] + fn check_cve_renders_advisory_url_and_fix_version() { + use crate::verify_deps::report::format_cve_finding; + + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: "lodash".into(), + version: "4.17.20".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-integration-test".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<4.17.21".into()), + fixed_version: Some("4.17.21".into()), + }], + }, + 
); + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-integration-test".to_string(), + serde_json::json!({ + "id": "GHSA-integration-test", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-integration-test", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(fixtures, advisories); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.20" } + } + }"#, + ) + .unwrap(); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + + let report = run(&opts).expect("run ok"); + assert_eq!(report.cve_findings().len(), 1); + let finding = report.cve_findings()[0]; + assert_eq!(finding.advisory_details.len(), finding.matches.len()); + assert!(finding.advisory_details[0].is_some()); + + let line = format_cve_finding(finding); + assert!(line.contains("→ upgrade to 4.17.21"), "got: {}", line); + assert!( + line.contains("[TOP-FIX]"), + "expected tier-1 badge: {}", + line + ); + assert!(!line.contains("tier: "), "tier: substring leaked: {}", line); + assert!( + line.contains("https://github.com/advisories/GHSA-integration-test"), + "got: {}", + line + ); + + let hits = stub.advisory_hits.lock().unwrap().clone(); + assert_eq!(hits.get("GHSA-integration-test").copied(), Some(1)); + } + + #[test] + fn check_cve_dedupes_shared_advisory_lookups() { + let mut fixtures = HashMap::new(); + let mk = |name: &str| crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: name.into(), + version: "1.0.0".into(), + is_vulnerable: true, + matches: 
vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-shared".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<2.0.0".into()), + fixed_version: Some("2.0.0".into()), + }], + }; + fixtures.insert(("npm".into(), "alpha".into(), "1.0.0".into()), mk("alpha")); + fixtures.insert(("npm".into(), "beta".into(), "1.0.0".into()), mk("beta")); + + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-shared".to_string(), + serde_json::json!({ + "id": "GHSA-shared", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-shared", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(fixtures, advisories); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/alpha": { "version": "1.0.0" }, + "node_modules/beta": { "version": "1.0.0" } + } + }"#, + ) + .unwrap(); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + cve_concurrency: 1, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + let report = run(&opts).expect("run ok"); + assert_eq!(report.cve_findings().len(), 2); + + let hits = stub.advisory_hits.lock().unwrap().clone(); + assert_eq!( + hits.get("GHSA-shared").copied(), + Some(1), + "hits = {:?}", + hits + ); + + // Both findings carry the same URL via the cache. 
+ for f in report.cve_findings() { + let detail = f.advisory_details[0].as_ref().expect("detail present"); + assert_eq!( + detail.url.as_deref(), + Some("https://github.com/advisories/GHSA-shared") + ); + } + } + + #[test] + fn check_cve_handles_advisory_lookup_failure() { + use crate::verify_deps::report::format_cve_finding; + + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: "lodash".into(), + version: "4.17.20".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-no-detail".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<4.17.21".into()), + fixed_version: Some("4.17.21".into()), + }], + }, + ); + // Note: no advisory fixture for GHSA-no-detail — stub returns 404. + let stub = spawn_vuln_api_stub_with_advisories(fixtures, HashMap::new()); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.20" } + } + }"#, + ) + .unwrap(); + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + let report = run(&opts).expect("run ok"); + assert_eq!(report.cve_findings().len(), 1); + let f = report.cve_findings()[0]; + assert!( + f.advisory_details[0].is_none(), + "expected detail to be None on 404" + ); + + let line = format_cve_finding(f); + assert!(line.contains("GHSA-no-detail"), "got: {}", line); + assert!(line.contains("→ upgrade to 4.17.21"), "got: {}", line); + assert!( + line.contains("[TOP-FIX]"), + 
"expected tier-1 badge: {}", + line + ); + assert!(!line.contains("tier: "), "tier: substring leaked: {}", line); + assert!( + !line.contains("https://"), + "should not render URL: {}", + line + ); + } + + #[test] + fn check_cve_json_includes_advisory_url() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: "lodash".into(), + version: "4.17.20".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-json".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<4.17.21".into()), + fixed_version: Some("4.17.21".into()), + }], + }, + ); + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-json".to_string(), + serde_json::json!({ + "id": "GHSA-json", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-json", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(fixtures, advisories); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.20" } + } + }"#, + ) + .unwrap(); + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + let report = run(&opts).expect("run ok"); + let finding = report.cve_findings()[0]; + + // Re-serialise the per-match JSON entry inline (mirrors print_json). 
+ let detail = finding.advisory_details[0].as_ref(); + let m = &finding.matches[0]; + let entry = serde_json::json!({ + "advisory_id": m.advisory_id, + "severity_level": m.severity_level, + "tier": m.tier, + "vulnerable_version_range": m.vulnerable_version_range, + "fixed_version": m.fixed_version, + "advisory_url": detail.and_then(|d| d.url.clone()), + }); + assert_eq!( + entry["advisory_url"].as_str(), + Some("https://github.com/advisories/GHSA-json") + ); + assert_eq!(entry["fixed_version"].as_str(), Some("4.17.21")); + assert!( + entry.get("remediation").is_none(), + "remediation should not appear in CVE JSON output" + ); + } + + #[test] + fn cve_outcomes_order_stable_across_concurrency() { + let mut fixtures = HashMap::new(); + let mk = |name: &str| crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: name.into(), + version: "1.0.0".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-shared".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<2.0.0".into()), + fixed_version: Some("2.0.0".into()), + }], + }; + fixtures.insert(("npm".into(), "alpha".into(), "1.0.0".into()), mk("alpha")); + fixtures.insert(("npm".into(), "beta".into(), "1.0.0".into()), mk("beta")); + + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-shared".to_string(), + serde_json::json!({ + "id": "GHSA-shared", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-shared", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(fixtures, advisories); + + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/alpha": { "version": "1.0.0" }, + "node_modules/beta": { "version": "1.0.0" } + } + }"#, + ) + .unwrap(); + + let base_opts = 
VerifyOptions { + ecosystem: Ecosystem::Npm, + path: dir.path().to_path_buf(), + check_cve: true, + vuln_api_url: Some(stub.base_url.clone()), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + + let mut opts1 = base_opts.clone(); + opts1.cve_concurrency = 1; + let mut opts16 = base_opts; + opts16.cve_concurrency = 16; + + let report1 = run(&opts1).expect("run ok"); + let report16 = run(&opts16).expect("run ok"); + + fn cve_snapshot(report: &VerifyReport) -> Vec<(String, String, String, String)> { + report + .cve_outcomes + .iter() + .map(|o| { + let (dep, tag) = match o { + CveLookupOutcome::Clean { dep } => (dep, "clean"), + CveLookupOutcome::Error { dep, .. } => (dep, "error"), + CveLookupOutcome::Vulnerable(f) => (&f.dep, "vulnerable"), + }; + ( + dep.ecosystem.label().to_string(), + dep.name.clone(), + dep.version.clone(), + tag.to_string(), + ) + }) + .collect() + } + assert_eq!(cve_snapshot(&report1), cve_snapshot(&report16)); + } + + fn fixture_deps_dir(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("fixtures/deps") + .join(name) + } + + #[test] + fn deps_dogfood_npm_discovers_pins() { + let result = npm::discover(&fixture_deps_dir("npm"), false).expect("discover npm"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 3); + let names: Vec<_> = result.deps.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"lodash")); + assert!(names.contains(&"semver")); + assert!(names.contains(&"json5")); + } + + #[test] + fn deps_dogfood_npm_unpinned() { + let result = + npm::discover(&fixture_deps_dir("npm-unpinned"), false).expect("discover npm-unpinned"); + assert!(result.deps.is_empty()); + assert_eq!(result.warnings.len(), 1); + assert!(result.warnings[0].manifest.ends_with("package.json")); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: fixture_deps_dir("npm-unpinned"), + npm_registry: 
Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + let report = run(&opts).expect("run should succeed"); + assert!(report.has_unpinned()); + } + + #[test] + fn deps_dogfood_npm_cve_with_stub() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + crate::vuln_api::VulnCheckResponse { + ecosystem: "npm".into(), + package_name: "lodash".into(), + version: "4.17.20".into(), + is_vulnerable: true, + matches: vec![crate::vuln_api::VulnMatch { + advisory_id: "GHSA-dogfood-fixture".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: Some("<4.17.21".into()), + fixed_version: Some("4.17.21".into()), + }], + }, + ); + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-dogfood-fixture".to_string(), + serde_json::json!({ + "id": "GHSA-dogfood-fixture", + "severity_level": "high", + "tier": 1, + "source_url": "https://github.com/advisories/GHSA-dogfood-fixture", + }), + ); + let stub = spawn_vuln_api_stub_with_advisories(fixtures, advisories); + + let opts = VerifyOptions { + ecosystem: Ecosystem::Npm, + path: fixture_deps_dir("npm"), + check_cve: true, + vuln_api_url: Some(stub.base_url), + vuln_api_token: Some("test-token".into()), + npm_registry: Some("http://127.0.0.1:1".into()), + ..Default::default() + }; + + let report = run(&opts).expect("run should succeed"); + assert_eq!(report.cve_findings().len(), 1); + assert_eq!(report.cve_findings()[0].dep.name, "lodash"); + assert_eq!( + report.cve_findings()[0].matches[0].advisory_id, + "GHSA-dogfood-fixture" + ); + } + + #[test] + fn deps_dogfood_yarn_lock_parses() { + let result = npm::discover(&fixture_deps_dir("yarn"), false).expect("discover yarn"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 3); + assert!(result.source.ends_with("yarn.lock")); + } + + #[test] + fn deps_dogfood_pnpm_lock_parses() { + let result = npm::discover(&fixture_deps_dir("pnpm"), false).expect("discover 
pnpm"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 3); + assert!(result.source.ends_with("pnpm-lock.yaml")); + } + + #[test] + fn deps_dogfood_python_requirements_discovers() { + let result = python::discover(&fixture_deps_dir("python-requirements"), false) + .expect("discover python-requirements"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 4); + let names: Vec<_> = result.deps.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"django")); + assert!(names.contains(&"pyyaml")); + assert!(names.contains(&"urllib3")); + assert!(names.contains(&"pillow")); + } + + #[test] + fn deps_dogfood_python_poetry_discovers() { + let result = python::discover(&fixture_deps_dir("python-poetry"), false) + .expect("discover python-poetry"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 2); + let names: Vec<_> = result.deps.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"django")); + assert!(names.contains(&"pyyaml")); + } + + #[test] + fn deps_dogfood_python_uv_discovers() { + let result = + python::discover(&fixture_deps_dir("python-uv"), false).expect("discover python-uv"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 2); + let names: Vec<_> = result.deps.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"django")); + assert!(names.contains(&"urllib3")); + } + + mod severity_floor_accessors { + use super::super::{ + CveFinding, CveLookupOutcome, Dependency, DependencyEcosystem, SeverityFloor, + SeverityLevel, VerifyReport, + }; + use crate::vuln_api::VulnMatch; + use chrono::Utc; + use std::collections::BTreeSet; + use std::time::Duration; + + fn dep(name: &str) -> Dependency { + Dependency { + name: name.into(), + version: "1.0.0".into(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".into(), + dev: false, + } + } + + fn vuln_match(advisory: &str, severity: &str) -> VulnMatch { + VulnMatch { + 
advisory_id: advisory.into(), + severity_level: severity.into(), + tier: 2, + vulnerable_version_range: None, + fixed_version: None, + } + } + + fn finding(name: &str, matches: Vec) -> CveFinding { + let advisory_details = vec![None; matches.len()]; + CveFinding { + dep: dep(name), + matches, + advisory_details, + } + } + + fn report_with_findings(findings: Vec, floor: SeverityFloor) -> VerifyReport { + let cve_outcomes: Vec = findings + .into_iter() + .map(CveLookupOutcome::Vulnerable) + .collect(); + VerifyReport { + sources: vec![], + outcomes: vec![], + unpinned_warnings: vec![], + threshold: Duration::from_secs(0), + scanned_at: Utc::now(), + check_cve: true, + cve_outcomes, + severity_floor: floor, + } + } + + #[test] + fn above_floor_returns_all_findings_for_any() { + let report = report_with_findings( + vec![ + finding("critical-pkg", vec![vuln_match("a", "critical")]), + finding("high-pkg", vec![vuln_match("b", "high")]), + finding("low-pkg", vec![vuln_match("c", "low")]), + ], + SeverityFloor::Any, + ); + assert_eq!(report.cve_findings_above_floor().len(), 3); + assert_eq!(report.cve_findings_below_floor_count(), 0); + } + + #[test] + fn above_floor_at_least_critical_only_matches_critical() { + let report = report_with_findings( + vec![ + finding("critical-pkg", vec![vuln_match("a", "critical")]), + finding("high-pkg", vec![vuln_match("b", "high")]), + ], + SeverityFloor::AtLeast(SeverityLevel::Critical), + ); + assert_eq!(report.cve_findings_above_floor().len(), 1); + assert_eq!(report.cve_findings_below_floor_count(), 1); + assert_eq!( + report.cve_findings_above_floor()[0].dep.name, + "critical-pkg" + ); + } + + #[test] + fn above_floor_at_least_low_matches_low_through_critical() { + let report = report_with_findings( + vec![ + finding("critical-pkg", vec![vuln_match("a", "critical")]), + finding("high-pkg", vec![vuln_match("b", "high")]), + finding("low-pkg", vec![vuln_match("c", "low")]), + ], + SeverityFloor::AtLeast(SeverityLevel::Low), + ); + 
assert_eq!(report.cve_findings_above_floor().len(), 3); + assert_eq!(report.cve_findings_below_floor_count(), 0); + } + + #[test] + fn above_floor_one_of_matches_exact_set() { + let mut set = BTreeSet::new(); + set.insert(SeverityLevel::Critical); + set.insert(SeverityLevel::High); + let report = report_with_findings( + vec![ + finding("critical-pkg", vec![vuln_match("a", "critical")]), + finding("medium-pkg", vec![vuln_match("b", "medium")]), + finding("high-pkg", vec![vuln_match("c", "high")]), + ], + SeverityFloor::OneOf(set), + ); + assert_eq!(report.cve_findings_above_floor().len(), 2); + assert_eq!(report.cve_findings_below_floor_count(), 1); + } + + #[test] + fn above_floor_uses_any_match_semantics_for_multi_match_finding() { + // A single finding with one critical and one low match should + // count as above-floor for AtLeast(Critical). + let report = report_with_findings( + vec![finding( + "mixed-pkg", + vec![vuln_match("a", "low"), vuln_match("b", "critical")], + )], + SeverityFloor::AtLeast(SeverityLevel::Critical), + ); + assert_eq!(report.cve_findings_above_floor().len(), 1); + assert_eq!(report.cve_findings_below_floor_count(), 0); + } + + #[test] + fn above_floor_unknown_severity_treated_as_info() { + // Server emits "unknown" — must not silently drop from Any / low + // floors. Critical floor must filter it out. 
+ let report_any = report_with_findings( + vec![finding("weird-pkg", vec![vuln_match("a", "unknown")])], + SeverityFloor::Any, + ); + assert_eq!(report_any.cve_findings_above_floor().len(), 1); + + let report_critical = report_with_findings( + vec![finding("weird-pkg", vec![vuln_match("a", "unknown")])], + SeverityFloor::AtLeast(SeverityLevel::Critical), + ); + assert_eq!(report_critical.cve_findings_above_floor().len(), 0); + assert_eq!(report_critical.cve_findings_below_floor_count(), 1); + } + } +} diff --git a/src/verify_deps/npm.rs b/src/verify_deps/npm.rs new file mode 100644 index 0000000..f1efa3d --- /dev/null +++ b/src/verify_deps/npm.rs @@ -0,0 +1,1158 @@ +//! Discover installed npm dependencies from a project directory. +//! +//! Supported, in order of preference: +//! 1. `package-lock.json` / `npm-shrinkwrap.json` (lockfile v1, v2, v3) +//! 2. `pnpm-lock.yaml` (pnpm v5, v6, v7, v9) +//! 3. `yarn.lock` (Yarn classic, v1 syntax) +//! +//! These produce *resolved* (pinned) versions so the registry lookup is +//! exact. We deliberately do not parse `package.json` directly — its +//! version specifiers are ranges, which would require resolution we +//! don't want to redo. + +use std::path::Path; + +use serde::Deserialize; + +use super::{Dependency, DependencyEcosystem, DiscoverResult}; + +const SUPPORTED_FILES: &[&str] = &[ + "package-lock.json", + "npm-shrinkwrap.json", + "pnpm-lock.yaml", + "yarn.lock", +]; + +pub fn discover(project_dir: &Path, include_dev: bool) -> Result { + let candidates: Vec<_> = SUPPORTED_FILES + .iter() + .map(|f| project_dir.join(f)) + .filter(|p| p.exists()) + .collect(); + + let mut warnings = Vec::new(); + + if candidates.is_empty() { + // No lockfile. If there's a manifest with declared dependencies, + // surface it as an unpinned warning rather than an outright "no + // npm lockfile" error — the caller can decide whether to treat + // it as a fail (`--fail-unpinned`) or a soft warning. 
+ let pkg_json = project_dir.join("package.json"); + if pkg_json.exists() && package_json_has_deps(&pkg_json).unwrap_or(false) { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Npm, + manifest: pkg_json.display().to_string(), + reason: format!( + "package.json declares dependencies but no lockfile was found (looked for {}). Run `npm install`, `pnpm install`, or `yarn install` to generate one before verifying.", + SUPPORTED_FILES.join(", ") + ), + }); + return Ok(DiscoverResult { + deps: Vec::new(), + source: String::new(), + warnings, + }); + } + return Err(format!( + "no npm lockfile found in {}. Looked for: {}", + project_dir.display(), + SUPPORTED_FILES.join(", ") + )); + } + + let chosen = &candidates[0]; + let file_name = chosen + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default(); + + let content = super::read_to_string(chosen)?; + + let deps = match file_name { + "package-lock.json" | "npm-shrinkwrap.json" => parse_npm_lock(&content, include_dev)?, + "pnpm-lock.yaml" => parse_pnpm_lock(&content, include_dev)?, + "yarn.lock" => parse_yarn_lock(&content)?, + _ => unreachable!(), + }; + + Ok(DiscoverResult { + deps, + source: chosen.display().to_string(), + warnings, + }) +} + +/// Lightweight check: does this `package.json` declare any +/// `dependencies` or `devDependencies`? Used to decide whether a +/// missing lockfile actually matters. We tolerate parse errors +/// silently — if the file is unreadable we just say "no deps". 
+fn package_json_has_deps(path: &Path) -> Result { + let content = std::fs::read_to_string(path).map_err(|_| ())?; + let parsed: serde_json::Value = serde_json::from_str(&content).map_err(|_| ())?; + let has = |key: &str| { + parsed + .get(key) + .and_then(|v| v.as_object()) + .map(|m| !m.is_empty()) + .unwrap_or(false) + }; + Ok(has("dependencies") + || has("devDependencies") + || has("peerDependencies") + || has("optionalDependencies")) +} + +#[derive(Debug, Deserialize)] +struct NpmLockRoot { + #[serde(rename = "lockfileVersion")] + lockfile_version: Option, + #[serde(default)] + dependencies: std::collections::BTreeMap, + #[serde(default)] + packages: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct NpmLockV1Entry { + version: Option, + #[serde(default)] + dev: bool, + #[serde(rename = "optional", default)] + _optional: bool, + #[serde(default)] + dependencies: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct NpmLockV2Entry { + version: Option, + name: Option, + #[serde(default)] + dev: bool, + #[serde(rename = "devOptional", default)] + dev_optional: bool, + #[serde(default)] + link: bool, +} + +pub(crate) fn parse_npm_lock(content: &str, include_dev: bool) -> Result, String> { + let root: NpmLockRoot = serde_json::from_str(content) + .map_err(|e| format!("failed to parse npm lockfile: {}", e))?; + + let mut deps: Vec = Vec::new(); + let version = root.lockfile_version.unwrap_or(1); + + if version >= 2 && !root.packages.is_empty() { + for (key, entry) in &root.packages { + if key.is_empty() { + continue; + } + if entry.link { + continue; + } + let dev = entry.dev || entry.dev_optional; + if !include_dev && dev { + continue; + } + let name = entry + .name + .clone() + .or_else(|| extract_name_from_packages_key(key)) + .unwrap_or_default(); + let ver = match &entry.version { + Some(v) if !v.is_empty() => v.clone(), + _ => continue, + }; + if name.is_empty() { + continue; + } + if !is_registry_version(&ver) { + 
continue; + } + deps.push(Dependency { + name, + version: ver, + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".to_string(), + dev, + }); + } + } else { + collect_v1(&root.dependencies, include_dev, &mut deps); + } + + Ok(deps) +} + +fn collect_v1( + map: &std::collections::BTreeMap, + include_dev: bool, + out: &mut Vec, +) { + for (name, entry) in map { + let dev = entry.dev; + if include_dev || !dev { + if let Some(version) = entry.version.as_ref() { + if !version.is_empty() && is_registry_version(version) { + out.push(Dependency { + name: name.clone(), + version: version.clone(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".to_string(), + dev, + }); + } + } + } + if !entry.dependencies.is_empty() { + collect_v1(&entry.dependencies, include_dev, out); + } + } +} + +/// Extract a package name from a v2/v3 lockfile `packages` key like +/// `node_modules/foo` or `node_modules/@scope/bar/node_modules/baz`. +fn extract_name_from_packages_key(key: &str) -> Option { + let last_nm = key.rfind("node_modules/")?; + let rest = &key[last_nm + "node_modules/".len()..]; + if rest.is_empty() { + return None; + } + if rest.starts_with('@') { + let mut parts = rest.splitn(3, '/'); + let scope = parts.next()?; + let pkg = parts.next()?; + Some(format!("{}/{}", scope, pkg)) + } else { + let first = rest.split('/').next()?; + Some(first.to_string()) + } +} + +/// Filter out non-registry version specifiers (git URLs, file refs, links). 
/// Returns `true` when `version` looks like a concrete version that can
/// be looked up on the registry.
///
/// After trimming surrounding whitespace the string must start with an
/// ASCII digit, optionally preceded by a single lowercase `v`
/// (`1.2.3`, `v1.2.3`). Everything else is rejected, which covers the
/// non-registry specifiers that show up in lockfiles: `git+...`,
/// `git:`, `ssh://`, `http(s)://`, `file:`, `link:`, `workspace:`,
/// `npm:` aliases, and the empty string.
///
/// A bare `v` or a word that merely begins with `v` (for example
/// `vendor/pkg`) is not a version and is rejected as well.
fn is_registry_version(version: &str) -> bool {
    let v = version.trim();
    // Drop at most one leading `v`; what remains must begin with a digit.
    let digits = v.strip_prefix('v').unwrap_or(v);
    digits.starts_with(|c: char| c.is_ascii_digit())
}
/// Split a yarn lock header line of comma-separated quoted specs into
/// the individual specs. Handles e.g.
/// `"@scope/pkg@^1.0.0", "@scope/pkg@^1.0.1"`.
///
/// Double quotes are dropped, commas inside quotes do not split, and
/// empty specs are discarded.
fn split_yarn_header(header: &str) -> Vec<String> {
    // Moves the trimmed buffer into `out` (when non-empty) and resets it.
    fn push_spec(buf: &mut String, out: &mut Vec<String>) {
        let spec = buf.trim();
        if !spec.is_empty() {
            out.push(spec.to_string());
        }
        buf.clear();
    }

    let mut out = Vec::new();
    let mut buf = String::new();
    let mut in_quotes = false;
    for c in header.chars() {
        match c {
            '"' => in_quotes = !in_quotes,
            ',' if !in_quotes => push_spec(&mut buf, &mut out),
            _ => buf.push(c),
        }
    }
    push_spec(&mut buf, &mut out);
    out
}

/// Splits a `name@range` spec at the `@` that separates the two parts.
/// A leading `@` is the scope marker of `@scope/name` and is skipped
/// when looking for the separator. Returns `None` when no separator
/// exists.
fn split_yarn_spec(spec: &str) -> Option<(&str, &str)> {
    let search_from = usize::from(spec.starts_with('@'));
    let at = search_from + spec[search_from..].find('@')?;
    Some((&spec[..at], &spec[at + 1..]))
}

/// Extract the package name from a yarn key like `left-pad@^1.3.0` or
/// `@scope/name@^1.0.0`.
///
/// Alias keys of the form `alias@npm:real-name@range` resolve to
/// `real-name`: the `version` recorded for such an entry belongs to the
/// aliased package, so reporting the alias would pair that version with
/// the wrong name.
///
/// Returns `None` for an empty key or a key with no `@range` part.
fn yarn_key_name(key: &str) -> Option<String> {
    let key = key.trim().trim_matches('"');
    let (name, range) = split_yarn_spec(key)?;

    if let Some(target) = range.strip_prefix("npm:") {
        // The target may itself be `real@range` or just `real`.
        let real = split_yarn_spec(target).map_or(target, |(real, _)| real);
        if !real.is_empty() {
            return Some(real.to_string());
        }
    }
    Some(name.to_string())
}
Supports lockfile versions 5.x, 6.x, +/// 7.x and 9.x — the format and key conventions vary across versions: +/// +/// * v5/v6 keys in `packages:` use `/` separators: +/// `/lodash/4.17.21:` or `/@types/node/20.10.5:` +/// * v6+ keys may use `@` for the version separator: +/// `/lodash@4.17.21:` or `/@types/node@20.10.5:` +/// * v9 keys drop the leading `/` entirely: +/// `lodash@4.17.21:` or `'@types/node@20.10.5':` +/// +/// Versions can carry a peer-deps suffix that is *not* part of the +/// resolved version — `(react@18.0.0)` in v9, `_react@18.0.0` in v6. +/// Both must be stripped before lookup, since the registry only knows +/// the bare semver version. +/// +/// Dev/prod classification: +/// * v6 packages have a `dev: true|false` field per entry — we use it. +/// * v9 packages don't carry `dev:`. We instead consult the +/// `importers:` section: a (name, version) that appears *only* in +/// `devDependencies` of all importers (and never in `dependencies`) +/// is treated as dev. This is best-effort: transitive deps that are +/// only reached through a dev top-level package are still treated as +/// non-dev, because resolving the full graph from a lockfile is out +/// of scope here. Including those in production scans is the safer +/// default for a supply-chain tripwire. 
+pub(crate) fn parse_pnpm_lock(content: &str, include_dev: bool) -> Result, String> { + let importers = parse_pnpm_importers(content); + let entries = parse_pnpm_packages(content)?; + + let mut deps = Vec::new(); + for entry in entries { + let key = (entry.name.clone(), entry.version.clone()); + let dev = match entry.dev_field { + Some(d) => d, + None => { + let in_prod = importers.prod.contains(&key); + let in_dev = importers.dev.contains(&key); + in_dev && !in_prod + } + }; + if !include_dev && dev { + continue; + } + if !is_registry_version(&entry.version) { + continue; + } + deps.push(Dependency { + name: entry.name, + version: entry.version, + ecosystem: DependencyEcosystem::Npm, + source: "pnpm-lock.yaml".to_string(), + dev, + }); + } + Ok(deps) +} + +#[derive(Debug, Default)] +struct PnpmImporters { + prod: std::collections::BTreeSet<(String, String)>, + dev: std::collections::BTreeSet<(String, String)>, +} + +#[derive(Debug)] +struct PnpmPackageEntry { + name: String, + version: String, + dev_field: Option, +} + +fn parse_pnpm_packages(content: &str) -> Result, String> { + let mut out = Vec::new(); + let mut state = PackagesState::Outside; + + let mut current_name: Option = None; + let mut current_version: Option = None; + let mut current_dev: Option = None; + let mut entry_indent: usize = 0; + + for raw_line in content.lines() { + if raw_line.trim().is_empty() || raw_line.trim_start().starts_with('#') { + continue; + } + let indent = leading_spaces(raw_line); + let body = &raw_line[indent..]; + + if indent == 0 { + commit_pnpm_entry( + &mut out, + &mut current_name, + &mut current_version, + &mut current_dev, + ); + state = if body.trim_end_matches(' ') == "packages:" { + PackagesState::Inside + } else { + PackagesState::Outside + }; + continue; + } + + if !matches!(state, PackagesState::Inside) { + continue; + } + + if current_name.is_none() { + entry_indent = indent; + } + + if indent == entry_indent && body.ends_with(':') { + commit_pnpm_entry( + &mut 
out, + &mut current_name, + &mut current_version, + &mut current_dev, + ); + + let key = body.trim_end_matches(':').trim(); + if let Some((name, version)) = extract_pnpm_pkg_key(key) { + current_name = Some(name); + current_version = Some(version); + current_dev = None; + } else { + current_name = None; + current_version = None; + current_dev = None; + } + } else if indent > entry_indent { + if let Some(rest) = body.strip_prefix("dev:") { + let v = rest.trim(); + if v == "true" { + current_dev = Some(true); + } else if v == "false" { + current_dev = Some(false); + } + } + } + } + commit_pnpm_entry( + &mut out, + &mut current_name, + &mut current_version, + &mut current_dev, + ); + Ok(out) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PackagesState { + Outside, + Inside, +} + +fn commit_pnpm_entry( + out: &mut Vec, + name: &mut Option, + version: &mut Option, + dev: &mut Option, +) { + if let (Some(n), Some(v)) = (name.take(), version.take()) { + out.push(PnpmPackageEntry { + name: n, + version: v, + dev_field: dev.take(), + }); + } else { + *name = None; + *version = None; + *dev = None; + } +} + +fn parse_pnpm_importers(content: &str) -> PnpmImporters { + let mut importers = PnpmImporters::default(); + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum Bucket { + Prod, + Dev, + None, + } + + let mut active_bucket = Bucket::None; + let mut bucket_indent: usize = usize::MAX; + let mut in_importers_section = false; + let mut pending_name: Option<(String, usize)> = None; + + for raw_line in content.lines() { + if raw_line.trim().is_empty() || raw_line.trim_start().starts_with('#') { + continue; + } + let indent = leading_spaces(raw_line); + let body = &raw_line[indent..]; + + if indent == 0 { + in_importers_section = body.trim_end_matches(' ') == "importers:"; + if !in_importers_section { + if body.trim_end_matches(' ') == "dependencies:" { + active_bucket = Bucket::Prod; + bucket_indent = 0; + pending_name = None; + continue; + } + if 
body.trim_end_matches(' ') == "devDependencies:" { + active_bucket = Bucket::Dev; + bucket_indent = 0; + pending_name = None; + continue; + } + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } else { + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } + continue; + } + + if in_importers_section { + let trimmed = body.trim_end(); + if trimmed == "dependencies:" { + active_bucket = Bucket::Prod; + bucket_indent = indent; + pending_name = None; + continue; + } + if trimmed == "devDependencies:" { + active_bucket = Bucket::Dev; + bucket_indent = indent; + pending_name = None; + continue; + } + } + + if active_bucket == Bucket::None || indent <= bucket_indent { + if indent <= bucket_indent { + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } + continue; + } + + let (key_part, value_part) = match body.split_once(':') { + Some(x) => x, + None => continue, + }; + let key = key_part.trim().trim_matches('\'').trim_matches('"'); + let value = value_part.trim(); + + let expected_entry_indent = bucket_indent + 2; + if indent != expected_entry_indent { + if let Some((ref pkg, _)) = pending_name { + if key == "version" && !value.is_empty() { + let version = + strip_pnpm_peer_suffix(value.trim_matches('\'').trim_matches('"')); + let pair = (pkg.clone(), version); + match active_bucket { + Bucket::Prod => { + importers.prod.insert(pair); + } + Bucket::Dev => { + importers.dev.insert(pair); + } + Bucket::None => {} + } + pending_name = None; + } + } + continue; + } + + if value.is_empty() { + pending_name = Some((key.to_string(), indent)); + } else { + let version = strip_pnpm_peer_suffix(value.trim_matches('\'').trim_matches('"')); + let pair = (key.to_string(), version); + match active_bucket { + Bucket::Prod => { + importers.prod.insert(pair); + } + Bucket::Dev => { + importers.dev.insert(pair); + } + Bucket::None => {} + } + pending_name = None; + } + } + + importers 
/// Counts the leading ASCII space characters of `line`. Tabs are not
/// counted.
fn leading_spaces(line: &str) -> usize {
    line.bytes().take_while(|b| *b == b' ').count()
}

/// Splits a pnpm `packages:` key into `(name, version)`.
///
/// Accepted shapes (surrounding quotes and a leading `/` are optional):
///
/// * `/lodash/4.17.21`, `/@types/node/20.10.5` - `/` version separator
/// * `/lodash@4.17.21`, `/@types/node@20.10.5` - `@` version separator
/// * `lodash@4.17.21`, `'@types/node@20.10.5'` - no leading slash
///
/// The name is split off *first*; only the remaining version part has
/// its peer-dependency suffix (`(react@18.0.0)` or `_react@18.0.0`)
/// removed. Stripping the suffix from the whole key would cut package
/// names that contain an underscore (`string_decoder`, `@my_org/pkg`)
/// and make the entry unparseable.
///
/// Returns `None` when the key has no version separator or when either
/// part ends up empty.
fn extract_pnpm_pkg_key(raw_key: &str) -> Option<(String, String)> {
    // Quotes may wrap the whole key or sit behind a leading `/`, so
    // peel quotes and slashes repeatedly until nothing changes. Every
    // pass can only shorten the slice, so this terminates.
    let mut key = raw_key.trim();
    loop {
        let peeled = key
            .trim_matches('\'')
            .trim_matches('"')
            .trim_start_matches('/');
        if peeled.len() == key.len() {
            break;
        }
        key = peeled;
    }

    // For `@scope/name...` the separator search starts after the scope's
    // slash, so neither the scope marker nor that slash is mistaken for
    // the version separator.
    let search_from = match key.strip_prefix('@') {
        Some(scoped) => scoped.find('/')? + 2,
        None => 0,
    };
    let sep = search_from + key[search_from..].find(|c: char| c == '@' || c == '/')?;

    let name = &key[..sep];
    let version = strip_pnpm_peer_suffix(&key[sep + 1..]);
    if name.is_empty() || version.is_empty() {
        return None;
    }
    Some((name.to_string(), version))
}

/// Removes a pnpm peer-dependency suffix from a *version* string:
/// everything from the first `(` or `_` onwards is dropped and the
/// result is trimmed.
///
/// Pass only the version part, never a full `name@version` key: package
/// names may legitimately contain `_`.
fn strip_pnpm_peer_suffix(version: &str) -> String {
    let v = version.trim();
    let end = v
        .find(|c: char| c == '(' || c == '_')
        .unwrap_or(v.len());
    v[..end].trim().to_string()
}
+# yarn lockfile v1 + +"left-pad@^1.3.0": + version "1.3.0" + resolved "https://registry.yarnpkg.com/left-pad/-/left-pad-1.3.0.tgz#5b8a3a7765dfe001261dde915589e782f8c94d1e" + +"@types/node@^20.10.0", "@types/node@^20.10.5": + version "20.10.5" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.10.5.tgz" +"#; + let deps = parse_yarn_lock(lock).unwrap(); + assert_eq!(deps.len(), 2); + let names: Vec<_> = deps + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert!(names.contains(&("left-pad".to_string(), "1.3.0".to_string()))); + assert!(names.contains(&("@types/node".to_string(), "20.10.5".to_string()))); + } + + #[test] + fn ignores_non_registry_versions() { + assert!(!is_registry_version("git+https://github.com/x/y.git#abc")); + assert!(!is_registry_version("file:../pkg")); + assert!(!is_registry_version("link:../pkg")); + assert!(!is_registry_version("workspace:*")); + assert!(!is_registry_version("npm:other@1.0.0")); + assert!(is_registry_version("1.2.3")); + assert!(is_registry_version("v1.2.3")); + } + + #[test] + fn extracts_packages_key_name() { + assert_eq!( + extract_name_from_packages_key("node_modules/foo").as_deref(), + Some("foo") + ); + assert_eq!( + extract_name_from_packages_key("node_modules/@scope/bar").as_deref(), + Some("@scope/bar") + ); + assert_eq!( + extract_name_from_packages_key("node_modules/a/node_modules/@s/b").as_deref(), + Some("@s/b") + ); + assert_eq!(extract_name_from_packages_key("").as_deref(), None); + } + + #[test] + fn pnpm_pkg_key_v5() { + // v5: leading slash + slash version separator + assert_eq!( + extract_pnpm_pkg_key("/lodash/4.17.21"), + Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/@types/node/20.10.5"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_v6() { + // v6: leading slash + at-sign version separator + assert_eq!( + extract_pnpm_pkg_key("/lodash@4.17.21"), + 
Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/@types/node@20.10.5"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_v9() { + // v9: no leading slash; quoted scoped names + assert_eq!( + extract_pnpm_pkg_key("lodash@4.17.21"), + Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("'@types/node@20.10.5'"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("\"@types/node@20.10.5\""), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_strips_peer_suffix() { + // v9 paren style: + assert_eq!( + extract_pnpm_pkg_key("/foo@1.0.0(react@18.0.0)"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("foo@1.0.0(react@18.0.0)(typescript@5.0.0)"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + // v6 underscore style: + assert_eq!( + extract_pnpm_pkg_key("/foo/1.0.0_react@18.0.0"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/foo@1.0.0_react@18.0.0"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_rejects_garbage() { + assert_eq!(extract_pnpm_pkg_key(""), None); + assert_eq!(extract_pnpm_pkg_key("/"), None); + assert_eq!(extract_pnpm_pkg_key("/lodash"), None); + assert_eq!(extract_pnpm_pkg_key("/@scope/no-version"), None); + } + + #[test] + fn parses_pnpm_lock_v9() { + // Realistic pnpm v9 lockfile. 
+ let lock = r#"lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + .: + dependencies: + lodash: + specifier: ^4.17.21 + version: 4.17.21 + '@scope/lib': + specifier: ^1.0.0 + version: 1.0.0 + devDependencies: + typescript: + specifier: ^5.0.0 + version: 5.4.5 + +packages: + lodash@4.17.21: + resolution: {integrity: sha512-x} + engines: {node: '>=12'} + + '@scope/lib@1.0.0': + resolution: {integrity: sha512-y} + + typescript@5.4.5: + resolution: {integrity: sha512-z} + engines: {node: '>=14.17'} + + some-transitive@2.0.0: + resolution: {integrity: sha512-w} +"#; + + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + // typescript is dev-only top-level, should be excluded. + // some-transitive is unclassified — kept as prod (best-effort). + assert!(pairs.contains(&("lodash".to_string(), "4.17.21".to_string()))); + assert!(pairs.contains(&("@scope/lib".to_string(), "1.0.0".to_string()))); + assert!(pairs.contains(&("some-transitive".to_string(), "2.0.0".to_string()))); + assert!(!pairs.contains(&("typescript".to_string(), "5.4.5".to_string()))); + + let all = parse_pnpm_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"typescript".to_string())); + assert_eq!(all.len(), 4); + } + + #[test] + fn parses_pnpm_lock_v6() { + // v6 layout: per-package `dev:` flag drives classification. 
+ let lock = r#"lockfileVersion: '6.0' + +dependencies: + lodash: + specifier: ^4.17.21 + version: 4.17.21 + +devDependencies: + typescript: + specifier: ^5.0.0 + version: 5.4.5 + +packages: + + /lodash@4.17.21: + resolution: {integrity: sha512-x} + dev: false + + /typescript@5.4.5: + resolution: {integrity: sha512-z} + dev: true + + /'@types/node@20.10.5': + resolution: {integrity: sha512-y} + dev: true +"#; + + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!(pairs, vec![("lodash".to_string(), "4.17.21".to_string())]); + + let all = parse_pnpm_lock(lock, true).unwrap(); + assert_eq!(all.len(), 3); + } + + #[test] + fn parses_pnpm_lock_v5_flat() { + let lock = r#"lockfileVersion: 5.4 + +dependencies: + lodash: 4.17.21 + +devDependencies: + typescript: 5.4.5 + +packages: + + /lodash/4.17.21: + resolution: {integrity: sha512-x} + dev: false + + /typescript/5.4.5: + resolution: {integrity: sha512-z} + dev: true +"#; + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!(pairs, vec![("lodash".to_string(), "4.17.21".to_string())]); + } + + #[test] + fn pnpm_lock_strips_peer_suffix_in_packages_section() { + let lock = r#"lockfileVersion: '9.0' + +importers: + .: + dependencies: + consumer: + specifier: ^1.0.0 + version: 1.0.0(react@18.2.0) + +packages: + consumer@1.0.0(react@18.2.0): + resolution: {integrity: sha512-x} + react@18.2.0: + resolution: {integrity: sha512-y} +"#; + let deps = parse_pnpm_lock(lock, true).unwrap(); + let pairs: Vec<_> = deps + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert!(pairs.contains(&("consumer".to_string(), "1.0.0".to_string()))); + assert!(pairs.contains(&("react".to_string(), "18.2.0".to_string()))); + } + + #[test] + fn discover_warns_on_package_json_without_lockfile() { + let dir = 
tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ + "name": "demo", + "version": "1.0.0", + "dependencies": { "lodash": "^4.0.0" } + }"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert_eq!(result.warnings.len(), 1); + assert!(result.warnings[0].manifest.ends_with("package.json")); + assert!(result.warnings[0].reason.contains("lockfile")); + } + + #[test] + fn discover_no_warning_for_empty_package_json() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ + "name": "demo", + "version": "1.0.0" + }"#, + ) + .unwrap(); + + let err = discover(dir.path(), false).err().expect("expected error"); + assert!(err.contains("no npm lockfile")); + } + + #[test] + fn discover_with_lockfile_emits_no_warnings() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ "name": "demo", "version": "1.0.0", "dependencies": { "lodash": "^4.0.0" } }"#, + ) + .unwrap(); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.21" } + } + }"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 1); + assert_eq!(result.deps[0].name, "lodash"); + } +} diff --git a/src/verify_deps/python.rs b/src/verify_deps/python.rs new file mode 100644 index 0000000..6bb0ac7 --- /dev/null +++ b/src/verify_deps/python.rs @@ -0,0 +1,762 @@ +//! Discover installed Python dependencies from a project directory. +//! +//! Supported, in order of preference: +//! 1. `poetry.lock` (TOML) +//! 2. `Pipfile.lock` (JSON) +//! 3. `uv.lock` (TOML) +//! 4. 
`requirements.txt` — only `==`-pinned lines (we can't verify a +//! range against a registry without resolving, which is out of scope). +//! +//! All resolved dependencies are pinned to exact versions. + +use std::path::Path; + +use serde::Deserialize; + +use super::{Dependency, DependencyEcosystem, DiscoverResult}; + +const SUPPORTED_FILES: &[&str] = &["poetry.lock", "Pipfile.lock", "uv.lock", "requirements.txt"]; + +pub fn discover(project_dir: &Path, include_dev: bool) -> Result { + let candidates: Vec<_> = SUPPORTED_FILES + .iter() + .map(|f| project_dir.join(f)) + .filter(|p| p.exists()) + .collect(); + + let mut warnings: Vec = Vec::new(); + + // Always look for sibling manifests that imply the project has + // dependencies, even when a lockfile is present. We surface these + // as warnings only when the corresponding lockfile is missing. + let pyproject = project_dir.join("pyproject.toml"); + let pipfile = project_dir.join("Pipfile"); + let pipfile_lock = project_dir.join("Pipfile.lock"); + let poetry_lock = project_dir.join("poetry.lock"); + let uv_lock = project_dir.join("uv.lock"); + let requirements_in = project_dir.join("requirements.in"); + + if pipfile.exists() && !pipfile_lock.exists() { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: pipfile.display().to_string(), + reason: "Pipfile is present but Pipfile.lock is missing. Run `pipenv lock` to generate one before verifying." + .to_string(), + }); + } + + if requirements_in.exists() && !project_dir.join("requirements.txt").exists() { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: requirements_in.display().to_string(), + reason: "requirements.in is present but no compiled requirements.txt was found. Run `pip-compile` (or `uv pip compile`) to produce a pinned requirements file before verifying." 
+ .to_string(), + }); + } + + if pyproject.exists() + && !poetry_lock.exists() + && !uv_lock.exists() + && !pipfile_lock.exists() + && pyproject_has_deps(&pyproject).unwrap_or(false) + { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: pyproject.display().to_string(), + reason: "pyproject.toml declares dependencies but no lockfile was found (looked for poetry.lock, uv.lock, Pipfile.lock). Run `poetry lock`, `uv lock`, or generate a pinned requirements.txt before verifying." + .to_string(), + }); + } + + if candidates.is_empty() { + // Without a lockfile or pinned requirements.txt we have nothing + // to verify. If we already emitted a warning above, return it + // (and let the caller decide if it's fatal). Otherwise fall + // back to the previous "nothing to do" error. + if !warnings.is_empty() { + return Ok(DiscoverResult { + deps: Vec::new(), + source: String::new(), + warnings, + }); + } + return Err(format!( + "no Python lockfile found in {}. 
Looked for: {}", + project_dir.display(), + SUPPORTED_FILES.join(", ") + )); + } + + let chosen = &candidates[0]; + let file_name = chosen + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default(); + + let content = super::read_to_string(chosen)?; + + let deps = match file_name { + "poetry.lock" => parse_poetry_lock(&content, include_dev)?, + "Pipfile.lock" => parse_pipfile_lock(&content, include_dev)?, + "uv.lock" => parse_uv_lock(&content)?, + "requirements.txt" => { + let (pinned, unpinned) = parse_requirements_with_warnings(&content); + for line in unpinned { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: chosen.display().to_string(), + reason: format!("requirements.txt line is not `==`-pinned: `{}`", line), + }); + } + pinned + } + _ => unreachable!(), + }; + + Ok(DiscoverResult { + deps, + source: chosen.display().to_string(), + warnings, + }) +} + +/// Lightweight check: does this `pyproject.toml` declare any project +/// dependencies? We look at PEP 621 `[project].dependencies` and +/// `[project].optional-dependencies`, plus the legacy +/// `[tool.poetry.dependencies]` and `[tool.poetry.group.*.dependencies]` +/// tables. Tolerates parse errors. 
+fn pyproject_has_deps(path: &Path) -> Result { + let content = std::fs::read_to_string(path).map_err(|_| ())?; + let parsed: toml::Value = toml::from_str(&content).map_err(|_| ())?; + + let project_deps = parsed + .get("project") + .and_then(|p| p.get("dependencies")) + .and_then(|v| v.as_array()) + .map(|a| !a.is_empty()) + .unwrap_or(false); + let project_opt = parsed + .get("project") + .and_then(|p| p.get("optional-dependencies")) + .and_then(|v| v.as_table()) + .map(|t| { + t.values() + .any(|v| v.as_array().map(|a| !a.is_empty()).unwrap_or(false)) + }) + .unwrap_or(false); + let poetry_main = parsed + .get("tool") + .and_then(|t| t.get("poetry")) + .and_then(|p| p.get("dependencies")) + .and_then(|v| v.as_table()) + // Poetry seeds `python = "^3.10"` here; ignore that one entry. + .map(|t| t.iter().any(|(k, _)| k != "python")) + .unwrap_or(false); + let poetry_groups = parsed + .get("tool") + .and_then(|t| t.get("poetry")) + .and_then(|p| p.get("group")) + .and_then(|v| v.as_table()) + .map(|groups| { + groups.values().any(|g| { + g.get("dependencies") + .and_then(|d| d.as_table()) + .map(|t| !t.is_empty()) + .unwrap_or(false) + }) + }) + .unwrap_or(false); + + Ok(project_deps || project_opt || poetry_main || poetry_groups) +} + +#[derive(Debug, Deserialize)] +struct PoetryLockRoot { + #[serde(default)] + package: Vec, +} + +#[derive(Debug, Deserialize)] +struct PoetryPackage { + name: String, + version: String, + #[serde(default)] + category: Option, + #[serde(default)] + source: Option, + #[serde(default)] + groups: Option>, +} + +#[derive(Debug, Deserialize)] +struct PoetrySource { + #[serde(rename = "type")] + source_type: Option, +} + +pub(crate) fn parse_poetry_lock( + content: &str, + include_dev: bool, +) -> Result, String> { + let root: PoetryLockRoot = + toml::from_str(content).map_err(|e| format!("failed to parse poetry.lock: {}", e))?; + + let mut out = Vec::new(); + for pkg in root.package { + if let Some(src) = &pkg.source { + if let Some(t) = 
&src.source_type { + let t = t.to_ascii_lowercase(); + if t == "git" || t == "directory" || t == "file" || t == "url" { + continue; + } + } + } + + let is_dev = is_poetry_dev(&pkg); + if !include_dev && is_dev { + continue; + } + + out.push(Dependency { + name: normalize_python_name(&pkg.name), + version: pkg.version, + ecosystem: DependencyEcosystem::Python, + source: "poetry.lock".to_string(), + dev: is_dev, + }); + } + Ok(out) +} + +fn is_poetry_dev(pkg: &PoetryPackage) -> bool { + if let Some(cat) = &pkg.category { + if !cat.is_empty() && !cat.eq_ignore_ascii_case("main") { + return true; + } + } + if let Some(groups) = &pkg.groups { + if !groups.is_empty() && !groups.iter().any(|g| g.eq_ignore_ascii_case("main")) { + return true; + } + } + false +} + +#[derive(Debug, Deserialize)] +struct PipfileLockRoot { + #[serde(default)] + default: std::collections::BTreeMap, + #[serde(default)] + develop: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct PipfileLockEntry { + version: Option, + #[serde(default)] + git: Option, + #[serde(default)] + path: Option, +} + +pub(crate) fn parse_pipfile_lock( + content: &str, + include_dev: bool, +) -> Result, String> { + let root: PipfileLockRoot = serde_json::from_str(content) + .map_err(|e| format!("failed to parse Pipfile.lock: {}", e))?; + let mut out = Vec::new(); + extend_pipfile(&root.default, false, &mut out); + if include_dev { + extend_pipfile(&root.develop, true, &mut out); + } + Ok(out) +} + +fn extend_pipfile( + map: &std::collections::BTreeMap, + dev: bool, + out: &mut Vec, +) { + for (name, entry) in map { + if entry.git.is_some() || entry.path.is_some() { + continue; + } + let version = match entry.version.as_ref() { + Some(v) => v, + None => continue, + }; + // Pipfile pins look like "==1.2.3" — strip the leading "==". 
+ let version = version.trim_start_matches("==").trim(); + if version.is_empty() { + continue; + } + out.push(Dependency { + name: normalize_python_name(name), + version: version.to_string(), + ecosystem: DependencyEcosystem::Python, + source: "Pipfile.lock".to_string(), + dev, + }); + } +} + +#[derive(Debug, Deserialize)] +struct UvLockRoot { + #[serde(default)] + package: Vec, +} + +#[derive(Debug, Deserialize)] +struct UvPackage { + name: String, + version: Option, + #[serde(default)] + source: Option, +} + +#[derive(Debug, Deserialize)] +struct UvSource { + #[serde(default)] + registry: Option, + #[serde(default)] + git: Option, + #[serde(default)] + url: Option, + #[serde(default)] + path: Option, + #[serde(default)] + editable: Option, + #[serde(default)] + virtual_: Option, +} + +pub(crate) fn parse_uv_lock(content: &str) -> Result, String> { + let root: UvLockRoot = + toml::from_str(content).map_err(|e| format!("failed to parse uv.lock: {}", e))?; + + let mut out = Vec::new(); + for pkg in root.package { + let version = match pkg.version { + Some(v) if !v.is_empty() => v, + _ => continue, + }; + if let Some(src) = pkg.source { + // Skip non-registry sources. + if src.git.is_some() + || src.url.is_some() + || src.path.is_some() + || src.editable.is_some() + || src.virtual_.is_some() + { + continue; + } + if src.registry.is_none() { + continue; + } + } else { + continue; + } + out.push(Dependency { + name: normalize_python_name(&pkg.name), + version, + ecosystem: DependencyEcosystem::Python, + source: "uv.lock".to_string(), + dev: false, + }); + } + Ok(out) +} + +/// Parse a `requirements.txt` file. Returns `(pinned_deps, unpinned_lines)`: +/// +/// * `pinned_deps`: deps with an exact `==` pin, ready for registry +/// lookup. +/// * `unpinned_lines`: each non-empty, non-comment, non-flag line that +/// we *could not* resolve to a pinned version (range specifiers, +/// bare names, git URLs, editables, etc.). 
Surfaced as warnings so +/// `--fail-unpinned` can fail on them. +pub(crate) fn parse_requirements_with_warnings(content: &str) -> (Vec, Vec) { + let mut deps = Vec::new(); + let mut unpinned = Vec::new(); + let mut continued = String::new(); + for raw_line in content.lines() { + let mut line = raw_line.to_string(); + if let Some(idx) = line.find('#') { + line.truncate(idx); + } + let line = line.trim(); + if line.is_empty() { + continue; + } + let line = if line.ends_with('\\') { + continued.push_str(line.trim_end_matches('\\').trim()); + continued.push(' '); + continue; + } else if !continued.is_empty() { + let mut full = std::mem::take(&mut continued); + full.push_str(line); + full + } else { + line.to_string() + }; + + // `-r other.txt`, `-c constraints.txt`, `--index-url`, etc. + // These are pip configuration directives, not deps. + if line.starts_with('-') { + continue; + } + + let no_extras = match line.find(';') { + Some(i) => line[..i].trim().to_string(), + None => line.clone(), + }; + + let first_token = no_extras + .split_whitespace() + .next() + .unwrap_or("") + .to_string(); + if first_token.is_empty() { + continue; + } + + // VCS / local path / archive URL specifiers — explicit and + // unverifiable against a registry. Don't classify these as + // unpinned warnings; they're an intentional escape hatch. 
+ let lowered = first_token.to_ascii_lowercase(); + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", + ]; + if unverifiable_prefixes.iter().any(|p| lowered.starts_with(p)) { + continue; + } + + if let Some(idx) = first_token.find("==") { + let name_part = &first_token[..idx]; + let version_part = &first_token[idx + 2..]; + let name = name_part.split('[').next().unwrap_or("").trim(); + let version = version_part + .trim() + .trim_matches(|c: char| c == '\'' || c == '"'); + if name.is_empty() || version.is_empty() { + unpinned.push(line.clone()); + continue; + } + deps.push(Dependency { + name: normalize_python_name(name), + version: version.to_string(), + ecosystem: DependencyEcosystem::Python, + source: "requirements.txt".to_string(), + dev: false, + }); + } else { + unpinned.push(line.clone()); + } + } + (deps, unpinned) +} + +/// Backwards-compatible wrapper that drops the unpinned-line list. +/// Used by tests; the binary build path doesn't call it directly any +/// more, so the dead-code lint needs silencing. +#[allow(dead_code)] +pub(crate) fn parse_requirements(content: &str) -> Vec { + parse_requirements_with_warnings(content).0 +} + +/// Normalize a Python distribution name per PEP 503 (lowercase, +/// runs of `_-.` collapsed to single `-`). +pub(crate) fn normalize_python_name(name: &str) -> String { + let lower = name.to_ascii_lowercase(); + let mut out = String::with_capacity(lower.len()); + let mut prev_dash = false; + for c in lower.chars() { + if c == '_' || c == '.' 
|| c == '-' { + if !prev_dash { + out.push('-'); + prev_dash = true; + } + } else { + out.push(c); + prev_dash = false; + } + } + out.trim_matches('-').to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalizes_names() { + assert_eq!(normalize_python_name("Flask"), "flask"); + assert_eq!(normalize_python_name("pytest_mock"), "pytest-mock"); + assert_eq!(normalize_python_name("ruamel.yaml"), "ruamel-yaml"); + assert_eq!( + normalize_python_name("Some__Weird--Name.."), + "some-weird-name" + ); + } + + #[test] + fn parses_requirements_txt() { + let req = r#" +# A comment +requests==2.31.0 +flask==2.3.2 ; python_version >= "3.7" +numpy>=1.20 # not pinned, ignored +-r other.txt +git+https://github.com/x/y.git +django[bcrypt]==4.2.0 + "#; + let deps = parse_requirements(req); + let pairs: Vec<_> = deps + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert!(pairs.contains(&("requests".to_string(), "2.31.0".to_string()))); + assert!(pairs.contains(&("flask".to_string(), "2.3.2".to_string()))); + assert!(pairs.contains(&("django".to_string(), "4.2.0".to_string()))); + assert_eq!(deps.len(), 3); + } + + #[test] + fn requirements_warnings_capture_unpinned_lines() { + let req = r#" +# pinned, no warning +requests==2.31.0 + +# unpinned — should produce warnings +numpy>=1.20 +flask +sqlalchemy~=2.0 + +# pip directives — ignored, not warnings +-r other.txt +--index-url https://example.com/simple + +# VCS / URL deps — explicit escape hatch, no warning +git+https://github.com/x/y.git +https://example.com/pkg.tar.gz +"#; + let (deps, unpinned) = parse_requirements_with_warnings(req); + assert_eq!( + deps.iter().map(|d| d.name.clone()).collect::>(), + vec!["requests".to_string()] + ); + assert_eq!(unpinned.len(), 3); + assert!(unpinned.iter().any(|l| l.contains("numpy>=1.20"))); + assert!(unpinned.iter().any(|l| l == "flask")); + assert!(unpinned.iter().any(|l| l.contains("sqlalchemy~=2.0"))); + } + + #[test] + fn 
parses_poetry_lock() { + let lock = r#" +[[package]] +name = "Requests" +version = "2.31.0" +description = "x" +category = "main" + +[[package]] +name = "pytest" +version = "7.4.0" +description = "x" +category = "dev" + +[[package]] +name = "local-pkg" +version = "1.0.0" +description = "x" +category = "main" + +[package.source] +type = "directory" +url = "../local" +"#; + let prod = parse_poetry_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!(pairs, vec![("requests".to_string(), "2.31.0".to_string())]); + + let all = parse_poetry_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"pytest".to_string())); + assert!(!names.contains(&"local-pkg".to_string())); + } + + #[test] + fn parses_pipfile_lock() { + let lock = r#"{ + "_meta": {}, + "default": { + "requests": { "version": "==2.31.0" }, + "private": { "git": "https://example.com/x.git" } + }, + "develop": { + "pytest": { "version": "==7.4.0" } + } + }"#; + let prod = parse_pipfile_lock(lock, false).unwrap(); + let names: Vec<_> = prod.iter().map(|d| d.name.clone()).collect(); + assert_eq!(names, vec!["requests".to_string()]); + + let all = parse_pipfile_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"pytest".to_string())); + } + + #[test] + fn parses_uv_lock() { + let lock = r#" +[[package]] +name = "requests" +version = "2.31.0" + +[package.source] +registry = "https://pypi.org/simple" + +[[package]] +name = "myproj" +version = "0.1.0" + +[package.source] +virtual = "." 
+ +[[package]] +name = "gitdep" +version = "0.0.0" + +[package.source] +git = "https://example.com/x.git" +"#; + let deps = parse_uv_lock(lock).unwrap(); + let pairs: Vec<_> = deps + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!(pairs, vec![("requests".to_string(), "2.31.0".to_string())]); + } + + #[test] + fn discover_warns_on_pyproject_without_lockfile() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +dependencies = ["requests>=2.0", "flask"] +"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert_eq!(result.warnings.len(), 1); + assert!(result.warnings[0].reason.contains("pyproject.toml")); + assert!(result.warnings[0].reason.contains("lockfile")); + } + + #[test] + fn discover_no_warning_for_empty_pyproject() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +"#, + ) + .unwrap(); + + let err = discover(dir.path(), false).err().expect("expected error"); + assert!(err.contains("no Python lockfile found")); + } + + #[test] + fn discover_warns_on_pipfile_without_lock() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write(dir.path().join("Pipfile"), "[packages]\nrequests = \"*\"\n").unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert!(result.warnings.iter().any(|w| w.reason.contains("Pipfile"))); + } + + #[test] + fn discover_emits_unpinned_warnings_from_requirements_txt() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("requirements.txt"), + "requests==2.31.0 +flask>=2.0 +numpy +", + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + let names: Vec<_> = result.deps.iter().map(|d| 
d.name.clone()).collect(); + assert_eq!(names, vec!["requests".to_string()]); + // Two unpinned lines: `flask>=2.0` and `numpy`. + assert_eq!(result.warnings.len(), 2); + for w in &result.warnings { + assert!(w.reason.contains("not `==`-pinned")); + } + } + + #[test] + fn discover_warns_for_requirements_in_without_compiled_txt() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("requirements.in"), + "requests +flask +", + ) + .unwrap(); + + let err = discover(dir.path(), false).err(); + // requirements.in alone is not enough to find a lockfile, but + // we should have surfaced the in-without-compiled-txt warning + // before getting to the "no lockfile" error. + match err { + Some(e) => assert!(e.contains("no Python lockfile")), + None => {} + } + + // When requirements.in is paired with a pyproject.toml that + // *does* declare deps, we end up returning a warning. + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("requirements.in"), + "requests +", + ) + .unwrap(); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +dependencies = ["requests"] +"#, + ) + .unwrap(); + let result = discover(dir.path(), false).expect("discover"); + assert!(result + .warnings + .iter() + .any(|w| w.manifest.ends_with("requirements.in"))); + assert!(result + .warnings + .iter() + .any(|w| w.manifest.ends_with("pyproject.toml"))); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..98ae806 --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,815 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! 
Both functions return the publish time of an exact (name, version) +//! tuple as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(20); + +fn user_agent() -> String { + format!("corgea-cli/{} (deps)", env!("CARGO_PKG_VERSION")) +} + +fn http_client() -> Result<&'static reqwest::blocking::Client, String> { + static CLIENT: OnceLock = OnceLock::new(); + Ok(CLIENT.get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .build() + .expect("registry http client") + })) +} + +#[derive(Debug, Deserialize)] +struct NpmTimeResponse { + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Look up the publish time of an exact `name@version` from the npm registry. +/// +/// We hit the package metadata URL and pull the version's timestamp out +/// of the `time` map. We only need that map, so we set the +/// `application/vnd.npm.install-v1+json` *negotiation* via the regular +/// JSON accept (the abbreviated form omits `time`, so we use the full +/// form intentionally). 
+pub fn npm_publish_time( + name: &str, + version: &str, + registry: Option<&str>, +) -> Result, String> { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let path = encode_npm_name(name); + let url = format!("{}/{}", base, path); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on npm registry ({})", + name, base + )); + } + if !status.is_success() { + return Err(format!( + "npm registry returned status {} for '{}'", + status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let parsed: NpmTimeResponse = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse npm registry response for '{}': {}", + name, e + ) + })?; + + let raw = parsed.time.get(version).ok_or_else(|| { + format!( + "version '{}' for package '{}' not found in npm registry metadata", + version, name + ) + })?; + + parse_iso8601(raw).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw, name, version, e + ) + }) +} + +/// URL-encode an npm package name. Scoped names contain `@` and `/`, +/// the latter must be encoded as `%2f` for the package metadata URL. 
+fn encode_npm_name(name: &str) -> String { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() +} + +#[derive(Debug, Deserialize)] +struct PypiVersionResponse { + urls: Vec, +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, +} + +/// Look up the publish time of an exact (name, version) from PyPI. +/// +/// We hit the JSON API for that exact version (`/pypi///json`) +/// and use the earliest `upload_time_iso_8601` across the version's +/// uploaded files (sdist + wheels) as the publish time. The earliest +/// time is the right one — once the first artifact is up the version +/// is effectively published. +pub fn pypi_publish_time( + name: &str, + version: &str, + registry: Option<&str>, +) -> Result, String> { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_PYPI_REGISTRY) + .trim_end_matches('/'); + let url = format!( + "{}/pypi/{}/{}/json", + base, + urlencoding::encode(name), + urlencoding::encode(version) + ); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("PyPI request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}=={}' not found on PyPI ({})", + name, version, base + )); + } + if !status.is_success() { + return Err(format!( + "PyPI returned status {} for '{}=={}'", + status, name, version + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read PyPI response: {}", e))?; + + let parsed: PypiVersionResponse = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse PyPI response for '{}=={}': {}", + name, version, e + ) + })?; + + let mut earliest: Option> = None; + for u in parsed.urls { + let raw 
= u.upload_time_iso_8601.or(u.upload_time); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(&raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + + earliest.ok_or_else(|| { + format!( + "no upload time information found on PyPI for '{}=={}' (yanked?)", + name, version + ) + }) +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +// Resolution helpers (npm + PyPI). Inserted before the tests module +// in registry.rs. + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. 
+ Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by install wrappers when the +/// install command says e.g. `axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. +pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on npm registry ({})", + name, base + )); + } + if !status.is_success() { + return Err(format!( + "npm registry returned status {} for '{}'", + status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let meta: NpmFullMetadata = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse npm registry response for '{}': {}", + name, e + ) + })?; + + let resolved_version = match spec { + NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { + format!( + "package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => 
meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package '{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => { + npm_pick_highest_matching(&meta.versions, range).ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })? + } + }; + + let raw_time = meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Pick the highest semver-compatible version that satisfies `range`. +/// Pre-releases are excluded unless the range itself references a +/// pre-release (matches npm's behaviour). +/// Translate an npm-style version range (`>=1.0.0 <2.0.0`, +/// `1.x`, `>=1.0.0`) to a `semver::VersionReq`. The Rust crate uses +/// `,` as the AND separator, npm uses whitespace, so we normalise +/// before parsing. +fn parse_npm_range(range: &str) -> Option { + if let Ok(req) = semver::VersionReq::parse(range) { + return Some(req); + } + let normalised = range.split_whitespace().collect::>().join(","); + semver::VersionReq::parse(&normalised).ok() +} + +fn npm_pick_highest_matching( + versions: &std::collections::BTreeMap, + range: &str, +) -> Option { + // npm separates predicates with spaces (`>=1.0.0 <2.0.0`); the + // Rust `semver` crate uses commas. Try both. We don't support + // npm's `||` OR syntax here — those are best-effort skipped. 
+ let req = parse_npm_range(range)?; + let range_has_prerelease = range.contains('-'); + + let mut best: Option<(semver::Version, String)> = None; + for raw in versions.keys() { + let v = match semver::Version::parse(raw) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() && !range_has_prerelease { + continue; + } + if !req.matches(&v) { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + +/// PyPI version specifier used by install wrappers. We parse a +/// limited subset of PEP 440 specifiers — enough for the common +/// install-command cases (`pkg`, `pkg==X`, `pkg>=X`, `pkg=2.0`, `<3,>=2`, `~=1.4`). + Specifier(String), +} + +#[derive(Debug, Deserialize)] +struct PypiInfoResponse { + #[allow(dead_code)] + info: PypiInfo, + releases: std::collections::BTreeMap>, +} + +#[derive(Debug, Deserialize)] +#[allow(dead_code)] +struct PypiInfo { + #[serde(default)] + version: Option, + #[serde(default)] + yanked: bool, +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version +/// + publish time. The latest non-prerelease, non-yanked release is +/// preferred. 
+pub fn pypi_resolve( + name: &str, + spec: &PypiSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_PYPI_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("PyPI request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!("package '{}' not found on PyPI ({})", name, base)); + } + if !status.is_success() { + return Err(format!("PyPI returned status {} for '{}'", status, name)); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read PyPI response: {}", e))?; + + let meta: PypiInfoResponse = serde_json::from_str(&body) + .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?; + + let candidates = collect_pypi_candidates(&meta); + let chosen = match spec { + PypiSpec::Latest => pick_latest_stable(&candidates).map(|c| c.0.clone()), + PypiSpec::Exact(v) => { + if candidates.iter().any(|(ver, _)| ver == v) { + Some(v.clone()) + } else { + None + } + } + PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&candidates, spec_str) + .or_else(|| pick_latest_stable(&candidates).map(|c| c.0.clone())), + }; + + let chosen = chosen.ok_or_else(|| match spec { + PypiSpec::Exact(v) => { + format!( + "version '{}' for package '{}' was not found on PyPI", + v, name + ) + } + _ => format!("no installable version found for '{}' on PyPI", name), + })?; + + let published_at = candidates + .iter() + .find(|(ver, _)| ver == &chosen) + .map(|(_, dt)| *dt) + .ok_or_else(|| { + format!( + "no upload timestamp for '{}' version '{}' on PyPI", + name, chosen + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: chosen, + published_at, + }) +} + +/// Returns 
`(version, earliest_upload_time)` for every non-yanked +/// release that has at least one uploaded artifact. Empty release +/// entries (which PyPI sometimes keeps around for yanked / private +/// versions) are filtered out so we never pick them. +fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec<(String, DateTime)> { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + // Skip yanked-only releases. + if files + .iter() + .all(|f| f.upload_time_iso_8601.is_none() && f.upload_time.is_none()) + { + continue; + } + let mut earliest: Option> = None; + for f in files { + let raw = f.upload_time_iso_8601.clone().or(f.upload_time.clone()); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(&raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + if let Some(dt) = earliest { + out.push((ver.clone(), dt)); + } + } + out +} + +/// Pick the latest non-prerelease version using `semver` parsing as a +/// best-effort PEP 440 ordering. Falls back to the entry with the +/// latest upload time if no candidate parses as semver. +fn pick_latest_stable(candidates: &[(String, DateTime)]) -> Option<&(String, DateTime)> { + let mut best_semver: Option<(semver::Version, &(String, DateTime))> = None; + for c in candidates { + let normalized = normalize_for_semver(&c.0); + if let Ok(v) = semver::Version::parse(&normalized) { + if !v.pre.is_empty() { + continue; + } + match &best_semver { + Some((cur, _)) if cur >= &v => {} + _ => best_semver = Some((v, c)), + } + } + } + if let Some((_, picked)) = best_semver { + return Some(picked); + } + candidates.iter().max_by_key(|c| c.1) +} + +/// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or +/// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses +/// straight as semver if we pad to 3 components. Anything more exotic +/// (`1.0a1`, `2!1.0`, etc.) is left alone and rejected by semver. 
+pub(super) fn normalize_for_semver(v: &str) -> String {
+    // Strings carrying an epoch (`!`), or an `a` / `b` / `rc` / `.dev`
+    // marker, are returned unchanged.
+    if v.contains('!')
+        || v.contains('a')
+        || v.contains('b')
+        || v.contains("rc")
+        || v.contains(".dev")
+    {
+        return v.to_string();
+    }
+    // Pad one- and two-segment versions ("2", "2.31") out to three segments.
+    let parts: Vec<&str> = v.split('.').collect();
+    match parts.len() {
+        1 => format!("{}.0.0", parts[0]),
+        2 => format!("{}.{}.0", parts[0], parts[1]),
+        _ => v.to_string(),
+    }
+}
+
+/// Apply a PEP 440-style specifier expression to the candidate list
+/// and return the highest match. Supported operators: `==` (`===` is
+/// treated as `==`), `>=`, `>`, `<=`, `<`, `~=`, `!=`. Unknown operators,
+/// or versions that do not parse as semver after normalization, cause us
+/// to give up and return `None` (the caller falls back to "latest stable").
+///
+/// `~=` follows the PEP 440 compatible-release rule, which depends on how
+/// many segments the user wrote:
+///
+/// * `~=2.2`   means `>=2.2, <3.0.0`
+/// * `~=1.4.5` means `>=1.4.5, <1.5.0`
+/// * `~=1`     is invalid in PEP 440, so we return `None`
+///
+/// Candidates that parse with a semver pre-release component are never
+/// selected; candidates that do not parse at all are skipped.
+fn pypi_resolve_specifier(candidates: &[(String, DateTime<Utc>)], spec: &str) -> Option<String> {
+    // (prefix as written, canonical operator). Longer prefixes come first so
+    // `===` / `>=` / `<=` are not mistaken for their shorter relatives.
+    const OPERATORS: [(&str, &str); 8] = [
+        ("===", "=="),
+        ("==", "=="),
+        (">=", ">="),
+        ("<=", "<="),
+        ("!=", "!="),
+        ("~=", "~="),
+        (">", ">"),
+        ("<", "<"),
+    ];
+
+    let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new();
+
+    for clause in spec.split(',').map(str::trim) {
+        let (op, val) = OPERATORS.iter().find_map(|(prefix, op)| {
+            clause
+                .strip_prefix(*prefix)
+                .map(|rest| (*op, rest.trim()))
+        })?;
+        let want = semver::Version::parse(&normalize_for_semver(val)).ok()?;
+
+        if op == "~=" {
+            // The upper bound must come from the segment count the user
+            // wrote, not from the padded three-segment semver form.
+            let upper = match val.split('.').count() {
+                1 => return None,
+                2 => semver::Version::new(want.major.checked_add(1)?, 0, 0),
+                _ => semver::Version::new(want.major, want.minor.checked_add(1)?, 0),
+            };
+            // `~=X` is exactly `>=X` combined with `<upper`.
+            requirements.push((">=", want));
+            requirements.push(("<", upper));
+        } else {
+            requirements.push((op, want));
+        }
+    }
+
+    let mut best: Option<(semver::Version, String)> = None;
+    for (raw, _) in candidates {
+        let v = match semver::Version::parse(&normalize_for_semver(raw)) {
+            Ok(v) => v,
+            Err(_) => continue,
+        };
+        if !v.pre.is_empty() {
+            continue;
+        }
+        let satisfies = requirements.iter().all(|(op, want)| match *op {
+            "==" => &v == want,
+            ">=" => &v >= want,
+            "<=" => &v <= want,
+            "!=" => &v != want,
+            ">" => &v > want,
+            "<" => &v < want,
+            _ => false,
+        });
+        if !satisfies {
+            continue;
+        }
+        // Keep the first candidate seen among equal versions.
+        match &best {
+            Some((cur, _)) if cur >= &v => {}
+            _ => best = Some((v, raw.clone())),
+        }
+    }
+    best.map(|(_, raw)| raw)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn npm_name_encoding() {
+        assert_eq!(encode_npm_name("left-pad"), "left-pad");
+        assert_eq!(encode_npm_name("@scope/pkg"), "@scope%2fpkg");
+        assert_eq!(encode_npm_name("@types/node"), "@types%2fnode");
+    }
+
+    #[test]
+    fn parses_iso8601_variants() {
+        assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok());
+        assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok());
+        assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok());
+        assert!(parse_iso8601("2024-01-02T03:04:05").is_ok());
+        assert!(parse_iso8601("not a date").is_err());
+    }
+
+    #[test]
+    fn pypi_compatible_release_uses_pep440_bounds() {
+        let at = parse_iso8601("2024-01-02T03:04:05Z").unwrap();
+        let candidates: Vec<_> = ["2.1.0", "2.2.0", "2.2.5", "2.9.0", "3.0.0"]
+            .iter()
+            .map(|v| (v.to_string(), at))
+            .collect();
+        // Two segments: `~=2.2` means `>=2.2, ==2.*`.
+        assert_eq!(
+            pypi_resolve_specifier(&candidates, "~=2.2").as_deref(),
+            Some("2.9.0")
+        );
+        // Three segments: `~=2.2.0` means `>=2.2.0, ==2.2.*`.
+        assert_eq!(
+            pypi_resolve_specifier(&candidates, "~=2.2.0").as_deref(),
+            Some("2.2.5")
+        );
+        // A single segment is not a valid compatible-release clause.
+        assert_eq!(pypi_resolve_specifier(&candidates, "~=2"), None);
+    }
+
+    /// Network-touching integration tests. Skipped by default (#[ignore])
+    /// so unit-test runs stay hermetic. Run with:
+    ///     cargo test -- --ignored verify_deps::registry::tests::live
+    #[test]
+    #[ignore]
+    fn live_npm_left_pad() {
+        let dt = npm_publish_time("left-pad", "1.3.0", None).expect("npm lookup");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2018-04-09");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_requests() {
+        let dt = pypi_publish_time("requests", "2.31.0", None).expect("pypi lookup");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2023-05-22");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_case_insensitive() {
+        let dt = pypi_publish_time("Flask", "2.3.2", None).expect("pypi case-insensitive");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2023-05-01");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_unknown_version() {
+        let err = npm_publish_time("left-pad", "999.999.999", None)
+            .err()
+            .unwrap();
+        assert!(err.contains("not found"), "got: {}", err);
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_unknown_version() {
+        let err = pypi_publish_time("requests", "999.999.999", None)
+            .err()
+            .unwrap();
+        assert!(err.contains("not found"), "got: {}", err);
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_resolve_latest() {
+        let r = npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest");
+        assert_eq!(r.name, "left-pad");
+        assert_eq!(r.version, "1.3.0");
+        assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_resolve_exact() {
+        let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None)
+            .expect("npm resolve exact");
+        assert_eq!(r.version, "1.3.0");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_resolve_range() {
+        let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None)
+            .expect("npm resolve range");
+        assert_eq!(r.version, "1.3.0");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_resolve_npm_style_range() {
+        // npm uses spaces, the Rust crate uses commas — we should
+        // accept both.
+        let r = npm_resolve(
+            "left-pad",
+            &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()),
+            None,
+        )
+        .expect("npm resolve space-range");
+        assert_eq!(r.version, "1.3.0");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_npm_resolve_unknown_tag() {
+        let err = npm_resolve(
+            "left-pad",
+            &NpmSpec::Tag("does-not-exist".to_string()),
+            None,
+        )
+        .err()
+        .unwrap();
+        assert!(err.contains("dist-tag"), "got: {}", err);
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_resolve_latest() {
+        let r = pypi_resolve("flask", &PypiSpec::Latest, None).expect("pypi resolve latest");
+        assert_eq!(r.name, "flask");
+        assert!(!r.version.is_empty());
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_resolve_exact() {
+        let r = pypi_resolve("requests", &PypiSpec::Exact("2.31.0".to_string()), None)
+            .expect("pypi resolve exact");
+        assert_eq!(r.version, "2.31.0");
+        assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22");
+    }
+
+    #[test]
+    #[ignore]
+    fn live_pypi_resolve_specifier() {
+        let r = pypi_resolve(
+            "requests",
+            &PypiSpec::Specifier(">=2.30,<2.32".to_string()),
+            None,
+        )
+        .expect("pypi resolve specifier");
+        // `requests==2.31.0` is the only release in [2.30, 2.32).
+        assert_eq!(r.version, "2.31.0");
+    }
+}
diff --git a/src/verify_deps/report.rs b/src/verify_deps/report.rs
new file mode 100644
index 0000000..fc249e0
--- /dev/null
+++ b/src/verify_deps/report.rs
@@ -0,0 +1,610 @@
+//! Render a verification report to the terminal or as JSON.
+
+use std::collections::HashMap;
+
+use serde_json::json;
+
+use crate::utils::terminal::{set_text_color, TerminalColor};
+
+use super::{format_duration, CveFinding, Dependency, LookupOutcome, SeverityFloor, VerifyReport};
+
+fn dep_key(dep: &Dependency) -> (String, String, String) {
+    (
+        dep.ecosystem.label().to_string(),
+        dep.name.clone(),
+        dep.version.clone(),
+    )
+}
+
+/// Format a single CVE finding line for text output. Public for integration tests.
+pub fn format_cve_finding(finding: &CveFinding) -> String {
+    let dep = &finding.dep;
+
+    // One upgrade hint per dependency: collect every match's fixed version
+    // and let `super::pick_highest_fixed` choose among them. The same hint
+    // is appended to each advisory line below.
+    let fixed_candidates: Vec<_> = finding
+        .matches
+        .iter()
+        .filter_map(|m| m.fixed_version.clone())
+        .collect();
+    let fix_line = super::pick_highest_fixed(dep.ecosystem, &fixed_candidates)
+        .map(|v| format!("\n → upgrade to {}", v))
+        .unwrap_or_default();
+
+    finding
+        .matches
+        .iter()
+        // `advisory_details` may be shorter than `matches`; pad it with
+        // `None` so every match still produces a line.
+        .zip(
+            finding
+                .advisory_details
+                .iter()
+                .chain(std::iter::repeat(&None)),
+        )
+        .map(|(m, detail)| {
+            // Tier 1 matches are rendered red with a badge; all others yellow.
+            let (color, badge) = if m.tier == 1 {
+                (TerminalColor::Red, " [TOP-FIX]")
+            } else {
+                (TerminalColor::Yellow, "")
+            };
+            let url_line = detail
+                .as_ref()
+                .and_then(|d| d.url.as_deref())
+                .map(|u| format!("\n {}", set_text_color(u, TerminalColor::Blue)))
+                .unwrap_or_default();
+            let text = format!(
+                "✗ {} {}@{}: {}{} (severity: {}){}{}",
+                dep.ecosystem.label(),
+                dep.name,
+                dep.version,
+                m.advisory_id,
+                badge,
+                m.severity_level,
+                fix_line,
+                url_line,
+            );
+            set_text_color(&text, color)
+        })
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+/// Render the report for human consumption.
+pub fn print_text(report: &VerifyReport) { + println!( + "Verifying dependencies against publish-time threshold of {}", + format_duration(report.threshold) + ); + if !report.sources.is_empty() { + println!("Sources:"); + for s in &report.sources { + println!(" - {}", s); + } + } + + let recent = report.recent(); + let errors = report.errors(); + let ok_count = report.ok_count(); + + println!( + "Checked {} dependencies — {} ok, {} recent, {} errors, {} unpinned", + report.outcomes.len(), + ok_count, + recent.len(), + errors.len(), + report.unpinned_warnings.len(), + ); + + if !report.unpinned_warnings.is_empty() { + println!(); + println!( + "{}", + set_text_color( + "Unpinned dependencies (cannot be verified against the registry):", + TerminalColor::Yellow, + ) + ); + for w in &report.unpinned_warnings { + println!( + " {} [{}] {}: {}", + set_text_color("?", TerminalColor::Yellow), + w.ecosystem.label(), + w.manifest, + w.reason, + ); + } + } + + if !recent.is_empty() { + println!(); + println!( + "{}", + set_text_color( + "Recently published dependencies (within threshold):", + TerminalColor::Yellow, + ) + ); + for f in &recent { + println!( + " {} {}@{} ({}) published {} ago at {}", + set_text_color("⚠", TerminalColor::Yellow), + f.dep.ecosystem.label(), + f.dep.name, + f.dep.version, + set_text_color(&format_duration(f.age), TerminalColor::Yellow,), + f.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } + } + + if report.check_cve { + println!(); + println!( + "{}", + set_text_color("Known vulnerabilities:", TerminalColor::Yellow) + ); + + let cve_findings = report.cve_findings(); + let cve_errors = report.cve_errors(); + + let checked = report.cve_outcomes.len(); + if cve_findings.is_empty() { + if !cve_errors.is_empty() { + // Findings empty but errors present — without this line the + // "Known vulnerabilities:" section looks half-rendered. 
+ println!( + " {}", + set_text_color( + "✗ CVE check did not complete — see errors below", + TerminalColor::Red, + ) + ); + } else if checked == 0 { + println!( + " {}", + set_text_color( + "⚠ no dependencies eligible for CVE check", + TerminalColor::Yellow, + ) + ); + } else { + println!( + " {}", + set_text_color( + &format!( + "✓ no known vulnerabilities ({} dependencies checked)", + checked + ), + TerminalColor::Green, + ) + ); + } + } else { + for finding in &cve_findings { + for line in format_cve_finding(finding).lines() { + println!(" {}", line); + } + } + println!( + " {}", + set_text_color( + &format!("note: {} dependencies CVE-checked", checked), + TerminalColor::Yellow, + ) + ); + let below_floor = report.cve_below_floor_matches_count(); + if below_floor > 0 { + let note = match &report.severity_floor { + SeverityFloor::Any => None, + SeverityFloor::AtLeast(_) => Some(format!( + "note: {} advisory matches below --severity floor ({}) — informational only", + below_floor, + report.severity_floor.label() + )), + SeverityFloor::OneOf(_) => Some(format!( + "note: {} advisory matches outside --severity set ({}) — informational only", + below_floor, + report.severity_floor.label() + )), + }; + if let Some(note) = note { + println!(" {}", set_text_color(¬e, TerminalColor::Yellow)); + } + } + } + + if !cve_errors.is_empty() { + println!(); + println!( + "{}", + set_text_color("CVE lookup errors:", TerminalColor::Red) + ); + for (dep, err) in &cve_errors { + println!( + " {} {}@{} ({}): {}", + set_text_color("✗", TerminalColor::Red), + dep.name, + dep.version, + dep.ecosystem.label(), + err, + ); + } + } + + if !report.unpinned_warnings.is_empty() { + println!( + " {}", + set_text_color( + &format!( + "note: {} unpinned dependency manifest(s) were not CVE-checked", + report.unpinned_warnings.len() + ), + TerminalColor::Yellow, + ) + ); + } + } + + if !errors.is_empty() { + println!(); + println!( + "{}", + set_text_color( + "Could not verify the following 
dependencies:", + TerminalColor::Red, + ) + ); + for (dep, err) in &errors { + println!( + " {} {}@{} ({}): {}", + set_text_color("✗", TerminalColor::Red), + dep.name, + dep.version, + dep.ecosystem.label(), + err, + ); + } + } + + if recent.is_empty() && errors.is_empty() && report.unpinned_warnings.is_empty() { + println!( + "{}", + set_text_color( + "All dependencies are older than the freshness threshold.", + TerminalColor::Green, + ) + ); + } +} + +/// Per-dep CVE status, kept distinct so downstream automation can +/// tell apart "checked clean", "checked and failed", "lookup errored", +/// and "never checked" (e.g. unpinned manifests). +enum CveStatus { + Clean, + Vulnerable(Vec), + Error(String), + NotChecked, +} + +impl CveStatus { + fn label(&self) -> &'static str { + match self { + CveStatus::Clean => "clean", + CveStatus::Vulnerable(_) => "vulnerable", + CveStatus::Error(_) => "error", + CveStatus::NotChecked => "not_checked", + } + } +} + +/// Render the report as a single JSON object on stdout. +/// +/// ## CVE fields (when `--check-cve` was passed) +/// +/// Each entry in `results` includes a `cves` array (empty when clean) and a +/// `cve_status` label (`clean`, `vulnerable`, `error`, or `not_checked`). +/// Lookup failures add `cve_error` instead of `cves`. When `--check-cve` was +/// not passed, per-dep CVE fields are omitted entirely. +/// +/// Each entry of `cves` carries `advisory_id`, `severity_level`, `tier`, +/// `vulnerable_version_range`, `fixed_version`, and `advisory_url`. +/// The last two may be `null` when the server did not return a fix +/// version or the advisory-detail lookup did not produce a URL +/// (e.g. 404 on `/v1/advisories/:id`). +/// +/// Top-level `cve_summary` is present when `--check-cve` was passed: +/// `{ checked, vulnerable, clean, errors, unpinned_not_checked, +/// severity_floor, vulnerable_above_floor }`. +/// +/// `severity_floor` is the rendered `--severity` value (`"any"` | +/// `"critical"` | ... 
| `"critical,high"` — comma-joined descending for +/// `OneOf`). `vulnerable_above_floor` is the count of findings whose +/// worst-severity match meets the floor; equals `vulnerable` when the +/// floor is `"any"`. Both keys are always present in `cve_summary`. +/// It is omitted when CVE checking was not requested. +pub fn print_json(report: &VerifyReport) { + let mut cve_by_dep: HashMap<(String, String, String), CveStatus> = HashMap::new(); + if report.check_cve { + for outcome in &report.cve_outcomes { + match outcome { + super::CveLookupOutcome::Vulnerable(f) => { + let entries: Vec<_> = f + .matches + .iter() + .zip(f.advisory_details.iter().chain(std::iter::repeat(&None))) + .map(|(m, detail)| { + let advisory_url = detail.as_ref().and_then(|d| d.url.clone()); + json!({ + "advisory_id": m.advisory_id, + "severity_level": m.severity_level, + "tier": m.tier, + "vulnerable_version_range": m.vulnerable_version_range, + "fixed_version": m.fixed_version, + "advisory_url": advisory_url, + }) + }) + .collect(); + cve_by_dep.insert(dep_key(&f.dep), CveStatus::Vulnerable(entries)); + } + super::CveLookupOutcome::Clean { dep } => { + cve_by_dep.entry(dep_key(dep)).or_insert(CveStatus::Clean); + } + super::CveLookupOutcome::Error { dep, error } => { + cve_by_dep.insert(dep_key(dep), CveStatus::Error(error.clone())); + } + } + } + } + + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| { + let obj = match o { + LookupOutcome::Ok { + dep, + published_at, + age, + } => json!({ + "status": "ok", + "ecosystem": dep.ecosystem.label(), + "name": dep.name, + "version": dep.version, + "dev": dep.dev, + "source": dep.source, + "published_at": published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + LookupOutcome::Recent(f) => json!({ + "status": "recent", + "ecosystem": f.dep.ecosystem.label(), + "name": f.dep.name, + "version": f.dep.version, + "dev": f.dep.dev, + "source": f.dep.source, + "published_at": f.published_at.to_rfc3339(), + "age_seconds": 
f.age.as_secs(), + }), + LookupOutcome::Error { dep, error } => json!({ + "status": "error", + "ecosystem": dep.ecosystem.label(), + "name": dep.name, + "version": dep.version, + "dev": dep.dev, + "source": dep.source, + "error": error, + }), + }; + + if !report.check_cve { + return obj; + } + + let dep = match o { + LookupOutcome::Ok { dep, .. } => dep, + LookupOutcome::Recent(f) => &f.dep, + LookupOutcome::Error { dep, .. } => dep, + }; + let status = cve_by_dep + .remove(&dep_key(dep)) + .unwrap_or(CveStatus::NotChecked); + let mut obj = obj; + let map = obj + .as_object_mut() + .expect("LookupOutcome JSON serializes as an object"); + map.insert("cve_status".to_string(), json!(status.label())); + match status { + CveStatus::Vulnerable(cves) => { + map.insert("cves".to_string(), json!(cves)); + } + CveStatus::Clean => { + map.insert("cves".to_string(), json!([])); + } + CveStatus::Error(err) => { + map.insert("cve_error".to_string(), json!(err)); + } + CveStatus::NotChecked => {} + } + obj + }) + .collect(); + + let unpinned: Vec<_> = report + .unpinned_warnings + .iter() + .map(|w| { + json!({ + "ecosystem": w.ecosystem.label(), + "manifest": w.manifest, + "reason": w.reason, + }) + }) + .collect(); + + let mut body = json!({ + "scanned_at": report.scanned_at.to_rfc3339(), + "threshold_seconds": report.threshold.as_secs(), + "sources": report.sources, + "summary": { + "checked": report.outcomes.len(), + "ok": report.ok_count(), + "recent": report.recent().len(), + "errors": report.errors().len(), + "unpinned": report.unpinned_warnings.len(), + }, + "results": outcomes, + "unpinned": unpinned, + }); + + if report.check_cve { + let vulnerable = report.cve_findings().len(); + let vulnerable_above_floor = report.cve_findings_above_floor().len(); + let errors = report.cve_errors().len(); + let clean = report + .cve_outcomes + .iter() + .filter(|o| matches!(o, super::CveLookupOutcome::Clean { .. 
})) + .count(); + let summary = json!({ + "checked": report.cve_outcomes.len(), + "vulnerable": vulnerable, + "clean": clean, + "errors": errors, + "unpinned_not_checked": report.unpinned_warnings.len(), + "severity_floor": report.severity_floor.label(), + "vulnerable_above_floor": vulnerable_above_floor, + }); + body.as_object_mut() + .expect("top-level JSON is an object") + .insert("cve_summary".to_string(), summary); + } + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::verify_deps::{CveFinding, Dependency, DependencyEcosystem}; + use crate::vuln_api::VulnMatch; + + #[test] + fn format_cve_finding_includes_advisory_id() { + let finding = CveFinding { + dep: Dependency { + name: "lodash".into(), + version: "4.17.20".into(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".into(), + dev: false, + }, + matches: vec![VulnMatch { + advisory_id: "GHSA-test-advisory".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: None, + fixed_version: None, + }], + advisory_details: vec![None], + }; + let line = format_cve_finding(&finding); + assert!(line.contains("GHSA-test-advisory")); + assert!(line.contains("lodash@4.17.20")); + } + + #[test] + fn format_cve_finding_includes_fix_line_and_badge_for_tier_one() { + let finding = CveFinding { + dep: Dependency { + name: "lodash".into(), + version: "4.17.20".into(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".into(), + dev: false, + }, + matches: vec![VulnMatch { + advisory_id: "GHSA-test-advisory".into(), + severity_level: "high".into(), + tier: 1, + vulnerable_version_range: None, + fixed_version: Some("4.17.21".into()), + }], + advisory_details: vec![None], + }; + let line = format_cve_finding(&finding); + assert!( + line.contains("→ upgrade to 4.17.21"), + "expected '→ upgrade to 4.17.21' in: {}", + line + ); + assert!( + line.contains("[TOP-FIX]"), + "expected '[TOP-FIX]' badge 
in: {}", + line + ); + assert!(!line.contains("tier: "), "tier: substring leaked: {}", line); + } + + #[test] + fn format_cve_finding_hides_badge_for_tier_two() { + let finding = CveFinding { + dep: Dependency { + name: "lodash".into(), + version: "4.17.20".into(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".into(), + dev: false, + }, + matches: vec![VulnMatch { + advisory_id: "GHSA-tier-two".into(), + severity_level: "low".into(), + tier: 2, + vulnerable_version_range: None, + fixed_version: Some("4.17.21".into()), + }], + advisory_details: vec![None], + }; + let line = format_cve_finding(&finding); + assert!( + !line.contains("[TOP-FIX]"), + "tier-2 should not render badge: {}", + line + ); + assert!(!line.contains("tier: "), "tier: substring leaked: {}", line); + assert!( + line.contains("→ upgrade to 4.17.21"), + "fix line missing: {}", + line + ); + } + + #[test] + fn format_cve_finding_picks_highest_fix_across_matches() { + let dep = Dependency { + name: "left-pad".into(), + version: "1.0.0".into(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".into(), + dev: false, + }; + let mk = |id: &str, fv: &str| VulnMatch { + advisory_id: id.into(), + severity_level: "low".into(), + tier: 2, + vulnerable_version_range: None, + fixed_version: Some(fv.into()), + }; + let finding = CveFinding { + dep, + matches: vec![ + mk("GHSA-a", "1.0.0"), + mk("GHSA-b", "1.2.0"), + mk("GHSA-c", "1.1.0"), + ], + advisory_details: vec![None, None, None], + }; + let line = format_cve_finding(&finding); + assert!(line.contains("→ upgrade to 1.2.0"), "got: {}", line); + assert!(!line.contains("→ upgrade to 1.0.0"), "got: {}", line); + } +} diff --git a/src/verify_deps/severity.rs b/src/verify_deps/severity.rs new file mode 100644 index 0000000..79838cf --- /dev/null +++ b/src/verify_deps/severity.rs @@ -0,0 +1,313 @@ +//! Severity ladder + floor filter for `corgea deps --check-cve --fail-cve`. +//! +//! 
The vuln-api emits categorical `severity_level` strings +//! (`critical | high | medium | low | none | unknown`, lowercased on the +//! wire by `cve_worker/src/worker.js`). This module locks an ordered +//! `SeverityLevel` enum on the CLI side and the user-facing +//! `SeverityFloor` used by the `--severity` flag. +//! +//! Unknown server-emitted strings parse to `SeverityLevel::Info` so a +//! future server vocabulary drift (e.g. `"emergency"`, or the existing +//! `"none"` / `"unknown"`) never silently drops findings from the +//! `--fail-cve` gate. A `CORGEA_DEBUG`-gated stderr warning fires the +//! first time a non-canonical string is seen. + +use std::collections::{BTreeSet, HashSet}; +use std::sync::{Mutex, OnceLock}; + +/// Ordered severity ladder. `Info` is the bottom rung and is also the +/// fail-open target for unknown server strings. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SeverityLevel { + Info, + Low, + Medium, + High, + Critical, +} + +impl SeverityLevel { + /// Strict parse: returns `Err` for any non-canonical string. Used by + /// `parse_severity_floor_arg` (which surfaces the error to clap). + pub fn parse(s: &str) -> Result { + match s.trim().to_ascii_lowercase().as_str() { + "info" => Ok(SeverityLevel::Info), + "low" => Ok(SeverityLevel::Low), + "medium" => Ok(SeverityLevel::Medium), + "high" => Ok(SeverityLevel::High), + "critical" => Ok(SeverityLevel::Critical), + other => Err(format!( + "unknown severity: '{}'. Expected one of: critical, high, medium, low, info.", + other + )), + } + } + + /// Lossy parse used by the gating block on `severity_level` strings + /// emitted by the vuln-api. Unknown strings (including the server's + /// own `none` / `unknown` fallback and any future addition) collapse + /// to `Info` and trigger a `CORGEA_DEBUG`-gated warn-once channel so + /// they never silently drop out of the gate. 
+    pub fn parse_lossy(s: &str) -> Self {
+        // Any string the strict parser rejects is reported (once, via
+        // `warn_unknown_severity_once`) and then treated as `Info`.
+        Self::parse(s).unwrap_or_else(|_| {
+            warn_unknown_severity_once(s);
+            SeverityLevel::Info
+        })
+    }
+
+    /// Lowercase canonical label for text + JSON rendering.
+    pub fn label(self) -> &'static str {
+        match self {
+            SeverityLevel::Info => "info",
+            SeverityLevel::Low => "low",
+            SeverityLevel::Medium => "medium",
+            SeverityLevel::High => "high",
+            SeverityLevel::Critical => "critical",
+        }
+    }
+}
+
+/// Floor used by `--severity`.
+///
+/// - `Any` — chunk-02 behavior; `includes(level)` is always `true`.
+/// - `AtLeast(min)` — single value `--severity high` matches `high | critical`.
+/// - `OneOf(set)` — comma list `--severity critical,high` matches exactly those.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub enum SeverityFloor {
+    #[default]
+    Any,
+    AtLeast(SeverityLevel),
+    OneOf(BTreeSet<SeverityLevel>),
+}
+
+impl SeverityFloor {
+    /// True iff `level` meets this floor.
+    pub fn includes(&self, level: SeverityLevel) -> bool {
+        match self {
+            SeverityFloor::Any => true,
+            SeverityFloor::AtLeast(min) => level >= *min,
+            SeverityFloor::OneOf(set) => set.contains(&level),
+        }
+    }
+
+    /// Render the floor for text / JSON output. Descending-by-severity
+    /// for `OneOf` so the JSON value is stable across runs
+    /// (`"critical,high"`, never `"high,critical"`).
+    pub fn label(&self) -> String {
+        match self {
+            SeverityFloor::Any => "any".to_string(),
+            SeverityFloor::AtLeast(level) => level.label().to_string(),
+            SeverityFloor::OneOf(set) => set
+                .iter()
+                // A `BTreeSet` iterates in ascending order, so reversing it
+                // yields the descending order without a separate sort.
+                .rev()
+                .map(|level| level.label())
+                .collect::<Vec<_>>()
+                .join(","),
+        }
+    }
+}
+
+/// Clap `value_parser` for the `--severity` flag. Empty string and
+/// `"any"` (case-insensitive) map to `Any`; a value containing a comma
+/// maps to `OneOf` after parsing each token; anything else maps to
+/// `AtLeast` after parsing as a single `SeverityLevel`.
+pub fn parse_severity_floor_arg(raw: &str) -> Result { + let trimmed = raw.trim(); + if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("any") { + return Ok(SeverityFloor::Any); + } + if trimmed.contains(',') { + let set: Result, _> = trimmed + .split(',') + .map(|p| SeverityLevel::parse(p.trim())) + .collect(); + return set.map(SeverityFloor::OneOf); + } + SeverityLevel::parse(trimmed).map(SeverityFloor::AtLeast) +} + +/// Process-local channel for warn-once-per-unknown-string behavior. +fn warn_unknown_severity_once(raw: &str) { + static SEEN: OnceLock>> = OnceLock::new(); + let seen = SEEN.get_or_init(|| Mutex::new(HashSet::new())); + let mut guard = match seen.lock() { + Ok(g) => g, + Err(p) => p.into_inner(), + }; + let key = raw.trim().to_ascii_lowercase(); + // Env-check first so that a future `CORGEA_DEBUG` toggle still surfaces + // a previously-seen unknown severity (short-circuit avoids inserting + // into SEEN until we actually intend to print). + if crate::utils::generic::get_env_var_if_exists("CORGEA_DEBUG").is_some() && guard.insert(key) { + eprintln!( + "debug: vuln-api emitted unknown severity_level '{}' — treating as 'info' for --severity filtering.", + raw + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_round_trip_canonical_values() { + assert_eq!(SeverityLevel::parse("info").unwrap(), SeverityLevel::Info); + assert_eq!(SeverityLevel::parse("low").unwrap(), SeverityLevel::Low); + assert_eq!( + SeverityLevel::parse("medium").unwrap(), + SeverityLevel::Medium + ); + assert_eq!(SeverityLevel::parse("high").unwrap(), SeverityLevel::High); + assert_eq!( + SeverityLevel::parse("critical").unwrap(), + SeverityLevel::Critical + ); + } + + #[test] + fn parse_is_case_insensitive_and_trims() { + assert_eq!( + SeverityLevel::parse("CRITICAL").unwrap(), + SeverityLevel::Critical + ); + assert_eq!( + SeverityLevel::parse(" High ").unwrap(), + SeverityLevel::High + ); + } + + #[test] + fn parse_rejects_unknown_strings() { + 
assert!(SeverityLevel::parse("bogus").is_err()); + assert!(SeverityLevel::parse("").is_err()); + assert!(SeverityLevel::parse("none").is_err()); + assert!(SeverityLevel::parse("unknown").is_err()); + } + + #[test] + fn parse_lossy_maps_unknown_to_info() { + assert_eq!(SeverityLevel::parse_lossy("none"), SeverityLevel::Info); + assert_eq!(SeverityLevel::parse_lossy("unknown"), SeverityLevel::Info); + assert_eq!(SeverityLevel::parse_lossy("emergency"), SeverityLevel::Info); + // Canonical values still parse strictly. + assert_eq!( + SeverityLevel::parse_lossy("critical"), + SeverityLevel::Critical + ); + } + + #[test] + fn ordering_is_info_lt_low_lt_medium_lt_high_lt_critical() { + assert!(SeverityLevel::Info < SeverityLevel::Low); + assert!(SeverityLevel::Low < SeverityLevel::Medium); + assert!(SeverityLevel::Medium < SeverityLevel::High); + assert!(SeverityLevel::High < SeverityLevel::Critical); + } + + #[test] + fn floor_any_includes_everything() { + let floor = SeverityFloor::Any; + for level in [ + SeverityLevel::Info, + SeverityLevel::Low, + SeverityLevel::Medium, + SeverityLevel::High, + SeverityLevel::Critical, + ] { + assert!(floor.includes(level), "Any should include {:?}", level); + } + } + + #[test] + fn floor_at_least_high_matches_high_and_critical_only() { + let floor = SeverityFloor::AtLeast(SeverityLevel::High); + assert!(floor.includes(SeverityLevel::Critical)); + assert!(floor.includes(SeverityLevel::High)); + assert!(!floor.includes(SeverityLevel::Medium)); + assert!(!floor.includes(SeverityLevel::Low)); + assert!(!floor.includes(SeverityLevel::Info)); + } + + #[test] + fn floor_one_of_matches_exact_set() { + let mut set = BTreeSet::new(); + set.insert(SeverityLevel::Critical); + set.insert(SeverityLevel::High); + let floor = SeverityFloor::OneOf(set); + assert!(floor.includes(SeverityLevel::Critical)); + assert!(floor.includes(SeverityLevel::High)); + assert!(!floor.includes(SeverityLevel::Medium)); + assert!(!floor.includes(SeverityLevel::Low)); 
+ assert!(!floor.includes(SeverityLevel::Info)); + } + + #[test] + fn parse_arg_empty_and_any_map_to_any() { + assert_eq!(parse_severity_floor_arg("").unwrap(), SeverityFloor::Any); + assert_eq!(parse_severity_floor_arg("any").unwrap(), SeverityFloor::Any); + assert_eq!(parse_severity_floor_arg("ANY").unwrap(), SeverityFloor::Any); + assert_eq!( + parse_severity_floor_arg(" any ").unwrap(), + SeverityFloor::Any + ); + } + + #[test] + fn parse_arg_single_value_maps_to_at_least() { + assert_eq!( + parse_severity_floor_arg("critical").unwrap(), + SeverityFloor::AtLeast(SeverityLevel::Critical) + ); + assert_eq!( + parse_severity_floor_arg("HIGH").unwrap(), + SeverityFloor::AtLeast(SeverityLevel::High) + ); + } + + #[test] + fn parse_arg_comma_list_maps_to_one_of() { + let mut expected = BTreeSet::new(); + expected.insert(SeverityLevel::Critical); + expected.insert(SeverityLevel::High); + assert_eq!( + parse_severity_floor_arg("critical,high").unwrap(), + SeverityFloor::OneOf(expected.clone()) + ); + // Whitespace + duplicates dedup via BTreeSet. + assert_eq!( + parse_severity_floor_arg(" critical , high , critical ").unwrap(), + SeverityFloor::OneOf(expected) + ); + } + + #[test] + fn parse_arg_rejects_bad_token_in_list() { + assert!(parse_severity_floor_arg("critical,bogus").is_err()); + assert!(parse_severity_floor_arg("bogus").is_err()); + } + + #[test] + fn label_renders_one_of_in_descending_order() { + let mut set = BTreeSet::new(); + set.insert(SeverityLevel::High); + set.insert(SeverityLevel::Critical); + let floor = SeverityFloor::OneOf(set); + assert_eq!(floor.label(), "critical,high"); + } + + #[test] + fn label_any_and_at_least_render_canonical() { + assert_eq!(SeverityFloor::Any.label(), "any"); + assert_eq!( + SeverityFloor::AtLeast(SeverityLevel::Critical).label(), + "critical" + ); + } +} diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs new file mode 100644 index 0000000..f9494e6 --- /dev/null +++ b/src/vuln_api/mod.rs @@ -0,0 +1,795 @@ +//! 
Corgea vuln-api client. +//! +//! Deliberately independent of `utils::api::SHARED_CLIENT` because: +//! * the vuln-api host is user-configurable via `CORGEA_VULN_API_URL`, +//! so we must never silently replay Corgea cookies / non-JWT +//! `CORGEA-TOKEN` headers via redirect following or the shared +//! cookie jar. +//! * the shared client's `check_for_warnings` exits the process on +//! HTTP 410, which is wrong for per-dep CVE lookups. +//! +//! The auth header is attached explicitly per call from a caller-owned +//! token (no global state). + +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +use crate::log::debug; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +/// Cap on how much of an error response body we splice into the +/// user-facing error message. Fits a CLI line, captures +/// `{"error":"…"}`-class messages comfortably, and truncates +/// Cloudflare HTML before it gets ugly. +const ERROR_BODY_SNIPPET_LEN: usize = 300; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnCheckResponse { + pub ecosystem: String, + pub package_name: String, + pub version: String, + pub is_vulnerable: bool, + pub matches: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnMatch { + pub advisory_id: String, + pub severity_level: String, + pub tier: u8, + pub vulnerable_version_range: Option, + pub fixed_version: Option, +} + +/// Subset of `GET /v1/advisories/:id` we consume. +/// +/// Field-name notes (kept stable for callers, but mapped to the real +/// server shape via `#[serde(rename = …)]`): +/// +/// * `advisory_id` ← server's `id` +/// * `url` ← server's `source_url` +/// * `tier` is `Option` because the server may emit `null` +/// (see `VULNERABILITY_SERVICE.md` §5). +/// +/// The server also returns many fields we don't currently use +/// (`alias`, `summary`, `severity`, `severity_badge`, `tier_score`, +/// `llm_summary`, `packages`, `cwes`, `raw`, …). 
`serde` ignores +/// unknown fields by default; we add them here only when a caller +/// needs them. No top-level `remediation` field exists on the +/// server — do not add one (server's `llm_summary` is a 1-2 sentence +/// developer summary, not remediation guidance, and the semantics +/// differ). +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AdvisoryResponse { + #[serde(rename = "id")] + pub advisory_id: String, + #[serde(default)] + pub aliases: Vec, + #[serde(default)] + pub title: Option, + #[serde(default)] + pub severity_level: Option, + #[serde(default)] + pub tier: Option, + #[serde(default, rename = "source_url")] + pub url: Option, +} + +fn user_agent() -> String { + format!("corgea-cli/{} (vuln-api)", env!("CARGO_PKG_VERSION")) +} + +pub(crate) fn http_client() -> Result { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|e| format!("failed to build vuln-api http client: {}", e)) +} + +fn is_jwt(token: &str) -> bool { + let parts: Vec<&str> = token.splitn(4, '.').collect(); + parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) +} + +fn normalize_base_url(base_url: &str) -> String { + base_url.trim_end_matches('/').to_string() +} + +/// Encode package name for the vuln-api path segment. +/// npm scoped names: `@scope/pkg` → `@scope%2fpkg` (mirrors registry.rs). 
+fn encode_package_name(ecosystem: &str, name: &str) -> String { + if ecosystem.eq_ignore_ascii_case("npm") { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() + } else { + urlencoding::encode(name).into_owned() + } +} + +fn build_package_check_request<'a>( + client: &'a reqwest::blocking::Client, + url: &'a str, + token: &'a str, +) -> reqwest::blocking::RequestBuilder { + let mut req = client + .get(url) + .header("Accept", "application/json") + .header("CORGEA-SOURCE", "cli"); + if is_jwt(token) { + req = req.header("Authorization", format!("Bearer {}", token)); + } else { + req = req.header("CORGEA-TOKEN", token); + } + req +} + +/// Collapse whitespace and truncate at `max_chars` so a server error +/// body can be spliced into a single-line CLI error message without +/// dragging in HTML newlines or runaway length. Returns empty string +/// when the body is empty so the caller can format conditionally. +/// Char-boundary safe — operates on `chars()`, never byte slices. 
+fn body_snippet(body: &str, max_chars: usize) -> String { + let collapsed: String = body.split_whitespace().collect::>().join(" "); + if collapsed.is_empty() { + return String::new(); + } + let truncated: String = collapsed.chars().take(max_chars).collect(); + if collapsed.chars().count() > max_chars { + format!("{}…", truncated) + } else { + truncated + } +} + +fn retry_after_seconds(response: &reqwest::blocking::Response) -> u64 { + response + .headers() + .get("Retry-After") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.trim().parse::().ok()) + .map(|s| s.clamp(1, 10)) + .unwrap_or(1) +} + +fn send_package_check_with_429_retry( + client: &reqwest::blocking::Client, + url: &str, + token: &str, +) -> Result> { + let response = build_package_check_request(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e))?; + + if response.status().as_u16() == 429 { + let wait = retry_after_seconds(&response); + std::thread::sleep(Duration::from_secs(wait)); + return build_package_check_request(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e).into()); + } + Ok(response) +} + +pub fn check_package_version( + client: &reqwest::blocking::Client, + base_url: &str, + token: &str, + ecosystem: &str, + name: &str, + version: &str, +) -> Result> { + if token.is_empty() { + return Err("missing Corgea token for vuln-api request".into()); + } + let base = normalize_base_url(base_url); + if base.is_empty() { + return Err("vuln-api base URL is empty".into()); + } + let encoded_name = encode_package_name(ecosystem, name); + let encoded_version = urlencoding::encode(version); + let url = format!( + "{}/v1/packages/{}/{}/versions/{}/check", + base, ecosystem, encoded_name, encoded_version + ); + + debug(&format!("Sending vuln-api request to URL: {}", url)); + + let response = send_package_check_with_429_retry(client, &url, token)?; + + let status = response.status(); + match status.as_u16() { + 401 => 
{ + return Err( + "vuln-api rejected the Corgea token (run `corgea login` to refresh)".into(), + ); + } + 403 => { + return Err("vuln-api access denied (check your Corgea plan/permissions)".into()); + } + 404 => { + return Ok(VulnCheckResponse { + ecosystem: ecosystem.to_string(), + package_name: name.to_string(), + version: version.to_string(), + is_vulnerable: false, + matches: vec![], + }); + } + 429 => { + return Err("vuln-api rate-limited this request (retry later)".into()); + } + 500..=599 => { + return Err(format!("vuln-api unavailable (HTTP {})", status.as_u16()).into()); + } + code if !status.is_success() => { + let body = response.text().unwrap_or_default(); + let snippet = body_snippet(&body, ERROR_BODY_SNIPPET_LEN); + let suffix = if snippet.is_empty() { + String::new() + } else { + format!(": {}", snippet) + }; + return Err(format!("vuln-api returned unexpected HTTP {}{}", code, suffix).into()); + } + _ => {} + } + + let response_text = response.text()?; + let parsed: VulnCheckResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse vuln-api response: {}. Body: {}", + e, response_text + )); + format!("Failed to parse vuln-api response: {}", e) + })?; + + // Confused-deputy guard: refuse to attribute advisories to a different + // (name, version, ecosystem) than what we asked about. The server is + // allowed to be silent on identity, but if it answers, it must match. 
+ if !parsed.ecosystem.is_empty() && !parsed.ecosystem.eq_ignore_ascii_case(ecosystem) { + return Err(format!( + "vuln-api response ecosystem '{}' does not match request '{}'", + parsed.ecosystem, ecosystem + ) + .into()); + } + if !parsed.package_name.is_empty() && !parsed.package_name.eq_ignore_ascii_case(name) { + return Err(format!( + "vuln-api response package '{}' does not match request '{}'", + parsed.package_name, name + ) + .into()); + } + if !parsed.version.is_empty() && parsed.version != version { + return Err(format!( + "vuln-api response version '{}' does not match request '{}'", + parsed.version, version + ) + .into()); + } + + // is_vulnerable=true with no matches is contradictory — treat as an + // error so the caller can surface it rather than silently demoting + // the dep to "clean". + if parsed.is_vulnerable && parsed.matches.is_empty() { + return Err( + "vuln-api reported is_vulnerable=true with no matches; refusing to interpret".into(), + ); + } + + Ok(parsed) +} + +pub fn get_advisory( + client: &reqwest::blocking::Client, + base_url: &str, + token: &str, + advisory_id: &str, +) -> Result> { + if token.is_empty() { + return Err("missing Corgea token for vuln-api request".into()); + } + let base = normalize_base_url(base_url); + if base.is_empty() { + return Err("vuln-api base URL is empty".into()); + } + let encoded_id = urlencoding::encode(advisory_id); + let url = format!("{}/v1/advisories/{}", base, encoded_id); + + debug(&format!( + "Sending vuln-api advisory request to URL: {}", + url + )); + + let mut req = client + .get(&url) + .header("Accept", "application/json") + .header("CORGEA-SOURCE", "cli"); + if is_jwt(token) { + req = req.header("Authorization", format!("Bearer {}", token)); + } else { + req = req.header("CORGEA-TOKEN", token); + } + + let response = req + .send() + .map_err(|e| format!("Failed to send vuln-api advisory request: {}", e))?; + + let status = response.status(); + if !status.is_success() { + let body = 
response.text().unwrap_or_default(); + let snippet = body_snippet(&body, ERROR_BODY_SNIPPET_LEN); + let suffix = if snippet.is_empty() { + String::new() + } else { + format!(": {}", snippet) + }; + return Err(format!( + "vuln-api advisory lookup failed: HTTP {}{}", + status.as_u16(), + suffix + ) + .into()); + } + + let response_text = response.text()?; + let parsed: AdvisoryResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse vuln-api advisory response: {}. Body: {}", + e, response_text + )); + format!("Failed to parse vuln-api advisory response: {}", e) + })?; + + // Identity guard: refuse a response that names a different advisory + // than we asked about. The server is allowed to be silent on + // identity (empty advisory_id), but if it answers it must match + // either the canonical id or one of the aliases. + if !parsed.advisory_id.is_empty() + && !parsed.advisory_id.eq_ignore_ascii_case(advisory_id) + && !parsed + .aliases + .iter() + .any(|a| a.eq_ignore_ascii_case(advisory_id)) + { + return Err(format!( + "vuln-api response advisory_id '{}' does not match request '{}'", + parsed.advisory_id, advisory_id + ) + .into()); + } + + Ok(parsed) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::io::{Read, Write}; + use std::net::TcpListener; + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + struct PackageCheckStub { + base_url: String, + _handle: thread::JoinHandle<()>, + } + + /// Keys in `retry_after_keys`: first hit → 429 + Retry-After: 1, second hit → + /// response from `responses` (or clean 200 fallback). + /// `advisory_responses` keys advisory id → (status, body) for the + /// `/v1/advisories/:id` route. Empty map = route returns 404. 
+ fn spawn_package_check_stub_with_retry_keys( + responses: HashMap<(String, String, String), (u16, String)>, + retry_after_keys: HashMap<(String, String, String), (u16, String)>, + advisory_responses: HashMap, + ) -> PackageCheckStub { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let port = listener.local_addr().unwrap().port(); + let base_url = format!("http://127.0.0.1:{}", port); + let responses = Arc::new(Mutex::new(responses)); + let retry_after_keys = Arc::new(Mutex::new(retry_after_keys)); + let advisory_responses = Arc::new(Mutex::new(advisory_responses)); + let hit_counts: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + + let handle = thread::spawn(move || { + for stream in listener.incoming().take(32) { + let Ok(mut stream) = stream else { + continue; + }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + let (status_code, status_text, body, extra_headers) = if let Some(path) = + req.lines().next().and_then(|l| l.split_whitespace().nth(1)) + { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let eco = parts[2].to_string(); + let name = urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(); + let ver = urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(); + let key = (eco.clone(), name.clone(), ver.clone()); + let hits = { + let mut counts = hit_counts.lock().unwrap(); + let entry = counts.entry(key.clone()).or_insert(0); + *entry += 1; + *entry + }; + + let retry_body = retry_after_keys.lock().unwrap().get(&key).cloned(); + if retry_body.is_some() && hits == 1 { + let (code, body) = (429, 
r#"{"error":"rate limited"}"#.to_string()); + let text = "Too Many Requests"; + (code, text, body, "Retry-After: 1\r\n".to_string()) + } else { + let (code, body) = responses + .lock() + .unwrap() + .get(&key) + .cloned() + .or_else(|| retry_body) + .unwrap_or((200, r#"{"is_vulnerable":false,"matches":[]}"#.into())); + let text = match code { + 401 => "Unauthorized", + 403 => "Forbidden", + 404 => "Not Found", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ => "Error", + }; + (code, text, body, String::new()) + } + } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { + let id = urlencoding::decode(parts[2]) + .unwrap_or_default() + .into_owned(); + let (code, body) = advisory_responses + .lock() + .unwrap() + .get(&id) + .cloned() + .unwrap_or((404, r#"{"error":"not found"}"#.into())); + let text = match code { + 401 => "Unauthorized", + 403 => "Forbidden", + 404 => "Not Found", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ => "Error", + }; + (code, text, body, String::new()) + } else { + ( + 404, + "Not Found", + r#"{"error":"not found"}"#.into(), + String::new(), + ) + } + } else { + ( + 400, + "Bad Request", + r#"{"error":"bad request"}"#.into(), + String::new(), + ) + }; + + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\n\r\n{}", + status_code, status_text, extra_headers, body.len(), body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + + thread::sleep(Duration::from_millis(50)); + PackageCheckStub { + base_url, + _handle: handle, + } + } + + fn check_with_stub_status( + status_code: u16, + body: &str, + ) -> Result> { + let client = http_client().expect("test client"); + let mut responses = HashMap::new(); + responses.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + (status_code, body.to_string()), + ); + let stub = + spawn_package_check_stub_with_retry_keys(responses, HashMap::new(), 
HashMap::new()); + check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + } + + #[test] + fn check_package_version_401_returns_actionable_error() { + let err = check_with_stub_status(401, r#"{"error":"unauthorized"}"#) + .expect_err("401 should fail"); + assert!(err.to_string().contains("rejected the Corgea token")); + } + + #[test] + fn check_package_version_403_returns_actionable_error() { + let err = + check_with_stub_status(403, r#"{"error":"forbidden"}"#).expect_err("403 should fail"); + assert!(err.to_string().contains("access denied")); + } + + #[test] + fn check_package_version_404_returns_clean() { + let resp = + check_with_stub_status(404, r#"{"error":"not found"}"#).expect("404 should be clean"); + assert!(!resp.is_vulnerable); + assert!(resp.matches.is_empty()); + assert_eq!(resp.package_name, "lodash"); + assert_eq!(resp.version, "4.17.20"); + } + + #[test] + fn check_package_version_persistent_429_returns_actionable_error() { + let err = check_with_stub_status(429, r#"{"error":"rate limited"}"#) + .expect_err("429 should fail"); + assert!(err.to_string().contains("rate-limited")); + } + + #[test] + fn check_package_version_429_retries_then_succeeds() { + let client = http_client().unwrap(); + let vulnerable_body = r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-retry-test", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let mut retry_after_keys = HashMap::new(); + retry_after_keys.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + (200, vulnerable_body.to_string()), + ); + let stub = spawn_package_check_stub_with_retry_keys( + HashMap::new(), + retry_after_keys, + HashMap::new(), + ); + let resp = check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + 
.expect("retry should succeed"); + assert!(resp.is_vulnerable); + } + + #[test] + fn check_package_version_500_returns_unavailable() { + let err = + check_with_stub_status(500, r#"{"error":"internal"}"#).expect_err("500 should fail"); + assert!(err.to_string().contains("unavailable (HTTP 500)")); + } + + #[test] + fn check_package_version_unexpected_status_includes_body_snippet() { + let err = + check_with_stub_status(418, r#"{"error":"teapot"}"#).expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + assert!( + msg.contains("teapot"), + "expected body in error; got: {}", + msg + ); + } + + #[test] + fn check_package_version_unexpected_status_omits_body_when_empty() { + let err = check_with_stub_status(418, "").expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + // Body is empty → message must end at the status, no dangling ":" or whitespace. + assert!( + msg.trim_end().ends_with("418"), + "expected message to end at status code; got: {:?}", + msg + ); + } + + #[test] + fn get_advisory_non_success_includes_body_snippet() { + let client = http_client().expect("test client"); + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-deploy-gap".to_string(), + (400, r#"{"error":"Invalid url"}"#.to_string()), + ); + let stub = + spawn_package_check_stub_with_retry_keys(HashMap::new(), HashMap::new(), advisories); + let err = get_advisory(&client, &stub.base_url, "test-token", "GHSA-deploy-gap") + .expect_err("400 should fail"); + let msg = err.to_string(); + assert!( + msg.contains("advisory lookup failed: HTTP 400"), + "got: {}", + msg + ); + assert!( + msg.contains("Invalid url"), + "expected body snippet in advisory error; got: {}", + msg + ); + } + + #[test] + fn body_snippet_truncates_at_char_boundary() { + // Multi-byte char ("é" is 2 bytes UTF-8). 
Naïve byte-slicing would + // panic; we must operate on chars(). + let input = "é".repeat(500); + let out = body_snippet(&input, ERROR_BODY_SNIPPET_LEN); + assert!(out.ends_with('…'), "expected ellipsis; got: {:?}", out); + // 300 "é" chars + the ellipsis. + assert_eq!(out.chars().count(), ERROR_BODY_SNIPPET_LEN + 1); + } + + #[test] + fn body_snippet_collapses_whitespace() { + assert_eq!(body_snippet("foo\n bar\t\tbaz", 100), "foo bar baz"); + } + + #[test] + fn body_snippet_empty_returns_empty() { + assert_eq!(body_snippet("", 100), ""); + assert_eq!(body_snippet(" \n\t ", 100), ""); + } + + #[test] + fn encode_package_name_scoped_npm() { + assert_eq!(encode_package_name("npm", "@types/node"), "@types%2fnode"); + assert_eq!(encode_package_name("npm", "lodash"), "lodash"); + } + + #[test] + fn encode_package_name_pypi() { + assert_eq!(encode_package_name("PyPI", "requests"), "requests"); + } + + #[test] + fn encode_package_name_npm_case_insensitive() { + // Defends against vuln_api_ecosystem() casing changes. 
+ assert_eq!(encode_package_name("NPM", "@types/node"), "@types%2fnode"); + } + + #[test] + fn deserialize_vuln_check_response() { + let body = r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-xxxx-yyyy-zzzz", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let parsed: VulnCheckResponse = serde_json::from_str(body).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + assert_eq!(parsed.matches[0].advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.matches[0].tier, 1); + } + + #[test] + fn normalize_base_url_strips_trailing_slash() { + assert_eq!( + normalize_base_url("http://localhost:8080/"), + "http://localhost:8080" + ); + } + + #[test] + fn is_jwt_detection() { + assert!(is_jwt("a.b.c")); + assert!(!is_jwt("plain-token")); + assert!(!is_jwt("a.b")); + assert!(!is_jwt("a..c")); + } + + #[test] + fn deserialize_advisory_response_real_server_shape() { + // Mirrors the worker's emitted payload (cve_worker/src/worker.js): + // server emits `id` (not `advisory_id`) and `source_url` (not `url`), + // plus many fields we ignore. No top-level `remediation` exists. 
+ let body = r#"{ + "id": "GHSA-xxxx-yyyy-zzzz", + "source": "ghsa", + "source_url": "https://github.com/advisories/GHSA-xxxx-yyyy-zzzz", + "alias": "CVE-2026-12345", + "aliases": ["CVE-2026-12345"], + "ecosystem": "npm", + "summary": "Prototype pollution in lodash", + "severity": "HIGH", + "severity_badge": "HIGH", + "tier": 1, + "tier_score": 74.5, + "llm_summary": "Short developer-facing summary.", + "packages": [], + "cwes": [] + }"#; + let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); + assert_eq!(parsed.advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.aliases, vec!["CVE-2026-12345".to_string()]); + assert_eq!(parsed.tier, Some(1)); + assert_eq!( + parsed.url.as_deref(), + Some("https://github.com/advisories/GHSA-xxxx-yyyy-zzzz") + ); + } + + #[test] + fn deserialize_advisory_response_tier_null_and_missing_source_url() { + // Server emits `tier: null` for unscored advisories + // (VULNERABILITY_SERVICE.md §5). `source_url` may also be absent. + let body = r#"{ + "id": "GHSA-only-id", + "tier": null + }"#; + let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); + assert_eq!(parsed.advisory_id, "GHSA-only-id"); + assert!(parsed.tier.is_none()); + assert!(parsed.aliases.is_empty()); + assert!(parsed.title.is_none()); + assert!(parsed.severity_level.is_none()); + assert!(parsed.url.is_none()); + } +} diff --git a/src/vuln_api_stub/fixtures.rs b/src/vuln_api_stub/fixtures.rs new file mode 100644 index 0000000..626bfea --- /dev/null +++ b/src/vuln_api_stub/fixtures.rs @@ -0,0 +1,70 @@ +use super::StubFixtures; +use serde::Deserialize; +use serde_json::Value; +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +#[derive(Debug, Deserialize)] +struct FixtureFile { + #[serde(default)] + package_checks: HashMap, + #[serde(default)] + advisories: HashMap, +} + +/// Load stub fixtures from JSON. Keys in `package_checks` use `{ecosystem}/{name}/{version}`. 
+pub fn load_from_file(path: &Path) -> Result> { + let raw = fs::read_to_string(path)?; + let file: FixtureFile = serde_json::from_str(&raw)?; + + let mut package_checks = HashMap::new(); + for (key, value) in file.package_checks { + let (eco, name, ver) = parse_package_key(&key)?; + let body = serde_json::to_string(&value)?; + package_checks.insert((eco, name, ver), body); + } + + let mut advisories = HashMap::new(); + for (id, value) in file.advisories { + advisories.insert(id, serde_json::to_string(&value)?); + } + + Ok(StubFixtures { + package_checks, + advisories, + status_overrides: HashMap::new(), + }) +} + +fn parse_package_key(key: &str) -> Result<(String, String, String), Box> { + let parts: Vec<&str> = key.split('/').collect(); + if parts.len() != 3 { + return Err( + format!("package_checks key must be ecosystem/name/version, got {key:?}").into(), + ); + } + Ok(( + parts[0].to_string(), + parts[1].to_string(), + parts[2].to_string(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_package_key_validates_format() { + assert_eq!( + parse_package_key("npm/lodash/4.17.20").unwrap(), + ( + "npm".to_string(), + "lodash".to_string(), + "4.17.20".to_string() + ) + ); + assert!(parse_package_key("npm/lodash").is_err()); + } +} diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs new file mode 100644 index 0000000..0853f34 --- /dev/null +++ b/src/vuln_api_stub/mod.rs @@ -0,0 +1,317 @@ +mod fixtures; + +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::Path; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +pub use fixtures::load_from_file; + +type PackageKey = (String, String, String); + +const NOT_FOUND_BODY: &str = r#"{"error":"not found"}"#; + +/// Loaded fixture data for the vuln-api stub server. 
+#[derive(Debug, Clone, Default)] +pub struct StubFixtures { + pub package_checks: HashMap, + pub advisories: HashMap, + pub status_overrides: HashMap, +} + +pub struct VulnApiStub { + pub base_url: String, + _handle: thread::JoinHandle<()>, +} + +impl VulnApiStub { + /// Block until the stub server thread exits (normally never, unless the listener fails). + pub fn block(self) { + let _ = self._handle.join(); + } +} + +/// Minimal TCP vuln-api stub for CLI integration tests and e2e dogfood. +pub fn spawn(fixtures: HashMap) -> VulnApiStub { + spawn_with_statuses(fixtures, HashMap::new()) +} + +pub fn spawn_with_statuses( + fixtures: HashMap, + status_overrides: HashMap, +) -> VulnApiStub { + spawn_on_port( + StubFixtures { + package_checks: fixtures, + advisories: HashMap::new(), + status_overrides, + }, + 0, + ) +} + +/// Bind stub on `port` (`0` = ephemeral). Returns base URL `http://127.0.0.1:{port}`. +pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { + let addr = if port == 0 { + "127.0.0.1:0".to_string() + } else { + format!("127.0.0.1:{port}") + }; + let listener = TcpListener::bind(&addr).unwrap_or_else(|e| panic!("bind stub on {addr}: {e}")); + let bound_port = listener.local_addr().expect("stub local_addr").port(); + let base_url = format!("http://127.0.0.1:{bound_port}"); + + let package_checks = Arc::new(fixtures.package_checks); + let advisories = Arc::new(fixtures.advisories); + let status_overrides = Arc::new(fixtures.status_overrides); + + let handle = thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { + continue; + }; + handle_connection(&mut stream, &package_checks, &advisories, &status_overrides); + } + }); + + thread::sleep(Duration::from_millis(50)); + + VulnApiStub { + base_url, + _handle: handle, + } +} + +pub fn spawn_from_file(path: &Path) -> VulnApiStub { + let fixtures = + load_from_file(path).unwrap_or_else(|e| panic!("load stub fixtures {path:?}: {e}")); + 
spawn_on_port(fixtures, 0) +} + +fn handle_connection( + stream: &mut std::net::TcpStream, + package_checks: &Arc>, + advisories: &Arc>, + status_overrides: &Arc>, +) { + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + let path = req.lines().next().and_then(|l| l.split_whitespace().nth(1)); + + let (status_code, response_body) = match path { + Some(path) => { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let key = ( + parts[2].to_string(), + urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(), + urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(), + ); + let body = package_checks + .get(&key) + .cloned() + .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); + let status = status_overrides.get(&key).copied().unwrap_or(200); + (status, body) + } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { + let id = urlencoding::decode(parts[2]) + .unwrap_or_default() + .into_owned(); + match advisories.get(&id) { + Some(body) => (200, body.clone()), + None => (404, NOT_FOUND_BODY.to_string()), + } + } else { + (404, NOT_FOUND_BODY.to_string()) + } + } + None => (400, r#"{"error":"bad request"}"#.to_string()), + }; + + let status_text = status_text(status_code); + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + status_code, + status_text, + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); +} + +fn status_text(status_code: u16) -> &'static str { + match status_code { + 404 => "Not Found", + 401 => 
"Unauthorized", + 403 => "Forbidden", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ if status_code >= 400 => "Error", + _ => "OK", + } +} + +fn default_clean_response(eco: &str, name: &str, ver: &str) -> String { + format!( + r#"{{"ecosystem":"{eco}","package_name":"{name}","version":"{ver}","is_vulnerable":false,"matches":[]}}"# + ) +} + +pub fn lodash_vulnerable_response() -> String { + r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-integration-test", + "severity_level": "high", + "tier": 2, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + }] + }"# + .to_string() +} + +/// One critical + one high match on a single advisory. Used to exercise +/// `--severity critical` and `--severity critical,high` gating. +pub fn lodash_critical_and_high_response() -> String { + r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "GHSA-test-critical", + "severity_level": "critical", + "tier": 1, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + }, + { + "advisory_id": "GHSA-test-high", + "severity_level": "high", + "tier": 2, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + } + ] + }"# + .to_string() +} + +/// One critical + one high + one medium match. Used to exercise +/// `--severity critical,high` `OneOf` semantics (the medium match +/// renders but is below-floor). 
+pub fn lodash_critical_high_and_medium_response() -> String { + r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "GHSA-test-critical", + "severity_level": "critical", + "tier": 1, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + }, + { + "advisory_id": "GHSA-test-high", + "severity_level": "high", + "tier": 2, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + }, + { + "advisory_id": "GHSA-test-medium", + "severity_level": "medium", + "tier": 2, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + } + ] + }"# + .to_string() +} + +/// Single match at the server's `unknown` fallback severity. Locks the +/// fail-open `Info` mapping so unknown strings never silently drop from +/// the gate. +pub fn lodash_unknown_severity_response() -> String { + r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-test-unknown", + "severity_level": "unknown", + "tier": 2, + "vulnerable_version_range": "< 4.17.21", + "fixed_version": "4.17.21" + }] + }"# + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Read, Write}; + use std::net::TcpStream; + + fn dogfood_fixture_path() -> std::path::PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("fixtures/deps/vuln-api-stub.json") + } + + #[test] + fn load_dogfood_fixture_file() { + let fixtures = load_from_file(&dogfood_fixture_path()).expect("load dogfood fixture"); + assert!(fixtures.package_checks.contains_key(&( + "npm".into(), + "lodash".into(), + "4.17.20".into() + ))); + assert!(fixtures.advisories.contains_key("CVE-2019-10744")); + } + + #[test] + fn stub_serves_package_check_from_file() { + let stub = spawn_from_file(&dogfood_fixture_path()); + let port: u16 = stub.base_url.rsplit(':').next().unwrap().parse().unwrap(); + let mut stream = 
TcpStream::connect(format!("127.0.0.1:{port}")).expect("connect stub"); + let req = "GET /v1/packages/npm/lodash/versions/4.17.20/check HTTP/1.1\r\nHost: localhost\r\n\r\n"; + stream.write_all(req.as_bytes()).unwrap(); + let mut resp = String::new(); + stream.read_to_string(&mut resp).unwrap(); + assert!(resp.contains("is_vulnerable")); + assert!(resp.contains("CVE-2019-10744")); + } +} diff --git a/src/wait.rs b/src/wait.rs index c0ce3e7..8a7cccc 100644 --- a/src/wait.rs +++ b/src/wait.rs @@ -1,7 +1,6 @@ -use crate::utils; use crate::config::Config; use crate::scanners::blast; - +use crate::utils; pub fn run(config: &Config, scan_id: Option, project_id: Option) { let project_name = match utils::generic::get_current_working_directory() { @@ -12,7 +11,8 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) } }; - let scans_result = utils::api::query_scan_list(&config.get_url(), Some(&project_name), Some(1), None); + let scans_result = + utils::api::query_scan_list(&config.get_url(), Some(&project_name), Some(1), None); let scans: Vec = match scans_result { Ok(result) => result.scans.unwrap_or_default(), Err(e) => { @@ -23,7 +23,7 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) Check out our docs at https://docs.corgea.app/install_cli#login-with-the-cli - Error details: {}", + Error details: {}", e ); std::process::exit(1); @@ -41,21 +41,24 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) } }; (scan_id.to_string(), processed) - }, - None => { - match scans.get(0) { - Some(scan) => (scan.id.clone(), scan.status == "Complete"), - None => { - eprintln!("Error querying scan list"); - std::process::exit(1); - } - } } + None => match scans.first() { + Some(scan) => (scan.id.clone(), scan.status == "Complete"), + None => { + eprintln!("Error querying scan list"); + std::process::exit(1); + } + }, }; let scan_url = match &project_id { Some(pid) => format!("{}/project/{}/?scan_id={}", config.get_url(), pid, scan_id), - None 
=> format!("{}/project/{}?scan_id={}", config.get_url(), project_name, scan_id), + None => format!( + "{}/project/{}?scan_id={}", + config.get_url(), + project_name, + scan_id + ), }; if !processed { @@ -70,7 +73,7 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) ); blast::wait_for_scan(config, &scan_id); } else { - print!("Scan has been processed successfully!\n"); + println!("Scan has been processed successfully!"); } match blast::report_scan_status(&config.get_url(), &project_name) { @@ -79,7 +82,7 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) "\n\nYou can view the scan results at the following link:\n{}", utils::terminal::set_text_color(&scan_url, utils::terminal::TerminalColor::Green) ); - }, + } Err(e) => { eprintln!( "\n\n{}\n\n\ @@ -89,7 +92,10 @@ pub fn run(config: &Config, scan_id: Option, project_id: Option) - Server URL: {}\n\ - Error details: {}\n", utils::terminal::set_text_color( - &format!("Failed to report the scan status for project: '{}'.", project_name), + &format!( + "Failed to report the scan status for project: '{}'.", + project_name + ), utils::terminal::TerminalColor::Red ), utils::terminal::set_text_color(&scan_url, utils::terminal::TerminalColor::Blue), diff --git a/tests/check_cve_http_errors.rs b/tests/check_cve_http_errors.rs new file mode 100644 index 0000000..15e9b50 --- /dev/null +++ b/tests/check_cve_http_errors.rs @@ -0,0 +1,133 @@ +mod common; + +use common::vuln_api_stub::spawn_with_statuses; +use common::{corgea_cmd, stub_env}; +use serde_json::Value; +use std::collections::HashMap; +use std::path::PathBuf; + +fn npm_fixture_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/deps/npm") +} + +#[test] +fn check_cve_404_is_clean_in_json() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "semver".into(), "5.4.1".into()), + r#"{"error":"not found"}"#.to_string(), + ); + let mut statuses = HashMap::new(); + statuses.insert(("npm".into(), 
"semver".into(), "5.4.1".into()), 404); + + let stub = spawn_with_statuses(fixtures, statuses); + let output = corgea_cmd() + .args([ + "deps", + "--check-cve", + "--cve-concurrency", + "1", + "--json", + "-e", + "npm", + "-p", + npm_fixture_dir().to_str().unwrap(), + ]) + .envs(stub_env(&stub.base_url)) + .output() + .expect("spawn corgea"); + + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let body: Value = serde_json::from_slice(&output.stdout).unwrap(); + + let summary = body.get("cve_summary").expect("cve_summary"); + assert_eq!(summary.get("errors").and_then(Value::as_u64), Some(0)); + + let results = body.get("results").and_then(Value::as_array).unwrap(); + let semver = results + .iter() + .find(|r| r["name"] == "semver") + .expect("semver"); + assert_eq!( + semver.get("cve_status").and_then(Value::as_str), + Some("clean") + ); + assert_eq!( + semver.get("cves").and_then(Value::as_array).map(Vec::len), + Some(0) + ); + assert!(semver.get("cve_error").is_none()); +} + +#[test] +fn check_cve_http_errors_render_actionable_messages() { + let mut fixtures = HashMap::new(); + let mut statuses = HashMap::new(); + + for (name, ver, code, body) in [ + ("lodash", "4.17.20", 401u16, r#"{"error":"unauthorized"}"#), + ("semver", "5.4.1", 403, r#"{"error":"forbidden"}"#), + ("json5", "2.2.1", 429, r#"{"error":"rate limited"}"#), + ] { + fixtures.insert(("npm".into(), name.into(), ver.into()), body.to_string()); + statuses.insert(("npm".into(), name.into(), ver.into()), code); + } + + let stub = spawn_with_statuses(fixtures, statuses); + let output = corgea_cmd() + .args([ + "deps", + "--check-cve", + "--cve-concurrency", + "1", + "-e", + "npm", + "-p", + npm_fixture_dir().to_str().unwrap(), + ]) + .envs(stub_env(&stub.base_url)) + .output() + .expect("spawn corgea"); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.contains("CVE lookup errors:")); + 
assert!(stdout.contains("rejected the Corgea token")); + assert!(stdout.contains("access denied")); + assert!(stdout.contains("rate-limited")); +} + +#[test] +fn check_cve_500_renders_unavailable_message() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + r#"{"error":"internal"}"#.to_string(), + ); + let mut statuses = HashMap::new(); + statuses.insert(("npm".into(), "lodash".into(), "4.17.20".into()), 500); + + let stub = spawn_with_statuses(fixtures, statuses); + let output = corgea_cmd() + .args([ + "deps", + "--check-cve", + "--cve-concurrency", + "1", + "-e", + "npm", + "-p", + npm_fixture_dir().to_str().unwrap(), + ]) + .envs(stub_env(&stub.base_url)) + .output() + .expect("spawn corgea"); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.contains("unavailable (HTTP 500)")); +} diff --git a/tests/check_cve_preflight.rs b/tests/check_cve_preflight.rs new file mode 100644 index 0000000..942b0fc --- /dev/null +++ b/tests/check_cve_preflight.rs @@ -0,0 +1,61 @@ +use std::path::PathBuf; +use std::process::Command; + +fn npm_fixture_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/deps/npm") +} + +#[test] +fn check_cve_preflight_exits_two_without_token() { + let output = Command::new(env!("CARGO_BIN_EXE_corgea")) + .args([ + "deps", + "--check-cve", + "-e", + "npm", + "-p", + npm_fixture_dir().to_str().unwrap(), + ]) + .env("CORGEA_TOKEN", "") + .env_remove("CORGEA_CONFIG") + .env("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1") + .output() + .expect("spawn corgea"); + + assert_eq!( + output.status.code(), + Some(2), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("Corgea token"), + "expected token requirement in stderr, got: {stderr}" + ); + assert!( + output.stdout.is_empty(), + "preflight should not print a report; 
stdout: {}", + String::from_utf8_lossy(&output.stdout) + ); +} + +#[test] +fn check_cve_preflight_exits_two_with_whitespace_token() { + let output = Command::new(env!("CARGO_BIN_EXE_corgea")) + .args([ + "deps", + "--check-cve", + "-e", + "npm", + "-p", + npm_fixture_dir().to_str().unwrap(), + ]) + .env("CORGEA_TOKEN", " ") + .env_remove("CORGEA_CONFIG") + .env("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1") + .output() + .expect("spawn corgea"); + + assert_eq!(output.status.code(), Some(2)); +} diff --git a/tests/common/concurrency_stub.rs b/tests/common/concurrency_stub.rs new file mode 100644 index 0000000..113795d --- /dev/null +++ b/tests/common/concurrency_stub.rs @@ -0,0 +1,150 @@ +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Duration; + +pub struct ConcurrencyStub { + pub base_url: String, + peak_in_flight: Arc, + _handle: thread::JoinHandle<()>, +} + +pub struct StubConfig { + pub per_request_sleep: Duration, + pub retry_after_mode: bool, + pub default_body: String, +} + +impl ConcurrencyStub { + pub fn spawn(config: StubConfig) -> Self { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let port = listener.local_addr().unwrap().port(); + let base_url = format!("http://127.0.0.1:{}", port); + let in_flight = Arc::new(AtomicUsize::new(0)); + let peak_in_flight = Arc::new(AtomicUsize::new(0)); + let hit_counts: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + let in_flight_listener = in_flight.clone(); + let peak_listener = peak_in_flight.clone(); + + let handle = thread::spawn(move || { + let mut worker_handles = Vec::new(); + for stream in listener.incoming().take(256) { + let Ok(mut stream) = stream else { + continue; + }; + let in_flight = in_flight_listener.clone(); + let peak = peak_listener.clone(); + let hit_counts = hit_counts.clone(); + let per_request_sleep = 
config.per_request_sleep; + let retry_after_mode = config.retry_after_mode; + let default_body = config.default_body.clone(); + + worker_handles.push(thread::spawn(move || { + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + let cur = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + peak.fetch_max(cur, Ordering::SeqCst); + thread::sleep(per_request_sleep); + in_flight.fetch_sub(1, Ordering::SeqCst); + + let (status_code, status_text, response_body, extra_headers) = + if let Some(path) = + req.lines().next().and_then(|l| l.split_whitespace().nth(1)) + { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let eco = parts[2].to_string(); + let name = urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(); + let ver = urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(); + let key = (eco, name, ver); + + if retry_after_mode { + let hits = { + let mut counts = hit_counts.lock().unwrap(); + let entry = counts.entry(key).or_insert(0); + *entry += 1; + *entry + }; + if hits == 1 { + ( + 429, + "Too Many Requests", + r#"{"error":"rate limited"}"#.to_string(), + "Retry-After: 1\r\n".to_string(), + ) + } else { + (200, "OK", default_body, String::new()) + } + } else { + (200, "OK", default_body, String::new()) + } + } else { + ( + 404, + "Not Found", + r#"{"error":"not found"}"#.to_string(), + String::new(), + ) + } + } else { + ( + 400, + "Bad Request", + r#"{"error":"bad request"}"#.to_string(), + String::new(), + ) + }; + + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\n\r\n{}", + status_code, + 
status_text, + extra_headers, + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); + })); + } + for worker in worker_handles { + let _ = worker.join(); + } + }); + + thread::sleep(Duration::from_millis(50)); + + ConcurrencyStub { + base_url, + peak_in_flight, + _handle: handle, + } + } + + pub fn peak_concurrency(&self) -> usize { + self.peak_in_flight.load(Ordering::SeqCst) + } +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..9d20bc8 --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1,16 @@ +pub mod concurrency_stub; +pub mod vuln_api_stub; + +use std::process::Command; + +pub fn corgea_cmd() -> Command { + Command::new(env!("CARGO_BIN_EXE_corgea")) +} + +pub fn stub_env(stub_url: &str) -> [(&'static str, String); 3] { + [ + ("CORGEA_VULN_API_URL", stub_url.to_string()), + ("CORGEA_TOKEN", "test-token".to_string()), + ("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1".to_string()), + ] +} diff --git a/tests/common/vuln_api_stub.rs b/tests/common/vuln_api_stub.rs new file mode 100644 index 0000000..d2d0147 --- /dev/null +++ b/tests/common/vuln_api_stub.rs @@ -0,0 +1,3 @@ +//! Re-exports from the shared library stub module used by integration tests and e2e dogfood. 
+ +pub use corgea::vuln_api_stub::*; diff --git a/tests/cve_concurrency.rs b/tests/cve_concurrency.rs new file mode 100644 index 0000000..34589ba --- /dev/null +++ b/tests/cve_concurrency.rs @@ -0,0 +1,149 @@ +mod common; + +use common::concurrency_stub::{ConcurrencyStub, StubConfig}; +use common::{corgea_cmd, stub_env}; +use std::path::Path; +use std::sync::{Mutex, MutexGuard}; +use std::time::{Duration, Instant}; + +static CVE_INTEGRATION_LOCK: Mutex<()> = Mutex::new(()); + +fn integration_lock() -> MutexGuard<'static, ()> { + CVE_INTEGRATION_LOCK + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) +} + +fn write_n_dep_lockfile(dir: &Path, n: usize) { + let mut entries = String::new(); + for i in 0..n { + if !entries.is_empty() { + entries.push(','); + } + entries.push_str(&format!(r#""node_modules/pkg-{i}": {{"version":"1.0.0"}}"#)); + } + let lock = format!( + r#"{{"name":"demo","version":"1.0.0","lockfileVersion":3,"packages":{{{entries}}}}}"# + ); + std::fs::write(dir.join("package-lock.json"), lock).unwrap(); +} + +#[test] +fn invalid_cve_concurrency_exits_2() { + let _lock = integration_lock(); + for bad in ["0", "100"] { + let output = corgea_cmd() + .args(["deps", "--check-cve", "--cve-concurrency", bad]) + .output() + .expect("spawn"); + assert_eq!(output.status.code(), Some(2), "bad={bad}"); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stderr), + String::from_utf8_lossy(&output.stdout), + ); + assert!( + combined.contains("invalid value") || combined.contains("1..=32"), + "expected clap range error, got: {combined}" + ); + } +} + +#[test] +fn peak_concurrency_capped_at_default() { + let _lock = integration_lock(); + let dir = tempfile::tempdir().unwrap(); + write_n_dep_lockfile(dir.path(), 50); + + let stub = ConcurrencyStub::spawn(StubConfig { + per_request_sleep: Duration::from_millis(200), + retry_after_mode: false, + default_body: r#"{"is_vulnerable":false,"matches":[]}"#.into(), + }); + + let start = 
Instant::now(); + let output = corgea_cmd() + .args([ + "deps", + "--check-cve", + "--cve-concurrency", + "8", + "-e", + "npm", + "-p", + dir.path().to_str().unwrap(), + "--json", + ]) + .envs(stub_env(&stub.base_url)) + .output() + .expect("spawn"); + let elapsed = start.elapsed(); + + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + assert!( + elapsed < Duration::from_secs(3), + "expected parallel speedup, took {:?}", + elapsed + ); + assert!( + stub.peak_concurrency() <= 8, + "peak was {}", + stub.peak_concurrency() + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(!stdout.contains("[CVE check]")); +} + +#[test] +fn retry_after_429_produces_finding() { + let _lock = integration_lock(); + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.20" } + } + }"#, + ) + .unwrap(); + + let stub = ConcurrencyStub::spawn(StubConfig { + per_request_sleep: Duration::from_millis(10), + retry_after_mode: true, + default_body: common::vuln_api_stub::lodash_vulnerable_response(), + }); + + let output = corgea_cmd() + .args([ + "deps", + "--check-cve", + "-e", + "npm", + "-p", + dir.path().to_str().unwrap(), + "--json", + ]) + .envs(stub_env(&stub.base_url)) + .output() + .expect("spawn"); + + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let body: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!( + body["cve_summary"]["vulnerable"].as_u64(), + Some(1), + "{}", + body + ); +} diff --git a/tests/cve_severity_filter.rs b/tests/cve_severity_filter.rs new file mode 100644 index 0000000..ea4d979 --- /dev/null +++ b/tests/cve_severity_filter.rs @@ -0,0 +1,520 @@ +mod common; + +use common::corgea_cmd; +use 
common::stub_env; +use common::vuln_api_stub::{ + lodash_critical_and_high_response, lodash_critical_high_and_medium_response, + lodash_unknown_severity_response, lodash_vulnerable_response, spawn, VulnApiStub, +}; +use serde_json::Value; +use std::collections::HashMap; +use std::path::PathBuf; + +fn npm_fixture_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/deps/npm") +} + +fn run_deps(args: &[&str], extra_env: &[(&'static str, String)]) -> std::process::Output { + let mut cmd = corgea_cmd(); + cmd.args(args); + // Serialize requests against the in-process stub so parallel test + // runs don't overwhelm its single-threaded accept loop. Mirrors the + // CLI's `--cve-concurrency` flag (clap-validated 1..=32). + cmd.args(["--cve-concurrency", "1"]); + for (key, value) in extra_env { + cmd.env(key, value); + } + cmd.output().expect("spawn corgea") +} + +fn stub_with_lodash(body: String) -> (VulnApiStub, [(&'static str, String); 3]) { + let mut fixtures = HashMap::new(); + fixtures.insert( + ( + "npm".to_string(), + "lodash".to_string(), + "4.17.20".to_string(), + ), + body, + ); + let stub = spawn(fixtures); + let env = stub_env(&stub.base_url); + (stub, env) +} + +#[test] +fn severity_critical_blocks_only_critical_findings() { + let (_stub, env) = stub_with_lodash(lodash_critical_and_high_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(1), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_critical_exits_zero_when_only_high_finding() { + // lodash_vulnerable_response emits a single match at severity "high".
+ let (_stub, env) = stub_with_lodash(lodash_vulnerable_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_low_blocks_everything_at_or_above_low() { + let (_stub, env) = stub_with_lodash(lodash_critical_and_high_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "low", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(1), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_any_preserves_chunk_02_behavior() { + let (_stub, env) = stub_with_lodash(lodash_vulnerable_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "any", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(1), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_oneof_matches_exact_set() { + let (_stub, env) = stub_with_lodash(lodash_critical_high_and_medium_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical,high", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(1), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_without_fail_cve_errors() { + // Pre-flight (no stub) — non-Any --severity without --fail-cve must + // exit 2 at the runtime guard before any work is done. 
+ let output = corgea_cmd() + .args(["deps", "--check-cve", "--severity", "critical"]) + .output() + .expect("spawn corgea"); + assert_eq!( + output.status.code(), + Some(2), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("--severity requires --fail-cve"), + "expected runtime --severity requires --fail-cve message, got: {stderr}" + ); +} + +#[test] +fn explicit_severity_any_without_fail_cve_succeeds() { + // Explicit `--severity any` is a no-op gate-wise; the runtime guard + // must NOT require --fail-cve in that case, so CI matrices that + // always pass `--severity any` keep working without `--fail-cve`. + let (_stub, env) = stub_with_lodash(lodash_vulnerable_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--severity", + "any", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(0), + "explicit --severity any without --fail-cve must succeed; stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_invalid_value_exits_two() { + let output = corgea_cmd() + .args(["deps", "--check-cve", "--fail-cve", "--severity", "bogus"]) + .output() + .expect("spawn corgea"); + assert_eq!(output.status.code(), Some(2)); + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stderr), + String::from_utf8_lossy(&output.stdout), + ); + assert!( + combined.contains("invalid value") || combined.contains("unknown severity"), + "expected clap value-parser error, got: {combined}" + ); +} + +#[test] +fn severity_unknown_server_string_treated_as_info() { + let fixture = npm_fixture_dir(); + + // --severity any: must still trip on the "unknown" finding. 
+ let (_stub_any, env_any) = stub_with_lodash(lodash_unknown_severity_response()); + let output_any = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "any", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env_any, + ); + assert_eq!( + output_any.status.code(), + Some(1), + "Any floor must catch unknown severity; stderr: {}", + String::from_utf8_lossy(&output_any.stderr) + ); + + // --severity critical: must NOT trip on "unknown" (collapses to Info). + let (_stub_critical, env_critical) = stub_with_lodash(lodash_unknown_severity_response()); + let output_critical = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env_critical, + ); + assert_eq!( + output_critical.status.code(), + Some(0), + "Critical floor must filter out unknown severity (Info); stderr: {}", + String::from_utf8_lossy(&output_critical.stderr) + ); +} + +#[test] +fn severity_does_not_widen_fail_broad_gate() { + // --fail still trips on any CVE finding regardless of floor: even + // with --severity critical and a high-only fixture, --fail must + // still exit 1. 
+ let (_stub, env) = stub_with_lodash(lodash_vulnerable_response()); // high only + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail", + "--fail-cve", + "--severity", + "critical", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + output.status.code(), + Some(1), + "--fail must still trip on any CVE finding regardless of --severity; stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn severity_critical_below_floor_note_appears_in_text_output() { + let (_stub, env) = stub_with_lodash(lodash_critical_and_high_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("advisory matches below --severity floor (critical)"), + "expected below-floor note in stdout, got:\n{}", + stdout + ); + assert!( + stdout.contains("informational only"), + "expected 'informational only' phrase, got:\n{}", + stdout + ); + // Below-floor matches still render with their severity tag. 
+ assert!( + stdout.contains("(severity: high)"), + "expected below-floor match still rendered, got:\n{}", + stdout + ); +} + +#[test] +fn severity_oneof_outside_set_note_appears_in_text_output() { + let (_stub, env) = stub_with_lodash(lodash_critical_high_and_medium_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical,high", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("advisory matches outside --severity set (critical,high)"), + "expected outside-set note in stdout, got:\n{}", + stdout + ); +} + +#[test] +fn severity_any_does_not_emit_below_floor_note() { + let (_stub, env) = stub_with_lodash(lodash_critical_and_high_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "any", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + !stdout.contains("below --severity floor"), + "Any floor must not emit below-floor note, got:\n{}", + stdout + ); + assert!( + !stdout.contains("outside --severity set"), + "Any floor must not emit outside-set note, got:\n{}", + stdout + ); +} + +#[test] +fn severity_floor_emitted_in_cve_summary_json() { + let (_stub, env) = stub_with_lodash(lodash_critical_and_high_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "critical", + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + // --fail-cve trips → exit 1 — but JSON still printed on stdout + // before exit. Parse it without asserting status. 
+ let body: Value = + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON even on exit 1"); + let summary = body + .get("cve_summary") + .expect("cve_summary should be present"); + assert_eq!( + summary.get("severity_floor").and_then(Value::as_str), + Some("critical") + ); + assert_eq!( + summary + .get("vulnerable_above_floor") + .and_then(Value::as_u64), + Some(1) + ); + // Existing keys untouched. + assert_eq!(summary.get("vulnerable").and_then(Value::as_u64), Some(1)); +} + +#[test] +fn severity_any_emits_floor_as_any_in_json() { + let (_stub, env) = stub_with_lodash(lodash_vulnerable_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let body: Value = serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON"); + let summary = body + .get("cve_summary") + .expect("cve_summary should be present"); + assert_eq!( + summary.get("severity_floor").and_then(Value::as_str), + Some("any") + ); + // vulnerable_above_floor must equal vulnerable when floor is Any. + let vulnerable = summary.get("vulnerable").and_then(Value::as_u64).unwrap(); + assert_eq!( + summary + .get("vulnerable_above_floor") + .and_then(Value::as_u64), + Some(vulnerable) + ); +} + +#[test] +fn severity_oneof_emits_descending_label_in_json() { + let (_stub, env) = stub_with_lodash(lodash_critical_high_and_medium_response()); + let fixture = npm_fixture_dir(); + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "--severity", + "high,critical", // user input order + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let body: Value = serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON"); + let summary = body + .get("cve_summary") + .expect("cve_summary should be present"); + // Label is always rendered descending-by-severity for stability. 
+ assert_eq!( + summary.get("severity_floor").and_then(Value::as_str), + Some("critical,high") + ); +} diff --git a/tests/deps_fail_cve.rs b/tests/deps_fail_cve.rs new file mode 100644 index 0000000..2e7daf9 --- /dev/null +++ b/tests/deps_fail_cve.rs @@ -0,0 +1,380 @@ +mod common; + +use common::vuln_api_stub::{lodash_vulnerable_response, spawn}; +use serde_json::Value; +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Command; + +fn npm_fixture_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/deps/npm") +} + +fn corgea_cmd() -> Command { + Command::new(env!("CARGO_BIN_EXE_corgea")) +} + +fn stub_env(stub_url: &str) -> [(&'static str, String); 3] { + [ + ("CORGEA_VULN_API_URL", stub_url.to_string()), + ("CORGEA_TOKEN", "test-token".to_string()), + ("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1".to_string()), + ] +} + +fn run_deps(args: &[&str], extra_env: &[(&str, String)]) -> std::process::Output { + let mut cmd = corgea_cmd(); + cmd.args(args); + for (key, value) in extra_env { + cmd.env(key, value); + } + cmd.output().expect("spawn corgea") +} + +fn run_deps_json(args: &[&str], extra_env: &[(&str, String)]) -> Value { + let output = run_deps(args, extra_env); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON") +} + +#[test] +fn fail_cve_exits_one_when_vulnerable() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + lodash_vulnerable_response(), + ); + let stub = spawn(fixtures); + let fixture = npm_fixture_dir(); + + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &stub_env(&stub.base_url), + ); + + assert_eq!( + output.status.code(), + Some(1), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn 
fail_cve_exits_zero_when_all_clean() { + let stub = spawn(HashMap::new()); + let fixture = npm_fixture_dir(); + + let output = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &stub_env(&stub.base_url), + ); + + assert_eq!( + output.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn fail_cve_and_fail_flags_are_independent() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + lodash_vulnerable_response(), + ); + let stub = spawn(fixtures); + let fixture = npm_fixture_dir(); + let env = stub_env(&stub.base_url); + let path = fixture.to_str().unwrap(); + + // CVE present, neither gate flag → success. + let neither = run_deps(&["deps", "--check-cve", "-e", "npm", "-p", path], &env); + assert_eq!(neither.status.code(), Some(0)); + + // --fail-cve alone gates on CVEs. + let fail_cve_only = run_deps( + &["deps", "--check-cve", "--fail-cve", "-e", "npm", "-p", path], + &env, + ); + assert_eq!(fail_cve_only.status.code(), Some(1)); + + // --fail alone also gates on CVE findings (legacy behavior). 
+ let fail_only = run_deps( + &["deps", "--check-cve", "--fail", "-e", "npm", "-p", path], + &env, + ); + assert_eq!(fail_only.status.code(), Some(1)); +} + +#[test] +fn fail_cve_not_triggered_by_cve_lookup_errors() { + let fixture = npm_fixture_dir(); + let env = [ + ("CORGEA_VULN_API_URL", "http://127.0.0.1:1".to_string()), + ("CORGEA_TOKEN", "test-token".to_string()), + ("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1".to_string()), + ]; + + let fail_cve = run_deps( + &[ + "deps", + "--check-cve", + "--fail-cve", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + fail_cve.status.code(), + Some(0), + "--fail-cve should not trip on lookup errors alone; stderr: {}", + String::from_utf8_lossy(&fail_cve.stderr) + ); + + let fail = run_deps( + &[ + "deps", + "--check-cve", + "--fail", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + assert_eq!( + fail.status.code(), + Some(1), + "--fail should still trip on CVE lookup errors; stderr: {}", + String::from_utf8_lossy(&fail.stderr) + ); +} + +#[test] +fn check_cve_json_includes_cves_and_cve_summary() { + let mut fixtures = HashMap::new(); + fixtures.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + lodash_vulnerable_response(), + ); + let stub = spawn(fixtures); + let fixture = npm_fixture_dir(); + + let body = run_deps_json( + &[ + "deps", + "--check-cve", + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &stub_env(&stub.base_url), + ); + + let summary = body + .get("cve_summary") + .expect("cve_summary should be present with --check-cve"); + assert_eq!(summary.get("vulnerable").and_then(Value::as_u64), Some(1)); + assert_eq!(summary.get("clean").and_then(Value::as_u64), Some(2)); + assert_eq!(summary.get("errors").and_then(Value::as_u64), Some(0)); + assert!(summary.get("checked").and_then(Value::as_u64).is_some()); + assert!( + summary.get("skipped").is_none(), + "skipped key removed from cve_summary" + ); + // Severity-floor 
schema lock (chunk 08): both keys always present + // when cve_summary is emitted; default floor is "any" and + // vulnerable_above_floor == vulnerable. + assert_eq!( + summary.get("severity_floor").and_then(Value::as_str), + Some("any") + ); + assert_eq!( + summary + .get("vulnerable_above_floor") + .and_then(Value::as_u64), + Some(1) + ); + + let results = body + .get("results") + .and_then(Value::as_array) + .expect("results array"); + let lodash = results + .iter() + .find(|r| r.get("name").and_then(Value::as_str) == Some("lodash")) + .expect("lodash result"); + assert_eq!( + lodash.get("cve_status").and_then(Value::as_str), + Some("vulnerable") + ); + let cves = lodash + .get("cves") + .and_then(Value::as_array) + .expect("cves array on lodash"); + assert_eq!(cves.len(), 1); + let entry = &cves[0]; + assert_eq!( + entry.get("advisory_id").and_then(Value::as_str), + Some("GHSA-integration-test") + ); + assert_eq!( + entry.get("severity_level").and_then(Value::as_str), + Some("high") + ); + assert_eq!(entry.get("tier").and_then(Value::as_u64), Some(2)); + assert!(entry.get("vulnerable_version_range").is_some()); + assert!(entry.get("fixed_version").is_some()); +} + +#[test] +fn json_clean_deps_have_empty_cves_array() { + let stub = spawn(HashMap::new()); + let fixture = npm_fixture_dir(); + + let body = run_deps_json( + &[ + "deps", + "--check-cve", + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &stub_env(&stub.base_url), + ); + + let results = body + .get("results") + .and_then(Value::as_array) + .expect("results array"); + let semver = results + .iter() + .find(|r| r.get("name").and_then(Value::as_str) == Some("semver")) + .expect("semver result"); + assert_eq!( + semver.get("cve_status").and_then(Value::as_str), + Some("clean") + ); + assert_eq!( + semver.get("cves").and_then(Value::as_array).map(Vec::len), + Some(0) + ); + assert!(semver.get("cve_error").is_none()); + + // Severity-floor schema lock (chunk 08): floor defaults to 
"any" and + // vulnerable_above_floor is 0 when there are no findings. + let summary = body + .get("cve_summary") + .expect("cve_summary should be present with --check-cve"); + assert_eq!( + summary.get("severity_floor").and_then(Value::as_str), + Some("any") + ); + assert_eq!( + summary + .get("vulnerable_above_floor") + .and_then(Value::as_u64), + Some(0) + ); +} + +#[test] +fn json_omits_cve_fields_without_check_cve() { + let fixture = npm_fixture_dir(); + + let body = run_deps_json( + &[ + "deps", + "--json", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &[("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1".to_string())], + ); + assert!(body.get("cve_summary").is_none()); + let results = body + .get("results") + .and_then(Value::as_array) + .expect("results array"); + assert!(!results.is_empty()); + for dep in results { + assert!(dep.get("cves").is_none()); + assert!(dep.get("cve_status").is_none()); + } +} + +#[test] +fn cve_check_total_failure_renders_explicit_message() { + let fixture = npm_fixture_dir(); + let env = [ + ("CORGEA_VULN_API_URL", "http://127.0.0.1:1".to_string()), + ("CORGEA_TOKEN", "test-token".to_string()), + ("CORGEA_NPM_REGISTRY", "http://127.0.0.1:1".to_string()), + ]; + + let output = run_deps( + &[ + "deps", + "--check-cve", + "-e", + "npm", + "-p", + fixture.to_str().unwrap(), + ], + &env, + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("✗ CVE check did not complete"), + "expected explicit failure message under 'Known vulnerabilities:'; stdout:\n{}", + stdout + ); +} + +#[test] +fn fail_cve_without_check_cve_errors() { + let output = corgea_cmd() + .args(["deps", "--fail-cve"]) + .output() + .expect("spawn corgea"); + + assert_ne!(output.status.code(), Some(0)); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("check-cve") || stderr.contains("check_cve"), + "expected requires --check-cve message, got: {stderr}" + ); +} diff --git 
a/tests/skill_doc_mentions_check_cve.rs b/tests/skill_doc_mentions_check_cve.rs new file mode 100644 index 0000000..f61dc12 --- /dev/null +++ b/tests/skill_doc_mentions_check_cve.rs @@ -0,0 +1,110 @@ +use std::path::PathBuf; +use std::process::Command; + +#[test] +fn deps_help_mentions_login_and_docs() { + let output = Command::new(env!("CARGO_BIN_EXE_corgea")) + .args(["deps", "--help"]) + .output() + .expect("spawn corgea deps --help"); + + assert!( + output.status.success(), + "deps --help failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("corgea login") || stdout.contains("CORGEA_TOKEN"), + "expected login precondition in deps --help, got: {stdout}" + ); + assert!( + stdout.contains("docs.corgea.app/cli/deps"), + "expected docs URL in deps --help, got: {stdout}" + ); + assert!( + stdout.contains("--check-cve"), + "expected --check-cve flag in deps --help, got: {stdout}" + ); + assert!( + stdout.contains("--severity"), + "expected --severity flag in deps --help, got: {stdout}" + ); + assert!( + stdout.contains("docs.corgea.app/cli/deps#severity"), + "expected severity docs URL in deps --help, got: {stdout}" + ); +} + +#[test] +fn top_level_help_mentions_cve_in_deps_summary() { + let output = Command::new(env!("CARGO_BIN_EXE_corgea")) + .arg("--help") + .output() + .expect("spawn corgea --help"); + + assert!( + output.status.success(), + "corgea --help failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("CVE") || stdout.contains("cve") || stdout.contains("vulnerabilit"), + "expected CVE mention in corgea --help deps summary, got: {stdout}" + ); +} + +#[test] +fn skill_md_mentions_check_cve() { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("skills/corgea/SKILL.md"); + let content = + std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {}: {e}", 
path.display())); + + assert!( + content.contains("--check-cve"), + "SKILL.md missing --check-cve" + ); + assert!( + content.contains("corgea login") || content.contains("CORGEA_TOKEN"), + "SKILL.md missing auth precondition" + ); + assert!( + content.contains("--fail-cve"), + "SKILL.md missing --fail-cve" + ); + assert!( + content.contains("--severity"), + "SKILL.md missing --severity" + ); + assert!( + content.contains("docs.corgea.app/cli/deps") || content.contains("vuln-api.corgea.app"), + "SKILL.md missing docs or vuln-api reference" + ); +} + +#[test] +fn readme_mentions_deps_cve() { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("README.md"); + let content = + std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {}: {e}", path.display())); + + assert!( + content.contains("corgea deps"), + "README.md missing corgea deps" + ); + assert!( + content.contains("--check-cve"), + "README.md missing --check-cve" + ); + assert!( + content.contains("--severity"), + "README.md missing --severity" + ); + assert!( + content.contains("docs.corgea.app/cli/deps"), + "README.md missing link to public docs" + ); +}