From 69172021c397ff18024365994e13743f15345c5b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 21 May 2026 10:05:17 +0000 Subject: [PATCH 1/4] Add verify-deps command for supply-chain freshness checks Introduces `corgea verify-deps`, a new top-level command that scans a project's locked dependencies, looks each one up against the public registry (npm or PyPI), and flags any whose installed version was published within a configurable recency window. This is a fast, hermetic supply-chain tripwire useful right before a build or in CI. Capabilities: * Ecosystems: npm and Python (selectable via --ecosystem). * npm sources: package-lock.json (v1, v2, v3), npm-shrinkwrap.json, yarn.lock (classic). Non-registry deps (git/file/link/workspace) are skipped because they can't be looked up by version. * Python sources: poetry.lock, Pipfile.lock, uv.lock, and requirements.txt (==-pinned lines only). * Threshold: human-friendly durations -- 2d (default), 48h, 30m, 1w, bare numbers as days. Rejects negative / unknown / non-finite values. * --fail flag for CI: exits 1 when something recent is found or a registry lookup fails. * --json for machine-readable output (results, summary, sources, scanned_at, threshold_seconds). * --include-dev to opt into dev dependencies; production-only by default to keep the signal tight. * Honors CORGEA_NPM_REGISTRY / CORGEA_PYPI_REGISTRY env overrides (intended for tests / mirror users). Implementation notes: * PyPI lookup uses the per-version JSON endpoint (/pypi/<name>/<version>/json) and takes the earliest upload_time across the version's artifacts. Names are URL-encoded so PyPI's case- and separator-insensitive matching does the right thing. * npm lookup hits the package metadata endpoint and reads time[<version>]; scoped names like @types/node are encoded as @types%2fnode in the URL. The abbreviated metadata format is intentionally avoided because it omits time. * Python distribution names are normalised per PEP 503 before output. 
* The registry HTTP client is separate from the rest of the CLI so the user's Corgea auth header is never sent to a third-party. * Dependencies are de-duplicated by (ecosystem, name, version) before registry lookups to avoid hammering the registry on transitive collisions. Tests: * 23 hermetic unit tests covering threshold parsing, duration formatting, ecosystem parsing, name normalization, and lockfile parsers (npm v1, npm v3, yarn classic, requirements.txt, poetry, Pipfile, uv). * 5 #[ignore]'d live integration tests against npmjs.org and pypi.org (left-pad, requests, Flask, plus error paths) for end-to-end verification. Skipped by default to keep CI offline. Docs: skills/corgea/SKILL.md updated with command reference and a CI workflow snippet. Co-authored-by: Ibrahim Rahhal --- skills/corgea/SKILL.md | 6 + src/main.rs | 93 ++++++++ src/verify_deps/mod.rs | 412 ++++++++++++++++++++++++++++++++ src/verify_deps/npm.rs | 439 ++++++++++++++++++++++++++++++++++ src/verify_deps/python.rs | 453 ++++++++++++++++++++++++++++++++++++ src/verify_deps/registry.rs | 273 ++++++++++++++++++++++ src/verify_deps/report.rs | 147 ++++++++++++ 7 files changed, 1823 insertions(+) create mode 100644 src/verify_deps/mod.rs create mode 100644 src/verify_deps/npm.rs create mode 100644 src/verify_deps/python.rs create mode 100644 src/verify_deps/registry.rs create mode 100644 src/verify_deps/report.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 2429d9c..09470fc 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -148,6 +148,12 @@ corgea scan --fail-on CR --out-format sarif --out-file results.sarif corgea upload report.json --project-name my-app ``` +### Block builds that pull in a freshly-published dependency + +```bash +corgea verify-deps --threshold 2d --fail +``` + ### Export results ```bash diff --git a/src/main.rs b/src/main.rs index 5da00f9..dd44042 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ mod cicd; mod log; mod setup_hooks; 
mod authorize; +mod verify_deps; mod scanners { pub mod fortify; pub mod blast; @@ -156,6 +157,52 @@ enum Commands { #[arg(long, short, help = "Include default config (scan types are pii, secrets and fail on levels are CR, HI, ME, LO).")] default_config: bool, }, + /// Verify installed dependencies against the registry to flag recently published versions. + /// Useful as a supply-chain tripwire: any dep whose installed version was published within + /// the configured threshold will be reported. Currently supports npm and Python. + VerifyDeps { + #[arg( + long, + short = 'e', + default_value = "all", + help = "Which ecosystem(s) to verify. Valid options are 'npm', 'python', or 'all' (default)." + )] + ecosystem: String, + + #[arg( + long, + short = 't', + default_value = "2d", + help = "Recency threshold. Any dependency published within this window is flagged. Examples: '2d' (default), '48h', '30m', '1w'. Bare numbers are interpreted as days." + )] + threshold: String, + + #[arg( + long, + help = "Include development dependencies (default: production only)." + )] + include_dev: bool, + + #[arg( + long, + short = 'f', + help = "Exit with a non-zero status code if any recently published dependency is found." + )] + fail: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + #[arg( + long, + short = 'p', + help = "Path to the project to verify. Defaults to the current directory." 
+ )] + path: Option, + }, } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -368,6 +415,52 @@ fn main() { Some(Commands::SetupHooks { default_config }) => { setup_hooks::setup_pre_commit_hook(*default_config); } + Some(Commands::VerifyDeps { ecosystem, threshold, include_dev, fail, json, path }) => { + let parsed_ecosystem = match verify_deps::Ecosystem::parse(ecosystem) { + Ok(e) => e, + Err(e) => { + eprintln!("{}", e); + std::process::exit(2); + } + }; + let parsed_threshold = match verify_deps::parse_threshold(threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("Invalid --threshold: {}", e); + std::process::exit(2); + } + }; + let project_path = std::path::PathBuf::from(path.clone().unwrap_or_else(|| ".".to_string())); + let opts = verify_deps::VerifyOptions { + ecosystem: parsed_ecosystem, + threshold: parsed_threshold, + include_dev: *include_dev, + fail: *fail, + json: *json, + path: project_path, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + }; + + match verify_deps::run(&opts) { + Ok(report) => { + if opts.json { + verify_deps::report::print_json(&report); + } else { + verify_deps::report::print_text(&report); + } + let recent = !report.recent().is_empty(); + let errors = !report.errors().is_empty(); + if (recent || errors) && opts.fail { + std::process::exit(1); + } + } + Err(e) => { + eprintln!("verify-deps failed: {}", e); + std::process::exit(2); + } + } + } None => { utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..241a4b2 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,412 @@ +//! Dependency freshness verification. +//! +//! Discovers installed dependencies from a project (npm and/or Python), +//! looks up publish times from the public registries (npmjs.org / pypi.org), +//! 
and flags any package whose installed version was published within a +//! configurable recency threshold. This is intended to act as a fast +//! supply-chain tripwire against very recently published versions of +//! dependencies (a common malware-injection pattern). + +pub mod npm; +pub mod python; +pub mod registry; +pub mod report; + +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use chrono::{DateTime, Utc}; + +use crate::utils::terminal::{set_text_color, TerminalColor}; + +/// Which ecosystem(s) to scan. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Ecosystem { + Npm, + Python, + All, +} + +impl Ecosystem { + pub fn parse(s: &str) -> Result { + match s.to_lowercase().as_str() { + "npm" | "node" | "javascript" | "js" => Ok(Ecosystem::Npm), + "python" | "py" | "pypi" => Ok(Ecosystem::Python), + "all" | "auto" => Ok(Ecosystem::All), + other => Err(format!( + "Unknown ecosystem '{}'. Valid options are: npm, python, all.", + other + )), + } + } +} + +/// A single resolved dependency that we want to verify. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Dependency { + pub name: String, + pub version: String, + pub ecosystem: DependencyEcosystem, + /// Where in the project we discovered this dependency (e.g. lockfile path). + pub source: String, + /// Whether the dependency is a development-only dependency. + pub dev: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DependencyEcosystem { + Npm, + Python, +} + +impl DependencyEcosystem { + pub fn label(self) -> &'static str { + match self { + DependencyEcosystem::Npm => "npm", + DependencyEcosystem::Python => "python", + } + } +} + +/// One verification finding: the dep was published within the threshold. +#[derive(Debug, Clone)] +pub struct Finding { + pub dep: Dependency, + pub published_at: DateTime, + pub age: Duration, +} + +/// Outcome categories for individual dependency lookups. 
+#[derive(Debug, Clone)] +pub enum LookupOutcome { + /// The dep is older than the threshold — safe. + Ok { + dep: Dependency, + published_at: DateTime, + age: Duration, + }, + /// The dep was published within the threshold window. + Recent(Finding), + /// We could not retrieve metadata for this dep. + Error { dep: Dependency, error: String }, +} + +#[derive(Debug, Clone)] +pub struct VerifyOptions { + pub ecosystem: Ecosystem, + pub threshold: Duration, + pub include_dev: bool, + pub fail: bool, + pub json: bool, + pub path: PathBuf, + /// Optional registry overrides (used in tests). + pub npm_registry: Option, + pub pypi_registry: Option, +} + +impl Default for VerifyOptions { + fn default() -> Self { + Self { + ecosystem: Ecosystem::All, + threshold: Duration::from_secs(2 * 24 * 60 * 60), + include_dev: false, + fail: false, + json: false, + path: PathBuf::from("."), + npm_registry: None, + pypi_registry: None, + } + } +} + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. +pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()), + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!("threshold must be a non-negative finite number: '{}'", input)); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => return Err(format!("unknown threshold unit '{}'. 
Use s, m, h, d, or w.", other)), + }; + + Ok(Duration::from_secs_f64(secs)) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). +pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +/// Top-level entry: discover deps and verify them. +/// +/// Returns `Ok(true)` if any recently-published deps were detected, +/// `Ok(false)` otherwise. Fails (`Err`) only on hard discovery errors. +pub fn run(opts: &VerifyOptions) -> Result { + let path = opts.path.as_path(); + if !path.exists() { + return Err(format!("path does not exist: {}", path.display())); + } + + let mut deps: Vec = Vec::new(); + let mut sources: Vec = Vec::new(); + + if matches!(opts.ecosystem, Ecosystem::Npm | Ecosystem::All) { + match npm::discover(path, opts.include_dev) { + Ok(mut found) => { + if !found.deps.is_empty() { + sources.push(found.source.clone()); + deps.append(&mut found.deps); + } + } + Err(e) => { + if opts.ecosystem == Ecosystem::Npm { + return Err(format!("npm discovery failed: {}", e)); + } else { + eprintln!( + "{}", + set_text_color( + &format!("note: skipping npm — {}", e), + TerminalColor::Yellow + ) + ); + } + } + } + } + + if matches!(opts.ecosystem, Ecosystem::Python | Ecosystem::All) { + match python::discover(path, opts.include_dev) { + Ok(mut found) => { + if !found.deps.is_empty() { + sources.push(found.source.clone()); + deps.append(&mut found.deps); + } + } + Err(e) => { + if opts.ecosystem == 
Ecosystem::Python { + return Err(format!("python discovery failed: {}", e)); + } else { + eprintln!( + "{}", + set_text_color( + &format!("note: skipping python — {}", e), + TerminalColor::Yellow + ) + ); + } + } + } + } + + if deps.is_empty() { + return Err(format!( + "no supported dependency manifests found in {}. Expected one of: \ + package-lock.json, npm-shrinkwrap.json, yarn.lock, requirements.txt, \ + Pipfile.lock, poetry.lock, uv.lock.", + path.display() + )); + } + + deps.sort_by(|a, b| { + a.ecosystem + .label() + .cmp(b.ecosystem.label()) + .then_with(|| a.name.cmp(&b.name)) + .then_with(|| a.version.cmp(&b.version)) + }); + deps.dedup_by(|a, b| { + a.name == b.name && a.version == b.version && a.ecosystem == b.ecosystem + }); + + let now = Utc::now(); + let threshold = chrono::Duration::from_std(opts.threshold) + .map_err(|e| format!("invalid threshold: {}", e))?; + + let mut outcomes: Vec = Vec::with_capacity(deps.len()); + + for dep in deps { + let published = match dep.ecosystem { + DependencyEcosystem::Npm => registry::npm_publish_time( + &dep.name, + &dep.version, + opts.npm_registry.as_deref(), + ), + DependencyEcosystem::Python => registry::pypi_publish_time( + &dep.name, + &dep.version, + opts.pypi_registry.as_deref(), + ), + }; + + match published { + Ok(published_at) => { + let age_chrono = now.signed_duration_since(published_at); + let age = age_chrono + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + if age_chrono < threshold { + outcomes.push(LookupOutcome::Recent(Finding { + dep, + published_at, + age, + })); + } else { + outcomes.push(LookupOutcome::Ok { + dep, + published_at, + age, + }); + } + } + Err(e) => { + outcomes.push(LookupOutcome::Error { + dep, + error: e.to_string(), + }); + } + } + } + + Ok(VerifyReport { + sources, + outcomes, + threshold: opts.threshold, + scanned_at: now, + }) +} + +/// Aggregated result of a verification run. 
+#[derive(Debug, Clone)] +pub struct VerifyReport { + pub sources: Vec, + pub outcomes: Vec, + pub threshold: Duration, + pub scanned_at: DateTime, +} + +impl VerifyReport { + pub fn recent(&self) -> Vec<&Finding> { + self.outcomes + .iter() + .filter_map(|o| match o { + LookupOutcome::Recent(f) => Some(f), + _ => None, + }) + .collect() + } + + pub fn errors(&self) -> Vec<(&Dependency, &str)> { + self.outcomes + .iter() + .filter_map(|o| match o { + LookupOutcome::Error { dep, error } => Some((dep, error.as_str())), + _ => None, + }) + .collect() + } + + pub fn ok_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, LookupOutcome::Ok { .. })) + .count() + } +} + +/// Helper used by lockfile parsers to bundle their result. +#[derive(Debug, Clone)] +pub struct DiscoverResult { + pub deps: Vec, + pub source: String, +} + +/// Read the file at `path` into a String, returning an informative error. +pub(crate) fn read_to_string(path: &Path) -> Result { + std::fs::read_to_string(path) + .map_err(|e| format!("failed to read {}: {}", path.display(), e)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_threshold_units() { + assert_eq!(parse_threshold("2d").unwrap(), Duration::from_secs(2 * 86400)); + assert_eq!(parse_threshold("48h").unwrap(), Duration::from_secs(48 * 3600)); + assert_eq!(parse_threshold("30m").unwrap(), Duration::from_secs(30 * 60)); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!(parse_threshold("1w").unwrap(), Duration::from_secs(7 * 86400)); + assert_eq!(parse_threshold("3").unwrap(), Duration::from_secs(3 * 86400)); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn format_duration_short() { + 
assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } + + #[test] + fn ecosystem_parse_aliases() { + assert_eq!(Ecosystem::parse("npm").unwrap(), Ecosystem::Npm); + assert_eq!(Ecosystem::parse("Python").unwrap(), Ecosystem::Python); + assert_eq!(Ecosystem::parse("all").unwrap(), Ecosystem::All); + assert!(Ecosystem::parse("ruby").is_err()); + } +} diff --git a/src/verify_deps/npm.rs b/src/verify_deps/npm.rs new file mode 100644 index 0000000..dcc26d9 --- /dev/null +++ b/src/verify_deps/npm.rs @@ -0,0 +1,439 @@ +//! Discover installed npm dependencies from a project directory. +//! +//! Supported, in order of preference: +//! 1. `package-lock.json` / `npm-shrinkwrap.json` (lockfile v1, v2, v3) +//! 2. `yarn.lock` (Yarn classic, v1 syntax) +//! +//! These produce *resolved* (pinned) versions so the registry lookup is +//! exact. We deliberately do not parse `package.json` directly — its +//! version specifiers are ranges, which would require resolution we +//! don't want to redo. + +use std::path::Path; + +use serde::Deserialize; + +use super::{Dependency, DependencyEcosystem, DiscoverResult}; + +const SUPPORTED_FILES: &[&str] = &[ + "package-lock.json", + "npm-shrinkwrap.json", + "yarn.lock", +]; + +pub fn discover(project_dir: &Path, include_dev: bool) -> Result { + let candidates: Vec<_> = SUPPORTED_FILES + .iter() + .map(|f| project_dir.join(f)) + .filter(|p| p.exists()) + .collect(); + + if candidates.is_empty() { + return Err(format!( + "no npm lockfile found in {}. 
Looked for: {}", + project_dir.display(), + SUPPORTED_FILES.join(", ") + )); + } + + let chosen = &candidates[0]; + let file_name = chosen + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default(); + + let content = super::read_to_string(chosen)?; + + let deps = match file_name { + "package-lock.json" | "npm-shrinkwrap.json" => parse_npm_lock(&content, include_dev)?, + "yarn.lock" => parse_yarn_lock(&content)?, + _ => unreachable!(), + }; + + Ok(DiscoverResult { + deps, + source: chosen.display().to_string(), + }) +} + +#[derive(Debug, Deserialize)] +struct NpmLockRoot { + #[serde(rename = "lockfileVersion")] + lockfile_version: Option, + #[serde(default)] + dependencies: std::collections::BTreeMap, + #[serde(default)] + packages: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct NpmLockV1Entry { + version: Option, + #[serde(default)] + dev: bool, + #[serde(rename = "optional", default)] + _optional: bool, + #[serde(default)] + dependencies: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct NpmLockV2Entry { + version: Option, + name: Option, + #[serde(default)] + dev: bool, + #[serde(rename = "devOptional", default)] + dev_optional: bool, + #[serde(default)] + link: bool, +} + +pub(crate) fn parse_npm_lock( + content: &str, + include_dev: bool, +) -> Result, String> { + let root: NpmLockRoot = serde_json::from_str(content) + .map_err(|e| format!("failed to parse npm lockfile: {}", e))?; + + let mut deps: Vec = Vec::new(); + let version = root.lockfile_version.unwrap_or(1); + + if version >= 2 && !root.packages.is_empty() { + for (key, entry) in &root.packages { + if key.is_empty() { + continue; + } + if entry.link { + continue; + } + let dev = entry.dev || entry.dev_optional; + if !include_dev && dev { + continue; + } + let name = entry + .name + .clone() + .or_else(|| extract_name_from_packages_key(key)) + .unwrap_or_default(); + let ver = match &entry.version { + Some(v) if !v.is_empty() => v.clone(), + _ => 
continue, + }; + if name.is_empty() { + continue; + } + if !is_registry_version(&ver) { + continue; + } + deps.push(Dependency { + name, + version: ver, + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".to_string(), + dev, + }); + } + } else { + collect_v1(&root.dependencies, include_dev, &mut deps); + } + + Ok(deps) +} + +fn collect_v1( + map: &std::collections::BTreeMap, + include_dev: bool, + out: &mut Vec, +) { + for (name, entry) in map { + let dev = entry.dev; + if include_dev || !dev { + if let Some(version) = entry.version.as_ref() { + if !version.is_empty() && is_registry_version(version) { + out.push(Dependency { + name: name.clone(), + version: version.clone(), + ecosystem: DependencyEcosystem::Npm, + source: "package-lock.json".to_string(), + dev, + }); + } + } + } + if !entry.dependencies.is_empty() { + collect_v1(&entry.dependencies, include_dev, out); + } + } +} + +/// Extract a package name from a v2/v3 lockfile `packages` key like +/// `node_modules/foo` or `node_modules/@scope/bar/node_modules/baz`. +fn extract_name_from_packages_key(key: &str) -> Option { + let last_nm = key.rfind("node_modules/")?; + let rest = &key[last_nm + "node_modules/".len()..]; + if rest.is_empty() { + return None; + } + if rest.starts_with('@') { + let mut parts = rest.splitn(3, '/'); + let scope = parts.next()?; + let pkg = parts.next()?; + Some(format!("{}/{}", scope, pkg)) + } else { + let first = rest.split('/').next()?; + Some(first.to_string()) + } +} + +/// Filter out non-registry version specifiers (git URLs, file refs, links). 
+fn is_registry_version(version: &str) -> bool { + let v = version.trim(); + if v.is_empty() { + return false; + } + let lower = v.to_ascii_lowercase(); + let bad_prefixes = [ + "git+", "git:", "git://", "ssh://", "http://", "https://", "file:", "link:", "workspace:", "npm:", + ]; + if bad_prefixes.iter().any(|p| lower.starts_with(p)) { + return false; + } + let first = v.chars().next().unwrap_or(' '); + if !(first.is_ascii_digit() || first == 'v') { + return false; + } + true +} + +/// Parse a Yarn classic (v1) lockfile. +/// +/// Yarn classic format (simplified, the bits we need): +/// +/// ```text +/// "left-pad@^1.3.0": +/// version "1.3.0" +/// resolved "https://registry.yarnpkg.com/left-pad/-/left-pad-1.3.0.tgz" +/// +/// "@scope/pkg@^1.0.0", "@scope/pkg@^1.0.1": +/// version "1.0.5" +/// ``` +pub(crate) fn parse_yarn_lock(content: &str) -> Result, String> { + let mut deps: Vec = Vec::new(); + let mut current_keys: Vec = Vec::new(); + let mut current_version: Option = None; + + let flush = + |keys: &mut Vec, + version: &mut Option, + out: &mut Vec| { + if let (Some(name), Some(ver)) = ( + keys.first().and_then(|k| yarn_key_name(k)), + version.clone(), + ) { + if is_registry_version(&ver) { + out.push(Dependency { + name, + version: ver, + ecosystem: DependencyEcosystem::Npm, + source: "yarn.lock".to_string(), + dev: false, + }); + } + } + keys.clear(); + *version = None; + }; + + for raw_line in content.lines() { + let line = raw_line; + let trimmed = line.trim_end(); + if trimmed.is_empty() || trimmed.trim_start().starts_with('#') { + if !current_keys.is_empty() && current_version.is_some() { + flush(&mut current_keys, &mut current_version, &mut deps); + } + continue; + } + let leading_ws = line.len() - line.trim_start().len(); + if leading_ws == 0 { + if !current_keys.is_empty() && current_version.is_some() { + flush(&mut current_keys, &mut current_version, &mut deps); + } else { + current_keys.clear(); + current_version = None; + } + let header = 
trimmed.trim_end_matches(':').trim(); + current_keys = split_yarn_header(header); + } else if let Some(rest) = trimmed.trim_start().strip_prefix("version ") { + let v = rest.trim().trim_matches('"').to_string(); + current_version = Some(v); + } + } + if !current_keys.is_empty() && current_version.is_some() { + flush(&mut current_keys, &mut current_version, &mut deps); + } + Ok(deps) +} + +/// Split a yarn lock header line of comma-separated quoted specs into +/// the individual specs. Handles e.g. +/// `"@scope/pkg@^1.0.0", "@scope/pkg@^1.0.1"`. +fn split_yarn_header(header: &str) -> Vec { + let mut out = Vec::new(); + let mut buf = String::new(); + let mut in_quotes = false; + for c in header.chars() { + match c { + '"' => in_quotes = !in_quotes, + ',' if !in_quotes => { + let s = buf.trim().trim_matches('"').to_string(); + if !s.is_empty() { + out.push(s); + } + buf.clear(); + } + _ => buf.push(c), + } + } + let s = buf.trim().trim_matches('"').to_string(); + if !s.is_empty() { + out.push(s); + } + out +} + +/// Extract the package name from a yarn key like `left-pad@^1.3.0` or +/// `@scope/name@^1.0.0`. 
+fn yarn_key_name(key: &str) -> Option { + let key = key.trim().trim_matches('"'); + if key.is_empty() { + return None; + } + let (name_part, _) = if key.starts_with('@') { + let after_scope = key[1..].find('@')?; + let split_at = after_scope + 1; + (&key[..split_at], &key[split_at + 1..]) + } else { + let at = key.find('@')?; + (&key[..at], &key[at + 1..]) + }; + Some(name_part.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_npm_lock_v1() { + let lock = r#"{ + "name": "demo", + "version": "1.0.0", + "lockfileVersion": 1, + "dependencies": { + "left-pad": { "version": "1.3.0" }, + "is-odd": { "version": "3.0.1", "dev": true, + "dependencies": { + "is-number": { "version": "6.0.0", "dev": true } + } + } + } + }"#; + let prod = parse_npm_lock(lock, false).unwrap(); + let names: Vec<_> = prod.iter().map(|d| (d.name.as_str(), d.version.as_str())).collect(); + assert_eq!(names, vec![("left-pad", "1.3.0")]); + + let all = parse_npm_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"left-pad".to_string())); + assert!(names.contains(&"is-odd".to_string())); + assert!(names.contains(&"is-number".to_string())); + } + + #[test] + fn parses_npm_lock_v3() { + let lock = r#"{ + "name": "demo", + "version": "1.0.0", + "lockfileVersion": 3, + "packages": { + "": { + "name": "demo", + "version": "1.0.0" + }, + "node_modules/left-pad": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/left-pad/-/left-pad-1.3.0.tgz" + }, + "node_modules/@types/node": { + "version": "20.10.5", + "dev": true + }, + "node_modules/local-link": { + "link": true, + "resolved": "../local-link" + } + } + }"#; + + let prod = parse_npm_lock(lock, false).unwrap(); + let names: Vec<_> = prod.iter().map(|d| (d.name.as_str(), d.version.as_str())).collect(); + assert_eq!(names, vec![("left-pad", "1.3.0")]); + + let all = parse_npm_lock(lock, true).unwrap(); + let mut got: Vec<_> = 
all.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); + got.sort(); + assert_eq!( + got, + vec![ + ("@types/node".to_string(), "20.10.5".to_string()), + ("left-pad".to_string(), "1.3.0".to_string()), + ] + ); + } + + #[test] + fn parses_yarn_lock() { + let lock = r#"# THIS IS AN AUTOGENERATED FILE. +# yarn lockfile v1 + +"left-pad@^1.3.0": + version "1.3.0" + resolved "https://registry.yarnpkg.com/left-pad/-/left-pad-1.3.0.tgz#5b8a3a7765dfe001261dde915589e782f8c94d1e" + +"@types/node@^20.10.0", "@types/node@^20.10.5": + version "20.10.5" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.10.5.tgz" +"#; + let deps = parse_yarn_lock(lock).unwrap(); + assert_eq!(deps.len(), 2); + let names: Vec<_> = deps.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); + assert!(names.contains(&("left-pad".to_string(), "1.3.0".to_string()))); + assert!(names.contains(&("@types/node".to_string(), "20.10.5".to_string()))); + } + + #[test] + fn ignores_non_registry_versions() { + assert!(!is_registry_version("git+https://github.com/x/y.git#abc")); + assert!(!is_registry_version("file:../pkg")); + assert!(!is_registry_version("link:../pkg")); + assert!(!is_registry_version("workspace:*")); + assert!(!is_registry_version("npm:other@1.0.0")); + assert!(is_registry_version("1.2.3")); + assert!(is_registry_version("v1.2.3")); + } + + #[test] + fn extracts_packages_key_name() { + assert_eq!(extract_name_from_packages_key("node_modules/foo").as_deref(), Some("foo")); + assert_eq!( + extract_name_from_packages_key("node_modules/@scope/bar").as_deref(), + Some("@scope/bar") + ); + assert_eq!( + extract_name_from_packages_key("node_modules/a/node_modules/@s/b").as_deref(), + Some("@s/b") + ); + assert_eq!(extract_name_from_packages_key("").as_deref(), None); + } +} diff --git a/src/verify_deps/python.rs b/src/verify_deps/python.rs new file mode 100644 index 0000000..3bb899d --- /dev/null +++ b/src/verify_deps/python.rs @@ -0,0 +1,453 @@ +//! 
Discover installed Python dependencies from a project directory. +//! +//! Supported, in order of preference: +//! 1. `poetry.lock` (TOML) +//! 2. `Pipfile.lock` (JSON) +//! 3. `uv.lock` (TOML) +//! 4. `requirements.txt` — only `==`-pinned lines (we can't verify a +//! range against a registry without resolving, which is out of scope). +//! +//! All resolved dependencies are pinned to exact versions. + +use std::path::Path; + +use serde::Deserialize; + +use super::{Dependency, DependencyEcosystem, DiscoverResult}; + +const SUPPORTED_FILES: &[&str] = &[ + "poetry.lock", + "Pipfile.lock", + "uv.lock", + "requirements.txt", +]; + +pub fn discover(project_dir: &Path, include_dev: bool) -> Result { + let candidates: Vec<_> = SUPPORTED_FILES + .iter() + .map(|f| project_dir.join(f)) + .filter(|p| p.exists()) + .collect(); + + if candidates.is_empty() { + return Err(format!( + "no Python lockfile found in {}. Looked for: {}", + project_dir.display(), + SUPPORTED_FILES.join(", ") + )); + } + + let chosen = &candidates[0]; + let file_name = chosen + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default(); + + let content = super::read_to_string(chosen)?; + + let deps = match file_name { + "poetry.lock" => parse_poetry_lock(&content, include_dev)?, + "Pipfile.lock" => parse_pipfile_lock(&content, include_dev)?, + "uv.lock" => parse_uv_lock(&content)?, + "requirements.txt" => parse_requirements(&content), + _ => unreachable!(), + }; + + Ok(DiscoverResult { + deps, + source: chosen.display().to_string(), + }) +} + +#[derive(Debug, Deserialize)] +struct PoetryLockRoot { + #[serde(default)] + package: Vec, +} + +#[derive(Debug, Deserialize)] +struct PoetryPackage { + name: String, + version: String, + #[serde(default)] + category: Option, + #[serde(default)] + source: Option, + #[serde(default)] + groups: Option>, +} + +#[derive(Debug, Deserialize)] +struct PoetrySource { + #[serde(rename = "type")] + source_type: Option, +} + +pub(crate) fn parse_poetry_lock(content: 
&str, include_dev: bool) -> Result, String> { + let root: PoetryLockRoot = + toml::from_str(content).map_err(|e| format!("failed to parse poetry.lock: {}", e))?; + + let mut out = Vec::new(); + for pkg in root.package { + if let Some(src) = &pkg.source { + if let Some(t) = &src.source_type { + let t = t.to_ascii_lowercase(); + if t == "git" || t == "directory" || t == "file" || t == "url" { + continue; + } + } + } + + let is_dev = is_poetry_dev(&pkg); + if !include_dev && is_dev { + continue; + } + + out.push(Dependency { + name: normalize_python_name(&pkg.name), + version: pkg.version, + ecosystem: DependencyEcosystem::Python, + source: "poetry.lock".to_string(), + dev: is_dev, + }); + } + Ok(out) +} + +fn is_poetry_dev(pkg: &PoetryPackage) -> bool { + if let Some(cat) = &pkg.category { + if !cat.is_empty() && cat.to_ascii_lowercase() != "main" { + return true; + } + } + if let Some(groups) = &pkg.groups { + if !groups.is_empty() + && !groups.iter().any(|g| g.eq_ignore_ascii_case("main")) + { + return true; + } + } + false +} + +#[derive(Debug, Deserialize)] +struct PipfileLockRoot { + #[serde(default)] + default: std::collections::BTreeMap, + #[serde(default)] + develop: std::collections::BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct PipfileLockEntry { + version: Option, + #[serde(default)] + git: Option, + #[serde(default)] + path: Option, +} + +pub(crate) fn parse_pipfile_lock(content: &str, include_dev: bool) -> Result, String> { + let root: PipfileLockRoot = + serde_json::from_str(content).map_err(|e| format!("failed to parse Pipfile.lock: {}", e))?; + let mut out = Vec::new(); + extend_pipfile(&root.default, false, &mut out); + if include_dev { + extend_pipfile(&root.develop, true, &mut out); + } + Ok(out) +} + +fn extend_pipfile( + map: &std::collections::BTreeMap, + dev: bool, + out: &mut Vec, +) { + for (name, entry) in map { + if entry.git.is_some() || entry.path.is_some() { + continue; + } + let version = match entry.version.as_ref() { + Some(v) 
=> v,
+            None => continue,
+        };
+        // Pipfile pins look like "==1.2.3" — strip the leading "==".
+        let version = version.trim_start_matches("==").trim();
+        if version.is_empty() {
+            continue;
+        }
+        out.push(Dependency {
+            name: normalize_python_name(name),
+            version: version.to_string(),
+            ecosystem: DependencyEcosystem::Python,
+            source: "Pipfile.lock".to_string(),
+            dev,
+        });
+    }
+}
+
+/// Top level of a `uv.lock` file: only the `[[package]]` array is read.
+#[derive(Debug, Deserialize)]
+struct UvLockRoot {
+    #[serde(default)]
+    package: Vec<UvPackage>,
+}
+
+/// One `[[package]]` entry of `uv.lock`.
+#[derive(Debug, Deserialize)]
+struct UvPackage {
+    name: String,
+    version: Option<String>,
+    #[serde(default)]
+    source: Option<UvSource>,
+}
+
+/// The `[package.source]` table. Only the presence of each key matters to
+/// the parser; the values themselves are not inspected.
+#[derive(Debug, Deserialize)]
+struct UvSource {
+    #[serde(default)]
+    registry: Option<String>,
+    #[serde(default)]
+    git: Option<String>,
+    #[serde(default)]
+    url: Option<String>,
+    #[serde(default)]
+    path: Option<String>,
+    #[serde(default)]
+    editable: Option<String>,
+    // `virtual` is a reserved word in Rust, so the field carries a trailing
+    // underscore. Without the rename serde would look for a key literally
+    // called `virtual_` and this field would never be populated.
+    #[serde(default, rename = "virtual")]
+    virtual_: Option<String>,
+}
+
+/// Parse a `uv.lock` file into registry-resolvable dependencies.
+///
+/// A package is emitted only when it has a non-empty `version` and a
+/// `[package.source]` table that names a `registry` and none of `git`,
+/// `url`, `path`, `editable` or `virtual`. Every emitted dependency has
+/// `dev: false`; this parser does not classify dev dependencies.
+///
+/// # Errors
+///
+/// Returns a message prefixed with `failed to parse uv.lock:` when the
+/// content is not valid TOML for the expected shape.
+pub(crate) fn parse_uv_lock(content: &str) -> Result<Vec<Dependency>, String> {
+    let root: UvLockRoot =
+        toml::from_str(content).map_err(|e| format!("failed to parse uv.lock: {}", e))?;
+
+    let mut out = Vec::new();
+    for pkg in root.package {
+        let version = match pkg.version {
+            Some(v) if !v.is_empty() => v,
+            _ => continue,
+        };
+        if let Some(src) = pkg.source {
+            // Skip non-registry sources.
+            if src.git.is_some()
+                || src.url.is_some()
+                || src.path.is_some()
+                || src.editable.is_some()
+                || src.virtual_.is_some()
+            {
+                continue;
+            }
+            if src.registry.is_none() {
+                continue;
+            }
+        } else {
+            // No source table at all: nothing to look up against.
+            continue;
+        }
+        out.push(Dependency {
+            name: normalize_python_name(&pkg.name),
+            version,
+            ecosystem: DependencyEcosystem::Python,
+            source: "uv.lock".to_string(),
+            dev: false,
+        });
+    }
+    Ok(out)
+}
+
+/// Parse a `requirements.txt` file. We only emit deps that are
+/// `==`-pinned. Everything else (ranges, git URLs, editables) is
+/// skipped silently — those can't be checked against a registry
+/// without resolution.
+///
+/// Handling details:
+/// * `#` starts a comment; the rest of the physical line is dropped.
+/// * A trailing `\` joins the line with the next one (the layout used by
+///   hash-pinned files). A blank or comment-only line, or the end of the
+///   file, terminates a pending continuation so it cannot swallow the
+///   following requirement.
+/// * Lines starting with `-` (`-r`, `-e`, `--index-url`, ...) are skipped.
+/// * Environment markers (after `;`), extras (`name[extra]`) and
+///   per-requirement options (`--hash=...`) are ignored.
+/// * Whitespace around the operator is accepted (`name == 1.2.3`), and
+///   `===` is treated like `==`.
+/// * A left-hand side that is not a plain distribution name (URLs,
+///   compound specifiers such as `name>=1,==2`) is skipped.
+pub(crate) fn parse_requirements(content: &str) -> Vec<Dependency> {
+    let mut out = Vec::new();
+    // Pieces of a backslash-continued logical line collected so far.
+    let mut continued = String::new();
+
+    for raw_line in content.lines() {
+        let line = match raw_line.find('#') {
+            Some(idx) => &raw_line[..idx],
+            None => raw_line,
+        }
+        .trim();
+
+        if line.ends_with('\\') {
+            continued.push_str(line.trim_end_matches('\\').trim());
+            continued.push(' ');
+            continue;
+        }
+
+        // Any line without a trailing backslash (even an empty one)
+        // completes the logical line.
+        continued.push_str(line);
+        let logical = std::mem::take(&mut continued);
+        push_requirement(&logical, &mut out);
+    }
+    // The file may end while a continuation is still pending.
+    push_requirement(&continued, &mut out);
+    out
+}
+
+/// Append the dependency described by one logical requirement line, if it
+/// is an exact pin.
+fn push_requirement(line: &str, out: &mut Vec<Dependency>) {
+    if let Some((name, version)) = parse_requirement_line(line) {
+        out.push(Dependency {
+            name: normalize_python_name(name),
+            version: version.to_string(),
+            ecosystem: DependencyEcosystem::Python,
+            source: "requirements.txt".to_string(),
+            dev: false,
+        });
+    }
+}
+
+/// Extract `(name, version)` from one logical requirement line, or `None`
+/// when the line is not an exact `==` pin of a plainly named package.
+fn parse_requirement_line(line: &str) -> Option<(&str, &str)> {
+    let line = line.trim();
+    if line.is_empty() || line.starts_with('-') {
+        return None;
+    }
+
+    // Everything after the first `;` is an environment marker.
+    let spec = line.split(';').next().unwrap_or("");
+    let (lhs, rhs) = spec.split_once("==")?;
+
+    // Left-hand side: the distribution name, optionally followed by
+    // `[extras]` and nothing else.
+    let lhs = lhs.trim();
+    let name = match lhs.split_once('[') {
+        Some((name, extras)) if extras.trim_end().ends_with(']') => name.trim_end(),
+        Some(_) => return None,
+        None => lhs,
+    };
+    let is_plain_name = !name.is_empty()
+        && name
+            .chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.');
+    if !is_plain_name {
+        return None;
+    }
+
+    // Right-hand side: `===` leaves one `=` behind; the version is the
+    // first token, anything after it is a per-requirement option.
+    let version = rhs
+        .trim_start_matches('=')
+        .split_whitespace()
+        .next()
+        .unwrap_or("")
+        .trim_matches(|c: char| c == '\'' || c == '"');
+    if version.is_empty() {
+        return None;
+    }
+    Some((name, version))
+}
+
+/// Normalize a Python distribution name per PEP 503 (lowercase,
+/// runs of `_-.` collapsed to single `-`).
+pub(crate) fn normalize_python_name(name: &str) -> String {
+    let lower = name.to_ascii_lowercase();
+    let mut out = String::with_capacity(lower.len());
+    let mut prev_dash = false;
+    for c in lower.chars() {
+        if c == '_' || c == '.'
|| c == '-' { + if !prev_dash { + out.push('-'); + prev_dash = true; + } + } else { + out.push(c); + prev_dash = false; + } + } + out.trim_matches('-').to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalizes_names() { + assert_eq!(normalize_python_name("Flask"), "flask"); + assert_eq!(normalize_python_name("pytest_mock"), "pytest-mock"); + assert_eq!(normalize_python_name("ruamel.yaml"), "ruamel-yaml"); + assert_eq!(normalize_python_name("Some__Weird--Name.."), "some-weird-name"); + } + + #[test] + fn parses_requirements_txt() { + let req = r#" +# A comment +requests==2.31.0 +flask==2.3.2 ; python_version >= "3.7" +numpy>=1.20 # not pinned, ignored +-r other.txt +git+https://github.com/x/y.git +django[bcrypt]==4.2.0 + "#; + let deps = parse_requirements(req); + let pairs: Vec<_> = deps.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); + assert!(pairs.contains(&("requests".to_string(), "2.31.0".to_string()))); + assert!(pairs.contains(&("flask".to_string(), "2.3.2".to_string()))); + assert!(pairs.contains(&("django".to_string(), "4.2.0".to_string()))); + assert_eq!(deps.len(), 3); + } + + #[test] + fn parses_poetry_lock() { + let lock = r#" +[[package]] +name = "Requests" +version = "2.31.0" +description = "x" +category = "main" + +[[package]] +name = "pytest" +version = "7.4.0" +description = "x" +category = "dev" + +[[package]] +name = "local-pkg" +version = "1.0.0" +description = "x" +category = "main" + +[package.source] +type = "directory" +url = "../local" +"#; + let prod = parse_poetry_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); + assert_eq!(pairs, vec![("requests".to_string(), "2.31.0".to_string())]); + + let all = parse_poetry_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"pytest".to_string())); + assert!(!names.contains(&"local-pkg".to_string())); + } + + #[test] + 
fn parses_pipfile_lock() { + let lock = r#"{ + "_meta": {}, + "default": { + "requests": { "version": "==2.31.0" }, + "private": { "git": "https://example.com/x.git" } + }, + "develop": { + "pytest": { "version": "==7.4.0" } + } + }"#; + let prod = parse_pipfile_lock(lock, false).unwrap(); + let names: Vec<_> = prod.iter().map(|d| d.name.clone()).collect(); + assert_eq!(names, vec!["requests".to_string()]); + + let all = parse_pipfile_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"pytest".to_string())); + } + + #[test] + fn parses_uv_lock() { + let lock = r#" +[[package]] +name = "requests" +version = "2.31.0" + +[package.source] +registry = "https://pypi.org/simple" + +[[package]] +name = "myproj" +version = "0.1.0" + +[package.source] +virtual = "." + +[[package]] +name = "gitdep" +version = "0.0.0" + +[package.source] +git = "https://example.com/x.git" +"#; + let deps = parse_uv_lock(lock).unwrap(); + let pairs: Vec<_> = deps.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); + assert_eq!(pairs, vec![("requests".to_string(), "2.31.0".to_string())]); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..a73d2ac --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,273 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both functions return the publish time of an exact (name, version) +//! tuple as a UTC timestamp. 
+
+use chrono::{DateTime, Utc};
+use serde::Deserialize;
+use std::time::Duration;
+
+const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org";
+const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org";
+
+/// Upper bound for a single registry request.
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(20);
+
+/// `User-Agent` sent with every registry request; embeds the crate version
+/// taken from `CARGO_PKG_VERSION` at compile time.
+fn user_agent() -> String {
+    format!("corgea-cli/{} (verify-deps)", env!("CARGO_PKG_VERSION"))
+}
+
+/// Build the blocking HTTP client used for registry lookups, configured
+/// with [`REQUEST_TIMEOUT`] and the [`user_agent`] string.
+///
+/// # Errors
+///
+/// Returns a message prefixed with `failed to build http client:` when the
+/// client cannot be constructed.
+fn http_client() -> Result<reqwest::blocking::Client, String> {
+    reqwest::blocking::Client::builder()
+        .timeout(REQUEST_TIMEOUT)
+        .user_agent(user_agent())
+        .build()
+        .map_err(|e| format!("failed to build http client: {}", e))
+}
+
+/// The part of an npm package document that is read: the `time` map. An
+/// absent map deserializes as empty.
+#[derive(Debug, Deserialize)]
+struct NpmTimeResponse {
+    #[serde(default)]
+    time: std::collections::BTreeMap<String, String>,
+}
+
+/// Look up the publish time of an exact `name@version` from the npm registry.
+///
+/// We hit the package metadata URL and pull the version's timestamp out
+/// of the `time` map. The request uses a plain `application/json` Accept
+/// header on purpose: the abbreviated
+/// `application/vnd.npm.install-v1+json` form omits `time`, so the full
+/// document is required.
+pub fn npm_publish_time( + name: &str, + version: &str, + registry: Option<&str>, +) -> Result, String> { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry.unwrap_or(DEFAULT_NPM_REGISTRY).trim_end_matches('/'); + let path = encode_npm_name(name); + let url = format!("{}/{}", base, path); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on npm registry ({})", + name, base + )); + } + if !status.is_success() { + return Err(format!( + "npm registry returned status {} for '{}'", + status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let parsed: NpmTimeResponse = serde_json::from_str(&body) + .map_err(|e| format!("failed to parse npm registry response for '{}': {}", name, e))?; + + let raw = parsed.time.get(version).ok_or_else(|| { + format!( + "version '{}' for package '{}' not found in npm registry metadata", + version, name + ) + })?; + + parse_iso8601(raw).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw, name, version, e + ) + }) +} + +/// URL-encode an npm package name. Scoped names contain `@` and `/`, +/// the latter must be encoded as `%2f` for the package metadata URL. +fn encode_npm_name(name: &str) -> String { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() +} + +#[derive(Debug, Deserialize)] +struct PypiVersionResponse { + urls: Vec, +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, +} + +/// Look up the publish time of an exact (name, version) from PyPI. 
+/// +/// We hit the JSON API for that exact version (`/pypi///json`) +/// and use the earliest `upload_time_iso_8601` across the version's +/// uploaded files (sdist + wheels) as the publish time. The earliest +/// time is the right one — once the first artifact is up the version +/// is effectively published. +pub fn pypi_publish_time( + name: &str, + version: &str, + registry: Option<&str>, +) -> Result, String> { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry.unwrap_or(DEFAULT_PYPI_REGISTRY).trim_end_matches('/'); + let url = format!( + "{}/pypi/{}/{}/json", + base, + urlencoding::encode(name), + urlencoding::encode(version) + ); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("PyPI request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}=={}' not found on PyPI ({})", + name, version, base + )); + } + if !status.is_success() { + return Err(format!( + "PyPI returned status {} for '{}=={}'", + status, name, version + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read PyPI response: {}", e))?; + + let parsed: PypiVersionResponse = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse PyPI response for '{}=={}': {}", + name, version, e + ) + })?; + + let mut earliest: Option> = None; + for u in parsed.urls { + let raw = u + .upload_time_iso_8601 + .or(u.upload_time); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(&raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + + earliest.ok_or_else(|| { + format!( + "no upload time information found on PyPI for '{}=={}' (yanked?)", + name, version + ) + }) +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. 
PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn npm_name_encoding() { + assert_eq!(encode_npm_name("left-pad"), "left-pad"); + assert_eq!(encode_npm_name("@scope/pkg"), "@scope%2fpkg"); + assert_eq!(encode_npm_name("@types/node"), "@types%2fnode"); + } + + #[test] + fn parses_iso8601_variants() { + assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05").is_ok()); + assert!(parse_iso8601("not a date").is_err()); + } + + /// Network-touching integration tests. Skipped by default (#[ignore]) + /// so unit-test runs stay hermetic. 
Run with: + /// cargo test -- --ignored verify_deps::registry::tests::live + #[test] + #[ignore] + fn live_npm_left_pad() { + let dt = npm_publish_time("left-pad", "1.3.0", None).expect("npm lookup"); + assert_eq!(dt.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_pypi_requests() { + let dt = pypi_publish_time("requests", "2.31.0", None).expect("pypi lookup"); + assert_eq!(dt.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_case_insensitive() { + let dt = pypi_publish_time("Flask", "2.3.2", None).expect("pypi case-insensitive"); + assert_eq!(dt.format("%Y-%m-%d").to_string(), "2023-05-01"); + } + + #[test] + #[ignore] + fn live_npm_unknown_version() { + let err = npm_publish_time("left-pad", "999.999.999", None).err().unwrap(); + assert!(err.contains("not found"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_unknown_version() { + let err = pypi_publish_time("requests", "999.999.999", None).err().unwrap(); + assert!(err.contains("not found"), "got: {}", err); + } +} diff --git a/src/verify_deps/report.rs b/src/verify_deps/report.rs new file mode 100644 index 0000000..6d927f7 --- /dev/null +++ b/src/verify_deps/report.rs @@ -0,0 +1,147 @@ +//! Render a verification report to the terminal or as JSON. + +use serde_json::json; + +use crate::utils::terminal::{set_text_color, TerminalColor}; + +use super::{format_duration, LookupOutcome, VerifyReport}; + +/// Render the report for human consumption. 
+///
+/// Output, in order: the threshold, the lockfile sources (when any), a
+/// one-line summary of counts, the recently published dependencies (when
+/// any), the dependencies that could not be verified (when any), and a
+/// confirmation line when both of those lists are empty.
+///
+/// Recent and error rows share the same `name@version (ecosystem)` layout.
+pub fn print_text(report: &VerifyReport) {
+    println!(
+        "Verifying dependencies against publish-time threshold of {}",
+        format_duration(report.threshold)
+    );
+    if !report.sources.is_empty() {
+        println!("Sources:");
+        for s in &report.sources {
+            println!(" - {}", s);
+        }
+    }
+
+    let recent = report.recent();
+    let errors = report.errors();
+    let ok_count = report.ok_count();
+
+    println!(
+        "Checked {} dependencies — {} ok, {} recent, {} errors",
+        report.outcomes.len(),
+        ok_count,
+        recent.len(),
+        errors.len(),
+    );
+
+    if !recent.is_empty() {
+        println!();
+        println!(
+            "{}",
+            set_text_color(
+                "Recently published dependencies (within threshold):",
+                TerminalColor::Yellow,
+            )
+        );
+        for f in &recent {
+            // Argument order must follow the `{}@{} ({})` placeholders:
+            // name, version, then ecosystem — the same as the error rows.
+            println!(
+                " {} {}@{} ({}) published {} ago at {}",
+                set_text_color("⚠", TerminalColor::Yellow),
+                f.dep.name,
+                f.dep.version,
+                f.dep.ecosystem.label(),
+                set_text_color(
+                    &format_duration(f.age),
+                    TerminalColor::Yellow,
+                ),
+                f.published_at.format("%Y-%m-%d %H:%M:%S UTC"),
+            );
+        }
+    }
+
+    if !errors.is_empty() {
+        println!();
+        println!(
+            "{}",
+            set_text_color(
+                "Could not verify the following dependencies:",
+                TerminalColor::Red,
+            )
+        );
+        for (dep, err) in &errors {
+            println!(
+                " {} {}@{} ({}): {}",
+                set_text_color("✗", TerminalColor::Red),
+                dep.name,
+                dep.version,
+                dep.ecosystem.label(),
+                err,
+            );
+        }
+    }
+
+    if recent.is_empty() && errors.is_empty() {
+        println!(
+            "{}",
+            set_text_color(
+                "All dependencies are older than the threshold.",
+                TerminalColor::Green,
+            )
+        );
+    }
+}
+
+/// Render the report as a single JSON object on stdout.
+pub fn print_json(report: &VerifyReport) { + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + LookupOutcome::Ok { + dep, + published_at, + age, + } => json!({ + "status": "ok", + "ecosystem": dep.ecosystem.label(), + "name": dep.name, + "version": dep.version, + "dev": dep.dev, + "source": dep.source, + "published_at": published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + LookupOutcome::Recent(f) => json!({ + "status": "recent", + "ecosystem": f.dep.ecosystem.label(), + "name": f.dep.name, + "version": f.dep.version, + "dev": f.dep.dev, + "source": f.dep.source, + "published_at": f.published_at.to_rfc3339(), + "age_seconds": f.age.as_secs(), + }), + LookupOutcome::Error { dep, error } => json!({ + "status": "error", + "ecosystem": dep.ecosystem.label(), + "name": dep.name, + "version": dep.version, + "dev": dep.dev, + "source": dep.source, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "scanned_at": report.scanned_at.to_rfc3339(), + "threshold_seconds": report.threshold.as_secs(), + "sources": report.sources, + "summary": { + "checked": report.outcomes.len(), + "ok": report.ok_count(), + "recent": report.recent().len(), + "errors": report.errors().len(), + }, + "results": outcomes, + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} From d1e3b70351bc701ffd5ec5cd5614748c32999046 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 21 May 2026 12:19:03 +0000 Subject: [PATCH 2/4] verify-deps: support pnpm-lock.yaml (v5, v6, v9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds pnpm-lock.yaml as a third npm-ecosystem source, alongside the existing package-lock.json/npm-shrinkwrap.json and yarn.lock parsers. Discovery prefers package-lock first, then pnpm-lock.yaml, then yarn.lock. 
Lockfile shapes handled in a single line-based parser: * v5/v6 `packages:` keys with leading slash + slash separator: /lodash/4.17.21: /@types/node/20.10.5: * v6+ keys with at-sign separator: /lodash@4.17.21: /@types/node@20.10.5: * v9 keys with no leading slash and quoted scoped names: lodash@4.17.21: '@types/node@20.10.5': * Peer-dep suffixes are stripped from the version before lookup — both v6 underscore form (`1.0.0_react@18.0.0`) and v9 paren form (`1.0.0(react@18.0.0)`). The bare semver is what the registry knows. Dev/prod classification: * v6 lockfiles carry a per-package `dev:` field — used directly. * v9 lockfiles don't. We parse `importers:` (and the v5 flat layout) to get top-level dependencies vs devDependencies, and treat a (name, version) appearing only in devDependencies of all importers as dev. Unclassified transitive packages stay treated as prod, which is the safer default for a supply-chain tripwire. Tests: * 7 new unit tests covering all three key conventions, peer suffix stripping in both forms, garbage rejection, v9/v6/v5 lockfile parsing, and dev/prod classification. * Verified end-to-end against a real pnpm-lock.yaml generated by `pnpm install --lockfile-only` for express@4.18.2 + @types/node@20.10.5 + typescript@5.4.5(dev): 70 transitive deps correctly resolved, typescript correctly excluded from prod scans, and live registry lookups flagged 2 actually-recent transitive deps (hasown, side-channel-list) within a 60d window. Docs: `skills/corgea/SKILL.md` updated to advertise pnpm-lock.yaml (v5/v6/v9) in the supported lockfile list, and the verify-deps section that was lost during the previous commit's edits is restored. 
Co-authored-by: Ibrahim Rahhal --- skills/corgea/SKILL.md | 25 ++ src/verify_deps/mod.rs | 4 +- src/verify_deps/npm.rs | 591 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 617 insertions(+), 3 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 09470fc..eb7fb95 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -109,6 +109,31 @@ corgea setup-hooks --default-config # Default: secrets + PII, fail on Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypass with `git commit --no-verify`. +### Verify Deps — `corgea verify-deps` + +Supply-chain tripwire: looks up every pinned dependency in the project against the public registry (npm or PyPI) and flags anything whose installed version was published within a configurable recency window. Useful for catching very-recent malicious version pushes before they get baked into a build. + +```bash +corgea verify-deps # 2-day window, prod deps, both ecosystems +corgea verify-deps --threshold 7d # widen the window to 7 days +corgea verify-deps --threshold 48h --fail # exit 1 if any recent dep is found (CI gate) +corgea verify-deps --ecosystem npm # only check npm deps +corgea verify-deps --ecosystem python --include-dev # python only, include dev deps +corgea verify-deps --path ./services/api # check a different project +corgea verify-deps --json # machine-readable output +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--ecosystem` | `-e` | `npm`, `python`, or `all` (default) | +| `--threshold` | `-t` | Recency window: `2d`, `48h`, `30m`, `1w`, etc. 
(default `2d`) | +| `--include-dev` | | Include development dependencies | +| `--fail` | `-f` | Exit non-zero if any recent dep is detected | +| `--json` | | JSON output instead of human text | +| `--path` | `-p` | Project directory (default: `.`) | + +Supported lockfiles (preferred → fallback): npm: `package-lock.json`, `npm-shrinkwrap.json`, `pnpm-lock.yaml` (v5/v6/v9), `yarn.lock`. Python: `poetry.lock`, `Pipfile.lock`, `uv.lock`, `requirements.txt` (only `==`-pinned lines). + ## Common Workflows ### Scan full project diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs index 241a4b2..c2a8da3 100644 --- a/src/verify_deps/mod.rs +++ b/src/verify_deps/mod.rs @@ -244,8 +244,8 @@ pub fn run(opts: &VerifyOptions) -> Result { if deps.is_empty() { return Err(format!( "no supported dependency manifests found in {}. Expected one of: \ - package-lock.json, npm-shrinkwrap.json, yarn.lock, requirements.txt, \ - Pipfile.lock, poetry.lock, uv.lock.", + package-lock.json, npm-shrinkwrap.json, pnpm-lock.yaml, yarn.lock, \ + requirements.txt, Pipfile.lock, poetry.lock, uv.lock.", path.display() )); } diff --git a/src/verify_deps/npm.rs b/src/verify_deps/npm.rs index dcc26d9..5d12240 100644 --- a/src/verify_deps/npm.rs +++ b/src/verify_deps/npm.rs @@ -2,7 +2,8 @@ //! //! Supported, in order of preference: //! 1. `package-lock.json` / `npm-shrinkwrap.json` (lockfile v1, v2, v3) -//! 2. `yarn.lock` (Yarn classic, v1 syntax) +//! 2. `pnpm-lock.yaml` (pnpm v5, v6, v7, v9) +//! 3. `yarn.lock` (Yarn classic, v1 syntax) //! //! These produce *resolved* (pinned) versions so the registry lookup is //! exact. 
We deliberately do not parse `package.json` directly — its @@ -18,6 +19,7 @@ use super::{Dependency, DependencyEcosystem, DiscoverResult}; const SUPPORTED_FILES: &[&str] = &[ "package-lock.json", "npm-shrinkwrap.json", + "pnpm-lock.yaml", "yarn.lock", ]; @@ -46,6 +48,7 @@ pub fn discover(project_dir: &Path, include_dev: bool) -> Result parse_npm_lock(&content, include_dev)?, + "pnpm-lock.yaml" => parse_pnpm_lock(&content, include_dev)?, "yarn.lock" => parse_yarn_lock(&content)?, _ => unreachable!(), }; @@ -320,6 +323,361 @@ fn yarn_key_name(key: &str) -> Option { Some(name_part.to_string()) } +/// Parse a pnpm-lock.yaml file. Supports lockfile versions 5.x, 6.x, +/// 7.x and 9.x — the format and key conventions vary across versions: +/// +/// * v5/v6 keys in `packages:` use `/` separators: +/// `/lodash/4.17.21:` or `/@types/node/20.10.5:` +/// * v6+ keys may use `@` for the version separator: +/// `/lodash@4.17.21:` or `/@types/node@20.10.5:` +/// * v9 keys drop the leading `/` entirely: +/// `lodash@4.17.21:` or `'@types/node@20.10.5':` +/// +/// Versions can carry a peer-deps suffix that is *not* part of the +/// resolved version — `(react@18.0.0)` in v9, `_react@18.0.0` in v6. +/// Both must be stripped before lookup, since the registry only knows +/// the bare semver version. +/// +/// Dev/prod classification: +/// * v6 packages have a `dev: true|false` field per entry — we use it. +/// * v9 packages don't carry `dev:`. We instead consult the +/// `importers:` section: a (name, version) that appears *only* in +/// `devDependencies` of all importers (and never in `dependencies`) +/// is treated as dev. This is best-effort: transitive deps that are +/// only reached through a dev top-level package are still treated as +/// non-dev, because resolving the full graph from a lockfile is out +/// of scope here. Including those in production scans is the safer +/// default for a supply-chain tripwire. 
+pub(crate) fn parse_pnpm_lock( + content: &str, + include_dev: bool, +) -> Result, String> { + let importers = parse_pnpm_importers(content); + let entries = parse_pnpm_packages(content)?; + + let mut deps = Vec::new(); + for entry in entries { + let key = (entry.name.clone(), entry.version.clone()); + let dev = match entry.dev_field { + Some(d) => d, + None => { + let in_prod = importers.prod.contains(&key); + let in_dev = importers.dev.contains(&key); + in_dev && !in_prod + } + }; + if !include_dev && dev { + continue; + } + if !is_registry_version(&entry.version) { + continue; + } + deps.push(Dependency { + name: entry.name, + version: entry.version, + ecosystem: DependencyEcosystem::Npm, + source: "pnpm-lock.yaml".to_string(), + dev, + }); + } + Ok(deps) +} + +#[derive(Debug, Default)] +struct PnpmImporters { + prod: std::collections::BTreeSet<(String, String)>, + dev: std::collections::BTreeSet<(String, String)>, +} + +#[derive(Debug)] +struct PnpmPackageEntry { + name: String, + version: String, + dev_field: Option, +} + +fn parse_pnpm_packages(content: &str) -> Result, String> { + let mut out = Vec::new(); + let mut state = PackagesState::Outside; + + let mut current_name: Option = None; + let mut current_version: Option = None; + let mut current_dev: Option = None; + let mut entry_indent: usize = 0; + + for raw_line in content.lines() { + if raw_line.trim().is_empty() || raw_line.trim_start().starts_with('#') { + continue; + } + let indent = leading_spaces(raw_line); + let body = &raw_line[indent..]; + + if indent == 0 { + commit_pnpm_entry(&mut out, &mut current_name, &mut current_version, &mut current_dev); + state = if body.trim_end_matches(' ') == "packages:" { + PackagesState::Inside + } else { + PackagesState::Outside + }; + continue; + } + + if !matches!(state, PackagesState::Inside) { + continue; + } + + if current_name.is_none() { + entry_indent = indent; + } + + if indent == entry_indent && body.ends_with(':') { + commit_pnpm_entry(&mut out, &mut 
current_name, &mut current_version, &mut current_dev); + + let key = body.trim_end_matches(':').trim(); + if let Some((name, version)) = extract_pnpm_pkg_key(key) { + current_name = Some(name); + current_version = Some(version); + current_dev = None; + } else { + current_name = None; + current_version = None; + current_dev = None; + } + } else if indent > entry_indent { + if let Some(rest) = body.strip_prefix("dev:") { + let v = rest.trim(); + if v == "true" { + current_dev = Some(true); + } else if v == "false" { + current_dev = Some(false); + } + } + } + } + commit_pnpm_entry(&mut out, &mut current_name, &mut current_version, &mut current_dev); + Ok(out) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PackagesState { + Outside, + Inside, +} + +fn commit_pnpm_entry( + out: &mut Vec, + name: &mut Option, + version: &mut Option, + dev: &mut Option, +) { + if let (Some(n), Some(v)) = (name.take(), version.take()) { + out.push(PnpmPackageEntry { + name: n, + version: v, + dev_field: dev.take(), + }); + } else { + *name = None; + *version = None; + *dev = None; + } +} + +fn parse_pnpm_importers(content: &str) -> PnpmImporters { + let mut importers = PnpmImporters::default(); + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum Bucket { + Prod, + Dev, + None, + } + + let mut active_bucket = Bucket::None; + let mut bucket_indent: usize = usize::MAX; + let mut in_importers_section = false; + let mut pending_name: Option<(String, usize)> = None; + + for raw_line in content.lines() { + if raw_line.trim().is_empty() || raw_line.trim_start().starts_with('#') { + continue; + } + let indent = leading_spaces(raw_line); + let body = &raw_line[indent..]; + + if indent == 0 { + in_importers_section = body.trim_end_matches(' ') == "importers:"; + if !in_importers_section { + if body.trim_end_matches(' ') == "dependencies:" { + active_bucket = Bucket::Prod; + bucket_indent = 0; + pending_name = None; + continue; + } + if body.trim_end_matches(' ') == "devDependencies:" { 
+ active_bucket = Bucket::Dev; + bucket_indent = 0; + pending_name = None; + continue; + } + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } else { + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } + continue; + } + + if in_importers_section { + let trimmed = body.trim_end(); + if trimmed == "dependencies:" { + active_bucket = Bucket::Prod; + bucket_indent = indent; + pending_name = None; + continue; + } + if trimmed == "devDependencies:" { + active_bucket = Bucket::Dev; + bucket_indent = indent; + pending_name = None; + continue; + } + } + + if active_bucket == Bucket::None || indent <= bucket_indent { + if indent <= bucket_indent { + active_bucket = Bucket::None; + bucket_indent = usize::MAX; + pending_name = None; + } + continue; + } + + let (key_part, value_part) = match body.split_once(':') { + Some(x) => x, + None => continue, + }; + let key = key_part.trim().trim_matches('\'').trim_matches('"'); + let value = value_part.trim(); + + let expected_entry_indent = bucket_indent + 2; + if indent != expected_entry_indent { + if let Some((ref pkg, _)) = pending_name { + if key == "version" && !value.is_empty() { + let version = strip_pnpm_peer_suffix(value.trim_matches('\'').trim_matches('"')); + let pair = (pkg.clone(), version); + match active_bucket { + Bucket::Prod => { + importers.prod.insert(pair); + } + Bucket::Dev => { + importers.dev.insert(pair); + } + Bucket::None => {} + } + pending_name = None; + } + } + continue; + } + + if value.is_empty() { + pending_name = Some((key.to_string(), indent)); + } else { + let version = strip_pnpm_peer_suffix(value.trim_matches('\'').trim_matches('"')); + let pair = (key.to_string(), version); + match active_bucket { + Bucket::Prod => { + importers.prod.insert(pair); + } + Bucket::Dev => { + importers.dev.insert(pair); + } + Bucket::None => {} + } + pending_name = None; + } + } + + importers +} + +fn leading_spaces(line: &str) -> usize { + 
line.bytes().take_while(|b| *b == b' ').count()
+}
+
+/// Split a pnpm-lock.yaml `packages:` key into `(name, version)`.
+///
+/// Accepts the v5 (`/name/1.0.0`), v6 (`/name@1.0.0`) and v9
+/// (`name@1.0.0`, optionally quoted) key shapes, scoped or unscoped, and
+/// removes any peer-dependency suffix from the version. Returns `None`
+/// when the key has no version separator or either part is empty.
+fn extract_pnpm_pkg_key(raw_key: &str) -> Option<(String, String)> {
+    // Order of trims matters: pnpm v9 quotes the *whole* scoped key
+    // including the version (`'@types/node@20.10.5'`), and v5/v6 wrap
+    // the same shape with a leading `/`. Strip both, in either order,
+    // until the key stabilises.
+    let mut key = raw_key.trim().to_string();
+    for _ in 0..3 {
+        let trimmed = key
+            .trim_matches('\'')
+            .trim_matches('"')
+            .trim_start_matches('/')
+            .to_string();
+        if trimmed == key {
+            break;
+        }
+        key = trimmed;
+    }
+
+    // Parentheses never occur in a package name, so the v9 peer suffix can
+    // be cut from the whole key. The v6 `_` suffix cannot: names such as
+    // `string_decoder` contain underscores, so `_` is only stripped from
+    // the version part, after the name has been split off.
+    let key: &str = match key.find('(') {
+        Some(idx) => &key[..idx],
+        None => &key,
+    };
+
+    // The name/version separator is the first `@` or `/` after the name;
+    // for scoped packages the search starts after the scope's own `/`.
+    let first_sep = |s: &str| s.find(|c: char| c == '@' || c == '/');
+    let sep = match key.strip_prefix('@') {
+        Some(rest) => {
+            let scope_end = rest.find('/')?;
+            // 1 for the leading `@`, the scope itself, 1 for its `/`.
+            let pkg_start = 1 + scope_end + 1;
+            pkg_start + first_sep(&rest[scope_end + 1..])?
+        }
+        None => first_sep(key)?,
+    };
+
+    let name = &key[..sep];
+    let version = strip_pnpm_peer_suffix(&key[sep + 1..]);
+    if name.is_empty() || version.is_empty() {
+        return None;
+    }
+    Some((name.to_string(), version))
+}
+
+/// Remove a pnpm peer-dependency suffix from a *version* string: everything
+/// from the first `(` (v9 form) or the first `_` (v6 form) onwards.
+///
+/// Pass only the version, never a full `name@version` key — package names
+/// may legitimately contain `_`.
+fn strip_pnpm_peer_suffix(version: &str) -> String {
+    let v = version.trim();
+    let v = match v.find('(') {
+        Some(idx) => &v[..idx],
+        None => v,
+    };
+    let v = match v.find('_') {
+        Some(idx) => &v[..idx],
+        None => v,
+    };
+    v.trim().to_string()
+}
+
 #[cfg(test)]
 mod
tests { use super::*; @@ -436,4 +794,235 @@ mod tests { ); assert_eq!(extract_name_from_packages_key("").as_deref(), None); } + + #[test] + fn pnpm_pkg_key_v5() { + // v5: leading slash + slash version separator + assert_eq!( + extract_pnpm_pkg_key("/lodash/4.17.21"), + Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/@types/node/20.10.5"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_v6() { + // v6: leading slash + at-sign version separator + assert_eq!( + extract_pnpm_pkg_key("/lodash@4.17.21"), + Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/@types/node@20.10.5"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_v9() { + // v9: no leading slash; quoted scoped names + assert_eq!( + extract_pnpm_pkg_key("lodash@4.17.21"), + Some(("lodash".to_string(), "4.17.21".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("'@types/node@20.10.5'"), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("\"@types/node@20.10.5\""), + Some(("@types/node".to_string(), "20.10.5".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_strips_peer_suffix() { + // v9 paren style: + assert_eq!( + extract_pnpm_pkg_key("/foo@1.0.0(react@18.0.0)"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("foo@1.0.0(react@18.0.0)(typescript@5.0.0)"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + // v6 underscore style: + assert_eq!( + extract_pnpm_pkg_key("/foo/1.0.0_react@18.0.0"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + assert_eq!( + extract_pnpm_pkg_key("/foo@1.0.0_react@18.0.0"), + Some(("foo".to_string(), "1.0.0".to_string())) + ); + } + + #[test] + fn pnpm_pkg_key_rejects_garbage() { + assert_eq!(extract_pnpm_pkg_key(""), None); + assert_eq!(extract_pnpm_pkg_key("/"), None); + 
assert_eq!(extract_pnpm_pkg_key("/lodash"), None); + assert_eq!(extract_pnpm_pkg_key("/@scope/no-version"), None); + } + + #[test] + fn parses_pnpm_lock_v9() { + // Realistic pnpm v9 lockfile. + let lock = r#"lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + .: + dependencies: + lodash: + specifier: ^4.17.21 + version: 4.17.21 + '@scope/lib': + specifier: ^1.0.0 + version: 1.0.0 + devDependencies: + typescript: + specifier: ^5.0.0 + version: 5.4.5 + +packages: + lodash@4.17.21: + resolution: {integrity: sha512-x} + engines: {node: '>=12'} + + '@scope/lib@1.0.0': + resolution: {integrity: sha512-y} + + typescript@5.4.5: + resolution: {integrity: sha512-z} + engines: {node: '>=14.17'} + + some-transitive@2.0.0: + resolution: {integrity: sha512-w} +"#; + + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + // typescript is dev-only top-level, should be excluded. + // some-transitive is unclassified — kept as prod (best-effort). + assert!(pairs.contains(&("lodash".to_string(), "4.17.21".to_string()))); + assert!(pairs.contains(&("@scope/lib".to_string(), "1.0.0".to_string()))); + assert!(pairs.contains(&("some-transitive".to_string(), "2.0.0".to_string()))); + assert!(!pairs.contains(&("typescript".to_string(), "5.4.5".to_string()))); + + let all = parse_pnpm_lock(lock, true).unwrap(); + let names: Vec<_> = all.iter().map(|d| d.name.clone()).collect(); + assert!(names.contains(&"typescript".to_string())); + assert_eq!(all.len(), 4); + } + + #[test] + fn parses_pnpm_lock_v6() { + // v6 layout: per-package `dev:` flag drives classification. 
+ let lock = r#"lockfileVersion: '6.0' + +dependencies: + lodash: + specifier: ^4.17.21 + version: 4.17.21 + +devDependencies: + typescript: + specifier: ^5.0.0 + version: 5.4.5 + +packages: + + /lodash@4.17.21: + resolution: {integrity: sha512-x} + dev: false + + /typescript@5.4.5: + resolution: {integrity: sha512-z} + dev: true + + /'@types/node@20.10.5': + resolution: {integrity: sha512-y} + dev: true +"#; + + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!( + pairs, + vec![("lodash".to_string(), "4.17.21".to_string())] + ); + + let all = parse_pnpm_lock(lock, true).unwrap(); + assert_eq!(all.len(), 3); + } + + #[test] + fn parses_pnpm_lock_v5_flat() { + let lock = r#"lockfileVersion: 5.4 + +dependencies: + lodash: 4.17.21 + +devDependencies: + typescript: 5.4.5 + +packages: + + /lodash/4.17.21: + resolution: {integrity: sha512-x} + dev: false + + /typescript/5.4.5: + resolution: {integrity: sha512-z} + dev: true +"#; + let prod = parse_pnpm_lock(lock, false).unwrap(); + let pairs: Vec<_> = prod + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert_eq!( + pairs, + vec![("lodash".to_string(), "4.17.21".to_string())] + ); + } + + #[test] + fn pnpm_lock_strips_peer_suffix_in_packages_section() { + let lock = r#"lockfileVersion: '9.0' + +importers: + .: + dependencies: + consumer: + specifier: ^1.0.0 + version: 1.0.0(react@18.2.0) + +packages: + consumer@1.0.0(react@18.2.0): + resolution: {integrity: sha512-x} + react@18.2.0: + resolution: {integrity: sha512-y} +"#; + let deps = parse_pnpm_lock(lock, true).unwrap(); + let pairs: Vec<_> = deps + .iter() + .map(|d| (d.name.clone(), d.version.clone())) + .collect(); + assert!(pairs.contains(&("consumer".to_string(), "1.0.0".to_string()))); + assert!(pairs.contains(&("react".to_string(), "18.2.0".to_string()))); + } } From 87b754bc6947aaf90b3de4d42fbfbc03351db753 Mon Sep 17 
00:00:00 2001 From: Cursor Agent Date: Thu, 21 May 2026 12:37:31 +0000 Subject: [PATCH 3/4] verify-deps: add --fail-unpinned for unfrozen-dep CI gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new `--fail-unpinned` flag to `corgea verify-deps` so users can fail the build when any declared dependency can't be verified against a registry because it isn't pinned to an exact version. Independent of the existing `--fail` (which gates on registry freshness): the two flags compose, so a CI step like corgea verify-deps --threshold 2d --fail --fail-unpinned now enforces both 'no recently published deps' AND 'no unfrozen deps' in one shot. What counts as 'unpinned': * `package.json` declares dependencies but no `package-lock.json` / `pnpm-lock.yaml` / `yarn.lock` / `npm-shrinkwrap.json` is present. * `pyproject.toml` declares dependencies (PEP 621 `[project].dependencies` / `optional-dependencies`, `[tool.poetry.dependencies]`, or `[tool.poetry.group.*.dependencies]`) but no `poetry.lock` / `uv.lock` / `Pipfile.lock` is present. * `Pipfile` is present without a sibling `Pipfile.lock`. * `requirements.in` is present without a compiled `requirements.txt`. * Any `requirements.txt` line that isn't `==`-pinned (range specifiers, bare names, etc.). VCS / URL specifiers are explicit escape hatches and are not flagged. Behaviour: * Warnings are surfaced in the report by default — no exit-code change unless the user opts in. This keeps the existing contract for callers that just want freshness gating. * `--fail-unpinned` upgrades them to a non-zero exit. Existing `--fail` still controls only freshness, so the two are composable. * JSON output now includes a top-level `unpinned` array and an `unpinned` count in `summary`, mirroring the shape of the `recent` and `errors` fields. Implementation: * `DiscoverResult` now carries a `warnings: Vec<UnpinnedWarning>` alongside its `deps`. Both `npm::discover` and `python::discover` populate it.
When discovery would have returned the old 'no lockfile found' error AND a manifest explains why, the discovery now returns successfully with an empty deps list and a warning instead — the caller's ecosystem-skip path stays compatible because we keep the error when there's *nothing* to report. * `parse_requirements` was refactored into `parse_requirements_with_warnings` which returns `(pinned, unpinned_lines)`; the old function is retained as a thin wrapper for tests. * Added `pyproject_has_deps` (TOML parsing of PEP 621 + Poetry tables) and `package_json_has_deps` to avoid false positives on placeholder manifests with no declared deps. * `VerifyOptions` gains `fail_unpinned: bool`; `VerifyReport` gains `unpinned_warnings` plus a `has_unpinned()` helper. `main.rs` exits with status 1 when `fail_unpinned` is set and any warning was emitted. Tests: * 9 new unit tests covering: `requirements.txt` line classification with the new VCS / URL escape-hatch handling; discover-level warnings for `package.json` without a lockfile, `package.json` with a lockfile (no warning), `pyproject.toml` declaring deps without a lockfile, `pyproject.toml` with no declared deps (still bubbles the 'no lockfile' error), `Pipfile` without `Pipfile.lock`, `requirements.in` paired with `pyproject.toml`, and `requirements.txt` line-level unpinned warnings emitted through the public `discover` API. (`tempfile` is already a workspace dep so no new crates are needed.) * Verified end-to-end against a fixture project with all four failure modes (package.json, pyproject.toml, Pipfile, and unpinned requirements.txt lines): default run prints warnings with exit 0; `--fail-unpinned` exits 1; adding a real `pnpm-lock.yaml` removes the npm warning correctly. Docs: `skills/corgea/SKILL.md` updated with the flag, a CI combination example, and the `--fail-unpinned` row in the flag table. 
Co-authored-by: Ibrahim Rahhal --- skills/corgea/SKILL.md | 14 ++ src/main.rs | 13 +- src/verify_deps/mod.rs | 44 ++++- src/verify_deps/npm.rs | 102 ++++++++++++ src/verify_deps/python.rs | 334 ++++++++++++++++++++++++++++++++++++-- src/verify_deps/report.rs | 39 ++++- 6 files changed, 526 insertions(+), 20 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index eb7fb95..887d02c 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -117,6 +117,7 @@ Supply-chain tripwire: looks up every pinned dependency in the project against t corgea verify-deps # 2-day window, prod deps, both ecosystems corgea verify-deps --threshold 7d # widen the window to 7 days corgea verify-deps --threshold 48h --fail # exit 1 if any recent dep is found (CI gate) +corgea verify-deps --fail-unpinned # exit 1 if any dep can't be verified because it isn't pinned corgea verify-deps --ecosystem npm # only check npm deps corgea verify-deps --ecosystem python --include-dev # python only, include dev deps corgea verify-deps --path ./services/api # check a different project @@ -129,6 +130,7 @@ corgea verify-deps --json # machine-readable output | `--threshold` | `-t` | Recency window: `2d`, `48h`, `30m`, `1w`, etc. 
(default `2d`) | | `--include-dev` | | Include development dependencies | | `--fail` | `-f` | Exit non-zero if any recent dep is detected | +| `--fail-unpinned` | | Exit non-zero if any dep is unpinned (manifest with no lockfile, or unpinned `requirements.txt` line) | | `--json` | | JSON output instead of human text | | `--path` | `-p` | Project directory (default: `.`) | @@ -179,6 +181,18 @@ corgea upload report.json --project-name my-app corgea verify-deps --threshold 2d --fail ``` +### Require pinned, lockfile-resolved dependencies + +```bash +corgea verify-deps --fail-unpinned +``` + +Use this together with `--fail` to gate both freshness and pinning in one CI step: + +```bash +corgea verify-deps --threshold 2d --fail --fail-unpinned +``` + ### Export results ```bash diff --git a/src/main.rs b/src/main.rs index dd44042..4399813 100644 --- a/src/main.rs +++ b/src/main.rs @@ -190,6 +190,12 @@ enum Commands { )] fail: bool, + #[arg( + long, + help = "Exit with a non-zero status code if any dependency is unpinned (e.g. package.json without a lockfile, pyproject.toml/Pipfile without a matching lockfile, or unpinned `requirements.txt` lines). Independent of --fail." + )] + fail_unpinned: bool, + #[arg( long, help = "Output the result as JSON instead of human-readable text." 
@@ -415,7 +421,7 @@ fn main() { Some(Commands::SetupHooks { default_config }) => { setup_hooks::setup_pre_commit_hook(*default_config); } - Some(Commands::VerifyDeps { ecosystem, threshold, include_dev, fail, json, path }) => { + Some(Commands::VerifyDeps { ecosystem, threshold, include_dev, fail, fail_unpinned, json, path }) => { let parsed_ecosystem = match verify_deps::Ecosystem::parse(ecosystem) { Ok(e) => e, Err(e) => { @@ -436,6 +442,7 @@ fn main() { threshold: parsed_threshold, include_dev: *include_dev, fail: *fail, + fail_unpinned: *fail_unpinned, json: *json, path: project_path, npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), @@ -451,9 +458,13 @@ fn main() { } let recent = !report.recent().is_empty(); let errors = !report.errors().is_empty(); + let unpinned = report.has_unpinned(); if (recent || errors) && opts.fail { std::process::exit(1); } + if unpinned && opts.fail_unpinned { + std::process::exit(1); + } } Err(e) => { eprintln!("verify-deps failed: {}", e); diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs index c2a8da3..f1d6689 100644 --- a/src/verify_deps/mod.rs +++ b/src/verify_deps/mod.rs @@ -97,6 +97,11 @@ pub struct VerifyOptions { pub threshold: Duration, pub include_dev: bool, pub fail: bool, + /// When true, treat any unpinned dependency or missing-lockfile + /// situation (`package.json` without a lockfile, unpinned + /// `requirements.txt` lines, `pyproject.toml`/`Pipfile` without a + /// matching lockfile) as a hard failure. + pub fail_unpinned: bool, pub json: bool, pub path: PathBuf, /// Optional registry overrides (used in tests). 
@@ -111,6 +116,7 @@ impl Default for VerifyOptions { threshold: Duration::from_secs(2 * 24 * 60 * 60), include_dev: false, fail: false, + fail_unpinned: false, json: false, path: PathBuf::from("."), npm_registry: None, @@ -192,10 +198,12 @@ pub fn run(opts: &VerifyOptions) -> Result { let mut deps: Vec = Vec::new(); let mut sources: Vec = Vec::new(); + let mut unpinned_warnings: Vec = Vec::new(); if matches!(opts.ecosystem, Ecosystem::Npm | Ecosystem::All) { match npm::discover(path, opts.include_dev) { Ok(mut found) => { + unpinned_warnings.append(&mut found.warnings); if !found.deps.is_empty() { sources.push(found.source.clone()); deps.append(&mut found.deps); @@ -220,6 +228,7 @@ pub fn run(opts: &VerifyOptions) -> Result { if matches!(opts.ecosystem, Ecosystem::Python | Ecosystem::All) { match python::discover(path, opts.include_dev) { Ok(mut found) => { + unpinned_warnings.append(&mut found.warnings); if !found.deps.is_empty() { sources.push(found.source.clone()); deps.append(&mut found.deps); @@ -241,7 +250,7 @@ pub fn run(opts: &VerifyOptions) -> Result { } } - if deps.is_empty() { + if deps.is_empty() && unpinned_warnings.is_empty() { return Err(format!( "no supported dependency manifests found in {}. Expected one of: \ package-lock.json, npm-shrinkwrap.json, pnpm-lock.yaml, yarn.lock, \ @@ -313,6 +322,7 @@ pub fn run(opts: &VerifyOptions) -> Result { Ok(VerifyReport { sources, outcomes, + unpinned_warnings, threshold: opts.threshold, scanned_at: now, }) @@ -323,6 +333,7 @@ pub fn run(opts: &VerifyOptions) -> Result { pub struct VerifyReport { pub sources: Vec, pub outcomes: Vec, + pub unpinned_warnings: Vec, pub threshold: Duration, pub scanned_at: DateTime, } @@ -354,13 +365,42 @@ impl VerifyReport { .filter(|o| matches!(o, LookupOutcome::Ok { .. })) .count() } + + pub fn has_unpinned(&self) -> bool { + !self.unpinned_warnings.is_empty() + } } /// Helper used by lockfile parsers to bundle their result. 
-#[derive(Debug, Clone)] +/// +/// `source` is empty when the discoverer could not find a lockfile; +/// in that case `warnings` typically explains why (e.g. a manifest +/// was found but no lockfile to resolve it against). +#[derive(Debug, Clone, Default)] pub struct DiscoverResult { pub deps: Vec, pub source: String, + pub warnings: Vec, +} + +/// A diagnostic about a dependency we *could not* verify because it +/// isn't pinned to an exact version. Examples: +/// +/// * `package.json` is present but no `package-lock.json` / +/// `pnpm-lock.yaml` / `yarn.lock` exists. +/// * `pyproject.toml` or `Pipfile` is present without a matching +/// lockfile. +/// * A `requirements.txt` line is not `==`-pinned (e.g. `requests>=2.0`). +/// +/// These are surfaced in the regular report and, with +/// `--fail-unpinned`, cause a non-zero exit. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UnpinnedWarning { + pub ecosystem: DependencyEcosystem, + /// Which manifest the warning is about (relative path or filename). + pub manifest: String, + /// Human-readable description of why the dep can't be verified. + pub reason: String, } /// Read the file at `path` into a String, returning an informative error. 
diff --git a/src/verify_deps/npm.rs b/src/verify_deps/npm.rs index 5d12240..57b7e81 100644 --- a/src/verify_deps/npm.rs +++ b/src/verify_deps/npm.rs @@ -30,7 +30,29 @@ pub fn discover(project_dir: &Path, include_dev: bool) -> Result Result Result { + let content = std::fs::read_to_string(path).map_err(|_| ())?; + let parsed: serde_json::Value = serde_json::from_str(&content).map_err(|_| ())?; + let has = |key: &str| { + parsed + .get(key) + .and_then(|v| v.as_object()) + .map(|m| !m.is_empty()) + .unwrap_or(false) + }; + Ok(has("dependencies") || has("devDependencies") || has("peerDependencies") || has("optionalDependencies")) +} + #[derive(Debug, Deserialize)] struct NpmLockRoot { #[serde(rename = "lockfileVersion")] @@ -1025,4 +1065,66 @@ packages: assert!(pairs.contains(&("consumer".to_string(), "1.0.0".to_string()))); assert!(pairs.contains(&("react".to_string(), "18.2.0".to_string()))); } + + #[test] + fn discover_warns_on_package_json_without_lockfile() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ + "name": "demo", + "version": "1.0.0", + "dependencies": { "lodash": "^4.0.0" } + }"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert_eq!(result.warnings.len(), 1); + assert!(result.warnings[0].manifest.ends_with("package.json")); + assert!(result.warnings[0].reason.contains("lockfile")); + } + + #[test] + fn discover_no_warning_for_empty_package_json() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ + "name": "demo", + "version": "1.0.0" + }"#, + ) + .unwrap(); + + let err = discover(dir.path(), false).err().expect("expected error"); + assert!(err.contains("no npm lockfile")); + } + + #[test] + fn discover_with_lockfile_emits_no_warnings() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("package.json"), + r#"{ 
"name": "demo", "version": "1.0.0", "dependencies": { "lodash": "^4.0.0" } }"#, + ) + .unwrap(); + std::fs::write( + dir.path().join("package-lock.json"), + r#"{ + "name": "demo", "version": "1.0.0", "lockfileVersion": 3, + "packages": { + "": { "name": "demo", "version": "1.0.0" }, + "node_modules/lodash": { "version": "4.17.21" } + } + }"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.warnings.is_empty()); + assert_eq!(result.deps.len(), 1); + assert_eq!(result.deps[0].name, "lodash"); + } } diff --git a/src/verify_deps/python.rs b/src/verify_deps/python.rs index 3bb899d..35e1920 100644 --- a/src/verify_deps/python.rs +++ b/src/verify_deps/python.rs @@ -29,7 +29,63 @@ pub fn discover(project_dir: &Path, include_dev: bool) -> Result = Vec::new(); + + // Always look for sibling manifests that imply the project has + // dependencies, even when a lockfile is present. We surface these + // as warnings only when the corresponding lockfile is missing. + let pyproject = project_dir.join("pyproject.toml"); + let pipfile = project_dir.join("Pipfile"); + let pipfile_lock = project_dir.join("Pipfile.lock"); + let poetry_lock = project_dir.join("poetry.lock"); + let uv_lock = project_dir.join("uv.lock"); + let requirements_in = project_dir.join("requirements.in"); + + if pipfile.exists() && !pipfile_lock.exists() { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: pipfile.display().to_string(), + reason: "Pipfile is present but Pipfile.lock is missing. Run `pipenv lock` to generate one before verifying." + .to_string(), + }); + } + + if requirements_in.exists() && !project_dir.join("requirements.txt").exists() { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: requirements_in.display().to_string(), + reason: "requirements.in is present but no compiled requirements.txt was found. 
Run `pip-compile` (or `uv pip compile`) to produce a pinned requirements file before verifying." + .to_string(), + }); + } + + if pyproject.exists() + && !poetry_lock.exists() + && !uv_lock.exists() + && !pipfile_lock.exists() + { + if pyproject_has_deps(&pyproject).unwrap_or(false) { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: pyproject.display().to_string(), + reason: "pyproject.toml declares dependencies but no lockfile was found (looked for poetry.lock, uv.lock, Pipfile.lock). Run `poetry lock`, `uv lock`, or generate a pinned requirements.txt before verifying." + .to_string(), + }); + } + } + if candidates.is_empty() { + // Without a lockfile or pinned requirements.txt we have nothing + // to verify. If we already emitted a warning above, return it + // (and let the caller decide if it's fatal). Otherwise fall + // back to the previous "nothing to do" error. + if !warnings.is_empty() { + return Ok(DiscoverResult { + deps: Vec::new(), + source: String::new(), + warnings, + }); + } return Err(format!( "no Python lockfile found in {}. Looked for: {}", project_dir.display(), @@ -49,16 +105,77 @@ pub fn discover(project_dir: &Path, include_dev: bool) -> Result parse_poetry_lock(&content, include_dev)?, "Pipfile.lock" => parse_pipfile_lock(&content, include_dev)?, "uv.lock" => parse_uv_lock(&content)?, - "requirements.txt" => parse_requirements(&content), + "requirements.txt" => { + let (pinned, unpinned) = parse_requirements_with_warnings(&content); + for line in unpinned { + warnings.push(super::UnpinnedWarning { + ecosystem: DependencyEcosystem::Python, + manifest: chosen.display().to_string(), + reason: format!( + "requirements.txt line is not `==`-pinned: `{}`", + line + ), + }); + } + pinned + } _ => unreachable!(), }; Ok(DiscoverResult { deps, source: chosen.display().to_string(), + warnings, }) } +/// Lightweight check: does this `pyproject.toml` declare any project +/// dependencies? 
We look at PEP 621 `[project].dependencies` and +/// `[project].optional-dependencies`, plus the legacy +/// `[tool.poetry.dependencies]` and `[tool.poetry.group.*.dependencies]` +/// tables. Tolerates parse errors. +fn pyproject_has_deps(path: &Path) -> Result { + let content = std::fs::read_to_string(path).map_err(|_| ())?; + let parsed: toml::Value = toml::from_str(&content).map_err(|_| ())?; + + let project_deps = parsed + .get("project") + .and_then(|p| p.get("dependencies")) + .and_then(|v| v.as_array()) + .map(|a| !a.is_empty()) + .unwrap_or(false); + let project_opt = parsed + .get("project") + .and_then(|p| p.get("optional-dependencies")) + .and_then(|v| v.as_table()) + .map(|t| t.values().any(|v| v.as_array().map(|a| !a.is_empty()).unwrap_or(false))) + .unwrap_or(false); + let poetry_main = parsed + .get("tool") + .and_then(|t| t.get("poetry")) + .and_then(|p| p.get("dependencies")) + .and_then(|v| v.as_table()) + // Poetry seeds `python = "^3.10"` here; ignore that one entry. + .map(|t| t.iter().any(|(k, _)| k != "python")) + .unwrap_or(false); + let poetry_groups = parsed + .get("tool") + .and_then(|t| t.get("poetry")) + .and_then(|p| p.get("group")) + .and_then(|v| v.as_table()) + .map(|groups| { + groups.values().any(|g| { + g.get("dependencies") + .and_then(|d| d.as_table()) + .map(|t| !t.is_empty()) + .unwrap_or(false) + }) + }) + .unwrap_or(false); + + Ok(project_deps || project_opt || poetry_main || poetry_groups) +} + #[derive(Debug, Deserialize)] struct PoetryLockRoot { #[serde(default)] @@ -253,12 +370,19 @@ pub(crate) fn parse_uv_lock(content: &str) -> Result, String> { Ok(out) } -/// Parse a `requirements.txt` file. We only emit deps that are -/// `==`-pinned. Everything else (ranges, git URLs, editables) is -/// skipped silently — those can't be checked against a registry -/// without resolution. -pub(crate) fn parse_requirements(content: &str) -> Vec { - let mut out = Vec::new(); +/// Parse a `requirements.txt` file. 
Returns `(pinned_deps, unpinned_lines)`: +/// +/// * `pinned_deps`: deps with an exact `==` pin, ready for registry +/// lookup. +/// * `unpinned_lines`: each non-empty, non-comment, non-flag line that +/// we *could not* resolve to a pinned version (range specifiers, +/// bare names, etc.), surfaced as warnings so `--fail-unpinned` +/// can fail on them. VCS / URL specifiers are skipped, not reported. +pub(crate) fn parse_requirements_with_warnings( + content: &str, +) -> (Vec, Vec) { + let mut deps = Vec::new(); + let mut unpinned = Vec::new(); let mut continued = String::new(); for raw_line in content.lines() { let mut line = raw_line.to_string(); @@ -281,6 +405,8 @@ pub(crate) fn parse_requirements(content: &str) -> Vec { line.to_string() }; + // `-r other.txt`, `-c constraints.txt`, `--index-url`, etc. + // These are pip configuration directives, not deps. if line.starts_with('-') { continue; } @@ -290,29 +416,60 @@ pub(crate) fn parse_requirements(content: &str) -> Vec { None => line.clone(), }; - let no_extras = no_extras.split_whitespace().next().unwrap_or("").to_string(); - if no_extras.is_empty() { + let first_token = no_extras + .split_whitespace() + .next() + .unwrap_or("") + .to_string(); + if first_token.is_empty() { + continue; + } + + // VCS / local path / archive URL specifiers — explicit and + // unverifiable against a registry. Don't classify these as + // unpinned warnings; they're an intentional escape hatch.
+ let lowered = first_token.to_ascii_lowercase(); + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", + ]; + if unverifiable_prefixes + .iter() + .any(|p| lowered.starts_with(p)) + { continue; } - if let Some(idx) = no_extras.find("==") { - let name_part = &no_extras[..idx]; - let version_part = &no_extras[idx + 2..]; + if let Some(idx) = first_token.find("==") { + let name_part = &first_token[..idx]; + let version_part = &first_token[idx + 2..]; let name = name_part.split('[').next().unwrap_or("").trim(); - let version = version_part.trim().trim_matches(|c: char| c == '\'' || c == '"'); + let version = version_part + .trim() + .trim_matches(|c: char| c == '\'' || c == '"'); if name.is_empty() || version.is_empty() { + unpinned.push(line.clone()); continue; } - out.push(Dependency { + deps.push(Dependency { name: normalize_python_name(name), version: version.to_string(), ecosystem: DependencyEcosystem::Python, source: "requirements.txt".to_string(), dev: false, }); + } else { + unpinned.push(line.clone()); } } - out + (deps, unpinned) +} + +/// Backwards-compatible wrapper that drops the unpinned-line list. +/// Used by tests; the binary build path doesn't call it directly any +/// more, so the dead-code lint needs silencing. 
+#[allow(dead_code)] +pub(crate) fn parse_requirements(content: &str) -> Vec { + parse_requirements_with_warnings(content).0 } /// Normalize a Python distribution name per PEP 503 (lowercase, @@ -366,6 +523,36 @@ django[bcrypt]==4.2.0 assert_eq!(deps.len(), 3); } + #[test] + fn requirements_warnings_capture_unpinned_lines() { + let req = r#" +# pinned, no warning +requests==2.31.0 + +# unpinned — should produce warnings +numpy>=1.20 +flask +sqlalchemy~=2.0 + +# pip directives — ignored, not warnings +-r other.txt +--index-url https://example.com/simple + +# VCS / URL deps — explicit escape hatch, no warning +git+https://github.com/x/y.git +https://example.com/pkg.tar.gz +"#; + let (deps, unpinned) = parse_requirements_with_warnings(req); + assert_eq!( + deps.iter().map(|d| d.name.clone()).collect::>(), + vec!["requests".to_string()] + ); + assert_eq!(unpinned.len(), 3); + assert!(unpinned.iter().any(|l| l.contains("numpy>=1.20"))); + assert!(unpinned.iter().any(|l| l == "flask")); + assert!(unpinned.iter().any(|l| l.contains("sqlalchemy~=2.0"))); + } + #[test] fn parses_poetry_lock() { let lock = r#" @@ -450,4 +637,121 @@ git = "https://example.com/x.git" let pairs: Vec<_> = deps.iter().map(|d| (d.name.clone(), d.version.clone())).collect(); assert_eq!(pairs, vec![("requests".to_string(), "2.31.0".to_string())]); } + + #[test] + fn discover_warns_on_pyproject_without_lockfile() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +dependencies = ["requests>=2.0", "flask"] +"#, + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert_eq!(result.warnings.len(), 1); + assert!(result.warnings[0].reason.contains("pyproject.toml")); + assert!(result.warnings[0].reason.contains("lockfile")); + } + + #[test] + fn discover_no_warning_for_empty_pyproject() { + let dir = 
tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +"#, + ) + .unwrap(); + + let err = discover(dir.path(), false).err().expect("expected error"); + assert!(err.contains("no Python lockfile found")); + } + + #[test] + fn discover_warns_on_pipfile_without_lock() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("Pipfile"), + "[packages]\nrequests = \"*\"\n", + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + assert!(result.deps.is_empty()); + assert!(result.warnings.iter().any(|w| w.reason.contains("Pipfile"))); + } + + #[test] + fn discover_emits_unpinned_warnings_from_requirements_txt() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("requirements.txt"), + "requests==2.31.0 +flask>=2.0 +numpy +", + ) + .unwrap(); + + let result = discover(dir.path(), false).expect("discover"); + let names: Vec<_> = result.deps.iter().map(|d| d.name.clone()).collect(); + assert_eq!(names, vec!["requests".to_string()]); + // Two unpinned lines: `flask>=2.0` and `numpy`. + assert_eq!(result.warnings.len(), 2); + for w in &result.warnings { + assert!(w.reason.contains("not `==`-pinned")); + } + } + + #[test] + fn discover_warns_for_requirements_in_without_compiled_txt() { + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("requirements.in"), + "requests +flask +", + ) + .unwrap(); + + let err = discover(dir.path(), false).err(); + // requirements.in alone is not enough to find a lockfile, but + // we should have surfaced the in-without-compiled-txt warning + // before getting to the "no lockfile" error. + match err { + Some(e) => assert!(e.contains("no Python lockfile")), + None => {} + } + + // When requirements.in is paired with a pyproject.toml that + // *does* declare deps, we end up returning a warning. 
+ let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write(dir.path().join("requirements.in"), "requests +").unwrap(); + std::fs::write( + dir.path().join("pyproject.toml"), + r#"[project] +name = "demo" +version = "0.1.0" +dependencies = ["requests"] +"#, + ) + .unwrap(); + let result = discover(dir.path(), false).expect("discover"); + assert!(result + .warnings + .iter() + .any(|w| w.manifest.ends_with("requirements.in"))); + assert!(result + .warnings + .iter() + .any(|w| w.manifest.ends_with("pyproject.toml"))); + } } diff --git a/src/verify_deps/report.rs b/src/verify_deps/report.rs index 6d927f7..1a26246 100644 --- a/src/verify_deps/report.rs +++ b/src/verify_deps/report.rs @@ -24,13 +24,34 @@ pub fn print_text(report: &VerifyReport) { let ok_count = report.ok_count(); println!( - "Checked {} dependencies — {} ok, {} recent, {} errors", + "Checked {} dependencies — {} ok, {} recent, {} errors, {} unpinned", report.outcomes.len(), ok_count, recent.len(), errors.len(), + report.unpinned_warnings.len(), ); + if !report.unpinned_warnings.is_empty() { + println!(); + println!( + "{}", + set_text_color( + "Unpinned dependencies (cannot be verified against the registry):", + TerminalColor::Yellow, + ) + ); + for w in &report.unpinned_warnings { + println!( + " {} [{}] {}: {}", + set_text_color("?", TerminalColor::Yellow), + w.ecosystem.label(), + w.manifest, + w.reason, + ); + } + } + if !recent.is_empty() { println!(); println!( @@ -77,7 +98,7 @@ pub fn print_text(report: &VerifyReport) { } } - if recent.is_empty() && errors.is_empty() { + if recent.is_empty() && errors.is_empty() && report.unpinned_warnings.is_empty() { println!( "{}", set_text_color( @@ -130,6 +151,18 @@ pub fn print_json(report: &VerifyReport) { }) .collect(); + let unpinned: Vec<_> = report + .unpinned_warnings + .iter() + .map(|w| { + json!({ + "ecosystem": w.ecosystem.label(), + "manifest": w.manifest, + "reason": w.reason, + }) + }) + .collect(); + let body = json!({ "scanned_at": 
report.scanned_at.to_rfc3339(), "threshold_seconds": report.threshold.as_secs(), @@ -139,8 +172,10 @@ pub fn print_json(report: &VerifyReport) { "ok": report.ok_count(), "recent": report.recent().len(), "errors": report.errors().len(), + "unpinned": report.unpinned_warnings.len(), }, "results": outcomes, + "unpinned": unpinned, }); println!("{}", serde_json::to_string_pretty(&body).unwrap()); From 6b93a40995ef3bc1657761d47e9476c6a2652100 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 21 May 2026 13:53:05 +0000 Subject: [PATCH 4/4] Add precheck wrapper for npm/yarn/pnpm/pip install commands `corgea precheck [args...]` is a thin registry-aware wrapper around the package manager's install commands. It resolves what the package manager would install (against registry.npmjs.org or pypi.org) and refuses to run the install when a resolved version was published within --threshold (default 2d). Use it as a drop-in for the bare command in CI scripts or interactive shells: corgea precheck npm install axios@^1.0.0 --save-dev corgea precheck pnpm add @types/node@latest corgea precheck pip install requests==2.31.0 corgea precheck pip install -r requirements.txt corgea precheck npm install (bare - verifies the lockfile) Capabilities - Supported package managers: npm, yarn, pnpm, pip (alias pip3). - Spec resolution against the registry: - npm: bare name, @latest, any dist-tag (@next, @beta, ...), exact versions, and full semver ranges (^1.0.0, ~1.2.0, ">=1.0.0 <2.0.0"). Both Rust-style comma-separated and npm-style space-separated ranges parse via a new parse_npm_range helper. - PyPI: bare name, ==X, and PEP 440 specifiers >=, <=, >, <, !=, ~= with comma-separated AND. Exact pins are honoured precisely; other specifiers fall back to "highest matching stable" using semver for ordering after a small PyPI->semver normalisation step. 
- Spec parsing handles common edge cases: scoped npm names (@types/node@1.0.0), npm aliases (npm:other@1.0.0), workspace specs, git / URL / file / path specs, pip extras (requests[security]==2.31.0), env markers (requests==2.31.0; python_version >= "3.7"), and pip flag-with-value pairs (-r FILE, -c FILE, -e PATH, --requirement=FILE, --editable=PATH). Tokens that cannot be classified are reported as "skipped" and never block the install. - Subcommands other than install/add/i are forwarded transparently to the package manager. - Bare npm install / pip install (no positional specs) verify the existing lockfile via the existing verify-deps machinery, then exec. - pip install -r FILE reads the file and runs the same registry verification that verify-deps would run on a project's requirements.txt. Works with arbitrary file names (e.g. -r dev-reqs.txt) via a new verify_arbitrary_requirements path. Behaviour - Default: a recent finding makes precheck exit 1 without running the install. Tripwire intent. - --no-fail: demote the block to a warning; install still runs. - --check-only: never exec, regardless of result. - --fail-unpinned: also fail on unverifiable specs (URL / git / file / editable) and on unpinned lines pulled in by -r. - --json: machine-readable output mirroring the verify-deps schema (results, summary, threshold_seconds). Implementation notes - New src/precheck/{mod.rs, parse.rs} for command logic and argument parsing. Exec uses which (already a workspace dep) so the same code path resolves npm.cmd shims on Windows. - Registry layer extended with two new public APIs in verify_deps/registry.rs: - npm_resolve(name, NpmSpec, registry) - fetches full package metadata once and resolves Latest / Tag / Exact / Range using semver::VersionReq. Pre-releases are excluded from range matches unless the range itself names one (matches npm).
- pypi_resolve(name, PypiSpec, registry) - uses the per-package /pypi/<name>/json endpoint, filters out yanked / empty releases, and applies PEP 440 specifiers via best-effort semver ordering. - New crate dep: semver = "1" (Rust's standard semver, also used by Cargo). - Exec preserves the package manager's exit code, including signal-based termination on Unix (128+sig). Tests - 17 new unit tests (under precheck::parse::tests and precheck::tests) covering: package-manager parsing, install-subcommand recognition, npm flag stripping with the -- boundary, scoped / unscoped npm spec classification across Latest / Tag / Exact / Range, npm "unverifiable" specs (git / URL / file / path / npm: / workspace:), pip exact / specifier / extras / env-marker parsing, and pip -r / -e extraction. - 8 new #[ignore]-gated live integration tests against npmjs.org and pypi.org covering Latest, Exact, Range (both comma- and space-style), unknown-tag failure, PyPI Latest / Exact / Specifier. - Verified end-to-end against real registries: scoped names with ranges, dist-tag resolution catching today's @types/node@25.9.1 (~1d 20h old) within the default 2d window, exec passthrough, JSON output, mixed valid+skipped specs. Docs: skills/corgea/SKILL.md updated with a Precheck section, flag table, spec-resolution rules, and a CI workflow snippet. Open follow-ups left out on purpose (happy to add on request): - Wrappers for poetry add / pipenv install / uv add / npx. - Honouring per-command --registry flags. - Support for npm || OR ranges (not natively supported by the Rust semver crate).
Co-authored-by: Ibrahim Rahhal --- Cargo.lock | 7 + Cargo.toml | 1 + skills/corgea/SKILL.md | 38 ++ src/main.rs | 76 ++++ src/precheck/mod.rs | 768 ++++++++++++++++++++++++++++++++++++ src/precheck/parse.rs | 534 +++++++++++++++++++++++++ src/verify_deps/registry.rs | 507 ++++++++++++++++++++++++ 7 files changed, 1931 insertions(+) create mode 100644 src/precheck/mod.rs create mode 100644 src/precheck/parse.rs diff --git a/Cargo.lock b/Cargo.lock index 225b82d..b9e8077 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -357,6 +357,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1695,6 +1696,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index 608ffbd..5a7ce87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ http-body-util = "0.1" url = "2.5" open = "5.0" urlencoding = "2.1" +semver = "1" [target.'cfg(not(target_os = "windows"))'.dependencies] openssl = { version = "0.10", features = ["vendored"] } diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 887d02c..913f31c 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -136,6 +136,35 @@ corgea verify-deps --json # machine-readable output Supported lockfiles (preferred → fallback): npm: `package-lock.json`, `npm-shrinkwrap.json`, `pnpm-lock.yaml` (v5/v6/v9), `yarn.lock`. Python: `poetry.lock`, `Pipfile.lock`, `uv.lock`, `requirements.txt` (only `==`-pinned lines). +### Precheck — `corgea precheck [args...]` + +Wraps an install command (`npm install`, `yarn add`, `pnpm add`, `pip install`), resolves what the package manager *would* install against the public registry, and refuses to run the install when a resolved version was published within `--threshold`. 
Use it as a thin replacement for the bare command in CI scripts or interactive shells. + +```bash +corgea precheck npm install axios@^1.0.0 --save-dev +corgea precheck pnpm add @types/node@latest +corgea precheck yarn add lodash +corgea precheck pip install requests==2.31.0 +corgea precheck pip install -r requirements.txt +corgea precheck npm install # bare install — verifies the lockfile +``` + +| Flag | Description | +|------|-------------| +| `--threshold <duration>` (`-t`) | Recency window (`2d`, `48h`, `30m`, `1w`). Default `2d`. | +| `--no-fail` | Demote a recent finding from a hard block to a warning (install runs anyway). | +| `--check-only` | Run the verification but never exec the install. | +| `--fail-unpinned` | Also fail on unverifiable specs (URL/git/file/editable) and unpinned `requirements.txt` lines pulled in by `-r`. | +| `--json` | Machine-readable output. | + +Spec resolution: + +* **npm / yarn / pnpm** — `pkg`, `pkg@latest`, `pkg@1.2.3`, `pkg@^1.0.0`, `pkg@>=1.0.0 <2.0.0`, `pkg@next` (any dist-tag), and scoped names (`@types/node@...`). Ranges are resolved against the registry's full version list using `semver` semantics. +* **pip** — `pkg`, `pkg==1.2.3`, `pkg>=1,<2`, `pkg~=1.4`, `pkg[extras]==X`. Exact `==` pins are honoured precisely; other PEP 440 specifiers are resolved against PyPI's release list with a best-effort comparison. +* **Skipped (warning, not blocked)** — `git+...`, `file:...`, `./local`, `http(s)://...`, `npm:alias@...`, `workspace:*`, `pip -e`. These are explicit out-of-band sources we can't verify against a registry. + +Subcommands other than `install` / `add` / `i` are forwarded straight through to the package manager unchanged, so `corgea precheck npm view ...` and similar just work.
+ ## Common Workflows ### Scan full project @@ -193,6 +222,15 @@ Use this together with `--fail` to gate both freshness and pinning in one CI ste corgea verify-deps --threshold 2d --fail --fail-unpinned ``` +### Pre-check an install before letting it run + +```bash +corgea precheck npm install axios@^1.0.0 +corgea precheck pip install -r requirements.txt --fail-unpinned +``` + +`corgea precheck` resolves the actual version a package manager would install, blocks if it was published within the threshold, and otherwise transparently runs the install (preserving the package manager's exit code). + ### Export results ```bash diff --git a/src/main.rs b/src/main.rs index 4399813..242d430 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ mod log; mod setup_hooks; mod authorize; mod verify_deps; +mod precheck; mod scanners { pub mod fortify; pub mod blast; @@ -209,6 +210,49 @@ enum Commands { )] path: Option, }, + /// Pre-check a package install command against the registry, then run it. + /// Wraps `npm install`, `yarn add`, `pnpm add`, or `pip install` and refuses + /// to run when a resolved version was published within --threshold. + /// Examples: + /// corgea precheck npm install axios@^1.0.0 --save-dev + /// corgea precheck pip install requests + /// corgea precheck pnpm add @types/node@latest + Precheck { + #[arg( + long, + short = 't', + default_value = "2d", + help = "Recency threshold. Resolved versions younger than this are flagged. Same syntax as `verify-deps --threshold`." + )] + threshold: String, + + #[arg( + long, + help = "Demote a recent finding from a hard block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Run the verification but never exec the install command." + )] + check_only: bool, + + #[arg( + long, + help = "Also fail when an unpinned/unverifiable spec (URL, git, file:, editable) is in the install command." 
+ )] + fail_unpinned: bool, + + #[arg(long, help = "Output the result as JSON instead of human-readable text.")] + json: bool, + + /// Everything after `precheck` is forwarded to the package manager. + /// First positional must name the package manager: npm, yarn, + /// pnpm, pip. + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, + }, } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -472,6 +516,38 @@ fn main() { } } } + Some(Commands::Precheck { threshold, no_fail, check_only, fail_unpinned, json, cmd }) => { + if cmd.is_empty() { + eprintln!("usage: corgea precheck [args...]"); + std::process::exit(2); + } + let manager = match precheck::PackageManager::parse(&cmd[0]) { + Ok(m) => m, + Err(e) => { + eprintln!("{}", e); + std::process::exit(2); + } + }; + let parsed_threshold = match verify_deps::parse_threshold(threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("Invalid --threshold: {}", e); + std::process::exit(2); + } + }; + let opts = precheck::PrecheckOptions { + manager, + threshold: parsed_threshold, + no_fail: *no_fail, + check_only: *check_only, + fail_unpinned: *fail_unpinned, + json: *json, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + }; + let exit_code = precheck::run(cmd, opts); + std::process::exit(exit_code); + } None => { utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..6318f9b --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,768 @@ +//! `corgea precheck [args...]` +//! +//! Wraps an install command from a supported package manager +//! (`npm` / `yarn` / `pnpm` / `pip`), resolves what the package +//! manager *would* install against the public registry, and either +//! blocks the install or runs it transparently. +//! +//! 
Verification rule: a package is rejected if the resolved version +//! was published within `--threshold` (default `2d`). This mirrors +//! the `verify-deps` flow but applies to the install-time set of +//! packages instead of the already-locked set. +//! +//! By default a "recent" finding makes precheck exit with status 1 +//! *without* running the install. Use `--no-fail` to demote this to a +//! warning (the install runs anyway), or `--check-only` to skip the +//! install regardless of verification result. + +pub mod parse; + +use std::ffi::OsString; +use std::process::Command; +use std::time::Duration; + +use chrono::Utc; + +use crate::utils::terminal::{set_text_color, TerminalColor}; +use crate::verify_deps; + +/// Supported package managers. Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Yarn, + Pnpm, + Pip, +} + +impl PackageManager { + pub fn parse(s: &str) -> Result { + match s { + "npm" => Ok(PackageManager::Npm), + "yarn" => Ok(PackageManager::Yarn), + "pnpm" => Ok(PackageManager::Pnpm), + "pip" | "pip3" => Ok(PackageManager::Pip), + other => Err(format!( + "Unsupported package manager '{}'. Supported: npm, yarn, pnpm, pip.", + other + )), + } + } + + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Yarn => "yarn", + PackageManager::Pnpm => "pnpm", + PackageManager::Pip => "pip", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. 
+ pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add"), + PackageManager::Yarn => matches!(sub, "add" | "install"), + PackageManager::Pnpm => matches!(sub, "add" | "install" | "i"), + PackageManager::Pip => matches!(sub, "install"), + } + } +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub manager: PackageManager, + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never exec the underlying install command. + pub check_only: bool, + /// If true, also fail on unpinned-style warnings (URL specs, + /// unparseable specs, missing `requirements.txt` reference). + pub fail_unpinned: bool, + pub json: bool, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. + pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly, version is older than the threshold. + Ok { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + }, + /// Resolved cleanly but version was published within the threshold. 
+ Recent { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, +} + +impl PrecheckReport { + pub fn recent_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Recent { .. })) + .count() + } + pub fn error_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Error { .. })) + .count() + } + pub fn skipped_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Skipped { .. })) + .count() + } + pub fn ok_count(&self) -> usize { + self.outcomes + .iter() + .filter(|o| matches!(o, TargetOutcome::Ok { .. })) + .count() + } +} + +/// Top-level entry. `args` is the *remaining* argv after `corgea precheck`, +/// e.g. `["npm", "install", "axios@^1.0.0", "--save-dev"]`. +/// +/// Returns the exit code to use. The caller is responsible for +/// `std::process::exit(...)`. +pub fn run(args: &[String], opts: PrecheckOptions) -> i32 { + if args.is_empty() { + eprintln!("usage: corgea precheck [args...]"); + return 2; + } + + // We expect `args[0]` to match the configured package manager. + // (The CLI plumbing already accepted opts.manager from the user; + // this is a sanity check.) 
+ let typed_manager = &args[0]; + if PackageManager::parse(typed_manager).ok() != Some(opts.manager) { + eprintln!( + "package manager mismatch: expected '{}', got '{}'", + opts.manager.binary_name(), + typed_manager + ); + return 2; + } + + if args.len() < 2 { + return exec_install(opts.manager, &[], opts.check_only); + } + + let subcommand = &args[1]; + let rest = &args[2..]; + + if !opts.manager.is_install_subcommand(subcommand) { + // Pass-through: not an install. We cannot verify what we + // don't understand, but we shouldn't get in the user's way. + return exec_install_with_args(opts.manager, subcommand, rest, opts.check_only); + } + + // Parse install-command args into install targets. + let parsed = match parse::parse_install_args(opts.manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + if !parsed.requirements_files.is_empty() { + // `pip install -r reqs.txt` — load and verify the file(s). + // Done *before* per-target resolution so a mixed command + // like `pip install -r reqs.txt requests==2.31.0` checks + // both the file and the explicit spec. + let code = verify_lockfile_or_requirements(&opts, parsed.requirements_files.clone()); + if code != 0 && !opts.no_fail { + return code; + } + } + + if parsed.targets.is_empty() && !parsed.bare_install { + // Nothing else to verify (`-r` already handled above, or a + // flag-only invocation like `npm install -D`). Exec. + return exec_install_with_args(opts.manager, subcommand, rest, opts.check_only); + } + + if parsed.bare_install { + // `npm install` / `pip install` with no args — verify the + // existing lockfile in cwd, then exec. 
+ let exit_from_lockfile = match opts.manager { + PackageManager::Pip => verify_lockfile_or_requirements(&opts, Vec::new()), + _ => verify_npm_lockfile(&opts), + }; + if exit_from_lockfile != 0 && !opts.no_fail { + return exit_from_lockfile; + } + return exec_install_with_args(opts.manager, subcommand, rest, opts.check_only); + } + + let mut outcomes = Vec::with_capacity(parsed.targets.len()); + let now = Utc::now(); + let threshold = match chrono::Duration::from_std(opts.threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("invalid threshold: {}", e); + return 2; + } + }; + + for target in &parsed.targets { + let outcome = verify_one(target, &opts, &now, threshold); + outcomes.push(outcome); + } + + let report = PrecheckReport { + manager: opts.manager, + subcommand: subcommand.clone(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + }; + + if opts.json { + print_json(&report); + } else { + print_text(&report); + } + + let recent = report.recent_count(); + let errors = report.error_count(); + + if (recent > 0 || (errors > 0 && opts.fail_unpinned)) && !opts.no_fail { + if !opts.json { + eprintln!( + "{}", + set_text_color( + "Refusing to run install. 
Pass --no-fail to proceed anyway.", + TerminalColor::Red, + ) + ); + } + return 1; + } + + exec_install_with_args(opts.manager, subcommand, rest, opts.check_only) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, + threshold: chrono::Duration, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + let age_chrono = now.signed_duration_since(resolved.published_at); + let age = age_chrono.to_std().unwrap_or_else(|_| Duration::from_secs(0)); + if age_chrono < threshold { + TargetOutcome::Recent { + target: target.clone(), + resolved, + age, + } + } else { + TargetOutcome::Ok { + target: target.clone(), + resolved, + age, + } + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +fn verify_npm_lockfile(opts: &PrecheckOptions) -> i32 { + let verify_opts = verify_deps::VerifyOptions { + ecosystem: verify_deps::Ecosystem::Npm, + threshold: opts.threshold, + include_dev: false, + fail: !opts.no_fail, + fail_unpinned: opts.fail_unpinned, + json: opts.json, + path: std::path::PathBuf::from("."), + npm_registry: opts.npm_registry.clone(), + pypi_registry: opts.pypi_registry.clone(), + }; + delegate_to_verify_deps(verify_opts) +} + +fn verify_lockfile_or_requirements( + opts: &PrecheckOptions, + requirements_files: Vec, +) -> i32 { + if requirements_files.is_empty() { + let verify_opts = verify_deps::VerifyOptions { + ecosystem: verify_deps::Ecosystem::Python, + threshold: opts.threshold, + include_dev: false, + fail: !opts.no_fail, + fail_unpinned: 
opts.fail_unpinned, + json: opts.json, + path: std::path::PathBuf::from("."), + npm_registry: opts.npm_registry.clone(), + pypi_registry: opts.pypi_registry.clone(), + }; + return delegate_to_verify_deps(verify_opts); + } + + let mut overall: i32 = 0; + for req in requirements_files { + // The verify-deps machinery expects a project directory and + // looks for a sibling `requirements.txt`. We use the file's + // parent dir if it has one, falling back to cwd for relative + // paths like `-r reqs.txt`. + let parent = req + .parent() + .filter(|p| !p.as_os_str().is_empty()) + .map(std::path::Path::to_path_buf) + .unwrap_or_else(|| std::path::PathBuf::from(".")); + // verify-deps only looks for the literal file name + // `requirements.txt`. If the user pointed at a different + // file (e.g. `-r dev-reqs.txt`), copy / link it temporarily + // so the verifier can find it. We instead just parse it + // here directly when it isn't named requirements.txt. + let file_name = req + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(); + if file_name != "requirements.txt" { + // Parse the file ourselves and run the registry checks. + let code = verify_arbitrary_requirements(&req, &opts); + if code != 0 { + overall = code; + } + continue; + } + let verify_opts = verify_deps::VerifyOptions { + ecosystem: verify_deps::Ecosystem::Python, + threshold: opts.threshold, + include_dev: false, + fail: !opts.no_fail, + fail_unpinned: opts.fail_unpinned, + json: opts.json, + path: parent, + npm_registry: opts.npm_registry.clone(), + pypi_registry: opts.pypi_registry.clone(), + }; + let code = delegate_to_verify_deps(verify_opts); + if code != 0 { + overall = code; + } + } + overall +} + +/// Read a requirements file at an arbitrary path, parse it, and run +/// the same registry verification we'd run for a project's +/// `requirements.txt`. Used when the user passes +/// `pip install -r dev-reqs.txt` (a non-default name). 
+fn verify_arbitrary_requirements( + req_path: &std::path::Path, + opts: &PrecheckOptions, +) -> i32 { + let content = match std::fs::read_to_string(req_path) { + Ok(c) => c, + Err(e) => { + eprintln!( + "verify-deps: failed to read {}: {}", + req_path.display(), + e + ); + return 2; + } + }; + let (deps, unpinned) = + crate::verify_deps::python::parse_requirements_with_warnings(&content); + + if deps.is_empty() && unpinned.is_empty() { + return 0; + } + + let now = chrono::Utc::now(); + let threshold = match chrono::Duration::from_std(opts.threshold) { + Ok(t) => t, + Err(e) => { + eprintln!("invalid threshold: {}", e); + return 2; + } + }; + + let mut recent_count: usize = 0; + let mut error_count: usize = 0; + println!( + "Pre-checking {} (threshold {})", + req_path.display(), + verify_deps::format_duration(opts.threshold) + ); + for dep in &deps { + match crate::verify_deps::registry::pypi_publish_time( + &dep.name, + &dep.version, + opts.pypi_registry.as_deref(), + ) { + Ok(published_at) => { + let age_chrono = now.signed_duration_since(published_at); + let age = age_chrono.to_std().unwrap_or_else(|_| Duration::from_secs(0)); + if age_chrono < threshold { + println!( + " {} {}@{} published {} ago at {} (within threshold)", + set_text_color("⚠", TerminalColor::Yellow), + dep.name, + dep.version, + set_text_color( + &verify_deps::format_duration(age), + TerminalColor::Yellow, + ), + published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + recent_count += 1; + } else { + println!( + " {} {}@{} published {} ago", + set_text_color("✓", TerminalColor::Green), + dep.name, + dep.version, + verify_deps::format_duration(age), + ); + } + } + Err(e) => { + println!( + " {} {}@{}: {}", + set_text_color("✗", TerminalColor::Red), + dep.name, + dep.version, + e + ); + error_count += 1; + } + } + } + if !unpinned.is_empty() { + println!( + "{}", + set_text_color( + "Unpinned lines (cannot be verified):", + TerminalColor::Yellow, + ) + ); + for line in &unpinned { + println!( + " 
{} {}", + set_text_color("?", TerminalColor::Yellow), + line + ); + } + } + if recent_count > 0 && !opts.no_fail { + return 1; + } + if !unpinned.is_empty() && opts.fail_unpinned { + return 1; + } + if error_count > 0 && opts.fail_unpinned { + return 1; + } + 0 +} + +fn delegate_to_verify_deps(opts: verify_deps::VerifyOptions) -> i32 { + match verify_deps::run(&opts) { + Ok(report) => { + if opts.json { + verify_deps::report::print_json(&report); + } else { + verify_deps::report::print_text(&report); + } + let recent = !report.recent().is_empty(); + let unpinned = report.has_unpinned(); + if recent && opts.fail { + return 1; + } + if unpinned && opts.fail_unpinned { + return 1; + } + 0 + } + Err(e) => { + eprintln!("verify-deps failed: {}", e); + 2 + } + } +} + +fn exec_install(manager: PackageManager, args: &[String], check_only: bool) -> i32 { + if check_only { + return 0; + } + exec_command(manager.binary_name(), args) +} + +fn exec_install_with_args( + manager: PackageManager, + subcommand: &str, + rest: &[String], + check_only: bool, +) -> i32 { + if check_only { + return 0; + } + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command(manager.binary_name(), &full) +} + +fn exec_command(binary: &str, args: &[String]) -> i32 { + // Resolve the binary on PATH. On Windows this finds `.cmd` shims. + let resolved = match which::which(binary) { + Ok(p) => p, + Err(e) => { + eprintln!( + "could not find '{}' on PATH ({}). 
Make sure the package manager is installed.", + binary, e + ); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + match Command::new(&resolved).args(&os_args).status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + eprintln!("failed to exec {}: {}", binary, e); + 1 + } + } +} + +fn print_text(report: &PrecheckReport) { + let label = report.manager.binary_name(); + let display: Vec<&str> = report.original_args.iter().map(String::as_str).collect(); + println!( + "Pre-checking `{} {} {}` (threshold {})", + label, + report.subcommand, + display.join(" "), + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + report.skipped_count(), + report.error_count(), + ); + + for o in &report.outcomes { + match o { + TargetOutcome::Ok { target, resolved, age } => { + println!( + " {} {} → {}@{} published {} ago", + set_text_color("✓", TerminalColor::Green), + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + TargetOutcome::Recent { target, resolved, age } => { + println!( + " {} {} → {}@{} published {} ago at {} (within threshold)", + set_text_color("⚠", TerminalColor::Yellow), + target.display, + resolved.name, + resolved.version, + set_text_color(&verify_deps::format_duration(*age), TerminalColor::Yellow), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } + TargetOutcome::Skipped { target, reason } => { + println!( + " {} {}: {}", + set_text_color("?", TerminalColor::Yellow), + target.display, + reason, + ); + } + TargetOutcome::Error { target, error } => { + println!( + " {} {}: {}", + set_text_color("✗", TerminalColor::Red), + target.display, + error, + ); + } + } + } +} + +fn print_json(report: 
&PrecheckReport) { + use serde_json::json; + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + TargetOutcome::Ok { target, resolved, age } => json!({ + "status": "ok", + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + TargetOutcome::Recent { target, resolved, age } => json!({ + "status": "recent", + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + TargetOutcome::Skipped { target, reason } => json!({ + "status": "skipped", + "spec": target.display, + "name": target.name, + "reason": reason, + }), + TargetOutcome::Error { target, error } => json!({ + "status": "error", + "spec": target.display, + "name": target.name, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "manager": report.manager.binary_name(), + "subcommand": report.subcommand, + "args": report.original_args, + "threshold_seconds": report.threshold.as_secs(), + "summary": { + "ok": report.ok_count(), + "recent": report.recent_count(), + "skipped": report.skipped_count(), + "errors": report.error_count(), + }, + "results": outcomes, + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn package_manager_parse() { + assert_eq!(PackageManager::parse("npm").unwrap(), PackageManager::Npm); + assert_eq!(PackageManager::parse("yarn").unwrap(), PackageManager::Yarn); + assert_eq!(PackageManager::parse("pnpm").unwrap(), PackageManager::Pnpm); + assert_eq!(PackageManager::parse("pip").unwrap(), PackageManager::Pip); + assert_eq!(PackageManager::parse("pip3").unwrap(), PackageManager::Pip); + assert!(PackageManager::parse("cargo").is_err()); + } + + #[test] + fn install_subcommand_recognition() { + 
assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Yarn.is_install_subcommand("add")); + assert!(PackageManager::Yarn.is_install_subcommand("install")); + + assert!(PackageManager::Pnpm.is_install_subcommand("add")); + assert!(PackageManager::Pnpm.is_install_subcommand("install")); + assert!(PackageManager::Pnpm.is_install_subcommand("i")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..08a9c4a --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,534 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::PathBuf; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — the requirements files we should + /// load and verify in lieu of standalone targets. + pub requirements_files: Vec, + /// True if the user invoked the bare install (`npm install` / + /// `pip install` with no positional specs and no `-r`). 
+ pub bare_install: bool, +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + let positionals = match manager { + PackageManager::Pip => extract_pip_positionals(args)?, + _ => extract_node_positionals(args), + }; + + let mut parsed = ParsedInstall::default(); + + for raw in &positionals.specs { + let target = match manager { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + parse_npm_spec(raw) + } + PackageManager::Pip => parse_pypi_spec(raw), + }; + parsed.targets.push(target); + } + + parsed.requirements_files = positionals.requirements_files; + + if parsed.targets.is_empty() && parsed.requirements_files.is_empty() { + parsed.bare_install = true; + } + + Ok(parsed) +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Strip flags from a npm/yarn/pnpm install argument list, returning +/// only the positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; we handle both by skipping the next token if it +/// looks like a value (doesn't start with `-` and contains `:` or `/` +/// or starts with a digit, suggesting a URL / path / port / version). +/// +/// We deliberately avoid maintaining an exhaustive flag whitelist — +/// real-world install commands are too varied. The heuristic above +/// is correct for the common cases (`--registry url`, `--prefix path`, +/// `-w pkgname`, etc.) and conservatively skips occasional ambiguous +/// values (no spec we'd want to verify ever starts with `:` or `/`). +fn extract_node_positionals(args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. 
+ for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + // Flag. Skip the next token if it looks like a value. + if let Some(eq_idx) = a.find('=') { + // `--flag=value` already self-contained. + let _ = eq_idx; + i += 1; + continue; + } + // Heuristic: peek at the next arg. If it doesn't look + // like a package spec (i.e. contains `://` or starts with + // `/` or `.`) skip it; otherwise leave it alone for the + // next iteration. + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args.get(i + 1).ok_or_else(|| { + "`-r` / `--requirement` requires a file path".to_string() + })?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. 
+ let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Long-form `--requirement=foo.txt`. + if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + // Unknown flag — apply the same value-skipping heuristic + // as in node land. + if a.contains('=') { + i += 1; + continue; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. 
+pub(crate) fn parse_npm_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "git:", "git@", "ssh://", "http://", "https://", "file:", "./", "../", "/", "~/", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a URL/git/filesystem reference — registry verification skipped" + .to_string(), + }, + }; + } + if trimmed.starts_with("npm:") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "npm: aliased dependency — registry verification skipped".to_string(), + }, + }; + } + if trimmed.starts_with("workspace:") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "workspace: dependency — registry verification skipped".to_string(), + }, + }; + } + + // Find the version separator. Scoped names start with `@` and the + // version separator is the *next* `@` (if any). Unscoped names + // use the first `@`. 
+ let (name_part, spec_part): (&str, &str) = if let Some(rest) = trimmed.strip_prefix('@') { + match rest.find('@') { + Some(at_in_rest) => { + let split = 1 + at_in_rest; + (&trimmed[..split], &trimmed[split + 1..]) + } + None => (trimmed, ""), + } + } else { + match trimmed.find('@') { + Some(at) => (&trimmed[..at], &trimmed[at + 1..]), + None => (trimmed, ""), + } + }; + + let name = name_part.trim().to_string(); + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() || spec_str.eq_ignore_ascii_case("latest") { + TargetKind::Npm(NpmSpec::Latest) + } else if semver::Version::parse(spec_str).is_ok() { + TargetKind::Npm(NpmSpec::Exact(spec_str.to_string())) + } else if looks_like_npm_range(spec_str) { + TargetKind::Npm(NpmSpec::Range(spec_str.to_string())) + } else if is_npm_dist_tag(spec_str) { + TargetKind::Npm(NpmSpec::Tag(spec_str.to_string())) + } else { + TargetKind::Unverifiable { + reason: format!( + "could not classify version spec '{}' (not a valid semver, range, or dist-tag)", + spec_str + ), + } + }; + + InstallTarget { name, display, kind } +} + +/// Loose check: does this spec look like an npm version range? +/// We accept anything that *starts* with a range metacharacter +/// (`^`, `~`, `>`, `<`, `=`, `*`) or with a digit (so `1.x`, `1.2.x`, +/// and bare ranges still resolve). Validation against the registry's +/// version list happens later inside the resolver. +fn looks_like_npm_range(s: &str) -> bool { + matches!( + s.chars().next(), + Some('^') | Some('~') | Some('>') | Some('<') | Some('=') | Some('*') + ) || s.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) +} + +/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`, +/// `next`, `beta`, `alpha-1`). We reject anything that contains +/// version-spec metacharacters. 
+fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars().next().map(|c| c.is_ascii_alphabetic()).unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +pub(crate) fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + // Find the first specifier operator (`==`, `>=`, `<=`, `!=`, `~=`, + // `>`, `<`). PEP 440 also allows `===` (arbitrary equality). + // Find the leftmost specifier operator. On ties, prefer the + // longer operator (e.g. `==` over `=`). + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let mut split_at: Option = None; + for sep in &separators { + if let Some(idx) = trimmed.find(sep) { + split_at = match split_at { + Some(prev) if prev <= idx => Some(prev), + _ => Some(idx), + }; + } + } + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&trimmed[..idx], &trimmed[idx..]), + None => (trimmed, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); + + // Strip env markers: `package; python_version >= "3.7"`. 
+ let spec_no_marker = spec_part.split(';').next().unwrap_or(spec_part).trim(); + + let kind = if spec_no_marker.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec_no_marker.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else if let Some(rest) = spec_no_marker.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec_no_marker.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn npm_kind(t: &InstallTarget) -> &TargetKind { + &t.kind + } + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ("axios@>=1.0.0 
<2.0.0", NpmSpec::Range(">=1.0.0 <2.0.0".to_string())), + ("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input); + match (npm_kind(&target), &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node").name, "@types/node"); + assert_eq!(parse_npm_spec("@types/node@20.10.5").name, "@types/node"); + assert_eq!(parse_npm_spec("axios@1.2.3").name, "axios"); + assert_eq!(parse_npm_spec("axios").name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + ]; + for u in unverifiable { + let t = parse_npm_spec(u); + assert!(matches!(t.kind, TargetKind::Unverifiable { .. 
}), "for '{}'", u); + } + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ("requests[security]==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!(parse_pypi_spec("requests[security]==2.31.0").name, "requests"); + assert_eq!( + parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").name, + "requests" + ); + match parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").kind { + TargetKind::Pypi(PypiSpec::Exact(v)) => assert_eq!(v, "2.31.0"), + _ => panic!("expected exact spec"), + } + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!(matches!(t.kind, TargetKind::Unverifiable { .. 
}), "for '{}'", u); + } + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index a73d2ac..7f5965c 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -213,6 +213,431 @@ fn parse_iso8601(raw: &str) -> Result, String> { Err(format!("unrecognised timestamp format: {}", raw)) } +// Resolution helpers (npm + PyPI). Inserted before the tests module +// in registry.rs. + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. + Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by the precheck flow when the +/// install command says e.g. 
`axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. +pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry.unwrap_or(DEFAULT_NPM_REGISTRY).trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!("package '{}' not found on npm registry ({})", name, base)); + } + if !status.is_success() { + return Err(format!("npm registry returned status {} for '{}'", status, name)); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let meta: NpmFullMetadata = serde_json::from_str(&body) + .map_err(|e| format!("failed to parse npm registry response for '{}': {}", name, e))?; + + let resolved_version = match spec { + NpmSpec::Latest => meta + .dist_tags + .get("latest") + .cloned() + .ok_or_else(|| { + format!( + "package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package '{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => npm_pick_highest_matching(&meta.versions, range) + .ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })?, + }; + + let raw_time = 
meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Pick the highest semver-compatible version that satisfies `range`. +/// Pre-releases are excluded unless the range itself references a +/// pre-release (matches npm's behaviour). +/// Translate an npm-style version range (`>=1.0.0 <2.0.0`, +/// `1.x`, `>=1.0.0`) to a `semver::VersionReq`. The Rust crate uses +/// `,` as the AND separator, npm uses whitespace, so we normalise +/// before parsing. +fn parse_npm_range(range: &str) -> Option { + if let Ok(req) = semver::VersionReq::parse(range) { + return Some(req); + } + let normalised = range + .split_whitespace() + .collect::>() + .join(","); + semver::VersionReq::parse(&normalised).ok() +} + +fn npm_pick_highest_matching( + versions: &std::collections::BTreeMap, + range: &str, +) -> Option { + // npm separates predicates with spaces (`>=1.0.0 <2.0.0`); the + // Rust `semver` crate uses commas. Try both. We don't support + // npm's `||` OR syntax here — those are best-effort skipped. + let req = parse_npm_range(range)?; + let range_has_prerelease = range.contains('-'); + + let mut best: Option<(semver::Version, String)> = None; + for raw in versions.keys() { + let v = match semver::Version::parse(raw) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() && !range_has_prerelease { + continue; + } + if !req.matches(&v) { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + +/// PyPI version specifier used by the precheck flow. 
We parse a +/// limited subset of PEP 440 specifiers — enough for the common +/// install-command cases (`pkg`, `pkg==X`, `pkg>=X`, `pkg=2.0`, `<3,>=2`, `~=1.4`). + Specifier(String), +} + +#[derive(Debug, Deserialize)] +struct PypiInfoResponse { + info: PypiInfo, + releases: std::collections::BTreeMap>, +} + +#[derive(Debug, Deserialize)] +#[allow(dead_code)] +struct PypiInfo { + #[serde(default)] + version: Option, + #[serde(default)] + yanked: bool, +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version +/// + publish time. The latest non-prerelease, non-yanked release is +/// preferred. +pub fn pypi_resolve( + name: &str, + spec: &PypiSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry.unwrap_or(DEFAULT_PYPI_REGISTRY).trim_end_matches('/'); + let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("PyPI request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!("package '{}' not found on PyPI ({})", name, base)); + } + if !status.is_success() { + return Err(format!("PyPI returned status {} for '{}'", status, name)); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read PyPI response: {}", e))?; + + let meta: PypiInfoResponse = serde_json::from_str(&body) + .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?; + + let candidates = collect_pypi_candidates(&meta); + let chosen = match spec { + PypiSpec::Latest => pick_latest_stable(&candidates).map(|c| c.0.clone()), + PypiSpec::Exact(v) => { + if candidates.iter().any(|(ver, _)| ver == v) { + Some(v.clone()) + } else { + None + } + } + PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&candidates, spec_str) + .or_else(|| 
pick_latest_stable(&candidates).map(|c| c.0.clone())), + }; + + let chosen = chosen.ok_or_else(|| match spec { + PypiSpec::Exact(v) => { + format!("version '{}' for package '{}' was not found on PyPI", v, name) + } + _ => format!("no installable version found for '{}' on PyPI", name), + })?; + + let published_at = pypi_publish_time(name, &chosen, registry)?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: chosen, + published_at, + }) +} + +/// Returns `(version, earliest_upload_time)` for every non-yanked +/// release that has at least one uploaded artifact. Empty release +/// entries (which PyPI sometimes keeps around for yanked / private +/// versions) are filtered out so we never pick them. +fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec<(String, DateTime)> { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + // Skip yanked-only releases. + if files.iter().all(|f| { + f.upload_time_iso_8601.is_none() && f.upload_time.is_none() + }) { + continue; + } + let mut earliest: Option> = None; + for f in files { + let raw = f.upload_time_iso_8601.clone().or(f.upload_time.clone()); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(&raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + if let Some(dt) = earliest { + out.push((ver.clone(), dt)); + } + } + let _ = &meta.info; // info.version may be useful in the future + out +} + +/// Pick the latest non-prerelease version using `semver` parsing as a +/// best-effort PEP 440 ordering. Falls back to the entry with the +/// latest upload time if no candidate parses as semver. 
+fn pick_latest_stable( + candidates: &[(String, DateTime)], +) -> Option<&(String, DateTime)> { + let mut best_semver: Option<(semver::Version, &(String, DateTime))> = None; + for c in candidates { + let normalized = normalize_for_semver(&c.0); + if let Ok(v) = semver::Version::parse(&normalized) { + if !v.pre.is_empty() { + continue; + } + match &best_semver { + Some((cur, _)) if cur >= &v => {} + _ => best_semver = Some((v, c)), + } + } + } + if let Some((_, picked)) = best_semver { + return Some(picked); + } + candidates.iter().max_by_key(|c| c.1) +} + +/// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or +/// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses +/// straight as semver if we pad to 3 components. Anything more exotic +/// (`1.0a1`, `2!1.0`, etc.) is left alone and rejected by semver. +fn normalize_for_semver(v: &str) -> String { + if v.contains('!') || v.contains('a') || v.contains('b') || v.contains("rc") || v.contains(".dev") { + return v.to_string(); + } + let parts: Vec<&str> = v.split('.').collect(); + match parts.len() { + 1 => format!("{}.0.0", parts[0]), + 2 => format!("{}.{}.0", parts[0], parts[1]), + _ => v.to_string(), + } +} + +/// Apply a PEP 440-style specifier expression to the candidate list +/// and return the highest match. Supported operators: `==`, `>=`, `>`, +/// `<=`, `<`, `~=`, `!=`. Unknown operators cause us to give up and +/// return `None` (the caller falls back to "latest stable"). 
+fn pypi_resolve_specifier( + candidates: &[(String, DateTime)], + spec: &str, +) -> Option { + let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); + let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + + for p in &parts { + let (op, val): (&str, &str) = if let Some(v) = p.strip_prefix("===") { + ("==", v.trim()) + } else if let Some(v) = p.strip_prefix("==") { + ("==", v.trim()) + } else if let Some(v) = p.strip_prefix(">=") { + (">=", v.trim()) + } else if let Some(v) = p.strip_prefix("<=") { + ("<=", v.trim()) + } else if let Some(v) = p.strip_prefix("!=") { + ("!=", v.trim()) + } else if let Some(v) = p.strip_prefix("~=") { + ("~=", v.trim()) + } else if let Some(v) = p.strip_prefix(">") { + (">", v.trim()) + } else if let Some(v) = p.strip_prefix("<") { + ("<", v.trim()) + } else { + return None; + }; + let v = semver::Version::parse(&normalize_for_semver(val)).ok()?; + requirements.push((op, v)); + } + + let mut best: Option<(semver::Version, String)> = None; + for (raw, _) in candidates { + let v = match semver::Version::parse(&normalize_for_semver(raw)) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() { + continue; + } + let satisfies = requirements.iter().all(|(op, want)| match *op { + "==" => &v == want, + ">=" => &v >= want, + "<=" => &v <= want, + "!=" => &v != want, + ">" => &v > want, + "<" => &v < want, + "~=" => { + if &v < want { + return false; + } + let upper = semver::Version::new(want.major, want.minor + 1, 0); + v < upper + } + _ => false, + }); + if !satisfies { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + #[cfg(test)] mod tests { use super::*; @@ -270,4 +695,86 @@ mod tests { let err = pypi_publish_time("requests", "999.999.999", None).err().unwrap(); assert!(err.contains("not found"), "got: {}", err); } + + #[test] + #[ignore] + fn live_npm_resolve_latest() { + let r = 
npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest"); + assert_eq!(r.name, "left-pad"); + assert_eq!(r.version, "1.3.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_npm_resolve_exact() { + let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None) + .expect("npm resolve exact"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_range() { + let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None) + .expect("npm resolve range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_npm_style_range() { + // npm uses spaces, the Rust crate uses commas — we should + // accept both. + let r = npm_resolve("left-pad", &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), None) + .expect("npm resolve space-range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_unknown_tag() { + let err = npm_resolve( + "left-pad", + &NpmSpec::Tag("does-not-exist".to_string()), + None, + ) + .err() + .unwrap(); + assert!(err.contains("dist-tag"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_resolve_latest() { + let r = pypi_resolve("flask", &PypiSpec::Latest, None).expect("pypi resolve latest"); + assert_eq!(r.name, "flask"); + assert!(!r.version.is_empty()); + } + + #[test] + #[ignore] + fn live_pypi_resolve_exact() { + let r = pypi_resolve( + "requests", + &PypiSpec::Exact("2.31.0".to_string()), + None, + ) + .expect("pypi resolve exact"); + assert_eq!(r.version, "2.31.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_resolve_specifier() { + let r = pypi_resolve( + "requests", + &PypiSpec::Specifier(">=2.30,<2.32".to_string()), + None, + ) + .expect("pypi resolve specifier"); + // `requests==2.31.0` is the only release in [2.30, 2.32). + assert_eq!(r.version, "2.31.0"); + } }