diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a9f1844..d8449c29 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -421,12 +421,27 @@ jobs: - name: Copy README for PyPI package run: cp README.md pypi/socket-patch/README.md - - name: Build platform wheels + - name: Build wheels (platform socket-patch + pure-python socket-patch-hook) run: | VERSION="${{ needs.version.outputs.version }}" + # Builds the platform-tagged socket-patch wheels AND the pure-python + # socket-patch-hook wheel (the .pth carrier behind `socket-patch[hook]`). python scripts/build-pypi-wheels.py --version "$VERSION" --artifacts artifacts --dist dist - - - name: Publish to PyPI + # socket-patch and socket-patch-hook are two distinct PyPI projects. + # Publish each from its own dir so trusted publishing mints an OIDC + # token scoped to the right project (one upload spanning both projects + # can be rejected). Each project needs its own trusted publisher on + # PyPI; register a "pending" publisher for socket-patch-hook before the + # first release (repo + workflow `release.yml` + this environment). 
+ mkdir -p dist-hook + mv dist/socket_patch_hook-*.whl dist-hook/ + + - name: Publish socket-patch to PyPI uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: packages-dir: dist/ + + - name: Publish socket-patch-hook to PyPI + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 + with: + packages-dir: dist-hook/ diff --git a/Cargo.lock b/Cargo.lock index 33f3e66e..4460ce72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2445,6 +2445,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", + "toml_edit", "uuid", "walkdir", ] @@ -2753,6 +2754,43 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" +dependencies = [ + "indexmap 2.13.0", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + [[package]] name = "tonic" version = "0.14.6" @@ -3404,6 +3442,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "1.0.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 704cc791..69d8dc7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ dialoguer = "=0.11.0" indicatif = "=0.17.11" tempfile = "=3.26.0" regex = "=1.12.3" +toml_edit = "=0.25.12" once_cell = "=1.21.3" qbsdiff = "=1.4.4" tar = "=0.4.46" diff --git a/README.md b/README.md index 6f4b51aa..a359395d 100644 --- a/README.md +++ b/README.md @@ -383,18 +383,44 @@ socket-patch repair --json ### `setup` -Configure `package.json` postinstall scripts to automatically apply patches after `npm install`. +Configure your project so patches are **re-applied automatically after install** — no manual `socket-patch apply` step in CI. `setup` is a one-time operation: run it, commit the change together with your `.socket/` patches, and every later install handles the rest. It is strictly **opt-in** — nothing is hooked unless you run `setup` and commit the result. + +- **npm / yarn / pnpm / bun** — writes a `postinstall` script into `package.json` so any install re-applies patches. +- **Python (pip / uv / poetry / pdm / hatch)** — Python has no universal post-install hook, so `setup` instead commits a **`socket-patch[hook]`** dependency (for classic Poetry, the equivalent `socket-patch = { extras = ["hook"] }`). Installing it lays down a startup `.pth` (shipped by the small `socket-patch-hook` wheel) that re-applies your committed `.socket/` patches the next time the interpreter runs. It is package-manager-agnostic (it rides the interpreter, not any one installer) and **fail-open** — a hook error can never break interpreter startup. 
**Usage:** ```bash -socket-patch setup [options] +socket-patch setup # configure (interactive) +socket-patch setup --check # verify configured; non-zero exit if not (CI gate) +socket-patch setup --remove # revert what setup added ``` -No command-specific options — see [Global Options](#global-options) (`--dry-run`, `--yes`, `--json`, `--cwd` are the relevant ones). +**Command-specific options** (plus all [Global Options](#global-options) — `--dry-run`, `--yes`, `--json`, `--cwd`): +| Flag | Description | +|------|-------------| +| `--check` | Read-only verification that every manifest is configured; exits non-zero if any still needs setup. Never writes (safe in CI). Conflicts with `--remove`. | +| `--remove` | Revert the install hooks `setup` added (npm `package.json` scripts and the Python `socket-patch[hook]` dependency). | + +#### Disabling / opting out (Python hook) + +The Python hook is designed to be easy to skip or remove: + +- **Per interpreter / CI step:** set `SOCKET_PATCH_HOOK=off` (or `SOCKET_NO_HOOK=1`). This is checked *before any hook code runs*, so it fully bypasses the hook for that process. +- **Remove from a project:** `socket-patch setup --remove`, then `pip uninstall socket-patch-hook`. +- **Never opted in:** if you don't run `setup`, there is no hook — it is opt-in by design. + +#### What the Python hook does, and its safety model + +On interpreter startup, *only when the set of installed packages changed*, the hook runs `socket-patch apply --offline --ecosystems pypi` for the project that owns the current virtualenv, re-applying only the patches committed in that project's `.socket/`. Specifically: + +- It is **anchored to the virtualenv** it is installed in (not the working directory), so a `python` started from an unrelated directory cannot pull in a foreign `.socket/manifest.json`. +- It **verifies each file's hash before patching** and **never writes outside the installed package directory** (path-escaping manifest keys are refused). 
+- It resolves the `socket-patch` binary from the **installed `socket-patch` package** (not from `PATH`), so an unexpected binary on `PATH` is not executed. +- It runs **offline** (no network at startup) and is **fail-open** (any error is swallowed; it can never abort the interpreter). **Examples:** ```bash -# Interactive setup +# Interactive setup (npm and/or Python, auto-detected) socket-patch setup # Non-interactive @@ -403,6 +429,9 @@ socket-patch setup -y # Preview changes socket-patch setup --dry-run +# Verify configuration in CI (exits non-zero if not set up) +socket-patch setup --check + # JSON output for scripting socket-patch setup --json -y ``` diff --git a/crates/socket-patch-cli/src/commands/setup.rs b/crates/socket-patch-cli/src/commands/setup.rs index aee07ad9..c3d67d10 100644 --- a/crates/socket-patch-cli/src/commands/setup.rs +++ b/crates/socket-patch-cli/src/commands/setup.rs @@ -1,19 +1,25 @@ use clap::Args; +use socket_patch_core::crawlers::python_crawler::is_python_project; use socket_patch_core::package_json::detect::{is_setup_configured_str, PackageManager}; use socket_patch_core::package_json::find::{ detect_package_manager, find_package_json_files, PackageJsonLocation, WorkspaceType, }; use socket_patch_core::package_json::update::{ - remove_package_json, update_package_json, RemoveStatus, UpdateStatus, + remove_package_json, update_package_json, RemoveResult, RemoveStatus, UpdateResult, + UpdateStatus, +}; +use socket_patch_core::pth_hook::{ + add_hook_dependency, deps_contain_hook, detect_python_pm, remove_hook_dependency, ManifestKind, + PthEditResult, PthStatus, PythonPackageManager, }; use socket_patch_core::utils::telemetry::track_patch_setup; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use crate::args::GlobalArgs; use crate::output::stdin_is_tty; -/// Stringify the detected manager for telemetry. +/// Stringify the detected npm-family manager for telemetry. 
fn manager_name(pm: PackageManager) -> &'static str { match pm { PackageManager::Npm => "npm", @@ -24,7 +30,7 @@ fn manager_name(pm: PackageManager) -> &'static str { #[derive(Args)] pub struct SetupArgs { /// Verify the project is configured for socket-patch without changing - /// anything. Exits non-zero if any package.json still needs setup. + /// anything. Exits non-zero if any manifest still needs setup. #[arg( long = "check", conflicts_with = "remove", @@ -33,7 +39,8 @@ pub struct SetupArgs { )] pub check: bool, - /// Revert the install hooks that `setup` added to package.json. + /// Revert the install hooks that `setup` added (npm `package.json` scripts + /// and the Python `socket-patch-hook` dependency). #[arg( long = "remove", default_value_t = false, @@ -56,31 +63,25 @@ pub async fn run(args: SetupArgs) -> i32 { } /// Discover the package.json files `setup`/`check`/`remove` should act on, -/// applying the pnpm "root-only" filtering. Returns `None` when no files are -/// found (the caller emits the `no_files` result). -async fn discover(args: &SetupArgs) -> Option<Vec<PackageJsonLocation>> { +/// applying the pnpm "root-only" filtering. Returns an empty vec when none are +/// found (callers also consider Python before reporting `no_files`). +async fn discover(args: &SetupArgs) -> Vec<PackageJsonLocation> { let find_result = find_package_json_files(&args.common.cwd).await; // For pnpm monorepos, only update root package.json. pnpm runs root // postinstall on `pnpm install`, so workspace-level postinstall scripts are // unnecessary and would fail under pnpm's strict module isolation. - let files = match find_result.workspace_type { + match find_result.workspace_type { WorkspaceType::Pnpm => find_result .files .into_iter() .filter(|loc| loc.is_root) .collect(), _ => find_result.files, - }; - - if files.is_empty() { - None - } else { - Some(files) } } -/// Emit the shared "no package.json files found" result and exit code. +/// Emit the shared "nothing found" result and exit code.
fn report_no_files(args: &SetupArgs, status: &str) -> i32 { if args.common.json { println!( @@ -92,41 +93,191 @@ fn report_no_files(args: &SetupArgs, status: &str) -> i32 { .unwrap() ); } else { - println!("No package.json files found"); + println!("No package.json or Python project found"); } 0 } +fn pathdiff(path: &str, base: &Path) -> String { + let p = Path::new(path); + p.strip_prefix(base) + .map(|r| r.display().to_string()) + .unwrap_or_else(|_| path.to_string()) +} + +// ───────────────────────────────────────────────────────────────────────── +// Python (.pth hook) helpers +// ───────────────────────────────────────────────────────────────────────── + +/// A Python manifest `setup` will edit, plus the resolved package manager. +struct PythonPlan { + pm: PythonPackageManager, + manifests: Vec<(PathBuf, ManifestKind)>, +} + +/// Decide which Python manifest(s) to edit for the detected package manager. +/// +/// pyproject-based managers (uv/poetry/pdm/hatch) edit `pyproject.toml`; pip +/// prefers an existing `requirements.txt`, then a PEP 621 `pyproject.toml`, and +/// otherwise creates `requirements.txt`. 
+async fn choose_python_manifests( + cwd: &Path, + pm: PythonPackageManager, +) -> Vec<(PathBuf, ManifestKind)> { + let pyproject = cwd.join("pyproject.toml"); + let requirements = cwd.join("requirements.txt"); + let pyproject_exists = tokio::fs::metadata(&pyproject).await.is_ok(); + let requirements_exists = tokio::fs::metadata(&requirements).await.is_ok(); + + match pm { + PythonPackageManager::Uv + | PythonPackageManager::Poetry + | PythonPackageManager::Pdm + | PythonPackageManager::Hatch => { + if pyproject_exists { + vec![(pyproject, ManifestKind::Pyproject)] + } else { + vec![] + } + } + PythonPackageManager::Pip => { + if requirements_exists { + vec![(requirements, ManifestKind::Requirements)] + } else if pyproject_exists { + vec![(pyproject, ManifestKind::Pyproject)] + } else { + // Nothing to edit yet: create requirements.txt so a CI + // `pip install -r requirements.txt` installs the hook. + vec![(requirements, ManifestKind::Requirements)] + } + } + } +} + +async fn plan_python(common: &GlobalArgs) -> Option<PythonPlan> { + if !is_python_project(&common.cwd).await { + return None; + } + let pm = detect_python_pm(&common.cwd).await; + let manifests = choose_python_manifests(&common.cwd, pm).await; + if manifests.is_empty() { + return None; + } + Some(PythonPlan { pm, manifests }) +} + +/// Run the hook-dependency edits for a plan (add or remove) at the given +/// dry-run setting. Returns per-manifest results. +async fn edit_python_manifests( + plan: &PythonPlan, + remove: bool, + dry_run: bool, +) -> Vec<PthEditResult> { + let mut out = Vec::new(); + for (path, kind) in &plan.manifests { + let res = if remove { + remove_hook_dependency(path, *kind, dry_run).await + } else { + add_hook_dependency(path, *kind, dry_run).await + }; + out.push(res); + } + out +} + +/// After a real (non-dry-run) edit that changed a manifest, refresh the +/// lockfile. Returns any warnings to surface. (There is no separate marker / +/// audit file: the committed dependency line is the source of truth.)
+async fn finalize_python(plan: &PythonPlan, edits: &[PthEditResult], cwd: &Path) -> Vec<String> { + let mut warnings = Vec::new(); + let any_changed = edits.iter().any(|e| e.status == PthStatus::Updated); + if !any_changed { + return warnings; + } + // Lockfile refresh (broad auto-edit): only when the manager uses a lockfile + // that exists. Best-effort — never fatal. + if let Some((program, args)) = plan.pm.lock_command() { + let lockfile = match plan.pm { + PythonPackageManager::Uv => Some("uv.lock"), + PythonPackageManager::Poetry => Some("poetry.lock"), + PythonPackageManager::Pdm => Some("pdm.lock"), + _ => None, + }; + let lock_present = match lockfile { + Some(name) => tokio::fs::metadata(cwd.join(name)).await.is_ok(), + None => false, + }; + if lock_present { + match tokio::process::Command::new(program) + .args(args) + .current_dir(cwd) + .output() + .await + { + Ok(o) if o.status.success() => {} + Ok(o) => warnings.push(format!( + "`{program} {}` failed ({}); update the lockfile manually", + args.join(" "), + o.status + )), + Err(e) => warnings.push(format!( + "could not run `{program} {}`: {e}; update the lockfile manually", + args.join(" ") + )), + } + } + } + warnings +} + +fn pth_status_str(s: &PthStatus) -> &'static str { + match s { + PthStatus::Updated => "updated", + PthStatus::AlreadyConfigured => "already_configured", + PthStatus::Error => "error", + } +} + +fn update_status_str(s: &UpdateStatus) -> &'static str { + match s { + UpdateStatus::Updated => "updated", + UpdateStatus::AlreadyConfigured => "already_configured", + UpdateStatus::Error => "error", + } +} + // ───────────────────────────────────────────────────────────────────────── // check // ───────────────────────────────────────────────────────────────────────── -/// Read-only verification that every discovered package.json is configured for -/// socket-patch. Never writes (so `--dry-run` is a harmless no-op here). Exits -/// 0 only when all files are configured and none failed to parse.
+#[derive(Clone, Copy, PartialEq)] +enum CheckState { + Configured, + NeedsConfiguration, + Error, +} + +/// Read-only verification that every discovered manifest (npm package.json and +/// the Python dependency manifest) is configured for socket-patch. Never writes +/// (so `--dry-run` is a harmless no-op here). Exits 0 only when all are +/// configured and none failed to parse. async fn run_check(args: &SetupArgs) -> i32 { if !args.common.json { - println!("Searching for package.json files..."); + println!("Searching for package.json / Python manifests..."); } - let files = match discover(args).await { - Some(f) => f, - None => return report_no_files(args, "no_files"), - }; - - #[derive(Clone, Copy, PartialEq)] - enum CheckState { - Configured, - NeedsConfiguration, - Error, + let npm_files = discover(args).await; + let py_plan = plan_python(&args.common).await; + if npm_files.is_empty() && py_plan.is_none() { + return report_no_files(args, "no_files"); } - let mut entries = Vec::new(); - for loc in &files { + // (kind, path, state, error) + let mut entries: Vec<(&'static str, String, CheckState, Option<String>)> = Vec::new(); + + for loc in &npm_files { let (state, err) = match tokio::fs::read_to_string(&loc.path).await { Ok(content) => { - // A malformed package.json cannot be verified; surface it as an - // error rather than silently "needs configuration".
if serde_json::from_str::<serde_json::Value>(&content).is_err() { (CheckState::Error, Some("Invalid package.json".to_string())) } else if is_setup_configured_str(&content).needs_update { @@ -137,12 +288,34 @@ async fn run_check(args: &SetupArgs) -> i32 { } Err(e) => (CheckState::Error, Some(e.to_string())), }; - entries.push((loc.path.display().to_string(), state, err)); + entries.push(("package_json", loc.path.display().to_string(), state, err)); + } + + if let Some(plan) = &py_plan { + for (path, kind) in &plan.manifests { + let (state, err) = match tokio::fs::read_to_string(path).await { + Ok(content) => { + if deps_contain_hook(&content) { + (CheckState::Configured, None) + } else { + (CheckState::NeedsConfiguration, None) + } + } + // A not-yet-created requirements.txt simply needs setup; a + // missing pyproject we'd have to edit is an error. + Err(e) if e.kind() == std::io::ErrorKind::NotFound => match kind { + ManifestKind::Requirements => (CheckState::NeedsConfiguration, None), + ManifestKind::Pyproject => (CheckState::Error, Some(e.to_string())), + }, + Err(e) => (CheckState::Error, Some(e.to_string())), + }; + entries.push(("pth", path.display().to_string(), state, err)); + } } - let configured = entries.iter().filter(|(_, s, _)| *s == CheckState::Configured).count(); - let needs = entries.iter().filter(|(_, s, _)| *s == CheckState::NeedsConfiguration).count(); - let errs = entries.iter().filter(|(_, s, _)| *s == CheckState::Error).count(); + let configured = entries.iter().filter(|(_, _, s, _)| *s == CheckState::Configured).count(); + let needs = entries.iter().filter(|(_, _, s, _)| *s == CheckState::NeedsConfiguration).count(); + let errs = entries.iter().filter(|(_, _, s, _)| *s == CheckState::Error).count(); let all_ok = needs == 0 && errs == 0; let status = if errs > 0 { @@ -161,8 +334,9 @@ async fn run_check(args: &SetupArgs) -> i32 { "configured": configured, "needsConfiguration": needs, "errors": errs, - "files": entries.iter().map(|(path, state, err)| { + "files":
entries.iter().map(|(kind, path, state, err)| { serde_json::json!({ + "kind": kind, "path": path, "status": match state { CheckState::Configured => "configured", @@ -177,23 +351,22 @@ async fn run_check(args: &SetupArgs) -> i32 { ); } else { println!("\nConfiguration status:\n"); - for (path, state, err) in &entries { + for (_, path, state, err) in &entries { let rel = pathdiff(path, &args.common.cwd); match state { CheckState::Configured => println!(" ✓ {rel} (configured)"), CheckState::NeedsConfiguration => println!(" ✗ {rel} (needs setup)"), - CheckState::Error => println!( - " ! {rel}: {}", - err.as_deref().unwrap_or("unknown error") - ), + CheckState::Error => { + println!(" ! {rel}: {}", err.as_deref().unwrap_or("unknown error")) + } } } println!(); if all_ok { - println!("All package.json files are configured with socket-patch."); + println!("All manifests are configured with socket-patch."); } else { println!( - "{needs} file(s) need configuration, {errs} error(s). Run `socket-patch setup` to fix." + "{needs} manifest(s) need configuration, {errs} error(s). Run `socket-patch setup` to fix." ); } } @@ -217,142 +390,69 @@ fn render_removed(new: &Option) -> String { } } -/// Revert the install hooks `setup` added. Honors `--dry-run` (preview only), -/// `--yes` (skip confirmation), and `--json`. +/// Revert the install hooks `setup` added (npm package.json scripts + the +/// Python `socket-patch-hook` dependency). Honors `--dry-run`, `--yes`, `--json`. 
async fn run_remove(args: &SetupArgs) -> i32 { - if !args.common.json { - println!("Searching for package.json files..."); + let common = &args.common; + if !common.json { + println!("Searching for package.json / Python manifests..."); } - let files = match discover(args).await { - Some(f) => f, - None => return report_no_files(args, "no_files"), - }; - - if !args.common.json { - println!("Found {} package.json file(s)", files.len()); + let npm_files = discover(args).await; + let py_plan = plan_python(common).await; + if npm_files.is_empty() && py_plan.is_none() { + return report_no_files(args, "no_files"); } - // Preview every file (dry_run=true never writes). - let mut preview = Vec::new(); - for loc in &files { - preview.push(remove_package_json(&loc.path, true).await); + // Preview (dry_run=true never writes). + let mut npm_preview = Vec::new(); + for loc in &npm_files { + npm_preview.push(remove_package_json(&loc.path, true).await); } + let py_preview = match &py_plan { + Some(p) => edit_python_manifests(p, true, true).await, + None => Vec::new(), + }; - let to_remove: Vec<_> = preview.iter().filter(|r| r.status == RemoveStatus::Removed).collect(); - let not_configured: Vec<_> = - preview.iter().filter(|r| r.status == RemoveStatus::NotConfigured).collect(); - let errors: Vec<_> = preview.iter().filter(|r| r.status == RemoveStatus::Error).collect(); - - // Display proposed edits (human mode). 
- if !args.common.json { - println!("\nProposed changes:\n"); - if !to_remove.is_empty() { - println!("Will remove socket-patch from:"); - for r in &to_remove { - let rel = pathdiff(&r.path, &args.common.cwd); - println!(" - {rel}"); - println!(" postinstall: \"{}\"", r.old_script); - println!(" -> postinstall: {}", render_removed(&r.new_script)); - println!(" dependencies: \"{}\"", r.old_dependencies_script); - println!( - " -> dependencies: {}", - render_removed(&r.new_dependencies_script) - ); - } - println!(); - } - if !not_configured.is_empty() { - println!("Nothing to remove (will skip):"); - for r in &not_configured { - println!(" = {}", pathdiff(&r.path, &args.common.cwd)); - } - println!(); - } - if !errors.is_empty() { - println!("Errors:"); - for r in &errors { - println!( - " ! {}: {}", - pathdiff(&r.path, &args.common.cwd), - r.error.as_deref().unwrap_or("unknown error") - ); - } - println!(); - } + if !common.json { + print_remove_preview(&npm_preview, &py_preview, common); } - let json_files = |results: &[&socket_patch_core::package_json::update::RemoveResult]| { - results - .iter() - .map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - RemoveStatus::Removed => "removed", - RemoveStatus::NotConfigured => "not_configured", - RemoveStatus::Error => "error", - }, - "error": r.error, - }) - }) - .collect::<Vec<_>>() - }; - - // Nothing to remove: either everything is already clean (exit 0) or some - // file errored (exit 1). Mirrors the setup flow's honest error handling.
- if to_remove.is_empty() { - let errs = errors.len(); - if args.common.json { - let all: Vec<_> = preview.iter().collect(); - println!( - "{}", - serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "error" } else { "not_configured" }, - "removed": 0, - "notConfigured": not_configured.len(), - "errors": errs, - "files": json_files(&all), - })) - .unwrap() + let n_remove = npm_preview.iter().filter(|r| r.status == RemoveStatus::Removed).count() + + py_preview.iter().filter(|r| r.status == PthStatus::Updated).count(); + let preview_errs = npm_preview.iter().filter(|r| r.status == RemoveStatus::Error).count() + + py_preview.iter().filter(|r| r.status == PthStatus::Error).count(); + + // Nothing to remove: clean (exit 0) or some file errored (exit 1). + if n_remove == 0 { + if common.json { + print_remove_envelope( + if preview_errs > 0 { "error" } else { "not_configured" }, + &npm_preview, + &py_preview, + &[], ); - } else if errs > 0 { - println!("Nothing removed; {errs} file(s) could not be processed (see errors above)."); + } else if preview_errs > 0 { + println!("Nothing removed; {preview_errs} item(s) could not be processed (see errors above)."); } else { println!("No socket-patch install hooks found to remove."); } - return if errs > 0 { 1 } else { 0 }; + return if preview_errs > 0 { 1 } else { 0 }; } // Dry-run: preview already shown; report and exit without writing. 
- if args.common.dry_run { - if args.common.json { - let all: Vec<_> = preview.iter().collect(); - println!( - "{}", - serde_json::to_string_pretty(&serde_json::json!({ - "status": "dry_run", - "wouldRemove": to_remove.len(), - "notConfigured": not_configured.len(), - "errors": errors.len(), - "dryRun": true, - "files": json_files(&all), - })) - .unwrap() - ); + if common.dry_run { + if common.json { + print_remove_envelope("dry_run", &npm_preview, &py_preview, &[]); } else { println!("\nSummary:"); - println!(" {} file(s) would have socket-patch removed", to_remove.len()); - println!(" {} file(s) have nothing to remove", not_configured.len()); - if !errors.is_empty() { - println!(" {} error(s)", errors.len()); - } + println!(" {n_remove} item(s) would have socket-patch removed"); } - return if errors.is_empty() { 0 } else { 1 }; + return if preview_errs > 0 { 1 } else { 0 }; } // Confirm before mutating. - if !args.common.yes && !args.common.json { + if !common.yes && !common.json { if !stdin_is_tty() { eprintln!("Non-interactive mode detected, proceeding automatically."); } else { @@ -368,38 +468,44 @@ async fn run_remove(args: &SetupArgs) -> i32 { } } - if !args.common.json { + if !common.json { println!("\nRemoving changes..."); } - let mut results = Vec::new(); - for loc in &files { - results.push(remove_package_json(&loc.path, false).await); + let mut npm_results = Vec::new(); + for loc in &npm_files { + npm_results.push(remove_package_json(&loc.path, false).await); + } + let mut py_results = Vec::new(); + let mut warnings = Vec::new(); + if let Some(plan) = &py_plan { + py_results = edit_python_manifests(plan, true, false).await; + warnings = finalize_python(plan, &py_results, &common.cwd).await; } - let removed = results.iter().filter(|r| r.status == RemoveStatus::Removed).count(); - let not_cfg = results.iter().filter(|r| r.status == RemoveStatus::NotConfigured).count(); - let errs = results.iter().filter(|r| r.status == RemoveStatus::Error).count(); + let 
errs = npm_results.iter().filter(|r| r.status == RemoveStatus::Error).count() + + py_results.iter().filter(|r| r.status == PthStatus::Error).count(); - if args.common.json { - let all: Vec<_> = results.iter().collect(); - println!( - "{}", - serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "partial_failure" } else { "success" }, - "removed": removed, - "notConfigured": not_cfg, - "errors": errs, - "files": json_files(&all), - })) - .unwrap() + if common.json { + print_remove_envelope( + if errs > 0 { "partial_failure" } else { "success" }, + &npm_results, + &py_results, + &warnings, ); } else { + let removed = npm_results.iter().filter(|r| r.status == RemoveStatus::Removed).count() + + py_results.iter().filter(|r| r.status == PthStatus::Updated).count(); println!("\nSummary:"); - println!(" {removed} file(s) had socket-patch removed"); - println!(" {not_cfg} file(s) had nothing to remove"); + println!(" {removed} item(s) had socket-patch removed"); if errs > 0 { println!(" {errs} error(s)"); } + for w in &warnings { + println!(" warning: {w}"); + } + if py_plan.is_some() { + println!("\nAlso run `pip uninstall socket-patch-hook` to remove the installed .pth."); + } } if errs > 0 { @@ -409,274 +515,355 @@ async fn run_remove(args: &SetupArgs) -> i32 { } } +fn print_remove_preview(npm: &[RemoveResult], py: &[PthEditResult], common: &GlobalArgs) { + let to_remove: Vec<_> = npm.iter().filter(|r| r.status == RemoveStatus::Removed).collect(); + let py_remove: Vec<_> = py.iter().filter(|r| r.status == PthStatus::Updated).collect(); + println!("\nProposed changes:\n"); + if !to_remove.is_empty() { + println!("Will remove socket-patch from:"); + for r in &to_remove { + let rel = pathdiff(&r.path, &common.cwd); + println!(" - {rel}"); + println!(" postinstall: \"{}\"", r.old_script); + println!(" -> postinstall: {}", render_removed(&r.new_script)); + println!(" dependencies: \"{}\"", r.old_dependencies_script); + println!(" -> dependencies: {}", 
render_removed(&r.new_dependencies_script)); + } + println!(); + } + if !py_remove.is_empty() { + println!("Will remove the socket-patch-hook dependency from:"); + for r in &py_remove { + println!(" - {}", pathdiff(&r.path, &common.cwd)); + } + println!(); + } +} + +fn print_remove_envelope( + status: &str, + npm: &[RemoveResult], + py: &[PthEditResult], + warnings: &[String], +) { + let removed = npm.iter().filter(|r| r.status == RemoveStatus::Removed).count() + + py.iter().filter(|r| r.status == PthStatus::Updated).count(); + let not_cfg = npm.iter().filter(|r| r.status == RemoveStatus::NotConfigured).count() + + py.iter().filter(|r| r.status == PthStatus::AlreadyConfigured).count(); + let errors = npm.iter().filter(|r| r.status == RemoveStatus::Error).count() + + py.iter().filter(|r| r.status == PthStatus::Error).count(); + + let mut files: Vec<serde_json::Value> = npm + .iter() + .map(|r| { + serde_json::json!({ + "kind": "package_json", + "path": r.path, + "status": match r.status { + RemoveStatus::Removed => "removed", + RemoveStatus::NotConfigured => "not_configured", + RemoveStatus::Error => "error", + }, + "error": r.error, + }) + }) + .collect(); + files.extend(py.iter().map(|r| { + serde_json::json!({ + "kind": "pth", + "path": r.path, + "status": match r.status { + PthStatus::Updated => "removed", + PthStatus::AlreadyConfigured => "not_configured", + PthStatus::Error => "error", + }, + "error": r.error, + }) + })); + + let mut obj = serde_json::json!({ + "status": status, + "removed": removed, + "notConfigured": not_cfg, + "errors": errors, + "files": files, + }); + if status == "dry_run" { + obj["dryRun"] = serde_json::json!(true); + obj["wouldRemove"] = serde_json::json!(removed); + } + if !warnings.is_empty() { + obj["warnings"] = serde_json::json!(warnings); + } + println!("{}", serde_json::to_string_pretty(&obj).unwrap()); +} + // ───────────────────────────────────────────────────────────────────────── -// setup (unchanged behavior) +// setup (npm package.json +
Python .pth hook, combined) // ───────────────────────────────────────────────────────────────────────── async fn run_setup(args: &SetupArgs) -> i32 { - if !args.common.json { - println!("Searching for package.json files..."); + let common = &args.common; + if !common.json { + println!("Configuring socket-patch install hooks..."); } - let package_json_files = match discover(args).await { - Some(f) => f, - None => { - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ + let npm_files = discover(args).await; + let py_plan = plan_python(common).await; + + if npm_files.is_empty() && py_plan.is_none() { + if common.json { + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ "status": "no_files", "updated": 0, "alreadyConfigured": 0, "errors": 0, "files": [], - })).unwrap()); - } else { - println!("No package.json files found"); - } - return 0; + })) + .unwrap() + ); + } else { + println!("No package.json or Python project found"); } - }; + return 0; + } - // Detect package manager from lockfiles in the project root. - let pm = detect_package_manager(&args.common.cwd).await; + let npm_pm = detect_package_manager(&common.cwd).await; - // Setup telemetry: emit once we know a real setup is being attempted - // (past the "no files found" early exit) and the package manager is - // resolved. Carries the detected manager so we can see which install - // hooks are exercised in the wild. 
+ let telemetry_manager = match (!npm_files.is_empty(), py_plan.is_some()) { + (true, true) => format!("{}+pypi", manager_name(npm_pm)), + (true, false) => manager_name(npm_pm).to_string(), + (false, true) => "pypi".to_string(), + (false, false) => "none".to_string(), + }; track_patch_setup( - manager_name(pm), - args.common.api_token.as_deref(), - args.common.org.as_deref(), + &telemetry_manager, + common.api_token.as_deref(), + common.org.as_deref(), ) .await; - if !args.common.json { - println!("Found {} package.json file(s)", package_json_files.len()); - if pm == PackageManager::Pnpm { - println!("Detected pnpm project (using pnpm dlx)"); - } + // Preview (always dry-run first). + let mut npm_preview = Vec::new(); + for loc in &npm_files { + npm_preview.push(update_package_json(&loc.path, true, npm_pm).await); } + let py_preview = match &py_plan { + Some(plan) => edit_python_manifests(plan, false, true).await, + None => Vec::new(), + }; - // Preview changes (always preview first) - let mut preview_results = Vec::new(); - for loc in &package_json_files { - let result = update_package_json(&loc.path, true, pm).await; - preview_results.push(result); + if !common.json { + print_setup_preview(&npm_preview, &py_preview, common); } - // Display preview - let to_update: Vec<_> = preview_results - .iter() - .filter(|r| r.status == UpdateStatus::Updated) - .collect(); - let already_configured: Vec<_> = preview_results - .iter() - .filter(|r| r.status == UpdateStatus::AlreadyConfigured) - .collect(); - let errors: Vec<_> = preview_results - .iter() - .filter(|r| r.status == UpdateStatus::Error) - .collect(); - - if !args.common.json { - println!("\nPackage.json files to be updated:\n"); - - if !to_update.is_empty() { - println!("Will update:"); - for result in &to_update { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!(" + {rel_path}"); - if result.old_script.is_empty() { - println!(" postinstall: (no script)"); - } else { - println!(" postinstall: 
\"{}\"", result.old_script); - } - println!(" -> postinstall: \"{}\"", result.new_script); - if result.old_dependencies_script.is_empty() { - println!(" dependencies: (no script)"); - } else { - println!(" dependencies: \"{}\"", result.old_dependencies_script); - } - println!( - " -> dependencies: \"{}\"", - result.new_dependencies_script - ); - } - println!(); + let n_changes = npm_preview.iter().filter(|r| r.status == UpdateStatus::Updated).count() + + py_preview.iter().filter(|r| r.status == PthStatus::Updated).count(); + let preview_errors = npm_preview.iter().filter(|r| r.status == UpdateStatus::Error).count() + + py_preview.iter().filter(|r| r.status == PthStatus::Error).count(); + + if n_changes == 0 { + if common.json { + print_setup_envelope( + if preview_errors > 0 { "error" } else { "already_configured" }, + &npm_preview, + &py_preview, + npm_pm, + py_plan.as_ref(), + &[], + ); + } else if preview_errors > 0 { + println!("No hooks were changed; {preview_errors} item(s) could not be processed (see errors above)."); + } else { + println!("All install hooks are already configured with socket-patch!"); } + return if preview_errors > 0 { 1 } else { 0 }; + } - if !already_configured.is_empty() { - println!("Already configured (will skip):"); - for result in &already_configured { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!(" = {rel_path}"); - } - println!(); + if common.dry_run { + if common.json { + print_setup_envelope( + "dry_run", + &npm_preview, + &py_preview, + npm_pm, + py_plan.as_ref(), + &[], + ); + } else { + println!("\nSummary (dry run):"); + println!(" {n_changes} item(s) would be updated"); } + return if preview_errors > 0 { 1 } else { 0 }; + } - if !errors.is_empty() { - println!("Errors:"); - for result in &errors { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!( - " ! 
{}: {}", - rel_path, - result.error.as_deref().unwrap_or("unknown error") - ); + if !common.yes && !common.json { + if !stdin_is_tty() { + eprintln!("Non-interactive mode detected, proceeding automatically."); + } else { + print!("Proceed with these changes? (y/N): "); + io::stdout().flush().unwrap(); + let mut answer = String::new(); + io::stdin().read_line(&mut answer).unwrap(); + let answer = answer.trim().to_lowercase(); + if answer != "y" && answer != "yes" { + println!("Aborted"); + return 0; } - println!(); } } - if to_update.is_empty() { - // Nothing to update — but that can mean two very different things: - // every file is already configured (a clean exit 0), or some files - // failed to process (e.g. malformed JSON). Errors must surface with - // an honest status and a non-zero exit; otherwise a parse failure is - // silently reported as "already configured" and CI reads it as success. - let errs = errors.len(); - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "error" } else { "already_configured" }, - "updated": 0, - "alreadyConfigured": already_configured.len(), - "errors": errs, - "files": preview_results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - UpdateStatus::Error => "error", - }, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else if errs > 0 { - // Individual errors were already listed in the preview above. 
+ if !common.json { + println!("\nApplying changes..."); + } + + let mut npm_results = Vec::new(); + for loc in &npm_files { + npm_results.push(update_package_json(&loc.path, false, npm_pm).await); + } + let mut py_results = Vec::new(); + let mut warnings = Vec::new(); + if let Some(plan) = &py_plan { + py_results = edit_python_manifests(plan, false, false).await; + warnings = finalize_python(plan, &py_results, &common.cwd).await; + } + + let errors = npm_results.iter().filter(|r| r.status == UpdateStatus::Error).count() + + py_results.iter().filter(|r| r.status == PthStatus::Error).count(); + + if common.json { + print_setup_envelope( + if errors > 0 { "partial_failure" } else { "success" }, + &npm_results, + &py_results, + npm_pm, + py_plan.as_ref(), + &warnings, + ); + } else { + let updated = npm_results.iter().filter(|r| r.status == UpdateStatus::Updated).count() + + py_results.iter().filter(|r| r.status == PthStatus::Updated).count(); + println!("\nSummary:"); + println!(" {updated} item(s) updated"); + if errors > 0 { + println!(" {errors} error(s)"); + } + for w in &warnings { + println!(" warning: {w}"); + } + if let Some(plan) = &py_plan { println!( - "No files were updated; {errs} file(s) could not be processed (see errors above)." + "\nCommit the {} dependency change (and your .socket/ patches) so \ + the hook re-applies in CI after install.", + plan.pm.as_str() ); - } else { - println!("All package.json files are already configured with socket-patch!"); } - return if errs > 0 { 1 } else { 0 }; } - // If not dry-run, ask for confirmation - if !args.common.dry_run { - if !args.common.yes && !args.common.json { - if !stdin_is_tty() { - // Non-interactive: default to yes with warning - eprintln!("Non-interactive mode detected, proceeding automatically."); - } else { - print!("Proceed with these changes? 
(y/N): "); - io::stdout().flush().unwrap(); - let mut answer = String::new(); - io::stdin().read_line(&mut answer).unwrap(); - let answer = answer.trim().to_lowercase(); - if answer != "y" && answer != "yes" { - println!("Aborted"); - return 0; - } - } - } + if errors > 0 { + 1 + } else { + 0 + } +} + +fn print_setup_preview(npm: &[UpdateResult], py: &[PthEditResult], common: &GlobalArgs) { + let npm_changes: Vec<_> = npm.iter().filter(|r| r.status == UpdateStatus::Updated).collect(); + let py_changes: Vec<_> = py.iter().filter(|r| r.status == PthStatus::Updated).collect(); - if !args.common.json { - println!("\nApplying changes..."); + if !npm_changes.is_empty() { + println!("\npackage.json files to update:"); + for r in &npm_changes { + println!(" + {}", pathdiff(&r.path, &common.cwd)); + println!(" -> postinstall: \"{}\"", r.new_script); } - let mut results = Vec::new(); - for loc in &package_json_files { - let result = update_package_json(&loc.path, false, pm).await; - results.push(result); + } + if !py_changes.is_empty() { + println!("\nPython manifests to update (socket-patch-hook):"); + for r in &py_changes { + println!(" + {}", pathdiff(&r.path, &common.cwd)); } + } - let updated = results.iter().filter(|r| r.status == UpdateStatus::Updated).count(); - let already = results.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); - let errs = results.iter().filter(|r| r.status == UpdateStatus::Error).count(); - - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "partial_failure" } else { "success" }, - "updated": updated, - "alreadyConfigured": already, - "errors": errs, - "packageManager": match pm { - PackageManager::Npm => "npm", - PackageManager::Pnpm => "pnpm", - }, - "files": results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - 
UpdateStatus::Error => "error", - }, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else { - println!("\nSummary:"); - println!(" {updated} file(s) updated"); - println!(" {already} file(s) already configured"); - if errs > 0 { - println!(" {errs} error(s)"); - } - } + let npm_already = npm.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); + let py_already = py.iter().filter(|r| r.status == PthStatus::AlreadyConfigured).count(); + if npm_already + py_already > 0 { + println!("\nAlready configured (will skip): {}", npm_already + py_already); + } - if errs > 0 { 1 } else { 0 } - } else { - let updated = preview_results.iter().filter(|r| r.status == UpdateStatus::Updated).count(); - let already = preview_results.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); - let errs = preview_results.iter().filter(|r| r.status == UpdateStatus::Error).count(); - - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": "dry_run", - "wouldUpdate": updated, - "alreadyConfigured": already, - "errors": errs, - "dryRun": true, - "packageManager": match pm { - PackageManager::Npm => "npm", - PackageManager::Pnpm => "pnpm", - }, - "files": preview_results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - UpdateStatus::Error => "error", - }, - "oldScript": r.old_script, - "newScript": r.new_script, - "oldDependenciesScript": r.old_dependencies_script, - "newDependenciesScript": r.new_dependencies_script, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else { - println!("\nSummary:"); - println!(" {updated} file(s) would be updated"); - println!(" {already} file(s) already configured"); - if errs > 0 { - println!(" {errs} error(s)"); - } + let errs: Vec<&str> = npm + .iter() + .filter(|r| r.status == UpdateStatus::Error) + 
.filter_map(|r| r.error.as_deref()) + .chain( + py.iter() + .filter(|r| r.status == PthStatus::Error) + .filter_map(|r| r.error.as_deref()), + ) + .collect(); + if !errs.is_empty() { + println!("\nErrors:"); + for e in errs { + println!(" ! {e}"); } - // Mirror the non-dry-run path: an unprocessable package.json is a - // failure regardless of dry-run, so it must yield a non-zero exit. - if errs > 0 { 1 } else { 0 } } } -fn pathdiff(path: &str, base: &Path) -> String { - let p = Path::new(path); - p.strip_prefix(base) - .map(|r| r.display().to_string()) - .unwrap_or_else(|_| path.to_string()) +#[allow(clippy::too_many_arguments)] +fn print_setup_envelope( + status: &str, + npm: &[UpdateResult], + py: &[PthEditResult], + npm_pm: PackageManager, + py_plan: Option<&PythonPlan>, + warnings: &[String], +) { + let updated = npm.iter().filter(|r| r.status == UpdateStatus::Updated).count() + + py.iter().filter(|r| r.status == PthStatus::Updated).count(); + let already = npm.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count() + + py.iter().filter(|r| r.status == PthStatus::AlreadyConfigured).count(); + let errors = npm.iter().filter(|r| r.status == UpdateStatus::Error).count() + + py.iter().filter(|r| r.status == PthStatus::Error).count(); + + let mut files: Vec = npm + .iter() + .map(|r| { + serde_json::json!({ + "kind": "package_json", + "path": r.path, + "status": update_status_str(&r.status), + "error": r.error, + }) + }) + .collect(); + files.extend(py.iter().map(|r| { + serde_json::json!({ + "kind": "pth", + "path": r.path, + "status": pth_status_str(&r.status), + "error": r.error, + }) + })); + + let mut obj = serde_json::json!({ + "status": status, + "updated": updated, + "alreadyConfigured": already, + "errors": errors, + "packageManager": manager_name(npm_pm), + "files": files, + }); + if status == "dry_run" { + obj["dryRun"] = serde_json::json!(true); + obj["wouldUpdate"] = serde_json::json!(updated); + } + if let Some(plan) = py_plan { + 
obj["pythonPackageManager"] = serde_json::json!(plan.pm.as_str()); + } + if !warnings.is_empty() { + obj["warnings"] = serde_json::json!(warnings); + } + println!("{}", serde_json::to_string_pretty(&obj).unwrap()); } diff --git a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs index ed877cf2..050e2399 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs @@ -25,6 +25,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use std::sync::OnceLock; /// Path to the built binary under test (host mode passes this to the /// driver via `SOCKET_PATCH_BIN`). @@ -32,6 +33,34 @@ fn binary() -> PathBuf { env!("CARGO_BIN_EXE_socket-patch").into() } +/// Build the pure-python `socket-patch-hook` wheel once and cache the path. +/// The pypi cases need it to exercise the `.pth` post-install hook; returns +/// `None` if the build fails (those cases then degrade to a gap). Requires +/// `python3` on PATH (always present in the pypi image / host pypi runs). +fn hook_wheel() -> Option { + static CELL: OnceLock> = OnceLock::new(); + CELL.get_or_init(|| { + let root = workspace_root(); + let dist = root.join("target/setup-matrix-hook"); + std::fs::create_dir_all(&dist).ok()?; + let version = env!("CARGO_PKG_VERSION"); + let ok = Command::new("python3") + .arg(root.join("scripts/build-pypi-wheels.py")) + .args(["--version", version, "--hook-only", "--dist"]) + .arg(&dist) + .stdout(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false); + if !ok { + return None; + } + let wheel = dist.join(format!("socket_patch_hook-{version}-py3-none-any.whl")); + wheel.exists().then_some(wheel) + }) + .clone() +} + /// Workspace root = two levels up from this crate's manifest dir. 
fn workspace_root() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")) @@ -199,6 +228,14 @@ fn run_case(case: &Case) -> RunResult { let driver = driver_path(); let env = case.sm_env(); + // The pypi cases need the prebuilt hook wheel to exercise the `.pth` + // post-install hook; other ecosystems ignore it. + let wheel = if case.ecosystem == "pypi" { + hook_wheel() + } else { + None + }; + let output = if host_mode() { let mut cmd = Command::new("bash"); cmd.arg(&driver); @@ -206,6 +243,9 @@ fn run_case(case: &Case) -> RunResult { cmd.env(k, v); } cmd.env("SOCKET_PATCH_BIN", binary()); + if let Some(w) = &wheel { + cmd.env("SOCKET_PATCH_HOOK_WHEEL", w); + } cmd.output().expect("spawn bash driver") } else { let script = std::fs::read_to_string(&driver) @@ -215,6 +255,22 @@ fn run_case(case: &Case) -> RunResult { for (k, v) in &env { cmd.args(["-e", &format!("{k}={v}")]); } + // Mount the hook wheel into the container, PRESERVING its PEP 427 + // filename (pip/uv/pdm reject a wheel whose filename isn't a valid + // `{name}-{ver}-{tags}.whl`, so we must not rename it on mount). + if let Some(w) = &wheel { + let name = w + .file_name() + .and_then(|n| n.to_str()) + .expect("hook wheel filename"); + let dest = format!("/tmp/{name}"); + cmd.args([ + "-v", + &format!("{}:{}:ro", w.display(), dest), + "-e", + &format!("SOCKET_PATCH_HOOK_WHEEL={dest}"), + ]); + } cmd.arg(format!("socket-patch-test-{}:latest", case.image)); cmd.args(["bash", "-c", &script]); cmd.output().expect("spawn docker run") diff --git a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs index b1907433..9763af79 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs @@ -1,9 +1,17 @@ //! setup-matrix: pypi ecosystem (pip / uv / poetry / pdm / hatch). //! -//! Python installers have no native post-install hook and `socket-patch -//! 
setup` is a no-op for them, so the `baseline_with_setup` / -//! `alt_content_patchset` cases are EXPECTED to fail here (BASELINE -//! GAP). The negative-control / empty / wrong-target cases should pass. +//! Python installers have no native post-install hook, so `socket-patch +//! setup` instead commits a `socket-patch-hook` dependency whose wheel ships +//! a startup `.pth` that re-applies patches after install +//! (package-manager-agnostic). pip, uv and hatch are wired + verified in +//! Docker: their `baseline_with_setup` / `alt_content_patchset` cases APPLY +//! (the harness builds the hook wheel and the driver installs it + fires an +//! interpreter). poetry / pdm are resolver-based — their `add`/`install`/`run` +//! re-resolve the whole manifest (now incl. the committed `socket-patch-hook`) +//! against a package index, which the hermetic test can't provide, so they +//! remain BASELINE GAPs (the mechanism is PM-agnostic and proven by the +//! others). Nested-workspace layouts are also still gaps. The negative-control +//! / empty / wrong-target cases must NOT apply for any of them. //! //! Run: `cargo test -p socket-patch-cli --features setup-e2e --test setup_matrix_pypi` #![cfg(feature = "setup-e2e")] diff --git a/crates/socket-patch-cli/tests/setup_pth_invariants.rs b/crates/socket-patch-cli/tests/setup_pth_invariants.rs new file mode 100644 index 00000000..bc2e5acd --- /dev/null +++ b/crates/socket-patch-cli/tests/setup_pth_invariants.rs @@ -0,0 +1,164 @@ +//! Integration tests for `setup`'s Python `.pth`-hook branch. Like the npm +//! `setup_invariants`, these operate entirely on disk (manifest detection + +//! editing + audit record) and need no network. 
+ +use std::path::{Path, PathBuf}; +use std::process::Command; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +fn run_setup(cwd: &Path, extra: &[&str]) -> (i32, serde_json::Value) { + let mut args = vec!["setup", "--json", "--yes"]; + args.extend_from_slice(extra); + let out = Command::new(binary()) + .args(&args) + .current_dir(cwd) + .env_remove("SOCKET_API_TOKEN") + .env("SOCKET_TELEMETRY_DISABLED", "1") + .output() + .expect("run socket-patch"); + let stdout = String::from_utf8_lossy(&out.stdout).to_string(); + let v = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout must be JSON ({e}):\n{stdout}")); + (out.status.code().unwrap_or(-1), v) +} + +fn write(path: &Path, content: &str) { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).expect("create parent"); + } + std::fs::write(path, content).expect("write file"); +} + +fn read(path: &Path) -> String { + std::fs::read_to_string(path).expect("read file") +} + +#[test] +fn pip_requirements_gets_hook_dep() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests==2.31.0\n"); + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "setup should succeed; payload={v}"); + assert_eq!(v["status"], "success"); + assert_eq!(v["updated"], 1); + assert_eq!(v["pythonPackageManager"], "pip"); + let entry = &v["files"].as_array().unwrap()[0]; + assert_eq!(entry["kind"], "pth"); + + let req = read(&tmp.path().join("requirements.txt")); + assert!(req.contains("socket-patch[hook]"), "got:\n{req}"); + assert!(req.contains("requests==2.31.0"), "must preserve existing deps"); + + // The committed dependency is the source of truth — no separate marker file. 
+ assert!( + !tmp.path().join(".socket/hook.json").exists(), + "setup must not write a separate marker/audit file" + ); +} + +#[test] +fn uv_pyproject_array_edited_and_format_preserved() { + let tmp = tempfile::tempdir().unwrap(); + let original = "[project]\nname = \"x\"\nversion = \"0.0.0\"\ndependencies = [\n \"requests\",\n]\n\n[tool.uv]\n"; + write(&tmp.path().join("pyproject.toml"), original); + write(&tmp.path().join("uv.lock"), ""); // detected as uv + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "payload={v}"); + assert_eq!(v["pythonPackageManager"], "uv"); + + let py = read(&tmp.path().join("pyproject.toml")); + assert!(py.contains("socket-patch[hook]")); + assert!(py.contains("[tool.uv]"), "unrelated tables preserved"); + assert!(py.contains("name = \"x\"")); +} + +#[test] +fn idempotent_second_run_reports_already_configured() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests\n"); + + let (_, _) = run_setup(tmp.path(), &[]); + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0); + assert_eq!(v["status"], "already_configured"); + let req = read(&tmp.path().join("requirements.txt")); + assert_eq!( + req.matches("socket-patch[hook]").count(), + 1, + "must not duplicate the hook dependency" + ); +} + +#[test] +fn dry_run_does_not_modify_or_create_files() { + let tmp = tempfile::tempdir().unwrap(); + let original = "requests\n"; + write(&tmp.path().join("requirements.txt"), original); + + let (code, v) = run_setup(tmp.path(), &["--dry-run"]); + assert_eq!(code, 0); + assert_eq!(v["status"], "dry_run"); + assert_eq!(v["dryRun"], true); + assert_eq!(v["wouldUpdate"], 1); + + assert_eq!(read(&tmp.path().join("requirements.txt")), original); +} + +#[test] +fn remove_reverses_dep() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests\n"); + // Configure first. 
+ let (_, v) = run_setup(tmp.path(), &[]); + assert_eq!(v["status"], "success"); + + let (code, v) = run_setup(tmp.path(), &["--remove"]); + assert_eq!(code, 0, "payload={v}"); + let req = read(&tmp.path().join("requirements.txt")); + assert!(!req.contains("socket-patch[hook]"), "got:\n{req}"); + assert!(req.contains("requests")); +} + +#[test] +fn polyglot_configures_both_npm_and_python() { + let tmp = tempfile::tempdir().unwrap(); + write( + &tmp.path().join("package.json"), + "{ \"name\": \"x\", \"version\": \"0.0.0\" }\n", + ); + write( + &tmp.path().join("pyproject.toml"), + "[project]\nname = \"x\"\nversion = \"0.0.0\"\ndependencies = []\n", + ); + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "payload={v}"); + assert_eq!(v["updated"], 2); + let kinds: Vec<&str> = v["files"] + .as_array() + .unwrap() + .iter() + .map(|f| f["kind"].as_str().unwrap()) + .collect(); + assert!(kinds.contains(&"package_json")); + assert!(kinds.contains(&"pth")); + + assert!(read(&tmp.path().join("package.json")).contains("socket-patch")); + assert!(read(&tmp.path().join("pyproject.toml")).contains("socket-patch[hook]")); +} + +#[test] +fn pure_python_with_no_manifest_files_is_no_op() { + // `setup.py`-only project (no pyproject/requirements): pip path would + // create requirements.txt. But an EMPTY dir with neither markers nor + // package.json must report no_files. 
+ let tmp = tempfile::tempdir().unwrap(); + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0); + assert_eq!(v["status"], "no_files"); +} diff --git a/crates/socket-patch-core/Cargo.toml b/crates/socket-patch-core/Cargo.toml index 3aa4f268..32760aa8 100644 --- a/crates/socket-patch-core/Cargo.toml +++ b/crates/socket-patch-core/Cargo.toml @@ -18,6 +18,7 @@ thiserror = { workspace = true } walkdir = { workspace = true } uuid = { workspace = true } regex = { workspace = true } +toml_edit = { workspace = true } once_cell = { workspace = true } qbsdiff = { workspace = true } tar = { workspace = true } diff --git a/crates/socket-patch-core/src/crawlers/python_crawler.rs b/crates/socket-patch-core/src/crawlers/python_crawler.rs index 087d7437..fa9f6dbd 100644 --- a/crates/socket-patch-core/src/crawlers/python_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/python_crawler.rs @@ -503,7 +503,7 @@ pub async fn get_global_python_site_packages() -> Vec { /// * `requirements.txt` — pip-compile / bare requirements /// * `uv.lock` — uv-managed projects (PEP 751 export sibling is /// `pylock.toml` but in practice `uv.lock` is what ships) -async fn is_python_project(cwd: &Path) -> bool { +pub async fn is_python_project(cwd: &Path) -> bool { let markers = [ "pyproject.toml", "setup.py", diff --git a/crates/socket-patch-core/src/lib.rs b/crates/socket-patch-core/src/lib.rs index 3d5871bb..44b8f890 100644 --- a/crates/socket-patch-core/src/lib.rs +++ b/crates/socket-patch-core/src/lib.rs @@ -5,5 +5,6 @@ pub mod hash; pub mod manifest; pub mod package_json; pub mod patch; +pub mod pth_hook; pub mod utils; pub mod vex; diff --git a/crates/socket-patch-core/src/patch/apply.rs b/crates/socket-patch-core/src/patch/apply.rs index 761b6694..3fd4249e 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -117,6 +117,28 @@ pub fn normalize_file_path(file_name: &str) -> &str { } } +/// True if a 
(post-`normalize_file_path`) manifest key is a safe relative path +/// that stays inside the package directory when joined to it. +/// +/// SECURITY: manifest file keys come from a committed `.socket/manifest.json`, +/// which the auto-running install hook applies without explicit user action. An +/// unvalidated key like `../../home/u/.bashrc` or `/etc/cron.d/x` would let a +/// poisoned manifest write OUTSIDE site-packages (arbitrary-file write → code +/// execution) via `pkg_path.join(key)` — `Path::join` discards the base on an +/// absolute key, and `..` components walk out. We reject anything that isn't a +/// plain relative path (no absolute/root/prefix components, no `..`, no NUL). +pub fn is_safe_relative_subpath(normalized: &str) -> bool { + use std::path::Component; + if normalized.is_empty() || normalized.contains('\0') { + return false; + } + let path = Path::new(normalized); + if path.is_absolute() { + return false; + } + path.components().all(|c| matches!(c, Component::Normal(_) | Component::CurDir)) +} + /// Verify a single file can be patched. pub async fn verify_file_patch( pkg_path: &Path, @@ -124,6 +146,17 @@ pub async fn verify_file_patch( file_info: &PatchFileInfo, ) -> VerifyResult { let normalized = normalize_file_path(file_name); + // SECURITY: never resolve a key that escapes the package directory. + if !is_safe_relative_subpath(normalized) { + return VerifyResult { + file: file_name.to_string(), + status: VerifyStatus::NotFound, + message: Some("Unsafe patch path (escapes package directory)".to_string()), + current_hash: None, + expected_hash: None, + target_hash: None, + }; + } let filepath = pkg_path.join(normalized); let is_new_file = file_info.before_hash.is_empty(); @@ -297,6 +330,13 @@ pub async fn apply_file_patch( expected_hash: &str, ) -> Result<(), std::io::Error> { let normalized = normalize_file_path(file_name); + // SECURITY: refuse to write through a key that escapes the package dir. 
+ if !is_safe_relative_subpath(normalized) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsafe patch path (escapes package directory): {file_name}"), + )); + } let filepath = pkg_path.join(normalized); // Hash-check the in-memory content BEFORE touching disk. Removes @@ -608,6 +648,18 @@ pub async fn apply_package_patch( // First, verify all files for (file_name, file_info) in files { + // SECURITY: reject any manifest key that would escape the package dir + // (absolute path or `..`). Abort the whole package apply before any + // disk write — NOT skippable by `--force`, since a path escape is never + // a legitimate patch target. + if !is_safe_relative_subpath(normalize_file_path(file_name)) { + result.success = false; + result.error = Some(format!( + "Refusing patch with unsafe file path (escapes package directory): {file_name}" + )); + return result; + } + let mut verify_result = verify_file_patch(pkg_path, file_name, file_info).await; if verify_result.status != VerifyStatus::Ready @@ -948,6 +1000,64 @@ mod tests { ); } + #[test] + fn test_is_safe_relative_subpath() { + // Legitimate manifest keys (post-normalize) are accepted. + for ok in [ + "six.py", + "index.js", + "lib/server.js", + "pydantic_ai/models/openai.py", + "./a.py", + ] { + assert!(is_safe_relative_subpath(ok), "should accept {ok:?}"); + } + // Path escapes are rejected on every platform. + for bad in [ + "../etc/passwd", + "../../home/u/.bashrc", + "/etc/passwd", + "a/../../b", + "foo/..", + "", + "with\0null", + "/", + ] { + assert!(!is_safe_relative_subpath(bad), "should reject {bad:?}"); + } + // Windows drive/UNC prefixes are absolute only on Windows (on Unix a + // backslash is an ordinary filename char, so the path stays under the + // package dir and is harmless). 
+ #[cfg(windows)] + for bad in ["\\\\server\\share\\x", "C:\\Windows\\x"] { + assert!(!is_safe_relative_subpath(bad), "should reject {bad:?}"); + } + // The `package/`-prefixed escape that previously slipped through: + // `package//etc/passwd` normalizes to `/etc/passwd`. + assert!(!is_safe_relative_subpath(normalize_file_path("package//etc/passwd"))); + } + + #[tokio::test] + async fn test_apply_file_patch_rejects_escaping_path() { + // apply_file_patch must refuse to write outside the package dir even if + // the (attacker-chosen) content hashes to the declared afterHash. + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("site-packages"); + tokio::fs::create_dir_all(&pkg).await.unwrap(); + let content = b"pwned\n"; + let after = compute_git_sha256_from_bytes(content); + for key in ["../escape.txt", "../../etc/whatever", "/abs/whatever"] { + let res = apply_file_patch(&pkg, key, content, &after).await; + assert!(res.is_err(), "must reject {key:?}"); + assert!( + res.unwrap_err().to_string().contains("Unsafe patch path"), + "wrong error for {key:?}" + ); + } + // Nothing was written outside the package dir. + assert!(!dir.path().join("escape.txt").exists()); + } + #[tokio::test] async fn test_verify_file_patch_not_found() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/pth_hook/detect.rs b/crates/socket-patch-core/src/pth_hook/detect.rs new file mode 100644 index 00000000..9756f2f3 --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/detect.rs @@ -0,0 +1,185 @@ +//! Detect a Python project's dependency manager and probe for the hook dep. + +use std::path::Path; + +/// The dependency `setup` adds (PEP 508 form, used for `requirements.txt` and +/// PEP 621 `[project].dependencies`): the `socket-patch[hook]` extra, which +/// pulls both the socket-patch CLI and the socket-patch-hook wheel (the `.pth` +/// carrier). A single, familiar line. 
Classic Poetry can't express an extra as +/// a bare key, so [`super::edit`] emits the equivalent +/// `socket-patch = { extras = ["hook"] }` there instead. +pub const HOOK_DEP: &str = "socket-patch[hook]"; + +/// Substrings (space-insensitive, lower-cased) that mean the hook is already +/// declared — the `socket-patch[hook]` extra, the standalone wheel, or the +/// underscore spelling. (The Poetry `extras = ["hook"]` form is detected +/// structurally by [`super::edit`], not by this textual check.) +const HOOK_MARKERS: &[&str] = &["socket-patch[hook]", "socket-patch-hook", "socket_patch_hook"]; + +/// Which Python dependency-management style a project uses. Drives both which +/// manifest/table `setup` edits and which lockfile (if any) to refresh. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PythonPackageManager { + Uv, + Poetry, + Pdm, + Hatch, + Pip, +} + +impl PythonPackageManager { + pub fn as_str(&self) -> &'static str { + match self { + Self::Uv => "uv", + Self::Poetry => "poetry", + Self::Pdm => "pdm", + Self::Hatch => "hatch", + Self::Pip => "pip", + } + } + + /// The lockfile-refresh command `(program, args)` for managers whose frozen + /// CI install reads a lockfile that must be regenerated after editing the + /// dependency list. `None` for managers that resolve dependencies directly + /// from the manifest at install time (pip, hatch). + pub fn lock_command(&self) -> Option<(&'static str, &'static [&'static str])> { + match self { + Self::Uv => Some(("uv", &["lock"])), + Self::Poetry => Some(("poetry", &["lock"])), + Self::Pdm => Some(("pdm", &["lock"])), + Self::Hatch | Self::Pip => None, + } + } +} + +/// Detect the dependency manager from lockfiles and `pyproject.toml` tables. +/// +/// Lockfiles are the strongest signal; `[tool.*]` tables come next; a project +/// with only `requirements.txt` / a PEP 621 `pyproject.toml` falls through to +/// `Pip`. 
+pub async fn detect_python_pm(cwd: &Path) -> PythonPackageManager {
+    // Lockfiles, strongest signal first. The order is significant: the first
+    // lockfile that exists decides the result.
+    const LOCKFILES: &[(&str, PythonPackageManager)] = &[
+        ("uv.lock", PythonPackageManager::Uv),
+        ("pdm.lock", PythonPackageManager::Pdm),
+        ("poetry.lock", PythonPackageManager::Poetry),
+    ];
+    // `[tool.*]` tables in `pyproject.toml`, consulted only when no lockfile
+    // was found. Again the first match wins.
+    const TOOL_TABLES: &[(&str, PythonPackageManager)] = &[
+        ("tool.uv", PythonPackageManager::Uv),
+        ("tool.poetry", PythonPackageManager::Poetry),
+        ("tool.pdm", PythonPackageManager::Pdm),
+        ("tool.hatch", PythonPackageManager::Hatch),
+    ];
+
+    for (lockfile, pm) in LOCKFILES {
+        if tokio::fs::metadata(cwd.join(lockfile)).await.is_ok() {
+            return *pm;
+        }
+    }
+    // An unreadable / missing pyproject.toml simply falls through to `Pip`.
+    if let Ok(content) = tokio::fs::read_to_string(cwd.join("pyproject.toml")).await {
+        // Header-anchored checks so a stray substring in a value/comment does
+        // not misclassify.
+        for (table, pm) in TOOL_TABLES {
+            if has_table(&content, table) {
+                return *pm;
+            }
+        }
+    }
+    PythonPackageManager::Pip
+}
+
+/// True if a `[prefix]` or `[prefix.*]` table header (or the `[[...]]`
+/// array-of-tables form) appears in the TOML text.
+///
+/// This is a line-based scan, not a TOML parse. Only the text between the
+/// opening bracket(s) and the first `]` is compared, with whitespace removed,
+/// so headers followed by a comment (`[tool.poetry] # main`) or padded with
+/// spaces (`[ tool.poetry ]`) are recognised as well.
+fn has_table(content: &str, prefix: &str) -> bool {
+    // Hoisted out of the per-line closure: the `prefix.` form matches sub-tables.
+    let dotted = format!("{prefix}.");
+    content.lines().any(|line| {
+        line.trim_start()
+            .strip_prefix('[')
+            .map(|rest| {
+                // Drop the second `[` of an array-of-tables header.
+                let rest = rest.trim_start_matches('[');
+                // Keep only the header name: everything before the first `]`.
+                let name: String = rest
+                    .split(']')
+                    .next()
+                    .unwrap_or(rest)
+                    .chars()
+                    .filter(|c| !c.is_whitespace())
+                    .collect();
+                name == prefix || name.starts_with(&dotted)
+            })
+            .unwrap_or(false)
+    })
+}
+
+/// True if the given manifest text already declares the hook dependency, in any
+/// form. Space- and case-insensitive so `socket-patch [hook]` / `Socket-Patch`
+/// are recognised.
+///
+/// Besides the literal [`HOOK_MARKERS`], a `socket-patch[...]` requirement whose
+/// extras list contains `hook` alongside other extras (`socket-patch[a,hook]`)
+/// is recognised, as are the `_` / `.` spellings of the project name. This is a
+/// substring test, so it is meant for a single requirement spec or line.
+pub fn deps_contain_hook(text: &str) -> bool {
+    // Opening of an extras list, in the equivalent `-` / `_` / `.` spellings of
+    // the project name. Lower-case, to match `normalized`.
+    const EXTRA_PREFIXES: &[&str] = &["socket-patch[", "socket_patch[", "socket.patch["];
+
+    let normalized: String = text
+        .to_lowercase()
+        .chars()
+        .filter(|c| !c.is_whitespace())
+        .collect();
+    if HOOK_MARKERS
+        .iter()
+        .any(|m| normalized.contains(&m.to_lowercase()))
+    {
+        return true;
+    }
+    // The literal markers only cover the exact `socket-patch[hook]` spelling;
+    // inspect each extras list so `hook` is found in any position.
+    EXTRA_PREFIXES.iter().any(|&prefix| {
+        normalized.match_indices(prefix).any(|(at, _)| {
+            normalized[at + prefix.len()..]
+                .split_once(']')
+                .map(|(extras, _)| extras.split(',').any(|extra| extra == "hook"))
+                .unwrap_or(false)
+        })
+    })
+}
+
+/// True if a single PEP 508 dependency spec is the hook dependency.
+pub fn spec_is_hook(spec: &str) -> bool { + deps_contain_hook(spec) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deps_contain_hook_positive_forms() { + assert!(deps_contain_hook("socket-patch[hook]")); + assert!(deps_contain_hook("socket-patch [hook]")); + assert!(deps_contain_hook("Socket-Patch[hook]>=3.3.0")); + assert!(deps_contain_hook("socket-patch-hook==3.3.0")); + assert!(deps_contain_hook("socket_patch_hook")); + } + + #[test] + fn test_deps_contain_hook_negative() { + // A plain socket-patch dependency is NOT the hook. + assert!(!deps_contain_hook("socket-patch>=3.3.0")); + assert!(!deps_contain_hook("requests==2.31.0")); + assert!(!deps_contain_hook("")); + } + + #[test] + fn test_has_table() { + let toml = "[tool.poetry]\nname='x'\n[tool.poetry.dependencies]\n"; + assert!(has_table(toml, "tool.poetry")); + assert!(!has_table(toml, "tool.pdm")); + assert!(has_table("[project]\n", "project")); + // not fooled by a value that contains the text + assert!(!has_table("name = \"tool.poetry helper\"\n", "tool.poetry")); + } + + #[tokio::test] + async fn test_detect_uv_by_lock() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("uv.lock"), "").await.unwrap(); + assert_eq!(detect_python_pm(dir.path()).await, PythonPackageManager::Uv); + } + + #[tokio::test] + async fn test_detect_poetry_by_table() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[tool.poetry]\nname = \"x\"\n", + ) + .await + .unwrap(); + assert_eq!( + detect_python_pm(dir.path()).await, + PythonPackageManager::Poetry + ); + } + + #[tokio::test] + async fn test_detect_pip_fallback() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("requirements.txt"), "requests\n") + .await + .unwrap(); + assert_eq!(detect_python_pm(dir.path()).await, PythonPackageManager::Pip); + } + + #[test] + fn test_lock_command() { + assert_eq!(PythonPackageManager::Uv.lock_command(), 
Some(("uv", &["lock"][..]))); + assert_eq!(PythonPackageManager::Pip.lock_command(), None); + assert_eq!(PythonPackageManager::Hatch.lock_command(), None); + } +} diff --git a/crates/socket-patch-core/src/pth_hook/edit.rs b/crates/socket-patch-core/src/pth_hook/edit.rs new file mode 100644 index 00000000..11c89cc3 --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/edit.rs @@ -0,0 +1,592 @@ +//! Add / remove the `socket-patch[hook]` dependency in a project's manifest. +//! +//! Two manifest kinds are supported: +//! * **pyproject.toml** — edited with `toml_edit` so the user's existing +//! formatting and comments are preserved. Targets the PEP 621 +//! `[project].dependencies` array, or a classic Poetry +//! `[tool.poetry.dependencies]` table when that is the only dependency +//! surface present. +//! * **requirements.txt** — a plain line append / removal. +//! +//! All operations are idempotent and honour `dry_run` (compute the result and +//! report status without writing). This mirrors the contracts of +//! [`crate::package_json::update`] for the npm side. + +use std::path::Path; +use tokio::fs; +use toml_edit::{Array, DocumentMut, InlineTable, Item, Table, Value}; + +use super::detect::{deps_contain_hook, spec_is_hook, HOOK_DEP}; + +/// Which manifest format a path is. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ManifestKind { + Pyproject, + Requirements, +} + +/// Outcome of editing one manifest. Mirrors `package_json::update::UpdateStatus`. 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum PthStatus {
+    /// The manifest content changed (under `dry_run`: would have changed).
+    Updated,
+    /// Nothing to do: the manifest is already in the requested state.
+    AlreadyConfigured,
+    /// The edit failed; `PthEditResult::error` carries the message.
+    Error,
+}
+
+/// Result of one add/remove attempt against a single manifest file.
+#[derive(Debug, Clone)]
+pub struct PthEditResult {
+    /// Display form of the manifest path that was (or would have been) edited.
+    pub path: String,
+    /// Which manifest format the path was treated as.
+    pub kind: ManifestKind,
+    pub status: PthStatus,
+    /// Human-readable failure message; `Some` only when `status` is `Error`.
+    pub error: Option<String>,
+}
+
+impl PthEditResult {
+    /// Build a non-error result (`error` is `None`).
+    fn ok(path: &Path, kind: ManifestKind, status: PthStatus) -> Self {
+        Self {
+            path: path.display().to_string(),
+            kind,
+            status,
+            error: None,
+        }
+    }
+    /// Build a `PthStatus::Error` result carrying `msg`.
+    fn err(path: &Path, kind: ManifestKind, msg: impl Into<String>) -> Self {
+        Self {
+            path: path.display().to_string(),
+            kind,
+            status: PthStatus::Error,
+            error: Some(msg.into()),
+        }
+    }
+}
+
+/// Add the hook dependency to a manifest. Idempotent.
+///
+/// Reads `path`, computes the edited text for `kind`, and writes it back unless
+/// `dry_run` is set. Returns `AlreadyConfigured` when the edit is a no-op,
+/// `Updated` when the text changed (also under `dry_run`, where nothing is
+/// written), and `Error` for I/O failures or an edit the manifest rejects.
+pub async fn add_hook_dependency(path: &Path, kind: ManifestKind, dry_run: bool) -> PthEditResult {
+    let content = match fs::read_to_string(path).await {
+        Ok(c) => c,
+        // A missing requirements.txt is created (the pip-from-scratch path);
+        // a missing pyproject.toml is an error (we don't synthesize one).
+        Err(e)
+            if e.kind() == std::io::ErrorKind::NotFound
+                && kind == ManifestKind::Requirements =>
+        {
+            String::new()
+        }
+        Err(e) => return PthEditResult::err(path, kind, e.to_string()),
+    };
+
+    // `Ok(None)` = already present, `Ok(Some(text))` = new file content.
+    let outcome = match kind {
+        ManifestKind::Pyproject => pyproject_add(&content),
+        ManifestKind::Requirements => requirements_add(&content),
+    };
+
+    match outcome {
+        Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured),
+        Ok(Some(new_content)) => {
+            // Dry run reports the would-be status without touching the disk.
+            if !dry_run {
+                if let Err(e) = fs::write(path, &new_content).await {
+                    return PthEditResult::err(path, kind, e.to_string());
+                }
+            }
+            PthEditResult::ok(path, kind, PthStatus::Updated)
+        }
+        Err(e) => PthEditResult::err(path, kind, e),
+    }
+}
+
+/// Remove the hook dependency from a manifest. Idempotent (already-absent ->
+/// `AlreadyConfigured`, i.e. nothing to do).
+pub async fn remove_hook_dependency(
+    path: &Path,
+    kind: ManifestKind,
+    dry_run: bool,
+) -> PthEditResult {
+    let content = match fs::read_to_string(path).await {
+        Ok(c) => c,
+        // Nothing on disk → nothing to remove (idempotent no-op).
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+            return PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured)
+        }
+        Err(e) => return PthEditResult::err(path, kind, e.to_string()),
+    };
+
+    // `Ok(None)` = hook not declared, `Ok(Some(text))` = new file content.
+    let outcome = match kind {
+        ManifestKind::Pyproject => pyproject_remove(&content),
+        ManifestKind::Requirements => requirements_remove(&content),
+    };
+
+    match outcome {
+        Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured),
+        Ok(Some(new_content)) => {
+            // Dry run reports `Updated` without writing anything.
+            if !dry_run {
+                if let Err(e) = fs::write(path, &new_content).await {
+                    return PthEditResult::err(path, kind, e.to_string());
+                }
+            }
+            PthEditResult::ok(path, kind, PthStatus::Updated)
+        }
+        Err(e) => PthEditResult::err(path, kind, e),
+    }
+}
+
+// ── requirements.txt ────────────────────────────────────────────────────────
+
+/// The newline sequence to use when editing `content`: `"\r\n"` if the text
+/// contains any CRLF, otherwise `"\n"`, so edits don't rewrite CRLF as LF.
+fn newline_of(content: &str) -> &'static str {
+    if content.contains("\r\n") {
+        "\r\n"
+    } else {
+        "\n"
+    }
+}
+
+/// Returns `Some(new_content)` if a line was appended, `None` if already there.
+///
+/// Each line is checked with its trailing `# comment` stripped, so a hook
+/// mentioned only in a comment does not count as declared. Never returns `Err`;
+/// the `Result` keeps the signature parallel to the pyproject editors.
+fn requirements_add(content: &str) -> Result<Option<String>, String> {
+    if content
+        .lines()
+        .any(|l| deps_contain_hook(strip_requirement_comment(l)))
+    {
+        return Ok(None);
+    }
+    let nl = newline_of(content);
+    let mut new = content.to_string();
+    // Terminate an unterminated last line first so the hook gets its own line.
+    if !new.is_empty() && !new.ends_with('\n') {
+        new.push_str(nl);
+    }
+    new.push_str(HOOK_DEP);
+    new.push_str(nl);
+    Ok(Some(new))
+}
+
+/// Returns `Some(new_content)` if any hook line was removed, `None` otherwise.
+fn requirements_remove(content: &str) -> Result, String> { + let kept: Vec<&str> = content + .lines() + .filter(|l| !deps_contain_hook(strip_requirement_comment(l))) + .collect(); + if kept.len() == content.lines().count() { + return Ok(None); + } + let nl = newline_of(content); + let mut new = kept.join(nl); + if !new.is_empty() { + new.push_str(nl); + } + Ok(Some(new)) +} + +/// Strip a trailing `# comment` so we match against the requirement spec only. +fn strip_requirement_comment(line: &str) -> &str { + match line.find('#') { + Some(i) => &line[..i], + None => line, + } +} + +// ── pyproject.toml ─────────────────────────────────────────────────────────── + +/// Returns `Some(new_content)` if the doc was modified, `None` if the hook dep +/// was already present, or `Err` on malformed TOML / wrong-typed tables. +fn pyproject_add(content: &str) -> Result, String> { + let mut doc = content + .parse::() + .map_err(|e| format!("Invalid pyproject.toml: {e}"))?; + + // Prefer PEP 621 `[project].dependencies` when there is a *real* PEP 621 + // surface; otherwise fall back to a classic Poetry `[tool.poetry]` table. + // A `[project]` table that exists only implicitly (e.g. conjured by a + // `[project.urls]` sub-table in a Poetry-1.x project) is NOT a real PEP 621 + // surface — routing such a project to PEP 621 would add a + // `[project].dependencies` that Poetry ignores at install time. The inner + // helpers detect an already-present hook dependency structurally (which the + // textual marker check can't, e.g. a Poetry `extras = ["hook"]` table). + let real_pep621 = doc + .get("project") + .and_then(Item::as_table) + .map(|t| !t.is_implicit() || t.contains_key("dependencies")) + .unwrap_or(false); + let has_poetry = doc + .get("tool") + .and_then(Item::as_table) + .and_then(|t| t.get("poetry")) + .and_then(Item::as_table) + .is_some(); + + let changed = if has_poetry && !real_pep621 { + poetry_add(&mut doc)? + } else { + pep621_add(&mut doc)? 
+ }; + Ok(if changed { Some(doc.to_string()) } else { None }) +} + +fn pyproject_remove(content: &str) -> Result, String> { + let mut doc = content + .parse::() + .map_err(|e| format!("Invalid pyproject.toml: {e}"))?; + + let mut changed = false; + changed |= pep621_remove(&mut doc); + changed |= poetry_remove(&mut doc); + + Ok(if changed { Some(doc.to_string()) } else { None }) +} + +/// Ensure `parent[key]` is a table, creating it if absent. Errors if present +/// but a non-table. +fn ensure_table<'a>(parent: &'a mut Table, key: &str, implicit: bool) -> Result<&'a mut Table, String> { + if !parent.contains_key(key) { + let mut t = Table::new(); + t.set_implicit(implicit); + parent.insert(key, Item::Table(t)); + } + parent + .get_mut(key) + .and_then(Item::as_table_mut) + .ok_or_else(|| format!("`{key}` is not a table")) +} + +fn pep621_add(doc: &mut DocumentMut) -> Result { + let root = doc.as_table_mut(); + let project = ensure_table(root, "project", false)?; + if !project.contains_key("dependencies") { + project.insert("dependencies", Item::Value(Value::Array(Array::new()))); + } + let deps = project + .get_mut("dependencies") + .and_then(Item::as_array_mut) + .ok_or("`project.dependencies` is not an array")?; + if deps + .iter() + .any(|v| v.as_str().map(spec_is_hook).unwrap_or(false)) + { + return Ok(false); + } + deps.push(HOOK_DEP); + Ok(true) +} + +fn pep621_remove(doc: &mut DocumentMut) -> bool { + let deps = match doc + .get_mut("project") + .and_then(Item::as_table_mut) + .and_then(|p| p.get_mut("dependencies")) + .and_then(Item::as_array_mut) + { + Some(d) => d, + None => return false, + }; + let before = deps.len(); + deps.retain(|v| !v.as_str().map(spec_is_hook).unwrap_or(false)); + deps.len() != before +} + +fn poetry_add(doc: &mut DocumentMut) -> Result { + let root = doc.as_table_mut(); + let tool = ensure_table(root, "tool", true)?; + let poetry = ensure_table(tool, "poetry", true)?; + let deps = ensure_table(poetry, "dependencies", false)?; + + 
// Classic Poetry can't express `socket-patch[hook]` as a key, so declare + // the equivalent: `socket-patch` carrying the `hook` extra. Already wired + // if a bare `socket-patch-hook` key exists or the extra is already present. + if deps.contains_key("socket-patch-hook") { + return Ok(false); + } + if let Some(item) = deps.get_mut("socket-patch") { + if item_has_hook_extra(item) { + return Ok(false); + } + // An existing `socket-patch` dep (bare string or a table): merge the + // `hook` extra in place, preserving its version / source / markers. + if let Some(tbl) = item.as_table_like_mut() { + let mut extras = tbl + .get("extras") + .and_then(Item::as_array) + .cloned() + .unwrap_or_default(); + extras.push("hook"); + tbl.insert("extras", Item::Value(Value::Array(extras))); + } else { + let version = item.as_str().map(str::to_string).unwrap_or_else(|| "*".to_string()); + deps.insert("socket-patch", Item::Value(hook_inline_table(&version))); + } + return Ok(true); + } + deps.insert("socket-patch", Item::Value(hook_inline_table("*"))); + Ok(true) +} + +fn poetry_remove(doc: &mut DocumentMut) -> bool { + let deps = match doc + .get_mut("tool") + .and_then(Item::as_table_mut) + .and_then(|t| t.get_mut("poetry")) + .and_then(Item::as_table_mut) + .and_then(|p| p.get_mut("dependencies")) + .and_then(Item::as_table_mut) + { + Some(d) => d, + None => return false, + }; + + let mut changed = false; + // Drop a legacy bare `socket-patch-hook` key if present. + if deps.remove("socket-patch-hook").is_some() { + changed = true; + } + // Strip the `hook` extra from a `socket-patch` dep table, leaving the rest + // of the spec intact. 
+ if let Some(tbl) = deps.get_mut("socket-patch").and_then(Item::as_table_like_mut) { + if let Some(extras) = tbl.get_mut("extras").and_then(Item::as_array_mut) { + let before = extras.len(); + extras.retain(|v| v.as_str() != Some("hook")); + if extras.len() != before { + changed = true; + } + if extras.is_empty() { + tbl.remove("extras"); + } + } + } + changed +} + +/// Build `{ version = "", extras = ["hook"] }`. +fn hook_inline_table(version: &str) -> Value { + let mut it = InlineTable::new(); + it.insert("version", Value::from(version)); + let mut extras = Array::new(); + extras.push("hook"); + it.insert("extras", Value::Array(extras)); + Value::InlineTable(it) +} + +/// True if a dependency item (inline table or sub-table) already carries the +/// `hook` extra. +fn item_has_hook_extra(item: &Item) -> bool { + item.as_table_like() + .and_then(|t| t.get("extras")) + .and_then(Item::as_array) + .map(|a| a.iter().any(|v| v.as_str() == Some("hook"))) + .unwrap_or(false) +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── requirements.txt ───────────────────────────────────────────── + + #[test] + fn test_requirements_add() { + let out = requirements_add("requests==2.31.0\n").unwrap().unwrap(); + assert!(out.contains("requests==2.31.0")); + assert!(out.contains("socket-patch[hook]")); + assert!(out.ends_with('\n')); + } + + #[test] + fn test_requirements_add_no_trailing_newline() { + let out = requirements_add("requests").unwrap().unwrap(); + assert_eq!(out, "requests\nsocket-patch[hook]\n"); + } + + #[test] + fn test_requirements_add_idempotent() { + // The extra, the standalone wheel, and a pinned variant are all recognized. 
+ assert!(requirements_add("socket-patch[hook]\n").unwrap().is_none()); + assert!(requirements_add("socket-patch-hook\n").unwrap().is_none()); + assert!(requirements_add("socket-patch-hook==3.3.0\n").unwrap().is_none()); + } + + #[test] + fn test_requirements_remove() { + let out = requirements_remove("requests\nsocket-patch[hook]\n") + .unwrap() + .unwrap(); + assert_eq!(out, "requests\n"); + } + + #[test] + fn test_requirements_remove_absent() { + assert!(requirements_remove("requests\n").unwrap().is_none()); + } + + // ── pyproject PEP 621 ──────────────────────────────────────────── + + #[test] + fn test_pep621_add_to_existing_array() { + let toml = "[project]\nname = \"x\"\ndependencies = [\"requests\"]\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + assert!(out.contains("socket-patch[hook]")); + assert!(out.contains("requests")); + // Re-parse to confirm validity + idempotency. + assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_pep621_add_creates_dependencies() { + let toml = "[project]\nname = \"x\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + let deps = doc["project"]["dependencies"].as_array().unwrap(); + assert!(deps.iter().any(|v| v.as_str() == Some("socket-patch[hook]"))); + } + + #[test] + fn test_pep621_preserves_other_content() { + let toml = "[build-system]\nrequires = [\"setuptools\"]\n\n[project]\nname = \"x\"\nversion = \"1.0\"\ndependencies = [\n \"requests\",\n]\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + assert!(out.contains("[build-system]")); + assert!(out.contains("version = \"1.0\"")); + assert!(out.contains("requests")); + assert!(out.contains("socket-patch[hook]")); + } + + #[test] + fn test_pep621_remove() { + let toml = "[project]\ndependencies = [\"requests\", \"socket-patch[hook]\"]\n"; + let out = pyproject_remove(toml).unwrap().unwrap(); + assert!(!out.contains("socket-patch[hook]")); + assert!(out.contains("requests")); + } + + // ── 
pyproject Poetry (the `socket-patch[hook]` equivalent: the + // `socket-patch` dep carrying the `hook` extra) ───────────────── + + #[test] + fn test_poetry_add_new_dep() { + let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!( + item_has_hook_extra(&doc["tool"]["poetry"]["dependencies"]["socket-patch"]), + "poetry dep must carry the hook extra; got:\n{out}" + ); + // Idempotent. + assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_poetry_merges_extra_into_existing_dep() { + // An existing `socket-patch = "^3.3.0"` gains the hook extra, version kept. + let toml = "[tool.poetry]\nname = \"x\"\n[tool.poetry.dependencies]\nsocket-patch = \"^3.3.0\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + let item = &doc["tool"]["poetry"]["dependencies"]["socket-patch"]; + assert!(item_has_hook_extra(item), "hook extra must be added"); + assert_eq!( + item.as_table_like().and_then(|t| t.get("version")).and_then(Item::as_str), + Some("^3.3.0"), + "existing version must be preserved" + ); + } + + #[test] + fn test_poetry_subtable_dependency_preserved() { + // A `[tool.poetry.dependencies.socket-patch]` sub-table gains the hook + // extra while keeping its version / source. + let toml = "[tool.poetry.dependencies.socket-patch]\nversion = \"^3.3.0\"\ngit = \"https://example.com/x.git\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + let sp = &doc["tool"]["poetry"]["dependencies"]["socket-patch"]; + assert!(item_has_hook_extra(sp), "hook extra must be added"); + assert_eq!( + sp.as_table_like().and_then(|t| t.get("git")).and_then(Item::as_str), + Some("https://example.com/x.git"), + "sub-table keys must survive" + ); + // Idempotent. 
+ assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_poetry_remove_strips_extra() { + let toml = "[tool.poetry.dependencies]\nsocket-patch = {version = \"*\", extras = [\"hook\"]}\npython = \"^3.9\"\n"; + let out = pyproject_remove(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!(!item_has_hook_extra( + &doc["tool"]["poetry"]["dependencies"]["socket-patch"] + )); + assert!(doc["tool"]["poetry"]["dependencies"].get("python").is_some()); + } + + #[test] + fn test_pep621_preferred_when_both_present() { + // poetry 2.x: both [project] and [tool.poetry] — edit the PEP 621 array. + let toml = "[project]\nname = \"x\"\ndependencies = []\n\n[tool.poetry]\nname = \"x\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!(doc["project"]["dependencies"] + .as_array() + .unwrap() + .iter() + .any(|v| v.as_str() == Some("socket-patch[hook]"))); + } + + #[test] + fn test_invalid_toml_errors() { + assert!(pyproject_add("this is = = not toml [[[").is_err()); + } + + #[test] + fn test_classic_poetry_with_project_urls_routes_to_poetry() { + // `[project.urls]` conjures an implicit `[project]` table; a Poetry 1.x + // project must still be edited in the Poetry table, not given a + // `[project].dependencies` Poetry ignores. 
+ let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n\n[project.urls]\nHome = \"https://example.com\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!( + item_has_hook_extra(&doc["tool"]["poetry"]["dependencies"]["socket-patch"]), + "must edit the poetry table, not create [project].dependencies; got:\n{out}" + ); + assert!(doc.get("project").and_then(|p| p.get("dependencies")).is_none()); + } + + #[test] + fn test_requirements_preserves_crlf() { + let out = requirements_add("requests\r\n").unwrap().unwrap(); + assert_eq!(out, "requests\r\nsocket-patch[hook]\r\n"); + let removed = requirements_remove(&out).unwrap().unwrap(); + assert_eq!(removed, "requests\r\n"); + } + + // ── file-level NotFound handling (the create / no-op paths) ────── + + #[tokio::test] + async fn test_add_creates_missing_requirements() { + let dir = tempfile::tempdir().unwrap(); + let req = dir.path().join("requirements.txt"); // does not exist + let res = add_hook_dependency(&req, ManifestKind::Requirements, false).await; + assert_eq!(res.status, PthStatus::Updated); + let body = tokio::fs::read_to_string(&req).await.unwrap(); + assert_eq!(body, "socket-patch[hook]\n"); + } + + #[tokio::test] + async fn test_add_missing_pyproject_is_error() { + let dir = tempfile::tempdir().unwrap(); + let py = dir.path().join("pyproject.toml"); // does not exist + let res = add_hook_dependency(&py, ManifestKind::Pyproject, false).await; + assert_eq!(res.status, PthStatus::Error); + } + + #[tokio::test] + async fn test_remove_missing_file_is_noop() { + let dir = tempfile::tempdir().unwrap(); + let req = dir.path().join("requirements.txt"); // does not exist + let res = remove_hook_dependency(&req, ManifestKind::Requirements, false).await; + assert_eq!(res.status, PthStatus::AlreadyConfigured); + } + + #[tokio::test] + async fn test_add_dry_run_does_not_create() { + let dir = tempfile::tempdir().unwrap(); + let req = 
dir.path().join("requirements.txt"); + let res = add_hook_dependency(&req, ManifestKind::Requirements, true).await; + assert_eq!(res.status, PthStatus::Updated); + assert!(!req.exists(), "dry-run must not create the file"); + } +} diff --git a/crates/socket-patch-core/src/pth_hook/mod.rs b/crates/socket-patch-core/src/pth_hook/mod.rs new file mode 100644 index 00000000..1fbf6278 --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/mod.rs @@ -0,0 +1,25 @@ +//! Python `.pth` post-install hook setup. +//! +//! Where npm-family ecosystems get an automatic post-install patch hook via a +//! `package.json` `postinstall` script ([`crate::package_json`]), Python has no +//! universal installer hook. Instead, `socket-patch setup` declares a committed +//! dependency on the `socket-patch-hook` wheel (via the `socket-patch[hook]` +//! extra); installing that wheel lays a startup `.pth` into site-packages that +//! re-applies patches after any install — package-manager-agnostic, because it +//! rides on the interpreter's startup hook rather than any one installer. +//! +//! This module is the Rust side: detecting the project's dependency manager +//! ([`detect`]) and editing its manifest(s) to add/remove the hook dependency +//! ([`edit`]). All actual patching stays in `socket-patch apply`. +//! +//! The committed dependency line is the single source of truth that the hook is +//! active — there is no separate marker/audit file (git history is the audit +//! trail), so nothing can drift out of sync with the manifest. 
+ +pub mod detect; +pub mod edit; + +pub use detect::{deps_contain_hook, detect_python_pm, PythonPackageManager, HOOK_DEP}; +pub use edit::{ + add_hook_dependency, remove_hook_dependency, ManifestKind, PthEditResult, PthStatus, +}; diff --git a/pypi/socket-patch-hook/README.md b/pypi/socket-patch-hook/README.md new file mode 100644 index 00000000..3058d030 --- /dev/null +++ b/pypi/socket-patch-hook/README.md @@ -0,0 +1,62 @@ +# socket-patch-hook + +A tiny, package-manager-agnostic **post-install hook** for +[`socket-patch`](https://pypi.org/project/socket-patch/). + +Python package managers (pip, uv, poetry, pdm, hatch) have no universal +post-install step, so a `pip install` / `--force-reinstall` can silently revert +files that `socket-patch` previously patched. This package closes that gap. + +## How it works + +Installing this wheel lays down a startup `.pth` file in `site-packages` +(RECORD-tracked, so `pip uninstall` removes it cleanly). At interpreter startup +the hook does a microsecond-cheap check of whether the set of installed +distributions changed since the last run; only then does it re-apply your +project's **committed** patches by invoking `socket-patch apply --offline`. All +real patching (hash verification, atomic writes, locking) is done by the +`socket-patch` binary — this package only *triggers* it. + +Because it rides on Python's interpreter-startup `.pth` mechanism (not on any +one installer's hooks), it works the same under every Python package manager. + +## Safety + +A `.pth` that runs code at startup deserves a careful safety model. This one: + +- **Fail-open** — every code path is wrapped so it can never raise into the + interpreter; the worst outcome of any bug is that patches aren't re-applied. +- **Venv-anchored** — it applies only the `.socket/manifest.json` of the project + that owns the virtualenv it's installed in, never whatever `.socket/` happens + to sit above the current working directory. 
+- **Hash-verified, in-tree only** — the underlying `socket-patch apply` verifies + each file's hash before patching and refuses manifest keys that would write + outside the installed package directory. +- **Trusted binary** — it runs the `socket-patch` binary from the installed + `socket-patch` package, not the first one found on `PATH`. +- **Offline + cheap** — no network at startup; the no-change path is a couple of + syscalls. It only spawns `socket-patch` when installed packages changed. +- **Opt-in + easy off** — present only when a project committed it; disable it for any + interpreter with `SOCKET_PATCH_HOOK=off`. + +## Activating it + +Don't add this by hand. Run, in your project: + +``` +socket-patch setup +``` + +That commits a `socket-patch[hook]` dependency to your repo — the `[hook]` +extra on the main `socket-patch` package, which pulls in both the CLI and this +wheel (you never reference `socket-patch-hook` directly). The committed +dependency is the source of truth — there's no separate marker file. The hook +then activates automatically in CI after install. Remove it with `socket-patch +setup --remove` followed by `pip uninstall socket-patch-hook`. (Classic Poetry +can't express an extra as a bare key, so there `setup` writes the equivalent +`socket-patch = { extras = ["hook"] }`.) + +## Disabling at runtime + +Set `SOCKET_PATCH_HOOK=off` (or `SOCKET_NO_HOOK=1`) to fully bypass the hook for +a given interpreter — checked before any hook code runs. 
diff --git a/pypi/socket-patch-hook/pyproject.toml b/pypi/socket-patch-hook/pyproject.toml new file mode 100644 index 00000000..bbf2dd4a --- /dev/null +++ b/pypi/socket-patch-hook/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["setuptools>=64"] +build-backend = "setuptools.build_meta" + +[project] +name = "socket-patch-hook" +version = "3.3.0" +description = "Auto-apply Socket security patches after install via a package-manager-agnostic .pth startup hook" +readme = "README.md" +license = "MIT" +requires-python = ">=3.8" +authors = [ + { name = "Socket Security" } +] +keywords = ["security", "patch", "hook", "dependencies", "pth"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Topic :: Security", + "Topic :: Software Development :: Build Tools", +] +# Intentionally NO dependency on socket-patch: the hook is version-agnostic and +# invokes whatever `socket-patch` CLI is on PATH (or pip-installed in the env), +# no-opping if none is present. This keeps the committed `socket-patch-hook` +# dependency a single stable token that never needs a version bump, and lets the +# CLI be provisioned independently (pip, pipx, a GitHub Action, system install). +# (The canonical build is scripts/build-pypi-wheels.py, which also lays down the +# startup .pth; this block keeps the directory a valid project for `pip install .`.) +dependencies = [] + +[project.urls] +Homepage = "https://github.com/SocketDev/socket-patch" +Repository = "https://github.com/SocketDev/socket-patch" + +[tool.setuptools] +packages = ["socket_patch_hook"] diff --git a/pypi/socket-patch-hook/socket_patch_hook.pth b/pypi/socket-patch-hook/socket_patch_hook.pth new file mode 100644 index 00000000..38c4307f --- /dev/null +++ b/pypi/socket-patch-hook/socket_patch_hook.pth @@ -0,0 +1,13 @@ +# socket-patch post-install hook — installed by the `socket-patch-hook` wheel. 
+# Re-applies this project's committed Socket security patches (.socket/) after a +# pip/uv/poetry/etc. install reverts a patched file. At interpreter startup it +# does a cheap "did the installed packages change?" check and, only then, runs +# `socket-patch apply --offline`. Fail-open: every error is swallowed so it can +# never break interpreter startup, and it does nothing unless this environment's +# project has a committed .socket/manifest.json. +# Disable (this interpreter): SOCKET_PATCH_HOOK=off (or SOCKET_NO_HOOK=1) +# Remove (this project): socket-patch setup --remove then pip uninstall socket-patch-hook +# Details: https://github.com/SocketDev/socket-patch +# (Lines starting with `#` are ignored by Python's site module; the single +# `import` line below is the only code it executes.) +import os; exec("try:\n import socket_patch_hook as _h; _h.run()\nexcept Exception: pass") if (os.environ.get('SOCKET_PATCH_HOOK','').strip().lower() not in ('off','0','false','no') and os.environ.get('SOCKET_NO_HOOK','').strip().lower() not in ('1','true','yes','on')) else None diff --git a/pypi/socket-patch-hook/socket_patch_hook/__init__.py b/pypi/socket-patch-hook/socket_patch_hook/__init__.py new file mode 100644 index 00000000..9e3dee76 --- /dev/null +++ b/pypi/socket-patch-hook/socket_patch_hook/__init__.py @@ -0,0 +1,294 @@ +"""socket-patch post-install hook (package-manager-agnostic). + +This module is imported at Python interpreter startup by a wheel-shipped +``socket_patch_hook.pth`` file (the same ``.pth`` ``import``-line mechanism +coverage.py uses). When the set of installed distributions has changed since the +last run -- e.g. ``pip install`` / ``--force-reinstall`` / ``uv sync`` reverted a +file that Socket had patched -- it re-applies the project's committed patches by +invoking the hardened ``socket-patch apply`` binary in offline mode. All actual +patching (hash verification, atomic writes, locking) stays in that binary; this +module only *triggers* it. 
+ +Hard safety contract: + * ``run()`` must NEVER raise into ``site.py`` (a raise here would hit every + interpreter start in the environment). Every step is failure-swallowing. + * The common, no-change path must cost only a few syscalls (it does: a bounded + parent walk, one ``scandir`` of site-packages, and one small file read). + * The worst outcome of any bug here is that patches are simply not re-applied. + +Disable entirely with ``SOCKET_PATCH_HOOK=off`` (also checked in the ``.pth`` +line before this module is even imported) or ``SOCKET_NO_HOOK=1``. +""" + +import os +import sys + +__all__ = ["run"] + +# Set in the environment of the spawned ``apply`` process so a nested +# interpreter started underneath it does not re-trigger the hook. (The apply +# binary itself is native Rust, but it -- or a tool it shells out to -- may +# invoke ``python``, which would re-process the ``.pth``.) +_REENTRANCY_ENV = "_SOCKET_PATCH_HOOK_ACTIVE" + +# Upper bound on the parent-directory walk used to locate the project root. +_MAX_PARENTS = 40 + +# Generous safety net for a single hook-triggered apply. The apply is offline +# and local, so this only ever fires if something is badly wrong; it exists so a +# hung apply can never wedge interpreter startup forever. +_APPLY_TIMEOUT_SECONDS = 120 + + +def _truthy(value): + return str(value or "").strip().lower() in ("1", "true", "yes", "on") + + +def _disabled(): + """True if the user has switched the hook off via env var.""" + if _truthy(os.environ.get("SOCKET_NO_HOOK")): + return True + return os.environ.get("SOCKET_PATCH_HOOK", "").strip().lower() in ( + "off", + "0", + "false", + "no", + ) + + +def _site_packages_dir(): + # __file__ == /socket_patch_hook/__init__.py + return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +def _find_project_root(): + """Locate the project whose committed ``.socket/manifest.json`` this + environment opted into. Returns ``None`` (hook no-ops) if none is found. 
+ + SECURITY — which manifest do we trust? When running inside a virtualenv we + anchor the search to the **venv** (``sys.prefix``), NOT the current working + directory: the committed ``socket-patch[hook]`` dependency installed this + hook into THIS venv, so the owning project is an ancestor of the venv (e.g. + ``/.venv``). Anchoring to the venv ties the patches we apply to the + project that opted in, instead of whatever ``.socket/`` happens to sit above + the cwd — which could belong to an unrelated or hostile parent/sibling + project (a `python` started from elsewhere must not pull in a foreign + manifest). Only when there is no venv (a system / container interpreter, + where there is nothing to anchor to) do we fall back to the cwd. + """ + in_venv = getattr(sys, "prefix", "") != getattr(sys, "base_prefix", getattr(sys, "prefix", "")) + anchors = [] + if in_venv: + anchors.append(sys.prefix) + env_venv = os.environ.get("VIRTUAL_ENV") + if env_venv: + anchors.append(env_venv) + else: + try: + anchors.append(os.getcwd()) + except OSError: + pass + + seen = set() + for start in anchors: + try: + d = os.path.abspath(start) + except OSError: + continue + for _ in range(_MAX_PARENTS): + if d in seen: + break + seen.add(d) + if os.path.isfile(os.path.join(d, ".socket", "manifest.json")): + return d + parent = os.path.dirname(d) + if parent == d: # reached the filesystem root + break + d = parent + return None + + +def _fingerprint(site_dir): + """Cheap signature of the installed distributions in ``site_dir``. + + A SHA-1 of the sorted ``(name, mtime)`` of every ``*.dist-info`` / + ``*.egg-info`` entry. This changes on any install / reinstall / uninstall, + but is deliberately immune to: + * our own patch writes (which touch package *files*, not the metadata + dirs), so the fingerprint is stable across an apply -- no re-apply loop; + * the stamp file (kept in a user cache, outside site-packages); + * ``__pycache__`` / ``.pyc`` churn. 
+ Returns ``"?"`` on error so we fail toward a (harmless, idempotent) re-apply. + """ + import hashlib + + try: + items = [] + with os.scandir(site_dir) as it: + for entry in it: + name = entry.name + if name.endswith(".dist-info") or name.endswith(".egg-info"): + try: + mtime = entry.stat().st_mtime_ns + except OSError: + mtime = 0 + items.append("%s:%d" % (name, mtime)) + items.sort() + return hashlib.sha1( + "\n".join(items).encode("utf-8", "replace") + ).hexdigest() + except OSError: + return "?" + + +def _cache_dir(): + if os.name == "nt": + base = os.environ.get("LOCALAPPDATA") or os.path.expanduser("~") + else: + base = os.environ.get("XDG_CACHE_HOME") or os.path.join( + os.path.expanduser("~"), ".cache" + ) + return os.path.join(base, "socket-patch", "hook-stamps") + + +def _stamp_path(site_dir): + """Per-site-packages stamp file, in a user cache so writing it never + perturbs the site-packages fingerprint and never dirties the repo.""" + import hashlib + + key = hashlib.sha1( + os.path.abspath(site_dir).encode("utf-8", "replace") + ).hexdigest() + return os.path.join(_cache_dir(), key) + + +def _read_stamp(path): + try: + with open(path, "r") as f: + return f.read().strip() + except OSError: + return None + + +def _write_stamp(path, value): + tmp = None + try: + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp = "%s.%d.tmp" % (path, os.getpid()) + with open(tmp, "w") as f: + f.write(value) + os.replace(tmp, path) + except OSError: + if tmp: + try: + os.unlink(tmp) + except OSError: + pass + + +def _resolve_binary(): + """Locate the ``socket-patch`` binary to run. + + SECURITY — order matters. We prefer the binary **bundled in the installed + ``socket_patch`` package** (the one `socket-patch[hook]` pulls in: a + RECORD-tracked file resolved by the dependency solver) and only fall back to + ``PATH`` if that package isn't present. 
Resolving via ``PATH`` first would + let a malicious ``socket-patch`` placed earlier on ``PATH`` (or `.` on PATH) + be executed at every interpreter startup. Returns ``None`` if neither is + found, in which case the hook no-ops. + """ + try: + import socket_patch + + resolver = getattr(socket_patch, "_resolve_binary", None) + if resolver is not None: + path = resolver() + if path: + return path + except Exception: + pass + try: + import shutil + + return shutil.which("socket-patch") + except Exception: + return None + + +def _apply(binary, project_root): + """Run ``socket-patch apply`` synchronously, offline, best-effort. + + Synchronous so the patched bytes are in place before the interpreter + proceeds to user imports. Offline so it only ever re-heals from the + committed ``.socket/`` cache and never blocks startup on the network. + ``--lock-timeout 0`` so a parallel interpreter that loses the apply lock + (e.g. under ``pytest -n``) skips instantly instead of piling up. + + Returns ``True`` only if apply exited 0. A non-zero exit (e.g. losing the + apply lock to a sibling interpreter) returns ``False`` so the caller does + NOT stamp the state as handled and the heal is retried on the next start. + """ + import subprocess + + argv = [ + binary, + "apply", + "--offline", + "--silent", + "--ecosystems", + "pypi", + "--cwd", + project_root, + "--lock-timeout", + "0", + ] + env = dict(os.environ) + env[_REENTRANCY_ENV] = "1" + kwargs = { + "cwd": project_root, + "env": env, + "stdin": subprocess.DEVNULL, + "stdout": subprocess.DEVNULL, + "stderr": subprocess.DEVNULL, + "timeout": _APPLY_TIMEOUT_SECONDS, + } + # Don't flash a console window for a pythonw-hosted (no-console) app. + if os.name == "nt": + kwargs["creationflags"] = getattr(subprocess, "CREATE_NO_WINDOW", 0) + try: + return subprocess.run(argv, **kwargs).returncode == 0 + except Exception: + # Includes TimeoutExpired and OSError (binary vanished mid-run). 
+ return False + + +def run(): + """Entry point invoked by the ``.pth`` line. Never raises.""" + try: + # Cheapest possible bail-outs first. + if os.environ.get(_REENTRANCY_ENV): + return + if _disabled(): + return + project_root = _find_project_root() + if project_root is None: + return + site_dir = _site_packages_dir() + fp = _fingerprint(site_dir) + stamp_path = _stamp_path(site_dir) + if _read_stamp(stamp_path) == fp: + return # nothing installed/reinstalled since the last apply + binary = _resolve_binary() + if not binary: + return + # Stamp only on a successful apply. The dist-info fingerprint is + # unchanged by an apply (which patches package files, not metadata + # dirs), so storing the pre-apply value is correct -- and gating on + # success means a lock-contended / failed apply is retried next start + # rather than being silently marked as handled. + if _apply(binary, project_root): + _write_stamp(stamp_path, fp) + except Exception: + # Final backstop. The .pth wrapper also guards, but a raise here would + # hit every interpreter start, so never rely on a single layer. + return diff --git a/pypi/socket-patch-hook/test_hook.py b/pypi/socket-patch-hook/test_hook.py new file mode 100644 index 00000000..e843f981 --- /dev/null +++ b/pypi/socket-patch-hook/test_hook.py @@ -0,0 +1,260 @@ +"""Tests for the socket-patch startup hook. + +Run with: ``python -m unittest test_hook`` (no third-party deps required). + +The overriding contract under test is *safety*: the hook must never raise, must +no-op cheaply when there is nothing to do, must invoke ``socket-patch apply`` +with the right offline arguments only when the installed distributions have +changed, and must only ever apply the manifest of the project that owns this +environment (never a foreign one above the cwd). 
+""" + +import os +import sys +import unittest +from unittest import mock + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import socket_patch_hook as hook # noqa: E402 + + +class HookTestBase(unittest.TestCase): + def setUp(self): + self._cwd = os.getcwd() + # Isolate env: clear switches + reentrancy + venv + cache redirect. + self._saved_env = dict(os.environ) + for k in ("SOCKET_PATCH_HOOK", "SOCKET_NO_HOOK", "VIRTUAL_ENV", hook._REENTRANCY_ENV): + os.environ.pop(k, None) + self._tmp = self._mkdtemp() + os.environ["XDG_CACHE_HOME"] = os.path.join(self._tmp, "cache") + os.environ["LOCALAPPDATA"] = os.path.join(self._tmp, "cache") + + def tearDown(self): + os.chdir(self._cwd) + os.environ.clear() + os.environ.update(self._saved_env) + + def _mkdtemp(self): + import tempfile + + d = tempfile.mkdtemp() + self.addCleanup(self._rmtree, d) + return d + + @staticmethod + def _rmtree(path): + import shutil + + shutil.rmtree(path, ignore_errors=True) + + def _make_project(self): + """A temp dir that looks like a socket-patch project (has a manifest).""" + root = self._mkdtemp() + os.makedirs(os.path.join(root, ".socket")) + with open(os.path.join(root, ".socket", "manifest.json"), "w") as f: + f.write('{"patches": {}}') + return root + + +class TestRunSpawning(HookTestBase): + # These exercise the spawn/guard/stamp logic; project discovery is mocked + # (it has its own tests in TestProjectRootDiscovery). 
+ def test_applies_when_manifest_present_and_state_changed(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: + hook.run() + self.assertEqual(run.call_count, 1) + argv = run.call_args[0][0] + self.assertEqual(argv[0], "/fake/socket-patch") + self.assertIn("apply", argv) + self.assertIn("--offline", argv) + self.assertIn("--silent", argv) + self.assertEqual(argv[argv.index("--ecosystems") + 1], "pypi") + self.assertEqual( + os.path.realpath(argv[argv.index("--cwd") + 1]), + os.path.realpath(root), + ) + self.assertEqual(argv[argv.index("--lock-timeout") + 1], "0") + env = run.call_args[1]["env"] + self.assertEqual(env[hook._REENTRANCY_ENV], "1") + + def test_second_run_is_a_noop_when_state_unchanged(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: + hook.run() # first run applies + writes the stamp (success) + hook.run() # second run: fingerprint matches stamp -> skip + self.assertEqual(run.call_count, 1) + + def test_failed_apply_does_not_stamp_so_it_retries(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=1)) as run: + hook.run() + hook.run() + self.assertEqual(run.call_count, 2, "a failed apply must be retried next start") + + def test_noop_without_manifest(self): + with mock.patch.object(hook, "_find_project_root", return_value=None), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + 
mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + def test_noop_when_binary_missing(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value=None), \ + mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + +class TestDisableSwitches(HookTestBase): + def _run_disabled(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run") as run: + hook.run() + return run + + def test_socket_patch_hook_off(self): + os.environ["SOCKET_PATCH_HOOK"] = "off" + self._run_disabled().assert_not_called() + + def test_socket_no_hook(self): + os.environ["SOCKET_NO_HOOK"] = "1" + self._run_disabled().assert_not_called() + + def test_reentrancy_guard(self): + os.environ[hook._REENTRANCY_ENV] = "1" + self._run_disabled().assert_not_called() + + +class TestNeverRaises(HookTestBase): + def test_run_swallows_resolver_errors(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", side_effect=RuntimeError("boom")): + hook.run() # must not propagate + + def test_run_swallows_subprocess_errors(self): + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", side_effect=OSError("no such binary")): + hook.run() # must not raise + + def test_apply_timeout_is_swallowed(self): + import subprocess + + root = self._make_project() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch( + "subprocess.run", 
+ side_effect=subprocess.TimeoutExpired(cmd="x", timeout=1), + ): + hook.run() # must not raise + + def test_run_swallows_discovery_errors(self): + with mock.patch.object(hook, "_find_project_root", side_effect=RuntimeError("boom")), \ + mock.patch("subprocess.run") as run: + hook.run() # must not raise + run.assert_not_called() + + +class TestProjectRootDiscovery(HookTestBase): + """The hook must apply only the manifest of the project that OWNS this + environment — anchored to the venv, not whatever .socket/ sits above cwd.""" + + def _socket(self, d): + os.makedirs(os.path.join(d, ".socket")) + with open(os.path.join(d, ".socket", "manifest.json"), "w") as f: + f.write('{"patches": {}}') + + def test_anchors_to_venv_not_cwd(self): + # venv at /.venv; manifest at ; cwd is elsewhere. + proj = os.path.join(self._tmp, "proj") + self._socket(proj) + venv = os.path.join(proj, ".venv") + elsewhere = os.path.join(self._tmp, "elsewhere") + os.makedirs(elsewhere) + os.chdir(elsewhere) + with mock.patch.object(sys, "prefix", venv), \ + mock.patch.object(sys, "base_prefix", self._tmp): # in_venv = True + got = hook._find_project_root() + self.assertEqual(os.path.realpath(got), os.path.realpath(proj)) + + def test_in_venv_ignores_unrelated_cwd_manifest(self): + # SECURITY: a hostile .socket/ above the cwd must NOT be picked up when + # running inside a venv whose project committed no manifest. 
+ proj = os.path.join(self._tmp, "proj") # venv's project: NO .socket + os.makedirs(proj) + venv = os.path.join(proj, ".venv") + attacker = os.path.join(self._tmp, "attacker") + self._socket(attacker) + os.chdir(attacker) + with mock.patch.object(sys, "prefix", venv), \ + mock.patch.object(sys, "base_prefix", self._tmp): # in_venv = True + got = hook._find_project_root() + self.assertIsNone(got, "must not apply a foreign manifest found above cwd") + + def test_system_python_falls_back_to_cwd(self): + # No venv (sys.prefix == base_prefix): the container/system case, where + # the project is wherever the process runs from. + proj = os.path.join(self._tmp, "proj") + self._socket(proj) + os.chdir(proj) + with mock.patch.object(sys, "prefix", "/usr"), \ + mock.patch.object(sys, "base_prefix", "/usr"): # in_venv = False + got = hook._find_project_root() + self.assertEqual(os.path.realpath(got), os.path.realpath(proj)) + + +class TestPthLine(unittest.TestCase): + """The .pth must be valid: comment lines are ignored by site.py, the import + line execs, and the kill switch short-circuits before importing.""" + + def _pth_import_line(self): + # site.py execs only lines starting with `import`; `#` lines are + # comments. Mirror that: run the import line(s) the way site would. + here = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(here, "socket_patch_hook.pth")) as f: + lines = [ + ln.rstrip("\n") + for ln in f + if ln.strip() and not ln.lstrip().startswith("#") + ] + # Exactly one executable (import) line. 
+ assert len(lines) == 1, f"expected one import line, got {lines!r}" + assert lines[0].startswith("import "), lines[0] + return lines[0] + + def test_pth_line_executes_and_calls_run(self): + line = self._pth_import_line() + with mock.patch.object(hook, "run") as run: + os.environ.pop("SOCKET_PATCH_HOOK", None) + os.environ.pop("SOCKET_NO_HOOK", None) + exec(compile(line, "socket_patch_hook.pth", "exec"), {}) + run.assert_called_once() + + def test_pth_line_respects_off_switch(self): + line = self._pth_import_line() + with mock.patch.object(hook, "run") as run: + os.environ["SOCKET_PATCH_HOOK"] = "off" + try: + exec(compile(line, "socket_patch_hook.pth", "exec"), {}) + finally: + os.environ.pop("SOCKET_PATCH_HOOK", None) + run.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/pypi/socket-patch/pyproject.toml b/pypi/socket-patch/pyproject.toml index 2a6cec1c..e816b868 100644 --- a/pypi/socket-patch/pyproject.toml +++ b/pypi/socket-patch/pyproject.toml @@ -21,6 +21,14 @@ classifiers = [ "Topic :: Software Development :: Build Tools", ] +[project.optional-dependencies] +# `pip install socket-patch[hook]` additionally installs the +# package-manager-agnostic .pth startup hook that re-applies patches after +# install. Unpinned so the hook updates independently of the CLI. `setup` +# itself commits a bare `socket-patch-hook` dependency (the hook needs no +# specific CLI version — it runs whatever `socket-patch` is on PATH). 
+hook = ["socket-patch-hook"] + [project.urls] Homepage = "https://github.com/SocketDev/socket-patch" Repository = "https://github.com/SocketDev/socket-patch" diff --git a/pypi/socket-patch/socket_patch/__init__.py b/pypi/socket-patch/socket_patch/__init__.py index bfcb9d2f..b4cf04ec 100644 --- a/pypi/socket-patch/socket_patch/__init__.py +++ b/pypi/socket-patch/socket_patch/__init__.py @@ -3,20 +3,42 @@ import subprocess -def main(): - bin_dir = os.path.join(os.path.dirname(__file__), "bin") +def _resolve_binary(): + """Locate the bundled socket-patch binary, or return ``None``. + + Single source of truth for binary discovery, reused by both ``main()`` (the + console-script entry point) and the ``socket_patch_hook`` startup hook. Never + raises: returns ``None`` if the binary can't be found, so callers that run at + interpreter startup stay safe. + """ + bin_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") try: entries = os.listdir(bin_dir) except OSError: - entries = [] + return None bins = [e for e in entries if e.startswith("socket-patch")] if len(bins) != 1: + return None + bin_path = os.path.join(bin_dir, bins[0]) + try: + if not os.access(bin_path, os.X_OK): + os.chmod(bin_path, os.stat(bin_path).st_mode | 0o111) + except OSError: + return None + return bin_path + + +def main(): + bin_path = _resolve_binary() + if bin_path is None: + bin_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") + try: + count = len([e for e in os.listdir(bin_dir) if e.startswith("socket-patch")]) + except OSError: + count = 0 print( - f"Expected exactly one socket-patch binary in {bin_dir}, found {len(bins)}", + f"Expected exactly one socket-patch binary in {bin_dir}, found {count}", file=sys.stderr, ) sys.exit(1) - bin_path = os.path.join(bin_dir, bins[0]) - if not os.access(bin_path, os.X_OK): - os.chmod(bin_path, os.stat(bin_path).st_mode | 0o111) raise SystemExit(subprocess.call([bin_path] + sys.argv[1:])) diff --git 
a/scripts/build-pypi-wheels.py b/scripts/build-pypi-wheels.py index ab9f03b0..e1fcd392 100755 --- a/scripts/build-pypi-wheels.py +++ b/scripts/build-pypi-wheels.py @@ -190,6 +190,11 @@ def build_wheel( f"Summary: {metadata['description']}\n" f"License: {metadata['license']}\n" f"Requires-Python: {metadata['requires_python']}\n" + # `pip install socket-patch[hook]` additionally installs the + # package-manager-agnostic .pth post-install hook (a separate + # pure-python wheel). Unpinned so the hook can update independently. + f"Provides-Extra: hook\n" + f'Requires-Dist: socket-patch-hook; extra == "hook"\n' ) if metadata.get("readme"): metadata_header += "Description-Content-Type: text/markdown\n" @@ -237,6 +242,79 @@ def build_wheel( return wheel_path +DIST_NAME_HOOK = "socket_patch_hook" +PKG_NAME_HOOK = "socket-patch-hook" + + +def build_hook_wheel(version: str, hook_dir: Path, dist_dir: Path) -> Path: + """Build the pure-python ``socket-patch-hook`` wheel (``py3-none-any``). + + Unlike the platform wheels, this ships no binary. It contains the + ``socket_patch_hook`` package and — crucially — a top-level + ``socket_patch_hook.pth`` that pip installs into the site-packages root, so + Python executes it at interpreter startup. It depends on ``socket-patch`` + (the binary wheel) for the actual ``apply``. + """ + init_path = hook_dir / "socket_patch_hook" / "__init__.py" + pth_path = hook_dir / "socket_patch_hook.pth" + readme_path = hook_dir / "README.md" + init_py = init_path.read_bytes() + pth = pth_path.read_bytes() + readme = readme_path.read_text() if readme_path.exists() else "" + + wheel_name = f"{DIST_NAME_HOOK}-{version}-py3-none-any.whl" + wheel_path = dist_dir / wheel_name + dist_info = f"{DIST_NAME_HOOK}-{version}.dist-info" + + files = [] + # The package module. + files.append((f"{DIST_NAME_HOOK}/__init__.py", init_py, False)) + # The startup hook — at the wheel root so it installs to site-packages. 
+ files.append(("socket_patch_hook.pth", pth, False)) + + # No Requires-Dist on socket-patch: the hook is version-agnostic and finds + # whatever `socket-patch` CLI is on PATH at runtime (provisioned separately). + metadata_content = ( + f"Metadata-Version: 2.1\n" + f"Name: {PKG_NAME_HOOK}\n" + f"Version: {version}\n" + f"Summary: Package-manager-agnostic post-install patch hook for socket-patch\n" + f"License: MIT\n" + f"Requires-Python: >=3.8\n" + ) + if readme: + metadata_content += "Description-Content-Type: text/markdown\n" + metadata_content += f"\n{readme}" + files.append((f"{dist_info}/METADATA", metadata_content.encode(), False)) + + # Pure-python: Root-Is-Purelib true so the .pth lands in site-packages. + wheel_content = ( + "Wheel-Version: 1.0\n" + "Generator: build-pypi-wheels.py\n" + "Root-Is-Purelib: true\n" + "Tag: py3-none-any\n" + ).encode() + files.append((f"{dist_info}/WHEEL", wheel_content, False)) + + record_lines = [] + for name, data, _ in files: + record_lines.append(f"{name},{sha256_digest(data)},{len(data)}") + record_name = f"{dist_info}/RECORD" + record_lines.append(f"{record_name},,") + files.append((record_name, "\n".join(record_lines).encode(), False)) + + with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as zf: + for name, data, _ in files: + info_obj = zipfile.ZipInfo(name) + info_obj.external_attr = ( + stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH + ) << 16 + info_obj.compress_type = zipfile.ZIP_DEFLATED + zf.writestr(info_obj, data) + + return wheel_path + + def main(): parser = argparse.ArgumentParser( description="Build platform-tagged PyPI wheels for socket-patch" @@ -248,8 +326,8 @@ def main(): ) parser.add_argument( "--artifacts", - required=True, - help="Directory containing build artifacts", + default=None, + help="Directory containing build artifacts (required unless --hook-only)", ) parser.add_argument( "--dist", @@ -261,23 +339,52 @@ def main(): default=None, help="Directory containing 
pyproject.toml (default: pypi/socket-patch relative to script)", ) + parser.add_argument( + "--hook-dir", + default=None, + help="Directory of the socket-patch-hook package (default: pypi/socket-patch-hook)", + ) + parser.add_argument( + "--hook-only", + action="store_true", + help="Build only the pure-python socket-patch-hook wheel (no binary artifacts needed)", + ) + parser.add_argument( + "--skip-hook", + action="store_true", + help="Skip building the socket-patch-hook wheel", + ) args = parser.parse_args() - artifacts_dir = Path(args.artifacts) dist_dir = Path(args.dist) dist_dir.mkdir(parents=True, exist_ok=True) + repo_root = Path(__file__).resolve().parent.parent + hook_dir = Path(args.hook_dir) if args.hook_dir else repo_root / "pypi" / "socket-patch-hook" + + built = [] + skipped = [] + + # The pure-python hook wheel needs no platform artifacts. + if args.hook_only: + wheel_path = build_hook_wheel(args.version, hook_dir, dist_dir) + size_kb = wheel_path.stat().st_size / 1024 + print(f"Built hook wheel: {wheel_path.name} ({size_kb:.1f} KB)") + return + + if not args.artifacts: + parser.error("--artifacts is required unless --hook-only is given") + + artifacts_dir = Path(args.artifacts) + if args.pyproject_dir: pyproject_dir = Path(args.pyproject_dir) else: - pyproject_dir = Path(__file__).resolve().parent.parent / "pypi" / "socket-patch" + pyproject_dir = repo_root / "pypi" / "socket-patch" metadata = read_pyproject_metadata(pyproject_dir) init_py = read_init_py(pyproject_dir) - built = [] - skipped = [] - for target, info in TARGETS.items(): archive_ext = info["archive_ext"] archive_path = artifacts_dir / f"socket-patch-{target}.{archive_ext}" @@ -300,6 +407,12 @@ def main(): print(f" -> {wheel_path.name} ({size_mb:.1f} MB)") built.append(wheel_path) + if not args.skip_hook: + hook_wheel = build_hook_wheel(args.version, hook_dir, dist_dir) + size_kb = hook_wheel.stat().st_size / 1024 + print(f" -> {hook_wheel.name} ({size_kb:.1f} KB) [pure-python hook]") + 
built.append(hook_wheel) + print(f"\nBuilt {len(built)} wheel(s) in {dist_dir}/") if skipped: print(f"Skipped {len(skipped)} target(s) (artifact not found): {', '.join(skipped)}") diff --git a/tests/setup_matrix/matrix.json b/tests/setup_matrix/matrix.json index ed6ccbdc..1776f878 100644 --- a/tests/setup_matrix/matrix.json +++ b/tests/setup_matrix/matrix.json @@ -92,14 +92,14 @@ }, { - "ecosystem": "pypi", "pm": "pip", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "pip", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, { - "ecosystem": "pypi", "pm": "uv", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "uv", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, @@ -116,8 +116,8 @@ "manifest_key": "six.py", "apply_ecosystems": "pypi" }, { - "ecosystem": "pypi", "pm": "hatch", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "hatch", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, diff --git a/tests/setup_matrix/run-case.sh b/tests/setup_matrix/run-case.sh index 418c48e3..c6448818 100755 --- a/tests/setup_matrix/run-case.sh +++ b/tests/setup_matrix/run-case.sh @@ -227,7 +227,22 @@ EOF { "name": "sm-proj", "version": "0.0.0", "nodeModulesDir": "auto" } EOF ;; - pip|uv) : ;; + pip) + # A requirements.txt makes `setup` detect a pip project and add the + # `socket-patch[hook]` dependency to it. 
+ printf '%s==%s\n' "$SM_PACKAGE" "$SM_VERSION" > requirements.txt ;; + uv) + # A PEP 621 pyproject + uv.lock makes `setup` detect a uv project. + cat > pyproject.toml < uv.lock ;; poetry) cat > pyproject.toml </dev/null 2>&1 \ + || "$venv/bin/python" -m ensurepip --upgrade >/dev/null 2>&1 || true + "$venv/bin/python" -m pip install --quiet --no-deps "$SOCKET_PATCH_HOOK_WHEEL" \ + || note "hook wheel install failed" ;; + esac + # The hook resolves `socket-patch` off PATH (it isn't pip-installed here). + ln -sf "$SP_BIN" "$venv/bin/socket-patch" 2>/dev/null || true +} + +# Start an interpreter so the `.pth` hook fires (models a CI app start / +# the next python invocation after install). No-op if there is no venv. +pth_trigger() { # $1=venv dir + local venv="$1" + [ -x "$venv/bin/python" ] || return 0 + PATH="$PWD/$venv/bin:$PATH" "$venv/bin/python" -c "pass" >/dev/null 2>&1 || true +} + # --- per-PM native install (the hook, if configured, fires here) ------ run_install() { case "$SM_PM" in @@ -303,11 +351,34 @@ run_install() { pnpm) pnpm install --no-frozen-lockfile ;; bun) bun add "$SM_PACKAGE@$SM_VERSION" ;; deno) deno install --allow-scripts ;; - pip) python3 -m venv venv && ./venv/bin/pip install --disable-pip-version-check --quiet --no-cache-dir "$SM_PACKAGE==$SM_VERSION" ;; - uv) uv venv venv && uv pip install --python venv/bin/python --quiet "$SM_PACKAGE==$SM_VERSION" ;; + pip) + python3 -m venv venv + pth_install_into_venv venv pip + ./venv/bin/pip install --disable-pip-version-check --quiet --no-cache-dir "$SM_PACKAGE==$SM_VERSION" + pth_trigger venv ;; + uv) + uv venv venv + pth_install_into_venv venv uv + uv pip install --python venv/bin/python --quiet "$SM_PACKAGE==$SM_VERSION" + pth_trigger venv ;; + # poetry / pdm are resolver-based: `add` re-resolves the whole manifest + # (which setup edited to add `socket-patch-hook`) against a package index. 
+ # In this hermetic test the hook wheel isn't published, so resolution + # fails — these PMs can't be exercised without a local index, so they stay + # documented gaps (baseline_supported:false). The .pth mechanism itself is + # package-manager-agnostic (proven by pip/uv/hatch). poetry) poetry config virtualenvs.in-project true --local && poetry add --no-interaction "$SM_PACKAGE@$SM_VERSION" ;; pdm) pdm config python.use_venv true >/dev/null 2>&1; pdm add "$SM_PACKAGE==$SM_VERSION" ;; - hatch) HATCH_DATA_DIR="$PWD/.hatch" hatch env create && HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "import ${SM_PACKAGE//-/_}" ;; + hatch) + HATCH_DATA_DIR="$PWD/.hatch" hatch env create + HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "import ${SM_PACKAGE//-/_}" + # hatch manages its env outside .venv; install the hook + fire an + # interpreter through `hatch run`. + if [ "$SM_RUN_SETUP" = 1 ] && [ -n "${SOCKET_PATCH_HOOK_WHEEL:-}" ] && [ -f "${SOCKET_PATCH_HOOK_WHEEL:-}" ]; then + HATCH_DATA_DIR="$PWD/.hatch" hatch run pip install --no-deps "$SOCKET_PATCH_HOOK_WHEEL" \ + || note "hatch hook wheel install failed" + fi + HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "pass" || true ;; cargo) cargo fetch ;; bundler) bundle config set --local path vendor/bundle && bundle install ;; go) GOFLAGS=-mod=mod go mod download "$SM_PACKAGE@$SM_VERSION" ;; @@ -324,7 +395,10 @@ resolve_target() { local base; base="$(basename "$rel")" case "$SM_ECOSYSTEM" in npm|deno) printf '%s\n' "$PWD/node_modules/$SM_PACKAGE/$rel" ;; - pypi) find "$PWD" -name "$base" 2>/dev/null | head -1 ;; + # Exclude vendored copies (pip/setuptools bundle their own six.py under + # */_vendor/*); the patch lands in the installed package at the + # site-packages root. 
+ pypi) find "$PWD" -name "$base" -not -path '*/_vendor/*' 2>/dev/null | head -1 ;; cargo) find "${CARGO_HOME:-$HOME/.cargo}/registry/src" -path "*/${SM_PACKAGE}-${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; gem) find "$PWD/vendor" -path "*/${SM_PACKAGE}-${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; golang) local gmc; gmc="$(go env GOMODCACHE 2>/dev/null || echo "${GOPATH:-$HOME/go}/pkg/mod")"; find "$gmc" -path "*/$(basename "$SM_PACKAGE")@${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; @@ -465,7 +539,7 @@ resolve_targets() { fi case "$SM_ECOSYSTEM" in npm|deno|monorepo) find "$PWD" -path "*/node_modules/$SM_PACKAGE/$rel" 2>/dev/null ;; - pypi) find "$PWD" -name "$base" 2>/dev/null ;; + pypi) find "$PWD" -name "$base" -not -path '*/_vendor/*' 2>/dev/null ;; *) resolve_target ;; esac } @@ -503,11 +577,19 @@ run_file() { # $1 = absolute path to the resolved package file esac ;; deno) deno run -A "$1" ;; pypi) + # Run the patched module with the in-project venv interpreter directly. + # Going through ` run` re-resolves the project, which (after setup) + # includes the committed `socket-patch-hook` dependency — unpublished in + # this hermetic test, so the resolve would fail for a reason unrelated to + # whether six.py is patched. Direct execution faithfully runs the on-disk + # patched file and observes its marker. (hatch manages its env outside + # an in-project .venv, and its skip-install env doesn't re-resolve, so it + # keeps using `hatch run`.) case "$SM_PM" in - uv) uv run python "$1" ;; - poetry) poetry run python "$1" ;; - pdm) pdm run python "$1" ;; - hatch) hatch run python "$1" ;; + uv) ./venv/bin/python "$1" ;; + poetry) ./.venv/bin/python "$1" ;; + pdm) ./.venv/bin/python "$1" ;; + hatch) HATCH_DATA_DIR="$PWD/.hatch" hatch run python "$1" ;; pip) ./venv/bin/python "$1" ;; *) python3 "$1" ;; esac ;; @@ -579,6 +661,9 @@ fi # "1". 
export SOCKET_OFFLINE=true SOCKET_FORCE=true SOCKET_API_TOKEN=fake SOCKET_ORG_SLUG=test-org export SOCKET_TELEMETRY_DISABLED=1 SOCKET_EXPERIMENTAL_MAVEN=1 SOCKET_EXPERIMENTAL_NUGET=1 +# Isolate the pypi `.pth` hook's change-detection stamp per case so runs +# don't bleed into each other (the stamp lives under XDG_CACHE_HOME). +export XDG_CACHE_HOME="$WORKDIR/.cache" # NOTE: deliberately do NOT export SOCKET_CWD. The install hook's apply # must run with whatever cwd the package manager sets for the lifecycle # script — the project root for a single project, and the *member* dir