From ff4b2ae11854a10036372ea26a7b80d6eae0f6ea Mon Sep 17 00:00:00 2001 From: lelia <2418071+lelia@users.noreply.github.com> Date: Tue, 2 Jun 2026 18:15:17 -0400 Subject: [PATCH 1/4] fix(core): always omit license details from full-scan diff request (#CE-224 follow-on) The full-scan diff request (fullscans.stream_diff) now always sets include_license_details=false, decoupled from the --exclude-license-details flag. This prevents the CE-224 truncation crash (Unterminated string / JSON parse failure on large repos, reported by the tremendous org) from recurring even when the flag is not passed. Why this is safe (no output changes): the license fields the diff endpoint can embed are never consumed off the diff. With --generate-license off, the only consumer (the legal/FOSSA artifact builder) never runs. With --generate-license on, get_license_text_via_purl re-fetches license data from the dedicated PURL endpoint and overwrites whatever the diff embedded before anything reads it. Either way the embedded payload was dead weight that only bloated the response. --exclude-license-details still works but its scope is now narrower: it controls only the dashboard report URL, not the internal diff payload. Help text updated. Core.get_added_and_removed_packages(..., include_license_details=True) remains as an explicit override seam (exercised in tests). Minor bump to 2.4.0: outputs are provably unchanged, but this is a deliberate default-behavior change (2.3.0 made the flag propagate; 2.4.0 makes the lean diff the default), which warrants a minor bump per the project's semver policy. 
Signed-off-by: lelia <2418071+lelia@users.noreply.github.com> --- CHANGELOG.md | 41 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- socketsecurity/__init__.py | 2 +- socketsecurity/config.py | 7 +++++- socketsecurity/core/__init__.py | 35 +++++++++++++++++++++++++--- tests/core/test_sdk_methods.py | 20 ++++++++++++++-- uv.lock | 2 +- 7 files changed, 100 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f78b70..9c2a741 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,46 @@ # Changelog +## 2.4.0 + +### Changed: license details are no longer requested on the full-scan diff + +The internal full-scan diff request (`fullscans.stream_diff`, used to compare +alerts between two scans) now always sets `include_license_details=false`, +regardless of the `--exclude-license-details` flag. + +**Why this is safe (no output changes):** the license fields the diff endpoint +can embed were never actually consumed off the diff: + +- With `--generate-license` **off**, the only consumer of a package's + `licenseDetails`/`licenseAttrib` — the legal/FOSSA artifact builder — is never + invoked, so the embedded license data was parsed and immediately discarded. +- With `--generate-license` **on**, the CLI re-fetches license data from the + dedicated PURL endpoint (`get_license_text_via_purl`) and **overwrites** + whatever the diff embedded before anything reads it. + +So in every code path the diff's license payload was dead weight. On large +dependency trees it inflated the diff response past ~2.3 MB and truncated it +mid-string, crashing `response.json()` with +`Unterminated string starting at: ...` (CE-224, reported by the `tremendous` +org). Dropping it keeps the diff lean with **zero change to any output +artifact** (SBOM, legal/FOSSA attribution, report contents). + +**Why a minor bump (2.4.0), not a patch:** this is a deliberate default-behavior +change. 
2.3.0 fixed the `--exclude-license-details` flag so it correctly +propagated to the diff; this release goes further and makes the lean diff the +default so the crash cannot recur even when the flag is not passed. Per the +project's semver policy a default-behavior change warrants a minor bump, even +though outputs are provably unchanged. + +**Effect on `--exclude-license-details`:** the flag still works, but its scope is +now narrower — it controls only the human-facing dashboard report URL +(`?include_license_details=false`), not the internal diff payload. Its `--help` +text was updated to reflect this. + +Override seam: `Core.get_added_and_removed_packages(..., include_license_details=True)` +can still request embedded license details explicitly (used in tests); nothing +in the CLI wires the user flag to it anymore. + ## 2.3.1 ### New: brotli-compressed `.socket.facts.json` upload diff --git a/pyproject.toml b/pyproject.toml index 982da93..b4c94c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "socketsecurity" -version = "2.3.1" +version = "2.4.0" requires-python = ">= 3.11" license = {"file" = "LICENSE"} dependencies = [ diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index b459b8c..55d08d9 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,3 +1,3 @@ __author__ = 'socket.dev' -__version__ = '2.3.1' +__version__ = '2.4.0' USER_AGENT = f'SocketPythonCLI/{__version__}' diff --git a/socketsecurity/config.py b/socketsecurity/config.py index 7a262de..5a3cce7 100644 --- a/socketsecurity/config.py +++ b/socketsecurity/config.py @@ -705,7 +705,12 @@ def create_argument_parser() -> argparse.ArgumentParser: "--exclude-license-details", dest="exclude_license_details", action="store_true", - help="Exclude license details from the diff report (boosts performance for large repos)" + help=( + "Exclude license details from the dashboard report URL. 
" + "As of 2.4.0 the internal diff request always omits license details " + "(they were unused there and bloated large-repo responses), so this " + "flag now only affects the report link, not diff performance." + ) ) output_group.add_argument( "--max-purl-batch-size", diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index c2bb862..0e98323 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -1070,7 +1070,7 @@ def get_added_and_removed_packages( self, head_full_scan_id: str, new_full_scan_id: str, - include_license_details: bool = True + include_license_details: bool = False ) -> Tuple[Dict[str, Package], Dict[str, Package], Dict[str, Package]]: """ Get packages that were added and removed between scans. @@ -1078,6 +1078,27 @@ def get_added_and_removed_packages( Args: head_full_scan_id: Previous scan (maybe None if first scan) new_full_scan_id: New scan just created + include_license_details: Whether to ask the diff endpoint to embed + per-package license attribution/details in the response. + + Defaults to ``False`` on purpose. The diff endpoint exists to + compare alerts between two scans; the license fields it can embed + are never consumed off the diff: + * When ``--generate-license`` is OFF, the only consumer of + ``Package.licenseDetails``/``licenseAttrib`` (the legal/FOSSA + artifact builder) is never invoked, so the embedded license + data is parsed and then dropped on the floor. + * When ``--generate-license`` is ON, ``get_license_text_via_purl`` + re-fetches license data from the dedicated PURL endpoint and + OVERWRITES whatever the diff embedded, before anything reads it. + Either way the embedded license payload is dead weight, and on + large dependency trees it inflated the diff response past ~2.3MB + and truncated it mid-string, crashing ``response.json()`` + (CE-224, customer: Tremendous). Defaulting to ``False`` keeps the + diff lean with zero change to any output artifact. 
The parameter + is retained as an explicit override seam, not wired to the + ``--exclude-license-details`` user flag (which still governs the + human-facing dashboard report URL). Returns: Tuple of (added_packages, removed_packages) dictionaries @@ -1299,7 +1320,15 @@ def create_new_diff( except OSError as e: log.warning(f"Failed to clean up temporary file {temp_file}: {e}") - # Handle diff generation - now we always have both scans + # Handle diff generation - now we always have both scans. + # + # Note: we intentionally do NOT forward params.include_license_details + # (the --exclude-license-details user flag) into the diff request. The + # diff path never consumes embedded license data (see + # get_added_and_removed_packages docstring), so requesting it only bloats + # the response and risks the CE-224 truncation crash on large repos. The + # user flag still controls the dashboard report URL below; it just no + # longer gates this internal diff payload. ( added_packages, removed_packages, @@ -1307,7 +1336,7 @@ def create_new_diff( ) = self.get_added_and_removed_packages( head_full_scan_id, new_full_scan.id, - include_license_details=getattr(params, "include_license_details", True) + include_license_details=False ) # Separate unchanged packages from added/removed for --strict-blocking support diff --git a/tests/core/test_sdk_methods.py b/tests/core/test_sdk_methods.py index fdcbef3..2cad8e5 100644 --- a/tests/core/test_sdk_methods.py +++ b/tests/core/test_sdk_methods.py @@ -95,13 +95,17 @@ def test_get_added_and_removed_packages(core): # Get two different scans to compare added, removed, all_packages = core.get_added_and_removed_packages("head", "new") - # Verify SDK was called correctly + # Verify SDK was called correctly. 
+ # include_license_details defaults to "false": the diff path never consumes + # embedded license data (license artifacts come from the PURL endpoint), so + # requesting it only bloats the response and risks the CE-224 truncation + # crash on large repos. core.sdk.fullscans.stream_diff.assert_called_once_with( core.config.org_slug, "head", "new", use_types=True, - include_license_details="true", + include_license_details="false", ) # Verify the results @@ -116,6 +120,18 @@ def test_get_added_and_removed_packages(core): assert "dp2_t1" in removed # Verify transitive dependencies are also tracked assert "pypi/direct_package_1@1.6.0" in all_packages # Unchanged package is in full package map +def test_get_added_and_removed_packages_license_override(core): + """The include_license_details override seam still works when explicitly requested.""" + core.get_added_and_removed_packages("head", "new", include_license_details=True) + + core.sdk.fullscans.stream_diff.assert_called_once_with( + core.config.org_slug, + "head", + "new", + use_types=True, + include_license_details="true", + ) + def test_empty_alerts_preserved(core): """Test that empty alerts arrays stay as empty arrays and don't become None""" # Get the scan that contains dp2 (which has empty alerts array) diff --git a/uv.lock b/uv.lock index 0ffdcce..2458000 100644 --- a/uv.lock +++ b/uv.lock @@ -1270,7 +1270,7 @@ wheels = [ [[package]] name = "socketsecurity" -version = "2.3.1" +version = "2.4.0" source = { editable = "." 
} dependencies = [ { name = "brotli", marker = "platform_python_implementation == 'CPython'" }, From 7144926ef348b85a87d1442ace050e0a2e1df686 Mon Sep 17 00:00:00 2001 From: lelia <2418071+lelia@users.noreply.github.com> Date: Tue, 2 Jun 2026 18:46:33 -0400 Subject: [PATCH 2/4] chore: trim changelog release notes --- CHANGELOG.md | 81 +++++++++++----------------------------------------- 1 file changed, 17 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c2a741..b549163 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,42 +4,13 @@ ### Changed: license details are no longer requested on the full-scan diff -The internal full-scan diff request (`fullscans.stream_diff`, used to compare -alerts between two scans) now always sets `include_license_details=false`, -regardless of the `--exclude-license-details` flag. - -**Why this is safe (no output changes):** the license fields the diff endpoint -can embed were never actually consumed off the diff: - -- With `--generate-license` **off**, the only consumer of a package's - `licenseDetails`/`licenseAttrib` — the legal/FOSSA artifact builder — is never - invoked, so the embedded license data was parsed and immediately discarded. -- With `--generate-license` **on**, the CLI re-fetches license data from the - dedicated PURL endpoint (`get_license_text_via_purl`) and **overwrites** - whatever the diff embedded before anything reads it. - -So in every code path the diff's license payload was dead weight. On large -dependency trees it inflated the diff response past ~2.3 MB and truncated it -mid-string, crashing `response.json()` with -`Unterminated string starting at: ...` (CE-224, reported by the `tremendous` -org). Dropping it keeps the diff lean with **zero change to any output -artifact** (SBOM, legal/FOSSA attribution, report contents). - -**Why a minor bump (2.4.0), not a patch:** this is a deliberate default-behavior -change. 
2.3.0 fixed the `--exclude-license-details` flag so it correctly -propagated to the diff; this release goes further and makes the lean diff the -default so the crash cannot recur even when the flag is not passed. Per the -project's semver policy a default-behavior change warrants a minor bump, even -though outputs are provably unchanged. - -**Effect on `--exclude-license-details`:** the flag still works, but its scope is -now narrower — it controls only the human-facing dashboard report URL -(`?include_license_details=false`), not the internal diff payload. Its `--help` -text was updated to reflect this. - -Override seam: `Core.get_added_and_removed_packages(..., include_license_details=True)` -can still request embedded license details explicitly (used in tests); nothing -in the CLI wires the user flag to it anymore. +- Full-scan diff requests now always set `include_license_details=false`, keeping + large diff responses smaller and avoiding truncation crashes on large repos. +- `--exclude-license-details` still controls the dashboard report URL, but no + longer affects the internal diff request. Its `--help` text has been updated + to reflect the narrower scope. +- License artifact output is unchanged: `--generate-license` continues to fetch + license details from the dedicated PURL endpoint. ## 2.3.1 @@ -72,40 +43,21 @@ Details: ### New: `--exit-code-on-api-error` -Adds a configurable exit code for API / infrastructure failures (timeouts, -network errors, unexpected exceptions), so CI pipelines can distinguish them -from blocking security findings (exit `1`): - -``` -socketcli --exit-code-on-api-error 100 ... -``` - -Default is `3` (the code the CLI already used for these errors), so **default -behavior is unchanged** — the exit code only changes when you pass the flag. -Set it to a Buildkite `soft_fail` code, or to `0` to swallow infra errors. 
- -**Interaction to be aware of:** `--disable-blocking` forces exit `0` for *all* -outcomes and therefore overrides `--exit-code-on-api-error`. Use the new flag -*without* `--disable-blocking` if you want a custom infra-error code to take -effect. See the exit-code reference in the README. - -> A future `3.0` release is planned to make infrastructure errors exit non-zero -> even under `--disable-blocking` (so outages stop being silently swallowed). -> That is a breaking change and is intentionally **not** in this release. +- Added `--exit-code-on-api-error` so CI can distinguish API / infrastructure + failures from blocking security findings. The default remains `3`; the flag + only changes behavior when set explicitly. +- `--disable-blocking` still takes precedence and exits `0` for all outcomes. ### New: commit message auto-truncation -`--commit-message` values longer than 200 characters are now automatically -truncated before being sent to the API, preventing HTTP 413 errors from -oversized URL query parameters (common with AI-generated commit messages or -`$BUILDKITE_MESSAGE`). +- `--commit-message` values longer than 200 characters are now truncated before + being sent to the API, preventing HTTP 413 errors from oversized query + parameters. ### Improved: Buildkite log formatting -When running inside a Buildkite job (`BUILDKITE=true`), infrastructure errors -emit Buildkite log section markers (`^^^ +++` / `--- :warning:`) so the error -section auto-expands in the BK UI, plus a `soft_fail` hint. No effect on other -CI platforms. +- Infrastructure errors now emit Buildkite log section markers when + `BUILDKITE=true`, making those failures easier to find in Buildkite logs. ### Fixed @@ -114,6 +66,7 @@ CI platforms. which was constructed without the CLI timeout and defaulted to 1200s. - `--exclude-license-details` now propagates to the full-scan diff comparison request (it was only applied to full-scan params / report URLs before). 
+ ## 2.2.93 - Bundled twelve Dependabot dependency updates: `urllib3`, `gitpython`, `python-dotenv`, `pytest`, `uv`, `cryptography`, `pygments`, `requests`, and `idna` (main app), plus `axios`, `requests`, and `flask` (e2e fixtures). `idna` 3.11 → 3.15 includes the fix for CVE-2026-45409. From 433724e9ec10adc5181acf20e0d11edc354ed68b Mon Sep 17 00:00:00 2001 From: lelia <2418071+lelia@users.noreply.github.com> Date: Tue, 2 Jun 2026 19:34:25 -0400 Subject: [PATCH 3/4] chore: require socketdev 3.1.2 --- CHANGELOG.md | 1 + pyproject.toml | 2 +- uv.lock | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b549163..e7d1be5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ to reflect the narrower scope. - License artifact output is unchanged: `--generate-license` continues to fetch license details from the dedicated PURL endpoint. +- Requires `socketdev>=3.1.2`. ## 2.3.1 diff --git a/pyproject.toml b/pyproject.toml index b4c94c3..c88ce43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ 'GitPython', 'packaging', 'python-dotenv', - "socketdev>=3.0.33,<4.0.0", + "socketdev>=3.1.2,<4.0.0", "bs4>=0.0.2", "markdown>=3.10", "brotli>=1.0.9; platform_python_implementation == 'CPython'", diff --git a/uv.lock b/uv.lock index 2458000..957202f 100644 --- a/uv.lock +++ b/uv.lock @@ -1257,15 +1257,15 @@ wheels = [ [[package]] name = "socketdev" -version = "3.1.0" +version = "3.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/95/3e/50f05942e23d12043028d71c0e502c0d02c470686afc3dfbab0d1931e5c1/socketdev-3.1.0.tar.gz", hash = "sha256:a9534189d50c9f6c39e802280cc2317f830dd0c9970677e8cde843a69daa84ed", size = 172581, upload-time = "2026-05-21T17:14:03.607Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/34/3c/974f11a7064d12303049ed46b2a475ff6e65c073c0985558195756d30543/socketdev-3.1.2.tar.gz", hash = "sha256:3dc46258f29f66f8ed84767ab6158237d38a7de4ecb4b28950b4f0bb0d49ff68", size = 178479, upload-time = "2026-06-02T23:33:17.251Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/76/4fb37245468dd9c67137059ce6833db97d76c808bf0d10397f1b5a2943d1/socketdev-3.1.0-py3-none-any.whl", hash = "sha256:e9245916d423952aba4f0018bea2bca28740530ec30308089c48dddb2133e38a", size = 67255, upload-time = "2026-05-21T17:14:01.873Z" }, + { url = "https://files.pythonhosted.org/packages/51/e9/72a8ccf2c3a20d436616e303b3c51a700e0def781806d361bd0f65ab436b/socketdev-3.1.2-py3-none-any.whl", hash = "sha256:14a4e913fa5c2bbea856820b2ebc9f7c21960c8c42e77a8fd2ae4ef626ba0f49", size = 67225, upload-time = "2026-06-02T23:33:15.714Z" }, ] [[package]] @@ -1327,7 +1327,7 @@ requires-dist = [ { name = "python-dotenv" }, { name = "requests" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.3.0" }, - { name = "socketdev", specifier = ">=3.0.33,<4.0.0" }, + { name = "socketdev", specifier = ">=3.1.2,<4.0.0" }, { name = "twine", marker = "extra == 'dev'" }, { name = "uv", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] From 970fb554f0f0a4383ca3ce56dc09bac81827fbff Mon Sep 17 00:00:00 2001 From: lelia <2418071+lelia@users.noreply.github.com> Date: Tue, 2 Jun 2026 19:36:18 -0400 Subject: [PATCH 4/4] docs: note exclude license flag scope change --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7d1be5..41545c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,9 @@ - Full-scan diff requests now always set `include_license_details=false`, keeping large diff responses smaller and avoiding truncation crashes on large repos. -- `--exclude-license-details` still controls the dashboard report URL, but no - longer affects the internal diff request. 
Its `--help` text has been updated - to reflect the narrower scope. +- Soft breaking change for scripts that pass the flag: `--exclude-license-details` still + controls the dashboard report URL, but no longer affects the internal diff + request. Its `--help` text has been updated to reflect the narrower scope. - License artifact output is unchanged: `--generate-license` continues to fetch license details from the dedicated PURL endpoint. - Requires `socketdev>=3.1.2`.