Skip to content

Commit 9eb52f5

Browse files
committed
fix(core): clean up partial .socket.facts.json.br on compression failure
Address review feedback on the facts-file brotli compression: - If `_compress_facts_file` fails part-way through writing (e.g. the disk fills up mid-stream — the exact large-file scenario this targets), remove the partially-written `.socket.facts.json.br` before re-raising, so no orphaned `.br` is left in the target directory. The caller still falls back to uploading the plain file. Adds a regression test. - Hoist the 1 MiB streaming chunk size to a module constant (`SOCKET_FACTS_BROTLI_CHUNK_SIZE`) alongside the other brotli knobs.
1 parent d944ae3 commit 9eb52f5

6 files changed

Lines changed: 56 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Changelog
22

3-
## 2.3.1
3+
## 2.3.2
44

55
### New: brotli-compressed `.socket.facts.json` upload
66

@@ -23,7 +23,8 @@ Details:
2323
that exact name). A custom `--reach-output-file` name is uploaded uncompressed, as before.
2424
- Empty baseline-scan placeholder files are not compressed.
2525
- Compression never blocks an upload: if it fails for any reason it falls back to uploading
26-
the plain file.
26+
the plain file, and a partially-written `.socket.facts.json.br` is removed rather than
27+
left behind in the target directory.
2728
- Adds a `brotli` (CPython) / `brotlicffi` (PyPy) dependency.
2829

2930
## 2.3.0

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.3.1"
9+
version = "2.3.2"
1010
requires-python = ">= 3.11"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.3.1'
2+
__version__ = '2.3.2'
33
USER_AGENT = f'SocketPythonCLI/{__version__}'

socketsecurity/core/__init__.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@
6868
SOCKET_FACTS_BROTLI_QUALITY = 5
6969
# Largest brotli window (2**24 bytes); improves the ratio on large facts files.
7070
SOCKET_FACTS_BROTLI_LGWIN = 24
71+
# Stream the facts file in 1 MiB chunks so large files aren't held fully in memory.
72+
SOCKET_FACTS_BROTLI_CHUNK_SIZE = 1024 * 1024
7173

7274

7375
def _humanize_alert_type(alert_type: str) -> str:
@@ -569,7 +571,9 @@ def _compress_facts_file(source_path: str) -> str:
569571
The source is streamed in chunks so a large facts file (hundreds of MB) never has
570572
to be held in memory at once. The compressed file is written next to the source so
571573
that the multipart key the SDK derives keeps the same directory prefix, only with a
572-
``.br`` basename.
574+
``.br`` basename. Any existing ``.socket.facts.json.br`` sibling is overwritten, and a
575+
partially-written output is removed if compression fails part-way through (e.g. the
576+
disk fills up mid-stream) so no orphaned ``.br`` is left in the target directory.
573577
574578
Args:
575579
source_path: Path to the plain ``.socket.facts.json`` file.
@@ -589,16 +593,25 @@ def _compress_facts_file(source_path: str) -> str:
589593
quality=SOCKET_FACTS_BROTLI_QUALITY,
590594
lgwin=SOCKET_FACTS_BROTLI_LGWIN,
591595
)
592-
chunk_size = 1024 * 1024 # 1 MiB
593-
with open(source_path, "rb") as src, open(target_path, "wb") as dst:
594-
while True:
595-
chunk = src.read(chunk_size)
596-
if not chunk:
597-
break
598-
compressed = compressor.process(chunk)
599-
if compressed:
600-
dst.write(compressed)
601-
dst.write(compressor.finish())
596+
try:
597+
with open(source_path, "rb") as src, open(target_path, "wb") as dst:
598+
while True:
599+
chunk = src.read(SOCKET_FACTS_BROTLI_CHUNK_SIZE)
600+
if not chunk:
601+
break
602+
compressed = compressor.process(chunk)
603+
if compressed:
604+
dst.write(compressed)
605+
dst.write(compressor.finish())
606+
except BaseException:
607+
# Don't leave a half-written .br behind for the caller to miss (it only tracks
608+
# the path for cleanup once this returns). Remove it, then re-raise so the caller
609+
# falls back to uploading the plain file.
610+
try:
611+
os.unlink(target_path)
612+
except OSError:
613+
pass
614+
raise
602615
return target_path
603616

604617
def _compress_facts_files_for_upload(self, files: List[str]) -> Tuple[List[str], List[str]]:

tests/core/test_facts_compression.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import json
88
import os
99

10+
import pytest
11+
1012
try:
1113
import brotli
1214
except ImportError: # pragma: no cover - PyPy / non-CPython fallback
@@ -59,6 +61,30 @@ def test_compress_for_upload_rewrites_facts_entry(tmp_path):
5961
assert manifest in upload_files
6062

6163

64+
def test_compress_facts_file_removes_partial_output_on_failure(tmp_path, monkeypatch):
65+
"""If compression fails mid-stream, the half-written .br is removed (not orphaned)."""
66+
source = _write(str(tmp_path / SOCKET_FACTS_FILENAME), b'{"a": 1}' * 1000)
67+
68+
class ExplodingCompressor:
69+
def __init__(self, *args, **kwargs):
70+
pass
71+
72+
def process(self, _data):
73+
raise RuntimeError("disk full")
74+
75+
def finish(self): # pragma: no cover - never reached
76+
return b""
77+
78+
# Patch the module the helper imports (brotli on CPython, brotlicffi elsewhere).
79+
monkeypatch.setattr(brotli, "Compressor", ExplodingCompressor)
80+
81+
with pytest.raises(RuntimeError, match="disk full"):
82+
Core._compress_facts_file(source)
83+
84+
# No orphaned .br left behind in the target directory.
85+
assert not (tmp_path / SOCKET_FACTS_BROTLI_FILENAME).exists()
86+
87+
6288
def test_compress_for_upload_preserves_directory_prefix(tmp_path):
6389
"""The `.br` sibling keeps the facts file's directory so the relative key is preserved."""
6490
core = Core.__new__(Core)

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)