Skip to content

Commit 064bfc3

Browse files
committed
feat(reach): add unified --exclude-paths, deprecate --reach-exclude-paths
Add a single --exclude-paths flag (Node CLI parity) that filters BOTH SCA manifest discovery and reachability analysis:
- New Core matcher: anchored micromatch-style globs compiled to regex (no new deps). Paths are scan-root-relative POSIX paths; '*' does not cross '/', '**' does; each pattern P is expanded to [P, P/**]. The matcher is threaded into find_files via cli_config and is a no-op when the flag is unset.
- Reach side unions --exclude-paths with the now-deprecated --reach-exclude-paths and forwards the result to coana --exclude-dirs.
- Pattern validation mirrors Node's assertValidExcludePaths (rejects negation, absolute paths, '..' traversal, and degenerate match-everything patterns).
- --reach-exclude-paths is soft-deprecated: it still works, is marked [DEPRECATED] in help, and warns at runtime.
Adds tests, including the Node parity cases for the matcher, plus validation tests.
1 parent 9c9c6a7 commit 064bfc3

7 files changed

Lines changed: 289 additions & 7 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.4.1"
9+
version = "2.4.2"
1010
requires-python = ">= 3.11"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.4.1'
2+
__version__ = '2.4.2'
33
USER_AGENT = f'SocketPythonCLI/{__version__}'

socketsecurity/config.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,30 @@ def load_cli_config_file(config_path: str) -> dict:
5555
return scoped
5656
return data
5757

58+
def validate_exclude_paths(patterns: List[str]) -> None:
    """Validate --exclude-paths patterns (mirrors Node's assertValidExcludePaths).

    Patterns are scan-root-relative globs. Reject the cases coana's --exclude-dirs / fast-glob
    cannot honor: negation, absolute paths, ``..`` traversal, and degenerate match-everything.

    Backslashes are treated as ``/`` and surrounding whitespace is ignored. Trailing slashes
    are ignored for the match-everything check, so ``**/`` is rejected just like ``**``.

    Args:
        patterns: Raw patterns as supplied by the user.

    Raises:
        SystemExit: With code 1 on the first invalid pattern, after logging the reason.
    """
    degenerate = {"", ".", "./", "./**", "/", "**", "/**"}
    for p in patterns:
        norm = (p or "").strip().replace("\\", "/")
        if norm.startswith("!"):
            logging.error(f"--exclude-paths: negation patterns are not supported: {p!r}")
            # SystemExit is raised directly: the site-provided exit() helper is meant for
            # interactive use and closes stdin as a side effect.
            raise SystemExit(1)
        if norm.startswith("/"):
            logging.error(f"--exclude-paths: patterns must be scan-root relative (no leading '/'): {p!r}")
            raise SystemExit(1)
        if norm == ".." or norm.startswith("../") or "/../" in norm or norm.endswith("/.."):
            logging.error(f"--exclude-paths: '..' path traversal is not allowed: {p!r}")
            raise SystemExit(1)
        # Compare with trailing slashes removed as well, so "**/" or ".//" cannot slip
        # past the degenerate check.
        if norm in degenerate or norm.rstrip("/") in degenerate:
            logging.error(f"--exclude-paths: pattern would exclude everything: {p!r}")
            raise SystemExit(1)
80+
81+
5882
@dataclass
5983
class PluginConfig:
6084
enabled: bool = False
@@ -106,6 +130,7 @@ class CliConfig:
106130
include_module_folders: bool = False
107131
repo_is_public: bool = False
108132
excluded_ecosystems: list[str] = field(default_factory=lambda: [])
133+
exclude_paths: Optional[List[str]] = None
109134
version: str = __version__
110135
jira_plugin: PluginConfig = field(default_factory=PluginConfig)
111136
slack_plugin: PluginConfig = field(default_factory=PluginConfig)
@@ -167,6 +192,12 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
167192

168193
args = parser.parse_args(args_list)
169194

195+
if args.reach_exclude_paths:
196+
logging.warning(
197+
"--reach-exclude-paths is deprecated; use --exclude-paths instead. "
198+
"It is still honored and unioned with --exclude-paths."
199+
)
200+
170201
# Get API token from env or args (check multiple env var names)
171202
api_token = (
172203
os.getenv("SOCKET_SECURITY_API_KEY") or
@@ -258,6 +289,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
258289
'reach_lazy_mode': args.reach_lazy_mode,
259290
'reach_ecosystems': args.reach_ecosystems.split(',') if args.reach_ecosystems else None,
260291
'reach_exclude_paths': args.reach_exclude_paths.split(',') if args.reach_exclude_paths else None,
292+
'exclude_paths': [p.strip() for p in args.exclude_paths.split(',') if p.strip()] if args.exclude_paths else None,
261293
'reach_skip_cache': args.reach_skip_cache,
262294
'reach_min_severity': args.reach_min_severity,
263295
'reach_output_file': args.reach_output_file,
@@ -361,6 +393,10 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
361393
logging.error("--sarif-reachability potentially/reachable-or-potentially requires --sarif-scope full")
362394
exit(1)
363395

396+
# Validate --exclude-paths patterns up front (mirrors Node's assertValidExcludePaths).
397+
if config_args.get("exclude_paths"):
398+
validate_exclude_paths(config_args["exclude_paths"])
399+
364400
# Validate that only_facts_file requires reach
365401
if args.only_facts_file and not args.reach:
366402
logging.error("--only-facts-file requires --reach to be specified")
@@ -570,6 +606,15 @@ def create_argument_parser() -> argparse.ArgumentParser:
570606
help="List of ecosystems to exclude from analysis (JSON array string)"
571607
)
572608

609+
path_group.add_argument(
610+
"--exclude-paths",
611+
dest="exclude_paths",
612+
metavar="<list>",
613+
help="Comma-separated paths/globs to exclude from BOTH manifest discovery and "
614+
"reachability analysis (e.g. 'tests/**,packages/legacy,*.spec.ts'). "
615+
"Supersedes --reach-exclude-paths."
616+
)
617+
573618
# Branch and Scan Configuration
574619
config_group = parser.add_argument_group('Branch and Scan Configuration')
575620
config_group.add_argument(
@@ -920,7 +965,8 @@ def create_argument_parser() -> argparse.ArgumentParser:
920965
"--reach-exclude-paths",
921966
dest="reach_exclude_paths",
922967
metavar="<list>",
923-
help="Paths to exclude from reachability analysis (comma-separated)"
968+
help="[DEPRECATED: use --exclude-paths] Paths to exclude from reachability analysis "
969+
"(comma-separated). Still honored and unioned with --exclude-paths."
924970
)
925971
reachability_group.add_argument(
926972
"--reach-min-severity",

socketsecurity/core/__init__.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,67 @@ def is_excluded(file_path: str, excluded_dirs: Set[str]) -> bool:
213213
return True
214214
return False
215215

216+
@staticmethod
def _exclude_glob_to_regex(pattern: str) -> str:
    """Translate a micromatch-style glob into a fully anchored regex string.

    Mirrors the Node CLI's --exclude-paths matcher (src/commands/scan/exclude-paths.mts):
    patterns are matched against scan-root-relative POSIX paths, case-sensitively, where
    ``*`` does NOT cross ``/`` and ``**`` DOES. Patterns are anchored at the scan root, so
    ``tests`` matches ``tests`` (not ``src/tests``); use ``**/tests`` to match at any depth.

    Only ``*``, ``**`` and ``?`` are special; every other character is matched literally.

    Args:
        pattern: Glob using ``/`` as the separator.

    Returns:
        Regex source string that must match the whole path.
    """
    i, n = 0, len(pattern)
    out = ["^"]
    while i < n:
        c = pattern[i]
        if pattern.startswith("**/", i):
            out.append("(?:[^/]+/)*")  # '**/' -> zero or more whole path segments
            i += 3
        elif pattern.startswith("**", i):
            # '**' at end / before non-slash -> anything, incl '/'. The scoped DOTALL flag
            # lets it also span a newline embedded in a file name.
            out.append("(?s:.*)")
            i += 2
        elif c == "*":
            out.append("[^/]*")  # '*' -> within a single path segment
            i += 1
        elif c == "?":
            out.append("[^/]")  # '?' -> exactly one non-separator character
            i += 1
        else:
            out.append(re.escape(c))
            i += 1
    # '\Z' instead of '$': '$' would also accept a path that ends in a newline.
    out.append(r"\Z")
    return "".join(out)
248+
249+
@staticmethod
def compile_exclude_paths(patterns: Optional[List[str]]) -> List["re.Pattern"]:
    """Compile --exclude-paths globs into anchored regexes (compiled once per scan).

    Each pattern ``P`` is expanded the way Node feeds fast-glob's ``ignore``: ``P`` (a file-
    or dir-shaped exact match) plus ``P/**`` (its subtree), unless ``P`` already ends with
    ``/**``. Validation of the patterns happens earlier, in CliConfig.from_args.

    Normalization before compiling: surrounding whitespace is dropped, backslashes become
    ``/``, a leading ``./`` is removed and trailing slashes are stripped. Patterns that end
    up empty are skipped.

    Args:
        patterns: Raw patterns, or ``None`` / empty for "no exclusions".

    Returns:
        Compiled regexes, one or two per usable pattern, in input order.
    """
    compiled: List["re.Pattern"] = []
    for raw in patterns or []:
        p = (raw or "").strip().replace("\\", "/")
        # "./tests" and "tests" name the same scan-root-relative location, but relpath-style
        # candidates never start with "./", so an unstripped prefix could never match.
        while p.startswith("./"):
            p = p[2:].lstrip("/")
        p = p.rstrip("/")
        if not p:
            continue
        globs = [p] if p.endswith("/**") else [p, f"{p}/**"]
        compiled.extend(re.compile(Core._exclude_glob_to_regex(g)) for g in globs)
    return compiled
265+
266+
@staticmethod
def path_matches_exclude_regexes(rel_path: str, regexes: List["re.Pattern"]) -> bool:
    """Return True when ``rel_path`` matches at least one compiled exclude regex.

    The path is converted to forward slashes (both ``os.sep`` and literal backslashes)
    before matching.
    """
    posix_path = rel_path.replace(os.sep, "/").replace("\\", "/")
    for regex in regexes:
        if regex.match(posix_path):
            return True
    return False
270+
271+
@staticmethod
def matches_exclude_paths(file_path: str, base_path: str, patterns: List[str]) -> bool:
    """Check one path against raw exclude patterns (recompiles the patterns on every call).

    ``file_path`` is made relative to ``base_path`` before matching. Intended for tests and
    ad-hoc checks.
    """
    relative = os.path.relpath(file_path, base_path).replace(os.sep, "/")
    compiled = Core.compile_exclude_paths(patterns)
    return Core.path_matches_exclude_regexes(relative, compiled)
276+
216277
def save_submitted_files_list(self, files: List[str], output_path: str) -> None:
217278
"""
218279
Save the list of submitted file names to a JSON file for debugging.
@@ -336,6 +397,17 @@ def find_files(self, path: str, ecosystems: Optional[List[str]] = None) -> List[
336397
start_time = time.time()
337398
files: Set[str] = set()
338399

400+
# Unified --exclude-paths: filter discovered manifests by the same paths/globs that are
401+
# forwarded to coana's --exclude-dirs. Only consulted when the user supplied the flag.
402+
# Patterns are anchored to `path` (the scan root this pass walks), matching coana's
403+
# target and the Node CLI's fast-glob cwd. NOTE: when scanning multiple --sub-path
404+
# targets, find_files runs once per sub-path, so a pattern like `tests` anchors to each
405+
# sub-path independently (Node anchors all patterns to a single scan-root cwd). This only
406+
# differs for the multi-target full-scan + --exclude-paths combo; the reach flow is
407+
# single-target, so it matches Node there.
408+
exclude_paths = getattr(self.cli_config, "exclude_paths", None) if self.cli_config else None
409+
exclude_regexes = Core.compile_exclude_paths(exclude_paths) if exclude_paths else []
410+
339411
# Get supported patterns from the API
340412
patterns = self.get_supported_patterns()
341413

@@ -365,8 +437,15 @@ def find_files(self, path: str, ecosystems: Optional[List[str]] = None) -> List[
365437

366438
for glob_file in glob_files:
367439
glob_file_str = str(glob_file)
368-
if os.path.isfile(glob_file_str) and not Core.is_excluded(glob_file_str, self.config.excluded_dirs):
369-
files.add(glob_file_str.replace("\\", "/"))
440+
if not os.path.isfile(glob_file_str):
441+
continue
442+
if Core.is_excluded(glob_file_str, self.config.excluded_dirs):
443+
continue
444+
if exclude_regexes:
445+
rel = os.path.relpath(glob_file_str, path)
446+
if Core.path_matches_exclude_regexes(rel, exclude_regexes):
447+
continue
448+
files.add(glob_file_str.replace("\\", "/"))
370449

371450
glob_end = time.time()
372451
log.debug(f"Globbing took {glob_end - glob_start:.4f} seconds")

socketsecurity/socketcli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,15 @@ def main_code():
388388
timeout=config.reach_analysis_timeout,
389389
memory_limit=config.reach_analysis_memory_limit,
390390
ecosystems=config.reach_ecosystems,
391-
exclude_paths=config.reach_exclude_paths,
391+
# Union the deprecated --reach-exclude-paths with the unified --exclude-paths
392+
# and forward verbatim to coana's --exclude-dirs. Patterns are scan-root
393+
# relative; coana resolves --exclude-dirs relative to its `run` target, which
394+
# here is `.` == cwd == scan root, so passthrough is correct. If a nested
395+
# target is ever supported, re-anchor patterns to the target first (see Node's
396+
# pathRelativeToTarget in exclude-paths.mts).
397+
exclude_paths=(
398+
(config.reach_exclude_paths or []) + (config.exclude_paths or [])
399+
) or None,
392400
min_severity=config.reach_min_severity,
393401
skip_cache=config.reach_skip_cache or False,
394402
disable_analytics=config.reach_disable_analytics or False,

tests/unit/test_exclude_paths.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Tests for the unified --exclude-paths flag (G2, Node alignment).
2+
3+
Covers the path matcher, config parsing + soft-deprecation of --reach-exclude-paths,
4+
and that --exclude-paths filters SCA manifest discovery via Core.find_files.
5+
"""
6+
import logging
7+
import types
8+
from unittest.mock import MagicMock
9+
10+
import pytest
11+
12+
from socketsecurity.config import CliConfig
13+
from socketsecurity.core import Core
14+
from socketsecurity.core.socket_config import SocketConfig
15+
16+
# ---- matcher -------------------------------------------------------------
17+
18+
@pytest.mark.parametrize(
    "rel, patterns, expected",
    [
        # A directory pattern covers the directory's whole subtree.
        ("packages/legacy/package.json", ["packages/legacy"], True),
        ("packages/keep/package.json", ["packages/legacy"], False),
        # Bare names are anchored at the root and do not match nested directories.
        ("tests/x.json", ["tests"], True),
        ("src/tests/x.json", ["tests"], False),
        # A leading '**/' matches at any depth.
        ("src/tests/x.json", ["**/tests"], True),
        ("tests/unit/x.json", ["tests/**"], True),
        ("tests", ["tests/**"], False),  # P/** is the subtree, not P itself
        # '*' never crosses '/', so an anchored basename glob is root-level only.
        ("index.spec.ts", ["*.spec.ts"], True),
        ("src/app/index.spec.ts", ["*.spec.ts"], False),
        ("src/app/index.spec.ts", ["**/*.spec.ts"], True),
        ("src/app/index.ts", ["**/*.spec.ts"], False),
        # A single star stands for exactly one path segment.
        ("packages/a/node_modules/x.json", ["packages/*/node_modules"], True),
        ("packages/a/b/node_modules/x.json", ["packages/*/node_modules"], False),
    ],
)
def test_matches_exclude_paths(rel, patterns, expected):
    """Each relative path is (or is not) excluded by the given patterns."""
    outcome = Core.matches_exclude_paths(rel, ".", patterns)
    assert outcome is expected
43+
44+
45+
@pytest.mark.parametrize(
    "pattern, excluded, kept",
    [
        # Node parity cases (src/commands/scan/exclude-paths.mts), anchored at scan root.
        ("tests", "tests/pkg/package.json", "src/tests/package.json"),
        ("package-lock.json", "package-lock.json", "packages/a/package-lock.json"),
        ("**/node_modules", "packages/a/node_modules/dep/package.json", "src/app/package.json"),
        ("packages/legacy", "packages/legacy/p.json", "packages/legacy-x/p.json"),
        ("src/*.json", "src/a.json", "src/sub/a.json"),
    ],
)
def test_matches_exclude_paths_node_parity(pattern, excluded, kept):
    """For each pattern, one path must be excluded and a look-alike path must be kept."""
    for candidate, want in ((excluded, True), (kept, False)):
        assert Core.matches_exclude_paths(candidate, ".", [pattern]) is want
59+
60+
61+
def test_matches_exclude_paths_empty_is_false():
    """An empty pattern list, or a whitespace-only pattern, excludes nothing."""
    for patterns in ([], [" "]):
        assert Core.matches_exclude_paths("a/b.json", ".", patterns) is False
64+
65+
66+
# ---- config parsing ------------------------------------------------------
67+
68+
# Argv prefix (API token + repo) prepended to every CliConfig.from_args call in these tests.
BASE_ARGS = ["--api-token", "test-token", "--repo", "test-repo"]
69+
70+
71+
def test_exclude_paths_parses_to_list():
    """The comma-separated value is split into a list with whitespace trimmed."""
    argv = [*BASE_ARGS, "--exclude-paths", "tests/**, packages/legacy , *.spec.ts"]
    parsed = CliConfig.from_args(argv)
    assert parsed.exclude_paths == ["tests/**", "packages/legacy", "*.spec.ts"]
74+
75+
76+
def test_exclude_paths_defaults_none():
    """Without the flag, exclude_paths stays None."""
    parsed = CliConfig.from_args(list(BASE_ARGS))
    assert parsed.exclude_paths is None
79+
80+
81+
def test_reach_exclude_paths_still_works_and_warns(caplog):
    """The deprecated flag is still parsed and a deprecation warning is logged."""
    argv = [*BASE_ARGS, "--reach", "--reach-exclude-paths", "a,b"]
    with caplog.at_level(logging.WARNING):
        parsed = CliConfig.from_args(argv)
    assert parsed.reach_exclude_paths == ["a", "b"]
    logged = [record.message for record in caplog.records]
    assert any("deprecated" in text for text in logged)
86+
87+
88+
@pytest.mark.parametrize(
    "bad",
    ["!foo", "/abs/path", "..", "../escape", "a/../b", ".", "**", "/**", "./**"],
)
def test_exclude_paths_validation_rejects(bad):
    """Each invalid pattern makes argument parsing exit with status 1."""
    argv = [*BASE_ARGS, "--exclude-paths", bad]
    with pytest.raises(SystemExit) as excinfo:
        CliConfig.from_args(argv)
    assert excinfo.value.code == 1
96+
97+
98+
def test_exclude_paths_validation_rejects_within_csv():
    """One invalid entry among valid ones still makes parsing exit with status 1."""
    argv = [*BASE_ARGS, "--exclude-paths", "src,..,tests"]
    with pytest.raises(SystemExit) as excinfo:
        CliConfig.from_args(argv)
    assert excinfo.value.code == 1
102+
103+
104+
def test_exclude_paths_valid_globs_accepted():
    """Well-formed globs pass validation and are stored in order."""
    argv = [*BASE_ARGS, "--exclude-paths", "tests/**,**/*.spec.ts,packages/legacy"]
    parsed = CliConfig.from_args(argv)
    assert parsed.exclude_paths == ["tests/**", "**/*.spec.ts", "packages/legacy"]
107+
108+
109+
# ---- find_files integration ---------------------------------------------
110+
111+
def _make_core(exclude_paths):
    """Build a Core without running __init__, carrying only the attributes set here.

    ``cli_config`` is a SimpleNamespace stand-in holding just ``exclude_paths``; ``sdk`` is
    a MagicMock.
    """
    instance = Core.__new__(Core)
    instance.config = SocketConfig(api_key="test-key")
    instance.cli_config = types.SimpleNamespace(exclude_paths=exclude_paths)
    instance.sdk = MagicMock()
    return instance
117+
118+
119+
def _seed_manifests(tmp_path):
    """Create three empty-object package.json files: at the root, in sub/ and in legacy/."""
    relative_names = ("package.json", "sub/package.json", "legacy/package.json")
    for manifest in (tmp_path / name for name in relative_names):
        manifest.parent.mkdir(parents=True, exist_ok=True)
        manifest.write_text("{}", encoding="utf-8")
124+
125+
126+
def test_find_files_excludes_matching_paths(tmp_path, mocker):
    """With exclude_paths=['legacy'], find_files returns no manifest under legacy/."""
    _seed_manifests(tmp_path)
    core = _make_core(["legacy"])
    supported = {"npm": {"package.json": {"pattern": "package.json"}}}
    mocker.patch.object(core, "get_supported_patterns", return_value=supported)

    found = core.find_files(str(tmp_path))

    kept = [f for f in found if f.endswith("/package.json") and "/legacy/" not in f]
    leaked = [f for f in found if "/legacy/" in f]
    assert kept
    assert not leaked
137+
138+
139+
def test_find_files_no_exclude_paths_keeps_all(tmp_path, mocker):
    """With exclude_paths unset, all three seeded manifests are discovered."""
    _seed_manifests(tmp_path)
    core = _make_core(None)
    supported = {"npm": {"package.json": {"pattern": "package.json"}}}
    mocker.patch.object(core, "get_supported_patterns", return_value=supported)

    found = core.find_files(str(tmp_path))

    legacy_hits = [f for f in found if "/legacy/" in f]
    assert legacy_hits
    assert len(found) == 3

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)