Skip to content

Commit f67f91a

Browse files
committed
feat(reach): add unified --exclude-paths, deprecate --reach-exclude-paths
Add a single --exclude-paths flag (Node CLI parity) that filters BOTH SCA manifest discovery and reachability analysis:
- New Core matcher: anchored micromatch-style globs compiled to regex (no new deps). Patterns match scan-root-relative POSIX paths; '*' does not cross '/', '**' does; each pattern P is expanded to [P, P/**]. Threaded into find_files via cli_config; no-op when unset.
- Reach side unions --exclude-paths with the now-deprecated --reach-exclude-paths and forwards the result to coana --exclude-dirs.
- Validation mirrors Node's assertValidExcludePaths (rejects negation, absolute paths, '..' traversal, and degenerate match-everything patterns; the trailing slash is stripped so '**/' is rejected). Accepts comma-separated strings and config-file lists.
- --reach-exclude-paths is soft-deprecated: it still works, is marked [DEPRECATED] in help, and warns at runtime.
Adds a CHANGELOG 2.4.3 entry and tests, including the Node parity cases, validation, and config-file paths.
1 parent eb17457 commit f67f91a

8 files changed

Lines changed: 351 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Changelog
22

3+
## 2.4.3
4+
5+
### Added: unified `--exclude-paths` for manifest discovery and reachability
6+
7+
- New `--exclude-paths` flag (comma-separated globs) that excludes matching paths from
8+
BOTH SCA manifest discovery and reachability analysis. Patterns are scan-root-relative
9+
anchored globs (`*` does not cross `/`, `**` does), matching the Node CLI's behavior.
10+
- Pattern validation rejects unsupported forms (negation, absolute paths, `..` traversal,
11+
and match-everything patterns). Patterns may be supplied on the CLI as a comma-separated
12+
string or via a `--config` file list.
13+
- `--reach-exclude-paths` is now deprecated in favor of `--exclude-paths`. It still works
14+
(and is unioned into the Coana `--exclude-dirs` argument) but is marked deprecated in
15+
`--help` and warns at runtime.
16+
317
## 2.4.2
418

519
### Added: reachability flag and Coana environment alignment with the Node CLI

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.4.2"
9+
version = "2.4.3"
1010
requires-python = ">= 3.11"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.4.2'
2+
__version__ = '2.4.3'
33
USER_AGENT = f'SocketPythonCLI/{__version__}'

socketsecurity/config.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,50 @@ def load_cli_config_file(config_path: str) -> dict:
5555
return scoped
5656
return data
5757

58+
def normalize_exclude_paths(value) -> Optional[List[str]]:
    """Normalize a --exclude-paths value into a clean list of patterns.

    Accepts a comma-separated string (CLI) or a list/tuple (e.g. a JSON/TOML --config file
    value), so config-file-supplied patterns flow through the same validation as CLI ones.

    Args:
        value: Raw option value: a comma-separated string, a list/tuple of entries, or a
            falsy value when the option is unset.

    Returns:
        The whitespace-stripped, non-empty patterns in their original order, or ``None``
        when the value is unset, of an unsupported type, or yields no usable pattern.
    """
    if not value:
        return None
    if isinstance(value, str):
        items = value.split(",")
    elif isinstance(value, (list, tuple)):
        items = value
    else:
        return None

    cleaned = []
    for item in items:
        # A null entry in a config-file list (JSON ``null``) is not a pattern; without this
        # guard ``str(None)`` would turn it into the literal glob "None".
        if item is None:
            continue
        text = str(item).strip()
        if text:
            cleaned.append(text)
    return cleaned or None
74+
75+
76+
def validate_exclude_paths(patterns: List[str]) -> None:
    """Validate --exclude-paths patterns (mirrors Node's assertValidExcludePaths).

    Patterns are scan-root-relative globs. Reject the cases coana's --exclude-dirs / fast-glob
    cannot honor: negation, absolute paths, ``..`` traversal, and degenerate match-everything.

    Args:
        patterns: Already-normalized patterns to check, in the order the user gave them.

    Raises:
        SystemExit: With code 1 on the first invalid pattern, after logging the reason.
    """
    # Local import so this block does not depend on the module's import list. sys.exit is
    # used instead of the bare ``exit`` helper, which is injected by the ``site`` module and
    # is therefore missing when the interpreter starts without it (e.g. ``python -S``).
    import sys

    # Degenerate match-everything forms, compared against the trailing-slash-stripped pattern
    # (so "**/" reduces to "**" and is rejected, matching Node's stripTrailingSlash + check).
    degenerate = {"", ".", "**", "./**", "/**"}
    for p in patterns:
        # Backslashes are treated as path separators so Windows-style input is checked too.
        norm = (p or "").strip().replace("\\", "/")
        if norm.startswith("!"):
            problem = "negation patterns are not supported"
        elif norm.startswith("/"):
            problem = "patterns must be scan-root relative (no leading '/')"
        elif norm == ".." or norm.startswith("../") or "/../" in norm or norm.endswith("/.."):
            problem = "'..' path traversal is not allowed"
        elif norm.rstrip("/") in degenerate:
            problem = "pattern would exclude everything"
        else:
            continue
        logging.error(f"--exclude-paths: {problem}: {p!r}")
        sys.exit(1)
100+
101+
58102
@dataclass
59103
class PluginConfig:
60104
enabled: bool = False
@@ -106,6 +150,7 @@ class CliConfig:
106150
include_module_folders: bool = False
107151
repo_is_public: bool = False
108152
excluded_ecosystems: list[str] = field(default_factory=lambda: [])
153+
exclude_paths: Optional[List[str]] = None
109154
version: str = __version__
110155
jira_plugin: PluginConfig = field(default_factory=PluginConfig)
111156
slack_plugin: PluginConfig = field(default_factory=PluginConfig)
@@ -167,6 +212,12 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
167212

168213
args = parser.parse_args(args_list)
169214

215+
if args.reach_exclude_paths:
216+
logging.warning(
217+
"--reach-exclude-paths is deprecated; use --exclude-paths instead. "
218+
"It is still honored and unioned with --exclude-paths."
219+
)
220+
170221
# Get API token from env or args (check multiple env var names)
171222
api_token = (
172223
os.getenv("SOCKET_SECURITY_API_KEY") or
@@ -258,6 +309,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
258309
'reach_lazy_mode': args.reach_lazy_mode,
259310
'reach_ecosystems': args.reach_ecosystems.split(',') if args.reach_ecosystems else None,
260311
'reach_exclude_paths': args.reach_exclude_paths.split(',') if args.reach_exclude_paths else None,
312+
'exclude_paths': normalize_exclude_paths(args.exclude_paths),
261313
'reach_skip_cache': args.reach_skip_cache,
262314
'reach_min_severity': args.reach_min_severity,
263315
'reach_output_file': args.reach_output_file,
@@ -361,6 +413,10 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
361413
logging.error("--sarif-reachability potentially/reachable-or-potentially requires --sarif-scope full")
362414
exit(1)
363415

416+
# Validate --exclude-paths patterns up front (mirrors Node's assertValidExcludePaths).
417+
if config_args.get("exclude_paths"):
418+
validate_exclude_paths(config_args["exclude_paths"])
419+
364420
# Validate that only_facts_file requires reach
365421
if args.only_facts_file and not args.reach:
366422
logging.error("--only-facts-file requires --reach to be specified")
@@ -570,6 +626,15 @@ def create_argument_parser() -> argparse.ArgumentParser:
570626
help="List of ecosystems to exclude from analysis (JSON array string)"
571627
)
572628

629+
path_group.add_argument(
630+
"--exclude-paths",
631+
dest="exclude_paths",
632+
metavar="<list>",
633+
help="Comma-separated paths/globs to exclude from BOTH manifest discovery and "
634+
"reachability analysis (e.g. 'tests/**,packages/legacy,*.spec.ts'). "
635+
"Supersedes --reach-exclude-paths."
636+
)
637+
573638
# Branch and Scan Configuration
574639
config_group = parser.add_argument_group('Branch and Scan Configuration')
575640
config_group.add_argument(
@@ -920,7 +985,8 @@ def create_argument_parser() -> argparse.ArgumentParser:
920985
"--reach-exclude-paths",
921986
dest="reach_exclude_paths",
922987
metavar="<list>",
923-
help="Paths to exclude from reachability analysis (comma-separated)"
988+
help="[DEPRECATED: use --exclude-paths] Paths to exclude from reachability analysis "
989+
"(comma-separated). Still honored and unioned with --exclude-paths."
924990
)
925991
reachability_group.add_argument(
926992
"--reach-min-severity",

socketsecurity/core/__init__.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,67 @@ def is_excluded(file_path: str, excluded_dirs: Set[str]) -> bool:
213213
return True
214214
return False
215215

216+
@staticmethod
217+
def _exclude_glob_to_regex(pattern: str) -> str:
218+
"""Translate a micromatch-style glob into an anchored regex string.
219+
220+
Mirrors the Node CLI's --exclude-paths matcher (src/commands/scan/exclude-paths.mts):
221+
patterns are matched against scan-root-relative POSIX paths, case-sensitively, where
222+
``*`` does NOT cross ``/`` and ``**`` DOES. Patterns are anchored at the scan root, so
223+
``tests`` matches ``tests`` (not ``src/tests``); use ``**/tests`` to match at any depth.
224+
"""
225+
i, n = 0, len(pattern)
226+
out = ["^"]
227+
while i < n:
228+
c = pattern[i]
229+
if c == "*":
230+
if i + 1 < n and pattern[i + 1] == "*":
231+
if i + 2 < n and pattern[i + 2] == "/":
232+
out.append("(?:[^/]+/)*") # '**/' -> zero or more path segments
233+
i += 3
234+
else:
235+
out.append(".*") # '**' at end / before non-slash -> any, incl '/'
236+
i += 2
237+
else:
238+
out.append("[^/]*") # '*' -> within a single path segment
239+
i += 1
240+
elif c == "?":
241+
out.append("[^/]")
242+
i += 1
243+
else:
244+
out.append(re.escape(c))
245+
i += 1
246+
out.append("$")
247+
return "".join(out)
248+
249+
@staticmethod
250+
def compile_exclude_paths(patterns: Optional[List[str]]) -> List["re.Pattern"]:
251+
"""Compile --exclude-paths globs into anchored regexes (compiled once per scan).
252+
253+
Each pattern ``P`` is expanded the way Node feeds fast-glob's ``ignore``: ``P`` (a file-
254+
or dir-shaped exact match) plus ``P/**`` (its subtree), unless ``P`` already ends with
255+
``/**``. Validation of the patterns happens earlier, in CliConfig.from_args.
256+
"""
257+
compiled: List["re.Pattern"] = []
258+
for raw in patterns or []:
259+
p = (raw or "").strip().replace("\\", "/").rstrip("/")
260+
if not p:
261+
continue
262+
globs = [p] if p.endswith("/**") else [p, f"{p}/**"]
263+
compiled.extend(re.compile(Core._exclude_glob_to_regex(g)) for g in globs)
264+
return compiled
265+
266+
@staticmethod
267+
def path_matches_exclude_regexes(rel_path: str, regexes: List["re.Pattern"]) -> bool:
268+
rp = rel_path.replace(os.sep, "/").replace("\\", "/")
269+
return any(r.match(rp) for r in regexes)
270+
271+
@staticmethod
272+
def matches_exclude_paths(file_path: str, base_path: str, patterns: List[str]) -> bool:
273+
"""Convenience matcher (compiles patterns per call); used in tests/ad-hoc checks."""
274+
rel_path = os.path.relpath(file_path, base_path).replace(os.sep, "/")
275+
return Core.path_matches_exclude_regexes(rel_path, Core.compile_exclude_paths(patterns))
276+
216277
def save_submitted_files_list(self, files: List[str], output_path: str) -> None:
217278
"""
218279
Save the list of submitted file names to a JSON file for debugging.
@@ -336,6 +397,17 @@ def find_files(self, path: str, ecosystems: Optional[List[str]] = None) -> List[
336397
start_time = time.time()
337398
files: Set[str] = set()
338399

400+
# Unified --exclude-paths: filter discovered manifests by the same paths/globs that are
401+
# forwarded to coana's --exclude-dirs. Only consulted when the user supplied the flag.
402+
# Patterns are anchored to `path` (the scan root this pass walks), matching coana's
403+
# target and the Node CLI's fast-glob cwd. NOTE: when scanning multiple --sub-path
404+
# targets, find_files runs once per sub-path, so a pattern like `tests` anchors to each
405+
# sub-path independently (Node anchors all patterns to a single scan-root cwd). This only
406+
# differs for the multi-target full-scan + --exclude-paths combo; the reach flow is
407+
# single-target, so it matches Node there.
408+
exclude_paths = getattr(self.cli_config, "exclude_paths", None) if self.cli_config else None
409+
exclude_regexes = Core.compile_exclude_paths(exclude_paths) if exclude_paths else []
410+
339411
# Get supported patterns from the API
340412
patterns = self.get_supported_patterns()
341413

@@ -365,8 +437,15 @@ def find_files(self, path: str, ecosystems: Optional[List[str]] = None) -> List[
365437

366438
for glob_file in glob_files:
367439
glob_file_str = str(glob_file)
368-
if os.path.isfile(glob_file_str) and not Core.is_excluded(glob_file_str, self.config.excluded_dirs):
369-
files.add(glob_file_str.replace("\\", "/"))
440+
if not os.path.isfile(glob_file_str):
441+
continue
442+
if Core.is_excluded(glob_file_str, self.config.excluded_dirs):
443+
continue
444+
if exclude_regexes:
445+
rel = os.path.relpath(glob_file_str, path)
446+
if Core.path_matches_exclude_regexes(rel, exclude_regexes):
447+
continue
448+
files.add(glob_file_str.replace("\\", "/"))
370449

371450
glob_end = time.time()
372451
log.debug(f"Globbing took {glob_end - glob_start:.4f} seconds")

socketsecurity/socketcli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,15 @@ def main_code():
388388
timeout=config.reach_analysis_timeout,
389389
memory_limit=config.reach_analysis_memory_limit,
390390
ecosystems=config.reach_ecosystems,
391-
exclude_paths=config.reach_exclude_paths,
391+
# Union the deprecated --reach-exclude-paths with the unified --exclude-paths
392+
# and forward verbatim to coana's --exclude-dirs. Patterns are scan-root
393+
# relative; coana resolves --exclude-dirs relative to its `run` target, which
394+
# here is `.` == cwd == scan root, so passthrough is correct. If a nested
395+
# target is ever supported, re-anchor patterns to the target first (see Node's
396+
# pathRelativeToTarget in exclude-paths.mts).
397+
exclude_paths=(
398+
(config.reach_exclude_paths or []) + (config.exclude_paths or [])
399+
) or None,
392400
min_severity=config.reach_min_severity,
393401
skip_cache=config.reach_skip_cache or False,
394402
disable_analytics=config.reach_disable_analytics or False,

0 commit comments

Comments
 (0)