From 3dc259a905cc5302d77066040f39b01c3d3374a1 Mon Sep 17 00:00:00 2001 From: Adva Oren Date: Wed, 20 May 2026 14:14:11 +0300 Subject: [PATCH 1/2] fix(python): handle pip options, hashes, and line continuations in requirements.txt (TC-4527) Add preprocessRequirementsLines() to PythonControllerBase that properly handles all requirements.txt line types: pip options (--extra-index-url, -r, -c, etc.), inline options (--hash, --config-settings), line continuations (\), PEP 508 direct references (name @ url), bare URLs, VCS URLs, and local paths. Applied in getDependenciesImpl, installingRequirementsOneByOne, and getIgnoredDependencies to fix parsing errors when requirements.txt contains non-package lines. Co-Authored-By: Claude Opus 4.6 --- .../providers/PythonPipProvider.java | 5 +- .../utils/PythonControllerBase.java | 68 ++++++++- .../utils/PythonControllerBaseTest.java | 140 ++++++++++++++++++ 3 files changed, 203 insertions(+), 10 deletions(-) diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java index bfe021c8..862f87f0 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java @@ -95,8 +95,9 @@ public Content provideComponent() throws IOException { @Override protected Set getIgnoredDependencies(String manifestContent) { - String[] lines = manifestContent.split(System.lineSeparator()); - return Arrays.stream(lines) + List rawLines = Arrays.asList(manifestContent.split("\\R")); + List preprocessed = PythonControllerBase.preprocessRequirementsLines(rawLines); + return preprocessed.stream() .filter(this::containsIgnorePattern) .map(PythonPipProvider::extractDepFull) .map(this::splitToNameVersion) diff --git a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java index 67d92569..0a069e07 100644 --- a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java +++ b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java @@ -112,10 +112,9 @@ public final List> getDependencies( private void installingRequirementsOneByOne(String pathToRequirements) { try { - List requirementsRows = Files.readAllLines(Path.of(pathToRequirements)); + List requirementsRows = + preprocessRequirementsLines(Files.readAllLines(Path.of(pathToRequirements))); requirementsRows.stream() - .filter((line) -> !line.trim().startsWith("#")) - .filter((line) -> !line.trim().isEmpty()) .forEach( (dependency) -> { String dependencyName = getDependencyName(dependency); @@ -151,11 +150,7 @@ private List> getDependenciesImpl( } List linesOfRequirements; try { - linesOfRequirements = - Files.readAllLines(requirementsPath).stream() - .filter((line) -> !line.trim().startsWith("#") && !line.trim().isEmpty()) - .map(String::trim) - .collect(Collectors.toList()); + linesOfRequirements = preprocessRequirementsLines(Files.readAllLines(requirementsPath)); } catch (IOException e) { log.warning( "Error while trying to read the requirements.txt file, will not be able to install" @@ -377,6 +372,63 @@ protected String getDependencyNameShow(String pipShowOutput) { return versionToken.substring(0, endOfLine).trim(); } + /** + * Preprocesses raw requirements.txt lines by joining line continuations, stripping inline + * options, and filtering out pip option lines, URLs, local paths, and empty/comment lines. + */ + public static List preprocessRequirementsLines(List rawLines) { + // Join line continuations (trailing backslash, possibly followed by whitespace) + List joined = new ArrayList<>(); + StringBuilder current = new StringBuilder(); + for (String line : rawLines) { + String stripped = line.stripTrailing(); + if (stripped.endsWith("\\")) { + current.append(stripped, 0, stripped.length() - 1); + } else { + current.append(line); + joined.add(current.toString()); + current = new StringBuilder(); + } + } + if (current.length() > 0) { + joined.add(current.toString()); + } + + List result = new ArrayList<>(); + for (String raw : joined) { + String line = raw.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + // Filter out pip options (lines starting with -) + if (line.startsWith("-")) { + continue; + } + // Filter out local path requirements (./path, ../path, /abs/path) + if (line.startsWith("./") || line.startsWith("../") || line.startsWith("/")) { + continue; + } + // Strip PEP 508 direct references (name @ url -> name) before URL check + int atIndex = line.indexOf(" @ "); + if (atIndex != -1) { + line = line.substring(0, atIndex).trim(); + } + // Strip inline pip options (--hash=..., --config-settings=..., etc.) + int optionIndex = line.indexOf(" --"); + if (optionIndex != -1) { + line = line.substring(0, optionIndex).trim(); + } + // Filter out bare URLs and VCS URLs (any line containing :// is not a package name) + if (line.contains("://")) { + continue; + } + if (!line.isEmpty()) { + result.add(line); + } + } + return result; + } + public static String getDependencyName(String dep) { int markerSeparator = dep.indexOf(";"); String requirement = markerSeparator == -1 ? dep : dep.substring(0, markerSeparator); diff --git a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java index 729a0dce..ea01cc79 100644 --- a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java +++ b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java @@ -2043,4 +2043,144 @@ void when_spliting_pip_show_dep_with_license() { + "Requires: \n" + "Required-by: cycler, gensim, gTTS, python-dateutil, tweepy\n"); } + + @Test + void preprocessRequirementsLines_filters_extra_index_url() { + List input = + List.of( + "--extra-index-url https://pypi.example.com/simple", + "requests==2.28.0", + "--index-url https://pypi.org/simple", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_filters_short_pip_options() { + List input = + List.of("-r other-requirements.txt", "-c constraints.txt", "numpy==1.24.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("numpy==1.24.0"), result); + } + + @Test + void preprocessRequirementsLines_strips_hashes() { + List input = + List.of("requests==2.28.0 --hash=sha256:abc123 --hash=sha256:def456", "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_strips_config_settings() { + List input = + List.of( + "aiohappyeyeballs==2.6.1 --config-settings=KEY=VALUE --config-settings=OTHER=VAL", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("aiohappyeyeballs==2.6.1", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_joins_line_continuations() { + List input = + List.of( + "requests==2.28.0 \\", + " --hash=sha256:abc123 \\", + " --hash=sha256:def456", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_joins_continuations_with_config_settings() { + List input = + List.of( + "aiohappyeyeballs==2.6.1 \\", + " --config-settings=SAMPLE_TEXT=TEST_VALUE \\", + " --config-settings=ANOTHER_KEY=ANOTHER_VALUE", + "async-timeout==5.0.1 ; python_full_version < '3.11'"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals( + List.of("aiohappyeyeballs==2.6.1", "async-timeout==5.0.1 ; python_full_version < '3.11'"), + result); + } + + @Test + void preprocessRequirementsLines_filters_comments_and_empty_lines() { + List input = List.of("# this is a comment", "", " ", "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_strips_direct_references() { + List input = + List.of( + "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", + "requests[security] @ https://github.com/psf/requests/archive/main.zip", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("pip", "requests[security]", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_handles_trailing_whitespace_after_backslash() { + List input = + List.of("requests==2.28.0 \\ ", " --hash=sha256:abc123", "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_filters_bare_urls() { + List input = + List.of( + "https://example.com/packages/MyPackage-1.0.tar.gz", + "http://example.com/packages/other.whl", + "git+https://git.example.com/MyProject.git@v1.0", + "git+git://git.example.com/repo.git", + "hg+http://hg.example.com/repo", + "hg+ssh://hg.example.com/repo", + "svn+svn://svn.example.com/project/trunk", + "bzr+ftp://bzr.example.com/repo", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_filters_local_paths() { + List input = + List.of( + "./local-package", + "../parent-package", + "./downloads/MyPackage-1.0.tar.gz", + "/absolute/path/to/package", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_combined_scenario() { + List input = + List.of( + "--extra-index-url https://pypi.example.com/simple", + "# A comment", + "requests==2.28.0 \\ ", + " --hash=sha256:abc123", + "", + "--trusted-host pypi.example.com", + "flask==2.0.3 --hash=sha256:xyz789", + "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", + "https://example.com/packages/other.tar.gz", + "git+git://git.example.com/repo.git", + "./local-package", + "numpy>=1.24.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3", "pip", "numpy>=1.24.0"), result); + } } From adf3b10bee2e57f5a653ed4ccb37b38d2007d759 Mon Sep 17 00:00:00 2001 From: Adva Oren Date: Wed, 20 May 2026 14:28:26 +0300 Subject: [PATCH 2/2] fix: address code review feedback for requirements.txt preprocessing - Revert getIgnoredDependencies to use raw lines so ignore markers (e.g. #trustify-da-ignore) in inline comments are not stripped before containsIgnorePattern runs; keep \R split fix - Use compiled Pattern with \s-- for inline option stripping to handle tabs and multiple spaces, not just single space - Add Windows drive-letter path filtering (C:\path, c:/path) - Tighten :// URL check to only inspect the requirement part before markers, avoiding false positives from marker strings - Add tests for Windows paths and final-line backslash edge case Co-Authored-By: Claude Opus 4.6 --- .../providers/PythonPipProvider.java | 5 ++-- .../utils/PythonControllerBase.java | 24 +++++++++++++------ .../utils/PythonControllerBaseTest.java | 19 +++++++++++++++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java index 862f87f0..2b375ea4 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java @@ -95,9 +95,8 @@ public Content provideComponent() throws IOException { @Override protected Set getIgnoredDependencies(String manifestContent) { - List rawLines = Arrays.asList(manifestContent.split("\\R")); - List preprocessed = PythonControllerBase.preprocessRequirementsLines(rawLines); - return preprocessed.stream() + String[] lines = manifestContent.split("\\R"); + return Arrays.stream(lines) .filter(this::containsIgnorePattern) .map(PythonPipProvider::extractDepFull) .map(this::splitToNameVersion) diff --git a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java index 0a069e07..c7441cb9 100644 --- a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java +++ b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java @@ -35,6 +35,8 @@ import java.util.List; import java.util.Map; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -372,6 +374,9 @@ protected String getDependencyNameShow(String pipShowOutput) { return versionToken.substring(0, endOfLine).trim(); } + private static final Pattern INLINE_OPTION_PATTERN = Pattern.compile("\\s--"); + private static final Pattern WINDOWS_DRIVE_PATH_PATTERN = Pattern.compile("^[a-zA-Z]:[/\\\\]"); + /** * Preprocesses raw requirements.txt lines by joining line continuations, stripping inline * options, and filtering out pip option lines, URLs, local paths, and empty/comment lines. @@ -404,8 +409,11 @@ public static List preprocessRequirementsLines(List rawLines) { if (line.startsWith("-")) { continue; } - // Filter out local path requirements (./path, ../path, /abs/path) - if (line.startsWith("./") || line.startsWith("../") || line.startsWith("/")) { + // Filter out local path requirements (./path, ../path, /abs/path, C:\path, C:/path) + if (line.startsWith("./") + || line.startsWith("../") + || line.startsWith("/") + || WINDOWS_DRIVE_PATH_PATTERN.matcher(line).find()) { continue; } // Strip PEP 508 direct references (name @ url -> name) before URL check @@ -414,12 +422,14 @@ public static List preprocessRequirementsLines(List rawLines) { line = line.substring(0, atIndex).trim(); } // Strip inline pip options (--hash=..., --config-settings=..., etc.) - int optionIndex = line.indexOf(" --"); - if (optionIndex != -1) { - line = line.substring(0, optionIndex).trim(); + Matcher optionMatcher = INLINE_OPTION_PATTERN.matcher(line); + if (optionMatcher.find()) { + line = line.substring(0, optionMatcher.start()).trim(); } - // Filter out bare URLs and VCS URLs (any line containing :// is not a package name) - if (line.contains("://")) { + // Filter out bare URLs and VCS URLs — check the requirement part (before any marker) + // to avoid false positives from marker strings + String requirementPart = line.contains(";") ? line.substring(0, line.indexOf(";")) : line; + if (requirementPart.contains("://")) { continue; } if (!line.isEmpty()) { diff --git a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java index ea01cc79..e36f78cf 100644 --- a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java +++ b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java @@ -2164,6 +2164,25 @@ void preprocessRequirementsLines_filters_local_paths() { assertEquals(List.of("requests==2.28.0"), result); } + @Test + void preprocessRequirementsLines_filters_windows_paths() { + List input = + List.of( + "C:\\Users\\dev\\my-package", + "c:/projects/my-lib", + "D:\\packages\\MyPackage-1.0.tar.gz", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_handles_final_line_ending_with_backslash() { + List input = List.of("flask==2.0.3", "requests==2.28.0 \\"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("flask==2.0.3", "requests==2.28.0"), result); + } + @Test void preprocessRequirementsLines_combined_scenario() { List input =