diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java index bfe021c8..2b375ea4 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPipProvider.java @@ -95,7 +95,7 @@ public Content provideComponent() throws IOException { @Override protected Set getIgnoredDependencies(String manifestContent) { - String[] lines = manifestContent.split(System.lineSeparator()); + String[] lines = manifestContent.split("\\R"); return Arrays.stream(lines) .filter(this::containsIgnorePattern) .map(PythonPipProvider::extractDepFull) diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonUvProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonUvProvider.java index 60311a70..07b9d55f 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonUvProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonUvProvider.java @@ -193,6 +193,21 @@ UvDependencyData parseUvExport(String exportOutput) throws IOException { // Package line: name==version [; env-marker] if (!line.startsWith(" ") && !trimmed.startsWith("#")) { inViaBlock = false; + + // PEP 440 direct references (name @ url) — skip, no pinned version available + if (isDirectReference(trimmed)) { + log.fine("Skipping PEP 440 direct reference: " + trimmed); + currentKey = null; + continue; + } + + // Path dependencies (./local, ../local, /absolute, ~/home, C:\win) — skip + if (isPathDependency(trimmed)) { + log.fine("Skipping path dependency: " + trimmed); + currentKey = null; + continue; + } + if (!trimmed.contains("==")) { throw new IOException("uv export: package '" + trimmed + "' has no pinned version"); } @@ -284,6 +299,22 @@ private static String parseEditableInstall( } } + private static final Pattern WINDOWS_DRIVE_PATH = Pattern.compile("^[a-zA-Z]:[/\\\\]"); + + /** Returns {@code true} if the line is a PEP 440 direct reference ({@code name @ url}). */ + static boolean isDirectReference(String trimmedLine) { + return trimmedLine.contains(" @ "); + } + + /** Returns {@code true} if the line is a local or absolute path dependency. */ + static boolean isPathDependency(String trimmedLine) { + return trimmedLine.startsWith("./") + || trimmedLine.startsWith("../") + || trimmedLine.startsWith("/") + || trimmedLine.startsWith("~/") + || WINDOWS_DRIVE_PATH.matcher(trimmedLine).find(); + } + private static final Pattern BARE_PACKAGE_NAME = Pattern.compile("[A-Za-z0-9][A-Za-z0-9._-]*"); private static void recordViaParent( diff --git a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java index 67d92569..c7441cb9 100644 --- a/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java +++ b/src/main/java/io/github/guacsec/trustifyda/utils/PythonControllerBase.java @@ -35,6 +35,8 @@ import java.util.List; import java.util.Map; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -112,10 +114,9 @@ public final List> getDependencies( private void installingRequirementsOneByOne(String pathToRequirements) { try { - List requirementsRows = Files.readAllLines(Path.of(pathToRequirements)); + List requirementsRows = + preprocessRequirementsLines(Files.readAllLines(Path.of(pathToRequirements))); requirementsRows.stream() - .filter((line) -> !line.trim().startsWith("#")) - .filter((line) -> !line.trim().isEmpty()) .forEach( (dependency) -> { String dependencyName = getDependencyName(dependency); @@ -151,11 +152,7 @@ private List> getDependenciesImpl( } List linesOfRequirements; try { - linesOfRequirements = - Files.readAllLines(requirementsPath).stream() - .filter((line) -> !line.trim().startsWith("#") && !line.trim().isEmpty()) - .map(String::trim) - .collect(Collectors.toList()); + linesOfRequirements = preprocessRequirementsLines(Files.readAllLines(requirementsPath)); } catch (IOException e) { log.warning( "Error while trying to read the requirements.txt file, will not be able to install" @@ -377,6 +374,71 @@ protected String getDependencyNameShow(String pipShowOutput) { return versionToken.substring(0, endOfLine).trim(); } + private static final Pattern INLINE_OPTION_PATTERN = Pattern.compile("\\s--"); + private static final Pattern WINDOWS_DRIVE_PATH_PATTERN = Pattern.compile("^[a-zA-Z]:[/\\\\]"); + + /** + * Preprocesses raw requirements.txt lines by joining line continuations, stripping inline + * options, and filtering out pip option lines, URLs, local paths, and empty/comment lines. + */ + public static List preprocessRequirementsLines(List rawLines) { + // Join line continuations (trailing backslash, possibly followed by whitespace) + List joined = new ArrayList<>(); + StringBuilder current = new StringBuilder(); + for (String line : rawLines) { + String stripped = line.stripTrailing(); + if (stripped.endsWith("\\")) { + current.append(stripped, 0, stripped.length() - 1); + } else { + current.append(line); + joined.add(current.toString()); + current = new StringBuilder(); + } + } + if (current.length() > 0) { + joined.add(current.toString()); + } + + List result = new ArrayList<>(); + for (String raw : joined) { + String line = raw.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + // Filter out pip options (lines starting with -) + if (line.startsWith("-")) { + continue; + } + // Filter out local path requirements (./path, ../path, /abs/path, C:\path, C:/path) + if (line.startsWith("./") + || line.startsWith("../") + || line.startsWith("/") + || WINDOWS_DRIVE_PATH_PATTERN.matcher(line).find()) { + continue; + } + // Strip PEP 508 direct references (name @ url -> name) before URL check + int atIndex = line.indexOf(" @ "); + if (atIndex != -1) { + line = line.substring(0, atIndex).trim(); + } + // Strip inline pip options (--hash=..., --config-settings=..., etc.) + Matcher optionMatcher = INLINE_OPTION_PATTERN.matcher(line); + if (optionMatcher.find()) { + line = line.substring(0, optionMatcher.start()).trim(); + } + // Filter out bare URLs and VCS URLs — check the requirement part (before any marker) + // to avoid false positives from marker strings + String requirementPart = line.contains(";") ? line.substring(0, line.indexOf(";")) : line; + if (requirementPart.contains("://")) { + continue; + } + if (!line.isEmpty()) { + result.add(line); + } + } + return result; + } + public static String getDependencyName(String dep) { int markerSeparator = dep.indexOf(";"); String requirement = markerSeparator == -1 ? dep : dep.substring(0, markerSeparator); diff --git a/src/test/java/io/github/guacsec/trustifyda/providers/Python_Uv_Provider_Test.java b/src/test/java/io/github/guacsec/trustifyda/providers/Python_Uv_Provider_Test.java index 7fda0b84..4c979f18 100644 --- a/src/test/java/io/github/guacsec/trustifyda/providers/Python_Uv_Provider_Test.java +++ b/src/test/java/io/github/guacsec/trustifyda/providers/Python_Uv_Provider_Test.java @@ -392,6 +392,212 @@ void test_parseUvExport_via_skips_non_bare_package_names() throws IOException { assertThat(data.graph().values().stream().allMatch(p -> p.children().isEmpty())).isTrue(); } + /** Verifies that PEP 440 direct references (name @ url) are skipped without throwing. */ + @Test + void test_parseUvExport_skips_direct_references() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + var provider = new PythonUvProvider(pyprojectPath); + + String exportOutput = + "# This file was autogenerated by uv\n" + + "certifi @ git+https://github.com/certifi/python-certifi.git@abcdef1234567890\n" + + " # via requests\n" + + "anyio==3.6.2\n" + + " # via test-project\n"; + + // Given/When + var data = provider.parseUvExport(exportOutput); + + // Then — direct reference is skipped, not in graph + assertThat(data.graph()).doesNotContainKey("certifi"); + // anyio after the skipped line is still parsed correctly + assertThat(data.graph()).containsKey("anyio"); + assertThat(data.graph().get("anyio").version()).isEqualTo("3.6.2"); + assertThat(data.directDeps()).contains("anyio"); + } + + /** Verifies that path dependencies (./local-package) are skipped without throwing. */ + @Test + void test_parseUvExport_skips_path_dependencies() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + var provider = new PythonUvProvider(pyprojectPath); + + String exportOutput = + "# This file was autogenerated by uv\n" + + "./local-package\n" + + " # via test-project\n" + + "../sibling-package\n" + + " # via test-project\n" + + "/absolute/path/package\n" + + " # via test-project\n" + + "~/home-relative/package\n" + + " # via test-project\n" + + "C:\\Users\\dev\\my-package\n" + + " # via test-project\n" + + "anyio==3.6.2\n" + + " # via test-project\n"; + + // Given/When + var data = provider.parseUvExport(exportOutput); + + // Then — all path dependency forms are skipped + assertThat(data.graph()).doesNotContainKey("./local-package"); + assertThat(data.graph()).doesNotContainKey("../sibling-package"); + assertThat(data.graph()).doesNotContainKey("/absolute/path/package"); + assertThat(data.graph()).doesNotContainKey("~/home-relative/package"); + assertThat(data.graph()).doesNotContainKey("c:\\users\\dev\\my-package"); + // anyio is still parsed correctly + assertThat(data.graph()).containsKey("anyio"); + assertThat(data.directDeps()).contains("anyio"); + } + + /** + * Verifies that # via comments after a skipped path dependency do not corrupt the graph by + * attaching to the previous package. + */ + @Test + void test_parseUvExport_via_after_skipped_path_dep_does_not_corrupt_graph() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + var provider = new PythonUvProvider(pyprojectPath); + + // Given — anyio is parsed first, then a path dependency is skipped. The "# via requests" + // after the skipped path dep should NOT make anyio a child of requests. + String exportOutput = + "# This file was autogenerated by uv\n" + + "anyio==3.6.2\n" + + " # via test-project\n" + + "./local-package\n" + + " # via requests\n" + + "requests==2.25.1\n" + + " # via test-project\n"; + + // When + var data = provider.parseUvExport(exportOutput); + + // Then — requests should NOT have anyio as a child (that would be a corruption) + assertThat(data.graph().get("requests").children()).doesNotContain("anyio"); + // Both anyio and requests are direct deps + assertThat(data.directDeps()).containsExactlyInAnyOrder("anyio", "requests"); + } + + /** + * Verifies that # via comments after skipped direct references do not create incorrect + * parent-child relationships with the previous package. + */ + @Test + void test_parseUvExport_via_after_skipped_does_not_corrupt_graph() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + var provider = new PythonUvProvider(pyprojectPath); + + // anyio is parsed first, then a direct reference is skipped. The "# via requests" after + // the skipped package should NOT make anyio a child of requests. + String exportOutput = + "# This file was autogenerated by uv\n" + + "anyio==3.6.2\n" + + " # via test-project\n" + + "certifi @ git+https://github.com/certifi/python-certifi.git@abcdef\n" + + " # via requests\n" + + "requests==2.25.1\n" + + " # via test-project\n"; + + // Given/When + var data = provider.parseUvExport(exportOutput); + + // Then — requests should NOT have anyio as a child (that would be a corruption) + assertThat(data.graph().get("requests").children()).doesNotContain("anyio"); + // certifi is skipped + assertThat(data.graph()).doesNotContainKey("certifi"); + // Both anyio and requests are direct deps + assertThat(data.directDeps()).containsExactlyInAnyOrder("anyio", "requests"); + } + + /** + * Verifies that parseUvExport correctly handles a fixture file containing both direct references + * and path dependencies mixed with normal packages. + */ + @Test + void test_parseUvExport_with_direct_refs_fixture() throws IOException { + Path exportPath = Path.of(UV_FIXTURE, "uv_export_direct_refs.txt"); + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + var provider = new PythonUvProvider(pyprojectPath); + String exportOutput = Files.readString(exportPath); + + // Given/When + var data = provider.parseUvExport(exportOutput); + + // Then — direct reference and path dependency are skipped + assertThat(data.graph()).doesNotContainKey("certifi"); + assertThat(data.graph()).doesNotContainKey("./local-package"); + + // Normal packages are parsed correctly + assertThat(data.graph()).containsKeys("anyio", "flask", "requests", "idna", "sniffio"); + assertThat(data.graph().get("anyio").version()).isEqualTo("3.6.2"); + assertThat(data.graph().get("flask").version()).isEqualTo("2.0.3"); + assertThat(data.graph().get("requests").version()).isEqualTo("2.25.1"); + + // Direct deps are correctly identified + assertThat(data.directDeps()).containsExactlyInAnyOrder("anyio", "flask", "requests"); + + // charset-normalizer is a child of requests (not corrupted by the skipped certifi) + assertThat(data.graph().get("requests").children()).contains("charset-normalizer"); + } + + /** + * Verifies that provideStack succeeds with export output containing direct references and path + * dependencies. + */ + @Test + void test_provideStack_with_direct_refs() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + String exportOutput = Files.readString(Path.of(UV_FIXTURE, "uv_export_direct_refs.txt")); + + System.setProperty(PythonUvProvider.PROP_TRUSTIFY_DA_UV_EXPORT, exportOutput); + try { + var provider = new PythonUvProvider(pyprojectPath); + var content = provider.provideStack(); + assertThat(content.type).isEqualTo(Api.CYCLONEDX_MEDIA_TYPE); + String sbomJson = new String(content.buffer); + assertThat(sbomJson).contains("CycloneDX"); + // Skipped packages should not appear + assertThat(sbomJson).doesNotContain("pkg:pypi/certifi@"); + // Normal packages should appear + assertThat(sbomJson).contains("pkg:pypi/anyio@3.6.2"); + assertThat(sbomJson).contains("pkg:pypi/flask@2.0.3"); + assertThat(sbomJson).contains("pkg:pypi/requests@2.25.1"); + } catch (RuntimeException | NoClassDefFoundError e) { + Assumptions.assumeTrue(false, "Skipping: SBOM serialization unavailable - " + e.getMessage()); + } finally { + System.clearProperty(PythonUvProvider.PROP_TRUSTIFY_DA_UV_EXPORT); + } + } + + /** + * Verifies that provideComponent succeeds with export output containing direct references and + * path dependencies. + */ + @Test + void test_provideComponent_with_direct_refs() throws IOException { + Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); + String exportOutput = Files.readString(Path.of(UV_FIXTURE, "uv_export_direct_refs.txt")); + + System.setProperty(PythonUvProvider.PROP_TRUSTIFY_DA_UV_EXPORT, exportOutput); + try { + var provider = new PythonUvProvider(pyprojectPath); + var content = provider.provideComponent(); + assertThat(content.type).isEqualTo(Api.CYCLONEDX_MEDIA_TYPE); + String sbomJson = new String(content.buffer); + assertThat(sbomJson).contains("CycloneDX"); + assertThat(sbomJson).doesNotContain("pkg:pypi/certifi@"); + assertThat(sbomJson).contains("pkg:pypi/anyio@3.6.2"); + assertThat(sbomJson).contains("pkg:pypi/flask@2.0.3"); + assertThat(sbomJson).contains("pkg:pypi/requests@2.25.1"); + } catch (RuntimeException | NoClassDefFoundError e) { + Assumptions.assumeTrue(false, "Skipping: SBOM serialization unavailable - " + e.getMessage()); + } finally { + System.clearProperty(PythonUvProvider.PROP_TRUSTIFY_DA_UV_EXPORT); + } + } + @Test void test_parseUvExport_throws_on_unpinned_version() { Path pyprojectPath = Path.of(UV_FIXTURE, "pyproject.toml"); diff --git a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java index 729a0dce..e36f78cf 100644 --- a/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java +++ b/src/test/java/io/github/guacsec/trustifyda/utils/PythonControllerBaseTest.java @@ -2043,4 +2043,163 @@ void when_spliting_pip_show_dep_with_license() { + "Requires: \n" + "Required-by: cycler, gensim, gTTS, python-dateutil, tweepy\n"); } + + @Test + void preprocessRequirementsLines_filters_extra_index_url() { + List input = + List.of( + "--extra-index-url https://pypi.example.com/simple", + "requests==2.28.0", + "--index-url https://pypi.org/simple", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_filters_short_pip_options() { + List input = + List.of("-r other-requirements.txt", "-c constraints.txt", "numpy==1.24.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("numpy==1.24.0"), result); + } + + @Test + void preprocessRequirementsLines_strips_hashes() { + List input = + List.of("requests==2.28.0 --hash=sha256:abc123 --hash=sha256:def456", "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_strips_config_settings() { + List input = + List.of( + "aiohappyeyeballs==2.6.1 --config-settings=KEY=VALUE --config-settings=OTHER=VAL", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("aiohappyeyeballs==2.6.1", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_joins_line_continuations() { + List input = + List.of( + "requests==2.28.0 \\", + " --hash=sha256:abc123 \\", + " --hash=sha256:def456", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_joins_continuations_with_config_settings() { + List input = + List.of( + "aiohappyeyeballs==2.6.1 \\", + " --config-settings=SAMPLE_TEXT=TEST_VALUE \\", + " --config-settings=ANOTHER_KEY=ANOTHER_VALUE", + "async-timeout==5.0.1 ; python_full_version < '3.11'"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals( + List.of("aiohappyeyeballs==2.6.1", "async-timeout==5.0.1 ; python_full_version < '3.11'"), + result); + } + + @Test + void preprocessRequirementsLines_filters_comments_and_empty_lines() { + List input = List.of("# this is a comment", "", " ", "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_strips_direct_references() { + List input = + List.of( + "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", + "requests[security] @ https://github.com/psf/requests/archive/main.zip", + "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("pip", "requests[security]", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_handles_trailing_whitespace_after_backslash() { + List input = + List.of("requests==2.28.0 \\ ", " --hash=sha256:abc123", "flask==2.0.3"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3"), result); + } + + @Test + void preprocessRequirementsLines_filters_bare_urls() { + List input = + List.of( + "https://example.com/packages/MyPackage-1.0.tar.gz", + "http://example.com/packages/other.whl", + "git+https://git.example.com/MyProject.git@v1.0", + "git+git://git.example.com/repo.git", + "hg+http://hg.example.com/repo", + "hg+ssh://hg.example.com/repo", + "svn+svn://svn.example.com/project/trunk", + "bzr+ftp://bzr.example.com/repo", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_filters_local_paths() { + List input = + List.of( + "./local-package", + "../parent-package", + "./downloads/MyPackage-1.0.tar.gz", + "/absolute/path/to/package", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_filters_windows_paths() { + List input = + List.of( + "C:\\Users\\dev\\my-package", + "c:/projects/my-lib", + "D:\\packages\\MyPackage-1.0.tar.gz", + "requests==2.28.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_handles_final_line_ending_with_backslash() { + List input = List.of("flask==2.0.3", "requests==2.28.0 \\"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("flask==2.0.3", "requests==2.28.0"), result); + } + + @Test + void preprocessRequirementsLines_combined_scenario() { + List input = + List.of( + "--extra-index-url https://pypi.example.com/simple", + "# A comment", + "requests==2.28.0 \\ ", + " --hash=sha256:abc123", + "", + "--trusted-host pypi.example.com", + "flask==2.0.3 --hash=sha256:xyz789", + "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", + "https://example.com/packages/other.tar.gz", + "git+git://git.example.com/repo.git", + "./local-package", + "numpy>=1.24.0"); + List result = PythonControllerBase.preprocessRequirementsLines(input); + assertEquals(List.of("requests==2.28.0", "flask==2.0.3", "pip", "numpy>=1.24.0"), result); + } } diff --git a/src/test/resources/tst_manifests/pip/pip_pyproject_toml_uv/uv_export_direct_refs.txt b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_uv/uv_export_direct_refs.txt new file mode 100644 index 00000000..69ce9ae3 --- /dev/null +++ b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_uv/uv_export_direct_refs.txt @@ -0,0 +1,20 @@ +# This file was autogenerated by uv via the following command: +# uv export --format requirements.txt --frozen --no-hashes --no-dev +anyio==3.6.2 + # via test-project +certifi @ git+https://github.com/certifi/python-certifi.git@abcdef1234567890 + # via requests +./local-package + # via test-project +charset-normalizer==3.1.0 + # via requests +flask==2.0.3 + # via test-project +idna==3.4 + # via + # anyio + # requests +requests==2.25.1 + # via test-project +sniffio==1.3.0 + # via anyio