From 4920bd66d206dedb8bc6ca905efbbb4504ec895a Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Fri, 13 Mar 2026 06:56:07 -0400 Subject: [PATCH] feat(sources): generate .git_archival.txt for setuptools-scm builds When building from an sdist that lacks .git metadata, setuptools-scm cannot determine the package version. This adds a .git_archival.txt file with the correct version so setuptools-scm can resolve it. - Skips packages with .git directories (no fix needed) - Replaces existing files that have unprocessed placeholders or missing fields - Creates a new file only when PKG-INFO is also absent (git clones or custom downloads, not PyPI sdists) Closes: #961 Co-Authored-By: Claude Signed-off-by: Lalatendu Mohanty --- src/fromager/sources.py | 91 +++++++++++++++++++++++++++++++++++- tests/test_sources.py | 100 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+), 1 deletion(-) diff --git a/src/fromager/sources.py b/src/fromager/sources.py index 54f31bbe..28b84a79 100644 --- a/src/fromager/sources.py +++ b/src/fromager/sources.py @@ -533,7 +533,14 @@ def prepare_source( ) write_build_meta(source_root_dir.parent, req, source_filename, version) if source_root_dir is not None: - logger.info(f"prepared source for {req} at {source_root_dir}") + # Place .git_archival.txt before the build backend is imported — + # setuptools-scm resolves the version during + # get_requires_for_build_wheel(), so the file must exist by then. + ensure_git_archival( + sdist_root_dir=source_root_dir, + version=version, + ) + logger.info("prepared source for %s at %s", req, source_root_dir) return source_root_dir @@ -771,6 +778,88 @@ def ensure_pkg_info( return had_pkg_info +# Template .git_archival.txt files contain "$Format:…$" placeholders that +# `git archive` expands into real values. setuptools-scm detects the +# unexpanded "%(describe" placeholder and falls back to other version-detection +# methods when it is present. 
+_UNPROCESSED_ARCHIVAL_MARKER = "%(describe" +_REQUIRED_ARCHIVAL_FIELDS = {"describe-name"} +_GIT_ARCHIVAL_CONTENT = "describe-name: {version}\n" + + +def _is_valid_git_archival(content: str) -> bool: + """Check whether ``.git_archival.txt`` content has the required fields.""" + if _UNPROCESSED_ARCHIVAL_MARKER in content: + return False + fields: dict[str, str] = {} + for line in content.splitlines(): + if ":" not in line: + continue + key, _, value = line.partition(":") + fields[key.strip()] = value.strip() + return all(fields.get(f) for f in _REQUIRED_ARCHIVAL_FIELDS) + + +def _has_git_metadata(sdist_root_dir: pathlib.Path) -> bool: + """Check whether ``.git`` exists in sdist root directory.""" + return sdist_root_dir.joinpath(".git").exists() + + +def _write_git_archival(archival_file: pathlib.Path, version: Version) -> None: + """Write a ``.git_archival.txt`` with the given version.""" + archival_file.write_text(_GIT_ARCHIVAL_CONTENT.format(version=version)) + + +def ensure_git_archival( + *, + version: Version, + sdist_root_dir: pathlib.Path, +) -> bool | None: + """Ensure ``.git_archival.txt`` is valid for setuptools-scm version resolution. + + Behaviour: + + * Skips packages with ``.git`` metadata (git clones need no fix). + * Replaces existing files that are unprocessed or missing required fields. + * Creates a new file when no ``.git_archival.txt`` exists **and** + ``PKG-INFO`` is also absent (indicating a git clone or custom + download rather than a PyPI sdist). + + Returns ``True`` (``.git`` metadata or a valid file already present), + ``False`` (created/replaced), or ``None`` (no action taken). 
+ """ + if _has_git_metadata(sdist_root_dir): + logger.debug( + "git metadata found, skipping .git_archival.txt for %s", sdist_root_dir + ) + return True + + archival_file = sdist_root_dir / ".git_archival.txt" + + # Existing file: validate and replace if invalid + if archival_file.is_file(): + if _is_valid_git_archival(archival_file.read_text()): + logger.debug( + "valid .git_archival.txt already present in %s", sdist_root_dir + ) + return True + logger.info("replacing invalid .git_archival.txt in %s", sdist_root_dir) + _write_git_archival(archival_file, version) + return False + + # No file: create when PKG-INFO is also absent (git clone / custom download) + pkg_info = sdist_root_dir / "PKG-INFO" + if not pkg_info.is_file(): + logger.info( + "creating .git_archival.txt in %s (no PKG-INFO, likely a git clone)", + sdist_root_dir, + ) + _write_git_archival(archival_file, version) + return False + + return None + + def validate_sdist_filename( req: Requirement, version: Version, diff --git a/tests/test_sources.py b/tests/test_sources.py index 651d07fe..8b93403d 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -312,3 +312,103 @@ def test_scan_compiled_extensions( assert matches == [pathlib.Path(filename)] else: assert matches == [] + + +class TestEnsureGitArchival: + """Tests for ensure_git_archival().""" + + def test_skips_when_pkg_info_present(self, tmp_path: pathlib.Path) -> None: + """Verify no file is created when PKG-INFO exists (PyPI sdist).""" + (tmp_path / "PKG-INFO").write_text("Metadata-Version: 1.0\n") + version = Version("1.2.3") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + archival = tmp_path / ".git_archival.txt" + + assert result is None + assert not archival.is_file() + + def test_creates_file_when_no_pkg_info(self, tmp_path: pathlib.Path) -> None: + """Verify file is created when PKG-INFO is missing (git clone).""" + version = Version("1.2.3") + result = 
sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + archival = tmp_path / ".git_archival.txt" + + assert result is False + assert archival.is_file() + content = archival.read_text() + assert "describe-name: 1.2.3\n" in content + + def test_replaces_unprocessed_file(self, tmp_path: pathlib.Path) -> None: + """Verify unprocessed template file is replaced.""" + archival = tmp_path / ".git_archival.txt" + archival.write_text( + "node: $Format:%H$\n" + "node-date: $Format:%cI$\n" + "describe-name: $Format:%(describe:tags=true)$\n" + ) + version = Version("4.5.6") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is False + content = archival.read_text() + assert "describe-name: 4.5.6\n" in content + assert "%(describe" not in content + + def test_preserves_valid_file(self, tmp_path: pathlib.Path) -> None: + """Verify a valid archival file is left untouched.""" + archival = tmp_path / ".git_archival.txt" + original = ( + "node: abc123\n" + "node-date: 2025-01-01T00:00:00+00:00\n" + "describe-name: v1.0.0-0-gabc123\n" + ) + archival.write_text(original) + version = Version("9.9.9") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is True + assert archival.read_text() == original + + def test_preserves_valid_file_describe_name_only( + self, tmp_path: pathlib.Path + ) -> None: + """Verify a file with only describe-name is valid.""" + archival = tmp_path / ".git_archival.txt" + original = "describe-name: 2.0.0\n" + archival.write_text(original) + version = Version("9.9.9") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is True + assert archival.read_text() == original + + def test_replaces_truncated_file(self, tmp_path: pathlib.Path) -> None: + """Verify a truncated file missing required fields is replaced.""" + archival = tmp_path / ".git_archival.txt" + archival.write_text("node-date: 
2025-01-01T00:00:00+00:00\n") + version = Version("3.0.0") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is False + content = archival.read_text() + assert "describe-name: 3.0.0\n" in content + + def test_replaces_file_with_empty_values(self, tmp_path: pathlib.Path) -> None: + """Verify a file with required fields but empty values is replaced.""" + archival = tmp_path / ".git_archival.txt" + archival.write_text("describe-name:\n") + version = Version("5.0.0") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is False + content = archival.read_text() + assert "describe-name: 5.0.0\n" in content + + def test_skips_when_git_dir_exists(self, tmp_path: pathlib.Path) -> None: + """Verify no file is created when .git directory exists.""" + (tmp_path / ".git").mkdir() + version = Version("1.0.0") + result = sources.ensure_git_archival(sdist_root_dir=tmp_path, version=version) + + assert result is True + assert not (tmp_path / ".git_archival.txt").exists()