From ddae79ff25221236fa16d68d4da9fabd53d989c8 Mon Sep 17 00:00:00 2001 From: Kaleb Barrett Date: Mon, 4 May 2026 15:34:26 -0400 Subject: [PATCH 1/3] Add support for git archive generated git info files The user is intended to write a template file and modify git attributes to fill in the template with repo information when building the archive. This is used by Github's `Download as ZIP` function. The implementation largely follows setuptools-scm's with some minor caveats: * This tool supports branch in template, this defaults to HEAD when there is no branch, just like the case where we have branch info. * The precedence of version resolving is a bit different. Here we do PKG-INFO, then archive file, then git info, while setuptools-scm puts git info first, assuming any git info must be the current repo's git info. This does mean that dirty clones that have the archival file will accidentally use that, but that should be somewhat obvious of a diagnosis. --- docs/changelog/next_release/131.feature.rst | 5 + docs/comparison.rst | 2 +- docs/git_archive.rst | 105 +++++ docs/index.rst | 1 + setuptools_git_versioning/__init__.py | 3 + setuptools_git_versioning/archival.py | 209 ++++++++++ setuptools_git_versioning/version.py | 11 + tests/test_integration/test_archival.py | 436 ++++++++++++++++++++ 8 files changed, 771 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/next_release/131.feature.rst create mode 100644 docs/git_archive.rst create mode 100644 setuptools_git_versioning/archival.py create mode 100644 tests/test_integration/test_archival.py diff --git a/docs/changelog/next_release/131.feature.rst b/docs/changelog/next_release/131.feature.rst new file mode 100644 index 0000000..3dd0417 --- /dev/null +++ b/docs/changelog/next_release/131.feature.rst @@ -0,0 +1,5 @@ +Add support for ``git archive`` builds via a tracked ``.git_archival.txt`` +file. When the file is present and its ``$Format:...$`` placeholders have +been substituted, the version is derived from its contents instead of +running ``git``. See ``git_archive`` documentation for the required +``.git_archival.txt`` and ``.gitattributes`` setup. diff --git a/docs/comparison.rst b/docs/comparison.rst index 1e87e3c..b1694a6 100644 --- a/docs/comparison.rst +++ b/docs/comparison.rst @@ -37,7 +37,7 @@ VCS support +---------------------------+-----+-----------+-------------------------------+-----------------------------+ | Package | Git | Mercurial | Can be used in git submodules | Support for ``git-archive`` | +===========================+=====+===========+===============================+=============================+ -| setuptools-git-versioning | yes | no | yes | no | +| setuptools-git-versioning | yes | no | yes | yes | +---------------------------+-----+-----------+-------------------------------+-----------------------------+ | setuptools-scm | yes | yes | yes | yes | +---------------------------+-----+-----------+-------------------------------+-----------------------------+ diff --git a/docs/git_archive.rst b/docs/git_archive.rst new file mode 100644 index 0000000..edd837f --- /dev/null +++ b/docs/git_archive.rst @@ -0,0 +1,105 @@ +.. _git-archive: + +Supporting ``git archive`` builds +--------------------------------- + +By default ``setuptools-git-versioning`` reads version information by running +``git`` against the project's ``.git`` directory. When the project is built +from a ``git archive`` tarball (for example, GitHub's "Download ZIP", or a +manual ``git archive HEAD -o release.tar``), no ``.git`` directory exists and +``git`` cannot be invoked. + +To make ``git archive`` builds work, add a ``.git_archival.txt`` file to your +repository whose contents will be rewritten by git at archive time. The +project will read the rewritten file when building from the archive. + +Setup +~~~~~ + +1. Create ``.git_archival.txt`` in the repository root: + + .. code-block:: text + :caption: .git_archival.txt + + node: $Format:%H$ + describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ + +2. Tell git to substitute the ``$Format:...$`` placeholders by adding the + following line to ``.gitattributes`` in the repository root (creating the + file if it does not exist): + + .. code-block:: text + :caption: .gitattributes + + .git_archival.txt export-subst + +3. Commit both files: + + .. code-block:: bash + + git add .git_archival.txt .gitattributes + git commit -m "add git archive support" + +When ``git archive`` runs, the placeholders are expanded into the actual +commit SHA and ``git describe`` output for the archived commit. When the +package is later built from the extracted archive, +``setuptools-git-versioning`` reads the file and resolves the version using +the configured ``template`` / ``dev_template`` / ``dirty_template``. + +The same file format is used by ``setuptools-scm``, so a single +``.git_archival.txt`` works with both tools. + +Optional: include branch information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your templates reference ``{branch}``, also add a ``ref-names`` line: + +.. code-block:: text + :caption: .git_archival.txt (with branch info) + + node: $Format:%H$ + describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ + ref-names: $Format:%D$ + +.. warning:: + + Including ``ref-names`` causes the archive's contents to change every + time a new ref points at the archived commit (for example, when a new + branch is created). This breaks archive checksum stability across + re-archivals of the same commit. Only opt in if you actually need + ``{branch}`` substitution. + +If ``ref-names`` is not present (or is present but indicates a detached +``HEAD``) and a template references ``{branch}``, the literal string +``HEAD`` is substituted - matching the output of +``git rev-parse --abbrev-ref HEAD`` in detached-HEAD state. + +Priority and interaction with other schemas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The version source is selected in this order: + +1. ``PKG-INFO`` (sdist install) - wins whenever present. +2. ``.git_archival.txt`` - used when the file exists and its placeholders + have been substituted. +3. The normal flow: ``version_callback``, ``version_file``, live ``git`` + commands, ``starting_version``. + +This means ``.git_archival.txt`` only takes effect when there is no +``PKG-INFO`` (so a normal sdist install still wins) and is opportunistic in +working checkouts: a stray un-substituted file logs a warning and is +ignored, falling through to the live ``git`` flow. + +Limitations +~~~~~~~~~~~ + +- ``tag_filter``, ``tag_formatter``, and ``sort_by`` have no effect on + archive builds. The tag is whatever ``git describe`` chose at archive + time. +- ``count_commits_from_version_file`` and ``version_file`` are not consulted + in the archive flow. +- Older git versions (<2.32) do not understand the ``%(describe...)`` + placeholder. In that case the file is left with the literal text + ``%(describe...)`` and ``setuptools-git-versioning`` will warn and fall + back to the ``ref-names`` field for the tag (which only succeeds when + ``HEAD`` is exactly on a tag). diff --git a/docs/index.rst b/docs/index.rst index 3822589..df7004d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ command ci runtime_version + git_archive schemas/index options/index substitutions/index diff --git a/setuptools_git_versioning/__init__.py b/setuptools_git_versioning/__init__.py index b1c15dc..dfbe466 100644 --- a/setuptools_git_versioning/__init__.py +++ b/setuptools_git_versioning/__init__.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any +from setuptools_git_versioning.archival import parse_archival_file, version_from_archival from setuptools_git_versioning.git import ( count_since, get_all_tags, @@ -37,5 +38,7 @@ def parse_config(dist: Distribution, attr: Any, value: Any) -> None: "get_version", "infer_version", "is_dirty", + "parse_archival_file", + "version_from_archival", "version_from_git", ] diff --git a/setuptools_git_versioning/archival.py b/setuptools_git_versioning/archival.py new file mode 100644 index 0000000..8447eff --- /dev/null +++ b/setuptools_git_versioning/archival.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import logging +import os # noqa: TC003 +import re +from dataclasses import dataclass +from email.parser import HeaderParser +from pathlib import Path +from typing import TYPE_CHECKING + +from setuptools_git_versioning.defaults import ( + DEFAULT_DEV_TEMPLATE, + DEFAULT_DIRTY_TEMPLATE, + DEFAULT_TEMPLATE, +) +from setuptools_git_versioning.log import DEBUG, INFO +from setuptools_git_versioning.subst import resolve_substitutions + +if TYPE_CHECKING: + from packaging.version import Version + +ARCHIVAL_FILENAME = ".git_archival.txt" +DESCRIBE_UNSUPPORTED = "%(describe" +FORMAT_UNSUBSTITUTED = "$Format" +DESCRIBE_PARTS = 3 # tag-N-gSHA + +REF_TAG_RE = re.compile(r"(?<=\btag: )([^,]+)\b") +REF_HEAD_RE = re.compile(r"HEAD\s*->\s*([^,]+)") +FULL_SHA_RE = re.compile(r"^([0-9a-f]{40}|[0-9a-f]{64})$") # SHA-1 or SHA-256 + +log = logging.getLogger(__name__) + + +@dataclass +class ArchivalData: + tag: str + ccount: int + sha: str + full_sha: str + dirty: bool + branch: str | None + + +def parse_archival_file(path: str | os.PathLike) -> dict[str, str]: + """Read a .git_archival.txt file and return its key/value pairs. + + Keys are normalized to lowercase so lookups behave consistently + regardless of whether the file uses `node:` or `Node:` etc. + """ + content = Path(path).read_text(encoding="utf-8") + log.log(DEBUG, "'%s' content:\n%s", ARCHIVAL_FILENAME, content) + message = HeaderParser().parsestr(content) + + # HeaderParser treats the first blank line as the end of headers. + # Anything after it ends up in the message body and is silently + # dropped from .items(). Warn the user instead of losing fields. + payload = message.get_payload() + if isinstance(payload, str) and payload.strip(): + log.warning( + "'%s' contains content after a blank line; those fields will be ignored", + ARCHIVAL_FILENAME, + ) + + return {key.lower(): value for key, value in message.items()} + + +def _parse_describe(describe: str) -> tuple[str, int, str | None, bool]: + """Parse a `git describe`-style string into (tag, ccount, short_sha, dirty).""" + dirty = False + if describe.endswith("-dirty"): + dirty = True + describe = describe[: -len("-dirty")] + + parts = describe.rsplit("-", 2) + if len(parts) < DESCRIBE_PARTS: + return describe, 0, None, dirty + + tag, ccount_str, gnode = parts + try: + ccount = int(ccount_str) + except ValueError: + return describe, 0, None, dirty + + short_sha = gnode[1:] if gnode.startswith("g") else gnode + return tag, ccount, short_sha, dirty + + +def _branch_from_ref_names(ref_names: str) -> str | None: + match = REF_HEAD_RE.search(ref_names) + if match: + return match.group(1).strip() + return None + + +def archival_to_version_data(data: dict[str, str]) -> ArchivalData | None: + """Convert parsed archival data into structured version info, or None. + + Returns None when the file looks unsubstituted or otherwise unusable so + the caller can fall through to live git. + """ + if any(FORMAT_UNSUBSTITUTED in value for value in data.values()): + log.warning( + "'%s' contains unprocessed '$Format:...$' placeholders, skipping", + ARCHIVAL_FILENAME, + ) + return None + + node = data.get("node", "").strip() + full_sha = node if FULL_SHA_RE.match(node) else "" + ref_names = data.get("ref-names", "") + branch = _branch_from_ref_names(ref_names) + describe = data.get("describe-name", "").strip() + + describe_tag: str | None = None + ccount = 0 + short_sha = "" + dirty = False + + if describe and DESCRIBE_UNSUPPORTED not in describe: + describe_tag, ccount, parsed_sha, dirty = _parse_describe(describe) + if parsed_sha: + short_sha = parsed_sha + elif describe: + log.warning( + "git archive did not expand %(describe...) (git <2.32), falling back to ref-names", + ) + + if describe_tag is not None: + tag = describe_tag + else: + tags = REF_TAG_RE.findall(ref_names) + if not tags: + log.log( + INFO, + "'%s' has no usable describe-name or tag in ref-names", + ARCHIVAL_FILENAME, + ) + return None + tag = tags[0].strip() + + # Prefer the full SHA when available so {sha} matches the live-git + # path's `full_sha[:8]` rendering. Fall back to the short SHA from + # describe-name only when no valid `node` field is present. + if full_sha: + short_sha = full_sha[:8] + elif short_sha: + full_sha = short_sha + + return ArchivalData( + tag=tag, + ccount=ccount, + sha=short_sha[:8], + full_sha=full_sha, + dirty=dirty, + branch=branch, + ) + + +def version_from_archival( + project_root: str | os.PathLike, + *, + template: str = DEFAULT_TEMPLATE, + dev_template: str = DEFAULT_DEV_TEMPLATE, + dirty_template: str = DEFAULT_DIRTY_TEMPLATE, +) -> Version | None: + """Return a Version derived from .git_archival.txt, or None if unavailable.""" + archival_path = Path(project_root).joinpath(ARCHIVAL_FILENAME) + if not archival_path.exists(): + log.log(DEBUG, "No '%s' present at '%s'", ARCHIVAL_FILENAME, project_root) + return None + + log.log(INFO, "File '%s' is found, reading its content", archival_path) + data = parse_archival_file(archival_path) + info = archival_to_version_data(data) + if info is None: + return None + + log.log(DEBUG, "Parsed archival data: %r", info) + + if info.dirty: + log.log(INFO, "Using template from 'dirty_template' option") + chosen = dirty_template + elif info.ccount > 0: + log.log(INFO, "Using template from 'dev_template' option") + chosen = dev_template + else: + log.log(INFO, "Using template from 'template' option") + chosen = template + + # When ref-names is absent or doesn't reveal a current branch, default + # to the literal "HEAD" so `{branch}` substitution mirrors what + # `git rev-parse --abbrev-ref HEAD` produces in detached-HEAD state. + branch = info.branch if info.branch is not None else "HEAD" + + rendered = resolve_substitutions( + chosen, + sha=info.sha, + tag=info.tag, + ccount=info.ccount, + branch=branch, + full_sha=info.full_sha, + ) + log.log(INFO, "Version number after resolving substitutions: %r", rendered) + + # Deferred to avoid a top-level circular import: + # `version.py` imports `version_from_archival` from this module. + from setuptools_git_versioning.version import sanitize_version + + return sanitize_version(rendered) diff --git a/setuptools_git_versioning/version.py b/setuptools_git_versioning/version.py index 9339622..bc2fb98 100644 --- a/setuptools_git_versioning/version.py +++ b/setuptools_git_versioning/version.py @@ -11,6 +11,7 @@ # where 'packaging' is not installed yet from packaging.version import Version +from setuptools_git_versioning.archival import version_from_archival from setuptools_git_versioning.defaults import ( DEFAULT_DEV_TEMPLATE, DEFAULT_DIRTY_TEMPLATE, @@ -114,6 +115,16 @@ def version_from_git( # noqa: PLR0915, PLR0912, PLR0913, C901 # running on sdist package, do not sanitize return Version(version_str) + archival_version = version_from_archival( + project_root, + template=template, + dev_template=dev_template, + dirty_template=dirty_template, + ) + if archival_version is not None: + log.log(INFO, "Resolved version from '.git_archival.txt': %s", archival_version) + return archival_version + if version_callback is not None: if version_file is not None: msg = "Either 'version_file' or 'version_callback' can be passed, but not both at the same time" diff --git a/tests/test_integration/test_archival.py b/tests/test_integration/test_archival.py new file mode 100644 index 0000000..a7c5140 --- /dev/null +++ b/tests/test_integration/test_archival.py @@ -0,0 +1,436 @@ +from __future__ import annotations + +import shutil +import tarfile +from typing import TYPE_CHECKING + +import pytest +from packaging.version import Version + +from setuptools_git_versioning.archival import ( + ARCHIVAL_FILENAME, + ArchivalData, + archival_to_version_data, + parse_archival_file, + version_from_archival, +) +from tests.lib.util import ( + create_file, + create_tag, + execute, + get_version, +) + +if TYPE_CHECKING: + from pathlib import Path + +pytestmark = pytest.mark.all + +GIT_ARCHIVAL_STABLE = "node: $Format:%H$\ndescribe-name: $Format:%(describe:tags=true,match=*[0-9]*)$\n" + + +# --------------------------------------------------------------------------- +# Unit tests: parse_archival_file + archival_to_version_data +# --------------------------------------------------------------------------- + + +def test_parse_archival_file_round_trip(tmp_path: Path) -> None: + archival = tmp_path / ARCHIVAL_FILENAME + archival.write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678\n" + "ref-names: HEAD -> main, tag: v1.2.3\n", + encoding="utf-8", + ) + + data = parse_archival_file(archival) + assert data["node"] == "4060507deadbeef0123456789abcdef012345678" + assert data["describe-name"] == "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678" + assert data["ref-names"] == "HEAD -> main, tag: v1.2.3" + + +def test_parse_archival_file_warns_on_content_after_blank_line( + tmp_path: Path, caplog: pytest.LogCaptureFixture +) -> None: + """A blank line in the file ends MIME header parsing. Anything after + it would be silently lost; the parser should warn instead.""" + archival = tmp_path / ARCHIVAL_FILENAME + archival.write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3\n" + "\n" + "ref-names: HEAD -> main, tag: v1.2.3\n", + encoding="utf-8", + ) + + with caplog.at_level("WARNING"): + data = parse_archival_file(archival) + + assert "ref-names" not in data + assert "after a blank line" in caplog.text + + +def test_parse_archival_file_normalizes_keys_to_lowercase(tmp_path: Path) -> None: + """Keys in .git_archival.txt should be looked up case-insensitively; + parse_archival_file normalizes them to lowercase.""" + archival = tmp_path / ARCHIVAL_FILENAME + archival.write_text( + "Node: 4060507deadbeef0123456789abcdef012345678\nDescribe-Name: v1.2.3\n", + encoding="utf-8", + ) + + data = parse_archival_file(archival) + assert data["node"] == "4060507deadbeef0123456789abcdef012345678" + assert data["describe-name"] == "v1.2.3" + + +def test_archival_to_version_data_post_tag() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", + } + result = archival_to_version_data(data) + assert result == ArchivalData( + tag="v1.2.3", + ccount=5, + sha="4060507d", + full_sha="4060507deadbeef0123456789abcdef012345678", + dirty=False, + branch=None, + ) + + +def test_archival_to_version_data_bare_tag() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3", + } + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "v1.2.3" + assert result.ccount == 0 + assert result.full_sha == "4060507deadbeef0123456789abcdef012345678" + assert result.sha == "4060507d" + assert result.dirty is False + + +def test_archival_to_version_data_dirty_suffix() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678-dirty", + } + result = archival_to_version_data(data) + assert result is not None + assert result.dirty is True + assert result.tag == "v1.2.3" + assert result.ccount == 5 + + +def test_archival_to_version_data_unsubstituted_returns_none(caplog: pytest.LogCaptureFixture) -> None: + data = {"node": "$Format:%H$", "describe-name": "$Format:%(describe)$"} + with caplog.at_level("WARNING"): + result = archival_to_version_data(data) + assert result is None + assert "unprocessed" in caplog.text + + +def test_archival_to_version_data_old_git_falls_back_to_ref_names( + caplog: pytest.LogCaptureFixture, +) -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "%(describe:tags=true,match=*[0-9]*)", + "ref-names": "HEAD -> main, tag: v1.2.3", + } + with caplog.at_level("WARNING"): + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "v1.2.3" + assert result.ccount == 0 + assert "git <2.32" in caplog.text + + +def test_archival_to_version_data_old_git_no_tag_in_ref_names_returns_none( + caplog: pytest.LogCaptureFixture, +) -> None: + """Old-git fallback path: describe-name is unexpanded AND ref-names + has no `tag:` entry. The function should warn about the old-git + fallback and then return None. + """ + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "%(describe:tags=true,match=*[0-9]*)", + "ref-names": "HEAD -> main, origin/main", + } + with caplog.at_level("WARNING"): + result = archival_to_version_data(data) + assert result is None + assert "git <2.32" in caplog.text + + +def test_archival_to_version_data_accepts_sha256_node() -> None: + """SHA-256 git repositories produce 64-char node hashes. The parser + should accept them and use them as full_sha just like 40-char SHA-1. + """ + sha256_node = "4060507deadbeef0123456789abcdef0123456789abcdef0123456789abcdef0" + data = { + "node": sha256_node, + "describe-name": "v1.2.3", + } + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "v1.2.3" + assert result.full_sha == sha256_node + assert result.sha == sha256_node[:8] + + +def test_archival_to_version_data_prefers_full_sha_over_describe_short_sha() -> None: + """When both `node` (40 chars) and describe-name's short SHA (7 chars) + are present, `sha` should be the 8-char prefix of `node`, matching the + live-git path's `full_sha[:8]` rendering rather than the truncated + short SHA from describe-name. + """ + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3-5-g4060507", # conventional 7-char short SHA + } + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "v1.2.3" + assert result.ccount == 5 + assert result.sha == "4060507d" # 8 chars from node, not 7-char "4060507" + assert result.full_sha == "4060507deadbeef0123456789abcdef012345678" + + +def test_archival_to_version_data_describe_with_non_numeric_middle_part() -> None: + """When a describe-name happens to rsplit into 3 parts but the middle + part is non-numeric (e.g., a tag like `foo-bar-baz`), the function + should fall back to treating the whole string as a bare tag rather + than crashing on int() conversion. + """ + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "foo-bar-baz", + } + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "foo-bar-baz" + assert result.ccount == 0 + + +def test_archival_to_version_data_short_sha_fallback_when_node_missing() -> None: + """When `node` is absent/invalid, full_sha should fall back to the + short SHA from describe-name (best-effort).""" + data = { + "describe-name": "v1.2.3-5-gabc1234", + } + result = archival_to_version_data(data) + assert result is not None + assert result.tag == "v1.2.3" + assert result.ccount == 5 + assert result.sha == "abc1234" + assert result.full_sha == "abc1234" + + +def test_archival_to_version_data_branch_from_ref_names() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", + "ref-names": "HEAD -> feature/x, origin/main", + } + result = archival_to_version_data(data) + assert result is not None + assert result.branch == "feature/x" + + +def test_archival_to_version_data_branch_absent() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", + } + result = archival_to_version_data(data) + assert result is not None + assert result.branch is None + + +def test_archival_to_version_data_no_tag_anywhere_returns_none() -> None: + data = { + "node": "4060507deadbeef0123456789abcdef012345678", + "ref-names": "HEAD -> main", + } + assert archival_to_version_data(data) is None + + +def test_version_from_archival_missing_file_returns_none(tmp_path: Path) -> None: + assert version_from_archival(tmp_path) is None + + +def test_version_from_archival_post_tag(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678\n", + encoding="utf-8", + ) + version = version_from_archival(tmp_path) + assert version == Version("1.2.3.post5+git.4060507d") + + +def test_version_from_archival_bare_tag_uses_template(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\ndescribe-name: v1.2.3\n", + encoding="utf-8", + ) + version = version_from_archival(tmp_path) + assert version == Version("1.2.3") + + +def test_version_from_archival_dirty(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678-dirty\n", + encoding="utf-8", + ) + version = version_from_archival(tmp_path) + assert version == Version("1.2.3.post5+git.4060507d.dirty") + + +def test_version_from_archival_branch_defaults_to_head_when_missing(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678\n", + encoding="utf-8", + ) + version = version_from_archival( + tmp_path, + dev_template="{tag}.post{ccount}+git.{sha}.{branch}", + ) + assert version == Version("1.2.3.post5+git.4060507d.HEAD") + + +# --------------------------------------------------------------------------- +# Integration: real `git archive` round-trip + build +# --------------------------------------------------------------------------- + + +def _add_archival_template(repo: Path) -> None: + create_file(repo, ARCHIVAL_FILENAME, GIT_ARCHIVAL_STABLE, commit=False) + create_file(repo, ".gitattributes", f"{ARCHIVAL_FILENAME} export-subst\n", commit=False) + execute(repo, "git", "add", ARCHIVAL_FILENAME, ".gitattributes") + execute(repo, "git", "commit", "-m", "add git archive support") + + +def _git_archive_extract(repo: Path, dest: Path) -> None: + archive = repo / "archive.tar" + execute(repo, "git", "archive", "--format=tar", f"--output={archive}", "HEAD") + with tarfile.open(archive, "r") as tf: + tf.extractall(dest) + archive.unlink() + + +@pytest.mark.important +def test_archival_end_to_end_post_tag(repo: Path, tmp_path_factory: pytest.TempPathFactory, create_config) -> None: + create_config(repo, {"dev_template": "{tag}.post{ccount}"}) + create_tag(repo, "1.2.3") + create_file(repo) # one commit after the tag + _add_archival_template(repo) + + extracted = tmp_path_factory.mktemp("extracted") + _git_archive_extract(repo, extracted) + assert not (extracted / ".git").exists() + + archival_text = (extracted / ARCHIVAL_FILENAME).read_text(encoding="utf-8") + assert "$Format:" not in archival_text # placeholders were substituted + + # Carry over coverage config so the integration test contributes coverage data + shutil.copy(repo / ".coveragerc", extracted / ".coveragerc") + + assert get_version(extracted) == "1.2.3.post2" + + +@pytest.mark.important +def test_archival_end_to_end_dirty(repo: Path, tmp_path_factory: pytest.TempPathFactory, create_config) -> None: + """When the archival file's describe-name carries a `-dirty` suffix, + the dirty_template is used. + + Note: `git archive` itself cannot produce a `-dirty` describe-name. + It archives the committed tree (working-tree modifications are not + included), and the `%(describe:...)` placeholder is evaluated against + the archived commit, not the working tree - so dirtying the repo + before `git archive` has no effect on the substituted output. + The `-dirty` suffix only reaches the archival file if a user + generates it outside `git archive` (e.g., `git describe --dirty + > .git_archival.txt`) or hand-edits it. We simulate that here by + patching the extracted file. + """ + create_config(repo, {"dirty_template": "{tag}.post{ccount}+dirty"}) + create_tag(repo, "1.2.3") + create_file(repo) + _add_archival_template(repo) + + extracted = tmp_path_factory.mktemp("extracted") + _git_archive_extract(repo, extracted) + + archival_path = extracted / ARCHIVAL_FILENAME + lines = archival_path.read_text(encoding="utf-8").splitlines() + patched = [(line + "-dirty") if line.startswith("describe-name:") else line for line in lines] + archival_path.write_text("\n".join(patched) + "\n", encoding="utf-8") + + shutil.copy(repo / ".coveragerc", extracted / ".coveragerc") + + # ccount=2: one post-tag commit + the archival-template commit + assert get_version(extracted) == "1.2.3.post2+dirty" + + +@pytest.mark.important +def test_archival_end_to_end_bare_tag(repo: Path, tmp_path_factory: pytest.TempPathFactory, create_config) -> None: + create_config(repo, {"template": "{tag}"}) + _add_archival_template(repo) + create_tag(repo, "1.2.3") + + extracted = tmp_path_factory.mktemp("extracted") + _git_archive_extract(repo, extracted) + shutil.copy(repo / ".coveragerc", extracted / ".coveragerc") + + assert get_version(extracted) == "1.2.3" + + +def test_archival_unsubstituted_falls_through_to_live_git(repo: Path, caplog: pytest.LogCaptureFixture) -> None: + """When the .git_archival.txt file is read inside a working checkout + (placeholders not yet expanded), the archival path should warn and fall + through to the live-git flow. + """ + create_file(repo, ARCHIVAL_FILENAME, GIT_ARCHIVAL_STABLE) + create_tag(repo, "1.2.3") + + from setuptools_git_versioning.version import version_from_git + + with caplog.at_level("WARNING"): + version = version_from_git(root=repo) + assert version == Version("1.2.3") + assert "unprocessed" in caplog.text + + +def test_archival_priority_pkg_info_still_wins(tmp_path_factory: pytest.TempPathFactory) -> None: + """When PKG-INFO is present (sdist), it takes precedence over .git_archival.txt.""" + project = tmp_path_factory.mktemp("sdist") + (project / "PKG-INFO").write_text("Version: 9.9.9\n", encoding="utf-8") + (project / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\ndescribe-name: v1.2.3\n", + encoding="utf-8", + ) + from setuptools_git_versioning.version import version_from_git + + assert version_from_git(root=project) == Version("9.9.9") + + +def test_archival_priority_before_live_git(tmp_path_factory: pytest.TempPathFactory) -> None: + """When .git_archival.txt is present and there's no .git, the archival result is used.""" + project = tmp_path_factory.mktemp("archive_no_git") + (project / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678\n", + encoding="utf-8", + ) + from setuptools_git_versioning.version import version_from_git + + assert version_from_git(root=project) == Version("1.2.3.post5+git.4060507d") From 556e7ee6a83fc47913b7032c09bc73f394248c32 Mon Sep 17 00:00:00 2001 From: Kaleb Barrett Date: Wed, 13 May 2026 17:46:57 -0600 Subject: [PATCH 2/3] Address comments --- docs/git_archive.rst | 10 +- setuptools_git_versioning/archival.py | 24 ++++- setuptools_git_versioning/version.py | 3 + tests/test_integration/test_archival.py | 116 ++++++++++++++++++++---- 4 files changed, 129 insertions(+), 24 deletions(-) diff --git a/docs/git_archive.rst b/docs/git_archive.rst index edd837f..2c9f54e 100644 --- a/docs/git_archive.rst +++ b/docs/git_archive.rst @@ -44,7 +44,9 @@ When ``git archive`` runs, the placeholders are expanded into the actual commit SHA and ``git describe`` output for the archived commit. When the package is later built from the extracted archive, ``setuptools-git-versioning`` reads the file and resolves the version using -the configured ``template`` / ``dev_template`` / ``dirty_template``. +the configured ``template`` / ``dev_template`` / ``dirty_template``. If +configured, ``tag_formatter`` and ``branch_formatter`` are applied to the +archived tag and branch names before template substitution. The same file format is used by ``setuptools-scm``, so a single ``.git_archival.txt`` works with both tools. @@ -93,9 +95,9 @@ ignored, falling through to the live ``git`` flow. Limitations ~~~~~~~~~~~ -- ``tag_filter``, ``tag_formatter``, and ``sort_by`` have no effect on - archive builds. The tag is whatever ``git describe`` chose at archive - time. +- ``tag_filter`` and ``sort_by`` have no effect on archive builds. The tag is + still whatever ``git describe`` chose at archive time, even if + ``tag_formatter`` rewrites it afterward. - ``count_commits_from_version_file`` and ``version_file`` are not consulted in the archive flow. - Older git versions (<2.32) do not understand the ``%(describe...)`` diff --git a/setuptools_git_versioning/archival.py b/setuptools_git_versioning/archival.py index 8447eff..98f7fca 100644 --- a/setuptools_git_versioning/archival.py +++ b/setuptools_git_versioning/archival.py @@ -13,10 +13,13 @@ DEFAULT_DIRTY_TEMPLATE, DEFAULT_TEMPLATE, ) +from setuptools_git_versioning.factories import create_branch_formatter, create_tag_formatter from setuptools_git_versioning.log import DEBUG, INFO from setuptools_git_versioning.subst import resolve_substitutions if TYPE_CHECKING: + from typing import Callable + from packaging.version import Version ARCHIVAL_FILENAME = ".git_archival.txt" @@ -92,7 +95,7 @@ def _branch_from_ref_names(ref_names: str) -> str | None: return None -def archival_to_version_data(data: dict[str, str]) -> ArchivalData | None: +def get_data_from_archival_file(data: dict[str, str]) -> ArchivalData | None: """Convert parsed archival data into structured version info, or None. Returns None when the file looks unsubstituted or otherwise unusable so @@ -156,12 +159,15 @@ def archival_to_version_data(data: dict[str, str]) -> ArchivalData | None: ) -def version_from_archival( +def version_from_archival( # noqa: PLR0913 project_root: str | os.PathLike, *, + package_name: str | None = None, template: str = DEFAULT_TEMPLATE, dev_template: str = DEFAULT_DEV_TEMPLATE, dirty_template: str = DEFAULT_DIRTY_TEMPLATE, + tag_formatter: Callable[[str], str] | str | None = None, + branch_formatter: Callable[[str], str] | str | None = None, ) -> Version | None: """Return a Version derived from .git_archival.txt, or None if unavailable.""" archival_path = Path(project_root).joinpath(ARCHIVAL_FILENAME) @@ -171,12 +177,18 @@ def version_from_archival( log.log(INFO, "File '%s' is found, reading its content", archival_path) data = parse_archival_file(archival_path) - info = archival_to_version_data(data) + info = get_data_from_archival_file(data) if info is None: return None log.log(DEBUG, "Parsed archival data: %r", info) + tag = info.tag + if tag_formatter is not None: + tag_format_callback = create_tag_formatter(tag_formatter, package_name=package_name, root=project_root) + tag = tag_format_callback(tag) + log.log(DEBUG, "Tag after formatting: %r", tag) + if info.dirty: log.log(INFO, "Using template from 'dirty_template' option") chosen = dirty_template @@ -191,11 +203,15 @@ def version_from_archival( # to the literal "HEAD" so `{branch}` substitution mirrors what # `git rev-parse --abbrev-ref HEAD` produces in detached-HEAD state. branch = info.branch if info.branch is not None else "HEAD" + if branch_formatter is not None: + branch_format_callback = create_branch_formatter(branch_formatter, package_name=package_name, root=project_root) + branch = branch_format_callback(branch) + log.log(INFO, "Branch after formatting: %r", branch) rendered = resolve_substitutions( chosen, sha=info.sha, - tag=info.tag, + tag=tag, ccount=info.ccount, branch=branch, full_sha=info.full_sha, diff --git a/setuptools_git_versioning/version.py b/setuptools_git_versioning/version.py index bc2fb98..01efa16 100644 --- a/setuptools_git_versioning/version.py +++ b/setuptools_git_versioning/version.py @@ -117,9 +117,12 @@ def version_from_git( # noqa: PLR0915, PLR0912, PLR0913, C901 archival_version = version_from_archival( project_root, + package_name=package_name, template=template, dev_template=dev_template, dirty_template=dirty_template, + tag_formatter=tag_formatter, + branch_formatter=branch_formatter, ) if archival_version is not None: log.log(INFO, "Resolved version from '.git_archival.txt': %s", archival_version) diff --git a/tests/test_integration/test_archival.py b/tests/test_integration/test_archival.py index a7c5140..4bbbda1 100644 --- a/tests/test_integration/test_archival.py +++ b/tests/test_integration/test_archival.py @@ -10,7 +10,7 @@ from setuptools_git_versioning.archival import ( ARCHIVAL_FILENAME, ArchivalData, - archival_to_version_data, + get_data_from_archival_file, parse_archival_file, version_from_archival, ) @@ -27,6 +27,9 @@ pytestmark = pytest.mark.all GIT_ARCHIVAL_STABLE = "node: $Format:%H$\ndescribe-name: $Format:%(describe:tags=true,match=*[0-9]*)$\n" +GIT_ARCHIVAL_WITH_BRANCH = ( + "node: $Format:%H$\ndescribe-name: $Format:%(describe:tags=true,match=*[0-9]*)$\nref-names: $Format:%D$\n" +) # --------------------------------------------------------------------------- @@ -89,7 +92,7 @@ def test_archival_to_version_data_post_tag() -> None: "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result == ArchivalData( tag="v1.2.3", ccount=5, @@ -105,7 +108,7 @@ def test_archival_to_version_data_bare_tag() -> None: "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "v1.2.3", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "v1.2.3" assert result.ccount == 0 @@ -119,7 +122,7 @@ def test_archival_to_version_data_dirty_suffix() -> None: "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678-dirty", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.dirty is True assert result.tag == "v1.2.3" @@ -129,7 +132,7 @@ def test_archival_to_version_data_dirty_suffix() -> None: def test_archival_to_version_data_unsubstituted_returns_none(caplog: pytest.LogCaptureFixture) -> None: data = {"node": "$Format:%H$", "describe-name": "$Format:%(describe)$"} with caplog.at_level("WARNING"): - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is None assert "unprocessed" in caplog.text @@ -143,7 +146,7 @@ def test_archival_to_version_data_old_git_falls_back_to_ref_names( "ref-names": "HEAD -> main, tag: v1.2.3", } with caplog.at_level("WARNING"): - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "v1.2.3" assert result.ccount == 0 @@ -163,7 +166,7 @@ def test_archival_to_version_data_old_git_no_tag_in_ref_names_returns_none( "ref-names": "HEAD -> main, origin/main", } with caplog.at_level("WARNING"): - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is None assert "git <2.32" in caplog.text @@ -177,7 +180,7 @@ def test_archival_to_version_data_accepts_sha256_node() -> None: "node": sha256_node, "describe-name": "v1.2.3", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "v1.2.3" assert result.full_sha == sha256_node @@ -194,7 +197,7 @@ def test_archival_to_version_data_prefers_full_sha_over_describe_short_sha() -> "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "v1.2.3-5-g4060507", # conventional 7-char short SHA } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "v1.2.3" assert result.ccount == 5 @@ -212,7 +215,7 @@ def test_archival_to_version_data_describe_with_non_numeric_middle_part() -> Non "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "foo-bar-baz", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "foo-bar-baz" assert result.ccount == 0 @@ -224,7 +227,7 @@ def test_archival_to_version_data_short_sha_fallback_when_node_missing() -> None data = { "describe-name": "v1.2.3-5-gabc1234", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.tag == "v1.2.3" assert result.ccount == 5 @@ -238,7 +241,7 @@ def test_archival_to_version_data_branch_from_ref_names() -> None: "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", "ref-names": "HEAD -> feature/x, origin/main", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.branch == "feature/x" @@ -248,7 +251,7 @@ def test_archival_to_version_data_branch_absent() -> None: "node": "4060507deadbeef0123456789abcdef012345678", "describe-name": "v1.2.3-5-g4060507deadbeef0123456789abcdef012345678", } - result = archival_to_version_data(data) + result = get_data_from_archival_file(data) assert result is not None assert result.branch is None @@ -258,7 +261,7 @@ def test_archival_to_version_data_no_tag_anywhere_returns_none() -> None: "node": "4060507deadbeef0123456789abcdef012345678", "ref-names": "HEAD -> main", } - assert archival_to_version_data(data) is None + assert get_data_from_archival_file(data) is None def test_version_from_archival_missing_file_returns_none(tmp_path: Path) -> None: @@ -307,13 +310,41 @@ def test_version_from_archival_branch_defaults_to_head_when_missing(tmp_path: Pa assert version == Version("1.2.3.post5+git.4060507d.HEAD") +def test_version_from_archival_applies_tag_formatter(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\ndescribe-name: release/1.2.3\n", + encoding="utf-8", + ) + version = version_from_archival( + tmp_path, + tag_formatter=lambda tag: tag.removeprefix("release/"), + ) + assert version == Version("1.2.3") + + +def test_version_from_archival_applies_branch_formatter(tmp_path: Path) -> None: + (tmp_path / ARCHIVAL_FILENAME).write_text( + "node: 4060507deadbeef0123456789abcdef012345678\n" + "describe-name: v1.2.3-5-g4060507deadbeef0123456789abcdef012345678\n" + "ref-names: HEAD -> feature/issue-1234-add-a-great-feature\n", + encoding="utf-8", + ) + version = version_from_archival( + tmp_path, + dev_template="{tag}.post{ccount}+{branch}", + branch_formatter=lambda branch: branch.split("/")[1].split("-")[1], + ) + assert version == Version("1.2.3.post5+1234") + + # --------------------------------------------------------------------------- # Integration: real `git archive` round-trip + build # --------------------------------------------------------------------------- -def _add_archival_template(repo: Path) -> None: - create_file(repo, ARCHIVAL_FILENAME, GIT_ARCHIVAL_STABLE, commit=False) +def _add_archival_template(repo: Path, *, include_ref_names: bool = False) -> None: + template = GIT_ARCHIVAL_WITH_BRANCH if include_ref_names else GIT_ARCHIVAL_STABLE + create_file(repo, ARCHIVAL_FILENAME, template, commit=False) create_file(repo, ".gitattributes", f"{ARCHIVAL_FILENAME} export-subst\n", commit=False) execute(repo, "git", "add", ARCHIVAL_FILENAME, ".gitattributes") execute(repo, "git", "commit", "-m", "add git archive support") @@ -394,6 +425,59 @@ def test_archival_end_to_end_bare_tag(repo: Path, tmp_path_factory: pytest.TempP assert get_version(extracted) == "1.2.3" +@pytest.mark.important +def test_archival_end_to_end_tag_formatter(repo: Path, tmp_path_factory: pytest.TempPathFactory, create_config) -> None: + create_file( + repo, + "util.py", + "def tag_formatter(tag):\n return tag.removeprefix('release/')\n", + ) + create_config( + repo, + { + "tag_formatter": "util:tag_formatter", + }, + ) + _add_archival_template(repo) + create_tag(repo, "release/1.2.3") + + extracted = tmp_path_factory.mktemp("extracted") + _git_archive_extract(repo, extracted) + shutil.copy(repo / ".coveragerc", extracted / ".coveragerc") + + assert get_version(extracted) == "1.2.3" + + +@pytest.mark.important +def test_archival_end_to_end_branch_formatter( + repo: Path, + tmp_path_factory: pytest.TempPathFactory, + create_config, +) -> None: + create_tag(repo, "1.2.3") + create_file(repo) # one commit after the tag + execute(repo, "git", "checkout", "-b", "feature/issue-1234-add-a-great-feature") + create_file( + repo, + "util.py", + "def branch_formatter(branch):\n return branch.split('/')[1].split('-')[1]\n", + ) + create_config( + repo, + { + "dev_template": "{tag}.post{ccount}+{branch}", + "branch_formatter": "util:branch_formatter", + }, + ) + _add_archival_template(repo, include_ref_names=True) + + extracted = tmp_path_factory.mktemp("extracted") + _git_archive_extract(repo, extracted) + shutil.copy(repo / ".coveragerc", extracted / ".coveragerc") + + assert get_version(extracted) == "1.2.3.post4+1234" + + def test_archival_unsubstituted_falls_through_to_live_git(repo: Path, caplog: pytest.LogCaptureFixture) -> None: """When the .git_archival.txt file is read inside a working checkout (placeholders not yet expanded), the archival path should warn and fall From 9ca7bd8c53e50d2f73fe9462c00e95b7a33aa761 Mon Sep 17 00:00:00 2001 From: Kaleb Barrett Date: Thu, 14 May 2026 08:50:02 -0600 Subject: [PATCH 3/3] Fix Python <3.9 --- tests/test_integration/test_archival.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_integration/test_archival.py b/tests/test_integration/test_archival.py index 4bbbda1..d97bf49 100644 --- a/tests/test_integration/test_archival.py +++ b/tests/test_integration/test_archival.py @@ -317,7 +317,7 @@ def test_version_from_archival_applies_tag_formatter(tmp_path: Path) -> None: ) version = version_from_archival( tmp_path, - tag_formatter=lambda tag: tag.removeprefix("release/"), + tag_formatter=lambda tag: tag[len("release/") :] if tag.startswith("release/") else tag, ) assert version == Version("1.2.3") @@ -430,7 +430,7 @@ def test_archival_end_to_end_tag_formatter(repo: Path, tmp_path_factory: pytest. create_file( repo, "util.py", - "def tag_formatter(tag):\n return tag.removeprefix('release/')\n", + "def tag_formatter(tag):\n return tag[len('release/'):] if tag.startswith('release/') else tag\n", ) create_config( repo,