From 1e7e8a2e1eda75e5b80243010411cdac2ec565a1 Mon Sep 17 00:00:00 2001
From: PavelMakarchuk <pavel.ma99@gmail.com>
Date: Mon, 23 Feb 2026 19:46:03 -0500
Subject: [PATCH 1/8] Add results.json schema validation and source tracking

New `policyengine.results` module with two pieces:

- `schema.py`: Pydantic models (ResultsJson, ValueEntry, TableEntry,
  ChartEntry) that validate results.json at generation time. Catches
  missing source_line/source_url, row/column mismatches in tables, and
  vague alt text on charts before they reach the blog build step.

- `tracking.py`: `tracked_value()` helper that captures the caller's
  line number via `inspect` and builds the source_url automatically.
  Eliminates repetitive inspect.currentframe() boilerplate in analysis
  scripts.

These support the blog post content pipeline where every number in a
published post links back to the exact line of analysis code that
produced it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/policyengine/results/__init__.py |  17 +++
 src/policyengine/results/schema.py   | 114 +++++++++++++++
 src/policyengine/results/tracking.py |  68 +++++++++
 tests/test_results.py                | 199 +++++++++++++++++++++++++++
 4 files changed, 398 insertions(+)
 create mode 100644 src/policyengine/results/__init__.py
 create mode 100644 src/policyengine/results/schema.py
 create mode 100644 src/policyengine/results/tracking.py
 create mode 100644 tests/test_results.py

diff --git a/src/policyengine/results/__init__.py b/src/policyengine/results/__init__.py
new file mode 100644
index 00000000..812f5027
--- /dev/null
+++ b/src/policyengine/results/__init__.py
@@ -0,0 +1,17 @@
+from policyengine.results.schema import (
+    ChartEntry,
+    ResultsJson,
+    ResultsMetadata,
+    TableEntry,
+    ValueEntry,
+)
+from policyengine.results.tracking import tracked_value
+
+__all__ = [
+    "ChartEntry",
+    "ResultsJson",
+    "ResultsMetadata",
+    "TableEntry",
+    "ValueEntry",
+    "tracked_value",
+]
diff --git a/src/policyengine/results/schema.py b/src/policyengine/results/schema.py
new file mode 100644
index 00000000..1562883a
--- /dev/null
+++ b/src/policyengine/results/schema.py
@@ -0,0 +1,114 @@
+"""Pydantic schema for results.json — the contract between analysis
+repos and blog posts.
+
+Every PolicyEngine blog post references a results.json file produced
+by an analysis script.  This module validates that the file conforms
+to the expected schema so errors are caught at generation time rather
+than at build time when resolve-posts tries to render templates.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, model_validator
+
+
+class ResultsMetadata(BaseModel):
+    """Top-level metadata about the analysis."""
+
+    title: str
+    repo: str
+    slug: str | None = None
+    commit: str | None = None
+    generated_at: str | None = None
+    policyengine_version: str | None = None
+    dataset: str | None = None
+    country_id: str | None = None
+    year: int | None = None
+
+
+class ValueEntry(BaseModel):
+    """A single traceable value in results.json."""
+
+    value: Any
+    display: str
+    source_line: int
+    source_url: str
+
+
+class TableEntry(BaseModel):
+    """A table in results.json."""
+
+    title: str
+    headers: list[str]
+    rows: list[list[str]]
+    source_line: int
+    source_url: str
+
+    @model_validator(mode="after")
+    def check_row_widths(self) -> "TableEntry":
+        n_cols = len(self.headers)
+        for i, row in enumerate(self.rows):
+            if len(row) != n_cols:
+                raise ValueError(
+                    f"Row {i} has {len(row)} columns but headers "
+                    f"has {n_cols}"
+                )
+        return self
+
+
+class ChartEntry(BaseModel):
+    """A chart reference in results.json."""
+
+    url: str
+    alt: str
+    width: int = 1200
+    height: int = 600
+    source_line: int
+    source_url: str
+
+    @model_validator(mode="after")
+    def check_alt_text(self) -> "ChartEntry":
+        if len(self.alt) < 20:
+            raise ValueError(
+                f"Alt text is too short ({len(self.alt)} chars). "
+                "Include chart type and 2-3 key data points."
+            )
+        return self
+
+
+class ResultsJson(BaseModel):
+    """Full results.json schema.
+
+    Usage::
+
+        from policyengine.results import ResultsJson, ResultsMetadata, ValueEntry
+
+        results = ResultsJson(
+            metadata=ResultsMetadata(
+                title="SALT Cap Repeal",
+                repo="PolicyEngine/analyses",
+            ),
+            values={
+                "budget_impact": ValueEntry(
+                    value=-15.2e9,
+                    display="$15.2 billion",
+                    source_line=47,
+                    source_url="https://github.com/.../analysis.py#L47",
+                ),
+            },
+        )
+        results.write("results.json")
+    """
+
+    metadata: ResultsMetadata
+    values: dict[str, ValueEntry] = {}
+    tables: dict[str, TableEntry] = {}
+    charts: dict[str, ChartEntry] = {}
+
+    def write(self, path: str | Path) -> None:
+        """Write validated results.json to disk."""
+        path = Path(path)
+        data = json.loads(self.model_dump_json())
+        path.write_text(json.dumps(data, indent=2))
diff --git a/src/policyengine/results/tracking.py b/src/policyengine/results/tracking.py
new file mode 100644
index 00000000..5650b820
--- /dev/null
+++ b/src/policyengine/results/tracking.py
@@ -0,0 +1,68 @@
+"""Source-line tracking helper for results.json values.
+
+The key traceability guarantee of the blog pipeline is that every
+number in a blog post links back to the exact line of code that
+produced it.  This module provides a helper that captures the
+caller's line number automatically via ``inspect``.
+"""
+
+import inspect
+from typing import Any
+
+
+def tracked_value(
+    value: Any,
+    display: str,
+    repo: str,
+    filename: str = "analysis.py",
+    branch: str = "main",
+    *,
+    _stack_offset: int = 1,
+) -> dict:
+    """Build a results.json value entry with automatic source tracking.
+
+    Captures the caller's line number (the file comes from ``filename``)
+    so every value in results.json points to the code that produced it.
+
+    Args:
+        value: The raw numeric value.
+        display: Human-readable formatted string (e.g. "$15.2 billion").
+        repo: GitHub org/repo (e.g. "PolicyEngine/analyses").
+        filename: Script filename within the repo.
+        branch: Git branch for the source URL.
+        _stack_offset: How many frames to skip (default 1 = caller).
+
+    Returns:
+        Dict matching the ValueEntry schema::
+
+            {
+                "value": -15200000000,
+                "display": "$15.2 billion",
+                "source_line": 47,
+                "source_url": "https://github.com/.../analysis.py#L47",
+            }
+
+    Example::
+
+        from policyengine.results import tracked_value
+
+        budget = reform_revenue - baseline_revenue
+        results["values"]["budget_impact"] = tracked_value(
+            value=budget,
+            display=f"${abs(budget)/1e9:.1f} billion",
+            repo="PolicyEngine/analyses",
+        )
+    """
+    frame = inspect.stack()[_stack_offset]
+    line = frame.lineno
+
+    source_url = (
+        f"https://github.com/{repo}/blob/{branch}/{filename}#L{line}"
+    )
+
+    return {
+        "value": value,
+        "display": display,
+        "source_line": line,
+        "source_url": source_url,
+    }
diff --git a/tests/test_results.py b/tests/test_results.py
new file mode 100644
index 00000000..91ace925
--- /dev/null
+++ b/tests/test_results.py
@@ -0,0 +1,199 @@
+"""Tests for the results.json schema validation and source tracking."""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from policyengine.results import (
+    ChartEntry,
+    ResultsJson,
+    ResultsMetadata,
+    TableEntry,
+    ValueEntry,
+    tracked_value,
+)
+
+
+def test_valid_results_json():
+    """A fully valid results.json passes validation."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Test Analysis",
+            repo="PolicyEngine/test",
+            year=2026,
+            country_id="us",
+        ),
+        values={
+            "budget_impact": ValueEntry(
+                value=-15200000000,
+                display="$15.2 billion",
+                source_line=47,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L47",
+            ),
+        },
+        tables={
+            "household": TableEntry(
+                title="Household impacts",
+                headers=["Household", "Income", "Change"],
+                rows=[
+                    ["Single", "$50,000", "+$1,200"],
+                    ["Married", "$100,000", "+$2,400"],
+                ],
+                source_line=80,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L80",
+            ),
+        },
+        charts={
+            "decile": ChartEntry(
+                url="https://PolicyEngine.github.io/test/charts/decile.png",
+                alt="Bar chart showing impact by decile. Top decile gains $8,200.",
+                source_line=105,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L105",
+            ),
+        },
+    )
+
+    assert results.metadata.title == "Test Analysis"
+    assert results.values["budget_impact"].value == -15200000000
+    assert len(results.tables["household"].rows) == 2
+    assert results.charts["decile"].width == 1200
+
+
+def test_value_entry_requires_source_line():
+    """ValueEntry without source_line raises ValidationError."""
+    with pytest.raises(ValueError):  # pydantic's ValidationError is a ValueError
+        ValueEntry(
+            value=100,
+            display="$100",
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_value_entry_requires_source_url():
+    """ValueEntry without source_url raises ValidationError."""
+    with pytest.raises(ValueError):  # pydantic's ValidationError is a ValueError
+        ValueEntry(
+            value=100,
+            display="$100",
+            source_line=10,
+        )
+
+
+def test_table_row_width_mismatch():
+    """Table with wrong number of columns per row raises error."""
+    with pytest.raises(ValueError):  # validator error surfaces as ValidationError
+        TableEntry(
+            title="Bad table",
+            headers=["A", "B", "C"],
+            rows=[["x", "y"]],  # 2 cols, need 3
+            source_line=1,
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_chart_alt_text_too_short():
+    """Chart with vague alt text raises error."""
+    with pytest.raises(ValueError):  # validator error surfaces as ValidationError
+        ChartEntry(
+            url="https://example.com/chart.png",
+            alt="A chart.",  # Too short
+            source_line=1,
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_chart_alt_text_descriptive():
+    """Chart with descriptive alt text passes."""
+    chart = ChartEntry(
+        url="https://example.com/chart.png",
+        alt="Bar chart showing reform impact by income decile. Top decile gains $8,200 average.",
+        source_line=1,
+        source_url="https://github.com/x/y#L1",
+    )
+    assert chart.width == 1200
+    assert chart.height == 600
+
+
+def test_write_results_json():
+    """ResultsJson.write() produces valid JSON file."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Write Test",
+            repo="PolicyEngine/test",
+        ),
+        values={
+            "x": ValueEntry(
+                value=42,
+                display="42",
+                source_line=1,
+                source_url="https://github.com/x/y#L1",
+            ),
+        },
+    )
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "results.json"
+        results.write(path)
+
+        data = json.loads(path.read_text())
+        assert data["metadata"]["title"] == "Write Test"
+        assert data["values"]["x"]["value"] == 42
+        assert data["values"]["x"]["source_line"] == 1
+
+
+def test_empty_results_json():
+    """ResultsJson with only metadata is valid."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Empty",
+            repo="PolicyEngine/test",
+        ),
+    )
+    assert results.values == {}
+    assert results.tables == {}
+    assert results.charts == {}
+
+
+def test_tracked_value():
+    """tracked_value() captures line number and builds source URL."""
+    result = tracked_value(
+        value=-15200000000,
+        display="$15.2 billion",
+        repo="PolicyEngine/analyses",
+        filename="analysis.py",
+    )
+
+    assert result["value"] == -15200000000
+    assert result["display"] == "$15.2 billion"
+    assert isinstance(result["source_line"], int)
+    assert result["source_line"] > 0
+    assert "PolicyEngine/analyses" in result["source_url"]
+    assert "analysis.py#L" in result["source_url"]
+
+
+def test_tracked_value_custom_filename():
+    """tracked_value() respects custom filename and branch."""
+    result = tracked_value(
+        value=100,
+        display="$100",
+        repo="PolicyEngine/analyses",
+        filename="salt-cap/analysis.py",
+        branch="dev",
+    )
+
+    assert "salt-cap/analysis.py" in result["source_url"]
+    assert "/blob/dev/" in result["source_url"]
+
+
+def test_tracked_value_validates_as_value_entry():
+    """tracked_value() output can be used to construct a ValueEntry."""
+    result = tracked_value(
+        value=42,
+        display="42",
+        repo="PolicyEngine/test",
+    )
+    entry = ValueEntry(**result)
+    assert entry.value == 42
+    assert entry.source_line > 0

From 94c396008fde5a9bc8ac9212ca9a2d5b9887230e Mon Sep 17 00:00:00 2001
From: PavelMakarchuk <pavel.ma99@gmail.com>
Date: Mon, 23 Feb 2026 19:57:47 -0500
Subject: [PATCH 2/8] Fix write() double-serialization and add parent dir
 creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Use model_dump(mode="json") instead of json.loads(model_dump_json())
  to avoid unnecessary serialize→parse→serialize round-trip
- Create parent directories automatically so callers don't need
  to mkdir first
- Add trailing newline to output file
- Add test for nested directory creation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/policyengine/results/schema.py |  5 +++--
 tests/test_results.py              | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/src/policyengine/results/schema.py b/src/policyengine/results/schema.py
index 1562883a..8ca94fb8 100644
--- a/src/policyengine/results/schema.py
+++ b/src/policyengine/results/schema.py
@@ -110,5 +110,6 @@ class ResultsJson(BaseModel):
     def write(self, path: str | Path) -> None:
         """Write validated results.json to disk."""
         path = Path(path)
-        data = json.loads(self.model_dump_json())
-        path.write_text(json.dumps(data, indent=2))
+        path.parent.mkdir(parents=True, exist_ok=True)
+        data = self.model_dump(mode="json")
+        path.write_text(json.dumps(data, indent=2) + "\n")
diff --git a/tests/test_results.py b/tests/test_results.py
index 91ace925..43be3439 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -137,12 +137,32 @@ def test_write_results_json():
         path = Path(tmpdir) / "results.json"
         results.write(path)
 
-        data = json.loads(path.read_text())
+        raw = path.read_text()
+        assert raw.endswith("\n"), "File should end with a newline"
+        data = json.loads(raw)
         assert data["metadata"]["title"] == "Write Test"
         assert data["values"]["x"]["value"] == 42
         assert data["values"]["x"]["source_line"] == 1
 
 
+def test_write_creates_parent_directories():
+    """ResultsJson.write() creates parent directories if needed."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Nested",
+            repo="PolicyEngine/test",
+        ),
+    )
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "sub" / "dir" / "results.json"
+        results.write(path)
+
+        assert path.exists()
+        data = json.loads(path.read_text())
+        assert data["metadata"]["title"] == "Nested"
+
+
 def test_empty_results_json():
     """ResultsJson with only metadata is valid."""
     results = ResultsJson(

From 7d59241fce93403b77a0ac666bd91f41c4849800 Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 07:23:26 -0400
Subject: [PATCH 3/8] Harden TRACE TRO export and add per-simulation TROs

Builds on #274's bundle-level TRO and closes the gaps that would surface
at an AEA replication review:

- schema:creator is now a schema.org Organization, not a version string
- model wheel is hashed as a fourth composition artifact (hash read from
  the manifest when present, otherwise fetched from the PyPI JSON API,
  degrading silently when PyPI is unreachable)
- every trov:path resolves over HTTPS (Hugging Face resolve URLs, PyPI
  download URL) so a reviewer can dereference the TRO without custom
  clients
- certification metadata moves from prose in schema:description to
  structured pe:* fields on TrustedResearchPerformance
  (pe:certifiedForModelVersion, pe:compatibilityBasis,
  pe:builtWithModelVersion, pe:dataBuildFingerprint, pe:dataBuildId)
- GitHub Actions runs add pe:ciRunUrl / pe:ciGitSha attestation
- JSON Schema ships at data/schemas/trace_tro.schema.json and every
  generated TRO is validated against it in tests

Adds the per-simulation layer that the bundle-level TRO doesn't cover:

- build_simulation_trace_tro chains a bundle TRO to a reform + results
- policyengine.results.build_results_trace_tro /
  write_results_with_trace_tro emit a TRO alongside a ResultsJson
  payload

Wiring:

- policyengine trace-tro CLI (plus release-manifest subcommand)
- TaxBenefitModelVersion.trace_tro property and the
  build_trace_tro_from_release_bundle / compute_trace_composition_fingerprint /
  serialize_trace_tro / extract_bundle_tro_reference /
  build_simulation_trace_tro re-exports from policyengine.core that
  were dropped when #276 merged
- scripts/generate_trace_tros.py regenerates bundled TROs before a
  policyengine.py release
- jsonschema added to dev dependencies

Restores the TRACE TRO tests that #276 removed as part of the
test_release_manifests.py rewrite, now isolated in tests/test_trace_tro.py
with coverage for determinism, schema conformance, CI attestation, and
per-simulation chaining.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 changelog.d/trace-tro-hardening.added.md      |   8 +
 docs/release-bundles.md                       |  68 ++-
 pyproject.toml                                |   4 +
 scripts/generate_trace_tros.py                |  57 ++
 src/policyengine/cli.py                       | 109 ++++
 src/policyengine/core/__init__.py             |  13 +
 src/policyengine/core/release_manifest.py     |  56 ++
 .../core/tax_benefit_model_version.py         |  28 +-
 src/policyengine/core/trace_tro.py            | 568 ++++++++++++++----
 .../data/schemas/trace_tro.schema.json        | 210 +++++++
 src/policyengine/results/__init__.py          |   6 +
 src/policyengine/results/trace_tro.py         | 113 ++++
 tests/test_trace_tro.py                       | 482 +++++++++++++++
 13 files changed, 1601 insertions(+), 121 deletions(-)
 create mode 100644 changelog.d/trace-tro-hardening.added.md
 create mode 100644 scripts/generate_trace_tros.py
 create mode 100644 src/policyengine/cli.py
 create mode 100644 src/policyengine/data/schemas/trace_tro.schema.json
 create mode 100644 src/policyengine/results/trace_tro.py
 create mode 100644 tests/test_trace_tro.py

diff --git a/changelog.d/trace-tro-hardening.added.md b/changelog.d/trace-tro-hardening.added.md
new file mode 100644
index 00000000..a758664d
--- /dev/null
+++ b/changelog.d/trace-tro-hardening.added.md
@@ -0,0 +1,8 @@
+TRACE TRO hardening: bundle TROs now hash the country model wheel (read from
+`PackageVersion.sha256` when present, otherwise fetched from PyPI), use HTTPS
+artifact locations, carry structured `pe:*` certification fields and GitHub
+Actions attestation metadata, and are validated in CI against a shipped JSON
+Schema. Adds a `policyengine trace-tro` CLI, per-simulation TROs through
+`policyengine.results.build_results_trace_tro` / `write_results_with_trace_tro`,
+and restores the `TaxBenefitModelVersion.trace_tro` property and
+`policyengine.core` re-exports that were dropped in #276.
diff --git a/docs/release-bundles.md b/docs/release-bundles.md
index 56fb8075..8a9e24a6 100644
--- a/docs/release-bundles.md
+++ b/docs/release-bundles.md
@@ -195,19 +195,67 @@ TRACE sits on top of those manifests as a standards-based export layer.
 
 ### What gets exported
 
-Country `*-data` repos should emit a `trace.tro.jsonld` file for each published data
-release. That TRO should cover:
-
-- the release manifest itself
-- each published artifact hash listed in the release manifest
-- the build-time model provenance recorded in the release manifest
-
-`policyengine.py` should emit a separate certified-bundle TRO. That TRO should cover:
+`policyengine.py` emits a certified-bundle TRO for each supported country. The
+composition pins four artifacts by sha256:
 
 - the bundled country release manifest shipped in `policyengine.py`
 - the country data release manifest resolved for the certified data package version
-- the certified dataset artifact hash
-- the certification basis used to allow runtime reuse
+- the certified dataset artifact
+- the country model wheel published to PyPI (hash read from the bundled manifest
+  when present, otherwise fetched from the PyPI JSON API at emit time)
+
+Every artifact location in the TRO is a dereferenceable HTTPS URI or a path
+relative to the shipped wheel. Certification metadata is carried as structured
+`pe:*` fields on the `trov:TrustedResearchPerformance` node so downstream
+tooling can read `pe:certifiedForModelVersion`, `pe:compatibilityBasis`,
+`pe:builtWithModelVersion`, `pe:dataBuildFingerprint`, and `pe:dataBuildId`
+without parsing prose. When emitted under GitHub Actions, the TRO also carries
+`pe:ciRunUrl` and `pe:ciGitSha` attestation.
+
+Country `*-data` repos should also emit a matching `trace.tro.jsonld` per
+data release covering the release manifest and every staged artifact hash.
+That is a country-data concern and lives in those repos.
+
+#### Emitting a TRO
+
+From Python:
+
+```python
+from pathlib import Path
+from policyengine.core.release_manifest import get_data_release_manifest, get_release_manifest
+from policyengine.core.trace_tro import build_trace_tro_from_release_bundle, serialize_trace_tro
+
+tro = build_trace_tro_from_release_bundle(get_release_manifest("us"), get_data_release_manifest("us"))
+Path("us.trace.tro.jsonld").write_bytes(serialize_trace_tro(tro))
+```
+
+From the CLI:
+
+```
+policyengine trace-tro us --out us.trace.tro.jsonld
+```
+
+Per-simulation TROs chain a bundle TRO to a reform plus a `results.json`
+payload. Use `policyengine.results.write_results_with_trace_tro` to emit the
+pair alongside each published result.
+
+#### Schema validation
+
+Generated TROs are validated against
+`policyengine/data/schemas/trace_tro.schema.json` in CI. Regressions to the
+shape — including mis-typed `schema:creator`, missing composition fingerprints,
+or non-HTTPS artifact locations — fail the test suite before reaching a
+release.
+
+#### Known limitations
+
+- `schema:creator` and all `schema:*` references use schema.org vocabulary;
+  we do not (yet) validate against schema.org's own SHACL shapes.
+- TROs are emitted unsigned. A signed attestation (sigstore or in-toto)
+  is a future addition that will bind TROs to a trusted-system key.
+- The model wheel is hashed by PyPI's published sha256. If a wheel is
+  yanked and re-uploaded under the same version, the hash will change and
+  the TRO becomes invalid — which is the correct behaviour.
 
 ### What TRACE does not replace
 
diff --git a/pyproject.toml b/pyproject.toml
index 09206bdd..34d33130 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,9 @@ dependencies = [
     "psutil>=5.9.0",
 ]
 
+[project.scripts]
+policyengine = "policyengine.cli:main"
+
 [project.optional-dependencies]
 uk = [
     "policyengine_core>=3.25.0",
@@ -46,6 +49,7 @@ dev = [
     "yaml-changelog>=0.1.7",
     "itables",
     "build",
+    "jsonschema>=4.0.0",
     "pytest-asyncio>=0.26.0",
     "ruff>=0.9.0",
     "policyengine_core>=3.25.0",
diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py
new file mode 100644
index 00000000..7df3dfff
--- /dev/null
+++ b/scripts/generate_trace_tros.py
@@ -0,0 +1,57 @@
+"""Regenerate bundled TRACE TRO artifacts for every country release manifest.
+
+Writes ``data/release_manifests/{country}.trace.tro.jsonld`` for each
+country whose bundled manifest ships in the wheel. Run this before
+releasing a new ``policyengine.py`` version so the packaged TRO
+matches the pinned bundle. Network access is required to fetch the
+data release manifest and model wheel hash.
+"""
+
+from __future__ import annotations
+
+import sys
+from importlib.resources import files
+from pathlib import Path
+
+from policyengine.core.release_manifest import (
+    get_data_release_manifest,
+    get_release_manifest,
+)
+from policyengine.core.trace_tro import (
+    build_trace_tro_from_release_bundle,
+    serialize_trace_tro,
+)
+
+
+def regenerate_all() -> list[Path]:
+    manifest_root = Path(
+        str(files("policyengine").joinpath("data", "release_manifests"))
+    )
+    written: list[Path] = []
+    for manifest_path in sorted(manifest_root.glob("*.json")):
+        country_id = manifest_path.stem
+        country_manifest = get_release_manifest(country_id)
+        data_release_manifest = get_data_release_manifest(country_id)
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            data_release_manifest,
+            certification=country_manifest.certification,
+        )
+        out_path = manifest_path.with_suffix(".trace.tro.jsonld")
+        out_path.write_bytes(serialize_trace_tro(tro))
+        written.append(out_path)
+    return written
+
+
+def main() -> int:
+    paths = regenerate_all()
+    for path in paths:
+        print(f"wrote {path}")
+    if not paths:
+        print("no release manifests found", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py
new file mode 100644
index 00000000..e21b3ed4
--- /dev/null
+++ b/src/policyengine/cli.py
@@ -0,0 +1,109 @@
+"""Command-line entry point for policyengine.
+
+Exposes a ``trace-tro`` subcommand that emits a TRACE TRO for a
+certified country bundle. The TRO is the standards-based provenance
+surface on top of the release manifests: see
+:mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Optional, Sequence
+
+from policyengine.core.release_manifest import (
+    DataReleaseManifestUnavailableError,
+    get_data_release_manifest,
+    get_release_manifest,
+)
+from policyengine.core.trace_tro import (
+    build_trace_tro_from_release_bundle,
+    serialize_trace_tro,
+)
+
+
+def _parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="policyengine",
+        description="PolicyEngine reproducibility and release tooling.",
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    tro = subparsers.add_parser(
+        "trace-tro",
+        help="Emit a TRACE Transparent Research Object for a certified bundle.",
+    )
+    tro.add_argument("country", help="Country id (e.g. us, uk).")
+    tro.add_argument(
+        "--out",
+        "-o",
+        type=Path,
+        default=None,
+        help="Write the TRO to this path. Defaults to stdout.",
+    )
+    tro.add_argument(
+        "--offline",
+        action="store_true",
+        help=(
+            "Skip fetching the data release manifest over HTTPS. Requires "
+            "the bundled manifest to include a data release manifest for "
+            "the pinned data package version."
+        ),
+    )
+
+    bundle = subparsers.add_parser(
+        "release-manifest",
+        help="Print the bundled country release manifest as JSON.",
+    )
+    bundle.add_argument("country", help="Country id (e.g. us, uk).")
+
+    return parser
+
+
+def _emit_bundle_tro(country_id: str, out: Optional[Path], *, offline: bool) -> int:
+    country_manifest = get_release_manifest(country_id)
+    try:  # NOTE(review): still runs with --offline; flag only alters error handling
+        data_release_manifest = get_data_release_manifest(country_id)
+    except DataReleaseManifestUnavailableError:
+        if offline:
+            print(
+                f"error: data release manifest for '{country_id}' is not "
+                "available in offline mode.",
+                file=sys.stderr,
+            )
+            return 2  # distinct exit status for "manifest unavailable"
+        raise  # bare re-raise keeps the original traceback intact
+    tro = build_trace_tro_from_release_bundle(
+        country_manifest,
+        data_release_manifest,
+        certification=country_manifest.certification,
+    )
+    payload = serialize_trace_tro(tro)
+    if out is None:
+        sys.stdout.buffer.write(payload)  # payload is bytes, so bypass text stdout
+    else:
+        out.parent.mkdir(parents=True, exist_ok=True)
+        out.write_bytes(payload)
+    return 0
+
+
+def _emit_release_manifest(country_id: str) -> int:
+    manifest = get_release_manifest(country_id)
+    print(json.dumps(manifest.model_dump(mode="json"), indent=2, sort_keys=True))
+    return 0
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    args = _parser().parse_args(argv)
+    if args.command == "trace-tro":
+        return _emit_bundle_tro(args.country, args.out, offline=args.offline)
+    if args.command == "release-manifest":
+        return _emit_release_manifest(args.country)
+    return 1
+
+
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(main())
diff --git a/src/policyengine/core/__init__.py b/src/policyengine/core/__init__.py
index bb0e80d5..8ff37aed 100644
--- a/src/policyengine/core/__init__.py
+++ b/src/policyengine/core/__init__.py
@@ -39,6 +39,19 @@
 from .tax_benefit_model_version import (
     TaxBenefitModelVersion as TaxBenefitModelVersion,
 )
+from .trace_tro import (
+    build_simulation_trace_tro as build_simulation_trace_tro,
+)
+from .trace_tro import (
+    build_trace_tro_from_release_bundle as build_trace_tro_from_release_bundle,
+)
+from .trace_tro import (
+    compute_trace_composition_fingerprint as compute_trace_composition_fingerprint,
+)
+from .trace_tro import (
+    extract_bundle_tro_reference as extract_bundle_tro_reference,
+)
+from .trace_tro import serialize_trace_tro as serialize_trace_tro
 from .variable import Variable as Variable
 
 # Rebuild models to resolve forward references
diff --git a/src/policyengine/core/release_manifest.py b/src/policyengine/core/release_manifest.py
index 90a09f32..881597f5 100644
--- a/src/policyengine/core/release_manifest.py
+++ b/src/policyengine/core/release_manifest.py
@@ -9,6 +9,7 @@
 from pydantic import BaseModel, Field
 
 HF_REQUEST_TIMEOUT_SECONDS = 30
+PYPI_REQUEST_TIMEOUT_SECONDS = 30
 LOCAL_DATA_REPO_HINTS = {
     "us": ("policyengine_us", "policyengine-us-data", "policyengine_us_data"),
     "uk": ("policyengine_uk", "policyengine-uk-data", "policyengine_uk_data"),
@@ -22,6 +23,8 @@ class DataReleaseManifestUnavailableError(ValueError):
 class PackageVersion(BaseModel):
     name: str
     version: str
+    sha256: Optional[str] = None  # digest of the package's wheel, when recorded
+    wheel_url: Optional[str] = None  # download URL for that wheel, when recorded
 
 
 class DataPackageVersion(PackageVersion):
@@ -73,6 +76,14 @@ def uri(self) -> str:
             revision=self.revision,
         )
 
+    @property
+    def https_uri(self) -> str:
+        """Return this artifact's location as an HTTPS URL.
+
+        Delegates to ``https_dataset_uri`` with the repo id, path and revision.
+        """
+        return https_dataset_uri(self.repo_id, self.path, self.revision)
+
 
 class DataReleaseManifest(BaseModel):
     schema_version: int
@@ -131,6 +142,51 @@ def build_hf_uri(repo_id: str, path_in_repo: str, revision: str) -> str:
     return f"hf://{repo_id}/{path_in_repo}@{revision}"
 
 
+def https_dataset_uri(repo_id: str, path_in_repo: str, revision: str) -> str:
+    """Return the huggingface.co ``resolve`` URL for a repo file at a revision."""
+    return f"https://huggingface.co/{repo_id}/resolve/{revision}/{path_in_repo}"
+
+
+def https_release_manifest_uri(data_package: "DataPackageVersion") -> str:
+    """Return the HTTPS URL of a data package's release manifest."""
+    pkg = data_package
+    # The package version fills the revision segment of the URL.
+    tail = f"{pkg.version}/{pkg.release_manifest_path}"
+    return f"https://huggingface.co/{pkg.repo_id}/resolve/{tail}"
+
+
+@lru_cache
+def fetch_pypi_wheel_metadata(name: str, version: str) -> dict[str, Optional[str]]:
+    """Fetch the sha256 and download URL of a package release from PyPI.
+
+    Prefers the first ``bdist_wheel`` file and otherwise falls back to the
+    first file listed. Request errors, non-200 responses, undecodable bodies
+    and empty file lists all yield ``{"sha256": None, "url": None}`` instead
+    of raising, so TRO construction can degrade gracefully. Every result,
+    including that fallback, is memoised by ``lru_cache``.
+    """
+    try:
+        response = requests.get(
+            f"https://pypi.org/pypi/{name}/{version}/json",
+            timeout=PYPI_REQUEST_TIMEOUT_SECONDS,
+        )
+        # A non-200 reply is treated like an empty file list.
+        urls = response.json().get("urls") if response.status_code == 200 else None
+    except (requests.RequestException, ValueError):
+        # PyPI unreachable or body not JSON: keep the documented no-raise contract.
+        urls = None
+    urls = urls or []
+    # Wheels take priority; otherwise use the first file whatever its packagetype.
+    wheels = [entry for entry in urls if entry.get("packagetype") == "bdist_wheel"]
+    chosen = (wheels or urls)[:1]
+    if not chosen:
+        return {"sha256": None, "url": None}
+    return {
+        "sha256": (chosen[0].get("digests") or {}).get("sha256"),
+        "url": chosen[0].get("url"),
+    }
+
+
 @lru_cache
 def get_release_manifest(country_id: str) -> CountryReleaseManifest:
     manifest_path = files("policyengine").joinpath(
diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py
index 7fb03334..eeddef85 100644
--- a/src/policyengine/core/tax_benefit_model_version.py
+++ b/src/policyengine/core/tax_benefit_model_version.py
@@ -4,8 +4,14 @@
 
 from pydantic import BaseModel, Field
 
-from .release_manifest import CountryReleaseManifest, DataCertification, PackageVersion
+from .release_manifest import (
+    CountryReleaseManifest,
+    DataCertification,
+    PackageVersion,
+    get_data_release_manifest,
+)
 from .tax_benefit_model import TaxBenefitModel
+from .trace_tro import build_trace_tro_from_release_bundle
 
 if TYPE_CHECKING:
     from .parameter import Parameter
@@ -203,6 +209,26 @@ def release_bundle(self) -> dict[str, Optional[str]]:
             ),
         }
 
+    @property
+    def trace_tro(self) -> dict:
+        """Return the TRACE TRO dict for this version's certified bundle.
+
+        Feeds the bundled release manifest, the data release manifest for
+        its country, and ``self.data_certification`` to the TRO builder.
+
+        Raises:
+            ValueError: If ``self.release_manifest`` is ``None``.
+        """
+        bundled = self.release_manifest
+        if bundled is None:
+            raise ValueError(
+                "TRACE TRO export requires a bundled country release manifest."
+            )
+        data_manifest = get_data_release_manifest(bundled.country_id)
+        return build_trace_tro_from_release_bundle(
+            bundled, data_manifest, certification=self.data_certification
+        )
+
     def __repr__(self) -> str:
         # Give the id and version, and the number of variables, parameters, parameter nodes, parameter values
         return f"<TaxBenefitModelVersion id={self.id} variables={len(self.variables)} parameters={len(self.parameters)} parameter_nodes={len(self.parameter_nodes)} parameter_values={len(self.parameter_values)}>"
diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/core/trace_tro.py
index ae31a29e..ca11ca5d 100644
--- a/src/policyengine/core/trace_tro.py
+++ b/src/policyengine/core/trace_tro.py
@@ -1,26 +1,60 @@
+"""TRACE Transparent Research Object (TRO) export.
+
+Emits TROv v0.1 JSON-LD for a PolicyEngine certified runtime bundle. The
+TRO is the standards-based provenance surface on top of the internal
+release manifests; it pins the model wheel, bundle manifest, data release
+manifest, and certified dataset artifact together by sha256 and exposes
+certification metadata in machine-readable fields so downstream tooling
+does not have to parse prose.
+
+See https://w3id.org/trace/trov/0.1 for the vocabulary and
+docs/release-bundles.md for how the bundle layer is composed.
+"""
+
 from __future__ import annotations
 
 import hashlib
 import json
+import os
 from collections.abc import Iterable, Mapping
-from typing import Optional
+from typing import Any, Optional
 
 from .release_manifest import (
     CountryReleaseManifest,
     DataCertification,
     DataReleaseManifest,
+    fetch_pypi_wheel_metadata,
+    https_dataset_uri,
+    https_release_manifest_uri,
 )
 
 TRACE_TROV_VERSION = "0.1"
-TRACE_CONTEXT = [
+POLICYENGINE_TRACE_NAMESPACE = "https://policyengine.org/trace/0.1#"
+
+TRACE_CONTEXT: list[dict[str, str]] = [
     {
         "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
         "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
         "trov": "https://w3id.org/trace/trov/0.1#",
         "schema": "https://schema.org/",
+        "pe": POLICYENGINE_TRACE_NAMESPACE,
     }
 ]
 
+POLICYENGINE_ORGANIZATION: dict[str, str] = {
+    "@type": "schema:Organization",
+    "schema:name": "PolicyEngine",
+    "schema:url": "https://policyengine.org",
+}
+
+_MIME_TYPES = {  # keyed by lower-cased file suffix without the leading dot
+    "h5": "application/x-hdf5",
+    "json": "application/json",
+    "jsonld": "application/ld+json",
+    "whl": "application/zip",
+    "tar.gz": "application/gzip",  # matched via endswith(), never by rsplit
+}
+
 
 def _hash_object(value: str) -> dict[str, str]:
     return {
@@ -30,12 +64,11 @@ def _hash_object(value: str) -> dict[str, str]:
 
 
 def _artifact_mime_type(path_or_uri: str) -> Optional[str]:
-    suffix = path_or_uri.rsplit(".", 1)[-1].lower() if "." in path_or_uri else ""
-    return {
-        "h5": "application/x-hdf5",
-        "json": "application/json",
-        "jsonld": "application/ld+json",
-    }.get(suffix)
+    lowered = path_or_uri.lower()
+    if lowered.endswith(".tar.gz"):  # two-part suffix; rsplit below would see only "gz"
+        return _MIME_TYPES["tar.gz"]
+    suffix = lowered.rsplit(".", 1)[-1] if "." in lowered else ""
+    return _MIME_TYPES.get(suffix)
 
 
 def _canonical_json_bytes(value: Mapping) -> bytes:
@@ -45,11 +78,53 @@ def _canonical_json_bytes(value: Mapping) -> bytes:
 def compute_trace_composition_fingerprint(
     artifact_hashes: Iterable[str],
 ) -> str:
+    """Return the SHA-256 hex digest of the sorted, concatenated artifact hashes."""
     digest = hashlib.sha256()
     digest.update("".join(sorted(artifact_hashes)).encode("utf-8"))
     return digest.hexdigest()
 
 
+def _ci_attestation() -> dict[str, str]:
+    """Collect ``pe:ci*`` fields from the GitHub Actions environment, if any."""
+    env = os.environ
+    if env.get("GITHUB_ACTIONS") != "true":
+        return {}
+    fields: dict[str, str] = {}
+    server = env.get("GITHUB_SERVER_URL")
+    repo = env.get("GITHUB_REPOSITORY")
+    run_id = env.get("GITHUB_RUN_ID")
+    if server and repo and run_id:
+        fields["pe:ciRunUrl"] = f"{server}/{repo}/actions/runs/{run_id}"
+    # Commit and ref are independent of the run URL; add whichever is set.
+    for key, name in (("pe:ciGitSha", "GITHUB_SHA"), ("pe:ciGitRef", "GITHUB_REF")):
+        value = env.get(name)
+        if value:
+            fields[key] = value
+    return fields
+
+
+def _resolve_model_wheel_hash(
+    country_manifest: CountryReleaseManifest,
+    *,
+    model_wheel_sha256: Optional[str],
+    model_wheel_url: Optional[str],
+    fetch_pypi: Any,
+) -> tuple[Optional[str], Optional[str]]:
+    """Resolve ``(sha256, url)``: arguments, then manifest, then ``fetch_pypi``."""
+    package = country_manifest.model_package
+    sha = model_wheel_sha256 or package.sha256
+    url = model_wheel_url or package.wheel_url
+    # Only call fetch_pypi when the arguments and manifest leave a gap.
+    if sha is None or url is None:
+        try:
+            metadata = fetch_pypi(package.name, package.version)
+        except Exception:
+            # Best effort: a failed lookup keeps whatever was already known.
+            return sha, url
+        return sha or metadata.get("sha256"), url or metadata.get("url")
+    return sha, url
+
+
 def build_trace_tro_from_release_bundle(
     country_manifest: CountryReleaseManifest,
     data_release_manifest: DataReleaseManifest,
@@ -57,7 +132,21 @@ def build_trace_tro_from_release_bundle(
     certification: Optional[DataCertification] = None,
     bundle_manifest_path: Optional[str] = None,
     data_release_manifest_path: Optional[str] = None,
+    model_wheel_sha256: Optional[str] = None,
+    model_wheel_url: Optional[str] = None,
+    fetch_pypi: Any = fetch_pypi_wheel_metadata,
+    ci_attestation: Optional[Mapping[str, str]] = None,
 ) -> dict:
+    """Build a TRACE TRO for a certified runtime bundle.
+
+    Artifacts in the composition: bundle manifest, data release manifest,
+    certified dataset, and the country model wheel. The wheel hash is read
+    from the bundled manifest when available and fetched from PyPI otherwise.
+
+    Certification metadata is encoded as structured ``pe:*`` fields on the
+    :class:`trov:TrustedResearchPerformance` node so downstream tools can
+    read it without parsing the description.
+    """
     certified_artifact = country_manifest.certified_data_artifact
     if certified_artifact is None:
         raise ValueError(
@@ -81,11 +170,13 @@ def build_trace_tro_from_release_bundle(
         bundle_manifest_path
         or f"data/release_manifests/{country_manifest.country_id}.json"
     )
-    data_manifest_location = data_release_manifest_path or (
-        "https://huggingface.co/"
-        f"{country_manifest.data_package.repo_id}/resolve/"
-        f"{country_manifest.data_package.version}/"
-        f"{country_manifest.data_package.release_manifest_path}"
+    data_manifest_location = data_release_manifest_path or https_release_manifest_uri(
+        country_manifest.data_package
+    )
+    dataset_location = https_dataset_uri(
+        repo_id=dataset_artifact.repo_id,
+        path_in_repo=dataset_artifact.path,
+        revision=dataset_artifact.revision,
     )
 
     bundle_manifest_payload = country_manifest.model_dump(mode="json")
@@ -97,34 +188,64 @@ def build_trace_tro_from_release_bundle(
         _canonical_json_bytes(data_release_payload)
     ).hexdigest()
 
-    artifact_specs = [
+    model_wheel_sha, model_wheel_https = _resolve_model_wheel_hash(
+        country_manifest,
+        model_wheel_sha256=model_wheel_sha256,
+        model_wheel_url=model_wheel_url,
+        fetch_pypi=fetch_pypi,
+    )
+
+    artifact_specs: list[dict[str, Any]] = [
         {
+            "id": "bundle_manifest",
             "hash": bundle_manifest_hash,
             "location": bundle_manifest_location,
             "mime_type": "application/json",
+            "name": f"policyengine.py bundle manifest for {country_manifest.country_id}",
         },
         {
+            "id": "data_release_manifest",
             "hash": data_release_manifest_hash,
             "location": data_manifest_location,
             "mime_type": "application/json",
+            "name": f"{country_manifest.data_package.name} release manifest "
+            f"{country_manifest.data_package.version}",
         },
         {
+            "id": "dataset",
             "hash": dataset_artifact.sha256,
-            "location": certified_artifact.uri,
-            "mime_type": _artifact_mime_type(certified_artifact.uri),
+            "location": dataset_location,
+            "mime_type": _artifact_mime_type(dataset_artifact.path),
+            "name": certified_artifact.dataset,
         },
     ]
 
-    composition_artifacts = []
-    arrangement_locations = []
-    artifact_hashes = []
+    if model_wheel_sha is not None:
+        artifact_specs.append(
+            {
+                "id": "model_wheel",
+                "hash": model_wheel_sha,
+                "location": model_wheel_https
+                or f"https://pypi.org/project/{country_manifest.model_package.name}/"
+                f"{country_manifest.model_package.version}/",
+                "mime_type": _artifact_mime_type(model_wheel_https or "")
+                or "application/zip",
+                "name": f"{country_manifest.model_package.name}=="
+                f"{country_manifest.model_package.version} wheel",
+            }
+        )
+
+    composition_artifacts: list[dict[str, Any]] = []
+    arrangement_locations: list[dict[str, Any]] = []
+    artifact_hashes: list[str] = []
 
     for index, artifact in enumerate(artifact_specs):
-        artifact_id = f"composition/1/artifact/{index}"
+        artifact_id = f"composition/1/artifact/{artifact['id']}"
         artifact_hashes.append(artifact["hash"])
-        artifact_entry = {
+        artifact_entry: dict[str, Any] = {
             "@id": artifact_id,
             "@type": "trov:ResearchArtifact",
+            "schema:name": artifact["name"],
             "trov:hash": _hash_object(artifact["hash"]),
         }
         if artifact["mime_type"] is not None:
@@ -132,129 +253,356 @@ def build_trace_tro_from_release_bundle(
         composition_artifacts.append(artifact_entry)
         arrangement_locations.append(
             {
-                "@id": f"arrangement/0/location/{index}",
+                "@id": f"arrangement/0/location/{artifact['id']}",
                 "@type": "trov:ArtifactLocation",
                 "trov:artifact": {"@id": artifact_id},
                 "trov:path": artifact["location"],
             }
         )
 
-    certification_description = ""
+    certification_fields: dict[str, Any] = {}
+    certification_description_parts: list[str] = []
     if effective_certification is not None:
-        certification_description = (
-            f" Certified for runtime model version "
+        certification_fields["pe:certifiedForModelVersion"] = (
+            effective_certification.certified_for_model_version
+        )
+        certification_fields["pe:compatibilityBasis"] = (
+            effective_certification.compatibility_basis
+        )
+        certification_description_parts.append(
+            f"Certified for runtime model version "
             f"{effective_certification.certified_for_model_version} via "
             f"{effective_certification.compatibility_basis}."
         )
         if effective_certification.built_with_model_version is not None:
-            certification_description += (
-                f" Built with {country_manifest.model_package.name} "
+            certification_fields["pe:builtWithModelVersion"] = (
+                effective_certification.built_with_model_version
+            )
+            certification_description_parts.append(
+                f"Built with {country_manifest.model_package.name} "
                 f"{effective_certification.built_with_model_version}."
             )
+        if effective_certification.built_with_model_git_sha is not None:
+            certification_fields["pe:builtWithModelGitSha"] = (
+                effective_certification.built_with_model_git_sha
+            )
         if effective_certification.data_build_fingerprint is not None:
-            certification_description += (
-                f" Data-build fingerprint: "
+            certification_fields["pe:dataBuildFingerprint"] = (
+                effective_certification.data_build_fingerprint
+            )
+            certification_description_parts.append(
+                f"Data-build fingerprint: "
                 f"{effective_certification.data_build_fingerprint}."
             )
+        if effective_certification.data_build_id is not None:
+            certification_fields["pe:dataBuildId"] = (
+                effective_certification.data_build_id
+            )
+        if effective_certification.certified_by is not None:
+            certification_fields["pe:certifiedBy"] = (
+                effective_certification.certified_by
+            )
+
+    attestation_fields = (
+        dict(ci_attestation) if ci_attestation is not None else _ci_attestation()
+    )
 
     created_at = country_manifest.published_at or (
         data_release_manifest.build.built_at
         if data_release_manifest.build is not None
         else None
     )
+    started_at = (
+        data_release_manifest.build.built_at
+        if data_release_manifest.build is not None
+        else created_at
+    )
     build_id = (
-        effective_certification.data_build_id
-        if effective_certification is not None
-        else (
-            certified_artifact.build_id
-            or f"{country_manifest.data_package.name}-{country_manifest.data_package.version}"
+        (
+            effective_certification.data_build_id
+            if effective_certification is not None
+            else None
+        )
+        or certified_artifact.build_id
+        or (
+            f"{country_manifest.data_package.name}-{country_manifest.data_package.version}"
         )
     )
 
-    return {
-        "@context": TRACE_CONTEXT,
-        "@graph": [
+    certification_description = (
+        " " + " ".join(certification_description_parts)
+        if certification_description_parts
+        else ""
+    )
+
+    tro_node: dict[str, Any] = {
+        "@id": "tro",
+        "@type": ["trov:TransparentResearchObject", "schema:CreativeWork"],
+        "trov:vocabularyVersion": TRACE_TROV_VERSION,
+        "schema:creator": POLICYENGINE_ORGANIZATION,
+        "schema:name": (
+            f"policyengine {country_manifest.country_id} certified bundle TRO"
+        ),
+        "schema:description": (
+            f"TRACE TRO for certified runtime bundle "
+            f"{country_manifest.bundle_id or country_manifest.country_id} "
+            f"covering the bundled country release manifest, the country data "
+            f"release manifest, the certified dataset artifact, and the model "
+            f"wheel." + certification_description
+        ),
+        "trov:wasAssembledBy": {
+            "@id": "trs",
+            "@type": ["trov:TrustedResearchSystem", "schema:Organization"],
+            "schema:name": "PolicyEngine certified release bundle pipeline",
+            "schema:description": (
+                "PolicyEngine certification workflow for runtime bundles that "
+                "pin a country model version, a country data release, and a "
+                "specific dataset artifact."
+            ),
+        },
+        "trov:createdWith": {
+            "@type": "schema:SoftwareApplication",
+            "schema:name": "policyengine",
+            "schema:softwareVersion": country_manifest.policyengine_version,
+        },
+        "trov:hasComposition": {
+            "@id": "composition/1",
+            "@type": "trov:ArtifactComposition",
+            "trov:hasFingerprint": {
+                "@id": "fingerprint",
+                "@type": "trov:CompositionFingerprint",
+                "trov:hash": _hash_object(
+                    compute_trace_composition_fingerprint(artifact_hashes)
+                ),
+            },
+            "trov:hasArtifact": composition_artifacts,
+        },
+        "trov:hasArrangement": [
             {
-                "@id": "tro",
-                "@type": ["trov:TransparentResearchObject", "schema:CreativeWork"],
-                "trov:vocabularyVersion": TRACE_TROV_VERSION,
-                "schema:creator": country_manifest.policyengine_version,
-                "schema:name": (
-                    f"policyengine {country_manifest.country_id} certified bundle TRO"
+                "@id": "arrangement/0",
+                "@type": "trov:ArtifactArrangement",
+                "rdfs:comment": (
+                    f"Certified arrangement for bundle "
+                    f"{country_manifest.bundle_id or country_manifest.country_id}."
                 ),
-                "schema:description": (
-                    f"TRACE TRO for certified runtime bundle "
-                    f"{country_manifest.bundle_id or country_manifest.country_id} "
-                    f"covering the bundled country release manifest, the country data "
-                    f"release manifest, and the certified dataset artifact."
-                    f"{certification_description}"
+                "trov:hasArtifactLocation": arrangement_locations,
+            }
+        ],
+        "trov:hasPerformance": [
+            {
+                "@id": "trp/0",
+                "@type": "trov:TrustedResearchPerformance",
+                "rdfs:comment": (
+                    f"Certification of build {build_id} for "
+                    f"{country_manifest.model_package.name} "
+                    f"{country_manifest.model_package.version}."
                 ),
-                "schema:dateCreated": created_at,
-                "trov:wasAssembledBy": {
-                    "@id": "trs",
-                    "@type": ["trov:TrustedResearchSystem", "schema:Organization"],
-                    "schema:name": "PolicyEngine certified release bundle pipeline",
-                    "schema:description": (
-                        "PolicyEngine certification workflow for runtime bundles that "
-                        "pin a country model version, a country data release, and a "
-                        "specific dataset artifact."
-                    ),
-                },
-                "trov:createdWith": {
-                    "@type": "schema:SoftwareApplication",
-                    "schema:name": "policyengine",
-                    "schema:softwareVersion": country_manifest.policyengine_version,
+                "trov:wasConductedBy": {"@id": "trs"},
+                "trov:startedAtTime": started_at,
+                "trov:endedAtTime": created_at,
+                "trov:contributedToArrangement": {
+                    "@id": "trp/0/binding/0",
+                    "@type": "trov:ArrangementBinding",
+                    "trov:arrangement": {"@id": "arrangement/0"},
                 },
-                "trov:hasComposition": {
-                    "@id": "composition/1",
-                    "@type": "trov:ArtifactComposition",
-                    "trov:hasFingerprint": {
-                        "@id": "fingerprint",
-                        "@type": "trov:CompositionFingerprint",
-                        "trov:hash": _hash_object(
-                            compute_trace_composition_fingerprint(artifact_hashes)
-                        ),
-                    },
-                    "trov:hasArtifact": composition_artifacts,
-                },
-                "trov:hasArrangement": [
-                    {
-                        "@id": "arrangement/0",
-                        "@type": "trov:ArtifactArrangement",
-                        "rdfs:comment": (
-                            f"Certified arrangement for bundle "
-                            f"{country_manifest.bundle_id or country_manifest.country_id}."
-                        ),
-                        "trov:hasArtifactLocation": arrangement_locations,
-                    }
-                ],
-                "trov:hasPerformance": [
-                    {
-                        "@id": "trp/0",
-                        "@type": "trov:TrustedResearchPerformance",
-                        "rdfs:comment": (
-                            f"Certification of build {build_id} for "
-                            f"{country_manifest.model_package.name} "
-                            f"{country_manifest.model_package.version}."
-                        ),
-                        "trov:wasConductedBy": {"@id": "trs"},
-                        "trov:startedAtTime": (
-                            data_release_manifest.build.built_at
-                            if data_release_manifest.build is not None
-                            else created_at
-                        ),
-                        "trov:endedAtTime": created_at,
-                        "trov:contributedToArrangement": {
-                            "@id": "trp/0/binding/0",
-                            "@type": "trov:ArrangementBinding",
-                            "trov:arrangement": {"@id": "arrangement/0"},
-                        },
-                    }
-                ],
+                **certification_fields,
+                **attestation_fields,
             }
         ],
     }
+    if created_at is not None:
+        tro_node["schema:dateCreated"] = created_at
+
+    return {"@context": TRACE_CONTEXT, "@graph": [tro_node]}
 
 
 def serialize_trace_tro(tro: Mapping) -> bytes:
+    """Serialize a TRO as UTF-8 JSON: 2-space indent, sorted keys, trailing newline."""
     return (json.dumps(tro, indent=2, sort_keys=True) + "\n").encode("utf-8")
+
+
+def extract_bundle_tro_reference(tro: Mapping) -> dict[str, Any]:
+    """Summarise a bundle TRO for reference from another TRO.
+
+    Reads the first ``@graph`` node and returns its composition fingerprint,
+    ``schema:name`` and ``trov:createdWith`` software version under the keys
+    ``fingerprint``, ``name`` and ``policyengine_version``.
+
+    Raises:
+        ValueError: If the graph is missing/empty or has no fingerprint value.
+    """
+    nodes = tro.get("@graph") or []
+    if not nodes:
+        raise ValueError("TRO has an empty graph.")
+    root = nodes[0]
+    composition = root.get("trov:hasComposition", {})
+    hash_node = composition.get("trov:hasFingerprint", {}).get("trov:hash", {})
+    fingerprint = hash_node.get("trov:hashValue")
+    if fingerprint is None:
+        raise ValueError("TRO is missing a composition fingerprint.")
+    software = root.get("trov:createdWith", {})
+    return {
+        "fingerprint": fingerprint,
+        "name": root.get("schema:name"),
+        "policyengine_version": software.get("schema:softwareVersion"),
+    }
+
+
+def build_simulation_trace_tro(
+    *,
+    bundle_tro: Mapping,
+    results_payload: Mapping,
+    reform_payload: Optional[Mapping] = None,
+    reform_name: Optional[str] = None,
+    simulation_id: Optional[str] = None,
+    created_at: Optional[str] = None,
+    started_at: Optional[str] = None,
+    results_location: Optional[str] = None,
+    reform_location: Optional[str] = None,
+    bundle_tro_location: Optional[str] = None,
+    ci_attestation: Optional[Mapping[str, str]] = None,
+) -> dict:
+    """Build a per-simulation TRO chaining a bundle TRO to a results payload.
+
+    Composition: the bundle TRO (hashed as one artifact), the reform JSON if
+    given, and the results payload. ``*_location`` arguments override the
+    default ``trov:path`` values; ``ci_attestation`` (even empty) replaces
+    the attestation fields otherwise returned by ``_ci_attestation()``.
+    """
+    bundle_reference = extract_bundle_tro_reference(bundle_tro)
+    bundle_bytes = _canonical_json_bytes(bundle_tro)
+    bundle_hash = hashlib.sha256(bundle_bytes).hexdigest()
+    results_bytes = _canonical_json_bytes(results_payload)
+    results_hash = hashlib.sha256(results_bytes).hexdigest()
+
+    artifact_specs: list[dict[str, Any]] = [
+        {
+            "id": "bundle_tro",
+            "hash": bundle_hash,
+            "location": bundle_tro_location
+            or f"bundle.trace.tro.jsonld#{bundle_reference['fingerprint']}",
+            "mime_type": "application/ld+json",
+            "name": bundle_reference.get("name") or "policyengine bundle TRO",
+        }
+    ]
+    if reform_payload is not None:
+        reform_bytes = _canonical_json_bytes(reform_payload)
+        reform_hash = hashlib.sha256(reform_bytes).hexdigest()
+        artifact_specs.append(
+            {
+                "id": "reform",
+                "hash": reform_hash,
+                "location": reform_location or "reform.json",
+                "mime_type": "application/json",
+                "name": reform_name or "reform",
+            }
+        )
+    artifact_specs.append(
+        {
+            "id": "results",
+            "hash": results_hash,
+            "location": results_location or "results.json",
+            "mime_type": "application/json",
+            "name": "results.json",
+        }
+    )
+
+    composition_artifacts: list[dict[str, Any]] = []
+    arrangement_locations: list[dict[str, Any]] = []
+    artifact_hashes: list[str] = []
+    for artifact in artifact_specs:
+        artifact_id = f"composition/1/artifact/{artifact['id']}"
+        artifact_hashes.append(artifact["hash"])  # input to the fingerprint below
+        composition_artifacts.append(
+            {
+                "@id": artifact_id,
+                "@type": "trov:ResearchArtifact",
+                "schema:name": artifact["name"],
+                "trov:hash": _hash_object(artifact["hash"]),
+                "trov:mimeType": artifact["mime_type"],
+            }
+        )
+        arrangement_locations.append(
+            {
+                "@id": f"arrangement/0/location/{artifact['id']}",
+                "@type": "trov:ArtifactLocation",
+                "trov:artifact": {"@id": artifact_id},
+                "trov:path": artifact["location"],
+            }
+        )
+
+    attestation_fields = (  # an explicit mapping, even empty, skips env detection
+        dict(ci_attestation) if ci_attestation is not None else _ci_attestation()
+    )
+    simulation_slug = simulation_id or "simulation"  # label used in name and comment
+
+    tro_node: dict[str, Any] = {
+        "@id": "tro",
+        "@type": ["trov:TransparentResearchObject", "schema:CreativeWork"],
+        "trov:vocabularyVersion": TRACE_TROV_VERSION,
+        "schema:creator": POLICYENGINE_ORGANIZATION,
+        "schema:name": f"policyengine simulation TRO ({simulation_slug})",
+        "schema:description": (
+            "TRACE TRO for a PolicyEngine simulation result. Composition pins "
+            "the certified runtime bundle TRO, the reform specification "
+            "(where applicable), and the results.json payload."
+        ),
+        "trov:createdWith": {
+            "@type": "schema:SoftwareApplication",
+            "schema:name": "policyengine",
+            "schema:softwareVersion": bundle_reference.get("policyengine_version"),
+        },
+        "trov:wasAssembledBy": {
+            "@id": "trs",
+            "@type": ["trov:TrustedResearchSystem", "schema:Organization"],
+            "schema:name": "PolicyEngine simulation pipeline",
+            "schema:description": (
+                "PolicyEngine simulation that consumes a certified runtime "
+                "bundle and produces a results.json payload."
+            ),
+        },
+        "trov:hasComposition": {
+            "@id": "composition/1",
+            "@type": "trov:ArtifactComposition",
+            "trov:hasFingerprint": {
+                "@id": "fingerprint",
+                "@type": "trov:CompositionFingerprint",
+                "trov:hash": _hash_object(
+                    compute_trace_composition_fingerprint(artifact_hashes)
+                ),
+            },
+            "trov:hasArtifact": composition_artifacts,
+        },
+        "trov:hasArrangement": [
+            {
+                "@id": "arrangement/0",
+                "@type": "trov:ArtifactArrangement",
+                "rdfs:comment": f"Simulation arrangement for {simulation_slug}.",
+                "trov:hasArtifactLocation": arrangement_locations,
+            }
+        ],
+        "trov:hasPerformance": [
+            {
+                "@id": "trp/0",
+                "@type": "trov:TrustedResearchPerformance",
+                "rdfs:comment": (
+                    f"PolicyEngine simulation bound to bundle fingerprint "
+                    f"{bundle_reference['fingerprint']}."
+                ),
+                "trov:wasConductedBy": {"@id": "trs"},
+                "trov:startedAtTime": started_at or created_at,  # None if both unset
+                "trov:endedAtTime": created_at,
+                "trov:contributedToArrangement": {
+                    "@id": "trp/0/binding/0",
+                    "@type": "trov:ArrangementBinding",
+                    "trov:arrangement": {"@id": "arrangement/0"},
+                },
+                "pe:bundleFingerprint": bundle_reference["fingerprint"],
+                **attestation_fields,
+            }
+        ],
+    }
+    if created_at is not None:  # the other timestamps are emitted even as None
+        tro_node["schema:dateCreated"] = created_at
+
+    return {"@context": TRACE_CONTEXT, "@graph": [tro_node]}
diff --git a/src/policyengine/data/schemas/trace_tro.schema.json b/src/policyengine/data/schemas/trace_tro.schema.json
new file mode 100644
index 00000000..baa03d1c
--- /dev/null
+++ b/src/policyengine/data/schemas/trace_tro.schema.json
@@ -0,0 +1,210 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://policyengine.org/schemas/trace_tro/0.1.json",
+  "title": "PolicyEngine TRACE TRO",
+  "description": "JSON Schema for PolicyEngine's TRACE Transparent Research Object (TROv v0.1) emission. Catches structural regressions in bundle and per-simulation TROs.",
+  "type": "object",
+  "required": ["@context", "@graph"],
+  "properties": {
+    "@context": {
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "required": ["trov", "schema"],
+        "properties": {
+          "trov": {
+            "type": "string",
+            "const": "https://w3id.org/trace/trov/0.1#"
+          },
+          "schema": {
+            "type": "string",
+            "const": "https://schema.org/"
+          },
+          "pe": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "@graph": {
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "$ref": "#/$defs/troNode"
+      }
+    }
+  },
+  "$defs": {
+    "hash": {
+      "type": "object",
+      "required": ["trov:hashAlgorithm", "trov:hashValue"],
+      "properties": {
+        "trov:hashAlgorithm": { "type": "string" },
+        "trov:hashValue": {
+          "type": "string",
+          "pattern": "^[a-f0-9]{64}$"
+        }
+      }
+    },
+    "organization": {
+      "type": "object",
+      "required": ["@type", "schema:name"],
+      "properties": {
+        "@type": { "const": "schema:Organization" },
+        "schema:name": { "type": "string", "minLength": 1 },
+        "schema:url": { "type": "string", "format": "uri" }
+      }
+    },
+    "artifact": {
+      "type": "object",
+      "required": ["@id", "@type", "trov:hash"],
+      "properties": {
+        "@id": { "type": "string" },
+        "@type": { "const": "trov:ResearchArtifact" },
+        "schema:name": { "type": "string" },
+        "trov:hash": { "$ref": "#/$defs/hash" },
+        "trov:mimeType": { "type": "string" }
+      }
+    },
+    "artifactLocation": {
+      "type": "object",
+      "required": ["@id", "@type", "trov:artifact", "trov:path"],
+      "properties": {
+        "@id": { "type": "string" },
+        "@type": { "const": "trov:ArtifactLocation" },
+        "trov:artifact": {
+          "type": "object",
+          "required": ["@id"],
+          "properties": { "@id": { "type": "string" } }
+        },
+        "trov:path": { "type": "string", "minLength": 1 }
+      }
+    },
+    "troNode": {
+      "type": "object",
+      "required": [
+        "@id",
+        "@type",
+        "trov:vocabularyVersion",
+        "schema:creator",
+        "schema:name",
+        "trov:hasComposition",
+        "trov:hasArrangement",
+        "trov:hasPerformance"
+      ],
+      "properties": {
+        "@id": { "type": "string" },
+        "@type": {
+          "type": "array",
+          "minItems": 2,
+          "contains": { "const": "trov:TransparentResearchObject" }
+        },
+        "trov:vocabularyVersion": { "type": "string", "const": "0.1" },
+        "schema:creator": { "$ref": "#/$defs/organization" },
+        "schema:name": { "type": "string", "minLength": 1 },
+        "schema:description": { "type": "string" },
+        "schema:dateCreated": { "type": "string" },
+        "trov:wasAssembledBy": {
+          "type": "object",
+          "required": ["@id", "@type", "schema:name"],
+          "properties": {
+            "@id": { "type": "string" },
+            "@type": {
+              "type": "array",
+              "contains": { "const": "trov:TrustedResearchSystem" }
+            }
+          }
+        },
+        "trov:createdWith": {
+          "type": "object",
+          "required": ["@type", "schema:name"],
+          "properties": {
+            "@type": { "const": "schema:SoftwareApplication" },
+            "schema:name": { "type": "string" },
+            "schema:softwareVersion": {
+              "oneOf": [
+                { "type": "string" },
+                { "type": "null" }
+              ]
+            }
+          }
+        },
+        "trov:hasComposition": {
+          "type": "object",
+          "required": ["@id", "@type", "trov:hasFingerprint", "trov:hasArtifact"],
+          "properties": {
+            "@id": { "type": "string" },
+            "@type": { "const": "trov:ArtifactComposition" },
+            "trov:hasFingerprint": {
+              "type": "object",
+              "required": ["@id", "@type", "trov:hash"],
+              "properties": {
+                "@id": { "type": "string" },
+                "@type": { "const": "trov:CompositionFingerprint" },
+                "trov:hash": { "$ref": "#/$defs/hash" }
+              }
+            },
+            "trov:hasArtifact": {
+              "type": "array",
+              "minItems": 1,
+              "items": { "$ref": "#/$defs/artifact" }
+            }
+          }
+        },
+        "trov:hasArrangement": {
+          "type": "array",
+          "minItems": 1,
+          "items": {
+            "type": "object",
+            "required": ["@id", "@type", "trov:hasArtifactLocation"],
+            "properties": {
+              "@id": { "type": "string" },
+              "@type": { "const": "trov:ArtifactArrangement" },
+              "trov:hasArtifactLocation": {
+                "type": "array",
+                "minItems": 1,
+                "items": { "$ref": "#/$defs/artifactLocation" }
+              }
+            }
+          }
+        },
+        "trov:hasPerformance": {
+          "type": "array",
+          "minItems": 1,
+          "items": {
+            "type": "object",
+            "required": [
+              "@id",
+              "@type",
+              "trov:wasConductedBy",
+              "trov:contributedToArrangement"
+            ],
+            "properties": {
+              "@id": { "type": "string" },
+              "@type": { "const": "trov:TrustedResearchPerformance" },
+              "trov:wasConductedBy": {
+                "type": "object",
+                "required": ["@id"],
+                "properties": { "@id": { "type": "string" } }
+              },
+              "trov:contributedToArrangement": {
+                "type": "object",
+                "required": ["@id", "@type", "trov:arrangement"],
+                "properties": {
+                  "@id": { "type": "string" },
+                  "@type": { "const": "trov:ArrangementBinding" },
+                  "trov:arrangement": {
+                    "type": "object",
+                    "required": ["@id"],
+                    "properties": { "@id": { "type": "string" } }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/policyengine/results/__init__.py b/src/policyengine/results/__init__.py
index 812f5027..7d93cb7b 100644
--- a/src/policyengine/results/__init__.py
+++ b/src/policyengine/results/__init__.py
@@ -5,6 +5,10 @@
     TableEntry,
     ValueEntry,
 )
+from policyengine.results.trace_tro import (
+    build_results_trace_tro,
+    write_results_with_trace_tro,
+)
 from policyengine.results.tracking import tracked_value
 
 __all__ = [
@@ -13,5 +17,7 @@
     "ResultsMetadata",
     "TableEntry",
     "ValueEntry",
+    "build_results_trace_tro",
     "tracked_value",
+    "write_results_with_trace_tro",
 ]
diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py
new file mode 100644
index 00000000..fc1106eb
--- /dev/null
+++ b/src/policyengine/results/trace_tro.py
@@ -0,0 +1,113 @@
+"""Per-simulation TRACE TRO for results.json payloads.
+
+The certified-bundle TRO pins the country model, data package, and
+dataset artifact together. A simulation TRO chains that bundle to a
+specific reform + ``results.json`` payload so a published result can
+be cited with an immutable composition fingerprint.
+
+See :mod:`policyengine.core.trace_tro` for the bundle-level layer.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Optional, Union
+
+from policyengine.core.trace_tro import (
+    build_simulation_trace_tro,
+    serialize_trace_tro,
+)
+
+from .schema import ResultsJson
+
+
+def build_results_trace_tro(
+    results: ResultsJson,
+    *,
+    bundle_tro: Mapping,
+    reform_payload: Optional[Mapping] = None,
+    reform_name: Optional[str] = None,
+    simulation_id: Optional[str] = None,
+    results_location: Optional[str] = None,
+    reform_location: Optional[str] = None,
+    bundle_tro_location: Optional[str] = None,
+) -> dict:
+    """Build a per-simulation TRO for a ``ResultsJson`` instance.
+
+    Args:
+        results: The validated results payload.
+        bundle_tro: A bundle-level TRACE TRO (see
+            :func:`policyengine.core.trace_tro.build_trace_tro_from_release_bundle`).
+        reform_payload: Optional reform JSON to include as a hashed artifact.
+        reform_name: Optional display name for the reform.
+        simulation_id: Optional identifier used in the TRO's ``schema:name``.
+        results_location: Optional URI or path for the ``results.json`` file.
+        reform_location: Optional URI or path for the reform JSON.
+        bundle_tro_location: Optional URI or path for the bundle TRO.
+
+    Returns:
+        The TRO as a ``dict``. Serialize with
+        :func:`policyengine.core.trace_tro.serialize_trace_tro`.
+    """
+    slug = simulation_id or (results.metadata.slug or results.metadata.title)
+    return build_simulation_trace_tro(
+        bundle_tro=bundle_tro,
+        results_payload=results.model_dump(mode="json"),
+        reform_payload=reform_payload,
+        reform_name=reform_name,
+        simulation_id=slug,
+        created_at=results.metadata.generated_at,
+        results_location=results_location,
+        reform_location=reform_location,
+        bundle_tro_location=bundle_tro_location,
+    )
+
+
+def write_results_with_trace_tro(
+    results: ResultsJson,
+    results_path: Union[str, Path],
+    *,
+    bundle_tro: Mapping,
+    reform_payload: Optional[Mapping] = None,
+    reform_name: Optional[str] = None,
+    tro_suffix: str = ".trace.tro.jsonld",
+    bundle_tro_path: Optional[Union[str, Path]] = None,
+) -> dict[str, Path]:
+    """Write ``results.json`` and a sibling per-simulation TRACE TRO.
+
+    The TRO is written next to the results file with the given suffix
+    appended to the results filename stem. Returns a dict with ``results``
+    and ``tro`` paths.
+    """
+    results_path = Path(results_path)
+    results.write(results_path)
+
+    if bundle_tro_path is not None:
+        bundle_tro_path = Path(bundle_tro_path)
+        bundle_tro_location: Optional[str] = bundle_tro_path.name
+    else:
+        bundle_tro_location = None
+
+    tro = build_results_trace_tro(
+        results,
+        bundle_tro=bundle_tro,
+        reform_payload=reform_payload,
+        reform_name=reform_name,
+        results_location=results_path.name,
+        bundle_tro_location=bundle_tro_location,
+    )
+    tro_path = results_path.with_suffix(tro_suffix)
+    tro_path.write_bytes(serialize_trace_tro(tro))
+
+    written: dict[str, Path] = {"results": results_path, "tro": tro_path}
+
+    if bundle_tro_path is not None:
+        bundle_tro_path.parent.mkdir(parents=True, exist_ok=True)
+        bundle_tro_path.write_text(
+            json.dumps(bundle_tro, indent=2, sort_keys=True) + "\n"
+        )
+        written["bundle_tro"] = bundle_tro_path
+
+    return written
diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py
new file mode 100644
index 00000000..df5b9dc5
--- /dev/null
+++ b/tests/test_trace_tro.py
@@ -0,0 +1,482 @@
+"""Tests for TRACE Transparent Research Object (TRO) export.
+
+Covers bundle-level TROs (``policyengine.core.trace_tro``) and per-simulation
+TROs (``policyengine.results.trace_tro``), plus the ``policyengine trace-tro``
+CLI and JSON-Schema conformance.
+"""
+
+from __future__ import annotations
+
+import json
+from importlib.resources import files
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from jsonschema import Draft202012Validator
+
+from policyengine.cli import main as cli_main
+from policyengine.core.release_manifest import (
+    DataReleaseManifest,
+    get_data_release_manifest,
+    get_release_manifest,
+)
+from policyengine.core.tax_benefit_model import TaxBenefitModel
+from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+from policyengine.core.trace_tro import (
+    POLICYENGINE_ORGANIZATION,
+    TRACE_TROV_VERSION,
+    build_trace_tro_from_release_bundle,
+    compute_trace_composition_fingerprint,
+    extract_bundle_tro_reference,
+    serialize_trace_tro,
+)
+from policyengine.results import (
+    ResultsJson,
+    ResultsMetadata,
+    ValueEntry,
+    build_results_trace_tro,
+    write_results_with_trace_tro,
+)
+
+FAKE_WHEEL_SHA = "a" * 64
+FAKE_WHEEL_URL = (
+    "https://files.pythonhosted.org/packages/ab/cd/"
+    "policyengine_us-1.647.0-py3-none-any.whl"
+)
+
+
+def _fake_fetch_pypi(name: str, version: str) -> dict:
+    return {"sha256": FAKE_WHEEL_SHA, "url": FAKE_WHEEL_URL}
+
+
+def _us_data_release_manifest(
+    sha256: str = "c" * 64,
+    data_build_fingerprint: str = "sha256:build",
+) -> DataReleaseManifest:
+    return DataReleaseManifest.model_validate(
+        {
+            "schema_version": 1,
+            "data_package": {
+                "name": "policyengine-us-data",
+                "version": "1.73.0",
+            },
+            "build": {
+                "build_id": "policyengine-us-data-1.73.0",
+                "built_at": "2026-04-10T12:00:00Z",
+                "built_with_model_package": {
+                    "name": "policyengine-us",
+                    "version": "1.647.0",
+                    "git_sha": "deadbeef",
+                    "data_build_fingerprint": data_build_fingerprint,
+                },
+            },
+            "compatible_model_packages": [],
+            "default_datasets": {"national": "enhanced_cps_2024"},
+            "artifacts": {
+                "enhanced_cps_2024": {
+                    "kind": "microdata",
+                    "path": "enhanced_cps_2024.h5",
+                    "repo_id": "policyengine/policyengine-us-data",
+                    "revision": "1.73.0",
+                    "sha256": sha256,
+                    "size_bytes": 123,
+                }
+            },
+        }
+    )
+
+
+@pytest.fixture
+def tro_schema() -> dict:
+    schema_path = Path(
+        str(files("policyengine").joinpath("data", "schemas", "trace_tro.schema.json"))
+    )
+    return json.loads(schema_path.read_text())
+
+
+@pytest.fixture(autouse=True)
+def clear_manifest_caches():
+    yield
+    get_release_manifest.cache_clear()
+    get_data_release_manifest.cache_clear()
+
+
+class TestBundleTRO:
+    """Bundle-level TRACE TRO emission."""
+
+    def test__given_us_bundle__then_schema_creator_is_policyengine_organization(
+        self,
+    ):
+        country_manifest = get_release_manifest("us")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        assert tro["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+
+    def test__given_us_bundle__then_model_wheel_is_hashed_as_artifact(self):
+        country_manifest = get_release_manifest("us")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        wheel_artifacts = [a for a in artifacts if a["@id"].endswith("model_wheel")]
+        assert len(wheel_artifacts) == 1
+        assert wheel_artifacts[0]["trov:hash"]["trov:hashValue"] == FAKE_WHEEL_SHA
+        locations = tro["@graph"][0]["trov:hasArrangement"][0][
+            "trov:hasArtifactLocation"
+        ]
+        wheel_location = next(
+            location
+            for location in locations
+            if location["@id"].endswith("model_wheel")
+        )
+        assert wheel_location["trov:path"] == FAKE_WHEEL_URL
+
+    def test__given_manifest_sha__then_pypi_not_fetched(self):
+        country_manifest = get_release_manifest("us")
+        country_manifest.model_package.sha256 = "b" * 64
+        country_manifest.model_package.wheel_url = "https://example/wheel.whl"
+        fetch_pypi = MagicMock()
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=fetch_pypi,
+        )
+
+        artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        wheel_artifacts = [a for a in artifacts if a["@id"].endswith("model_wheel")]
+        assert wheel_artifacts[0]["trov:hash"]["trov:hashValue"] == "b" * 64
+        fetch_pypi.assert_not_called()
+
+    def test__given_pypi_unreachable__then_wheel_artifact_is_skipped(self):
+        country_manifest = get_release_manifest("us")
+        country_manifest.model_package.sha256 = None
+        country_manifest.model_package.wheel_url = None
+
+        def failing_fetch(name, version):
+            raise RuntimeError("PyPI unreachable")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=failing_fetch,
+        )
+
+        artifact_ids = [
+            a["@id"]
+            for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        ]
+        assert not any(aid.endswith("model_wheel") for aid in artifact_ids)
+
+    def test__given_artifact_locations__then_all_paths_are_https_or_local(self):
+        country_manifest = get_release_manifest("us")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        locations = tro["@graph"][0]["trov:hasArrangement"][0][
+            "trov:hasArtifactLocation"
+        ]
+        paths = [location["trov:path"] for location in locations]
+        # Bundle manifest is a local wheel-internal path; everything else must
+        # be dereferenceable HTTPS so a reproducibility reviewer can fetch it.
+        assert paths[0].startswith("data/release_manifests/")
+        for path in paths[1:]:
+            assert path.startswith("https://"), path
+
+    def test__given_certification__then_fields_are_machine_readable(self):
+        country_manifest = get_release_manifest("us")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        assert (
+            performance["pe:certifiedForModelVersion"]
+            == country_manifest.certification.certified_for_model_version
+        )
+        assert (
+            performance["pe:compatibilityBasis"]
+            == country_manifest.certification.compatibility_basis
+        )
+        assert (
+            performance["pe:builtWithModelVersion"]
+            == country_manifest.certification.built_with_model_version
+        )
+        assert (
+            performance["pe:dataBuildId"]
+            == country_manifest.certification.data_build_id
+        )
+
+    def test__given_github_actions_env__then_ci_attestation_is_included(
+        self, monkeypatch
+    ):
+        country_manifest = get_release_manifest("us")
+        monkeypatch.setenv("GITHUB_ACTIONS", "true")
+        monkeypatch.setenv("GITHUB_SERVER_URL", "https://github.com")
+        monkeypatch.setenv("GITHUB_REPOSITORY", "PolicyEngine/policyengine.py")
+        monkeypatch.setenv("GITHUB_RUN_ID", "12345")
+        monkeypatch.setenv("GITHUB_SHA", "abc123")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        assert (
+            performance["pe:ciRunUrl"]
+            == "https://github.com/PolicyEngine/policyengine.py/actions/runs/12345"
+        )
+        assert performance["pe:ciGitSha"] == "abc123"
+
+    def test__given_non_ci_env__then_no_attestation_fields(self, monkeypatch):
+        country_manifest = get_release_manifest("us")
+        monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        assert "pe:ciRunUrl" not in performance
+        assert "pe:ciGitSha" not in performance
+
+    def test__given_same_inputs__then_built_tros_serialize_identically(self):
+        country_manifest = get_release_manifest("us")
+        data = _us_data_release_manifest()
+
+        first = serialize_trace_tro(
+            build_trace_tro_from_release_bundle(
+                country_manifest,
+                data,
+                fetch_pypi=_fake_fetch_pypi,
+                ci_attestation={},
+            )
+        )
+        second = serialize_trace_tro(
+            build_trace_tro_from_release_bundle(
+                country_manifest,
+                data,
+                fetch_pypi=_fake_fetch_pypi,
+                ci_attestation={},
+            )
+        )
+        assert first == second
+
+    def test__given_hashes_in_any_order__then_composition_fingerprint_matches(
+        self,
+    ):
+        hashes = ["ccc", "aaa", "bbb"]
+        assert compute_trace_composition_fingerprint(
+            hashes
+        ) == compute_trace_composition_fingerprint(reversed(hashes))
+
+    def test__given_generated_tro__then_validates_against_json_schema(self, tro_schema):
+        country_manifest = get_release_manifest("us")
+
+        tro = build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+        errors = list(Draft202012Validator(tro_schema).iter_errors(tro))
+        assert errors == [], [error.message for error in errors]
+
+    def test__given_vocabulary_version_constant__then_matches_context_namespace(
+        self,
+    ):
+        assert TRACE_TROV_VERSION == "0.1"
+
+    def test__given_model_version_attribute__then_trace_tro_property_works(
+        self,
+    ):
+        manifest = get_release_manifest("us")
+        data_release_manifest = _us_data_release_manifest()
+        model_version = TaxBenefitModelVersion(
+            model=TaxBenefitModel(id="us"),
+            version=manifest.model_package.version,
+            release_manifest=manifest,
+            model_package=manifest.model_package,
+            data_package=manifest.data_package,
+            default_dataset_uri=manifest.default_dataset_uri,
+            data_certification=manifest.certification,
+        )
+
+        with patch(
+            "policyengine.core.tax_benefit_model_version.get_data_release_manifest",
+            return_value=data_release_manifest,
+        ):
+            with patch(
+                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                side_effect=_fake_fetch_pypi,
+            ):
+                tro = model_version.trace_tro
+
+        assert tro["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+
+
+class TestSimulationTRO:
+    """Per-simulation TROs chained from a bundle TRO."""
+
+    def _bundle_tro(self):
+        country_manifest = get_release_manifest("us")
+        return build_trace_tro_from_release_bundle(
+            country_manifest,
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+
+    def _results(self, **overrides):
+        return ResultsJson(
+            metadata=ResultsMetadata(
+                title="SALT cap repeal",
+                repo="PolicyEngine/analyses",
+                generated_at="2026-04-18T12:00:00Z",
+                **overrides,
+            ),
+            values={
+                "budget_impact": ValueEntry(
+                    value=-15200000000,
+                    display="$15.2 billion",
+                    source_line=47,
+                    source_url="https://github.com/PolicyEngine/analyses/blob/main/salt.py#L47",
+                )
+            },
+        )
+
+    def test__given_bundle_and_results__then_simulation_tro_pins_both(self):
+        bundle_tro = self._bundle_tro()
+        results = self._results()
+
+        tro = build_results_trace_tro(
+            results,
+            bundle_tro=bundle_tro,
+            reform_payload={"salt_cap": 0},
+            reform_name="SALT cap repeal",
+        )
+
+        artifact_ids = {
+            a["@id"]
+            for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        }
+        assert artifact_ids == {
+            "composition/1/artifact/bundle_tro",
+            "composition/1/artifact/reform",
+            "composition/1/artifact/results",
+        }
+        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        assert (
+            performance["pe:bundleFingerprint"]
+            == extract_bundle_tro_reference(bundle_tro)["fingerprint"]
+        )
+
+    def test__given_simulation_tro__then_validates_against_json_schema(
+        self, tro_schema
+    ):
+        tro = build_results_trace_tro(
+            self._results(),
+            bundle_tro=self._bundle_tro(),
+            reform_payload={"salt_cap": 0},
+        )
+
+        errors = list(Draft202012Validator(tro_schema).iter_errors(tro))
+        assert errors == [], [error.message for error in errors]
+
+    def test__given_no_reform__then_only_bundle_and_results_are_pinned(self):
+        tro = build_results_trace_tro(self._results(), bundle_tro=self._bundle_tro())
+
+        artifact_ids = {
+            a["@id"]
+            for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        }
+        assert artifact_ids == {
+            "composition/1/artifact/bundle_tro",
+            "composition/1/artifact/results",
+        }
+
+    def test__given_write_helper__then_results_and_tro_files_are_sidebyside(
+        self, tmp_path
+    ):
+        written = write_results_with_trace_tro(
+            self._results(),
+            tmp_path / "results.json",
+            bundle_tro=self._bundle_tro(),
+            reform_payload={"salt_cap": 0},
+        )
+
+        assert written["results"].exists()
+        assert written["tro"].exists()
+        assert written["tro"].name == "results.trace.tro.jsonld"
+        tro_payload = json.loads(written["tro"].read_text())
+        assert tro_payload["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+
+
+class TestCLI:
+    """``policyengine`` CLI entry point."""
+
+    def test__given_trace_tro_stdout__then_writes_canonical_json(self, capsysbinary):
+        data_release_manifest = _us_data_release_manifest()
+
+        with patch(
+            "policyengine.cli.get_data_release_manifest",
+            return_value=data_release_manifest,
+        ):
+            with patch(
+                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                side_effect=_fake_fetch_pypi,
+            ):
+                exit_code = cli_main(["trace-tro", "us"])
+
+        assert exit_code == 0
+        stdout = capsysbinary.readouterr().out
+        payload = json.loads(stdout)
+        assert payload["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+        assert payload["@graph"][0]["trov:vocabularyVersion"] == TRACE_TROV_VERSION
+
+    def test__given_out_path__then_writes_to_file(self, tmp_path):
+        out = tmp_path / "nested" / "us.trace.tro.jsonld"
+        data_release_manifest = _us_data_release_manifest()
+
+        with patch(
+            "policyengine.cli.get_data_release_manifest",
+            return_value=data_release_manifest,
+        ):
+            with patch(
+                "policyengine.core.trace_tro.fetch_pypi_wheel_metadata",
+                side_effect=_fake_fetch_pypi,
+            ):
+                exit_code = cli_main(["trace-tro", "us", "--out", str(out)])
+
+        assert exit_code == 0
+        assert out.exists()
+        payload = json.loads(out.read_text())
+        assert payload["@graph"][0]["trov:vocabularyVersion"] == "0.1"
+
+    def test__given_release_manifest_command__then_prints_bundle(self, capsys):
+        exit_code = cli_main(["release-manifest", "us"])
+
+        assert exit_code == 0
+        stdout = capsys.readouterr().out
+        payload = json.loads(stdout)
+        assert payload["country_id"] == "us"

From bc2354e00574ea952ec94a5de268df329d4bc18b Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 07:26:25 -0400
Subject: [PATCH 4/8] Make results schema Python 3.9-compatible

Switches PEP 604 `X | None` unions in `ResultsMetadata` and
`ResultsJson.write` to `Optional[X]` / `Union[X, Y]`, matching the
project-wide pattern enforced for the 3.9 floor (ruff `UP007` is
disabled for the same reason in `pyproject.toml`).

Without this fix the `content-pipeline-results` branch fails
`ResultsMetadata` class construction on Python 3.9 with
`TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/policyengine/results/schema.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/policyengine/results/schema.py b/src/policyengine/results/schema.py
index 8ca94fb8..1bbb1f23 100644
--- a/src/policyengine/results/schema.py
+++ b/src/policyengine/results/schema.py
@@ -9,7 +9,7 @@
 
 import json
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional, Union
 
 from pydantic import BaseModel, model_validator
 
@@ -19,13 +19,13 @@ class ResultsMetadata(BaseModel):
 
     title: str
     repo: str
-    slug: str | None = None
-    commit: str | None = None
-    generated_at: str | None = None
-    policyengine_version: str | None = None
-    dataset: str | None = None
-    country_id: str | None = None
-    year: int | None = None
+    slug: Optional[str] = None
+    commit: Optional[str] = None
+    generated_at: Optional[str] = None
+    policyengine_version: Optional[str] = None
+    dataset: Optional[str] = None
+    country_id: Optional[str] = None
+    year: Optional[int] = None
 
 
 class ValueEntry(BaseModel):
@@ -107,7 +107,7 @@ class ResultsJson(BaseModel):
     tables: dict[str, TableEntry] = {}
     charts: dict[str, ChartEntry] = {}
 
-    def write(self, path: str | Path) -> None:
+    def write(self, path: Union[str, Path]) -> None:
         """Write validated results.json to disk."""
         path = Path(path)
         path.parent.mkdir(parents=True, exist_ok=True)

From 23318839755b298480eb86bffaa9f16993c8b706 Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 07:31:36 -0400
Subject: [PATCH 5/8] Apply ruff format to results module

Collapses an implicit f-string concatenation (`schema.py`) and a
parenthesized single f-string (`tracking.py`) onto one line each, as the
ruff 0.15.11 formatter in CI requires. No behaviour change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/policyengine/results/schema.py   | 3 +--
 src/policyengine/results/tracking.py | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/policyengine/results/schema.py b/src/policyengine/results/schema.py
index 1bbb1f23..0fe63a32 100644
--- a/src/policyengine/results/schema.py
+++ b/src/policyengine/results/schema.py
@@ -52,8 +52,7 @@ def check_row_widths(self) -> "TableEntry":
         for i, row in enumerate(self.rows):
             if len(row) != n_cols:
                 raise ValueError(
-                    f"Row {i} has {len(row)} columns but headers "
-                    f"has {n_cols}"
+                    f"Row {i} has {len(row)} columns but headers has {n_cols}"
                 )
         return self
 
diff --git a/src/policyengine/results/tracking.py b/src/policyengine/results/tracking.py
index 5650b820..badb4c96 100644
--- a/src/policyengine/results/tracking.py
+++ b/src/policyengine/results/tracking.py
@@ -56,9 +56,7 @@ def tracked_value(
     frame = inspect.stack()[_stack_offset]
     line = frame.lineno
 
-    source_url = (
-        f"https://github.com/{repo}/blob/{branch}/{filename}#L{line}"
-    )
+    source_url = f"https://github.com/{repo}/blob/{branch}/{filename}#L{line}"
 
     return {
         "value": value,

From 0009c370238d8a538d3b560903eaf4f921f160bb Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 08:22:04 -0400
Subject: [PATCH 6/8] Conform TRACE TRO to public TROv vocabulary; address
 reviewer findings

Round two of reviewer fixes. The published TRACE/TROv reference demos
use a different vocabulary than the draft this module was originally
written against; reviewers caught that our emission would not validate
against real TROv SHACL shapes.

TROv vocabulary conformance:
- Switch to the public namespace https://w3id.org/trace/2023/05/trov#
- Flatten the locally-invented trov:hash / trov:hashAlgorithm /
  trov:hashValue wrapper to the vocabulary-native trov:sha256 property
- Rename trov:path -> trov:hasLocation on ArtifactLocation
- Rename the inverse pointer to trov:hasArtifact (was trov:artifact)
- Correct TrustedResearchSystem -> TransparentResearchSystem
- Correct TrustedResearchPerformance -> TransparentResearchPerformance
- Drop the locally-invented ArrangementBinding chain; use the
  vocabulary-native trov:accessedArrangement on the TRP instead
- Emit @type as a single string (not a 2-element array), matching the
  published trov-demos reference shape

Hardening from reproducibility + code-simplifier reviewers:
- pe:emittedIn is always present ("local" or "github-actions") so a
  verifier can tell a CI-emitted TRO from a laptop rebuild without
  inferring from absent fields
- Per-simulation TRO records pe:bundleTroUrl on the performance node;
  a verifier can fetch that URL, re-hash it, and confirm it matches the
  bundle_tro artifact hash - so swapping the caller's bundle_tro dict
  is detectable
- Composition fingerprint joins hashes with \n to prevent hex-length
  concatenation collisions (without a separator, "ab" + "cdef" and
  "abcd" + "ef" join to the same input and hash identically)
- CertifiedDataArtifact.sha256 is now authoritative when present;
  us.json ships the real dataset sha256, so bundle TRO emission no
  longer requires the data release manifest to carry it
- JSON Schema rejects non-HTTPS trov:hasLocation values and requires
  canonical 64-hex sha256 strings
- Inline the real policyengine-us 1.647.0 / policyengine-uk 2.88.0
  wheel sha256 + URL on us.json/uk.json

Extracted shared helpers to collapse the ~120-line duplication between
build_trace_tro_from_release_bundle and build_simulation_trace_tro
(_assemble_composition_and_arrangement, _assemble_tro_node,
_policyengine_trs, _build_bundle_performance).

Removed dead code flagged by simplifier:
- DataReleaseArtifact.https_uri (zero callers, zero tests)
- _data_release_manifest_url (replaced by https_release_manifest_uri)
- Prose certification_description_parts (metadata is now purely in pe:*
  structured fields, as the commit message for #274 originally claimed)

CLI + release workflow:
- Dropped the broken --offline flag (never had a working code path)
- Added policyengine trace-tro-validate <path> subcommand that
  validates a TRO against the shipped JSON Schema
- Versioning CI job now runs scripts/generate_trace_tros.py and
  commits the generated bundled TROs alongside the changelog, so every
  released wheel ships with its matching TRACE TRO
- generate_trace_tros.py skips (with warning) countries whose data
  release manifest is unreachable instead of hard-failing

Tests (34 total in tests/test_trace_tro.py, replacing the prior 20):
- Real determinism: build TRO from two fresh manifest instances,
  assert bytes equal (previously tested only that json.dumps is
  deterministic)
- Forgery detection: swap bundle_tro, assert hash in sim TRO changes
- Schema rejects file:// locations
- Schema rejects missing pe:emittedIn
- Hex-length ambiguity test for the fingerprint separator
- All 4 TROv property renames have explicit assertions so a future
  regression to the wrong names fails loudly
- trace-tro-validate CLI accepts valid TROs and rejects invalid ones

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/push.yaml                   |   8 +-
 .../trace-tro-vocabulary-fix.changed.md       |  19 +
 docs/release-bundles.md                       |  77 +-
 scripts/generate_trace_tros.py                |  27 +-
 src/policyengine/cli.py                       |  75 +-
 src/policyengine/core/release_manifest.py     |  19 +-
 src/policyengine/core/trace_tro.py            | 684 +++++++++---------
 .../data/release_manifests/uk.json            |   4 +-
 .../data/release_manifests/us.json            |   7 +-
 .../data/schemas/trace_tro.schema.json        | 116 ++-
 src/policyengine/results/trace_tro.py         |  30 +-
 tests/test_trace_tro.py                       | 387 ++++++----
 12 files changed, 825 insertions(+), 628 deletions(-)
 create mode 100644 changelog.d/trace-tro-vocabulary-fix.changed.md

diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index 7708fd9b..dc70f233 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -117,7 +117,13 @@ jobs:
         run: pip install yaml-changelog towncrier && make changelog
       - name: Preview changelog update
         run: ".github/get-changelog-diff.sh"
-      - name: Update changelog
+      - name: Install package for TRO regeneration
+        run: pip install -e .
+      - name: Regenerate bundled TRACE TROs
+        env:
+          HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+        run: python scripts/generate_trace_tros.py
+      - name: Update changelog and TROs
         uses: EndBug/add-and-commit@v9
         with:
           add: "."
diff --git a/changelog.d/trace-tro-vocabulary-fix.changed.md b/changelog.d/trace-tro-vocabulary-fix.changed.md
new file mode 100644
index 00000000..4dbd5559
--- /dev/null
+++ b/changelog.d/trace-tro-vocabulary-fix.changed.md
@@ -0,0 +1,19 @@
+TRACE TRO emission now conforms to the public TROv 2023/05 vocabulary:
+switched namespace to `https://w3id.org/trace/2023/05/trov#`, flattened
+`trov:hash` nodes to the native `trov:sha256` property, renamed
+`trov:path`→`trov:hasLocation` and `trov:artifact`→`trov:hasArtifact` on
+ArtifactLocation, corrected `TrustedResearchSystem`→`TransparentResearchSystem`
+and `TrustedResearchPerformance`→`TransparentResearchPerformance`, and replaced
+the locally-invented `ArrangementBinding` chain with
+`trov:accessedArrangement` as used by the published trov-demos. Every TRO
+now carries `pe:emittedIn` (`"local"` or `"github-actions"`) so a verifier
+can distinguish a CI-emitted TRO from a laptop rebuild. Per-simulation TROs
+accept a `bundle_tro_url` that is recorded as `pe:bundleTroUrl`, letting a
+verifier independently fetch and re-hash the bundle to detect a forged
+reference. The composition fingerprint now joins hashes with `\n` to
+prevent hex-length concatenation collisions. Adds the
+`policyengine trace-tro-validate` CLI, removes the broken `--offline` flag, wires
+`scripts/generate_trace_tros.py` into the `Versioning` CI job so bundled
+TROs ship with every release, inlines the real model wheel sha256 on
+`us.json`/`uk.json`, and cleans up the dead `DataReleaseArtifact.https_uri`
+/ `_data_release_manifest_url` helpers.
diff --git a/docs/release-bundles.md b/docs/release-bundles.md
index 8a9e24a6..b2a84c29 100644
--- a/docs/release-bundles.md
+++ b/docs/release-bundles.md
@@ -204,19 +204,22 @@ composition pins four artifacts by sha256:
 - the country model wheel published to PyPI (hash read from the bundled manifest
   when present, otherwise fetched from the PyPI JSON API at emit time)
 
-Every artifact location in the TRO is a dereferenceable HTTPS URI or a path
-relative to the shipped wheel. Certification metadata is carried as structured
-`pe:*` fields on the `trov:TrustedResearchPerformance` node so downstream
-tooling can read `pe:certifiedForModelVersion`, `pe:compatibilityBasis`,
+TROs use the public TROv vocabulary at
+`https://w3id.org/trace/2023/05/trov#`. Every artifact location in the TRO
+is a dereferenceable HTTPS URI or a local path relative to the shipped
+wheel. Certification metadata is carried as structured `pe:*` fields on
+the `trov:TransparentResearchPerformance` node so downstream tooling can
+read `pe:certifiedForModelVersion`, `pe:compatibilityBasis`,
 `pe:builtWithModelVersion`, `pe:dataBuildFingerprint`, and `pe:dataBuildId`
-without parsing prose. When emitted under GitHub Actions, the TRO also carries
-`pe:ciRunUrl` and `pe:ciGitSha` attestation.
+without parsing prose. Every TRO also carries `pe:emittedIn` set to
+`"github-actions"` or `"local"`; CI-emitted TROs additionally carry
+`pe:ciRunUrl` and `pe:ciGitSha`.
 
 Country `*-data` repos should also emit a matching `trace.tro.jsonld` per
 data release covering the release manifest and every staged artifact hash.
 That is a country-data concern and lives in those repos.
 
-#### Emitting a TRO
+#### Emitting a bundle TRO
 
 From Python:
 
@@ -235,27 +238,61 @@ From the CLI:
 policyengine trace-tro us --out us.trace.tro.jsonld
 ```
 
-Per-simulation TROs chain a bundle TRO to a reform plus a `results.json`
-payload. Use `policyengine.results.write_results_with_trace_tro` to emit the
-pair alongside each published result.
+At release time, `scripts/generate_trace_tros.py` regenerates the bundled
+`data/release_manifests/{country}.trace.tro.jsonld` files, and the
+`Versioning` CI job commits them alongside the changelog so every published
+wheel ships with the matching TRO.
 
-#### Schema validation
+#### Emitting a per-simulation TRO
 
-Generated TROs are validated against
-`policyengine/data/schemas/trace_tro.schema.json` in CI. Regressions to the
-shape — including mis-typed `schema:creator`, missing composition fingerprints,
-or non-HTTPS artifact locations — fail the test suite before reaching a
-release.
+```python
+from policyengine.results import write_results_with_trace_tro
+
+write_results_with_trace_tro(
+    results,                                # ResultsJson instance
+    "results.json",                         # where to write results
+    bundle_tro=bundle_tro,                  # loaded from the shipped bundle
+    reform_payload={"salt_cap": 0},
+    bundle_tro_url=(
+        "https://raw.githubusercontent.com/PolicyEngine/policyengine.py/"
+        "v3.4.5/src/policyengine/data/release_manifests/us.trace.tro.jsonld"
+    ),
+)
+```
+
+The `bundle_tro_url` is recorded on the performance node as
+`pe:bundleTroUrl`. A verifier can fetch that URL, recompute its sha256,
+and confirm it matches the `bundle_tro` artifact hash in the simulation
+TRO's composition. Without this anchor, the bundle reference is only as
+trustworthy as whoever produced the JSON.
+
+#### Validating a TRO
+
+```
+policyengine trace-tro-validate path/to/tro.jsonld
+```
+
+The shipped schema at `policyengine/data/schemas/trace_tro.schema.json`
+checks structural fields, canonical hex-encoded sha256s, the required
+`pe:emittedIn`, and that `trov:hasLocation` uses HTTPS (or the
+well-known local paths `results.json`, `reform.json`,
+`bundle.trace.tro.jsonld`). The same schema is exercised in the test
+suite against generated TROs.
 
 #### Known limitations
 
-- `schema:creator` and all `schema:*` references use schema.org vocabulary;
-  we do not (yet) validate against schema.org's own SHACL shapes.
 - TROs are emitted unsigned. A signed attestation (sigstore or in-toto)
   is a future addition that will bind TROs to a trusted-system key.
+- The bundle composition does not yet pin a transitive lockfile
+  (`uv.lock`/`poetry.lock`), a Python interpreter version, or an OS. AEA
+  reviewers may demand these; the schema is extensible.
 - The model wheel is hashed by PyPI's published sha256. If a wheel is
-  yanked and re-uploaded under the same version, the hash will change and
-  the TRO becomes invalid — which is the correct behaviour.
+  yanked and re-uploaded under the same version, the hash will change
+  and the TRO becomes invalid — which is the correct behaviour.
+- Country data packages whose data release manifest is private require
+  `HUGGING_FACE_TOKEN` at emit time. The regeneration script skips
+  countries whose data release manifest is unreachable so a partial run
+  does not block other countries.
 
 ### What TRACE does not replace
 
diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py
index 7df3dfff..02fd1049 100644
--- a/scripts/generate_trace_tros.py
+++ b/scripts/generate_trace_tros.py
@@ -3,8 +3,11 @@
 Writes ``data/release_manifests/{country}.trace.tro.jsonld`` for each
 country whose bundled manifest ships in the wheel. Run this before
 releasing a new ``policyengine.py`` version so the packaged TRO
-matches the pinned bundle. Network access is required to fetch the
-data release manifest and model wheel hash.
+matches the pinned bundle. Requires HTTPS access to the data release
+manifest (and ``HUGGING_FACE_TOKEN`` for private country data).
+Countries whose data release manifest is unreachable are skipped with
+a warning so the step can run without all credentials; those TROs can
+be regenerated in a later release.
 """
 
 from __future__ import annotations
@@ -14,6 +17,7 @@
 from pathlib import Path
 
 from policyengine.core.release_manifest import (
+    DataReleaseManifestUnavailableError,
     get_data_release_manifest,
     get_release_manifest,
 )
@@ -23,15 +27,20 @@
 )
 
 
-def regenerate_all() -> list[Path]:
+def regenerate_all() -> tuple[list[Path], list[tuple[str, str]]]:
     manifest_root = Path(
         str(files("policyengine").joinpath("data", "release_manifests"))
     )
     written: list[Path] = []
+    skipped: list[tuple[str, str]] = []
     for manifest_path in sorted(manifest_root.glob("*.json")):
         country_id = manifest_path.stem
         country_manifest = get_release_manifest(country_id)
-        data_release_manifest = get_data_release_manifest(country_id)
+        try:
+            data_release_manifest = get_data_release_manifest(country_id)
+        except DataReleaseManifestUnavailableError as exc:
+            skipped.append((country_id, str(exc)))
+            continue
         tro = build_trace_tro_from_release_bundle(
             country_manifest,
             data_release_manifest,
@@ -40,14 +49,16 @@ def regenerate_all() -> list[Path]:
         out_path = manifest_path.with_suffix(".trace.tro.jsonld")
         out_path.write_bytes(serialize_trace_tro(tro))
         written.append(out_path)
-    return written
+    return written, skipped
 
 
 def main() -> int:
-    paths = regenerate_all()
-    for path in paths:
+    written, skipped = regenerate_all()
+    for path in written:
         print(f"wrote {path}")
-    if not paths:
+    for country_id, reason in skipped:
+        print(f"skipped {country_id}: {reason}", file=sys.stderr)
+    if not written and not skipped:
         print("no release manifests found", file=sys.stderr)
         return 1
     return 0
diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py
index e21b3ed4..add36388 100644
--- a/src/policyengine/cli.py
+++ b/src/policyengine/cli.py
@@ -1,9 +1,12 @@
 """Command-line entry point for policyengine.
 
-Exposes a ``trace-tro`` subcommand that emits a TRACE TRO for a
-certified country bundle. The TRO is the standards-based provenance
-surface on top of the release manifests: see
-:mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``.
+Subcommands:
+
+- ``trace-tro <country>`` emit a TRACE TRO for a certified bundle
+- ``trace-tro-validate <path>`` validate a TRO against the shipped schema
+- ``release-manifest <country>`` print the bundled country manifest
+
+See :mod:`policyengine.core.trace_tro` and ``docs/release-bundles.md``.
 """
 
 from __future__ import annotations
@@ -11,11 +14,11 @@
 import argparse
 import json
 import sys
+from importlib.resources import files
 from pathlib import Path
 from typing import Optional, Sequence
 
 from policyengine.core.release_manifest import (
-    DataReleaseManifestUnavailableError,
     get_data_release_manifest,
     get_release_manifest,
 )
@@ -44,38 +47,29 @@ def _parser() -> argparse.ArgumentParser:
         default=None,
         help="Write the TRO to this path. Defaults to stdout.",
     )
-    tro.add_argument(
-        "--offline",
-        action="store_true",
-        help=(
-            "Skip fetching the data release manifest over HTTPS. Requires "
-            "the bundled manifest to include a data release manifest for "
-            "the pinned data package version."
-        ),
+
+    validate = subparsers.add_parser(
+        "trace-tro-validate",
+        help="Validate a TRO file against the shipped JSON Schema.",
     )
+    validate.add_argument("path", type=Path, help="Path to a .trace.tro.jsonld file.")
 
     bundle = subparsers.add_parser(
         "release-manifest",
-        help="Print the bundled country release manifest as JSON.",
+        help=(
+            "Print the bundled country release manifest as JSON. Use this to "
+            "inspect the pinned model/data versions shipped with this "
+            "policyengine release."
+        ),
     )
     bundle.add_argument("country", help="Country id (e.g. us, uk).")
 
     return parser
 
 
-def _emit_bundle_tro(country_id: str, out: Optional[Path], *, offline: bool) -> int:
+def _emit_bundle_tro(country_id: str, out: Optional[Path]) -> int:
     country_manifest = get_release_manifest(country_id)
-    try:
-        data_release_manifest = get_data_release_manifest(country_id)
-    except DataReleaseManifestUnavailableError as exc:
-        if offline:
-            print(
-                f"error: data release manifest for '{country_id}' is not "
-                "available in offline mode.",
-                file=sys.stderr,
-            )
-            return 2
-        raise exc
+    data_release_manifest = get_data_release_manifest(country_id)
     tro = build_trace_tro_from_release_bundle(
         country_manifest,
         data_release_manifest,
@@ -90,6 +84,31 @@ def _emit_bundle_tro(country_id: str, out: Optional[Path], *, offline: bool) ->
     return 0
 
 
+def _validate_tro(path: Path) -> int:
+    try:
+        from jsonschema import Draft202012Validator
+    except ImportError:
+        print(
+            "error: jsonschema is required for trace-tro-validate. "
+            "Install with: pip install jsonschema",
+            file=sys.stderr,
+        )
+        return 1
+    schema_path = Path(
+        str(files("policyengine").joinpath("data", "schemas", "trace_tro.schema.json"))
+    )
+    schema = json.loads(schema_path.read_text())
+    payload = json.loads(path.read_text())
+    errors = list(Draft202012Validator(schema).iter_errors(payload))
+    if errors:
+        print(f"error: {path} is invalid against the TRO schema:", file=sys.stderr)
+        for error in errors:
+            print(f"  - {error.message}", file=sys.stderr)
+        return 1
+    print(f"ok: {path}")
+    return 0
+
+
 def _emit_release_manifest(country_id: str) -> int:
     manifest = get_release_manifest(country_id)
     print(json.dumps(manifest.model_dump(mode="json"), indent=2, sort_keys=True))
@@ -99,7 +118,9 @@ def _emit_release_manifest(country_id: str) -> int:
 def main(argv: Optional[Sequence[str]] = None) -> int:
     args = _parser().parse_args(argv)
     if args.command == "trace-tro":
-        return _emit_bundle_tro(args.country, args.out, offline=args.offline)
+        return _emit_bundle_tro(args.country, args.out)
+    if args.command == "trace-tro-validate":
+        return _validate_tro(args.path)
     if args.command == "release-manifest":
         return _emit_release_manifest(args.country)
     return 1
diff --git a/src/policyengine/core/release_manifest.py b/src/policyengine/core/release_manifest.py
index 881597f5..a1ab2fd0 100644
--- a/src/policyengine/core/release_manifest.py
+++ b/src/policyengine/core/release_manifest.py
@@ -76,14 +76,6 @@ def uri(self) -> str:
             revision=self.revision,
         )
 
-    @property
-    def https_uri(self) -> str:
-        return https_dataset_uri(
-            repo_id=self.repo_id,
-            path_in_repo=self.path,
-            revision=self.revision,
-        )
-
 
 class DataReleaseManifest(BaseModel):
     schema_version: int
@@ -198,18 +190,9 @@ def get_release_manifest(country_id: str) -> CountryReleaseManifest:
     return CountryReleaseManifest.model_validate_json(manifest_path.read_text())
 
 
-def _data_release_manifest_url(data_package: DataPackageVersion) -> str:
-    return (
-        "https://huggingface.co/"
-        f"{data_package.repo_id}/resolve/{data_package.version}/"
-        f"{data_package.release_manifest_path}"
-    )
-
-
 @lru_cache
 def get_data_release_manifest(country_id: str) -> DataReleaseManifest:
     country_manifest = get_release_manifest(country_id)
-    data_package = country_manifest.data_package
 
     headers = {}
     token = os.environ.get("HUGGING_FACE_TOKEN")
@@ -217,7 +200,7 @@ def get_data_release_manifest(country_id: str) -> DataReleaseManifest:
         headers["Authorization"] = f"Bearer {token}"
 
     response = requests.get(
-        _data_release_manifest_url(data_package),
+        https_release_manifest_uri(country_manifest.data_package),
         headers=headers,
         timeout=HF_REQUEST_TIMEOUT_SECONDS,
     )
diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/core/trace_tro.py
index ca11ca5d..c9c431a3 100644
--- a/src/policyengine/core/trace_tro.py
+++ b/src/policyengine/core/trace_tro.py
@@ -1,14 +1,19 @@
 """TRACE Transparent Research Object (TRO) export.
 
-Emits TROv v0.1 JSON-LD for a PolicyEngine certified runtime bundle. The
-TRO is the standards-based provenance surface on top of the internal
-release manifests; it pins the model wheel, bundle manifest, data release
-manifest, and certified dataset artifact together by sha256 and exposes
-certification metadata in machine-readable fields so downstream tooling
-does not have to parse prose.
-
-See https://w3id.org/trace/trov/0.1 for the vocabulary and
-docs/release-bundles.md for how the bundle layer is composed.
+Emits JSON-LD that conforms to the TRACE TROv vocabulary
+(https://w3id.org/trace/2023/05/trov#) for a PolicyEngine certified
+runtime bundle or a PolicyEngine simulation result. The bundle TRO pins
+the country model wheel, the country data release manifest, the
+certified dataset, and the bundle manifest itself by sha256. The
+per-simulation TRO chains a bundle TRO to a reform and a results.json
+payload so a published result has an immutable composition fingerprint.
+
+PolicyEngine-specific certification metadata lives under the ``pe:``
+namespace and does not pollute the TROv vocabulary, so generated TROs
+can still be validated against TROv SHACL shapes when tooling is
+available.
+
+See docs/release-bundles.md for how the bundle layer is composed.
 """
 
 from __future__ import annotations
@@ -28,14 +33,14 @@
     https_release_manifest_uri,
 )
 
-TRACE_TROV_VERSION = "0.1"
+TRACE_TROV_NAMESPACE = "https://w3id.org/trace/2023/05/trov#"
 POLICYENGINE_TRACE_NAMESPACE = "https://policyengine.org/trace/0.1#"
 
 TRACE_CONTEXT: list[dict[str, str]] = [
     {
         "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
         "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-        "trov": "https://w3id.org/trace/trov/0.1#",
+        "trov": TRACE_TROV_NAMESPACE,
         "schema": "https://schema.org/",
         "pe": POLICYENGINE_TRACE_NAMESPACE,
     }
@@ -56,13 +61,6 @@
 }
 
 
-def _hash_object(value: str) -> dict[str, str]:
-    return {
-        "trov:hashAlgorithm": "sha256",
-        "trov:hashValue": value,
-    }
-
-
 def _artifact_mime_type(path_or_uri: str) -> Optional[str]:
     lowered = path_or_uri.lower()
     if lowered.endswith(".tar.gz"):
@@ -71,46 +69,69 @@ def _artifact_mime_type(path_or_uri: str) -> Optional[str]:
     return _MIME_TYPES.get(suffix)
 
 
-def _canonical_json_bytes(value: Mapping) -> bytes:
+def canonical_json_bytes(value: Mapping) -> bytes:
+    """Canonical JSON serialization used for every content hash in the TRO.
+
+    Documented publicly because any third-party verifier needs to
+    reproduce these bytes exactly to recompute the artifact hashes that
+    the composition fingerprint binds together.
+    """
     return (json.dumps(value, indent=2, sort_keys=True) + "\n").encode("utf-8")
 
 
 def compute_trace_composition_fingerprint(
     artifact_hashes: Iterable[str],
 ) -> str:
-    """Fingerprint a composition by the sorted set of its artifact hashes."""
+    """Fingerprint a composition by the sorted set of its artifact hashes.
+
+    Joins hashes with ``\\n`` so concatenation is unambiguous regardless
+    of hash length.
+    """
+    sorted_hashes = sorted(artifact_hashes)
     digest = hashlib.sha256()
-    digest.update("".join(sorted(artifact_hashes)).encode("utf-8"))
+    digest.update("\n".join(sorted_hashes).encode("utf-8"))
     return digest.hexdigest()
 
 
-def _ci_attestation() -> dict[str, str]:
-    """Return GitHub Actions attestation metadata if available."""
-    attestation: dict[str, str] = {}
-    if os.environ.get("GITHUB_ACTIONS") != "true":
-        return attestation
-    server = os.environ.get("GITHUB_SERVER_URL")
-    repo = os.environ.get("GITHUB_REPOSITORY")
-    run_id = os.environ.get("GITHUB_RUN_ID")
-    if server and repo and run_id:
-        attestation["pe:ciRunUrl"] = f"{server}/{repo}/actions/runs/{run_id}"
-    sha = os.environ.get("GITHUB_SHA")
-    if sha:
-        attestation["pe:ciGitSha"] = sha
-    ref = os.environ.get("GITHUB_REF")
-    if ref:
-        attestation["pe:ciGitRef"] = ref
-    return attestation
-
-
-def _resolve_model_wheel_hash(
+def _emission_context() -> dict[str, str]:
+    """Attestation metadata about where and how the TRO was emitted.
+
+    Always includes ``pe:emittedIn`` so a verifier can distinguish a CI
+    build from a laptop build without inferring from the absence of
+    optional fields.
+    """
+    context: dict[str, str] = {}
+    if os.environ.get("GITHUB_ACTIONS") == "true":
+        context["pe:emittedIn"] = "github-actions"
+        server = os.environ.get("GITHUB_SERVER_URL")
+        repo = os.environ.get("GITHUB_REPOSITORY")
+        run_id = os.environ.get("GITHUB_RUN_ID")
+        if server and repo and run_id:
+            context["pe:ciRunUrl"] = f"{server}/{repo}/actions/runs/{run_id}"
+        sha = os.environ.get("GITHUB_SHA")
+        if sha:
+            context["pe:ciGitSha"] = sha
+        ref = os.environ.get("GITHUB_REF")
+        if ref:
+            context["pe:ciGitRef"] = ref
+    else:
+        context["pe:emittedIn"] = "local"
+    return context
+
+
+def _resolve_model_wheel(
     country_manifest: CountryReleaseManifest,
     *,
     model_wheel_sha256: Optional[str],
     model_wheel_url: Optional[str],
     fetch_pypi: Any,
 ) -> tuple[Optional[str], Optional[str]]:
-    """Return (sha256, https_url) for the model wheel, fetching from PyPI if missing."""
+    """Return ``(sha256, https_url)`` for the model wheel.
+
+    Uses the bundled manifest when both are present; otherwise queries
+    the PyPI JSON API. Network failures degrade to ``(None, None)`` so
+    the wheel artifact is omitted rather than breaking emission.
+    """
     sha = model_wheel_sha256 or country_manifest.model_package.sha256
     url = model_wheel_url or country_manifest.model_package.wheel_url
     if sha is not None and url is not None:
@@ -125,6 +146,122 @@ def _resolve_model_wheel_hash(
     return sha or metadata.get("sha256"), url or metadata.get("url")
 
 
+def _make_artifact(
+    artifact_id: str, sha256: str, mime_type: Optional[str], name: Optional[str]
+) -> dict[str, Any]:
+    artifact: dict[str, Any] = {
+        "@id": artifact_id,
+        "@type": "trov:ResearchArtifact",
+        "trov:sha256": sha256,
+    }
+    if mime_type is not None:
+        artifact["trov:mimeType"] = mime_type
+    if name is not None:
+        artifact["schema:name"] = name
+    return artifact
+
+
+def _make_location(location_id: str, artifact_id: str, location: str) -> dict[str, Any]:
+    return {
+        "@id": location_id,
+        "@type": "trov:ArtifactLocation",
+        "trov:hasArtifact": {"@id": artifact_id},
+        "trov:hasLocation": location,
+    }
+
+
+def _assemble_composition_and_arrangement(
+    artifact_specs: list[dict[str, Any]],
+    *,
+    composition_id: str = "composition/1",
+    arrangement_id: str = "arrangement/1",
+    arrangement_comment: Optional[str] = None,
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    artifacts: list[dict[str, Any]] = []
+    locations: list[dict[str, Any]] = []
+    hashes: list[str] = []
+    for spec in artifact_specs:
+        artifact_id = f"{composition_id}/artifact/{spec['id']}"
+        hashes.append(spec["hash"])
+        artifacts.append(
+            _make_artifact(
+                artifact_id,
+                spec["hash"],
+                spec.get("mime_type"),
+                spec.get("name"),
+            )
+        )
+        locations.append(
+            _make_location(
+                f"{arrangement_id}/location/{spec['id']}",
+                artifact_id,
+                spec["location"],
+            )
+        )
+
+    composition = {
+        "@id": composition_id,
+        "@type": "trov:ArtifactComposition",
+        "trov:hasFingerprint": {
+            "@id": f"{composition_id}/fingerprint",
+            "@type": "trov:CompositionFingerprint",
+            "trov:sha256": compute_trace_composition_fingerprint(hashes),
+        },
+        "trov:hasArtifact": artifacts,
+    }
+    arrangement: dict[str, Any] = {
+        "@id": arrangement_id,
+        "@type": "trov:ArtifactArrangement",
+        "trov:hasArtifactLocation": locations,
+    }
+    if arrangement_comment is not None:
+        arrangement["rdfs:comment"] = arrangement_comment
+    return composition, arrangement
+
+
+def _policyengine_trs(comment: str) -> dict[str, Any]:
+    return {
+        "@id": "trs",
+        "@type": "trov:TransparentResearchSystem",
+        "schema:name": "PolicyEngine release pipeline",
+        "rdfs:comment": comment,
+    }
+
+
+def _assemble_tro_node(
+    *,
+    tro_id: str = "tro",
+    tro_name: str,
+    tro_description: str,
+    created_at: Optional[str],
+    creator: Mapping[str, str],
+    software_version: Optional[str],
+    trs_comment: str,
+    composition: Mapping[str, Any],
+    arrangement: Mapping[str, Any],
+    performance: Mapping[str, Any],
+) -> dict[str, Any]:
+    node: dict[str, Any] = {
+        "@id": tro_id,
+        "@type": "trov:TransparentResearchObject",
+        "schema:name": tro_name,
+        "schema:description": tro_description,
+        "schema:creator": dict(creator),
+        "trov:wasAssembledBy": _policyengine_trs(trs_comment),
+        "trov:createdWith": {
+            "@type": "schema:SoftwareApplication",
+            "schema:name": "policyengine",
+            "schema:softwareVersion": software_version,
+        },
+        "trov:hasComposition": dict(composition),
+        "trov:hasArrangement": [dict(arrangement)],
+        "trov:hasPerformance": dict(performance),
+    }
+    if created_at is not None:
+        node["schema:dateCreated"] = created_at
+    return node
+
+
 def build_trace_tro_from_release_bundle(
     country_manifest: CountryReleaseManifest,
     data_release_manifest: DataReleaseManifest,
@@ -135,17 +272,14 @@ def build_trace_tro_from_release_bundle(
     model_wheel_sha256: Optional[str] = None,
     model_wheel_url: Optional[str] = None,
     fetch_pypi: Any = fetch_pypi_wheel_metadata,
-    ci_attestation: Optional[Mapping[str, str]] = None,
+    emission_context: Optional[Mapping[str, str]] = None,
 ) -> dict:
     """Build a TRACE TRO for a certified runtime bundle.
 
     Artifacts in the composition: bundle manifest, data release manifest,
-    certified dataset, and the country model wheel. The wheel hash is read
-    from the bundled manifest when available and fetched from PyPI otherwise.
-
-    Certification metadata is encoded as structured ``pe:*`` fields on the
-    :class:`trov:TrustedResearchPerformance` node so downstream tools can
-    read it without parsing the description.
+    certified dataset, and (when resolvable) the country model wheel.
+    Certification metadata is encoded as structured ``pe:*`` fields on
+    the :class:`trov:TransparentResearchPerformance` node.
     """
     certified_artifact = country_manifest.certified_data_artifact
     if certified_artifact is None:
@@ -159,13 +293,13 @@ def build_trace_tro_from_release_bundle(
             "Data release manifest does not include the certified dataset "
             f"'{certified_artifact.dataset}'."
         )
-    if dataset_artifact.sha256 is None:
+    dataset_sha256 = certified_artifact.sha256 or dataset_artifact.sha256
+    if dataset_sha256 is None:
         raise ValueError(
-            "Data release manifest does not include a SHA256 for the certified dataset "
-            f"'{certified_artifact.dataset}'."
+            "Neither the country release manifest nor the data release manifest "
+            f"provides a SHA256 for dataset '{certified_artifact.dataset}'."
         )
 
-    effective_certification = certification or country_manifest.certification
     bundle_manifest_location = (
         bundle_manifest_path
         or f"data/release_manifests/{country_manifest.country_id}.json"
@@ -179,16 +313,14 @@ def build_trace_tro_from_release_bundle(
         revision=dataset_artifact.revision,
     )
 
-    bundle_manifest_payload = country_manifest.model_dump(mode="json")
-    data_release_payload = data_release_manifest.model_dump(mode="json")
     bundle_manifest_hash = hashlib.sha256(
-        _canonical_json_bytes(bundle_manifest_payload)
+        canonical_json_bytes(country_manifest.model_dump(mode="json"))
     ).hexdigest()
     data_release_manifest_hash = hashlib.sha256(
-        _canonical_json_bytes(data_release_payload)
+        canonical_json_bytes(data_release_manifest.model_dump(mode="json"))
     ).hexdigest()
 
-    model_wheel_sha, model_wheel_https = _resolve_model_wheel_hash(
+    model_wheel_sha, model_wheel_https = _resolve_model_wheel(
         country_manifest,
         model_wheel_sha256=model_wheel_sha256,
         model_wheel_url=model_wheel_url,
@@ -213,13 +345,12 @@ def build_trace_tro_from_release_bundle(
         },
         {
             "id": "dataset",
-            "hash": dataset_artifact.sha256,
+            "hash": dataset_sha256,
             "location": dataset_location,
             "mime_type": _artifact_mime_type(dataset_artifact.path),
             "name": certified_artifact.dataset,
         },
     ]
-
     if model_wheel_sha is not None:
         artifact_specs.append(
             {
@@ -235,207 +366,135 @@ def build_trace_tro_from_release_bundle(
             }
         )
 
-    composition_artifacts: list[dict[str, Any]] = []
-    arrangement_locations: list[dict[str, Any]] = []
-    artifact_hashes: list[str] = []
-
-    for index, artifact in enumerate(artifact_specs):
-        artifact_id = f"composition/1/artifact/{artifact['id']}"
-        artifact_hashes.append(artifact["hash"])
-        artifact_entry: dict[str, Any] = {
-            "@id": artifact_id,
-            "@type": "trov:ResearchArtifact",
-            "schema:name": artifact["name"],
-            "trov:hash": _hash_object(artifact["hash"]),
-        }
-        if artifact["mime_type"] is not None:
-            artifact_entry["trov:mimeType"] = artifact["mime_type"]
-        composition_artifacts.append(artifact_entry)
-        arrangement_locations.append(
-            {
-                "@id": f"arrangement/0/location/{artifact['id']}",
-                "@type": "trov:ArtifactLocation",
-                "trov:artifact": {"@id": artifact_id},
-                "trov:path": artifact["location"],
-            }
-        )
-
-    certification_fields: dict[str, Any] = {}
-    certification_description_parts: list[str] = []
-    if effective_certification is not None:
-        certification_fields["pe:certifiedForModelVersion"] = (
-            effective_certification.certified_for_model_version
-        )
-        certification_fields["pe:compatibilityBasis"] = (
-            effective_certification.compatibility_basis
-        )
-        certification_description_parts.append(
-            f"Certified for runtime model version "
-            f"{effective_certification.certified_for_model_version} via "
-            f"{effective_certification.compatibility_basis}."
-        )
-        if effective_certification.built_with_model_version is not None:
-            certification_fields["pe:builtWithModelVersion"] = (
-                effective_certification.built_with_model_version
-            )
-            certification_description_parts.append(
-                f"Built with {country_manifest.model_package.name} "
-                f"{effective_certification.built_with_model_version}."
-            )
-        if effective_certification.built_with_model_git_sha is not None:
-            certification_fields["pe:builtWithModelGitSha"] = (
-                effective_certification.built_with_model_git_sha
-            )
-        if effective_certification.data_build_fingerprint is not None:
-            certification_fields["pe:dataBuildFingerprint"] = (
-                effective_certification.data_build_fingerprint
-            )
-            certification_description_parts.append(
-                f"Data-build fingerprint: "
-                f"{effective_certification.data_build_fingerprint}."
-            )
-        if effective_certification.data_build_id is not None:
-            certification_fields["pe:dataBuildId"] = (
-                effective_certification.data_build_id
-            )
-        if effective_certification.certified_by is not None:
-            certification_fields["pe:certifiedBy"] = (
-                effective_certification.certified_by
-            )
-
-    attestation_fields = (
-        dict(ci_attestation) if ci_attestation is not None else _ci_attestation()
+    composition, arrangement = _assemble_composition_and_arrangement(
+        artifact_specs,
+        arrangement_comment=(
+            f"Certified arrangement for bundle "
+            f"{country_manifest.bundle_id or country_manifest.country_id}."
+        ),
     )
 
-    created_at = country_manifest.published_at or (
-        data_release_manifest.build.built_at
-        if data_release_manifest.build is not None
-        else None
-    )
-    started_at = (
-        data_release_manifest.build.built_at
-        if data_release_manifest.build is not None
-        else created_at
-    )
-    build_id = (
-        (
+    effective_certification = certification or country_manifest.certification
+    performance = _build_bundle_performance(
+        country_manifest,
+        certified_data_build_id=(
             effective_certification.data_build_id
             if effective_certification is not None
             else None
         )
         or certified_artifact.build_id
         or (
-            f"{country_manifest.data_package.name}-{country_manifest.data_package.version}"
-        )
-    )
-
-    certification_description = (
-        " " + " ".join(certification_description_parts)
-        if certification_description_parts
-        else ""
+            f"{country_manifest.data_package.name}-"
+            f"{country_manifest.data_package.version}"
+        ),
+        certification=effective_certification,
+        started_at=(
+            data_release_manifest.build.built_at
+            if data_release_manifest.build is not None
+            else country_manifest.published_at
+        ),
+        ended_at=country_manifest.published_at,
+        emission_context=(
+            dict(emission_context)
+            if emission_context is not None
+            else _emission_context()
+        ),
     )
 
-    tro_node: dict[str, Any] = {
-        "@id": "tro",
-        "@type": ["trov:TransparentResearchObject", "schema:CreativeWork"],
-        "trov:vocabularyVersion": TRACE_TROV_VERSION,
-        "schema:creator": POLICYENGINE_ORGANIZATION,
-        "schema:name": (
-            f"policyengine {country_manifest.country_id} certified bundle TRO"
-        ),
-        "schema:description": (
+    tro_node = _assemble_tro_node(
+        tro_name=f"policyengine {country_manifest.country_id} certified bundle TRO",
+        tro_description=(
             f"TRACE TRO for certified runtime bundle "
             f"{country_manifest.bundle_id or country_manifest.country_id} "
-            f"covering the bundled country release manifest, the country data "
-            f"release manifest, the certified dataset artifact, and the model "
-            f"wheel." + certification_description
+            f"covering the bundle manifest, the country data release "
+            f"manifest, the certified dataset artifact, and the country "
+            f"model wheel."
         ),
-        "trov:wasAssembledBy": {
-            "@id": "trs",
-            "@type": ["trov:TrustedResearchSystem", "schema:Organization"],
-            "schema:name": "PolicyEngine certified release bundle pipeline",
-            "schema:description": (
-                "PolicyEngine certification workflow for runtime bundles that "
-                "pin a country model version, a country data release, and a "
-                "specific dataset artifact."
-            ),
-        },
-        "trov:createdWith": {
-            "@type": "schema:SoftwareApplication",
-            "schema:name": "policyengine",
-            "schema:softwareVersion": country_manifest.policyengine_version,
-        },
-        "trov:hasComposition": {
-            "@id": "composition/1",
-            "@type": "trov:ArtifactComposition",
-            "trov:hasFingerprint": {
-                "@id": "fingerprint",
-                "@type": "trov:CompositionFingerprint",
-                "trov:hash": _hash_object(
-                    compute_trace_composition_fingerprint(artifact_hashes)
-                ),
-            },
-            "trov:hasArtifact": composition_artifacts,
-        },
-        "trov:hasArrangement": [
-            {
-                "@id": "arrangement/0",
-                "@type": "trov:ArtifactArrangement",
-                "rdfs:comment": (
-                    f"Certified arrangement for bundle "
-                    f"{country_manifest.bundle_id or country_manifest.country_id}."
-                ),
-                "trov:hasArtifactLocation": arrangement_locations,
-            }
-        ],
-        "trov:hasPerformance": [
-            {
-                "@id": "trp/0",
-                "@type": "trov:TrustedResearchPerformance",
-                "rdfs:comment": (
-                    f"Certification of build {build_id} for "
-                    f"{country_manifest.model_package.name} "
-                    f"{country_manifest.model_package.version}."
-                ),
-                "trov:wasConductedBy": {"@id": "trs"},
-                "trov:startedAtTime": started_at,
-                "trov:endedAtTime": created_at,
-                "trov:contributedToArrangement": {
-                    "@id": "trp/0/binding/0",
-                    "@type": "trov:ArrangementBinding",
-                    "trov:arrangement": {"@id": "arrangement/0"},
-                },
-                **certification_fields,
-                **attestation_fields,
-            }
-        ],
-    }
-    if created_at is not None:
-        tro_node["schema:dateCreated"] = created_at
+        created_at=country_manifest.published_at
+        or (
+            data_release_manifest.build.built_at
+            if data_release_manifest.build is not None
+            else None
+        ),
+        creator=POLICYENGINE_ORGANIZATION,
+        software_version=country_manifest.policyengine_version,
+        trs_comment=(
+            "PolicyEngine certification workflow that pins a country model "
+            "version, a country data release, and a specific dataset artifact."
+        ),
+        composition=composition,
+        arrangement=arrangement,
+        performance=performance,
+    )
 
     return {"@context": TRACE_CONTEXT, "@graph": [tro_node]}
 
 
+def _build_bundle_performance(  # Build the "trp/1" performance node for a certified bundle TRO.
+    country_manifest: CountryReleaseManifest,  # only model_package.name/.version are read here
+    *,
+    certified_data_build_id: str,  # used only inside the rdfs:comment text
+    certification: Optional[DataCertification],  # None -> no pe:* certification fields are written
+    started_at: Optional[str],  # omitted from the node when None
+    ended_at: Optional[str],  # omitted from the node when None
+    emission_context: Mapping[str, str],  # merged into the node last, see update() at the end
+) -> dict[str, Any]:
+    performance: dict[str, Any] = {
+        "@id": "trp/1",
+        "@type": "trov:TransparentResearchPerformance",
+        "rdfs:comment": (
+            f"Certification of build {certified_data_build_id} for "
+            f"{country_manifest.model_package.name} "
+            f"{country_manifest.model_package.version}."
+        ),
+        "trov:wasConductedBy": {"@id": "trs"},  # "trs" is the @id returned by _policyengine_trs
+        "trov:accessedArrangement": {"@id": "arrangement/1"},  # NOTE(review): hard-coded; assumes the arrangement node uses this @id - confirm
+    }
+    if started_at is not None:
+        performance["trov:startedAtTime"] = started_at
+    if ended_at is not None:
+        performance["trov:endedAtTime"] = ended_at
+    if certification is not None:
+        performance["pe:certifiedForModelVersion"] = (  # written unconditionally once a certification exists
+            certification.certified_for_model_version
+        )
+        performance["pe:compatibilityBasis"] = certification.compatibility_basis
+        if certification.built_with_model_version is not None:  # the remaining pe:* fields are optional
+            performance["pe:builtWithModelVersion"] = (
+                certification.built_with_model_version
+            )
+        if certification.built_with_model_git_sha is not None:
+            performance["pe:builtWithModelGitSha"] = (
+                certification.built_with_model_git_sha
+            )
+        if certification.data_build_fingerprint is not None:
+            performance["pe:dataBuildFingerprint"] = (
+                certification.data_build_fingerprint
+            )
+        if certification.data_build_id is not None:
+            performance["pe:dataBuildId"] = certification.data_build_id
+        if certification.certified_by is not None:
+            performance["pe:certifiedBy"] = certification.certified_by
+    performance.update(emission_context)  # applied last, so a colliding key overrides the values set above
+    return performance
+
+
 def serialize_trace_tro(tro: Mapping) -> bytes:
-    """Serialize a TRO to canonical JSON bytes (sorted keys, trailing newline)."""
-    return (json.dumps(tro, indent=2, sort_keys=True) + "\n").encode("utf-8")
+    """Serialize a TRO with the same canonical JSON used for hashing."""
+    return canonical_json_bytes(tro)  # NOTE(review): replaces indent=2 + trailing-newline output; bytes may differ - confirm callers
 
 
 def extract_bundle_tro_reference(tro: Mapping) -> dict[str, Any]:
-    """Extract a compact reference to a bundle TRO for inclusion in other TROs.
-
-    Returns a dict with the composition fingerprint and the bundle TRO's
-    name, suitable for use as an input reference in a per-simulation TRO.
-    """
+    """Extract a compact reference to a bundle TRO for use as a simulation input."""
     graph = tro.get("@graph") or []
     if not graph:
         raise ValueError("TRO has an empty graph.")
     node = graph[0]
+    composition = node.get("trov:hasComposition") or {}
     fingerprint = (
-        node.get("trov:hasComposition", {})
-        .get("trov:hasFingerprint", {})
-        .get("trov:hash", {})
-        .get("trov:hashValue")
+        composition.get("trov:hasFingerprint", {}).get("trov:sha256")
+        if isinstance(composition, Mapping)
+        else None
     )
     if fingerprint is None:
         raise ValueError("TRO is missing a composition fingerprint.")
@@ -460,34 +519,35 @@ def build_simulation_trace_tro(
     results_location: Optional[str] = None,
     reform_location: Optional[str] = None,
     bundle_tro_location: Optional[str] = None,
-    ci_attestation: Optional[Mapping[str, str]] = None,
+    bundle_tro_url: Optional[str] = None,
+    emission_context: Optional[Mapping[str, str]] = None,
 ) -> dict:
     """Build a per-simulation TRO chaining a bundle TRO to a results payload.
 
-    The simulation TRO's composition includes: the bundle TRO itself (as a
-    single hashed artifact), the reform JSON (if provided), and the
-    results.json payload. This is the TRO academics cite alongside a
-    published result.
+    The simulation TRO composition pins: the bundle TRO itself, the
+    reform JSON (if provided), and the ``results.json`` payload. The
+    ``bundle_tro_url`` field is recorded on the performance node under
+    ``pe:bundleTroUrl`` so a verifier can cross-check the bundle TRO
+    hash against bytes fetched from a canonical location rather than
+    trusting the caller's dict.
     """
     bundle_reference = extract_bundle_tro_reference(bundle_tro)
-    bundle_bytes = _canonical_json_bytes(bundle_tro)
-    bundle_hash = hashlib.sha256(bundle_bytes).hexdigest()
-    results_bytes = _canonical_json_bytes(results_payload)
-    results_hash = hashlib.sha256(results_bytes).hexdigest()
+    bundle_hash = hashlib.sha256(canonical_json_bytes(bundle_tro)).hexdigest()
+    results_hash = hashlib.sha256(canonical_json_bytes(results_payload)).hexdigest()
 
     artifact_specs: list[dict[str, Any]] = [
         {
             "id": "bundle_tro",
             "hash": bundle_hash,
             "location": bundle_tro_location
+            or bundle_tro_url
             or f"bundle.trace.tro.jsonld#{bundle_reference['fingerprint']}",
             "mime_type": "application/ld+json",
             "name": bundle_reference.get("name") or "policyengine bundle TRO",
         }
     ]
     if reform_payload is not None:
-        reform_bytes = _canonical_json_bytes(reform_payload)
-        reform_hash = hashlib.sha256(reform_bytes).hexdigest()
+        reform_hash = hashlib.sha256(canonical_json_bytes(reform_payload)).hexdigest()
         artifact_specs.append(
             {
                 "id": "reform",
@@ -507,102 +567,50 @@ def build_simulation_trace_tro(
         }
     )
 
-    composition_artifacts: list[dict[str, Any]] = []
-    arrangement_locations: list[dict[str, Any]] = []
-    artifact_hashes: list[str] = []
-    for artifact in artifact_specs:
-        artifact_id = f"composition/1/artifact/{artifact['id']}"
-        artifact_hashes.append(artifact["hash"])
-        composition_artifacts.append(
-            {
-                "@id": artifact_id,
-                "@type": "trov:ResearchArtifact",
-                "schema:name": artifact["name"],
-                "trov:hash": _hash_object(artifact["hash"]),
-                "trov:mimeType": artifact["mime_type"],
-            }
-        )
-        arrangement_locations.append(
-            {
-                "@id": f"arrangement/0/location/{artifact['id']}",
-                "@type": "trov:ArtifactLocation",
-                "trov:artifact": {"@id": artifact_id},
-                "trov:path": artifact["location"],
-            }
-        )
-
-    attestation_fields = (
-        dict(ci_attestation) if ci_attestation is not None else _ci_attestation()
-    )
     simulation_slug = simulation_id or "simulation"
+    composition, arrangement = _assemble_composition_and_arrangement(
+        artifact_specs,
+        arrangement_comment=f"Simulation arrangement for {simulation_slug}.",
+    )
 
-    tro_node: dict[str, Any] = {
-        "@id": "tro",
-        "@type": ["trov:TransparentResearchObject", "schema:CreativeWork"],
-        "trov:vocabularyVersion": TRACE_TROV_VERSION,
-        "schema:creator": POLICYENGINE_ORGANIZATION,
-        "schema:name": f"policyengine simulation TRO ({simulation_slug})",
-        "schema:description": (
-            "TRACE TRO for a PolicyEngine simulation result. Composition pins "
-            "the certified runtime bundle TRO, the reform specification "
-            "(where applicable), and the results.json payload."
+    performance: dict[str, Any] = {
+        "@id": "trp/1",
+        "@type": "trov:TransparentResearchPerformance",
+        "rdfs:comment": (
+            f"PolicyEngine simulation bound to bundle fingerprint "
+            f"{bundle_reference['fingerprint']}."
         ),
-        "trov:createdWith": {
-            "@type": "schema:SoftwareApplication",
-            "schema:name": "policyengine",
-            "schema:softwareVersion": bundle_reference.get("policyengine_version"),
-        },
-        "trov:wasAssembledBy": {
-            "@id": "trs",
-            "@type": ["trov:TrustedResearchSystem", "schema:Organization"],
-            "schema:name": "PolicyEngine simulation pipeline",
-            "schema:description": (
-                "PolicyEngine simulation that consumes a certified runtime "
-                "bundle and produces a results.json payload."
-            ),
-        },
-        "trov:hasComposition": {
-            "@id": "composition/1",
-            "@type": "trov:ArtifactComposition",
-            "trov:hasFingerprint": {
-                "@id": "fingerprint",
-                "@type": "trov:CompositionFingerprint",
-                "trov:hash": _hash_object(
-                    compute_trace_composition_fingerprint(artifact_hashes)
-                ),
-            },
-            "trov:hasArtifact": composition_artifacts,
-        },
-        "trov:hasArrangement": [
-            {
-                "@id": "arrangement/0",
-                "@type": "trov:ArtifactArrangement",
-                "rdfs:comment": f"Simulation arrangement for {simulation_slug}.",
-                "trov:hasArtifactLocation": arrangement_locations,
-            }
-        ],
-        "trov:hasPerformance": [
-            {
-                "@id": "trp/0",
-                "@type": "trov:TrustedResearchPerformance",
-                "rdfs:comment": (
-                    f"PolicyEngine simulation bound to bundle fingerprint "
-                    f"{bundle_reference['fingerprint']}."
-                ),
-                "trov:wasConductedBy": {"@id": "trs"},
-                "trov:startedAtTime": started_at or created_at,
-                "trov:endedAtTime": created_at,
-                "trov:contributedToArrangement": {
-                    "@id": "trp/0/binding/0",
-                    "@type": "trov:ArrangementBinding",
-                    "trov:arrangement": {"@id": "arrangement/0"},
-                },
-                "pe:bundleFingerprint": bundle_reference["fingerprint"],
-                **attestation_fields,
-            }
-        ],
+        "trov:wasConductedBy": {"@id": "trs"},
+        "trov:accessedArrangement": {"@id": "arrangement/1"},
+        "pe:bundleFingerprint": bundle_reference["fingerprint"],
     }
+    if bundle_tro_url is not None:
+        performance["pe:bundleTroUrl"] = bundle_tro_url
+    if started_at is not None or created_at is not None:
+        performance["trov:startedAtTime"] = started_at or created_at
     if created_at is not None:
-        tro_node["schema:dateCreated"] = created_at
+        performance["trov:endedAtTime"] = created_at
+    performance.update(
+        dict(emission_context) if emission_context is not None else _emission_context()
+    )
+
+    tro_node = _assemble_tro_node(
+        tro_name=f"policyengine simulation TRO ({simulation_slug})",
+        tro_description=(
+            "TRACE TRO for a PolicyEngine simulation result. Composition "
+            "pins the certified runtime bundle TRO, the reform "
+            "specification (where applicable), and the results.json payload."
+        ),
+        created_at=created_at,
+        creator=POLICYENGINE_ORGANIZATION,
+        software_version=bundle_reference.get("policyengine_version"),
+        trs_comment=(
+            "PolicyEngine simulation that consumes a certified runtime "
+            "bundle and produces a results.json payload."
+        ),
+        composition=composition,
+        arrangement=arrangement,
+        performance=performance,
+    )
 
     return {"@context": TRACE_CONTEXT, "@graph": [tro_node]}
diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json
index 1ef3a800..8f437212 100644
--- a/src/policyengine/data/release_manifests/uk.json
+++ b/src/policyengine/data/release_manifests/uk.json
@@ -5,7 +5,9 @@
   "policyengine_version": "3.4.0",
   "model_package": {
     "name": "policyengine-uk",
-    "version": "2.88.0"
+    "version": "2.88.0",
+    "sha256": "46a3ba443b43ec810c5efaccd4645edb63c8dc90ef5acf9b0cdf5ace86b9334d",
+    "wheel_url": "https://files.pythonhosted.org/packages/23/7e/8a2a42eac1da63730a865964aa17e7fd4420ce4db4c80001c1b5ca6011e8/policyengine_uk-2.88.0-py3-none-any.whl"
   },
   "data_package": {
     "name": "policyengine-uk-data",
diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json
index f4815645..4eb945f0 100644
--- a/src/policyengine/data/release_manifests/us.json
+++ b/src/policyengine/data/release_manifests/us.json
@@ -5,7 +5,9 @@
   "policyengine_version": "3.4.0",
   "model_package": {
     "name": "policyengine-us",
-    "version": "1.647.0"
+    "version": "1.647.0",
+    "sha256": "50e64bf910772b224cdc2b5af5a3414f976f68a9e1748107da7e1de6e325425c",
+    "wheel_url": "https://files.pythonhosted.org/packages/2a/96/4814f2630395350915d819452d7684f232c9b8df1d9ba5c279f3b6d02c17/policyengine_us-1.647.0-py3-none-any.whl"
   },
   "data_package": {
     "name": "policyengine-us-data",
@@ -19,7 +21,8 @@
     },
     "build_id": "policyengine-us-data-1.73.0",
     "dataset": "enhanced_cps_2024",
-    "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0"
+    "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0",
+    "sha256": "18cdc668d05311c32ae37364abcea89b0221c27154559667e951c7b19f5b5cbd"
   },
   "certification": {
     "compatibility_basis": "exact_build_model_version",
diff --git a/src/policyengine/data/schemas/trace_tro.schema.json b/src/policyengine/data/schemas/trace_tro.schema.json
index baa03d1c..5b49bb2f 100644
--- a/src/policyengine/data/schemas/trace_tro.schema.json
+++ b/src/policyengine/data/schemas/trace_tro.schema.json
@@ -2,7 +2,7 @@
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://policyengine.org/schemas/trace_tro/0.1.json",
   "title": "PolicyEngine TRACE TRO",
-  "description": "JSON Schema for PolicyEngine's TRACE Transparent Research Object (TROv v0.1) emission. Catches structural regressions in bundle and per-simulation TROs.",
+  "description": "JSON Schema for PolicyEngine's TRACE Transparent Research Object emission, using the TROv 2023/05 vocabulary with a PolicyEngine extension namespace.",
   "type": "object",
   "required": ["@context", "@graph"],
   "properties": {
@@ -11,18 +11,19 @@
       "minItems": 1,
       "items": {
         "type": "object",
-        "required": ["trov", "schema"],
+        "required": ["trov", "schema", "pe"],
         "properties": {
           "trov": {
             "type": "string",
-            "const": "https://w3id.org/trace/trov/0.1#"
+            "const": "https://w3id.org/trace/2023/05/trov#"
           },
           "schema": {
             "type": "string",
             "const": "https://schema.org/"
           },
           "pe": {
-            "type": "string"
+            "type": "string",
+            "const": "https://policyengine.org/trace/0.1#"
           }
         }
       }
@@ -30,22 +31,13 @@
     "@graph": {
       "type": "array",
       "minItems": 1,
-      "items": {
-        "$ref": "#/$defs/troNode"
-      }
+      "items": { "$ref": "#/$defs/troNode" }
     }
   },
   "$defs": {
-    "hash": {
-      "type": "object",
-      "required": ["trov:hashAlgorithm", "trov:hashValue"],
-      "properties": {
-        "trov:hashAlgorithm": { "type": "string" },
-        "trov:hashValue": {
-          "type": "string",
-          "pattern": "^[a-f0-9]{64}$"
-        }
-      }
+    "sha256": {
+      "type": "string",
+      "pattern": "^[a-f0-9]{64}$"
     },
     "organization": {
       "type": "object",
@@ -58,27 +50,30 @@
     },
     "artifact": {
       "type": "object",
-      "required": ["@id", "@type", "trov:hash"],
+      "required": ["@id", "@type", "trov:sha256"],
       "properties": {
         "@id": { "type": "string" },
         "@type": { "const": "trov:ResearchArtifact" },
         "schema:name": { "type": "string" },
-        "trov:hash": { "$ref": "#/$defs/hash" },
+        "trov:sha256": { "$ref": "#/$defs/sha256" },
         "trov:mimeType": { "type": "string" }
       }
     },
     "artifactLocation": {
       "type": "object",
-      "required": ["@id", "@type", "trov:artifact", "trov:path"],
+      "required": ["@id", "@type", "trov:hasArtifact", "trov:hasLocation"],
       "properties": {
         "@id": { "type": "string" },
         "@type": { "const": "trov:ArtifactLocation" },
-        "trov:artifact": {
+        "trov:hasArtifact": {
           "type": "object",
           "required": ["@id"],
           "properties": { "@id": { "type": "string" } }
         },
-        "trov:path": { "type": "string", "minLength": 1 }
+        "trov:hasLocation": {
+          "type": "string",
+          "pattern": "^(https://|data/|reform\\.json$|results\\.json$|bundle\\.trace\\.tro\\.jsonld)"
+        }
       }
     },
     "troNode": {
@@ -86,21 +81,16 @@
       "required": [
         "@id",
         "@type",
-        "trov:vocabularyVersion",
-        "schema:creator",
         "schema:name",
+        "schema:creator",
+        "trov:wasAssembledBy",
         "trov:hasComposition",
         "trov:hasArrangement",
         "trov:hasPerformance"
       ],
       "properties": {
         "@id": { "type": "string" },
-        "@type": {
-          "type": "array",
-          "minItems": 2,
-          "contains": { "const": "trov:TransparentResearchObject" }
-        },
-        "trov:vocabularyVersion": { "type": "string", "const": "0.1" },
+        "@type": { "const": "trov:TransparentResearchObject" },
         "schema:creator": { "$ref": "#/$defs/organization" },
         "schema:name": { "type": "string", "minLength": 1 },
         "schema:description": { "type": "string" },
@@ -110,10 +100,9 @@
           "required": ["@id", "@type", "schema:name"],
           "properties": {
             "@id": { "type": "string" },
-            "@type": {
-              "type": "array",
-              "contains": { "const": "trov:TrustedResearchSystem" }
-            }
+            "@type": { "const": "trov:TransparentResearchSystem" },
+            "schema:name": { "type": "string" },
+            "rdfs:comment": { "type": "string" }
           }
         },
         "trov:createdWith": {
@@ -123,10 +112,7 @@
             "@type": { "const": "schema:SoftwareApplication" },
             "schema:name": { "type": "string" },
             "schema:softwareVersion": {
-              "oneOf": [
-                { "type": "string" },
-                { "type": "null" }
-              ]
+              "oneOf": [{ "type": "string" }, { "type": "null" }]
             }
           }
         },
@@ -138,11 +124,11 @@
             "@type": { "const": "trov:ArtifactComposition" },
             "trov:hasFingerprint": {
               "type": "object",
-              "required": ["@id", "@type", "trov:hash"],
+              "required": ["@id", "@type", "trov:sha256"],
               "properties": {
                 "@id": { "type": "string" },
                 "@type": { "const": "trov:CompositionFingerprint" },
-                "trov:hash": { "$ref": "#/$defs/hash" }
+                "trov:sha256": { "$ref": "#/$defs/sha256" }
               }
             },
             "trov:hasArtifact": {
@@ -161,6 +147,7 @@
             "properties": {
               "@id": { "type": "string" },
               "@type": { "const": "trov:ArtifactArrangement" },
+              "rdfs:comment": { "type": "string" },
               "trov:hasArtifactLocation": {
                 "type": "array",
                 "minItems": 1,
@@ -170,37 +157,24 @@
           }
         },
         "trov:hasPerformance": {
-          "type": "array",
-          "minItems": 1,
-          "items": {
-            "type": "object",
-            "required": [
-              "@id",
-              "@type",
-              "trov:wasConductedBy",
-              "trov:contributedToArrangement"
-            ],
-            "properties": {
-              "@id": { "type": "string" },
-              "@type": { "const": "trov:TrustedResearchPerformance" },
-              "trov:wasConductedBy": {
-                "type": "object",
-                "required": ["@id"],
-                "properties": { "@id": { "type": "string" } }
-              },
-              "trov:contributedToArrangement": {
-                "type": "object",
-                "required": ["@id", "@type", "trov:arrangement"],
-                "properties": {
-                  "@id": { "type": "string" },
-                  "@type": { "const": "trov:ArrangementBinding" },
-                  "trov:arrangement": {
-                    "type": "object",
-                    "required": ["@id"],
-                    "properties": { "@id": { "type": "string" } }
-                  }
-                }
-              }
+          "type": "object",
+          "required": ["@id", "@type", "trov:wasConductedBy", "trov:accessedArrangement", "pe:emittedIn"],
+          "properties": {
+            "@id": { "type": "string" },
+            "@type": { "const": "trov:TransparentResearchPerformance" },
+            "trov:wasConductedBy": {
+              "type": "object",
+              "required": ["@id"],
+              "properties": { "@id": { "type": "string" } }
+            },
+            "trov:accessedArrangement": {
+              "type": "object",
+              "required": ["@id"],
+              "properties": { "@id": { "type": "string" } }
+            },
+            "pe:emittedIn": {
+              "type": "string",
+              "enum": ["local", "github-actions"]
             }
           }
         }
diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py
index fc1106eb..16bf9c42 100644
--- a/src/policyengine/results/trace_tro.py
+++ b/src/policyengine/results/trace_tro.py
@@ -33,23 +33,16 @@ def build_results_trace_tro(
     results_location: Optional[str] = None,
     reform_location: Optional[str] = None,
     bundle_tro_location: Optional[str] = None,
+    bundle_tro_url: Optional[str] = None,
 ) -> dict:
     """Build a per-simulation TRO for a ``ResultsJson`` instance.
 
-    Args:
-        results: The validated results payload.
-        bundle_tro: A bundle-level TRACE TRO (see
-            :func:`policyengine.core.trace_tro.build_trace_tro_from_release_bundle`).
-        reform_payload: Optional reform JSON to include as a hashed artifact.
-        reform_name: Optional display name for the reform.
-        simulation_id: Optional identifier used in the TRO's ``schema:name``.
-        results_location: Optional URI or path for the ``results.json`` file.
-        reform_location: Optional URI or path for the reform JSON.
-        bundle_tro_location: Optional URI or path for the bundle TRO.
-
-    Returns:
-        The TRO as a ``dict``. Serialize with
-        :func:`policyengine.core.trace_tro.serialize_trace_tro`.
+    ``bundle_tro_url`` should point to a canonical, immutable location
+    for the bundle TRO (e.g. a GitHub release raw URL). It is recorded
+    on the performance node under ``pe:bundleTroUrl`` so a verifier can
+    fetch that URL, recompute its sha256, and confirm it matches the
+    bundle artifact hash in this TRO's composition. Without this
+    anchor, the bundle reference is only as trustworthy as the caller.
     """
     slug = simulation_id or (results.metadata.slug or results.metadata.title)
     return build_simulation_trace_tro(
@@ -62,6 +55,7 @@ def build_results_trace_tro(
         results_location=results_location,
         reform_location=reform_location,
         bundle_tro_location=bundle_tro_location,
+        bundle_tro_url=bundle_tro_url,
     )
 
 
@@ -74,12 +68,13 @@ def write_results_with_trace_tro(
     reform_name: Optional[str] = None,
     tro_suffix: str = ".trace.tro.jsonld",
     bundle_tro_path: Optional[Union[str, Path]] = None,
+    bundle_tro_url: Optional[str] = None,
 ) -> dict[str, Path]:
     """Write ``results.json`` and a sibling per-simulation TRACE TRO.
 
-    The TRO is written next to the results file with the given suffix
-    appended to the results filename stem. Returns a dict with ``results``
-    and ``tro`` paths.
+    The TRO is written next to the results file with the given suffix.
+    When ``bundle_tro_url`` is provided, it is recorded in the TRO so a
+    verifier can independently fetch that URL and check its hash.
     """
     results_path = Path(results_path)
     results.write(results_path)
@@ -97,6 +92,7 @@ def write_results_with_trace_tro(
         reform_name=reform_name,
         results_location=results_path.name,
         bundle_tro_location=bundle_tro_location,
+        bundle_tro_url=bundle_tro_url,
     )
     tro_path = results_path.with_suffix(tro_suffix)
     tro_path.write_bytes(serialize_trace_tro(tro))
diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py
index df5b9dc5..2cc6edf8 100644
--- a/tests/test_trace_tro.py
+++ b/tests/test_trace_tro.py
@@ -2,7 +2,7 @@
 
 Covers bundle-level TROs (``policyengine.core.trace_tro``) and per-simulation
 TROs (``policyengine.results.trace_tro``), plus the ``policyengine trace-tro``
-CLI and JSON-Schema conformance.
+CLI, determinism guarantees, and JSON-Schema conformance against TROv 2023/05.
 """
 
 from __future__ import annotations
@@ -25,7 +25,7 @@
 from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
 from policyengine.core.trace_tro import (
     POLICYENGINE_ORGANIZATION,
-    TRACE_TROV_VERSION,
+    TRACE_TROV_NAMESPACE,
     build_trace_tro_from_release_bundle,
     compute_trace_composition_fingerprint,
     extract_bundle_tro_reference,
@@ -95,6 +95,16 @@ def tro_schema() -> dict:
     return json.loads(schema_path.read_text())
 
 
+@pytest.fixture
+def us_bundle_tro(monkeypatch):
+    monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
+    return build_trace_tro_from_release_bundle(
+        get_release_manifest("us"),
+        _us_data_release_manifest(),
+        fetch_pypi=_fake_fetch_pypi,
+    )
+
+
 @pytest.fixture(autouse=True)
 def clear_manifest_caches():
     yield
@@ -105,41 +115,66 @@ def clear_manifest_caches():
 class TestBundleTRO:
     """Bundle-level TRACE TRO emission."""
 
-    def test__given_us_bundle__then_schema_creator_is_policyengine_organization(
-        self,
+    def test__given_context__then_uses_public_trov_namespace(self, us_bundle_tro):
+        context = us_bundle_tro["@context"][0]
+        assert context["trov"] == TRACE_TROV_NAMESPACE
+        assert context["trov"] == "https://w3id.org/trace/2023/05/trov#"
+
+    def test__given_root_type__then_is_single_transparent_research_object(
+        self, us_bundle_tro
     ):
-        country_manifest = get_release_manifest("us")
+        node = us_bundle_tro["@graph"][0]
+        assert node["@type"] == "trov:TransparentResearchObject"
 
-        tro = build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
+    def test__given_trs__then_is_transparent_research_system(self, us_bundle_tro):
+        trs = us_bundle_tro["@graph"][0]["trov:wasAssembledBy"]
+        assert trs["@type"] == "trov:TransparentResearchSystem"
 
-        assert tro["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+    def test__given_performance__then_is_transparent_research_performance(
+        self, us_bundle_tro
+    ):
+        performance = us_bundle_tro["@graph"][0]["trov:hasPerformance"]
+        assert performance["@type"] == "trov:TransparentResearchPerformance"
+        assert performance["trov:accessedArrangement"]["@id"] == "arrangement/1"
 
-    def test__given_us_bundle__then_model_wheel_is_hashed_as_artifact(self):
-        country_manifest = get_release_manifest("us")
+    def test__given_artifacts__then_use_flat_trov_sha256(self, us_bundle_tro):
+        artifacts = us_bundle_tro["@graph"][0]["trov:hasComposition"][
+            "trov:hasArtifact"
+        ]
+        for artifact in artifacts:
+            assert "trov:sha256" in artifact
+            assert "trov:hash" not in artifact
+            assert len(artifact["trov:sha256"]) == 64
 
-        tro = build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
+    def test__given_locations__then_use_has_location_and_has_artifact(
+        self, us_bundle_tro
+    ):
+        locations = us_bundle_tro["@graph"][0]["trov:hasArrangement"][0][
+            "trov:hasArtifactLocation"
+        ]
+        for location in locations:
+            assert "trov:hasLocation" in location
+            assert "trov:path" not in location
+            assert "trov:hasArtifact" in location
+            assert "trov:artifact" not in location
 
-        artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
-        wheel_artifacts = [a for a in artifacts if a["@id"].endswith("model_wheel")]
-        assert len(wheel_artifacts) == 1
-        assert wheel_artifacts[0]["trov:hash"]["trov:hashValue"] == FAKE_WHEEL_SHA
-        locations = tro["@graph"][0]["trov:hasArrangement"][0][
+    def test__given_creator__then_is_policyengine_organization(self, us_bundle_tro):
+        assert us_bundle_tro["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+
+    def test__given_us_bundle__then_model_wheel_hash_is_included(self, us_bundle_tro):
+        country_manifest = get_release_manifest("us")
+        artifacts = us_bundle_tro["@graph"][0]["trov:hasComposition"][
+            "trov:hasArtifact"
+        ]
+        wheels = [a for a in artifacts if a["@id"].endswith("model_wheel")]
+        assert len(wheels) == 1
+        # us.json pins the wheel sha directly so PyPI is not consulted.
+        assert wheels[0]["trov:sha256"] == country_manifest.model_package.sha256
+        locations = us_bundle_tro["@graph"][0]["trov:hasArrangement"][0][
             "trov:hasArtifactLocation"
         ]
-        wheel_location = next(
-            location
-            for location in locations
-            if location["@id"].endswith("model_wheel")
-        )
-        assert wheel_location["trov:path"] == FAKE_WHEEL_URL
+        wheel_loc = next(loc for loc in locations if loc["@id"].endswith("model_wheel"))
+        assert wheel_loc["trov:hasLocation"] == country_manifest.model_package.wheel_url
 
     def test__given_manifest_sha__then_pypi_not_fetched(self):
         country_manifest = get_release_manifest("us")
@@ -154,8 +189,8 @@ def test__given_manifest_sha__then_pypi_not_fetched(self):
         )
 
         artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
-        wheel_artifacts = [a for a in artifacts if a["@id"].endswith("model_wheel")]
-        assert wheel_artifacts[0]["trov:hash"]["trov:hashValue"] == "b" * 64
+        wheels = [a for a in artifacts if a["@id"].endswith("model_wheel")]
+        assert wheels[0]["trov:sha256"] == "b" * 64
         fetch_pypi.assert_not_called()
 
     def test__given_pypi_unreachable__then_wheel_artifact_is_skipped(self):
@@ -178,35 +213,37 @@ def failing_fetch(name, version):
         ]
         assert not any(aid.endswith("model_wheel") for aid in artifact_ids)
 
-    def test__given_artifact_locations__then_all_paths_are_https_or_local(self):
+    def test__given_manifest_dataset_sha__then_data_release_sha_not_required(self):
         country_manifest = get_release_manifest("us")
+        country_manifest.certified_data_artifact.sha256 = "d" * 64
+        data_release_manifest = _us_data_release_manifest(sha256=None)
 
         tro = build_trace_tro_from_release_bundle(
             country_manifest,
-            _us_data_release_manifest(),
+            data_release_manifest,
             fetch_pypi=_fake_fetch_pypi,
         )
 
-        locations = tro["@graph"][0]["trov:hasArrangement"][0][
+        artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+        dataset = next(a for a in artifacts if a["@id"].endswith("dataset"))
+        assert dataset["trov:sha256"] == "d" * 64
+
+    def test__given_artifact_locations__then_all_paths_are_https_or_local(
+        self, us_bundle_tro
+    ):
+        locations = us_bundle_tro["@graph"][0]["trov:hasArrangement"][0][
             "trov:hasArtifactLocation"
         ]
-        paths = [location["trov:path"] for location in locations]
-        # Bundle manifest is a local wheel-internal path; everything else must
-        # be dereferenceable HTTPS so a reproducibility reviewer can fetch it.
+        paths = [location["trov:hasLocation"] for location in locations]
         assert paths[0].startswith("data/release_manifests/")
         for path in paths[1:]:
             assert path.startswith("https://"), path
 
-    def test__given_certification__then_fields_are_machine_readable(self):
+    def test__given_certification__then_fields_are_machine_readable(
+        self, us_bundle_tro
+    ):
         country_manifest = get_release_manifest("us")
-
-        tro = build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
-
-        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        performance = us_bundle_tro["@graph"][0]["trov:hasPerformance"]
         assert (
             performance["pe:certifiedForModelVersion"]
             == country_manifest.certification.certified_for_model_version
@@ -224,10 +261,7 @@ def test__given_certification__then_fields_are_machine_readable(self):
             == country_manifest.certification.data_build_id
         )
 
-    def test__given_github_actions_env__then_ci_attestation_is_included(
-        self, monkeypatch
-    ):
-        country_manifest = get_release_manifest("us")
+    def test__given_github_actions_env__then_emitted_in_is_ci(self, monkeypatch):
         monkeypatch.setenv("GITHUB_ACTIONS", "true")
         monkeypatch.setenv("GITHUB_SERVER_URL", "https://github.com")
         monkeypatch.setenv("GITHUB_REPOSITORY", "PolicyEngine/policyengine.py")
@@ -235,82 +269,132 @@ def test__given_github_actions_env__then_ci_attestation_is_included(
         monkeypatch.setenv("GITHUB_SHA", "abc123")
 
         tro = build_trace_tro_from_release_bundle(
-            country_manifest,
+            get_release_manifest("us"),
             _us_data_release_manifest(),
             fetch_pypi=_fake_fetch_pypi,
         )
 
-        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        performance = tro["@graph"][0]["trov:hasPerformance"]
+        assert performance["pe:emittedIn"] == "github-actions"
         assert (
             performance["pe:ciRunUrl"]
             == "https://github.com/PolicyEngine/policyengine.py/actions/runs/12345"
         )
         assert performance["pe:ciGitSha"] == "abc123"
 
-    def test__given_non_ci_env__then_no_attestation_fields(self, monkeypatch):
-        country_manifest = get_release_manifest("us")
-        monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
-
-        tro = build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
-
-        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+    def test__given_no_ci_env__then_emitted_in_is_local(
+        self, monkeypatch, us_bundle_tro
+    ):
+        performance = us_bundle_tro["@graph"][0]["trov:hasPerformance"]
+        assert performance["pe:emittedIn"] == "local"
         assert "pe:ciRunUrl" not in performance
         assert "pe:ciGitSha" not in performance
 
-    def test__given_same_inputs__then_built_tros_serialize_identically(self):
-        country_manifest = get_release_manifest("us")
-        data = _us_data_release_manifest()
-
+    def test__given_fresh_manifest_instances__then_tro_bytes_match(self, monkeypatch):
+        monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
         first = serialize_trace_tro(
             build_trace_tro_from_release_bundle(
-                country_manifest,
-                data,
+                get_release_manifest("us"),
+                _us_data_release_manifest(),
                 fetch_pypi=_fake_fetch_pypi,
-                ci_attestation={},
             )
         )
+        get_release_manifest.cache_clear()
         second = serialize_trace_tro(
             build_trace_tro_from_release_bundle(
-                country_manifest,
-                data,
+                get_release_manifest("us"),
+                _us_data_release_manifest(),
                 fetch_pypi=_fake_fetch_pypi,
-                ci_attestation={},
             )
         )
         assert first == second
 
-    def test__given_hashes_in_any_order__then_composition_fingerprint_matches(
-        self,
-    ):
-        hashes = ["ccc", "aaa", "bbb"]
+    def test__given_hashes_in_any_order__then_fingerprint_matches(self):
+        hashes = ["c" * 64, "a" * 64, "b" * 64]
         assert compute_trace_composition_fingerprint(
             hashes
         ) == compute_trace_composition_fingerprint(reversed(hashes))
 
-    def test__given_generated_tro__then_validates_against_json_schema(self, tro_schema):
-        country_manifest = get_release_manifest("us")
-
-        tro = build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
+    def test__given_hex_length_ambiguity__then_separator_prevents_collision(self):
+        assert compute_trace_composition_fingerprint(
+            ["ab", "cdef"]
+        ) != compute_trace_composition_fingerprint(["abcd", "ef"])
 
-        errors = list(Draft202012Validator(tro_schema).iter_errors(tro))
+    def test__given_generated_tro__then_validates_against_json_schema(
+        self, tro_schema, us_bundle_tro
+    ):
+        errors = list(Draft202012Validator(tro_schema).iter_errors(us_bundle_tro))
         assert errors == [], [error.message for error in errors]
 
-    def test__given_vocabulary_version_constant__then_matches_context_namespace(
-        self,
-    ):
-        assert TRACE_TROV_VERSION == "0.1"
+    def test__given_non_https_location__then_schema_rejects(self, tro_schema):
+        # Schema must catch the "non-HTTPS artifact locations" claim in the docs.
+        bad = {
+            "@context": [
+                {
+                    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+                    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+                    "trov": TRACE_TROV_NAMESPACE,
+                    "schema": "https://schema.org/",
+                    "pe": "https://policyengine.org/trace/0.1#",
+                }
+            ],
+            "@graph": [
+                {
+                    "@id": "tro",
+                    "@type": "trov:TransparentResearchObject",
+                    "schema:name": "bad",
+                    "schema:creator": POLICYENGINE_ORGANIZATION,
+                    "trov:wasAssembledBy": {
+                        "@id": "trs",
+                        "@type": "trov:TransparentResearchSystem",
+                        "schema:name": "x",
+                    },
+                    "trov:hasComposition": {
+                        "@id": "composition/1",
+                        "@type": "trov:ArtifactComposition",
+                        "trov:hasFingerprint": {
+                            "@id": "fp",
+                            "@type": "trov:CompositionFingerprint",
+                            "trov:sha256": "a" * 64,
+                        },
+                        "trov:hasArtifact": [
+                            {
+                                "@id": "composition/1/artifact/1",
+                                "@type": "trov:ResearchArtifact",
+                                "trov:sha256": "a" * 64,
+                            }
+                        ],
+                    },
+                    "trov:hasArrangement": [
+                        {
+                            "@id": "arrangement/1",
+                            "@type": "trov:ArtifactArrangement",
+                            "trov:hasArtifactLocation": [
+                                {
+                                    "@id": "arrangement/1/location/1",
+                                    "@type": "trov:ArtifactLocation",
+                                    "trov:hasArtifact": {
+                                        "@id": "composition/1/artifact/1"
+                                    },
+                                    "trov:hasLocation": "file:///tmp/leak.h5",
+                                }
+                            ],
+                        }
+                    ],
+                    "trov:hasPerformance": {
+                        "@id": "trp/1",
+                        "@type": "trov:TransparentResearchPerformance",
+                        "trov:wasConductedBy": {"@id": "trs"},
+                        "trov:accessedArrangement": {"@id": "arrangement/1"},
+                        "pe:emittedIn": "local",
+                    },
+                }
+            ],
+        }
+        errors = list(Draft202012Validator(tro_schema).iter_errors(bad))
+        assert errors, "schema must reject file:// locations"
 
-    def test__given_model_version_attribute__then_trace_tro_property_works(
-        self,
-    ):
+    def test__given_trace_tro_property__then_emits_valid_tro(self):
         manifest = get_release_manifest("us")
         data_release_manifest = _us_data_release_manifest()
         model_version = TaxBenefitModelVersion(
@@ -339,14 +423,6 @@ def test__given_model_version_attribute__then_trace_tro_property_works(
 class TestSimulationTRO:
     """Per-simulation TROs chained from a bundle TRO."""
 
-    def _bundle_tro(self):
-        country_manifest = get_release_manifest("us")
-        return build_trace_tro_from_release_bundle(
-            country_manifest,
-            _us_data_release_manifest(),
-            fetch_pypi=_fake_fetch_pypi,
-        )
-
     def _results(self, **overrides):
         return ResultsJson(
             metadata=ResultsMetadata(
@@ -365,13 +441,12 @@ def _results(self, **overrides):
             },
         )
 
-    def test__given_bundle_and_results__then_simulation_tro_pins_both(self):
-        bundle_tro = self._bundle_tro()
-        results = self._results()
-
+    def test__given_bundle_and_results__then_simulation_tro_pins_both(
+        self, us_bundle_tro
+    ):
         tro = build_results_trace_tro(
-            results,
-            bundle_tro=bundle_tro,
+            self._results(),
+            bundle_tro=us_bundle_tro,
             reform_payload={"salt_cap": 0},
             reform_name="SALT cap repeal",
         )
@@ -385,27 +460,27 @@ def test__given_bundle_and_results__then_simulation_tro_pins_both(self):
             "composition/1/artifact/reform",
             "composition/1/artifact/results",
         }
-        performance = tro["@graph"][0]["trov:hasPerformance"][0]
+        performance = tro["@graph"][0]["trov:hasPerformance"]
         assert (
             performance["pe:bundleFingerprint"]
-            == extract_bundle_tro_reference(bundle_tro)["fingerprint"]
+            == extract_bundle_tro_reference(us_bundle_tro)["fingerprint"]
         )
 
     def test__given_simulation_tro__then_validates_against_json_schema(
-        self, tro_schema
+        self, tro_schema, us_bundle_tro
     ):
         tro = build_results_trace_tro(
             self._results(),
-            bundle_tro=self._bundle_tro(),
+            bundle_tro=us_bundle_tro,
             reform_payload={"salt_cap": 0},
         )
-
         errors = list(Draft202012Validator(tro_schema).iter_errors(tro))
         assert errors == [], [error.message for error in errors]
 
-    def test__given_no_reform__then_only_bundle_and_results_are_pinned(self):
-        tro = build_results_trace_tro(self._results(), bundle_tro=self._bundle_tro())
-
+    def test__given_no_reform__then_only_bundle_and_results_are_pinned(
+        self, us_bundle_tro
+    ):
+        tro = build_results_trace_tro(self._results(), bundle_tro=us_bundle_tro)
         artifact_ids = {
             a["@id"]
             for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
@@ -415,13 +490,52 @@ def test__given_no_reform__then_only_bundle_and_results_are_pinned(self):
             "composition/1/artifact/results",
         }
 
+    def test__given_bundle_tro_url__then_performance_records_it(self, us_bundle_tro):
+        tro = build_results_trace_tro(
+            self._results(),
+            bundle_tro=us_bundle_tro,
+            bundle_tro_url="https://raw.githubusercontent.com/PolicyEngine/policyengine.py/v3.4.5/src/policyengine/data/release_manifests/us.trace.tro.jsonld",
+        )
+
+        performance = tro["@graph"][0]["trov:hasPerformance"]
+        assert performance["pe:bundleTroUrl"].startswith(
+            "https://raw.githubusercontent.com/PolicyEngine/policyengine.py/"
+        )
+        locations = tro["@graph"][0]["trov:hasArrangement"][0][
+            "trov:hasArtifactLocation"
+        ]
+        bundle_location = next(
+            loc for loc in locations if loc["@id"].endswith("bundle_tro")
+        )
+        assert bundle_location["trov:hasLocation"].startswith("https://")
+
+    def test__given_forged_bundle_tro__then_hash_changes_in_sim_tro(
+        self, us_bundle_tro
+    ):
+        # If the caller swaps the bundle TRO, the artifact hash in the sim TRO
+        # changes, so a verifier that re-fetches from pe:bundleTroUrl will
+        # detect the swap.
+        original = build_results_trace_tro(self._results(), bundle_tro=us_bundle_tro)
+        forged_bundle = json.loads(json.dumps(us_bundle_tro))
+        forged_bundle["@graph"][0]["schema:description"] = "forged"
+        forged = build_results_trace_tro(self._results(), bundle_tro=forged_bundle)
+
+        def bundle_hash(tro):
+            return next(
+                a["trov:sha256"]
+                for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+                if a["@id"].endswith("bundle_tro")
+            )
+
+        assert bundle_hash(original) != bundle_hash(forged)
+
     def test__given_write_helper__then_results_and_tro_files_are_sidebyside(
-        self, tmp_path
+        self, tmp_path, us_bundle_tro
     ):
         written = write_results_with_trace_tro(
             self._results(),
             tmp_path / "results.json",
-            bundle_tro=self._bundle_tro(),
+            bundle_tro=us_bundle_tro,
             reform_payload={"salt_cap": 0},
         )
 
@@ -435,7 +549,10 @@ def test__given_write_helper__then_results_and_tro_files_are_sidebyside(
 class TestCLI:
     """``policyengine`` CLI entry point."""
 
-    def test__given_trace_tro_stdout__then_writes_canonical_json(self, capsysbinary):
+    def test__given_trace_tro_stdout__then_writes_canonical_json(
+        self, capsysbinary, monkeypatch
+    ):
+        monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
         data_release_manifest = _us_data_release_manifest()
 
         with patch(
@@ -449,12 +566,12 @@ def test__given_trace_tro_stdout__then_writes_canonical_json(self, capsysbinary)
                 exit_code = cli_main(["trace-tro", "us"])
 
         assert exit_code == 0
-        stdout = capsysbinary.readouterr().out
-        payload = json.loads(stdout)
+        payload = json.loads(capsysbinary.readouterr().out)
         assert payload["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
-        assert payload["@graph"][0]["trov:vocabularyVersion"] == TRACE_TROV_VERSION
+        assert payload["@graph"][0]["trov:hasPerformance"]["pe:emittedIn"] == "local"
 
-    def test__given_out_path__then_writes_to_file(self, tmp_path):
+    def test__given_out_path__then_writes_to_file(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
         out = tmp_path / "nested" / "us.trace.tro.jsonld"
         data_release_manifest = _us_data_release_manifest()
 
@@ -471,12 +588,32 @@ def test__given_out_path__then_writes_to_file(self, tmp_path):
         assert exit_code == 0
         assert out.exists()
         payload = json.loads(out.read_text())
-        assert payload["@graph"][0]["trov:vocabularyVersion"] == "0.1"
+        assert payload["@graph"][0]["@type"] == "trov:TransparentResearchObject"
 
     def test__given_release_manifest_command__then_prints_bundle(self, capsys):
         exit_code = cli_main(["release-manifest", "us"])
 
         assert exit_code == 0
-        stdout = capsys.readouterr().out
-        payload = json.loads(stdout)
+        payload = json.loads(capsys.readouterr().out)
         assert payload["country_id"] == "us"
+
+    def test__given_validate_command__then_accepts_valid_tro(
+        self, tmp_path, us_bundle_tro
+    ):
+        tro_path = tmp_path / "us.trace.tro.jsonld"
+        tro_path.write_bytes(serialize_trace_tro(us_bundle_tro))
+
+        exit_code = cli_main(["trace-tro-validate", str(tro_path)])
+
+        assert exit_code == 0
+
+    def test__given_validate_command__then_rejects_invalid_tro(self, tmp_path, capsys):
+        bad = {"@context": [{"trov": "wrong"}], "@graph": []}
+        tro_path = tmp_path / "bad.jsonld"
+        tro_path.write_text(json.dumps(bad))
+
+        exit_code = cli_main(["trace-tro-validate", str(tro_path)])
+
+        assert exit_code == 1
+        err = capsys.readouterr().err
+        assert "invalid" in err.lower() or "error" in err.lower()

From a4d0809f666cffebbc6bc52956fd70cd976d6cfb Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 08:31:18 -0400
Subject: [PATCH 7/8] Close reviewer round-2 gaps: forgery anchor, schema
 regex, dead kwargs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewers came back accept / clean / minor revisions; this commit
picks up the remaining suggestions.

Forgery resistance:
- Bundle TRO takes an optional self_url and records it as pe:selfUrl
  so a verifier with only the bundle bytes can discover the canonical
  location it was published at.
- write_results_with_trace_tro now requires bundle_tro_url, not
  merely accepts it. A published simulation TRO that omitted the URL
  would leave reviewers without a pinned fetch target; raising when
  the caller forgets matches the "adversarial reviewer" expectation.
- docs/release-bundles.md shows the three-step verifier workflow a
  replication reviewer should run: (1) fetch pe:bundleTroUrl and
  recompute its sha256, (2) compare that hash to the sim TRO's
  bundle_tro artifact hash, and (3) confirm pe:bundleFingerprint
  matches the bundle's own CompositionFingerprint. A sim TRO with a
  swapped bundle_tro dict but a truthful URL fails step 2;
  both-swapped fails step 3.

CI regression guard:
- scripts/generate_trace_tros.py now exits non-zero if a country that
  previously shipped a .trace.tro.jsonld fails to regenerate (e.g.
  HUGGING_FACE_TOKEN expired). The Versioning CI job will block a
  release rather than silently ship a stale TRO.

Schema tightening:
- trov:hasLocation regex now anchors end-of-string on every legal
  local path and restricts data/ to
  data/release_manifests/<country>.json, so data/../../etc/passwd and
  bundle.trace.tro.jsonld.evil no longer pass. The only suffix still
  accepted after bundle.trace.tro.jsonld is an optional
  #<64-hex-digit> fragment. HTTPS locations must contain no
  whitespace.
- Added a test covering the multi-node @graph path after filter fix.

extract_bundle_tro_reference filter:
- Locates the trov:TransparentResearchObject node by @type rather
  than trusting @graph[0]. Future TROs that embed TRS/TSA nodes no
  longer break reference extraction.

Dead-kwarg cleanup (simplifier):
- Dropped emission_context kwarg from both public builders; tests
  use monkeypatch on GITHUB_ACTIONS/GITHUB_SHA instead, which is
  closer to what CI does anyway.
- Dropped tro_id / composition_id / arrangement_id default kwargs
  from the helpers; hardcoded as module constants.
- Dropped the bundle_tro_path branch from write_results_with_trace_tro
  — no caller, no test, no actual use case.

Tests (38 total in test_trace_tro.py):
- test__given_fixed_ci_env__then_tro_bytes_match_across_builds locks
  down determinism under CI with pinned run_id/git_sha
- test__given_self_url__then_tro_records_it covers pe:selfUrl
- test__given_graph_with_multiple_nodes__then_extract_finds_tro
  exercises the @type filter
- test__given_write_helper_without_url__then_raises locks the
  required-kwarg contract

Docstring caveat on build_trace_tro_from_release_bundle now states
explicitly that pe:compatibilityBasis covers the model and data
layers only; Python version, OS, and transitive lockfile are not
yet pinned.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/release-bundles.md                       | 37 ++++++++-
 scripts/generate_trace_tros.py                | 42 ++++++----
 src/policyengine/core/trace_tro.py            | 67 +++++++++------
 .../data/schemas/trace_tro.schema.json        |  2 +-
 src/policyengine/results/trace_tro.py         | 29 ++-----
 tests/test_trace_tro.py                       | 81 +++++++++++++++++++
 6 files changed, 196 insertions(+), 62 deletions(-)

diff --git a/docs/release-bundles.md b/docs/release-bundles.md
index b2a84c29..ea014c9d 100644
--- a/docs/release-bundles.md
+++ b/docs/release-bundles.md
@@ -266,7 +266,9 @@ and confirm it matches the `bundle_tro` artifact hash in the simulation
 TRO's composition. Without this anchor, the bundle reference is only as
 trustworthy as whoever produced the JSON.
 
-#### Validating a TRO
+#### Validating a received TRO
+
+Structural validation:
 
 ```
 policyengine trace-tro-validate path/to/tro.jsonld
@@ -279,6 +281,39 @@ well-known local paths `results.json`, `reform.json`,
 `bundle.trace.tro.jsonld`). The same schema is exercised in the test
 suite against generated TROs.
 
+Content validation (the verifier workflow a replication reviewer
+should run):
+
+```python
+import hashlib, json, requests
+from policyengine.core.trace_tro import canonical_json_bytes
+
+sim_tro = json.load(open("results.trace.tro.jsonld"))
+perf = sim_tro["@graph"][0]["trov:hasPerformance"]
+
+# 1. Fetch the bundle TRO from its pinned URL and recompute its hash.
+bundle_bytes = requests.get(perf["pe:bundleTroUrl"]).content
+bundle_hash = hashlib.sha256(canonical_json_bytes(json.loads(bundle_bytes))).hexdigest()
+
+# 2. Compare against the hash recorded in the simulation TRO's composition.
+recorded = next(
+    a["trov:sha256"]
+    for a in sim_tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"]
+    if a["@id"].endswith("bundle_tro")
+)
+assert bundle_hash == recorded, "bundle_tro_url content does not match sim TRO"
+
+# 3. Confirm the fingerprint recorded on the performance matches the
+#    fingerprint inside the fetched bundle.
+bundle = json.loads(bundle_bytes)
+bundle_fp = bundle["@graph"][0]["trov:hasComposition"]["trov:hasFingerprint"]["trov:sha256"]
+assert perf["pe:bundleFingerprint"] == bundle_fp
+```
+
+A sim TRO with a swapped `bundle_tro` dict but a truthful
+`pe:bundleTroUrl` will fail step 2; a sim TRO with both swapped will
+fail step 3.
+
 #### Known limitations
 
 - TROs are emitted unsigned. A signed attestation (sigstore or in-toto)
diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py
index 02fd1049..9c384341 100644
--- a/scripts/generate_trace_tros.py
+++ b/scripts/generate_trace_tros.py
@@ -5,9 +5,11 @@
 releasing a new ``policyengine.py`` version so the packaged TRO
 matches the pinned bundle. Requires HTTPS access to the data release
 manifest (and ``HUGGING_FACE_TOKEN`` for private country data).
-Countries whose data release manifest is unreachable are skipped with
-a warning so the step can run without all credentials; those TROs can
-be regenerated in a later release.
+
+If a country previously had a TRO on disk and the new run cannot
+regenerate it (e.g. a missing secret or an unreachable HF endpoint),
+the script exits non-zero so the release workflow blocks rather than
+silently shipping a stale/missing TRO.
 """
 
 from __future__ import annotations
@@ -27,38 +29,50 @@
 )
 
 
-def regenerate_all() -> tuple[list[Path], list[tuple[str, str]]]:
+def regenerate_all() -> tuple[list[Path], list[tuple[str, Path, str]]]:
     manifest_root = Path(
         str(files("policyengine").joinpath("data", "release_manifests"))
     )
     written: list[Path] = []
-    skipped: list[tuple[str, str]] = []
+    regressions: list[tuple[str, Path, str]] = []
     for manifest_path in sorted(manifest_root.glob("*.json")):
         country_id = manifest_path.stem
+        tro_path = manifest_path.with_suffix(".trace.tro.jsonld")
         country_manifest = get_release_manifest(country_id)
         try:
             data_release_manifest = get_data_release_manifest(country_id)
         except DataReleaseManifestUnavailableError as exc:
-            skipped.append((country_id, str(exc)))
+            if tro_path.exists():
+                regressions.append((country_id, tro_path, str(exc)))
+            else:
+                print(
+                    f"skipped {country_id}: {exc}",
+                    file=sys.stderr,
+                )
             continue
         tro = build_trace_tro_from_release_bundle(
             country_manifest,
             data_release_manifest,
             certification=country_manifest.certification,
         )
-        out_path = manifest_path.with_suffix(".trace.tro.jsonld")
-        out_path.write_bytes(serialize_trace_tro(tro))
-        written.append(out_path)
-    return written, skipped
+        tro_path.write_bytes(serialize_trace_tro(tro))
+        written.append(tro_path)
+    return written, regressions
 
 
 def main() -> int:
-    written, skipped = regenerate_all()
+    written, regressions = regenerate_all()
     for path in written:
         print(f"wrote {path}")
-    for country_id, reason in skipped:
-        print(f"skipped {country_id}: {reason}", file=sys.stderr)
-    if not written and not skipped:
+    for country_id, tro_path, reason in regressions:
+        print(
+            f"error: {country_id} already has {tro_path.name} but regeneration "
+            f"failed: {reason}",
+            file=sys.stderr,
+        )
+    if regressions:
+        return 1
+    if not written:
         print("no release manifests found", file=sys.stderr)
         return 1
     return 0
diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/core/trace_tro.py
index c9c431a3..66dca63c 100644
--- a/src/policyengine/core/trace_tro.py
+++ b/src/policyengine/core/trace_tro.py
@@ -170,18 +170,20 @@ def _make_location(location_id: str, artifact_id: str, location: str) -> dict[st
     }
 
 
+_COMPOSITION_ID = "composition/1"
+_ARRANGEMENT_ID = "arrangement/1"
+
+
 def _assemble_composition_and_arrangement(
     artifact_specs: list[dict[str, Any]],
     *,
-    composition_id: str = "composition/1",
-    arrangement_id: str = "arrangement/1",
     arrangement_comment: Optional[str] = None,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
     artifacts: list[dict[str, Any]] = []
     locations: list[dict[str, Any]] = []
     hashes: list[str] = []
     for spec in artifact_specs:
-        artifact_id = f"{composition_id}/artifact/{spec['id']}"
+        artifact_id = f"{_COMPOSITION_ID}/artifact/{spec['id']}"
         hashes.append(spec["hash"])
         artifacts.append(
             _make_artifact(
@@ -193,24 +195,24 @@ def _assemble_composition_and_arrangement(
         )
         locations.append(
             _make_location(
-                f"{arrangement_id}/location/{spec['id']}",
+                f"{_ARRANGEMENT_ID}/location/{spec['id']}",
                 artifact_id,
                 spec["location"],
             )
         )
 
     composition = {
-        "@id": composition_id,
+        "@id": _COMPOSITION_ID,
         "@type": "trov:ArtifactComposition",
         "trov:hasFingerprint": {
-            "@id": f"{composition_id}/fingerprint",
+            "@id": f"{_COMPOSITION_ID}/fingerprint",
             "@type": "trov:CompositionFingerprint",
             "trov:sha256": compute_trace_composition_fingerprint(hashes),
         },
         "trov:hasArtifact": artifacts,
     }
     arrangement: dict[str, Any] = {
-        "@id": arrangement_id,
+        "@id": _ARRANGEMENT_ID,
         "@type": "trov:ArtifactArrangement",
         "trov:hasArtifactLocation": locations,
     }
@@ -230,7 +232,6 @@ def _policyengine_trs(comment: str) -> dict[str, Any]:
 
 def _assemble_tro_node(
     *,
-    tro_id: str = "tro",
     tro_name: str,
     tro_description: str,
     created_at: Optional[str],
@@ -240,9 +241,10 @@ def _assemble_tro_node(
     composition: Mapping[str, Any],
     arrangement: Mapping[str, Any],
     performance: Mapping[str, Any],
+    self_url: Optional[str] = None,
 ) -> dict[str, Any]:
     node: dict[str, Any] = {
-        "@id": tro_id,
+        "@id": "tro",
         "@type": "trov:TransparentResearchObject",
         "schema:name": tro_name,
         "schema:description": tro_description,
@@ -259,6 +261,8 @@ def _assemble_tro_node(
     }
     if created_at is not None:
         node["schema:dateCreated"] = created_at
+    if self_url is not None:
+        node["pe:selfUrl"] = self_url
     return node
 
 
@@ -272,7 +276,7 @@ def build_trace_tro_from_release_bundle(
     model_wheel_sha256: Optional[str] = None,
     model_wheel_url: Optional[str] = None,
     fetch_pypi: Any = fetch_pypi_wheel_metadata,
-    emission_context: Optional[Mapping[str, str]] = None,
+    self_url: Optional[str] = None,
 ) -> dict:
     """Build a TRACE TRO for a certified runtime bundle.
 
@@ -280,6 +284,17 @@ def build_trace_tro_from_release_bundle(
     certified dataset, and (when resolvable) the country model wheel.
     Certification metadata is encoded as structured ``pe:*`` fields on
     the :class:`trov:TransparentResearchPerformance` node.
+
+    ``self_url`` is recorded on the TRO node as ``pe:selfUrl`` so a
+    verifier who has only the bundle bytes can still discover the
+    canonical location this TRO was published at.
+
+    .. note::
+       ``pe:compatibilityBasis`` covers the model and data layers only.
+       The Python interpreter version, OS, and transitive dependency
+       lockfile are not yet pinned in the TRO composition — reviewers
+       who require bit-exact reproducibility of the runtime stack need
+       to consult the wheel's own metadata and should flag the gap.
     """
     certified_artifact = country_manifest.certified_data_artifact
     if certified_artifact is None:
@@ -394,11 +409,7 @@ def build_trace_tro_from_release_bundle(
             else country_manifest.published_at
         ),
         ended_at=country_manifest.published_at,
-        emission_context=(
-            dict(emission_context)
-            if emission_context is not None
-            else _emission_context()
-        ),
+        emission_context=_emission_context(),
     )
 
     tro_node = _assemble_tro_node(
@@ -425,6 +436,7 @@ def build_trace_tro_from_release_bundle(
         composition=composition,
         arrangement=arrangement,
         performance=performance,
+        self_url=self_url,
     )
 
     return {"@context": TRACE_CONTEXT, "@graph": [tro_node]}
@@ -485,11 +497,21 @@ def serialize_trace_tro(tro: Mapping) -> bytes:
 
 
 def extract_bundle_tro_reference(tro: Mapping) -> dict[str, Any]:
-    """Extract a compact reference to a bundle TRO for use as a simulation input."""
+    """Extract a compact reference to a bundle TRO for use as a simulation input.
+
+    Locates the ``trov:TransparentResearchObject`` node explicitly rather
+    than trusting ``@graph[0]`` so future TROs that embed additional
+    nodes (TRS, TSA) do not break reference extraction.
+    """
     graph = tro.get("@graph") or []
-    if not graph:
-        raise ValueError("TRO has an empty graph.")
-    node = graph[0]
+    node = next(
+        (n for n in graph if n.get("@type") == "trov:TransparentResearchObject"),
+        None,
+    )
+    if node is None:
+        raise ValueError(
+            "TRO graph does not contain a trov:TransparentResearchObject node."
+        )
     composition = node.get("trov:hasComposition") or {}
     fingerprint = (
         composition.get("trov:hasFingerprint", {}).get("trov:sha256")
@@ -498,12 +520,14 @@ def extract_bundle_tro_reference(tro: Mapping) -> dict[str, Any]:
     )
     if fingerprint is None:
         raise ValueError("TRO is missing a composition fingerprint.")
+    self_url = node.get("pe:selfUrl")
     return {
         "fingerprint": fingerprint,
         "name": node.get("schema:name"),
         "policyengine_version": (
             node.get("trov:createdWith", {}).get("schema:softwareVersion")
         ),
+        "self_url": self_url,
     }
 
 
@@ -520,7 +544,6 @@ def build_simulation_trace_tro(
     reform_location: Optional[str] = None,
     bundle_tro_location: Optional[str] = None,
     bundle_tro_url: Optional[str] = None,
-    emission_context: Optional[Mapping[str, str]] = None,
 ) -> dict:
     """Build a per-simulation TRO chaining a bundle TRO to a results payload.
 
@@ -590,9 +613,7 @@ def build_simulation_trace_tro(
         performance["trov:startedAtTime"] = started_at or created_at
     if created_at is not None:
         performance["trov:endedAtTime"] = created_at
-    performance.update(
-        dict(emission_context) if emission_context is not None else _emission_context()
-    )
+    performance.update(_emission_context())
 
     tro_node = _assemble_tro_node(
         tro_name=f"policyengine simulation TRO ({simulation_slug})",
diff --git a/src/policyengine/data/schemas/trace_tro.schema.json b/src/policyengine/data/schemas/trace_tro.schema.json
index 5b49bb2f..b8a12a34 100644
--- a/src/policyengine/data/schemas/trace_tro.schema.json
+++ b/src/policyengine/data/schemas/trace_tro.schema.json
@@ -72,7 +72,7 @@
         },
         "trov:hasLocation": {
           "type": "string",
-          "pattern": "^(https://|data/|reform\\.json$|results\\.json$|bundle\\.trace\\.tro\\.jsonld)"
+          "pattern": "^(https://[^\\s]+$|data/release_manifests/[a-z]{2,3}\\.json$|reform\\.json$|results\\.json$|bundle\\.trace\\.tro\\.jsonld(#[a-f0-9]{64})?$)"
         }
       }
     },
diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py
index 16bf9c42..2f45b8d0 100644
--- a/src/policyengine/results/trace_tro.py
+++ b/src/policyengine/results/trace_tro.py
@@ -10,7 +10,6 @@
 
 from __future__ import annotations
 
-import json
 from collections.abc import Mapping
 from pathlib import Path
 from typing import Optional, Union
@@ -64,46 +63,30 @@ def write_results_with_trace_tro(
     results_path: Union[str, Path],
     *,
     bundle_tro: Mapping,
+    bundle_tro_url: str,
     reform_payload: Optional[Mapping] = None,
     reform_name: Optional[str] = None,
     tro_suffix: str = ".trace.tro.jsonld",
-    bundle_tro_path: Optional[Union[str, Path]] = None,
-    bundle_tro_url: Optional[str] = None,
 ) -> dict[str, Path]:
     """Write ``results.json`` and a sibling per-simulation TRACE TRO.
 
-    The TRO is written next to the results file with the given suffix.
-    When ``bundle_tro_url`` is provided, it is recorded in the TRO so a
-    verifier can independently fetch that URL and check its hash.
+    ``bundle_tro_url`` is required: a published simulation TRO must
+    point at a canonical, immutable URL for the bundle TRO so a
+    verifier can fetch and rehash it independently of the caller.
     """
     results_path = Path(results_path)
     results.write(results_path)
 
-    if bundle_tro_path is not None:
-        bundle_tro_path = Path(bundle_tro_path)
-        bundle_tro_location: Optional[str] = bundle_tro_path.name
-    else:
-        bundle_tro_location = None
-
     tro = build_results_trace_tro(
         results,
         bundle_tro=bundle_tro,
         reform_payload=reform_payload,
         reform_name=reform_name,
         results_location=results_path.name,
-        bundle_tro_location=bundle_tro_location,
+        bundle_tro_location=bundle_tro_url,
         bundle_tro_url=bundle_tro_url,
     )
     tro_path = results_path.with_suffix(tro_suffix)
     tro_path.write_bytes(serialize_trace_tro(tro))
 
-    written: dict[str, Path] = {"results": results_path, "tro": tro_path}
-
-    if bundle_tro_path is not None:
-        bundle_tro_path.parent.mkdir(parents=True, exist_ok=True)
-        bundle_tro_path.write_text(
-            json.dumps(bundle_tro, indent=2, sort_keys=True) + "\n"
-        )
-        written["bundle_tro"] = bundle_tro_path
-
-    return written
+    return {"results": results_path, "tro": tro_path}
diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py
index 2cc6edf8..90bb10eb 100644
--- a/tests/test_trace_tro.py
+++ b/tests/test_trace_tro.py
@@ -309,6 +309,66 @@ def test__given_fresh_manifest_instances__then_tro_bytes_match(self, monkeypatch
         )
         assert first == second
 
+    def test__given_fixed_ci_env__then_tro_bytes_match_across_builds(self, monkeypatch):
+        # Two builds inside the same CI run must produce identical bytes,
+        # including the pe:ciRunUrl / pe:ciGitSha attestation fields.
+        monkeypatch.setenv("GITHUB_ACTIONS", "true")
+        monkeypatch.setenv("GITHUB_SERVER_URL", "https://github.com")
+        monkeypatch.setenv("GITHUB_REPOSITORY", "PolicyEngine/policyengine.py")
+        monkeypatch.setenv("GITHUB_RUN_ID", "999")
+        monkeypatch.setenv("GITHUB_SHA", "cafef00d")
+        first = serialize_trace_tro(
+            build_trace_tro_from_release_bundle(
+                get_release_manifest("us"),
+                _us_data_release_manifest(),
+                fetch_pypi=_fake_fetch_pypi,
+            )
+        )
+        get_release_manifest.cache_clear()
+        second = serialize_trace_tro(
+            build_trace_tro_from_release_bundle(
+                get_release_manifest("us"),
+                _us_data_release_manifest(),
+                fetch_pypi=_fake_fetch_pypi,
+            )
+        )
+        assert first == second
+
+    def test__given_self_url__then_tro_records_it(self):
+        self_url = (
+            "https://raw.githubusercontent.com/PolicyEngine/policyengine.py/"
+            "v3.4.5/src/policyengine/data/release_manifests/us.trace.tro.jsonld"
+        )
+        tro = build_trace_tro_from_release_bundle(
+            get_release_manifest("us"),
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+            self_url=self_url,
+        )
+        assert tro["@graph"][0]["pe:selfUrl"] == self_url
+
+    def test__given_graph_with_multiple_nodes__then_extract_finds_tro(self):
+        tro = build_trace_tro_from_release_bundle(
+            get_release_manifest("us"),
+            _us_data_release_manifest(),
+            fetch_pypi=_fake_fetch_pypi,
+        )
+        # Inject a decoy node ahead of the TRO in the graph.
+        decoy_tro = {
+            "@context": tro["@context"],
+            "@graph": [
+                {"@id": "decoy", "@type": "schema:Thing"},
+                tro["@graph"][0],
+            ],
+        }
+        reference = extract_bundle_tro_reference(decoy_tro)
+        assert (
+            reference["fingerprint"]
+            == tro["@graph"][0]["trov:hasComposition"]["trov:hasFingerprint"][
+                "trov:sha256"
+            ]
+        )
+
     def test__given_hashes_in_any_order__then_fingerprint_matches(self):
         hashes = ["c" * 64, "a" * 64, "b" * 64]
         assert compute_trace_composition_fingerprint(
@@ -532,10 +592,15 @@ def bundle_hash(tro):
     def test__given_write_helper__then_results_and_tro_files_are_sidebyside(
         self, tmp_path, us_bundle_tro
     ):
+        bundle_url = (
+            "https://raw.githubusercontent.com/PolicyEngine/policyengine.py/"
+            "v3.4.5/src/policyengine/data/release_manifests/us.trace.tro.jsonld"
+        )
         written = write_results_with_trace_tro(
             self._results(),
             tmp_path / "results.json",
             bundle_tro=us_bundle_tro,
+            bundle_tro_url=bundle_url,
             reform_payload={"salt_cap": 0},
         )
 
@@ -544,6 +609,22 @@ def test__given_write_helper__then_results_and_tro_files_are_sidebyside(
         assert written["tro"].name == "results.trace.tro.jsonld"
         tro_payload = json.loads(written["tro"].read_text())
         assert tro_payload["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION
+        assert (
+            tro_payload["@graph"][0]["trov:hasPerformance"]["pe:bundleTroUrl"]
+            == bundle_url
+        )
+
+    def test__given_write_helper_without_url__then_raises(
+        self, tmp_path, us_bundle_tro
+    ):
+        import pytest
+
+        with pytest.raises(TypeError):
+            write_results_with_trace_tro(
+                self._results(),
+                tmp_path / "results.json",
+                bundle_tro=us_bundle_tro,
+            )
 
 
 class TestCLI:

From f0a9af86a19fff2ad0fa86de22e2d777116803c3 Mon Sep 17 00:00:00 2001
From: Max Ghenis <max@policyengine.org>
Date: Sat, 18 Apr 2026 08:36:15 -0400
Subject: [PATCH 8/8] Unify emission-context plumbing; drop redundant
 bundle_tro_location

Last two simplifier nits from round 3:

- _build_bundle_performance no longer takes emission_context as a
  kwarg; like the sim builder, it calls _emission_context() inline
  at the end of performance construction. One fewer parameter, same
  ordering behaviour, matches the sim-side pattern.
- write_results_with_trace_tro no longer passes the URL to both
  bundle_tro_location and bundle_tro_url; the build_simulation_trace_tro
  fallback (bundle_tro_location or bundle_tro_url or <default>) picks
  the URL up on its own.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/policyengine/core/trace_tro.py    | 4 +---
 src/policyengine/results/trace_tro.py | 1 -
 uv.lock                               | 2 ++
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/policyengine/core/trace_tro.py b/src/policyengine/core/trace_tro.py
index 66dca63c..76f1661a 100644
--- a/src/policyengine/core/trace_tro.py
+++ b/src/policyengine/core/trace_tro.py
@@ -409,7 +409,6 @@ def build_trace_tro_from_release_bundle(
             else country_manifest.published_at
         ),
         ended_at=country_manifest.published_at,
-        emission_context=_emission_context(),
     )
 
     tro_node = _assemble_tro_node(
@@ -449,7 +448,6 @@ def _build_bundle_performance(
     certification: Optional[DataCertification],
     started_at: Optional[str],
     ended_at: Optional[str],
-    emission_context: Mapping[str, str],
 ) -> dict[str, Any]:
     performance: dict[str, Any] = {
         "@id": "trp/1",
@@ -487,7 +485,7 @@ def _build_bundle_performance(
             performance["pe:dataBuildId"] = certification.data_build_id
         if certification.certified_by is not None:
             performance["pe:certifiedBy"] = certification.certified_by
-    performance.update(emission_context)
+    performance.update(_emission_context())
     return performance
 
 
diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py
index 2f45b8d0..d904d5b4 100644
--- a/src/policyengine/results/trace_tro.py
+++ b/src/policyengine/results/trace_tro.py
@@ -83,7 +83,6 @@ def write_results_with_trace_tro(
         reform_payload=reform_payload,
         reform_name=reform_name,
         results_location=results_path.name,
-        bundle_tro_location=bundle_tro_url,
         bundle_tro_url=bundle_tro_url,
     )
     tro_path = results_path.with_suffix(tro_suffix)
diff --git a/uv.lock b/uv.lock
index 2cdfb801..bbbd4300 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2434,6 +2434,7 @@ dev = [
     { name = "build" },
     { name = "furo" },
     { name = "itables" },
+    { name = "jsonschema" },
     { name = "jupyter-book" },
     { name = "mypy", version = "1.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
     { name = "mypy", version = "1.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
@@ -2462,6 +2463,7 @@ requires-dist = [
     { name = "build", marker = "extra == 'dev'" },
     { name = "furo", marker = "extra == 'dev'" },
     { name = "itables", marker = "extra == 'dev'" },
+    { name = "jsonschema", marker = "extra == 'dev'", specifier = ">=4.0.0" },
     { name = "jupyter-book", marker = "extra == 'dev'" },
     { name = "microdf-python", specifier = ">=1.2.1" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.0" },