Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/test_app_answer_release_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"$(MAKE) python-surface-test PYTHON=$(PYTHON)",
"$(PYTHON) schemas/validate_examples.py",
"$(PYTHON) .github/scripts/test_app_answer_release_contract.py",
"$(PYTHON) .github/scripts/test_app_answer_release_demo.py",
"$(PYTHON) .github/scripts/claims_gate.py",
"$(PYTHON) .github/scripts/public_boundary_claims_gate.py",
"git diff --check",
Expand Down
170 changes: 170 additions & 0 deletions .github/scripts/test_app_answer_release_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/usr/bin/env python3
#
# Copyright 2026 The Ethos maintainers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import json
import subprocess
import sys
import unittest
from pathlib import Path

from jsonschema import Draft202012Validator
from makefile_guard import target_block


# Repository root: this file lives in .github/scripts/, two directories below it.
ROOT = Path(__file__).resolve().parents[2]
# Directory holding the demo script and its pinned JSON fixtures.
DEMO = ROOT / "examples/app-answer-release"
# Demo entry point; run_demo() launches it as a subprocess with --check.
SCRIPT = DEMO / "run_python_demo.py"
# Schema used to validate expected-decision.json.
SCHEMA = ROOT / "schemas/ethos-app-answer-release-decision.schema.json"
# Schema used to validate verification-report.json.
VERIFICATION_SCHEMA = ROOT / "schemas/ethos-verification-report.schema.json"
# Added to sys.path by the tests that import the in-repo ethos_pdf package.
PYTHON_PACKAGE = ROOT / "python"


def load_json(path: Path) -> dict:
    """Decode the UTF-8 JSON document stored at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The value produced by the ``json`` decoder for the file contents.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def run_demo() -> subprocess.CompletedProcess[str]:
    """Execute the demo script in ``--check`` mode and return the finished process.

    The script runs under the interpreter that is running this module, with the
    repository root as working directory. Both output streams are captured as
    text. A non-zero exit status does not raise here; callers inspect
    ``returncode`` themselves.
    """
    command = [sys.executable, str(SCRIPT), "--check"]
    return subprocess.run(
        command,
        cwd=ROOT,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )


class AppAnswerReleaseDemoTests(unittest.TestCase):
    """Guard the offline app-answer-release demo and its pinned fixtures.

    The tests run the demo script, validate the pinned JSON artifacts against
    their schemas, exercise the ``ethos_pdf`` helpers the demo relies on, and
    check the Makefile target that runs this module.
    """

    @staticmethod
    def _ensure_python_package_importable() -> None:
        """Put the in-repo ``python`` directory on ``sys.path`` if it is missing."""
        package_dir = str(PYTHON_PACKAGE)
        if package_dir not in sys.path:
            sys.path.insert(0, package_dir)

    @staticmethod
    def _schema_errors(schema_path: Path, instance_path: Path) -> list:
        """Return validation errors for a JSON file, sorted by instance path.

        Args:
            schema_path: JSON Schema document to validate against.
            instance_path: JSON document being validated.

        Returns:
            The errors yielded by ``Draft202012Validator.iter_errors``, sorted
            by each error's ``absolute_path``. Empty when the instance is valid.
        """
        schema = load_json(schema_path)
        instance = load_json(instance_path)

        # Validate the schema document itself before using it on the instance.
        Draft202012Validator.check_schema(schema)
        return sorted(
            Draft202012Validator(schema).iter_errors(instance),
            key=lambda error: list(error.absolute_path),
        )

    def test_demo_script_exits_zero_and_matches_expected_decision(self) -> None:
        """The demo exits 0, writes nothing to stderr, and prints the pinned decision."""
        result = run_demo()

        # Check the exit status first and attach both streams to the message:
        # once stderr has been asserted empty it can no longer explain a failure.
        self.assertEqual(
            0,
            result.returncode,
            f"demo exited with {result.returncode}\n"
            f"stderr:\n{result.stderr}\nstdout:\n{result.stdout}",
        )
        self.assertEqual("", result.stderr)
        self.assertEqual(load_json(DEMO / "expected-decision.json"), json.loads(result.stdout))

    def test_demo_expected_decision_validates_against_schema(self) -> None:
        """``expected-decision.json`` conforms to the release-decision schema."""
        errors = self._schema_errors(SCHEMA, DEMO / "expected-decision.json")

        self.assertEqual([], errors)

    def test_demo_verification_report_validates_against_canonical_schema(self) -> None:
        """``verification-report.json`` conforms to the verification-report schema."""
        errors = self._schema_errors(VERIFICATION_SCHEMA, DEMO / "verification-report.json")

        self.assertEqual([], errors)

    def test_demo_derives_expected_proof_summary_from_verification_report(self) -> None:
        """``proof_summary`` applied to the pinned report yields ``proof-summary.json``."""
        self._ensure_python_package_importable()
        from ethos_pdf import proof_summary

        report = load_json(DEMO / "verification-report.json")

        self.assertEqual(load_json(DEMO / "proof-summary.json"), proof_summary(report))

    def test_demo_keeps_canonical_report_separate_from_app_wrapper(self) -> None:
        """The report and the decision envelope do not share each other's fields."""
        report = load_json(DEMO / "verification-report.json")
        decision = load_json(DEMO / "expected-decision.json")

        self.assertIn("checks", report)
        self.assertIn("all_evidence_grounded", report)
        self.assertNotIn("artifact_type", report)
        self.assertNotIn("app_status", report)
        self.assertNotIn("checks", decision)
        # The decision points at the report by reference instead of embedding it.
        self.assertEqual(
            "verification-report.json",
            decision["grounding"]["verification_report_ref"],
        )

    def test_demo_covers_final_review_and_blocked_release_cases(self) -> None:
        """The pinned decision contains one claim for each release outcome."""
        decision = load_json(DEMO / "expected-decision.json")
        claims = {claim["id"]: claim for claim in decision["claims"]}

        self.assertEqual(["claim-revenue"], decision["final_answer_claim_ids"])
        self.assertEqual(["claim-growth-driver"], decision["review_claim_ids"])
        self.assertEqual(
            ["claim-office-background", "claim-margin"],
            decision["blocked_claim_ids"],
        )

        # Grounded source fact: released into the final answer.
        certified = claims["claim-revenue"]
        self.assertTrue(certified["citation_grounded"])
        self.assertEqual("source_fact", certified["claim_type"])
        self.assertEqual("show_final", certified["release_action"])
        self.assertEqual("certified", certified["release_reason"])

        # Grounded synthesis: held for review.
        synthesis = claims["claim-growth-driver"]
        self.assertTrue(synthesis["citation_grounded"])
        self.assertEqual("synthesis", synthesis["claim_type"])
        self.assertEqual("needs_review", synthesis["release_action"])
        self.assertEqual("supported_synthesis_needs_review", synthesis["release_reason"])

        # Grounded but only background to the question: blocked.
        irrelevant = claims["claim-office-background"]
        self.assertTrue(irrelevant["citation_grounded"])
        self.assertEqual("background_only", irrelevant["question_relevance"])
        self.assertEqual("block", irrelevant["release_action"])
        self.assertEqual("grounded_but_irrelevant", irrelevant["release_reason"])

        # Not grounded: blocked.
        unsupported = claims["claim-margin"]
        self.assertFalse(unsupported["citation_grounded"])
        self.assertEqual("unsupported", unsupported["claim_type"])
        self.assertEqual("block", unsupported["release_action"])
        self.assertEqual("cannot_answer_from_sources", unsupported["release_reason"])

    def test_demo_helper_rejects_duplicate_claim_ids(self) -> None:
        """``app_answer_release_decision`` raises ``ValueError`` on a repeated claim id."""
        self._ensure_python_package_importable()
        from ethos_pdf import app_answer_release_decision

        summary = load_json(DEMO / "proof-summary.json")
        payload = load_json(DEMO / "claims.json")
        # Copy each claim so the id rewrite below does not touch the loaded payload.
        claims = [dict(claim) for claim in payload["claims"]]
        claims[1]["id"] = claims[0]["id"]

        with self.assertRaisesRegex(ValueError, "duplicate claim id: claim-revenue"):
            app_answer_release_decision(
                payload["question"],
                summary,
                claims,
                verification_report_ref=payload["verification_report_ref"],
            )

    def test_make_target_runs_demo_guard_without_publication_actions(self) -> None:
        """The Makefile target runs this guard and contains no publish commands."""
        block = target_block("app-answer-release-demo")

        self.assertIn("$(PYTHON) .github/scripts/test_app_answer_release_demo.py", block)
        self.assertIn("git diff --check", block)
        for out_of_scope in ["cargo publish", "gh release", "npm publish", "twine upload"]:
            self.assertNotIn(out_of_scope, block)


# The Makefile targets invoke this file directly with $(PYTHON), so run the
# unittest runner when the module is executed as a script.
if __name__ == "__main__":
    unittest.main()
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ LAYOUT_EVALUATOR_OUT ?= $(ROOT)/target/layout-evaluator-alpha
.PHONY: milestone-d-grounding-source-contract
.PHONY: milestone-d-crop-element-surface-shape-contract
.PHONY: milestone-d-claim-kind-boundary-contract
.PHONY: app-answer-release-contract
.PHONY: app-answer-release-contract app-answer-release-demo

$(ETHOS_BIN):
cargo build --locked -p ethos-cli
Expand Down Expand Up @@ -67,10 +67,15 @@ app-answer-release-contract:
$(MAKE) python-surface-test PYTHON=$(PYTHON)
$(PYTHON) schemas/validate_examples.py
$(PYTHON) .github/scripts/test_app_answer_release_contract.py
$(PYTHON) .github/scripts/test_app_answer_release_demo.py
$(PYTHON) .github/scripts/claims_gate.py
$(PYTHON) .github/scripts/public_boundary_claims_gate.py
git diff --check

# Run only the app-answer-release demo guard script, then `git diff --check`.
app-answer-release-demo:
	$(PYTHON) .github/scripts/test_app_answer_release_demo.py
	git diff --check

milestone-d-verify-citations-contract:
cargo test --locked -p ethos-cli --test verify
$(PYTHON) schemas/validate_examples.py
Expand Down
2 changes: 2 additions & 0 deletions docs/app-answer-release-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ result. Rust apps can build the same envelope with `derive_app_answer_release_de
Python apps can build it with `app_answer_release_decision(...)`, after they have supplied
relevance and synthesis labels.

For a runnable offline Python reference path, see `examples/app-answer-release/README.md`.

If a wrapper exposes `invalid_request`, that status is a process or API envelope for malformed
input, invalid configuration, adapter failure, or usage errors. It is not derived from a
`VerificationReport`.
Expand Down
6 changes: 5 additions & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# examples — source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification), agent demo (verify + crop), foreign-parser demo (verify over OpenDataLoader JSON). Pinned fixtures only.
# examples

Source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification),
agent demo (verify + crop), foreign-parser demo (verify over OpenDataLoader JSON), and
app-answer-release demo (proof summary -> app release decision). Pinned fixtures only.
44 changes: 44 additions & 0 deletions examples/app-answer-release/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# App Answer Release Demo

This demo is an offline reference path for applications that use Ethos grounding before deciding
which answer claims can be released.

It shows this flow:

```text
verification-report.json
-> proof_summary(...)
-> app-labeled claims
-> app_answer_release_decision(...)
-> final / review / blocked claim lists
```

The demo does not call an LLM, does not use DocuShell UI code, and does not ask Ethos to judge
question relevance or synthesis. Ethos derives the grounding summary. The application supplies the
original question plus `question_relevance` and `claim_type` labels.

## Run

From the repo root:

```sh
python3 examples/app-answer-release/run_python_demo.py --check
```

The command prints the decision envelope and exits non-zero if it differs from
`expected-decision.json`.

## Files

- `verification-report.json`: canonical Ethos-style grounding report used as the audit artifact.
- `proof-summary.json`: expected output from `proof_summary(verification_report)`.
- `claims.json`: application-owned question, relevance labels, and synthesis labels.
- `expected-decision.json`: expected app release envelope from `app_answer_release_decision(...)`.
- `run_python_demo.py`: copyable Python reference path.

## Cases Covered

- `claim-revenue`: certified source fact, released into the final answer.
- `claim-growth-driver`: grounded synthesis, kept for review.
- `claim-office-background`: grounded but irrelevant, blocked.
- `claim-margin`: unsupported by reusable grounded evidence, blocked.
46 changes: 46 additions & 0 deletions examples/app-answer-release/claims.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"question": "What changed in Q3 2025 revenue?",
"verification_report_ref": "verification-report.json",
"notes": [
"Ethos verified citation grounding; the application supplied relevance and synthesis labels."
],
"claims": [
{
"id": "claim-revenue",
"text": "Q3 2025 revenue was $12.4M, up 18% year over year.",
"check_ids": [
"v0001"
],
"question_relevance": "direct_answer",
"claim_type": "source_fact"
},
{
"id": "claim-growth-driver",
"text": "Q3 revenue growth was likely driven by enterprise expansion.",
"check_ids": [
"v0001",
"v0003"
],
"question_relevance": "supports_answer",
"claim_type": "synthesis"
},
{
"id": "claim-office-background",
"text": "The company opened a European office in September.",
"check_ids": [
"v0002"
],
"question_relevance": "background_only",
"claim_type": "source_fact"
},
{
"id": "claim-margin",
"text": "Gross margin improved in Q3 2025.",
"check_ids": [
"v0004"
],
"question_relevance": "direct_answer",
"claim_type": "unsupported"
}
]
}
86 changes: 86 additions & 0 deletions examples/app-answer-release/expected-decision.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"artifact_type": "ethos.app_answer_release_decision.v1",
"schema_version": "1.0.0",
"question": "What changed in Q3 2025 revenue?",
"grounding": {
"verification_report_ref": "verification-report.json",
"proof_status": "partially_verified",
"request_certified": false,
"reusable_grounded_check_ids": [
"v0001",
"v0002",
"v0003"
],
"needs_review_check_ids": [
"v0004"
],
"proof_limitations": [
"non_grounded_checks"
]
},
"app_status": "partial_certified",
"claims": [
{
"id": "claim-revenue",
"text": "Q3 2025 revenue was $12.4M, up 18% year over year.",
"citation_grounded": true,
"question_relevance": "direct_answer",
"claim_type": "source_fact",
"release_action": "show_final",
"release_reason": "certified",
"check_ids": [
"v0001"
]
},
{
"id": "claim-growth-driver",
"text": "Q3 revenue growth was likely driven by enterprise expansion.",
"citation_grounded": true,
"question_relevance": "supports_answer",
"claim_type": "synthesis",
"release_action": "needs_review",
"release_reason": "supported_synthesis_needs_review",
"check_ids": [
"v0001",
"v0003"
]
},
{
"id": "claim-office-background",
"text": "The company opened a European office in September.",
"citation_grounded": true,
"question_relevance": "background_only",
"claim_type": "source_fact",
"release_action": "block",
"release_reason": "grounded_but_irrelevant",
"check_ids": [
"v0002"
]
},
{
"id": "claim-margin",
"text": "Gross margin improved in Q3 2025.",
"citation_grounded": false,
"question_relevance": "direct_answer",
"claim_type": "unsupported",
"release_action": "block",
"release_reason": "cannot_answer_from_sources",
"check_ids": [
"v0004"
]
}
],
"final_answer_claim_ids": [
"claim-revenue"
],
"review_claim_ids": [
"claim-growth-driver"
],
"blocked_claim_ids": [
"claim-office-background",
"claim-margin"
],
"notes": [
"Ethos verified citation grounding; the application supplied relevance and synthesis labels."
]
}
Loading
Loading