From 6ba682b758c083a2a9d148b10ed0181b1bff3b4f Mon Sep 17 00:00:00 2001 From: docushell-admin Date: Wed, 1 Jul 2026 11:21:19 +0530 Subject: [PATCH 1/2] Add app answer release demo Signed-off-by: docushell-admin --- .../test_app_answer_release_contract.py | 1 + .../scripts/test_app_answer_release_demo.py | 170 ++++++++++++++++++ Makefile | 7 +- docs/app-answer-release-contract.md | 2 + examples/README.md | 6 +- examples/app-answer-release/README.md | 44 +++++ examples/app-answer-release/claims.json | 46 +++++ .../app-answer-release/expected-decision.json | 86 +++++++++ .../app-answer-release/proof-summary.json | 15 ++ .../app-answer-release/run_python_demo.py | 97 ++++++++++ .../verification-report.json | 117 ++++++++++++ 11 files changed, 589 insertions(+), 2 deletions(-) create mode 100644 .github/scripts/test_app_answer_release_demo.py create mode 100644 examples/app-answer-release/README.md create mode 100644 examples/app-answer-release/claims.json create mode 100644 examples/app-answer-release/expected-decision.json create mode 100644 examples/app-answer-release/proof-summary.json create mode 100644 examples/app-answer-release/run_python_demo.py create mode 100644 examples/app-answer-release/verification-report.json diff --git a/.github/scripts/test_app_answer_release_contract.py b/.github/scripts/test_app_answer_release_contract.py index a1ee593d..81e901e8 100644 --- a/.github/scripts/test_app_answer_release_contract.py +++ b/.github/scripts/test_app_answer_release_contract.py @@ -41,6 +41,7 @@ "$(MAKE) python-surface-test PYTHON=$(PYTHON)", "$(PYTHON) schemas/validate_examples.py", "$(PYTHON) .github/scripts/test_app_answer_release_contract.py", + "$(PYTHON) .github/scripts/test_app_answer_release_demo.py", "$(PYTHON) .github/scripts/claims_gate.py", "$(PYTHON) .github/scripts/public_boundary_claims_gate.py", "git diff --check", diff --git a/.github/scripts/test_app_answer_release_demo.py b/.github/scripts/test_app_answer_release_demo.py new file mode 100644 
index 00000000..88ac3fe1 --- /dev/null +++ b/.github/scripts/test_app_answer_release_demo.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# +# Copyright 2026 The Ethos maintainers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations + +import json +import subprocess +import sys +import unittest +from pathlib import Path + +from jsonschema import Draft202012Validator +from makefile_guard import target_block + + +ROOT = Path(__file__).resolve().parents[2] +DEMO = ROOT / "examples/app-answer-release" +SCRIPT = DEMO / "run_python_demo.py" +SCHEMA = ROOT / "schemas/ethos-app-answer-release-decision.schema.json" +VERIFICATION_SCHEMA = ROOT / "schemas/ethos-verification-report.schema.json" +PYTHON_PACKAGE = ROOT / "python" + + +def load_json(path: Path) -> dict: + return json.loads(path.read_text(encoding="utf-8")) + + +def run_demo() -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, str(SCRIPT), "--check"], + cwd=ROOT, + text=True, + capture_output=True, + check=False, + ) + + +class AppAnswerReleaseDemoTests(unittest.TestCase): + def test_demo_script_exits_zero_and_matches_expected_decision(self) -> None: + result = run_demo() + + self.assertEqual("", result.stderr) + self.assertEqual(0, result.returncode, result.stderr) + self.assertEqual(load_json(DEMO / "expected-decision.json"), json.loads(result.stdout)) + + def test_demo_expected_decision_validates_against_schema(self) -> None: + schema = 
load_json(SCHEMA) + decision = load_json(DEMO / "expected-decision.json") + + Draft202012Validator.check_schema(schema) + errors = sorted( + Draft202012Validator(schema).iter_errors(decision), + key=lambda error: list(error.absolute_path), + ) + + self.assertEqual([], errors) + + def test_demo_verification_report_validates_against_canonical_schema(self) -> None: + schema = load_json(VERIFICATION_SCHEMA) + report = load_json(DEMO / "verification-report.json") + + Draft202012Validator.check_schema(schema) + errors = sorted( + Draft202012Validator(schema).iter_errors(report), + key=lambda error: list(error.absolute_path), + ) + + self.assertEqual([], errors) + + def test_demo_derives_expected_proof_summary_from_verification_report(self) -> None: + if str(PYTHON_PACKAGE) not in sys.path: + sys.path.insert(0, str(PYTHON_PACKAGE)) + from ethos_pdf import proof_summary + + report = load_json(DEMO / "verification-report.json") + + self.assertEqual(load_json(DEMO / "proof-summary.json"), proof_summary(report)) + + def test_demo_keeps_canonical_report_separate_from_app_wrapper(self) -> None: + report = load_json(DEMO / "verification-report.json") + decision = load_json(DEMO / "expected-decision.json") + + self.assertIn("checks", report) + self.assertIn("all_evidence_grounded", report) + self.assertNotIn("artifact_type", report) + self.assertNotIn("app_status", report) + self.assertNotIn("checks", decision) + self.assertEqual( + "verification-report.json", + decision["grounding"]["verification_report_ref"], + ) + + def test_demo_covers_final_review_and_blocked_release_cases(self) -> None: + decision = load_json(DEMO / "expected-decision.json") + claims = {claim["id"]: claim for claim in decision["claims"]} + + self.assertEqual(["claim-revenue"], decision["final_answer_claim_ids"]) + self.assertEqual(["claim-growth-driver"], decision["review_claim_ids"]) + self.assertEqual( + ["claim-office-background", "claim-margin"], + decision["blocked_claim_ids"], + ) + + certified = 
claims["claim-revenue"] + self.assertTrue(certified["citation_grounded"]) + self.assertEqual("source_fact", certified["claim_type"]) + self.assertEqual("show_final", certified["release_action"]) + self.assertEqual("certified", certified["release_reason"]) + + synthesis = claims["claim-growth-driver"] + self.assertTrue(synthesis["citation_grounded"]) + self.assertEqual("synthesis", synthesis["claim_type"]) + self.assertEqual("needs_review", synthesis["release_action"]) + self.assertEqual("supported_synthesis_needs_review", synthesis["release_reason"]) + + irrelevant = claims["claim-office-background"] + self.assertTrue(irrelevant["citation_grounded"]) + self.assertEqual("background_only", irrelevant["question_relevance"]) + self.assertEqual("block", irrelevant["release_action"]) + self.assertEqual("grounded_but_irrelevant", irrelevant["release_reason"]) + + unsupported = claims["claim-margin"] + self.assertFalse(unsupported["citation_grounded"]) + self.assertEqual("unsupported", unsupported["claim_type"]) + self.assertEqual("block", unsupported["release_action"]) + self.assertEqual("cannot_answer_from_sources", unsupported["release_reason"]) + + def test_demo_helper_rejects_duplicate_claim_ids(self) -> None: + if str(PYTHON_PACKAGE) not in sys.path: + sys.path.insert(0, str(PYTHON_PACKAGE)) + from ethos_pdf import app_answer_release_decision + + summary = load_json(DEMO / "proof-summary.json") + payload = load_json(DEMO / "claims.json") + claims = [dict(claim) for claim in payload["claims"]] + claims[1]["id"] = claims[0]["id"] + + with self.assertRaisesRegex(ValueError, "duplicate claim id: claim-revenue"): + app_answer_release_decision( + payload["question"], + summary, + claims, + verification_report_ref=payload["verification_report_ref"], + ) + + def test_make_target_runs_demo_guard_without_publication_actions(self) -> None: + block = target_block("app-answer-release-demo") + + self.assertIn("$(PYTHON) .github/scripts/test_app_answer_release_demo.py", block) + 
self.assertIn("git diff --check", block) + for out_of_scope in ["cargo publish", "gh release", "npm publish", "twine upload"]: + self.assertNotIn(out_of_scope, block) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile b/Makefile index 5263acbb..87202835 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ LAYOUT_EVALUATOR_OUT ?= $(ROOT)/target/layout-evaluator-alpha .PHONY: milestone-d-grounding-source-contract .PHONY: milestone-d-crop-element-surface-shape-contract .PHONY: milestone-d-claim-kind-boundary-contract -.PHONY: app-answer-release-contract +.PHONY: app-answer-release-contract app-answer-release-demo $(ETHOS_BIN): cargo build --locked -p ethos-cli @@ -67,10 +67,15 @@ app-answer-release-contract: $(MAKE) python-surface-test PYTHON=$(PYTHON) $(PYTHON) schemas/validate_examples.py $(PYTHON) .github/scripts/test_app_answer_release_contract.py + $(PYTHON) .github/scripts/test_app_answer_release_demo.py $(PYTHON) .github/scripts/claims_gate.py $(PYTHON) .github/scripts/public_boundary_claims_gate.py git diff --check +app-answer-release-demo: + $(PYTHON) .github/scripts/test_app_answer_release_demo.py + git diff --check + milestone-d-verify-citations-contract: cargo test --locked -p ethos-cli --test verify $(PYTHON) schemas/validate_examples.py diff --git a/docs/app-answer-release-contract.md b/docs/app-answer-release-contract.md index f07e8807..e44d3e89 100644 --- a/docs/app-answer-release-contract.md +++ b/docs/app-answer-release-contract.md @@ -30,6 +30,8 @@ result. Rust apps can build the same envelope with `derive_app_answer_release_de Python apps can build it with `app_answer_release_decision(...)`, after they have supplied relevance and synthesis labels. +For a runnable offline Python reference path, see `examples/app-answer-release/README.md`. + If a wrapper exposes `invalid_request`, that status is a process or API envelope for malformed input, invalid configuration, adapter failure, or usage errors. 
It is not derived from a `VerificationReport`. diff --git a/examples/README.md b/examples/README.md index a1031ae0..3fd13fd1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1 +1,5 @@ -# examples — source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification), agent demo (verify + crop), foreign-parser demo (verify over OpenDataLoader JSON). Pinned fixtures only. +# examples + +Source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification), +agent demo (verify + crop), foreign-parser demo (verify over OpenDataLoader JSON), and +app-answer-release demo (proof summary -> app release decision). Pinned fixtures only. diff --git a/examples/app-answer-release/README.md b/examples/app-answer-release/README.md new file mode 100644 index 00000000..c306f069 --- /dev/null +++ b/examples/app-answer-release/README.md @@ -0,0 +1,44 @@ +# App Answer Release Demo + +This demo is an offline reference path for applications that use Ethos grounding before deciding +which answer claims can be released. + +It shows this flow: + +```text +verification-report.json +-> proof_summary(...) +-> app-labeled claims +-> app_answer_release_decision(...) +-> final / review / blocked claim lists +``` + +The demo does not call an LLM, does not use DocuShell UI code, and does not ask Ethos to judge +question relevance or synthesis. Ethos derives the grounding summary. The application supplies the +original question plus `question_relevance` and `claim_type` labels. + +## Run + +From the repo root: + +```sh +python3 examples/app-answer-release/run_python_demo.py --check +``` + +On success, the command prints the decision envelope to stdout. If the generated decision differs +from `expected-decision.json`, it prints a unified diff to stderr and exits non-zero. + +## Files + +- `verification-report.json`: canonical Ethos-style grounding report used as the audit artifact. +- `proof-summary.json`: expected output from `proof_summary(verification_report)`. 
+- `claims.json`: application-owned question, relevance labels, and synthesis labels. +- `expected-decision.json`: expected app release envelope from `app_answer_release_decision(...)`. +- `run_python_demo.py`: copyable Python reference path. + +## Cases Covered + +- `claim-revenue`: certified source fact, released into the final answer. +- `claim-growth-driver`: grounded synthesis, kept for review. +- `claim-office-background`: grounded but irrelevant, blocked. +- `claim-margin`: unsupported by reusable grounded evidence, blocked. diff --git a/examples/app-answer-release/claims.json b/examples/app-answer-release/claims.json new file mode 100644 index 00000000..37d6ce59 --- /dev/null +++ b/examples/app-answer-release/claims.json @@ -0,0 +1,46 @@ +{ + "question": "What changed in Q3 2025 revenue?", + "verification_report_ref": "verification-report.json", + "notes": [ + "Ethos verified citation grounding; the application supplied relevance and synthesis labels." + ], + "claims": [ + { + "id": "claim-revenue", + "text": "Q3 2025 revenue was $12.4M, up 18% year over year.", + "check_ids": [ + "v0001" + ], + "question_relevance": "direct_answer", + "claim_type": "source_fact" + }, + { + "id": "claim-growth-driver", + "text": "Q3 revenue growth was likely driven by enterprise expansion.", + "check_ids": [ + "v0001", + "v0003" + ], + "question_relevance": "supports_answer", + "claim_type": "synthesis" + }, + { + "id": "claim-office-background", + "text": "The company opened a European office in September.", + "check_ids": [ + "v0002" + ], + "question_relevance": "background_only", + "claim_type": "source_fact" + }, + { + "id": "claim-margin", + "text": "Gross margin improved in Q3 2025.", + "check_ids": [ + "v0004" + ], + "question_relevance": "direct_answer", + "claim_type": "unsupported" + } + ] +} diff --git a/examples/app-answer-release/expected-decision.json b/examples/app-answer-release/expected-decision.json new file mode 100644 index 00000000..2e8bb239 --- 
/dev/null +++ b/examples/app-answer-release/expected-decision.json @@ -0,0 +1,86 @@ +{ + "artifact_type": "ethos.app_answer_release_decision.v1", + "schema_version": "1.0.0", + "question": "What changed in Q3 2025 revenue?", + "grounding": { + "verification_report_ref": "verification-report.json", + "proof_status": "partially_verified", + "request_certified": false, + "reusable_grounded_check_ids": [ + "v0001", + "v0002", + "v0003" + ], + "needs_review_check_ids": [ + "v0004" + ], + "proof_limitations": [ + "non_grounded_checks" + ] + }, + "app_status": "partial_certified", + "claims": [ + { + "id": "claim-revenue", + "text": "Q3 2025 revenue was $12.4M, up 18% year over year.", + "citation_grounded": true, + "question_relevance": "direct_answer", + "claim_type": "source_fact", + "release_action": "show_final", + "release_reason": "certified", + "check_ids": [ + "v0001" + ] + }, + { + "id": "claim-growth-driver", + "text": "Q3 revenue growth was likely driven by enterprise expansion.", + "citation_grounded": true, + "question_relevance": "supports_answer", + "claim_type": "synthesis", + "release_action": "needs_review", + "release_reason": "supported_synthesis_needs_review", + "check_ids": [ + "v0001", + "v0003" + ] + }, + { + "id": "claim-office-background", + "text": "The company opened a European office in September.", + "citation_grounded": true, + "question_relevance": "background_only", + "claim_type": "source_fact", + "release_action": "block", + "release_reason": "grounded_but_irrelevant", + "check_ids": [ + "v0002" + ] + }, + { + "id": "claim-margin", + "text": "Gross margin improved in Q3 2025.", + "citation_grounded": false, + "question_relevance": "direct_answer", + "claim_type": "unsupported", + "release_action": "block", + "release_reason": "cannot_answer_from_sources", + "check_ids": [ + "v0004" + ] + } + ], + "final_answer_claim_ids": [ + "claim-revenue" + ], + "review_claim_ids": [ + "claim-growth-driver" + ], + "blocked_claim_ids": [ + 
"claim-office-background", + "claim-margin" + ], + "notes": [ + "Ethos verified citation grounding; the application supplied relevance and synthesis labels." + ] +} diff --git a/examples/app-answer-release/proof-summary.json b/examples/app-answer-release/proof-summary.json new file mode 100644 index 00000000..912bb2ca --- /dev/null +++ b/examples/app-answer-release/proof-summary.json @@ -0,0 +1,15 @@ +{ + "proof_status": "partially_verified", + "request_certified": false, + "reusable_grounded_check_ids": [ + "v0001", + "v0002", + "v0003" + ], + "needs_review_check_ids": [ + "v0004" + ], + "proof_limitations": [ + "non_grounded_checks" + ] +} diff --git a/examples/app-answer-release/run_python_demo.py b/examples/app-answer-release/run_python_demo.py new file mode 100644 index 00000000..925cf455 --- /dev/null +++ b/examples/app-answer-release/run_python_demo.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# +# Copyright 2026 The Ethos maintainers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import annotations + +import argparse +import difflib +import json +import sys +from pathlib import Path +from typing import Any + + +DEMO_DIR = Path(__file__).resolve().parent +ROOT = DEMO_DIR.parents[1] +PYTHON_PACKAGE = ROOT / "python" +if str(PYTHON_PACKAGE) not in sys.path: + sys.path.insert(0, str(PYTHON_PACKAGE)) + +from ethos_pdf import app_answer_release_decision, proof_summary # noqa: E402 + + +def load_json(name: str) -> Any: + return json.loads((DEMO_DIR / name).read_text(encoding="utf-8")) + + +def dumps_json(value: Any) -> str: + return json.dumps(value, indent=2) + "\n" + + +def build_decision() -> dict[str, Any]: + report = load_json("verification-report.json") + expected_summary = load_json("proof-summary.json") + claim_payload = load_json("claims.json") + + summary = proof_summary(report) + if summary != expected_summary: + raise RuntimeError("proof_summary(verification-report.json) changed") + + return app_answer_release_decision( + claim_payload["question"], + summary, + claim_payload["claims"], + verification_report_ref=claim_payload["verification_report_ref"], + notes=claim_payload["notes"], + ) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Run the Ethos app-answer-release reference demo.", + ) + parser.add_argument( + "--check", + action="store_true", + help="Fail if the generated decision differs from expected-decision.json.", + ) + args = parser.parse_args() + + decision = build_decision() + rendered = dumps_json(decision) + + if args.check: + expected = load_json("expected-decision.json") + if decision != expected: + expected_rendered = dumps_json(expected) + diff = "".join( + difflib.unified_diff( + expected_rendered.splitlines(keepends=True), + rendered.splitlines(keepends=True), + fromfile="expected-decision.json", + tofile="generated-decision.json", + ) + ) + print(diff, file=sys.stderr) + return 1 + + print(rendered, end="") + return 0 + + +if __name__ == "__main__": + raise 
SystemExit(main()) diff --git a/examples/app-answer-release/verification-report.json b/examples/app-answer-release/verification-report.json new file mode 100644 index 00000000..8fb8229a --- /dev/null +++ b/examples/app-answer-release/verification-report.json @@ -0,0 +1,117 @@ +{ + "schema_version": "1.0.0", + "document_fingerprint": "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3", + "verification_config_sha256": "4bb224166a04a25fed2dd3ecdb9638ddcc5b398658532b73f1c0547e4983d0b0", + "grounding": { + "parser": { + "name": "app-release-demo", + "version": "1.0.0" + }, + "capabilities": { + "spans": true, + "char_offsets": true, + "tables": true, + "fingerprint": true, + "coordinate_origin": "top-left", + "crop_support": false + } + }, + "capability_limits": [], + "fingerprint_stale": false, + "all_evidence_grounded": false, + "checks": [ + { + "id": "v0001", + "claim": { + "kind": "quote", + "text": "Q3 2025 revenue was $12.4M, up 18% year over year.", + "citation": { + "page": "p0001", + "element_id": "e0001" + } + }, + "status": "grounded", + "match_method": "normalized_text", + "semantic_unverified": false, + "evidence": { + "text": "Q3 2025 revenue was $12.4M, up 18% year over year.", + "page": "p0001", + "bbox": [ + 7200, + 10100, + 54000, + 11500 + ] + }, + "warnings": [] + }, + { + "id": "v0002", + "claim": { + "kind": "quote", + "text": "The company opened a European office in September.", + "citation": { + "page": "p0002", + "element_id": "e0007" + } + }, + "status": "grounded", + "match_method": "normalized_text", + "semantic_unverified": false, + "evidence": { + "text": "The company opened a European office in September.", + "page": "p0002", + "bbox": [ + 7200, + 20100, + 51000, + 21500 + ] + }, + "warnings": [] + }, + { + "id": "v0003", + "claim": { + "kind": "quote", + "text": "Enterprise expansion accounted for most new bookings.", + "citation": { + "page": "p0001", + "element_id": "e0003" + } + }, + "status": "grounded", + 
"match_method": "normalized_text", + "semantic_unverified": false, + "evidence": { + "text": "Enterprise expansion accounted for most new bookings.", + "page": "p0001", + "bbox": [ + 7200, + 12100, + 54000, + 13500 + ] + }, + "warnings": [] + }, + { + "id": "v0004", + "claim": { + "kind": "quote", + "text": "Gross margin improved in Q3 2025.", + "citation": { + "page": "p0003", + "element_id": "e0099" + } + }, + "status": "not_found", + "reason": "element_not_found", + "match_method": "none", + "semantic_unverified": false, + "warnings": [] + } + ], + "unsupported_claim_kinds": [], + "warnings": [] +} From 07ad16d515b08263b75189a05ef6b9a09192e78a Mon Sep 17 00:00:00 2001 From: docushell-admin Date: Wed, 1 Jul 2026 11:31:03 +0530 Subject: [PATCH 2/2] Fix examples README posture casing Signed-off-by: docushell-admin --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 3fd13fd1..a988b45d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,5 @@ # examples -Source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification), +source-only public beta fixture set: RAG demo (parse -> chunks -> citations -> verification), agent demo (verify + crop), foreign-parser demo (verify over OpenDataLoader JSON), and app-answer-release demo (proof summary -> app release decision). Pinned fixtures only.