From 8902439e413e429f30e73bd31087be6bdb15473c Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Tue, 16 Jun 2026 07:54:58 +0800 Subject: [PATCH 1/8] test: failing self-test for backtest fingerprint helpers Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/fingerprint_self_test.py | 166 +++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 scripts/fingerprint_self_test.py diff --git a/scripts/fingerprint_self_test.py b/scripts/fingerprint_self_test.py new file mode 100644 index 0000000..fc6885c --- /dev/null +++ b/scripts/fingerprint_self_test.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""fingerprint_self_test.py — pins the backtest-fingerprint logic. + +The fingerprint helpers are duplicated (byte-identical) in +docker/run_json.py and scripts/run_strategy.py because scripts/ is +.dockerignore'd, so a shared runtime module cannot be COPY'd into the +image. This test loads BOTH copies, runs one fixture generated.cpp +through each, and asserts (a) the parser/provenance/fingerprint behave +as designed and (b) the two copies produce identical output — so they +cannot silently drift. + +Exit 0 iff every check passes. +""" +from __future__ import annotations + +import base64 +import importlib.util +import json +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parent.parent +RUN_JSON = REPO / "docker" / "run_json.py" +RUN_STRATEGY = REPO / "scripts" / "run_strategy.py" + +# Fixture: a ctor that sets a SUBSET of strategy() fields (process_orders_on_close +# and close_entries_rule are intentionally absent — they default in the engine +# base class), a set_strategy_override with a decoy `initial_capital_ =` line +# (must NOT be parsed), and a mix of input types incl. a duplicate title. +FIXTURE_CPP = ''' +struct GeneratedStrategy : Strategy { + explicit GeneratedStrategy() : _ta_ema_1(5), _ta_ema_2(13) { + initial_capital_ = 50000.0; + default_qty_type_ = QtyType::PERCENT_OF_EQUITY; + default_qty_value_ = 3.0; + pyramiding_ = 2; + commission_type_ = CommissionType::PERCENT; + commission_value_ = 0.04; + slippage_ = 1; + } + void set_strategy_override(const std::string& key, const std::string& value) { + if (key == "initial_capital") { initial_capital_ = std::stod(value); return; } + } + void on_bar(const Bar& bar) override { + if (!_inputs_initialized_) { + i_fast = get_input_int("Fast EMA", 5); + i_slow = get_input_int("Slow EMA", 13); + thr = get_input_double("ADX trend threshold", 25); + mode = get_input_string("Mode", "fast"); + src = get_input_source("Source", close); + _inputs_initialized_ = true; + } + i_fast = get_input_int("Fast EMA", 5); + } +}; +''' + + +def _load(path: Path, name: str): + spec = importlib.util.spec_from_file_location(name, path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def main() -> int: + passed = failed = 0 + + def check(name: str, cond: bool) -> None: + nonlocal passed, failed + if cond: + passed += 1 + print(f" OK {name}") + else: + failed += 1 + print(f" FAIL {name}") + + rj = _load(RUN_JSON, "pf_run_json") + rs = _load(RUN_STRATEGY, "pf_run_strategy") + + for label, m in (("run_json", rj), ("run_strategy", rs)): + # --- strategy() defaults from the ctor only ----------------------- + strat = m.parse_strategy_params(FIXTURE_CPP) + check(f"{label}: initial_capital parsed", strat.get("initial_capital") == 50000.0) + check(f"{label}: enum default_qty_type mapped", + strat.get("default_qty_type") == "percent_of_equity") + check(f"{label}: commission_value parsed", strat.get("commission_value") == 0.04) + check(f"{label}: pyramiding parsed", strat.get("pyramiding") == 2) + check(f"{label}: slippage parsed", strat.get("slippage") == 1) + # decoy line in set_strategy_override must NOT pollute the parse + check(f"{label}: override-fn line not parsed (still 50000.0)", + strat.get("initial_capital") == 50000.0) + # ctor does NOT set these two; parse_strategy_params must omit them + check(f"{label}: ctor omits process_orders_on_close", + "process_orders_on_close" not in strat) + + # --- effective strategy: seed fills the base-class-defaulted fields + eff_no_ovr = m.effective_strategy(FIXTURE_CPP, {}) + check(f"{label}: seed fills process_orders_on_close=False (no override)", + eff_no_ovr.get("process_orders_on_close") is False) + check(f"{label}: seed fills close_entries_rule=FIFO (no override)", + eff_no_ovr.get("close_entries_rule") == "FIFO") + check(f"{label}: effective keeps ctor initial_capital", + eff_no_ovr.get("initial_capital") == 50000.0) + + eff_ovr = m.effective_strategy(FIXTURE_CPP, + {"pyramiding": "5", "process_orders_on_close": "true"}) + check(f"{label}: override wins (pyramiding=5)", eff_ovr.get("pyramiding") == "5") + check(f"{label}: override wins (process_orders_on_close=true)", + eff_ovr.get("process_orders_on_close") == "true") + + # --- inputs ------------------------------------------------------ + inp = m.parse_inputs(FIXTURE_CPP) + check(f"{label}: 5 distinct inputs (dedup Fast EMA)", len(inp) == 5) + check(f"{label}: int default typed", inp["Fast EMA"] == {"type": "int", "default": 5}) + check(f"{label}: double default typed", + inp["ADX trend threshold"]["default"] == 25 and inp["ADX trend threshold"]["type"] == "double") + check(f"{label}: string default unquoted", inp["Mode"]["default"] == "fast") + check(f"{label}: source default kept", inp["Source"]["default"] == "close") + + eff_in = m.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"}) + check(f"{label}: input override -> value", eff_in["Fast EMA"]["value"] == "8") + check(f"{label}: non-overridden input -> default value", eff_in["Slow EMA"]["value"] == 13) + check(f"{label}: effective_inputs keeps default+type", + eff_in["Fast EMA"] == {"type": "int", "default": 5, "value": "8"}) + + # --- fingerprint round-trip + determinism ------------------------ + prov = { + "engine": {"version_string": "0.10.2", "major": 0, "minor": 10, "patch": 2, + "commit_sha": "f3fc3a3"}, + "codegen": {"version": "0.6.4", "generated_cpp_sha256": "deadbeef", + "transpiled_from_pine": True}, + "strategy": eff_no_ovr, + "inputs": eff_in, + "applied": {"inputs": {"Fast EMA": "8"}, "overrides": {}}, + "runtime": {"input_tf": "", "bar_magnifier": False}, + } + fp = m.build_fingerprint(prov) + decoded = json.loads(base64.b64decode(fp["token"])) + check(f"{label}: token decodes to provenance", decoded == prov) + check(f"{label}: digest prefixed sha256:", fp["digest"].startswith("sha256:")) + canonical = json.dumps(prov, sort_keys=True, separators=(",", ":")) + import hashlib + check(f"{label}: digest matches canonical sha256", + fp["digest"] == "sha256:" + hashlib.sha256(canonical.encode()).hexdigest()) + fp2 = m.build_fingerprint(prov) + check(f"{label}: deterministic token", fp["token"] == fp2["token"]) + + # --- the two copies must agree byte-for-byte on the fixture ---------- + check("copies agree: parse_strategy_params", + rj.parse_strategy_params(FIXTURE_CPP) == rs.parse_strategy_params(FIXTURE_CPP)) + check("copies agree: parse_inputs", + rj.parse_inputs(FIXTURE_CPP) == rs.parse_inputs(FIXTURE_CPP)) + check("copies agree: effective_strategy", + rj.effective_strategy(FIXTURE_CPP, {"pyramiding": "5"}) + == rs.effective_strategy(FIXTURE_CPP, {"pyramiding": "5"})) + check("copies agree: effective_inputs", + rj.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"}) + == rs.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"})) + + print(f"\n{passed} passed, {failed} failed") + return 1 if failed else 0 + + +if __name__ == "__main__": + sys.exit(main()) From 32125424a02693b34ef35b757ef01effc1f1ba92 Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Tue, 16 Jun 2026 07:58:22 +0800 Subject: [PATCH 2/8] feat(report): emit decode-able backtest fingerprint from run_json Add fingerprint helpers (strategy/input parsing, provenance builder, base64-encoded token + sha256 digest) to docker/run_json.py. The same block will be duplicated verbatim in scripts/run_strategy.py (Task 3). New CLI flags --generated-cpp and --transpiled feed the provenance. The fingerprint is injected into the JSON report under the top-level "fingerprint" key. Co-Authored-By: Claude Opus 4.8 (1M context) --- docker/run_json.py | 259 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 258 insertions(+), 1 deletion(-) diff --git a/docker/run_json.py b/docker/run_json.py index 5fbd1f8..2d1f48b 100755 --- a/docker/run_json.py +++ b/docker/run_json.py @@ -54,7 +54,19 @@ "equity_curve": [ # ABI v2: one point per script bar { "time_ms": int, "equity": float, "open_profit": float }, ... - ] + ], + "fingerprint": { # decode-able backtest provenance + "token": "", # b64decode -> JSON + "digest": "sha256:", # stable run id over canonical JSON + "provenance": { + "engine": { version_string, major, minor, patch, commit_sha }, + "codegen": { version, generated_cpp_sha256, transpiled_from_pine }, + "strategy": { ...all strategy() params, effective... }, + "inputs": { "": { type, default, value }, ... }, + "applied": { "inputs": {...}, "overrides": {...} }, # user deltas + "runtime": { ...same fields as applied_runtime... } + } + } } NaN convention: any metric with an empty/zero denominator is null (JSON has no @@ -64,16 +76,223 @@ from __future__ import annotations import argparse +import base64 import csv import ctypes +import hashlib import json import math +import re import sys import time from datetime import datetime, timezone from pathlib import Path +# >>> fingerprint helpers (DUPLICATED verbatim in scripts/run_strategy.py; +# scripts/ is .dockerignore'd so this cannot be a shared module. +# scripts/fingerprint_self_test.py asserts both copies stay identical.) +try: + from importlib import metadata as _ilmd +except ImportError: # pragma: no cover + _ilmd = None + +# Canonical strategy() defaults. Mirrors the engine base-class defaults in +# include/pineforge/engine.hpp (initial_capital_, process_orders_on_close_, +# default_qty_type_, default_qty_value_, pyramiding_, commission_type_, +# commission_value_, slippage_, close_entries_rule_any_). The codegen ctor +# emits only a subset (it omits process_orders_on_close + close_entries_rule), +# so this seed supplies the rest. KEEP IN SYNC with engine.hpp. +STRATEGY_SEED = { + "initial_capital": 1000000.0, + "process_orders_on_close": False, + "default_qty_type": "fixed", + "default_qty_value": 1.0, + "pyramiding": 1, + "commission_type": "percent", + "commission_value": 0.0, + "slippage": 0, + "close_entries_rule": "FIFO", +} + +_QTY_TYPE = {"FIXED": "fixed", "PERCENT_OF_EQUITY": "percent_of_equity", "CASH": "cash"} +_COMM_TYPE = {"PERCENT": "percent", "CASH_PER_ORDER": "cash_per_order", + "CASH_PER_CONTRACT": "cash_per_contract"} + +# generated.cpp ctor field name -> provenance key. +_STRAT_FIELD_KEY = { + "initial_capital_": "initial_capital", + "process_orders_on_close_": "process_orders_on_close", + "default_qty_type_": "default_qty_type", + "default_qty_value_": "default_qty_value", + "pyramiding_": "pyramiding", + "commission_type_": "commission_type", + "commission_value_": "commission_value", + "slippage_": "slippage", + "close_entries_rule_any_": "close_entries_rule", +} + +_INPUT_RE = re.compile(r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*([^;]*?)\)') + + +def _ctor_body(cpp_text: str) -> str: + """Return the GeneratedStrategy constructor body, or '' if not found. + + Scoping to the ctor is load-bearing: set_strategy_override() also contains + `initial_capital_ = std::stod(value);` lines that must NOT be parsed as + defaults. The member-init list (`_ta_ema_1(5)`) has no `=` so it cannot + false-match the field regex.""" + m = re.search(r"GeneratedStrategy\s*\([^)]*\)\s*(?::[^{]*)?\{", cpp_text) + if not m: + return "" + i = m.end() - 1 # index of the opening '{' + depth = 0 + for j in range(i, len(cpp_text)): + c = cpp_text[j] + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + return cpp_text[i + 1:j] + return "" + + +def _coerce_scalar(rhs: str): + rhs = rhs.strip() + if rhs in ("true", "false"): + return rhs == "true" + if re.fullmatch(r"[+-]?\d+", rhs): + return int(rhs) + try: + return float(rhs) + except ValueError: + return rhs + + +def parse_strategy_params(cpp_text: str) -> dict: + """Parse strategy() header defaults from the constructor body only.""" + out: dict = {} + body = _ctor_body(cpp_text) + for fld, rhs in re.findall(r"(\w+_)\s*=\s*([^;]+);", body): + key = _STRAT_FIELD_KEY.get(fld) + if not key: + continue + rhs = rhs.strip() + if fld == "default_qty_type_": + out[key] = _QTY_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "commission_type_": + out[key] = _COMM_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "close_entries_rule_any_": + out[key] = "ANY" if _coerce_scalar(rhs) is True else "FIFO" + else: + out[key] = _coerce_scalar(rhs) + return out + + +def effective_strategy(cpp_text: str, overrides: dict | None) -> dict: + """Canonical seed -> ctor-parsed defaults -> user overrides (string wins).""" + s = dict(STRATEGY_SEED) + s.update(parse_strategy_params(cpp_text)) + for k, v in (overrides or {}).items(): + s[k] = v + return s + + +def parse_inputs(cpp_text: str) -> dict: + """Parse every get_input_*("title", default) call; dedup by title (first wins).""" + out: dict = {} + for typ, title, dflt in _INPUT_RE.findall(cpp_text): + if title in out: + continue + d = dflt.strip() + if d.startswith('"') and d.endswith('"') and len(d) >= 2: + val = d[1:-1] + elif typ == "source": + val = d + else: + val = _coerce_scalar(d) + out[title] = {"type": typ, "default": val} + return out + + +def effective_inputs(cpp_text: str, inputs_applied: dict | None) -> dict: + """All declared inputs with {type, default, value}; value = override or default. + Applied inputs with no matching declaration are appended best-effort.""" + applied = inputs_applied or {} + out: dict = {} + for title, meta in parse_inputs(cpp_text).items(): + out[title] = { + "type": meta["type"], + "default": meta["default"], + "value": applied.get(title, meta["default"]), + } + for title, v in applied.items(): + if title not in out: + out[title] = {"type": "unknown", "default": None, "value": v} + return out + + +def _sha256_file(path) -> str | None: + try: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + except OSError: + return None + + +def _codegen_version() -> str: + if _ilmd is None: + return "unknown" + try: + return _ilmd.version("pineforge-codegen") + except Exception: + return "unknown" + + +def build_provenance(engine: dict, cpp_path, transpiled: bool, + inputs_applied: dict, overrides_applied: dict, + runtime: dict | None) -> dict: + cpp_text = "" + cpp_sha = None + if cpp_path: + cpp_sha = _sha256_file(cpp_path) + try: + with open(cpp_path, "r", encoding="utf-8", errors="replace") as f: + cpp_text = f.read() + except OSError: + cpp_text = "" + return { + "engine": engine, + "codegen": { + "version": _codegen_version(), + "generated_cpp_sha256": cpp_sha, + "transpiled_from_pine": bool(transpiled), + }, + "strategy": effective_strategy(cpp_text, overrides_applied), + "inputs": effective_inputs(cpp_text, inputs_applied), + "applied": { + "inputs": dict(inputs_applied or {}), + "overrides": dict(overrides_applied or {}), + }, + "runtime": runtime or {}, + } + + +def build_fingerprint(provenance: dict) -> dict: + canonical = json.dumps(provenance, sort_keys=True, separators=(",", ":")) + raw = canonical.encode("utf-8") + return { + "token": base64.b64encode(raw).decode("ascii"), + "digest": "sha256:" + hashlib.sha256(raw).hexdigest(), + "provenance": provenance, + } +# <<< fingerprint helpers + + # --- ctypes mirror of <pineforge/pineforge.h> ------------------------- class BarC(ctypes.Structure): @@ -203,6 +422,29 @@ class ReportC(ctypes.Structure): ] +class PfVersionC(ctypes.Structure): + """Mirror of pf_version_t (returned by value from pf_version_get).""" + _fields_ = [("major", ctypes.c_int), ("minor", ctypes.c_int), + ("patch", ctypes.c_int), ("commit_sha", ctypes.c_char_p)] + + +def engine_version(lib: ctypes.CDLL) -> dict: + """Read engine version+sha from the .so (whole-archive exports). The + fields are hasattr-guarded so an older .so degrades to blanks.""" + eng = {"version_string": "", "major": None, "minor": None, + "patch": None, "commit_sha": ""} + if hasattr(lib, "pf_version_string"): + lib.pf_version_string.restype = ctypes.c_char_p + s = lib.pf_version_string() + eng["version_string"] = s.decode("utf-8", "replace") if s else "" + if hasattr(lib, "pf_version_get"): + lib.pf_version_get.restype = PfVersionC + v = lib.pf_version_get() + eng["major"], eng["minor"], eng["patch"] = int(v.major), int(v.minor), int(v.patch) + eng["commit_sha"] = v.commit_sha.decode("utf-8", "replace") if v.commit_sha else "" + return eng + + # pf_report_t is CALLER-allocated: a .so built against a different ABI # writes past (or short of) our ReportC buffer. Assert version up front. EXPECTED_PF_ABI = 2 @@ -463,6 +705,13 @@ def main() -> int: ap.add_argument("--magnifier-dist", default="endpoints", help="Sample distribution: uniform, cosine, triangle, " "endpoints (default), front_loaded, back_loaded.") + ap.add_argument("--generated-cpp", type=Path, default=None, + help="Path to the compiled generated.cpp; hashed and parsed " + "for the report fingerprint (strategy()/input() provenance).") + ap.add_argument("--transpiled", default="", + help="'true' if generated.cpp came from a .pine transpile this " + "run, 'false' if a user-supplied .cpp. Recorded in the " + "fingerprint as codegen.transpiled_from_pine.") args = ap.parse_args() inputs = parse_kv_json(args.inputs, "--inputs") @@ -516,6 +765,14 @@ def main() -> int: } out = build_report_dict(report, args.ohlcv, n, first_ts, last_ts, elapsed, inputs, overrides, applied_runtime) + out["fingerprint"] = build_fingerprint(build_provenance( + engine_version(lib), + args.generated_cpp, + parse_bool(args.transpiled), + inputs, + overrides, + applied_runtime, + )) json.dump(out, sys.stdout, separators=(",", ":")) sys.stdout.write("\n") finally: From 35a17162a20654b49801aadf69d72cbf0089debd Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:05:36 +0800 Subject: [PATCH 3/8] harden(report): non-finite-safe scalar parse + crash-proof fingerprint injection Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- docker/run_json.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/docker/run_json.py b/docker/run_json.py index 2d1f48b..b72f177 100755 --- a/docker/run_json.py +++ b/docker/run_json.py @@ -165,7 +165,8 @@ def _coerce_scalar(rhs: str): if re.fullmatch(r"[+-]?\d+", rhs): return int(rhs) try: - return float(rhs) + f = float(rhs) + return f if (f == f and f not in (float("inf"), float("-inf"))) else rhs except ValueError: return rhs @@ -765,14 +766,17 @@ def main() -> int: } out = build_report_dict(report, args.ohlcv, n, first_ts, last_ts, elapsed, inputs, overrides, applied_runtime) - out["fingerprint"] = build_fingerprint(build_provenance( - engine_version(lib), - args.generated_cpp, - parse_bool(args.transpiled), - inputs, - overrides, - applied_runtime, - )) + try: + out["fingerprint"] = build_fingerprint(build_provenance( + engine_version(lib), + args.generated_cpp, + parse_bool(args.transpiled), + inputs, + overrides, + applied_runtime, + )) + except Exception: + out["fingerprint"] = None json.dump(out, sys.stdout, separators=(",", ":")) sys.stdout.write("\n") finally: From a9ec8f58dbc6a8cb29a9fc228443382430e48e26 Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:09:06 +0800 Subject: [PATCH 4/8] feat(run_strategy): opt-in --fingerprint-json sidecar Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- scripts/run_strategy.py | 271 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) diff --git a/scripts/run_strategy.py b/scripts/run_strategy.py index 4bf86b4..28ce5d9 100644 --- a/scripts/run_strategy.py +++ b/scripts/run_strategy.py @@ -39,9 +39,12 @@ from __future__ import annotations import argparse +import base64 import csv import ctypes +import hashlib import json +import re import sys import time from datetime import datetime, timezone @@ -69,6 +72,210 @@ "chart_timezone", }) +# >>> fingerprint helpers (DUPLICATED verbatim in scripts/run_strategy.py; +# scripts/ is .dockerignore'd so this cannot be a shared module. +# scripts/fingerprint_self_test.py asserts both copies stay identical.) +try: + from importlib import metadata as _ilmd +except ImportError: # pragma: no cover + _ilmd = None + +# Canonical strategy() defaults. Mirrors the engine base-class defaults in +# include/pineforge/engine.hpp (initial_capital_, process_orders_on_close_, +# default_qty_type_, default_qty_value_, pyramiding_, commission_type_, +# commission_value_, slippage_, close_entries_rule_any_). The codegen ctor +# emits only a subset (it omits process_orders_on_close + close_entries_rule), +# so this seed supplies the rest. KEEP IN SYNC with engine.hpp. +STRATEGY_SEED = { + "initial_capital": 1000000.0, + "process_orders_on_close": False, + "default_qty_type": "fixed", + "default_qty_value": 1.0, + "pyramiding": 1, + "commission_type": "percent", + "commission_value": 0.0, + "slippage": 0, + "close_entries_rule": "FIFO", +} + +_QTY_TYPE = {"FIXED": "fixed", "PERCENT_OF_EQUITY": "percent_of_equity", "CASH": "cash"} +_COMM_TYPE = {"PERCENT": "percent", "CASH_PER_ORDER": "cash_per_order", + "CASH_PER_CONTRACT": "cash_per_contract"} + +# generated.cpp ctor field name -> provenance key. +_STRAT_FIELD_KEY = { + "initial_capital_": "initial_capital", + "process_orders_on_close_": "process_orders_on_close", + "default_qty_type_": "default_qty_type", + "default_qty_value_": "default_qty_value", + "pyramiding_": "pyramiding", + "commission_type_": "commission_type", + "commission_value_": "commission_value", + "slippage_": "slippage", + "close_entries_rule_any_": "close_entries_rule", +} + +_INPUT_RE = re.compile(r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*([^;]*?)\)') + + +def _ctor_body(cpp_text: str) -> str: + """Return the GeneratedStrategy constructor body, or '' if not found. + + Scoping to the ctor is load-bearing: set_strategy_override() also contains + `initial_capital_ = std::stod(value);` lines that must NOT be parsed as + defaults. The member-init list (`_ta_ema_1(5)`) has no `=` so it cannot + false-match the field regex.""" + m = re.search(r"GeneratedStrategy\s*\([^)]*\)\s*(?::[^{]*)?\{", cpp_text) + if not m: + return "" + i = m.end() - 1 # index of the opening '{' + depth = 0 + for j in range(i, len(cpp_text)): + c = cpp_text[j] + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + return cpp_text[i + 1:j] + return "" + + +def _coerce_scalar(rhs: str): + rhs = rhs.strip() + if rhs in ("true", "false"): + return rhs == "true" + if re.fullmatch(r"[+-]?\d+", rhs): + return int(rhs) + try: + f = float(rhs) + return f if (f == f and f not in (float("inf"), float("-inf"))) else rhs + except ValueError: + return rhs + + +def parse_strategy_params(cpp_text: str) -> dict: + """Parse strategy() header defaults from the constructor body only.""" + out: dict = {} + body = _ctor_body(cpp_text) + for fld, rhs in re.findall(r"(\w+_)\s*=\s*([^;]+);", body): + key = _STRAT_FIELD_KEY.get(fld) + if not key: + continue + rhs = rhs.strip() + if fld == "default_qty_type_": + out[key] = _QTY_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "commission_type_": + out[key] = _COMM_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "close_entries_rule_any_": + out[key] = "ANY" if _coerce_scalar(rhs) is True else "FIFO" + else: + out[key] = _coerce_scalar(rhs) + return out + + +def effective_strategy(cpp_text: str, overrides: dict | None) -> dict: + """Canonical seed -> ctor-parsed defaults -> user overrides (string wins).""" + s = dict(STRATEGY_SEED) + s.update(parse_strategy_params(cpp_text)) + for k, v in (overrides or {}).items(): + s[k] = v + return s + + +def parse_inputs(cpp_text: str) -> dict: + """Parse every get_input_*("title", default) call; dedup by title (first wins).""" + out: dict = {} + for typ, title, dflt in _INPUT_RE.findall(cpp_text): + if title in out: + continue + d = dflt.strip() + if d.startswith('"') and d.endswith('"') and len(d) >= 2: + val = d[1:-1] + elif typ == "source": + val = d + else: + val = _coerce_scalar(d) + out[title] = {"type": typ, "default": val} + return out + + +def effective_inputs(cpp_text: str, inputs_applied: dict | None) -> dict: + """All declared inputs with {type, default, value}; value = override or default. + Applied inputs with no matching declaration are appended best-effort.""" + applied = inputs_applied or {} + out: dict = {} + for title, meta in parse_inputs(cpp_text).items(): + out[title] = { + "type": meta["type"], + "default": meta["default"], + "value": applied.get(title, meta["default"]), + } + for title, v in applied.items(): + if title not in out: + out[title] = {"type": "unknown", "default": None, "value": v} + return out + + +def _sha256_file(path) -> str | None: + try: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + except OSError: + return None + + +def _codegen_version() -> str: + if _ilmd is None: + return "unknown" + try: + return _ilmd.version("pineforge-codegen") + except Exception: + return "unknown" + + +def build_provenance(engine: dict, cpp_path, transpiled: bool, + inputs_applied: dict, overrides_applied: dict, + runtime: dict | None) -> dict: + cpp_text = "" + cpp_sha = None + if cpp_path: + cpp_sha = _sha256_file(cpp_path) + try: + with open(cpp_path, "r", encoding="utf-8", errors="replace") as f: + cpp_text = f.read() + except OSError: + cpp_text = "" + return { + "engine": engine, + "codegen": { + "version": _codegen_version(), + "generated_cpp_sha256": cpp_sha, + "transpiled_from_pine": bool(transpiled), + }, + "strategy": effective_strategy(cpp_text, overrides_applied), + "inputs": effective_inputs(cpp_text, inputs_applied), + "applied": { + "inputs": dict(inputs_applied or {}), + "overrides": dict(overrides_applied or {}), + }, + "runtime": runtime or {}, + } + + +def build_fingerprint(provenance: dict) -> dict: + canonical = json.dumps(provenance, sort_keys=True, separators=(",", ":")) + raw = canonical.encode("utf-8") + return { + "token": base64.b64encode(raw).decode("ascii"), + "digest": "sha256:" + hashlib.sha256(raw).hexdigest(), + "provenance": provenance, + } +# <<< fingerprint helpers + # --- ctypes mirror of <pineforge/pineforge.h> ------------------------- # @@ -225,6 +432,29 @@ def _check_abi(lib: ctypes.CDLL) -> None: f"{EXPECTED_PF_ABI}; rebuild.") +class PfVersionC(ctypes.Structure): + """Mirror of pf_version_t (returned by value from pf_version_get).""" + _fields_ = [("major", ctypes.c_int), ("minor", ctypes.c_int), + ("patch", ctypes.c_int), ("commit_sha", ctypes.c_char_p)] + + +def engine_version(lib: ctypes.CDLL) -> dict: + """Read engine version+sha from the .so (whole-archive exports). The + fields are hasattr-guarded so an older .so degrades to blanks.""" + eng = {"version_string": "", "major": None, "minor": None, + "patch": None, "commit_sha": ""} + if hasattr(lib, "pf_version_string"): + lib.pf_version_string.restype = ctypes.c_char_p + s = lib.pf_version_string() + eng["version_string"] = s.decode("utf-8", "replace") if s else "" + if hasattr(lib, "pf_version_get"): + lib.pf_version_get.restype = PfVersionC + v = lib.pf_version_get() + eng["major"], eng["minor"], eng["patch"] = int(v.major), int(v.minor), int(v.patch) + eng["commit_sha"] = v.commit_sha.decode("utf-8", "replace") if v.commit_sha else "" + return eng + + # --- Strategy harness -------------------------------------------------- def find_strategy_lib(strategy_dir: Path, so_name: str = "strategy.so") -> Path: @@ -393,6 +623,10 @@ def _setup_signatures(self) -> None: if hasattr(L, "strategy_set_syminfo_pointvalue"): L.strategy_set_syminfo_pointvalue.argtypes = [ctypes.c_void_p, ctypes.c_double] L.strategy_set_syminfo_pointvalue.restype = None + if hasattr(L, "pf_version_get"): + L.pf_version_get.restype = PfVersionC + if hasattr(L, "pf_version_string"): + L.pf_version_string.restype = ctypes.c_char_p def run(self, bars_csv: Path, params: dict | None = None, *, trace_enabled: bool = False, trade_start_time_ms: int | None = None, @@ -806,6 +1040,10 @@ def main() -> int: "were recorded. Pass an IANA name (e.g. 'Asia/Taipei') only " "for probes that genuinely need a non-UTC chart-tz. " "Per-probe override: set 'chart_timezone' in inputs.json.") + ap.add_argument("--fingerprint-json", type=Path, default=None, + help="Write a {token,digest,provenance} fingerprint of this " + "run to PATH. Off by default (keeps corpus output and " + "run_corpus.sh parity untouched).") args = ap.parse_args() strategy_dir = args.strategy_dir.resolve() @@ -862,6 +1100,39 @@ def main() -> int: "trace_names": report["trace_names"], "trace": trace_to_write, }, f) + if args.fingerprint_json is not None: + try: + cpp_path = strategy_dir / "generated.cpp" + # Inputs actually forwarded to the engine (drop tv_*/validator meta keys). + inputs_applied = { + str(k): str(v) for k, v in params.items() + if not str(k).startswith("tv_") and k not in _VALIDATION_META_KEYS + } + overrides_applied = { + str(k): str(v) for k, v in (run_kwargs.get("strategy_overrides") or {}).items() + } + runtime = { + "input_tf": run_kwargs.get("input_tf") or "", + "script_tf": run_kwargs.get("script_tf") or "", + "bar_magnifier": bool(run_kwargs.get("bar_magnifier")), + "magnifier_samples": int(run_kwargs.get("magnifier_samples") or 4), + "magnifier_distribution": run_kwargs.get("magnifier_distribution") or "ENDPOINTS", + "chart_timezone": run_kwargs.get("chart_timezone") or "", + } + fp = build_fingerprint(build_provenance( + engine_version(strat.lib), + cpp_path if cpp_path.exists() else None, + False, # run_strategy.py drives a prebuilt .so; no transpile this run + inputs_applied, + overrides_applied, + runtime, + )) + args.fingerprint_json.parent.mkdir(parents=True, exist_ok=True) + with args.fingerprint_json.open("w", encoding="utf-8") as f: + json.dump(fp, f, indent=2) + print(f" fingerprint -> {args.fingerprint_json} ({fp['digest']})") + except Exception as e: + print(f" fingerprint: skipped ({e})", file=sys.stderr) elapsed = time.time() - started try: From d04ea93e2ec47304e26dd17c4cc1ba35467b058e Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:14:01 +0800 Subject: [PATCH 5/8] feat(docker): pass generated.cpp + transpile flag to run_json Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- docker/entrypoint.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 8f5e70e..5d5e1b6 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -93,8 +93,10 @@ if [[ -f "${PINE}" ]]; then echo "[pineforge] transpiling strategy.pine ..." >&2 run_transpile "${PINE}" "${GEN}" # set -e aborts (exit 5) on failure SRC="${GEN}" + TRANSPILED=true elif [[ -f "${SRC_CPP}" ]]; then SRC="${SRC_CPP}" + TRANSPILED=false else echo "error: missing input — mount /in/strategy.pine (preferred) or /in/strategy.cpp" >&2 exit 2 @@ -131,4 +133,6 @@ python3 "${PREFIX}/bin/run_json.py" \ --bar-magnifier "${PINEFORGE_BAR_MAGNIFIER:-}" \ --magnifier-samples "${PINEFORGE_MAGNIFIER_SAMPLES:-4}" \ --magnifier-dist "${PINEFORGE_MAGNIFIER_DIST:-endpoints}" \ + --generated-cpp "${SRC}" \ + --transpiled "${TRANSPILED}" \ || { echo "[pineforge] backtest failed" >&2; exit 4; } From f48a2355b8ef775e5233558c88d77b6b470ddb63 Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:14:07 +0800 Subject: [PATCH 6/8] build(docker): force-latest (-U) codegen install Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index bbf844a..7626cd7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,7 +32,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Stage the pure-Python transpiler here so the runtime never needs pip or # network: target-install gives a plain directory tree to COPY across. -RUN pip3 install --break-system-packages --no-cache-dir --target /opt/codegen pineforge-codegen +RUN pip3 install --break-system-packages --no-cache-dir -U --target /opt/codegen pineforge-codegen WORKDIR /src # VERSION file is the version source-of-truth inside the build context; From 959457ecfe1cb092addfce8f2ca0b7e029eb90ec Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:14:38 +0800 Subject: [PATCH 7/8] docs(docker): document the report fingerprint field Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- docker/README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docker/README.md b/docker/README.md index 1d2b29d..06777af 100644 --- a/docker/README.md +++ b/docker/README.md @@ -200,6 +200,41 @@ read-only mounts; the image performs no network I/O at run time. } ``` +## Backtest fingerprint + +Every JSON report carries a `fingerprint` recording exactly what produced it — +reversible, no key required: + +```json +"fingerprint": { + "token": "<base64 of the canonical provenance JSON>", + "digest": "sha256:<hex>", + "provenance": { + "engine": { "version_string": "...", "major": 0, "minor": 10, "patch": 2, "commit_sha": "..." }, + "codegen": { "version": "0.6.4", "generated_cpp_sha256": "...", "transpiled_from_pine": true }, + "strategy": { "initial_capital": 1000000.0, "pyramiding": 1, "commission_type": "percent", "...": "all strategy() params, effective" }, + "inputs": { "Fast Length": { "type": "int", "default": 9, "value": "8" }, "...": "all input()s, effective" }, + "applied": { "inputs": { "Fast Length": "8" }, "overrides": {} }, + "runtime": { "input_tf": "", "bar_magnifier": false, "...": "..." } + } +} +``` + +`strategy` and `inputs` list the **full effective** parameter set — every +`strategy()` field and every `input()` value, with declared defaults, even +when no override was passed. `value` is the applied override if one was given, +otherwise the default. `digest` is a stable id for the run (same target ⇒ same +digest). + +Decode the token to recover the provenance: + +```bash +jq -r '.fingerprint.token' report.json | base64 -d | jq . +``` + +The provenance is also inlined under `fingerprint.provenance`, so decoding is +only needed to verify the token round-trips. + ## Exit codes | Code | Meaning | From f97af1e0cdc4a8269918106596ce8ec40e4ea82f Mon Sep 17 00:00:00 2001 From: luisleo526 <luisleo52655@gmail.com> Date: Tue, 16 Jun 2026 08:29:14 +0800 Subject: [PATCH 8/8] fix(report): unwrap std::string() in input-default parse + regression test Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- docker/README.md | 3 +-- docker/run_json.py | 12 ++++++++++-- scripts/fingerprint_self_test.py | 2 +- scripts/run_strategy.py | 12 ++++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/docker/README.md b/docker/README.md index 06777af..fc1f612 100644 --- a/docker/README.md +++ b/docker/README.md @@ -223,8 +223,7 @@ reversible, no key required: `strategy` and `inputs` list the **full effective** parameter set — every `strategy()` field and every `input()` value, with declared defaults, even when no override was passed. `value` is the applied override if one was given, -otherwise the default. `digest` is a stable id for the run (same target ⇒ same -digest). +otherwise the default. `digest` is a stable id for a run under a given harness and its runtime settings (same inputs + same settings ⇒ same digest). Decode the token to recover the provenance: diff --git a/docker/run_json.py b/docker/run_json.py index b72f177..d38c65e 100755 --- a/docker/run_json.py +++ b/docker/run_json.py @@ -132,7 +132,8 @@ "close_entries_rule_any_": "close_entries_rule", } -_INPUT_RE = re.compile(r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*([^;]*?)\)') +_INPUT_RE = re.compile( + r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*((?:[^();]|\([^()]*\))*?)\s*\)') def _ctor_body(cpp_text: str) -> str: @@ -171,6 +172,13 @@ def _coerce_scalar(rhs: str): return rhs +def _unwrap_std_string(expr: str) -> str: + """Codegen wraps string input defaults as std::string("..."); unwrap to the + inner literal so the recorded default is the value, not the C++ expression.""" + m = re.fullmatch(r'std::string\((.*)\)', expr.strip(), re.DOTALL) + return m.group(1).strip() if m else expr + + def parse_strategy_params(cpp_text: str) -> dict: """Parse strategy() header defaults from the constructor body only.""" out: dict = {} @@ -206,7 +214,7 @@ def parse_inputs(cpp_text: str) -> dict: for typ, title, dflt in _INPUT_RE.findall(cpp_text): if title in out: continue - d = dflt.strip() + d = _unwrap_std_string(dflt.strip()) if d.startswith('"') and d.endswith('"') and len(d) >= 2: val = d[1:-1] elif typ == "source": diff --git a/scripts/fingerprint_self_test.py b/scripts/fingerprint_self_test.py index fc6885c..5ea3fb2 100644 --- a/scripts/fingerprint_self_test.py +++ b/scripts/fingerprint_self_test.py @@ -46,7 +46,7 @@ i_fast = get_input_int("Fast EMA", 5); i_slow = get_input_int("Slow EMA", 13); thr = get_input_double("ADX trend threshold", 25); - mode = get_input_string("Mode", "fast"); + mode = get_input_string("Mode", std::string("fast")); src = get_input_source("Source", close); _inputs_initialized_ = true; } diff --git a/scripts/run_strategy.py b/scripts/run_strategy.py index 28ce5d9..1435aca 100644 --- a/scripts/run_strategy.py +++ b/scripts/run_strategy.py @@ -115,7 +115,8 @@ "close_entries_rule_any_": "close_entries_rule", } -_INPUT_RE = re.compile(r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*([^;]*?)\)') +_INPUT_RE = re.compile( + r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*((?:[^();]|\([^()]*\))*?)\s*\)') def _ctor_body(cpp_text: str) -> str: @@ -154,6 +155,13 @@ def _coerce_scalar(rhs: str): return rhs +def _unwrap_std_string(expr: str) -> str: + """Codegen wraps string input defaults as std::string("..."); unwrap to the + inner literal so the recorded default is the value, not the C++ expression.""" + m = re.fullmatch(r'std::string\((.*)\)', expr.strip(), re.DOTALL) + return m.group(1).strip() if m else expr + + def parse_strategy_params(cpp_text: str) -> dict: """Parse strategy() header defaults from the constructor body only.""" out: dict = {} @@ -189,7 +197,7 @@ def parse_inputs(cpp_text: str) -> dict: for typ, title, dflt in _INPUT_RE.findall(cpp_text): if title in out: continue - d = dflt.strip() + d = _unwrap_std_string(dflt.strip()) if d.startswith('"') and d.endswith('"') and len(d) >= 2: val = d[1:-1] elif typ == "source":