diff --git a/docker/Dockerfile b/docker/Dockerfile index bbf844a..7626cd7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,7 +32,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Stage the pure-Python transpiler here so the runtime never needs pip or # network: target-install gives a plain directory tree to COPY across. -RUN pip3 install --break-system-packages --no-cache-dir --target /opt/codegen pineforge-codegen +RUN pip3 install --break-system-packages --no-cache-dir -U --target /opt/codegen pineforge-codegen WORKDIR /src # VERSION file is the version source-of-truth inside the build context; diff --git a/docker/README.md b/docker/README.md index 1d2b29d..fc1f612 100644 --- a/docker/README.md +++ b/docker/README.md @@ -200,6 +200,40 @@ read-only mounts; the image performs no network I/O at run time. } ``` +## Backtest fingerprint + +Every JSON report carries a `fingerprint` recording exactly what produced it — +reversible, no key required (`null` if the provenance could not be built): + +```json +"fingerprint": { + "token": "...", + "digest": "sha256:...", + "provenance": { + "engine": { "version_string": "...", "major": 0, "minor": 10, "patch": 2, "commit_sha": "..." }, + "codegen": { "version": "0.6.4", "generated_cpp_sha256": "...", "transpiled_from_pine": true }, + "strategy": { "initial_capital": 1000000.0, "pyramiding": 1, "commission_type": "percent", "...": "all strategy() params, effective" }, + "inputs": { "Fast Length": { "type": "int", "default": 9, "value": "8" }, "...": "all input()s, effective" }, + "applied": { "inputs": { "Fast Length": "8" }, "overrides": {} }, + "runtime": { "input_tf": "", "bar_magnifier": false, "...": "..." } + } +} +``` + +`strategy` and `inputs` list the **full effective** parameter set — every +`strategy()` field and every `input()` value, with declared defaults, even +when no override was passed. `value` is the applied override if one was given, +otherwise the default. 
`digest` is the SHA-256 of the canonical provenance JSON (the same bytes `token` encodes), so it is a stable id for a run under a given harness and its runtime settings (same inputs + same settings ⇒ same digest). + +Decode the token to recover the provenance: + +```bash +jq -r '.fingerprint.token' report.json | base64 -d | jq . +``` + +The provenance is also inlined under `fingerprint.provenance`, so decoding is +only needed to verify the token round-trips. + ## Exit codes | Code | Meaning | diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 8f5e70e..5d5e1b6 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -93,8 +93,10 @@ if [[ -f "${PINE}" ]]; then echo "[pineforge] transpiling strategy.pine ..." >&2 run_transpile "${PINE}" "${GEN}" # set -e aborts (exit 5) on failure SRC="${GEN}" + TRANSPILED=true elif [[ -f "${SRC_CPP}" ]]; then SRC="${SRC_CPP}" + TRANSPILED=false else echo "error: missing input — mount /in/strategy.pine (preferred) or /in/strategy.cpp" >&2 exit 2 @@ -131,4 +133,6 @@ python3 "${PREFIX}/bin/run_json.py" \ --bar-magnifier "${PINEFORGE_BAR_MAGNIFIER:-}" \ --magnifier-samples "${PINEFORGE_MAGNIFIER_SAMPLES:-4}" \ --magnifier-dist "${PINEFORGE_MAGNIFIER_DIST:-endpoints}" \ + --generated-cpp "${SRC}" \ + --transpiled "${TRANSPILED}" \ || { echo "[pineforge] backtest failed" >&2; exit 4; } diff --git a/docker/run_json.py b/docker/run_json.py index 5fbd1f8..d38c65e 100755 --- a/docker/run_json.py +++ b/docker/run_json.py @@ -54,7 +54,19 @@ "equity_curve": [ # ABI v2: one point per script bar { "time_ms": int, "equity": float, "open_profit": float }, ... - ] + ], + "fingerprint": { # decode-able backtest provenance; null if it could not be built + "token": "...", # b64decode -> JSON + "digest": "sha256:...", # stable run id over canonical JSON + "provenance": { + "engine": { version_string, major, minor, patch, commit_sha }, + "codegen": { version, generated_cpp_sha256, transpiled_from_pine }, + "strategy": { ...all strategy() params, effective... }, + "inputs": { "input title": { type, default, value }, ... 
}, + "applied": { "inputs": {...}, "overrides": {...} }, # user deltas + "runtime": { ...same fields as applied_runtime... } + } + } } NaN convention: any metric with an empty/zero denominator is null (JSON has no @@ -64,16 +76,232 @@ from __future__ import annotations import argparse +import base64 import csv import ctypes +import hashlib import json import math +import re import sys import time from datetime import datetime, timezone from pathlib import Path +# >>> fingerprint helpers (DUPLICATED verbatim in scripts/run_strategy.py; +# scripts/ is .dockerignore'd so this cannot be a shared module. +# scripts/fingerprint_self_test.py asserts both copies stay identical.) +try: + from importlib import metadata as _ilmd +except ImportError: # pragma: no cover + _ilmd = None + +# Canonical strategy() defaults. Mirrors the engine base-class defaults in +# include/pineforge/engine.hpp (initial_capital_, process_orders_on_close_, +# default_qty_type_, default_qty_value_, pyramiding_, commission_type_, +# commission_value_, slippage_, close_entries_rule_any_). The codegen ctor +# emits only a subset (it omits process_orders_on_close + close_entries_rule), +# so this seed supplies the rest. KEEP IN SYNC with engine.hpp. +STRATEGY_SEED = { + "initial_capital": 1000000.0, + "process_orders_on_close": False, + "default_qty_type": "fixed", + "default_qty_value": 1.0, + "pyramiding": 1, + "commission_type": "percent", + "commission_value": 0.0, + "slippage": 0, + "close_entries_rule": "FIFO", +} + +_QTY_TYPE = {"FIXED": "fixed", "PERCENT_OF_EQUITY": "percent_of_equity", "CASH": "cash"} +_COMM_TYPE = {"PERCENT": "percent", "CASH_PER_ORDER": "cash_per_order", + "CASH_PER_CONTRACT": "cash_per_contract"} + +# generated.cpp ctor field name -> provenance key. 
+_STRAT_FIELD_KEY = { + "initial_capital_": "initial_capital", + "process_orders_on_close_": "process_orders_on_close", + "default_qty_type_": "default_qty_type", + "default_qty_value_": "default_qty_value", + "pyramiding_": "pyramiding", + "commission_type_": "commission_type", + "commission_value_": "commission_value", + "slippage_": "slippage", + "close_entries_rule_any_": "close_entries_rule", +} + +_INPUT_RE = re.compile( + r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*((?:[^();]|\([^()]*\))*?)\s*\)') + + +def _ctor_body(cpp_text: str) -> str: + """Return the GeneratedStrategy constructor body, or '' if not found. + + Scoping to the ctor is load-bearing: set_strategy_override() also contains + `initial_capital_ = std::stod(value);` lines that must NOT be parsed as + defaults. The member-init list (`_ta_ema_1(5)`) has no `=` so it cannot + false-match the field regex.""" + m = re.search(r"GeneratedStrategy\s*\([^)]*\)\s*(?::[^{]*)?\{", cpp_text) + if not m: + return "" + i = m.end() - 1 # index of the opening '{' + depth = 0 + for j in range(i, len(cpp_text)): + c = cpp_text[j] + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + return cpp_text[i + 1:j] + return "" + + +def _coerce_scalar(rhs: str): + rhs = rhs.strip() + if rhs in ("true", "false"): + return rhs == "true" + if re.fullmatch(r"[+-]?\d+", rhs): + return int(rhs) + try: + f = float(rhs) + return f if (f == f and f not in (float("inf"), float("-inf"))) else rhs + except ValueError: + return rhs + + +def _unwrap_std_string(expr: str) -> str: + """Codegen wraps string input defaults as std::string("..."); unwrap to the + inner literal so the recorded default is the value, not the C++ expression.""" + m = re.fullmatch(r'std::string\((.*)\)', expr.strip(), re.DOTALL) + return m.group(1).strip() if m else expr + + +def parse_strategy_params(cpp_text: str) -> dict: + """Parse strategy() header defaults from the constructor body only.""" + out: dict = {} + body = 
_ctor_body(cpp_text) + for fld, rhs in re.findall(r"(\w+_)\s*=\s*([^;]+);", body): + key = _STRAT_FIELD_KEY.get(fld) + if not key: + continue + rhs = rhs.strip() + if fld == "default_qty_type_": + out[key] = _QTY_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "commission_type_": + out[key] = _COMM_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "close_entries_rule_any_": + out[key] = "ANY" if _coerce_scalar(rhs) is True else "FIFO" + else: + out[key] = _coerce_scalar(rhs) + return out + + +def effective_strategy(cpp_text: str, overrides: dict | None) -> dict: + """Canonical seed -> ctor-parsed defaults -> user overrides (string wins).""" + s = dict(STRATEGY_SEED) + s.update(parse_strategy_params(cpp_text)) + for k, v in (overrides or {}).items(): + s[k] = v + return s + + +def parse_inputs(cpp_text: str) -> dict: + """Parse every get_input_*("title", default) call; dedup by title (first wins).""" + out: dict = {} + for typ, title, dflt in _INPUT_RE.findall(cpp_text): + if title in out: + continue + d = _unwrap_std_string(dflt.strip()) + if d.startswith('"') and d.endswith('"') and len(d) >= 2: + val = d[1:-1] + elif typ == "source": + val = d + else: + val = _coerce_scalar(d) + out[title] = {"type": typ, "default": val} + return out + + +def effective_inputs(cpp_text: str, inputs_applied: dict | None) -> dict: + """All declared inputs with {type, default, value}; value = override or default. 
+ Applied inputs with no matching declaration are appended best-effort.""" + applied = inputs_applied or {} + out: dict = {} + for title, meta in parse_inputs(cpp_text).items(): + out[title] = { + "type": meta["type"], + "default": meta["default"], + "value": applied.get(title, meta["default"]), + } + for title, v in applied.items(): + if title not in out: + out[title] = {"type": "unknown", "default": None, "value": v} + return out + + +def _sha256_file(path) -> str | None: + try: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + except OSError: + return None + + +def _codegen_version() -> str: + if _ilmd is None: + return "unknown" + try: + return _ilmd.version("pineforge-codegen") + except Exception: + return "unknown" + + +def build_provenance(engine: dict, cpp_path, transpiled: bool, + inputs_applied: dict, overrides_applied: dict, + runtime: dict | None) -> dict: + cpp_text = "" + cpp_sha = None + if cpp_path: + cpp_sha = _sha256_file(cpp_path) + try: + with open(cpp_path, "r", encoding="utf-8", errors="replace") as f: + cpp_text = f.read() + except OSError: + cpp_text = "" + return { + "engine": engine, + "codegen": { + "version": _codegen_version(), + "generated_cpp_sha256": cpp_sha, + "transpiled_from_pine": bool(transpiled), + }, + "strategy": effective_strategy(cpp_text, overrides_applied), + "inputs": effective_inputs(cpp_text, inputs_applied), + "applied": { + "inputs": dict(inputs_applied or {}), + "overrides": dict(overrides_applied or {}), + }, + "runtime": runtime or {}, + } + + +def build_fingerprint(provenance: dict) -> dict: + canonical = json.dumps(provenance, sort_keys=True, separators=(",", ":")) + raw = canonical.encode("utf-8") + return { + "token": base64.b64encode(raw).decode("ascii"), + "digest": "sha256:" + hashlib.sha256(raw).hexdigest(), + "provenance": provenance, + } +# <<< fingerprint helpers + + # --- ctypes mirror of 
<pineforge/pineforge.h> ------------------------- class BarC(ctypes.Structure): @@ -203,6 +431,29 @@ class ReportC(ctypes.Structure): ] +class PfVersionC(ctypes.Structure): + """Mirror of pf_version_t (returned by value from pf_version_get).""" + _fields_ = [("major", ctypes.c_int), ("minor", ctypes.c_int), + ("patch", ctypes.c_int), ("commit_sha", ctypes.c_char_p)] + + +def engine_version(lib: ctypes.CDLL) -> dict: + """Read engine version+sha from the .so (whole-archive exports). The + fields are hasattr-guarded so an older .so degrades to blanks.""" + eng = {"version_string": "", "major": None, "minor": None, + "patch": None, "commit_sha": ""} + if hasattr(lib, "pf_version_string"): + lib.pf_version_string.restype = ctypes.c_char_p + s = lib.pf_version_string() + eng["version_string"] = s.decode("utf-8", "replace") if s else "" + if hasattr(lib, "pf_version_get"): + lib.pf_version_get.restype = PfVersionC + v = lib.pf_version_get() + eng["major"], eng["minor"], eng["patch"] = int(v.major), int(v.minor), int(v.patch) + eng["commit_sha"] = v.commit_sha.decode("utf-8", "replace") if v.commit_sha else "" + return eng + + # pf_report_t is CALLER-allocated: a .so built against a different ABI # writes past (or short of) our ReportC buffer. Assert version up front. EXPECTED_PF_ABI = 2 @@ -463,6 +714,13 @@ def main() -> int: ap.add_argument("--magnifier-dist", default="endpoints", help="Sample distribution: uniform, cosine, triangle, " "endpoints (default), front_loaded, back_loaded.") + ap.add_argument("--generated-cpp", type=Path, default=None, + help="Path to the compiled generated.cpp; hashed and parsed " + "for the report fingerprint (strategy()/input() provenance).") + ap.add_argument("--transpiled", default="", + help="'true' if generated.cpp came from a .pine transpile this " + "run, 'false' if a user-supplied .cpp. 
Recorded in the " + "fingerprint as codegen.transpiled_from_pine.") args = ap.parse_args() inputs = parse_kv_json(args.inputs, "--inputs") @@ -516,6 +774,17 @@ def main() -> int: } out = build_report_dict(report, args.ohlcv, n, first_ts, last_ts, elapsed, inputs, overrides, applied_runtime) + try: + out["fingerprint"] = build_fingerprint(build_provenance( + engine_version(lib), + args.generated_cpp, + parse_bool(args.transpiled), + inputs, + overrides, + applied_runtime, + )) + except Exception: + out["fingerprint"] = None json.dump(out, sys.stdout, separators=(",", ":")) sys.stdout.write("\n") finally: diff --git a/scripts/fingerprint_self_test.py b/scripts/fingerprint_self_test.py new file mode 100644 index 0000000..5ea3fb2 --- /dev/null +++ b/scripts/fingerprint_self_test.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""fingerprint_self_test.py — pins the backtest-fingerprint logic. + +The fingerprint helpers are duplicated (byte-identical) in +docker/run_json.py and scripts/run_strategy.py because scripts/ is +.dockerignore'd, so a shared runtime module cannot be COPY'd into the +image. This test loads BOTH copies, runs one fixture generated.cpp +through each, and asserts (a) the parser/provenance/fingerprint behave +as designed and (b) the two copies produce identical output — so they +cannot silently drift. + +Exit 0 iff every check passes. +""" +from __future__ import annotations + +import base64 +import importlib.util +import json +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parent.parent +RUN_JSON = REPO / "docker" / "run_json.py" +RUN_STRATEGY = REPO / "scripts" / "run_strategy.py" + +# Fixture: a ctor that sets a SUBSET of strategy() fields (process_orders_on_close +# and close_entries_rule are intentionally absent — they default in the engine +# base class), a set_strategy_override with a decoy `initial_capital_ =` line +# (must NOT be parsed), and a mix of input types incl. a duplicate title. 
+FIXTURE_CPP = ''' +struct GeneratedStrategy : Strategy { + explicit GeneratedStrategy() : _ta_ema_1(5), _ta_ema_2(13) { + initial_capital_ = 50000.0; + default_qty_type_ = QtyType::PERCENT_OF_EQUITY; + default_qty_value_ = 3.0; + pyramiding_ = 2; + commission_type_ = CommissionType::PERCENT; + commission_value_ = 0.04; + slippage_ = 1; + } + void set_strategy_override(const std::string& key, const std::string& value) { + if (key == "initial_capital") { initial_capital_ = std::stod(value); return; } + } + void on_bar(const Bar& bar) override { + if (!_inputs_initialized_) { + i_fast = get_input_int("Fast EMA", 5); + i_slow = get_input_int("Slow EMA", 13); + thr = get_input_double("ADX trend threshold", 25); + mode = get_input_string("Mode", std::string("fast")); + src = get_input_source("Source", close); + _inputs_initialized_ = true; + } + i_fast = get_input_int("Fast EMA", 5); + } +}; +''' + + +def _load(path: Path, name: str): + spec = importlib.util.spec_from_file_location(name, path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def main() -> int: + passed = failed = 0 + + def check(name: str, cond: bool) -> None: + nonlocal passed, failed + if cond: + passed += 1 + print(f" OK {name}") + else: + failed += 1 + print(f" FAIL {name}") + + rj = _load(RUN_JSON, "pf_run_json") + rs = _load(RUN_STRATEGY, "pf_run_strategy") + + for label, m in (("run_json", rj), ("run_strategy", rs)): + # --- strategy() defaults from the ctor only ----------------------- + strat = m.parse_strategy_params(FIXTURE_CPP) + check(f"{label}: initial_capital parsed", strat.get("initial_capital") == 50000.0) + check(f"{label}: enum default_qty_type mapped", + strat.get("default_qty_type") == "percent_of_equity") + check(f"{label}: commission_value parsed", strat.get("commission_value") == 0.04) + check(f"{label}: pyramiding parsed", strat.get("pyramiding") == 2) + check(f"{label}: slippage parsed", strat.get("slippage") == 1) + # decoy line in 
set_strategy_override must NOT pollute the parse + check(f"{label}: override-fn line not parsed (still 50000.0)", + strat.get("initial_capital") == 50000.0) + # ctor does NOT set these two; parse_strategy_params must omit them + check(f"{label}: ctor omits process_orders_on_close", + "process_orders_on_close" not in strat) + + # --- effective strategy: seed fills the base-class-defaulted fields + eff_no_ovr = m.effective_strategy(FIXTURE_CPP, {}) + check(f"{label}: seed fills process_orders_on_close=False (no override)", + eff_no_ovr.get("process_orders_on_close") is False) + check(f"{label}: seed fills close_entries_rule=FIFO (no override)", + eff_no_ovr.get("close_entries_rule") == "FIFO") + check(f"{label}: effective keeps ctor initial_capital", + eff_no_ovr.get("initial_capital") == 50000.0) + + eff_ovr = m.effective_strategy(FIXTURE_CPP, + {"pyramiding": "5", "process_orders_on_close": "true"}) + check(f"{label}: override wins (pyramiding=5)", eff_ovr.get("pyramiding") == "5") + check(f"{label}: override wins (process_orders_on_close=true)", + eff_ovr.get("process_orders_on_close") == "true") + + # --- inputs ------------------------------------------------------ + inp = m.parse_inputs(FIXTURE_CPP) + check(f"{label}: 5 distinct inputs (dedup Fast EMA)", len(inp) == 5) + check(f"{label}: int default typed", inp["Fast EMA"] == {"type": "int", "default": 5}) + check(f"{label}: double default typed", + inp["ADX trend threshold"]["default"] == 25 and inp["ADX trend threshold"]["type"] == "double") + check(f"{label}: string default unquoted", inp["Mode"]["default"] == "fast") + check(f"{label}: source default kept", inp["Source"]["default"] == "close") + + eff_in = m.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"}) + check(f"{label}: input override -> value", eff_in["Fast EMA"]["value"] == "8") + check(f"{label}: non-overridden input -> default value", eff_in["Slow EMA"]["value"] == 13) + check(f"{label}: effective_inputs keeps default+type", + eff_in["Fast EMA"] == 
{"type": "int", "default": 5, "value": "8"}) + + # --- fingerprint round-trip + determinism ------------------------ + prov = { + "engine": {"version_string": "0.10.2", "major": 0, "minor": 10, "patch": 2, + "commit_sha": "f3fc3a3"}, + "codegen": {"version": "0.6.4", "generated_cpp_sha256": "deadbeef", + "transpiled_from_pine": True}, + "strategy": eff_no_ovr, + "inputs": eff_in, + "applied": {"inputs": {"Fast EMA": "8"}, "overrides": {}}, + "runtime": {"input_tf": "", "bar_magnifier": False}, + } + fp = m.build_fingerprint(prov) + decoded = json.loads(base64.b64decode(fp["token"])) + check(f"{label}: token decodes to provenance", decoded == prov) + check(f"{label}: digest prefixed sha256:", fp["digest"].startswith("sha256:")) + canonical = json.dumps(prov, sort_keys=True, separators=(",", ":")) + import hashlib + check(f"{label}: digest matches canonical sha256", + fp["digest"] == "sha256:" + hashlib.sha256(canonical.encode()).hexdigest()) + fp2 = m.build_fingerprint(prov) + check(f"{label}: deterministic token", fp["token"] == fp2["token"]) + + # --- the two copies must agree byte-for-byte on the fixture ---------- + check("copies agree: parse_strategy_params", + rj.parse_strategy_params(FIXTURE_CPP) == rs.parse_strategy_params(FIXTURE_CPP)) + check("copies agree: parse_inputs", + rj.parse_inputs(FIXTURE_CPP) == rs.parse_inputs(FIXTURE_CPP)) + check("copies agree: effective_strategy", + rj.effective_strategy(FIXTURE_CPP, {"pyramiding": "5"}) + == rs.effective_strategy(FIXTURE_CPP, {"pyramiding": "5"})) + check("copies agree: effective_inputs", + rj.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"}) + == rs.effective_inputs(FIXTURE_CPP, {"Fast EMA": "8"})) + + print(f"\n{passed} passed, {failed} failed") + return 1 if failed else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run_strategy.py b/scripts/run_strategy.py index 4bf86b4..1435aca 100644 --- a/scripts/run_strategy.py +++ b/scripts/run_strategy.py @@ -39,9 +39,12 @@ from __future__ 
import annotations import argparse +import base64 import csv import ctypes +import hashlib import json +import re import sys import time from datetime import datetime, timezone @@ -69,6 +72,218 @@ "chart_timezone", }) +# >>> fingerprint helpers (DUPLICATED verbatim in docker/run_json.py and scripts/run_strategy.py; +# scripts/ is .dockerignore'd so this cannot be a shared module. +# scripts/fingerprint_self_test.py checks both copies agree on a fixture.) +try: + from importlib import metadata as _ilmd +except ImportError: # pragma: no cover + _ilmd = None + +# Canonical strategy() defaults. Mirrors the engine base-class defaults in +# include/pineforge/engine.hpp (initial_capital_, process_orders_on_close_, +# default_qty_type_, default_qty_value_, pyramiding_, commission_type_, +# commission_value_, slippage_, close_entries_rule_any_). The codegen ctor +# emits only a subset (it omits process_orders_on_close + close_entries_rule), +# so this seed supplies the rest. KEEP IN SYNC with engine.hpp. +STRATEGY_SEED = { + "initial_capital": 1000000.0, + "process_orders_on_close": False, + "default_qty_type": "fixed", + "default_qty_value": 1.0, + "pyramiding": 1, + "commission_type": "percent", + "commission_value": 0.0, + "slippage": 0, + "close_entries_rule": "FIFO", +} + +_QTY_TYPE = {"FIXED": "fixed", "PERCENT_OF_EQUITY": "percent_of_equity", "CASH": "cash"} +_COMM_TYPE = {"PERCENT": "percent", "CASH_PER_ORDER": "cash_per_order", + "CASH_PER_CONTRACT": "cash_per_contract"} + +# generated.cpp ctor field name -> provenance key. 
+_STRAT_FIELD_KEY = { + "initial_capital_": "initial_capital", + "process_orders_on_close_": "process_orders_on_close", + "default_qty_type_": "default_qty_type", + "default_qty_value_": "default_qty_value", + "pyramiding_": "pyramiding", + "commission_type_": "commission_type", + "commission_value_": "commission_value", + "slippage_": "slippage", + "close_entries_rule_any_": "close_entries_rule", +} + +_INPUT_RE = re.compile( + r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*((?:[^();]|\([^()]*\))*?)\s*\)') + + +def _ctor_body(cpp_text: str) -> str: + """Return the GeneratedStrategy constructor body, or '' if not found. + + Scoping to the ctor is load-bearing: set_strategy_override() also contains + `initial_capital_ = std::stod(value);` lines that must NOT be parsed as + defaults. The member-init list (`_ta_ema_1(5)`) has no `=` so it cannot + false-match the field regex.""" + m = re.search(r"GeneratedStrategy\s*\([^)]*\)\s*(?::[^{]*)?\{", cpp_text) + if not m: + return "" + i = m.end() - 1 # index of the opening '{' + depth = 0 + for j in range(i, len(cpp_text)): + c = cpp_text[j] + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + return cpp_text[i + 1:j] + return "" + + +def _coerce_scalar(rhs: str): + rhs = rhs.strip() + if rhs in ("true", "false"): + return rhs == "true" + if re.fullmatch(r"[+-]?\d+", rhs): + return int(rhs) + try: + f = float(rhs) + return f if (f == f and f not in (float("inf"), float("-inf"))) else rhs + except ValueError: + return rhs + + +def _unwrap_std_string(expr: str) -> str: + """Codegen wraps string input defaults as std::string("..."); unwrap to the + inner literal so the recorded default is the value, not the C++ expression.""" + m = re.fullmatch(r'std::string\((.*)\)', expr.strip(), re.DOTALL) + return m.group(1).strip() if m else expr + + +def parse_strategy_params(cpp_text: str) -> dict: + """Parse strategy() header defaults from the constructor body only.""" + out: dict = {} + body = 
_ctor_body(cpp_text) + for fld, rhs in re.findall(r"(\w+_)\s*=\s*([^;]+);", body): + key = _STRAT_FIELD_KEY.get(fld) + if not key: + continue + rhs = rhs.strip() + if fld == "default_qty_type_": + out[key] = _QTY_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "commission_type_": + out[key] = _COMM_TYPE.get(rhs.split("::")[-1], rhs) + elif fld == "close_entries_rule_any_": + out[key] = "ANY" if _coerce_scalar(rhs) is True else "FIFO" + else: + out[key] = _coerce_scalar(rhs) + return out + + +def effective_strategy(cpp_text: str, overrides: dict | None) -> dict: + """Canonical seed -> ctor-parsed defaults -> user overrides (string wins).""" + s = dict(STRATEGY_SEED) + s.update(parse_strategy_params(cpp_text)) + for k, v in (overrides or {}).items(): + s[k] = v + return s + + +def parse_inputs(cpp_text: str) -> dict: + """Parse every get_input_*("title", default) call; dedup by title (first wins).""" + out: dict = {} + for typ, title, dflt in _INPUT_RE.findall(cpp_text): + if title in out: + continue + d = _unwrap_std_string(dflt.strip()) + if d.startswith('"') and d.endswith('"') and len(d) >= 2: + val = d[1:-1] + elif typ == "source": + val = d + else: + val = _coerce_scalar(d) + out[title] = {"type": typ, "default": val} + return out + + +def effective_inputs(cpp_text: str, inputs_applied: dict | None) -> dict: + """All declared inputs with {type, default, value}; value = override or default. 
+ Applied inputs with no matching declaration are appended best-effort.""" + applied = inputs_applied or {} + out: dict = {} + for title, meta in parse_inputs(cpp_text).items(): + out[title] = { + "type": meta["type"], + "default": meta["default"], + "value": applied.get(title, meta["default"]), + } + for title, v in applied.items(): + if title not in out: + out[title] = {"type": "unknown", "default": None, "value": v} + return out + + +def _sha256_file(path) -> str | None: + try: + h = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + except OSError: + return None + + +def _codegen_version() -> str: + if _ilmd is None: + return "unknown" + try: + return _ilmd.version("pineforge-codegen") + except Exception: + return "unknown" + + +def build_provenance(engine: dict, cpp_path, transpiled: bool, + inputs_applied: dict, overrides_applied: dict, + runtime: dict | None) -> dict: + cpp_text = "" + cpp_sha = None + if cpp_path: + cpp_sha = _sha256_file(cpp_path) + try: + with open(cpp_path, "r", encoding="utf-8", errors="replace") as f: + cpp_text = f.read() + except OSError: + cpp_text = "" + return { + "engine": engine, + "codegen": { + "version": _codegen_version(), + "generated_cpp_sha256": cpp_sha, + "transpiled_from_pine": bool(transpiled), + }, + "strategy": effective_strategy(cpp_text, overrides_applied), + "inputs": effective_inputs(cpp_text, inputs_applied), + "applied": { + "inputs": dict(inputs_applied or {}), + "overrides": dict(overrides_applied or {}), + }, + "runtime": runtime or {}, + } + + +def build_fingerprint(provenance: dict) -> dict: + canonical = json.dumps(provenance, sort_keys=True, separators=(",", ":")) + raw = canonical.encode("utf-8") + return { + "token": base64.b64encode(raw).decode("ascii"), + "digest": "sha256:" + hashlib.sha256(raw).hexdigest(), + "provenance": provenance, + } +# <<< fingerprint helpers + # --- ctypes mirror of 
<pineforge/pineforge.h> ------------------------- # @@ -225,6 +440,29 @@ def _check_abi(lib: ctypes.CDLL) -> None: f"{EXPECTED_PF_ABI}; rebuild.") +class PfVersionC(ctypes.Structure): + """Mirror of pf_version_t (returned by value from pf_version_get).""" + _fields_ = [("major", ctypes.c_int), ("minor", ctypes.c_int), + ("patch", ctypes.c_int), ("commit_sha", ctypes.c_char_p)] + + +def engine_version(lib: ctypes.CDLL) -> dict: + """Read engine version+sha from the .so (whole-archive exports). The + fields are hasattr-guarded so an older .so degrades to blanks.""" + eng = {"version_string": "", "major": None, "minor": None, + "patch": None, "commit_sha": ""} + if hasattr(lib, "pf_version_string"): + lib.pf_version_string.restype = ctypes.c_char_p + s = lib.pf_version_string() + eng["version_string"] = s.decode("utf-8", "replace") if s else "" + if hasattr(lib, "pf_version_get"): + lib.pf_version_get.restype = PfVersionC + v = lib.pf_version_get() + eng["major"], eng["minor"], eng["patch"] = int(v.major), int(v.minor), int(v.patch) + eng["commit_sha"] = v.commit_sha.decode("utf-8", "replace") if v.commit_sha else "" + return eng + + # --- Strategy harness -------------------------------------------------- def find_strategy_lib(strategy_dir: Path, so_name: str = "strategy.so") -> Path: @@ -393,6 +631,10 @@ def _setup_signatures(self) -> None: if hasattr(L, "strategy_set_syminfo_pointvalue"): L.strategy_set_syminfo_pointvalue.argtypes = [ctypes.c_void_p, ctypes.c_double] L.strategy_set_syminfo_pointvalue.restype = None + if hasattr(L, "pf_version_get"): + L.pf_version_get.restype = PfVersionC + if hasattr(L, "pf_version_string"): + L.pf_version_string.restype = ctypes.c_char_p def run(self, bars_csv: Path, params: dict | None = None, *, trace_enabled: bool = False, trade_start_time_ms: int | None = None, @@ -806,6 +1048,10 @@ def main() -> int: "were recorded. Pass an IANA name (e.g. 'Asia/Taipei') only " "for probes that genuinely need a non-UTC chart-tz. 
" "Per-probe override: set 'chart_timezone' in inputs.json.") + ap.add_argument("--fingerprint-json", type=Path, default=None, + help="Write a {token,digest,provenance} fingerprint of this " + "run to PATH. Off by default (keeps corpus output and " + "run_corpus.sh parity untouched).") args = ap.parse_args() strategy_dir = args.strategy_dir.resolve() @@ -862,6 +1108,39 @@ def main() -> int: "trace_names": report["trace_names"], "trace": trace_to_write, }, f) + if args.fingerprint_json is not None: + try: + cpp_path = strategy_dir / "generated.cpp" + # Inputs actually forwarded to the engine (drop tv_*/validator meta keys). + inputs_applied = { + str(k): str(v) for k, v in params.items() + if not str(k).startswith("tv_") and k not in _VALIDATION_META_KEYS + } + overrides_applied = { + str(k): str(v) for k, v in (run_kwargs.get("strategy_overrides") or {}).items() + } + runtime = { + "input_tf": run_kwargs.get("input_tf") or "", + "script_tf": run_kwargs.get("script_tf") or "", + "bar_magnifier": bool(run_kwargs.get("bar_magnifier")), + "magnifier_samples": int(run_kwargs.get("magnifier_samples") or 4), + "magnifier_distribution": run_kwargs.get("magnifier_distribution") or "ENDPOINTS", + "chart_timezone": run_kwargs.get("chart_timezone") or "", + } + fp = build_fingerprint(build_provenance( + engine_version(strat.lib), + cpp_path if cpp_path.exists() else None, + False, # run_strategy.py drives a prebuilt .so; no transpile this run + inputs_applied, + overrides_applied, + runtime, + )) + args.fingerprint_json.parent.mkdir(parents=True, exist_ok=True) + with args.fingerprint_json.open("w", encoding="utf-8") as f: + json.dump(fp, f, indent=2) + print(f" fingerprint -> {args.fingerprint_json} ({fp['digest']})") + except Exception as e: + print(f" fingerprint: skipped ({e})", file=sys.stderr) elapsed = time.time() - started try: