Skip to content
Merged
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \

# Stage the pure-Python transpiler here so the runtime never needs pip or
# network: target-install gives a plain directory tree to COPY across.
RUN pip3 install --break-system-packages --no-cache-dir --target /opt/codegen pineforge-codegen
RUN pip3 install --break-system-packages --no-cache-dir -U --target /opt/codegen pineforge-codegen

WORKDIR /src
# VERSION file is the version source-of-truth inside the build context;
Expand Down
34 changes: 34 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,40 @@ read-only mounts; the image performs no network I/O at run time.
}
```

## Backtest fingerprint

Every JSON report carries a `fingerprint` recording exactly what produced it —
reversible, no key required:

```json
"fingerprint": {
"token": "<base64 of the canonical provenance JSON>",
"digest": "sha256:<hex>",
"provenance": {
"engine": { "version_string": "...", "major": 0, "minor": 10, "patch": 2, "commit_sha": "..." },
"codegen": { "version": "0.6.4", "generated_cpp_sha256": "...", "transpiled_from_pine": true },
"strategy": { "initial_capital": 1000000.0, "pyramiding": 1, "commission_type": "percent", "...": "all strategy() params, effective" },
"inputs": { "Fast Length": { "type": "int", "default": 9, "value": "8" }, "...": "all input()s, effective" },
"applied": { "inputs": { "Fast Length": "8" }, "overrides": {} },
"runtime": { "input_tf": "", "bar_magnifier": false, "...": "..." }
}
}
```

`strategy` and `inputs` list the **full effective** parameter set — every
`strategy()` field and every `input()` value, with declared defaults, even
when no override was passed. `value` is the applied override if one was given,
otherwise the default. `digest` is a stable id for a run under a given harness and its runtime settings (same inputs + same settings ⇒ same digest).

Decode the token to recover the provenance:

```bash
jq -r '.fingerprint.token' report.json | base64 -d | jq .
```

The provenance is also inlined under `fingerprint.provenance`, so decoding is
only needed to verify the token round-trips.

## Exit codes

| Code | Meaning |
Expand Down
4 changes: 4 additions & 0 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ if [[ -f "${PINE}" ]]; then
echo "[pineforge] transpiling strategy.pine ..." >&2
run_transpile "${PINE}" "${GEN}" # set -e aborts (exit 5) on failure
SRC="${GEN}"
TRANSPILED=true
elif [[ -f "${SRC_CPP}" ]]; then
SRC="${SRC_CPP}"
TRANSPILED=false
else
echo "error: missing input — mount /in/strategy.pine (preferred) or /in/strategy.cpp" >&2
exit 2
Expand Down Expand Up @@ -131,4 +133,6 @@ python3 "${PREFIX}/bin/run_json.py" \
--bar-magnifier "${PINEFORGE_BAR_MAGNIFIER:-}" \
--magnifier-samples "${PINEFORGE_MAGNIFIER_SAMPLES:-4}" \
--magnifier-dist "${PINEFORGE_MAGNIFIER_DIST:-endpoints}" \
--generated-cpp "${SRC}" \
--transpiled "${TRANSPILED}" \
|| { echo "[pineforge] backtest failed" >&2; exit 4; }
271 changes: 270 additions & 1 deletion docker/run_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,19 @@
"equity_curve": [ # ABI v2: one point per script bar
{ "time_ms": int, "equity": float, "open_profit": float },
...
]
],
"fingerprint": { # decode-able backtest provenance
"token": "<base64(canonical provenance JSON)>", # b64decode -> JSON
"digest": "sha256:<hex>", # stable run id over canonical JSON
"provenance": {
"engine": { version_string, major, minor, patch, commit_sha },
"codegen": { version, generated_cpp_sha256, transpiled_from_pine },
"strategy": { ...all strategy() params, effective... },
"inputs": { "<title>": { type, default, value }, ... },
"applied": { "inputs": {...}, "overrides": {...} }, # user deltas
"runtime": { ...same fields as applied_runtime... }
}
}
}

NaN convention: any metric with an empty/zero denominator is null (JSON has no
Expand All @@ -64,16 +76,232 @@
from __future__ import annotations

import argparse
import base64
import csv
import ctypes
import hashlib
import json
import math
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path


# >>> fingerprint helpers (DUPLICATED verbatim in scripts/run_strategy.py;
# scripts/ is .dockerignore'd so this cannot be a shared module.
# scripts/fingerprint_self_test.py asserts both copies stay identical.)
try:
from importlib import metadata as _ilmd
except ImportError: # pragma: no cover
_ilmd = None

# Canonical strategy() defaults. Mirrors the engine base-class defaults in
# include/pineforge/engine.hpp (initial_capital_, process_orders_on_close_,
# default_qty_type_, default_qty_value_, pyramiding_, commission_type_,
# commission_value_, slippage_, close_entries_rule_any_). The codegen ctor
# emits only a subset (it omits process_orders_on_close + close_entries_rule),
# so this seed supplies the rest. KEEP IN SYNC with engine.hpp.
STRATEGY_SEED = {
"initial_capital": 1000000.0,
"process_orders_on_close": False,
"default_qty_type": "fixed",
"default_qty_value": 1.0,
"pyramiding": 1,
"commission_type": "percent",
"commission_value": 0.0,
"slippage": 0,
"close_entries_rule": "FIFO",
}

_QTY_TYPE = {"FIXED": "fixed", "PERCENT_OF_EQUITY": "percent_of_equity", "CASH": "cash"}
_COMM_TYPE = {"PERCENT": "percent", "CASH_PER_ORDER": "cash_per_order",
"CASH_PER_CONTRACT": "cash_per_contract"}

# generated.cpp ctor field name -> provenance key.
_STRAT_FIELD_KEY = {
"initial_capital_": "initial_capital",
"process_orders_on_close_": "process_orders_on_close",
"default_qty_type_": "default_qty_type",
"default_qty_value_": "default_qty_value",
"pyramiding_": "pyramiding",
"commission_type_": "commission_type",
"commission_value_": "commission_value",
"slippage_": "slippage",
"close_entries_rule_any_": "close_entries_rule",
}

_INPUT_RE = re.compile(
r'get_input_(\w+)\(\s*"((?:[^"\\]|\\.)*)"\s*,\s*((?:[^();]|\([^()]*\))*?)\s*\)')


def _ctor_body(cpp_text: str) -> str:
"""Return the GeneratedStrategy constructor body, or '' if not found.

Scoping to the ctor is load-bearing: set_strategy_override() also contains
`initial_capital_ = std::stod(value);` lines that must NOT be parsed as
defaults. The member-init list (`_ta_ema_1(5)`) has no `=` so it cannot
false-match the field regex."""
m = re.search(r"GeneratedStrategy\s*\([^)]*\)\s*(?::[^{]*)?\{", cpp_text)
if not m:
return ""
i = m.end() - 1 # index of the opening '{'
depth = 0
for j in range(i, len(cpp_text)):
c = cpp_text[j]
if c == "{":
depth += 1
elif c == "}":
depth -= 1
if depth == 0:
return cpp_text[i + 1:j]
return ""


def _coerce_scalar(rhs: str):
rhs = rhs.strip()
if rhs in ("true", "false"):
return rhs == "true"
if re.fullmatch(r"[+-]?\d+", rhs):
return int(rhs)
try:
f = float(rhs)
return f if (f == f and f not in (float("inf"), float("-inf"))) else rhs
except ValueError:
return rhs


def _unwrap_std_string(expr: str) -> str:
"""Codegen wraps string input defaults as std::string("..."); unwrap to the
inner literal so the recorded default is the value, not the C++ expression."""
m = re.fullmatch(r'std::string\((.*)\)', expr.strip(), re.DOTALL)
return m.group(1).strip() if m else expr


def parse_strategy_params(cpp_text: str) -> dict:
"""Parse strategy() header defaults from the constructor body only."""
out: dict = {}
body = _ctor_body(cpp_text)
for fld, rhs in re.findall(r"(\w+_)\s*=\s*([^;]+);", body):
key = _STRAT_FIELD_KEY.get(fld)
if not key:
continue
rhs = rhs.strip()
if fld == "default_qty_type_":
out[key] = _QTY_TYPE.get(rhs.split("::")[-1], rhs)
elif fld == "commission_type_":
out[key] = _COMM_TYPE.get(rhs.split("::")[-1], rhs)
elif fld == "close_entries_rule_any_":
out[key] = "ANY" if _coerce_scalar(rhs) is True else "FIFO"
else:
out[key] = _coerce_scalar(rhs)
return out


def effective_strategy(cpp_text: str, overrides: dict | None) -> dict:
"""Canonical seed -> ctor-parsed defaults -> user overrides (string wins)."""
s = dict(STRATEGY_SEED)
s.update(parse_strategy_params(cpp_text))
for k, v in (overrides or {}).items():
s[k] = v
return s


def parse_inputs(cpp_text: str) -> dict:
"""Parse every get_input_*("title", default) call; dedup by title (first wins)."""
out: dict = {}
for typ, title, dflt in _INPUT_RE.findall(cpp_text):
if title in out:
continue
d = _unwrap_std_string(dflt.strip())
if d.startswith('"') and d.endswith('"') and len(d) >= 2:
val = d[1:-1]
elif typ == "source":
val = d
else:
val = _coerce_scalar(d)
out[title] = {"type": typ, "default": val}
return out


def effective_inputs(cpp_text: str, inputs_applied: dict | None) -> dict:
"""All declared inputs with {type, default, value}; value = override or default.
Applied inputs with no matching declaration are appended best-effort."""
applied = inputs_applied or {}
out: dict = {}
for title, meta in parse_inputs(cpp_text).items():
out[title] = {
"type": meta["type"],
"default": meta["default"],
"value": applied.get(title, meta["default"]),
}
for title, v in applied.items():
if title not in out:
out[title] = {"type": "unknown", "default": None, "value": v}
return out


def _sha256_file(path) -> str | None:
try:
h = hashlib.sha256()
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
h.update(chunk)
return h.hexdigest()
except OSError:
return None


def _codegen_version() -> str:
if _ilmd is None:
return "unknown"
try:
return _ilmd.version("pineforge-codegen")
except Exception:
return "unknown"


def build_provenance(engine: dict, cpp_path, transpiled: bool,
inputs_applied: dict, overrides_applied: dict,
runtime: dict | None) -> dict:
cpp_text = ""
cpp_sha = None
if cpp_path:
cpp_sha = _sha256_file(cpp_path)
try:
with open(cpp_path, "r", encoding="utf-8", errors="replace") as f:
cpp_text = f.read()
except OSError:
cpp_text = ""
return {
"engine": engine,
"codegen": {
"version": _codegen_version(),
"generated_cpp_sha256": cpp_sha,
"transpiled_from_pine": bool(transpiled),
},
"strategy": effective_strategy(cpp_text, overrides_applied),
"inputs": effective_inputs(cpp_text, inputs_applied),
"applied": {
"inputs": dict(inputs_applied or {}),
"overrides": dict(overrides_applied or {}),
},
"runtime": runtime or {},
}


def build_fingerprint(provenance: dict) -> dict:
canonical = json.dumps(provenance, sort_keys=True, separators=(",", ":"))
raw = canonical.encode("utf-8")
return {
"token": base64.b64encode(raw).decode("ascii"),
"digest": "sha256:" + hashlib.sha256(raw).hexdigest(),
"provenance": provenance,
}
# <<< fingerprint helpers


# --- ctypes mirror of <pineforge/pineforge.h> -------------------------

class BarC(ctypes.Structure):
Expand Down Expand Up @@ -203,6 +431,29 @@ class ReportC(ctypes.Structure):
]


class PfVersionC(ctypes.Structure):
"""Mirror of pf_version_t (returned by value from pf_version_get)."""
_fields_ = [("major", ctypes.c_int), ("minor", ctypes.c_int),
("patch", ctypes.c_int), ("commit_sha", ctypes.c_char_p)]


def engine_version(lib: ctypes.CDLL) -> dict:
"""Read engine version+sha from the .so (whole-archive exports). The
fields are hasattr-guarded so an older .so degrades to blanks."""
eng = {"version_string": "", "major": None, "minor": None,
"patch": None, "commit_sha": ""}
if hasattr(lib, "pf_version_string"):
lib.pf_version_string.restype = ctypes.c_char_p
s = lib.pf_version_string()
eng["version_string"] = s.decode("utf-8", "replace") if s else ""
if hasattr(lib, "pf_version_get"):
lib.pf_version_get.restype = PfVersionC
v = lib.pf_version_get()
eng["major"], eng["minor"], eng["patch"] = int(v.major), int(v.minor), int(v.patch)
eng["commit_sha"] = v.commit_sha.decode("utf-8", "replace") if v.commit_sha else ""
return eng


# pf_report_t is CALLER-allocated: a .so built against a different ABI
# writes past (or short of) our ReportC buffer. Assert version up front.
EXPECTED_PF_ABI = 2
Expand Down Expand Up @@ -463,6 +714,13 @@ def main() -> int:
ap.add_argument("--magnifier-dist", default="endpoints",
help="Sample distribution: uniform, cosine, triangle, "
"endpoints (default), front_loaded, back_loaded.")
ap.add_argument("--generated-cpp", type=Path, default=None,
help="Path to the compiled generated.cpp; hashed and parsed "
"for the report fingerprint (strategy()/input() provenance).")
ap.add_argument("--transpiled", default="",
help="'true' if generated.cpp came from a .pine transpile this "
"run, 'false' if a user-supplied .cpp. Recorded in the "
"fingerprint as codegen.transpiled_from_pine.")
args = ap.parse_args()

inputs = parse_kv_json(args.inputs, "--inputs")
Expand Down Expand Up @@ -516,6 +774,17 @@ def main() -> int:
}
out = build_report_dict(report, args.ohlcv, n, first_ts, last_ts,
elapsed, inputs, overrides, applied_runtime)
try:
out["fingerprint"] = build_fingerprint(build_provenance(
engine_version(lib),
args.generated_cpp,
parse_bool(args.transpiled),
inputs,
overrides,
applied_runtime,
))
except Exception:
out["fingerprint"] = None
json.dump(out, sys.stdout, separators=(",", ":"))
sys.stdout.write("\n")
finally:
Expand Down
Loading
Loading