From 0f8f3d8a29acc7a6e5c42717ea5d0fbefd1619d1 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Fri, 26 Jun 2026 18:00:39 -0300
Subject: [PATCH 01/36] feat: recursion profiling + measurement programs

Add the measurement/profiling harness for the in-VM STARK verifier:

- `empty`-proof and `deserialize-only` bench guests + `sp1/verifier`
  cross-prover comparison, all exercising the no_std verifier.
- Expand the recursion smoke test with PC-histogram, sampled-flamegraph,
  page-count, cycle-count and per-step-breakdown diagnostics, plus the
  `make test-profile-recursion` targets and the histogram-aggregation
  CI script/workflow.
- Expose read-only `Executor::memory()`, `Memory::cells()` and
  `SymbolTable::functions()` accessors and make `flamegraph::demangle`
  public so the diagnostics can resolve guest PCs to functions.
---
 .../scripts/aggregate_recursion_histogram.py  | 126 +++
 .github/workflows/profile-recursion.yml       | 175 ++++
 Makefile                                      |  10 +-
 .../deserialize-only/.cargo/config.toml       |   6 +
 bench_vs/lambda/deserialize-only/Cargo.toml   |  13 +
 bench_vs/lambda/deserialize-only/src/main.rs  |  93 ++
 bench_vs/sp1/verifier/Cargo.toml              |   3 +
 bench_vs/sp1/verifier/program/Cargo.toml      |  10 +
 bench_vs/sp1/verifier/program/src/main.rs     |  34 +
 bench_vs/sp1/verifier/script/Cargo.toml       |  13 +
 bench_vs/sp1/verifier/script/build.rs         |   5 +
 bench_vs/sp1/verifier/script/src/main.rs      |  83 ++
 executor/src/elf.rs                           |   5 +
 executor/src/flamegraph.rs                    |   2 +-
 executor/src/vm/memory.rs                     |   7 +
 prover/src/tests/recursion_smoke_test.rs      | 936 +++++++++++++++++-
 16 files changed, 1510 insertions(+), 11 deletions(-)
 create mode 100755 .github/scripts/aggregate_recursion_histogram.py
 create mode 100644 .github/workflows/profile-recursion.yml
 create mode 100644 bench_vs/lambda/deserialize-only/.cargo/config.toml
 create mode 100644 bench_vs/lambda/deserialize-only/Cargo.toml
 create mode 100644 bench_vs/lambda/deserialize-only/src/main.rs
 create mode 100644 bench_vs/sp1/verifier/Cargo.toml
 create mode 100644 bench_vs/sp1/verifier/program/Cargo.toml
 create mode 100644 bench_vs/sp1/verifier/program/src/main.rs
 create mode 100644 bench_vs/sp1/verifier/script/Cargo.toml
 create mode 100644 bench_vs/sp1/verifier/script/build.rs
 create mode 100644 bench_vs/sp1/verifier/script/src/main.rs
diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
new file mode 100755
index 000000000..8a12dc05e
--- /dev/null
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Format the recursion-guest per-function profile as a Markdown PR comment.
+
+`test_recursion_pc_histogram` prints a per-function summary table (cycles folded
+over each function's PCs, computed across the *full* histogram) followed by a
+per-address detail table. We extract the per-function table — the view that
+shows where the cycles actually go — and render it as Markdown.
+
+    Top 25 functions by cycle count (aggregated over their PCs):
+    rank          cycles        %    cum %    PCs  function (file:line)
+       1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
+
+Reads the test's captured output from argv[1]; writes the Markdown body to
+argv[2] (or stdout).
+"""
+
+import re
+import sys
+
+# A per-function summary row: rank, cycles, pct%, cum%, pcs, function.
+# Distinguished from the per-PC detail rows by the absence of a 0x<pc> column.
+FN_ROW = re.compile(
+    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
+)
+FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
+PC_TABLE_START = re.compile(r"Top \d+ PCs by cycle count")
+TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
+UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
+EXEC_TIME = re.compile(r"Exec time\s*:\s*(\S+)")
+
+
+def parse(text):
+    total_cycles = unique_pcs = exec_time = None
+    rows = []
+    in_fn_table = False
+    for line in text.splitlines():
+        if total_cycles is None and (m := TOTAL_CYCLES.search(line)):
+            total_cycles = int(m.group(1))
+        if unique_pcs is None and (m := UNIQUE_PCS.search(line)):
+            unique_pcs = int(m.group(1))
+        if exec_time is None and (m := EXEC_TIME.search(line)):
+            exec_time = m.group(1)
+        if FN_TABLE_START.search(line):
+            in_fn_table = True
+            continue
+        if PC_TABLE_START.search(line):
+            in_fn_table = False
+            continue
+        if in_fn_table and (m := FN_ROW.match(line)):
+            rows.append(
+                {
+                    "cycles": int(m.group(1)),
+                    "pct": m.group(2),
+                    "cum": m.group(3),
+                    "pcs": int(m.group(4)),
+                    "fn": m.group(5),
+                }
+            )
+    return total_cycles, unique_pcs, exec_time, rows
+
+
+def short(name, width=90):
+    return name if len(name) <= width else name[: width - 1] + "…"
+
+
+def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest profile"):
+    if not rows:
+        return (
+            f"### {title}\n\n"
+            "> ⚠️ No per-function rows found in the test output — the run may "
+            "have failed before printing the table. Check the workflow logs.\n"
+        )
+
+    body = f"### {title}\n\n"
+    if total_cycles is not None:
+        body += f"**Total cycles:** {total_cycles:,}"
+        if unique_pcs is not None:
+            body += f" · **Unique PCs:** {unique_pcs:,}"
+        if exec_time:
+            body += f" · **Exec time:** {exec_time}"
+        body += "\n\n"
+
+    body += f"#### Top {len(rows)} functions by cycles (folded over their PCs)\n\n"
+    body += "| Rank | Cycles | % | Cum % | PCs | Function |\n"
+    body += "|-----:|-------:|--:|------:|----:|----------|\n"
+    for i, r in enumerate(rows, 1):
+        body += (
+            f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
+            f"{r['pcs']} | `{short(r['fn'])}` |\n"
+        )
+
+    last_cum = rows[-1]["cum"]
+    body += (
+        f"\n<sub>Each function's cycles are summed over all its program counters "
+        f"across the full histogram; the top {len(rows)} cover {last_cum}% of total "
+        f"cycles. Percentages are of total cycles.</sub>\n"
+    )
+    return body
+
+
+def main():
+    """CLI entry point: parse a captured test log and emit the Markdown body."""
+    import argparse
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("log", help="captured test output to parse")
+    ap.add_argument("-o", "--out", help="write Markdown here instead of stdout")
+    ap.add_argument(
+        "-t",
+        "--title",
+        default="Recursion guest profile",
+        help="section heading (e.g. the test/config name)",
+    )
+    args = ap.parse_args()
+
+    with open(args.log, "r", encoding="utf-8", errors="replace") as f:
+        text = f.read()
+    body = render(*parse(text), title=args.title)
+    if args.out:
+        with open(args.out, "w", encoding="utf-8") as f:
+            f.write(body)
+    else:
+        sys.stdout.write(body)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/profile-recursion.yml b/.github/workflows/profile-recursion.yml
new file mode 100644
index 000000000..420cebfcb
--- /dev/null
+++ b/.github/workflows/profile-recursion.yml
@@ -0,0 +1,175 @@
+name: Profile Recursion (PR)
+
+# Runs the recursion-guest PC histogram diagnostics (single-query and
+# multi-query, in parallel via a matrix) and posts a combined per-function
+# profile as a PR comment. Triggered by a `/profile_recursion` comment from a
+# repo member, or manually via workflow_dispatch.
+
+on:
+  workflow_dispatch:
+  issue_comment:
+    types: [created]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: profile-recursion-${{ (startsWith(github.event.comment.body, '/profile_recursion') && github.event.issue.number) || github.run_id }}
+  cancel-in-progress: true  # only /profile_recursion comments share a per-PR group; other comments get a unique one
+
+jobs:
+  # One job per configuration; they run in parallel and each uploads a Markdown
+  # fragment artifact. The `comment` job stitches them into one PR comment.
+  profile:
+    # Skip unless: workflow_dispatch, or "/profile_recursion" comment on a PR by a member.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment' &&
+       github.event.issue.pull_request &&
+       startsWith(github.event.comment.body, '/profile_recursion') &&
+       contains(fromJSON('["MEMBER","OWNER","COLLABORATOR"]'), github.event.comment.author_association))
+    runs-on: [self-hosted, bench]
+    timeout-minutes: 90
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: single-query
+            test: single
+            title: "Single query (blowup=2, 1 query)"
+          - name: multi-query
+            test: multi
+            title: "Multi query (blowup=8, 128-bit)"
+    steps:
+      - name: React to comment
+        if: github.event_name == 'issue_comment' && matrix.name == 'single-query'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: context.payload.comment.id,
+              content: 'eyes'
+            });
+
+      - name: Get PR head ref
+        id: pr-ref
+        if: github.event_name == 'issue_comment'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUM: ${{ github.event.issue.number }}
+        run: |
+          SHA=$(gh pr view "$PR_NUM" --repo "$GITHUB_REPOSITORY" --json headRefOid -q .headRefOid)
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.pr-ref.outputs.sha || github.sha }}
+
+      - name: Setup Rust Environment
+        uses: ./.github/actions/setup-rust
+
+      - name: Add cargo to PATH
+        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+
+      - name: Run recursion PC histogram (${{ matrix.name }})
+        env:
+          TEST: ${{ matrix.test }}
+        run: |
+          # Self-provision the RISC-V sysroot in a user-writable dir (the default
+          # /opt path on the bench runner is root-owned); the guest ELF build the
+          # test triggers picks this up via the Makefile's `SYSROOT_DIR ?=`.
+          export SYSROOT_DIR="$HOME/.lambda-vm-sysroot"
+          set -o pipefail
+          make "test-profile-recursion-$TEST" 2>&1 | tee /tmp/hist.log  # log is parsed by the aggregate step
+
+      - name: Aggregate into a per-function fragment
+        if: always()
+        env:
+          TITLE: ${{ matrix.title }}
+        run: |
+          python3 .github/scripts/aggregate_recursion_histogram.py \
+            /tmp/hist.log --title "$TITLE" --out "/tmp/fragment-${{ matrix.name }}.md"
+          cat "/tmp/fragment-${{ matrix.name }}.md" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload fragment
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: profile-fragment-${{ matrix.name }}
+          path: /tmp/fragment-${{ matrix.name }}.md
+          retention-days: 7
+
+  # Stitch the matrix fragments into a single PR comment.
+  comment:
+    needs: profile
+    if: always() && github.event_name == 'issue_comment' && needs.profile.result != 'skipped'
+    runs-on: [self-hosted, bench]
+    steps:
+      - name: Get PR head ref
+        id: pr-ref
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUM: ${{ github.event.issue.number }}
+        run: |
+          SHA=$(gh pr view "$PR_NUM" --repo "$GITHUB_REPOSITORY" --json headRefOid -q .headRefOid)
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+
+      - name: Download fragments
+        uses: actions/download-artifact@v4
+        with:
+          path: fragments
+          pattern: profile-fragment-*
+          merge-multiple: true
+
+      - name: Assemble comment body
+        env:
+          COMMIT_SHA: ${{ steps.pr-ref.outputs.sha }}
+        run: |
+          {
+            echo "## Recursion guest profile"
+            echo
+            # Single-query first, then multi-query (only these two fragments are included).
+            for frag in fragments/fragment-single-query.md \
+                        fragments/fragment-multi-query.md; do
+              [ -f "$frag" ] && { cat "$frag"; echo; }
+            done
+            echo "<sub>Commit: ${COMMIT_SHA:0:8} · Runner: self-hosted bench</sub>"
+          } > /tmp/profile_comment.md
+          cat /tmp/profile_comment.md
+
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('/tmp/profile_comment.md', 'utf8');
+
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+            // Reuse our own marker comment so repeated /profile_recursion runs update in place.
+            const existing = comments.find(c =>
+              c.user.type === 'Bot' &&
+              c.body.includes('Recursion guest profile')
+            );
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+            }
diff --git a/Makefile b/Makefile
index 454eff098..30e3029da 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 .PHONY: deps deps-linux deps-macos compile-programs-asm compile-programs-rust compile-bench \
 compile-programs compile-recursion-elfs clean-asm clean-rust clean-bench clean-shared \
 clean-recursion-elfs clean test test-asm \
-test-rust test-executor test-flamegraph flamegraph-prover \
+test-rust test-executor test-flamegraph flamegraph-prover test-profile-recursion test-profile-recursion-single test-profile-recursion-multi \
 test-fast test-prover test-prover-all test-disk-spill test-math-cuda test-cuda-integration \
 bench-math-cuda bench-prover bench-prover-cuda build check clippy fmt lint regen-ethrex-fixtures \
 update-ethrex-fixture-checksums check-ethrex-fixture-checksums
@@ -232,6 +232,14 @@ test-rust: compile-programs-rust
 test-flamegraph:
 	cargo test -p executor --test flamegraph
 
+test-profile-recursion: test-profile-recursion-single test-profile-recursion-multi
+
+test-profile-recursion-single: compile-programs-rust
+	cargo test --package lambda-vm-prover --lib test_recursion_pc_histogram_1query -- --ignored --nocapture
+
+test-profile-recursion-multi: compile-programs-rust
+	cargo test --package lambda-vm-prover --lib test_recursion_pc_histogram_multiquery -- --ignored --nocapture
+
 # Regenerate the committed ethrex block fixtures (see tooling/ethrex-fixtures).
 # Run after bumping the ethrex rev; README checksums are refreshed automatically.
 regen-ethrex-fixtures:
diff --git a/bench_vs/lambda/deserialize-only/.cargo/config.toml b/bench_vs/lambda/deserialize-only/.cargo/config.toml
new file mode 100644
index 000000000..be730c3ec
--- /dev/null
+++ b/bench_vs/lambda/deserialize-only/.cargo/config.toml
@@ -0,0 +1,6 @@
+[target.riscv64im-lambda-vm-elf]
+rustflags = [
+  "-C", "link-arg=-e",
+  "-C", "link-arg=main",
+  "-C", "passes=lower-atomic"
+]
diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml
new file mode 100644
index 000000000..b4a4616f4
--- /dev/null
+++ b/bench_vs/lambda/deserialize-only/Cargo.toml
@@ -0,0 +1,13 @@
+[workspace]
+
+[package]
+name = "deserialize-only-bench"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+lambda-vm-prover = { path = "../../../prover", default-features = false }
+embedded-alloc = "0.6"
+riscv = { version = "0.15", features = ["critical-section-single-hart"] }
+serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] }
+postcard = { version = "1.0", default-features = false, features = ["alloc"] }
diff --git a/bench_vs/lambda/deserialize-only/src/main.rs b/bench_vs/lambda/deserialize-only/src/main.rs
new file mode 100644
index 000000000..8627776a1
--- /dev/null
+++ b/bench_vs/lambda/deserialize-only/src/main.rs
@@ -0,0 +1,93 @@
+//! Deserialize-only counterpart to the recursion guest.
+//!
+//! Reads the same private-input blob as `recursion-bench`, postcard-decodes
+//! `(VmProof, Vec<u8>, ProofOptions)`, then commits a marker byte derived from
+//! the decoded value and halts — without ever calling `verify_with_options`.
+//! The cycle delta between this guest and `recursion-bench` is the actual cost
+//! of the STARK verifier inside the VM (everything else being equal).
+
+#![no_std]
+#![no_main]
+
+extern crate alloc;
+
+use alloc::vec::Vec;
+use core::arch::asm;
+use core::panic::PanicInfo;
+
+use embedded_alloc::TlsfHeap as Heap;
+use lambda_vm_prover::{ProofOptions, VmProof};
+// Required to pull in the riscv crate's critical-section implementation.
+use riscv as _;
+
+const PRIVATE_INPUT_START: usize = 0xFF000000;
+const SYSCALL_COMMIT: u64 = 64;
+const SYSCALL_HALT: u64 = 93;
+const MAX_MEMORY_SIZE: usize = 0xC000_0000;
+
+#[global_allocator]
+static HEAP: Heap = Heap::empty();
+
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+    loop {}
+}
+
+fn init_allocator() {
+    unsafe extern "C" {
+        static _end: u8;
+    }
+    let heap_pos = (&raw const _end) as usize;
+    unsafe { HEAP.init(heap_pos, MAX_MEMORY_SIZE - heap_pos) }
+}
+
+fn read_private_input() -> &'static [u8] {
+    let len = unsafe { core::ptr::read_volatile(PRIVATE_INPUT_START as *const u32) } as usize;
+    let data = (PRIVATE_INPUT_START + 4) as *const u8;
+    unsafe { core::slice::from_raw_parts(data, len) }
+}
+
+fn commit(bytes: &[u8]) {
+    unsafe {
+        asm!(
+            "ecall",
+            in("a0") 1u64,
+            in("a1") bytes.as_ptr(),
+            in("a2") bytes.len(),
+            in("a7") SYSCALL_COMMIT,
+        );
+    }
+}
+
+fn halt() -> ! {
+    unsafe {
+        asm!(
+            "ecall",
+            in("a0") 0u64,
+            in("a7") SYSCALL_HALT,
+            options(noreturn),
+        );
+    }
+}
+
+#[unsafe(no_mangle)]
+pub fn main() -> ! {
+    init_allocator();
+
+    let blob = read_private_input();
+    let decoded: (VmProof, Vec<u8>, ProofOptions) =
+        postcard::from_bytes(blob).expect("failed to deserialize");
+
+    // Force the commit byte to depend on the actually-decoded value. Without
+    // this, LLVM at -O3 was eliding the postcard decode entirely — the only
+    // sinks for `decoded` were `black_box(&decoded)` (which only forces the
+    // *reference* to materialize, not the pointee) and `Drop`, neither of
+    // which require the decoded bytes to be real. With the commit byte tied
+    // to a deep field of the decoded value, the decode has to run.
+    let proof_options_byte = decoded.2.blowup_factor;
+    let inner_elf_byte = *decoded.1.first().unwrap_or(&0);
+    let marker = proof_options_byte ^ inner_elf_byte;
+
+    commit(&[marker]);
+    halt()
+}
diff --git a/bench_vs/sp1/verifier/Cargo.toml b/bench_vs/sp1/verifier/Cargo.toml
new file mode 100644
index 000000000..fc24039c2
--- /dev/null
+++ b/bench_vs/sp1/verifier/Cargo.toml
@@ -0,0 +1,3 @@
+[workspace]
+members = ["program", "script"]
+resolver = "2"
diff --git a/bench_vs/sp1/verifier/program/Cargo.toml b/bench_vs/sp1/verifier/program/Cargo.toml
new file mode 100644
index 000000000..7fbc9c5ce
--- /dev/null
+++ b/bench_vs/sp1/verifier/program/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "verifier-program"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sp1-zkvm = "6.0.1"
+lambda-vm-prover = { path = "../../../../prover", default-features = false }
+serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] }
+postcard = { version = "1.0", default-features = false, features = ["alloc"] }
diff --git a/bench_vs/sp1/verifier/program/src/main.rs b/bench_vs/sp1/verifier/program/src/main.rs
new file mode 100644
index 000000000..c63bb67ca
--- /dev/null
+++ b/bench_vs/sp1/verifier/program/src/main.rs
@@ -0,0 +1,34 @@
+//! SP1 guest that runs lambda-vm's `verify_with_options` on a single proof.
+//!
+//! Input layout (postcard-encoded `Vec<u8>` written via `SP1Stdin::write_vec`):
+//!   `(VmProof, Vec<u8>, ProofOptions)`
+//! where the inner `Vec<u8>` is the inner program's ELF bytes.
+//!
+//! Output: commits `[1u8]` on successful verify; the guest panics otherwise.
+//!
+//! Caveats:
+//! - The verifier hashes through the `keccak` crate. SP1 has a Keccak
+//!   precompile but it patches `tiny-keccak`, not `keccak`. We don't patch
+//!   here, so Keccak runs as software inside the guest. Cycle counts will be
+//!   inflated by that overhead. Worth keeping in mind when interpreting the
+//!   number relative to lambda-vm's in-VM count.
+
+#![no_main]
+
+extern crate alloc;
+
+use alloc::vec::Vec;
+
+use lambda_vm_prover::{ProofOptions, VmProof};
+
+sp1_zkvm::entrypoint!(main);
+
+pub fn main() {
+    let blob = sp1_zkvm::io::read_vec();
+    let (vm_proof, inner_elf, options): (VmProof, Vec<u8>, ProofOptions) =
+        postcard::from_bytes(&blob).expect("failed to deserialize input");
+    let ok = lambda_vm_prover::verify_with_options(&vm_proof, &inner_elf, &options, None, None)
+        .expect("verify errored");
+    assert!(ok, "inner proof failed verification");
+    sp1_zkvm::io::commit_slice(&[1u8]);
+}
diff --git a/bench_vs/sp1/verifier/script/Cargo.toml b/bench_vs/sp1/verifier/script/Cargo.toml
new file mode 100644
index 000000000..3198059bd
--- /dev/null
+++ b/bench_vs/sp1/verifier/script/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "verifier-script"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sp1-sdk = { version = "6.0.1", features = ["blocking", "profiling"] }
+lambda-vm-prover = { path = "../../../../prover" }
+stark = { path = "../../../../crypto/stark" }
+postcard = { version = "1.0", features = ["alloc"] }
+
+[build-dependencies]
+sp1-build = "6.0.1"
diff --git a/bench_vs/sp1/verifier/script/build.rs b/bench_vs/sp1/verifier/script/build.rs
new file mode 100644
index 000000000..d6cf925d6
--- /dev/null
+++ b/bench_vs/sp1/verifier/script/build.rs
@@ -0,0 +1,5 @@
+use sp1_build::build_program_with_args;
+
+fn main() {
+    build_program_with_args("../program", Default::default());
+}
diff --git a/bench_vs/sp1/verifier/script/src/main.rs b/bench_vs/sp1/verifier/script/src/main.rs
new file mode 100644
index 000000000..86e46a710
--- /dev/null
+++ b/bench_vs/sp1/verifier/script/src/main.rs
@@ -0,0 +1,83 @@
+//! Host driver: prove an inner empty program on lambda-vm, then execute the
+//! lambda-vm verifier inside SP1's executor, printing the cycle count.
+//!
+//! Set `TRACE_FILE=profiles/verifier.json` to capture a DWARF-attributed
+//! profile (1 sample = 1 cycle). The output can be opened with
+//! `samply load profiles/verifier.json`.
+
+use std::path::PathBuf;
+
+use sp1_sdk::blocking::{Prover, ProverClient};
+use sp1_sdk::{SP1Stdin, include_elf};
+
+const VERIFIER_ELF: sp1_sdk::Elf = include_elf!("verifier-program");
+
+fn workspace_root() -> PathBuf {
+    // CARGO_MANIFEST_DIR for this crate is `<root>/bench_vs/sp1/verifier/script`.
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .ancestors()
+        .nth(4)
+        .expect("workspace root")
+        .to_path_buf()
+}
+
+fn main() {
+    sp1_sdk::utils::setup_logger();
+
+    let root = workspace_root();
+    let empty_elf_path = root
+        .join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench");
+    assert!(
+        empty_elf_path.exists(),
+        "empty-bench ELF not found at {} — run `bash bench_vs/build_recursion_elfs.sh` first",
+        empty_elf_path.display(),
+    );
+    let inner_elf = std::fs::read(&empty_elf_path).expect("read empty-bench");
+
+    let options = stark::proof::options::ProofOptions {
+        blowup_factor: 2,
+        fri_number_of_queries: 1,
+        coset_offset: 3,
+        grinding_factor: 1,
+    };
+
+    println!("[sp1-verifier] proving inner (empty, blowup=2, 1 query) ...");
+    let inner_proof = lambda_vm_prover::prove_with_options_and_inputs(
+        &inner_elf,
+        &[],
+        &options,
+        &lambda_vm_prover::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    let blob = postcard::to_allocvec(&(&inner_proof, &inner_elf, &options))
+        .expect("postcard encode failed");
+    println!("[sp1-verifier] postcard blob: {} bytes", blob.len());
+
+    let client = ProverClient::from_env();
+    let mut stdin = SP1Stdin::new();
+    stdin.write_vec(blob);
+
+    println!("[sp1-verifier] executing verifier in SP1 ...");
+    let (_, report) = client
+        .execute(VERIFIER_ELF.clone(), stdin)
+        .run()
+        .expect("execute failed");
+
+    let cycles = report.total_instruction_count();
+    println!();
+    println!("============================================================");
+    println!("  SP1 EXECUTION SUMMARY — lambda-vm verifier inside SP1");
+    println!("============================================================");
+    println!("  Total cycles : {cycles}");
+    println!();
+    println!("  Compare against lambda-vm in-VM count (~40.5B for the same");
+    println!("  proof). Both VMs target riscv64im, so word width is symmetric.");
+    println!("  Main remaining asymmetry: lambda-vm's KeccakPermute precompile");
+    println!("  is patched on its guests but SP1 does not patch `keccak` (only");
+    println!("  `tiny-keccak`), so Keccak rounds run as software in SP1 here.");
+    println!();
+    println!("  If TRACE_FILE was set, the profile was written there.");
+    println!("  Render with: samply load <trace>");
+    println!("============================================================");
+}
diff --git a/executor/src/elf.rs b/executor/src/elf.rs
index ed79fb983..da38cbbf1 100644
--- a/executor/src/elf.rs
+++ b/executor/src/elf.rs
@@ -557,4 +557,9 @@ impl SymbolTable {
     pub fn len(&self) -> usize {
         self.functions.len()
     }
+
+    /// Borrow the full function list (sorted by address).
+    pub fn functions(&self) -> &[FunctionSymbol] {
+        &self.functions
+    }
 }
diff --git a/executor/src/flamegraph.rs b/executor/src/flamegraph.rs
index f9b447d19..4764d71a2 100644
--- a/executor/src/flamegraph.rs
+++ b/executor/src/flamegraph.rs
@@ -154,7 +154,7 @@ impl FlamegraphGenerator {
 /// Demangle a Rust symbol name using the official rustc-demangle crate.
 ///
 /// Uses the alternate format (`{:#}`) to omit the hash suffix for cleaner output.
-pub(crate) fn demangle(name: &str) -> String {
+pub fn demangle(name: &str) -> String {
     // Use rustc-demangle with alternate format to omit hash
     format!("{:#}", rustc_demangle(name))
 }
diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs
index f349eeae6..f3a3e622c 100644
--- a/executor/src/vm/memory.rs
+++ b/executor/src/vm/memory.rs
@@ -218,6 +218,13 @@ impl Memory {
         Ok(self.public_output.clone())
     }
 
+    /// Read-only access to the underlying 4-byte cell map. Exposed for
+    /// diagnostic tooling (e.g. counting the distinct 4 KB memory pages a
+    /// program touches) — not part of the normal execution interface.
+    pub fn cells(&self) -> &U64HashMap<[u8; 4]> {
+        &self.cells
+    }
+
     /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a
     /// 4-byte LE length prefix followed by the raw data. The guest reads these
     /// bytes directly via normal RISC-V loads (ZisK-style memory-mapped input).
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index f072eec53..4e810022a 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -2,7 +2,7 @@
 //!
 //! Each test:
 //! 1. Proves an inner program on the host.
-//! 2. Serializes `(VmProof, inner_elf, opts)` with postcard.
+//! 2. Serializes `(VmProof, inner_elf, opts)` with postcard.
 //! 3. Hands that as private input to the recursion guest.
 //! 4. Either **proves** the recursion guest's execution (memory-bounded via
 //!    continuations) and verifies the outer proof (`OuterMode::Prove`), or
@@ -12,6 +12,7 @@
 //!
 //! The guest ELFs are assumed built by `make compile-recursion-elfs`.
 
+use std::ops::ControlFlow;
 use std::path::PathBuf;
 
 fn workspace_root() -> PathBuf {
@@ -33,10 +34,10 @@ fn read_guest_elf(root: &std::path::Path, name: &str) -> Vec<u8> {
 }
 
 /// Minimum-security FRI parameters: blowup=2, a single FRI query. Security is
-/// intentionally terrible — used by the capacity-probing test, where the goal
-/// is the smallest possible inner proof, not a sound one.
-/// (`GoldilocksCubicProofOptions::with_blowup` derives a query count from a
-/// 128-bit target, far more than we want here.)
+/// intentionally terrible — used by the capacity-probing test and every cheap
+/// diagnostic below, where the goal is the smallest possible inner proof, not
+/// a sound one. (`GoldilocksCubicProofOptions::with_blowup` derives a query
+/// count from a 128-bit target, far more than we want here.)
 const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
     stark::proof::options::ProofOptions {
         blowup_factor: 2,
@@ -46,10 +47,10 @@ const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
     };
 
 /// Prove `inner_elf` (fed `inner_input`) under `opts`, then package
-/// `(proof, elf, opts)` into the postcard blob the recursion guest consumes as
-/// its private input. `tag` prefixes the progress lines. Returns the inner
-/// proof — callers that re-verify it on the host need it — next to the encoded
-/// blob.
+/// `(proof, elf, opts)` into the postcard blob the recursion and
+/// deserialize-only guests consume as their private input. `tag` prefixes the
+/// progress lines. Returns the inner proof — callers that re-verify it on the
+/// host need it — next to the encoded blob.
 fn prove_inner_and_encode_blob(
     tag: &str,
     inner_elf: &[u8],
@@ -152,6 +153,132 @@ fn prove_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8])
     committed
 }
 
+/// Stream a guest's execution via `Executor::resume()`, calling `on_log` for
+/// every `Log` without ever buffering the full log stream (`Executor::run`
+/// would accumulate tens of millions of `Log`s and OOM even a 125 GB box).
+/// `on_log` returns `ControlFlow::Break(())` to stop the run early (e.g. once a
+/// cycle budget is hit); `Continue(())` to keep going. `on_progress(chunks,
+/// total_cycles, elapsed)` fires once per resumed chunk; callers throttle and
+/// format their own progress lines. Returns `(total_cycles, wall_time)` —
+/// `total_cycles` counts logs actually visited, so it is exact even when a run
+/// breaks mid-chunk.
+fn drive_executor(
+    executor: &mut executor::vm::execution::Executor,
+    mut on_log: impl FnMut(&executor::vm::logs::Log) -> ControlFlow<()>,
+    mut on_progress: impl FnMut(usize, u64, std::time::Duration),
+) -> (u64, std::time::Duration) {
+    let start = std::time::Instant::now();
+    let mut total_cycles: u64 = 0;
+    let mut chunks: usize = 0;
+    while let Some(logs) = executor.resume().expect("executor resume failed") {
+        let mut stop = false;
+        for log in logs {
+            total_cycles += 1;
+            if on_log(log).is_break() {
+                stop = true;
+                break;
+            }
+        }
+        chunks += 1;
+        on_progress(chunks, total_cycles, start.elapsed());
+        if stop {
+            break;
+        }
+    }
+    (total_cycles, start.elapsed())
+}
+
+/// Map a guest program counter to the demangled name of the function symbol
+/// that `symbols.lookup` reports for it, via `executor::flamegraph::demangle`.
+/// Falls back to the literal `<unknown>` when the lookup finds nothing (e.g.
+/// PLT stubs or a release build that dropped symbols). Only a function name is
+/// produced — no file:line, since the symbol table carries no DWARF line info.
+fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
+    match symbols.lookup(pc) {
+        Some(sym) => executor::flamegraph::demangle(&sym.name),
+        None => "<unknown>".to_string(),
+    }
+}
+
+/// Print a PC histogram as two tables: a per-function summary (the cycles each
+/// resolved function accounts for, folded over all its PCs) followed by the
+/// top-100 per-address detail. `pc_hist` maps program counter → cycle count.
+///
+/// The per-function view is the one that matters: an inlined kernel is spread
+/// across dozens of PCs, so the raw per-address table scatters its true cost.
+fn print_pc_histogram(
+    title: &str,
+    symbols: &executor::elf::SymbolTable,
+    pc_hist: std::collections::HashMap<u64, u64>,
+    total_cycles: u64,
+    exec_time: std::time::Duration,
+) {
+    let mut entries: Vec<(u64, u64)> = pc_hist.into_iter().collect();
+    entries.sort_unstable_by_key(|(_pc, count)| std::cmp::Reverse(*count));
+
+    // Aggregate the full histogram by resolved function, resolving each PC once.
+    let mut by_function: std::collections::HashMap<String, (u64, u64)> =
+        std::collections::HashMap::new();
+    for (pc, count) in &entries {
+        let entry = by_function
+            .entry(resolve_pc(symbols, *pc))
+            .or_insert((0, 0));
+        entry.0 += *count; // cycles
+        entry.1 += 1; // distinct PCs folded into this function
+    }
+    let mut fn_entries: Vec<(String, (u64, u64))> = by_function.into_iter().collect();
+    fn_entries.sort_unstable_by_key(|(_name, (cycles, _pcs))| std::cmp::Reverse(*cycles));
+
+    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  {title}");
+    eprintln!("============================================================");
+    eprintln!("  Total cycles : {total_cycles}");
+    eprintln!("  Unique PCs   : {}", entries.len());
+    eprintln!("  Exec time    : {exec_time:?}");
+    eprintln!();
+    eprintln!("  Top 25 functions by cycle count (aggregated over their PCs):");
+    eprintln!(
+        "  {:>4}  {:>14}  {:>7}  {:>7}  {:>5}  {}",
+        "rank", "cycles", "%", "cum %", "PCs", "function"
+    );
+    let mut fn_cumulative: u64 = 0;
+    for (rank, (name, (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
+        fn_cumulative += cycles;
+        eprintln!(
+            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  {:>5}  {}",
+            rank + 1,
+            cycles,
+            pct(*cycles),
+            pct(fn_cumulative),
+            pcs,
+            name,
+        );
+    }
+    eprintln!();
+    eprintln!("  Top 100 PCs by cycle count (per-address detail):");
+    eprintln!(
+        "  {:>4}  {:>18}  {:>14}  {:>7}  {:>7}  {}",
+        "rank", "pc", "cycles", "%", "cum %", "function"
+    );
+    let mut cumulative: u64 = 0;
+    for (rank, (pc, count)) in entries.iter().take(100).enumerate() {
+        cumulative += count;
+        eprintln!(
+            "  {:>4}  {:#018x}  {:>14}  {:>6.2}%  {:>6.2}%  {}",
+            rank + 1,
+            pc,
+            count,
+            pct(*count),
+            pct(cumulative),
+            resolve_pc(symbols, *pc),
+        );
+    }
+    eprintln!("============================================================");
+}
+
 /// Core pipeline: prove an inner program with the given options, hand the
 /// proof+ELF+options to the recursion guest, then take the guest to `mode`
 /// (execute-only or full prove) and assert it committed the `[1]` success
@@ -340,6 +467,797 @@ fn test_recursion_prove_1query() {
     );
 }
 
+/// Diagnostic: build the inner proof and dump the recursion guest's private-input
+/// blob to `/tmp/recursion_input.bin` so the CLI's `execute --flamegraph` can
+/// consume it.
+///
+/// Usage after running this test:
+/// ```
+/// cargo run -p cli --release -- execute \
+///     bench_vs/lambda/recursion/target/riscv64im-lambda-vm-elf/release/recursion-bench \
+///     --private-input /tmp/recursion_input.bin \
+///     --flamegraph /tmp/recursion_folded.txt
+/// cat /tmp/recursion_folded.txt | inferno-flamegraph > /tmp/recursion_flamegraph.svg
+/// ```
+#[test]
+#[ignore = "diagnostic: writes recursion private input to /tmp/recursion_input.bin"]
+fn test_dump_recursion_input() {
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("dump-input", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    let path = "/tmp/recursion_input.bin";
+    std::fs::write(path, &blob).expect("write blob");
+    eprintln!("[dump-input] wrote {} bytes to {path}", blob.len());
+}
+
+/// Diagnostic: build the inner proof + recursion guest input, then **execute
+/// only** the recursion guest (no STARK proving) and report cycle counts +
+/// trace size estimates.
+///
+/// This is the cheap way to find out how many RISC-V instructions the
+/// verifier actually executes inside the guest — a much faster signal than
+/// running the full outer prove (which can OOM on a 125 GB machine).
+#[test]
+#[ignore = "diagnostic: runs the executor only, prints cycle counts"]
+fn test_recursion_cycle_count() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+
+    // Build the inner proof exactly as the smoke test does, with the
+    // absolute-minimum FRI params so the inner is as small as possible.
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("cycle-count", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    // Execute (NOT prove) the recursion guest. `drive_executor` streams chunks
+    // and never accumulates logs in memory — this avoids the Vec<Log> blow-up
+    // that OOMs even a 125 GB server (one Log is 40 B; a few billion of them is
+    // hundreds of GB).
+    eprintln!("[cycle-count] executing recursion guest (streaming counter only) ...");
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |_log| ControlFlow::Continue(()),
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(50) {
+                eprintln!(
+                    "[cycle-count]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
+                );
+            }
+        },
+    );
+    let cycle_count = total_cycles as usize;
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  RECURSION GUEST EXECUTION SUMMARY");
+    eprintln!("============================================================");
+    eprintln!("  Cycle count           : {cycle_count}");
+    eprintln!("  Executor wall time    : {exec_time:?}");
+    eprintln!();
+    eprintln!("  Rough memory estimate for outer prove:");
+    let bytes_per_field = 8usize;
+    let approx_columns = 250usize; // CPU + MEMW + DECODE + bus columns combined
+    let main_trace_bytes = cycle_count * approx_columns * bytes_per_field;
+    let blowup = 2usize;
+    let lde_main_bytes = main_trace_bytes * blowup;
+    eprintln!(
+        "    main trace            : ~{:.2} GB ({} cycles × ~{} cols × 8 B)",
+        main_trace_bytes as f64 / 1e9,
+        cycle_count,
+        approx_columns
+    );
+    eprintln!(
+        "    main LDE (blowup={})   : ~{:.2} GB",
+        blowup,
+        lde_main_bytes as f64 / 1e9
+    );
+    eprintln!("  (aux trace adds roughly 50% more, so peak ≈ 2-3× LDE)");
+    eprintln!("============================================================");
+}
+
+/// Diagnostic: count the distinct 4 KB memory pages the recursion guest
+/// touches when verifying a small inner proof.
+///
+/// We suspect the outer prover's 125 GB OOM wall is dominated by per-page
+/// PAGE-table overhead. The number of PAGE tables the prover would build
+/// equals the number of distinct 4 KB pages the executor touches — code,
+/// heap, private input, and stack. This test surfaces that count without
+/// running the prover.
+///
+/// Layout (per `executor::constants` + `bench_vs/lambda/recursion/src/main.rs`):
+/// - Code/static: whatever PT_LOAD segments the recursion ELF carries.
+/// - Heap: `_end .. 0xC000_0000` (`MAX_MEMORY_SIZE`); `TlsfHeap` scatters
+///   allocations across this region.
+/// - Private input: starts at `PRIVATE_INPUT_START_INDEX = 0xFF000000`.
+/// - Stack: top of address space (down from `STACK_TOP = 0xFFFFFFFFFFFFFFF0`).
+///
+/// Interpretation (rough):
+/// - <1,000 pages: PAGE-table overhead is not the bottleneck.
+/// - 10k-100k pages: TLSF heap fragmentation; design a tighter bump allocator
+///   and re-measure.
+/// - >100k pages: postcard decode dominates; consider streaming decode.
+#[test]
+#[ignore = "diagnostic: counts distinct 4 KB memory pages touched by the recursion guest"]
+fn test_recursion_page_count() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
+    use std::collections::HashSet;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("page-count", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    // Precompute the recursion ELF's PT_LOAD ranges so we can bucket code/
+    // static pages separately from heap. `Elf::load` already expands BSS
+    // (memsz > filesz) into zero-valued words, so these ranges cover
+    // .text + .rodata + .data + .bss.
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let segment_ranges: Vec<(u64, u64)> = program
+        .data
+        .iter()
+        .map(|seg| (seg.base_addr, seg.base_addr + (seg.values.len() as u64 * 4)))
+        .collect();
+    eprintln!(
+        "[page-count] recursion ELF: {} PT_LOAD segment(s)",
+        segment_ranges.len(),
+    );
+    for (i, (lo, hi)) in segment_ranges.iter().enumerate() {
+        eprintln!(
+            "[page-count]   segment[{i}]: 0x{lo:016x} .. 0x{hi:016x} ({} bytes)",
+            hi - lo,
+        );
+    }
+
+    // Stream through execution — running to completion via `Executor::run`
+    // would accumulate ~67 M `Log` records (~2.7 GB) we don't need. We only
+    // care about the *final* memory state.
+    eprintln!("[page-count] executing recursion guest (streaming) ...");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |_log| ControlFlow::Continue(()),
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(50) {
+                eprintln!(
+                    "[page-count]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
+                );
+            }
+        },
+    );
+
+    // Collect the set of distinct 4 KB pages from every cell touched during
+    // (a) program loading, (b) private-input loading, (c) execution.
+    const PAGE_MASK: u64 = !0xFFFu64;
+    let cells = executor.memory().cells();
+    let total_cells = cells.len();
+    let pages: HashSet<u64> = cells.keys().map(|&a| a & PAGE_MASK).collect();
+
+    // Bucket by region. A "code/static" page is any page that overlaps a
+    // PT_LOAD segment. Stack lives near the top of the 64-bit address
+    // space; private input lives in the [0xFF000000, ...) window above the
+    // 3 GB heap ceiling.
+    const HEAP_CEILING: u64 = 0xC000_0000;
+    const STACK_FLOOR: u64 = 0xFFFF_FFFF_0000_0000;
+
+    let mut code_pages = 0usize;
+    let mut heap_pages = 0usize;
+    let mut private_input_pages = 0usize;
+    let mut stack_pages = 0usize;
+    let mut other_pages = 0usize;
+
+    for &page in &pages {
+        let page_end = page.saturating_add(0x1000);
+        let in_code = segment_ranges
+            .iter()
+            .any(|&(lo, hi)| page < hi && lo < page_end);
+        if in_code {
+            code_pages += 1;
+        } else if page >= STACK_FLOOR {
+            stack_pages += 1;
+        } else if page >= PRIVATE_INPUT_START_INDEX {
+            private_input_pages += 1;
+        } else if page < HEAP_CEILING {
+            heap_pages += 1;
+        } else {
+            other_pages += 1;
+        }
+    }
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  RECURSION GUEST PAGE-COUNT SUMMARY");
+    eprintln!("============================================================");
+    eprintln!("  Total cycles                  : {total_cycles}");
+    eprintln!("  Executor wall time            : {exec_time:?}");
+    eprintln!("  Memory cells touched (4 B ea) : {total_cells}");
+    eprintln!("  Distinct 4 KB pages touched   : {}", pages.len());
+    eprintln!();
+    eprintln!("  Pages per region:");
+    eprintln!("    code/static (ELF segments)     : {code_pages}");
+    eprintln!("    heap (0..0xC000_0000)          : {heap_pages}");
+    eprintln!("    private input (0xFF000000..)   : {private_input_pages}");
+    eprintln!("    stack (>= 0xFFFFFFFF_00000000) : {stack_pages}");
+    if other_pages > 0 {
+        eprintln!("    other (unclassified)           : {other_pages}");
+    }
+    eprintln!();
+    eprintln!("  Interpretation (PAGE-table overhead):");
+    eprintln!("    <1k pages     → PAGE overhead is not the bottleneck.");
+    eprintln!("    10k-100k      → TLSF heap fragmentation; try a bump alloc.");
+    eprintln!("    >100k         → postcard decode dominates; stream-decode?");
+    eprintln!("============================================================");
+}
+
+/// Build a PC histogram of the recursion guest verifying an `empty`-program
+/// inner proof produced with `inner_proof_options`, and print it via
+/// [`print_pc_histogram`] under `title`.
+///
+/// `blowup_factor` and `fri_number_of_queries` are coupled (the query count is
+/// derived from blowup for a fixed security target), so each `#[test]` below is
+/// just this runner with a different `ProofOptions` — a single query at low
+/// blowup, vs. the security-derived multi-query count at a higher blowup.
+///
+/// Streams chunks of logs via `Executor::resume()` so memory stays bounded to
+/// the histogram itself. Each PC is resolved to its enclosing function via the
+/// in-house `executor::elf::SymbolTable` (reading the recursion ELF's symbol
+/// table directly — no external tool, no DWARF dependency).
+fn run_recursion_pc_histogram(
+    title: &str,
+    inner_proof_options: stark::proof::options::ProofOptions,
+) {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("pc-hist", &empty_elf_bytes, &[], &inner_proof_options);
+
+    eprintln!("[pc-hist] executing recursion guest (building PC histogram) ...");
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    let mut pc_hist: HashMap<u64, u64> = HashMap::with_capacity(300_000);
+    let unique = std::cell::Cell::new(0usize);
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |log| {
+            *pc_hist.entry(log.current_pc).or_insert(0) += 1;
+            unique.set(pc_hist.len());
+            ControlFlow::Continue(())
+        },
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(500) {
+                eprintln!(
+                    "[pc-hist]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, {elapsed:?}",
+                    unique.get()
+                );
+            }
+        },
+    );
+
+    // Resolve PCs to functions directly from the ELF's symbol table.
+    let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
+    print_pc_histogram(title, &symbols, pc_hist, total_cycles, exec_time);
+}
+
+/// Diagnostic: PC histogram of the recursion guest with a **single** FRI query
+/// at blowup=2 — the cheapest verifier run, dominated by fixed setup cost
+/// (decode, allocator, postcard) rather than per-query FRI/Merkle work.
+#[test]
+#[ignore = "diagnostic: ~8 minutes; prints PC histogram of the verifier-in-VM"]
+fn test_recursion_pc_histogram_1query() {
+    run_recursion_pc_histogram(
+        "RECURSION GUEST PC HISTOGRAM (blowup=2, 1 query)",
+        MIN_PROOF_OPTIONS,
+    );
+}
+
+/// Diagnostic: PC histogram of the recursion guest at **128-bit security**
+/// (blowup=8, FRI query count derived by the Johnson Bound Regime — tens of
+/// queries). Compared against the single-query runs, weight shifts toward the
+/// verifier's per-query FRI-layer / Merkle-opening and field arithmetic.
+#[test]
+#[ignore = "diagnostic: heavy; PC histogram of the multi-query verifier-in-VM"]
+fn test_recursion_pc_histogram_multiquery() {
+    let inner_proof_options =
+        crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid");
+    run_recursion_pc_histogram(
+        &format!(
+            "RECURSION GUEST PC HISTOGRAM (blowup=8, {} queries, 128-bit)",
+            inner_proof_options.fri_number_of_queries
+        ),
+        inner_proof_options,
+    );
+}
+
+/// Diagnostic: build a **sampled** call-stack histogram of the recursion guest.
+///
+/// Like the `test_recursion_pc_histogram_*` tests but groups by full call stack
+/// (not PC). Only every `SAMPLE_RATE`-th log is fed to the generator; skipped
+/// logs do not update the call stack, so rates above 1 can desync it.
+///
+/// Output is written to `/tmp/recursion_folded_sampled.txt` in
+/// inferno-flamegraph "folded stacks" format. Pipe it through:
+///
+///     cat /tmp/recursion_folded_sampled.txt | inferno-flamegraph > svg.svg
+///
+/// Expect ~5 minutes with the defaults (SAMPLE_RATE=1, CYCLE_BUDGET=5M cycles).
+#[test]
+#[ignore = "diagnostic: sampled flamegraph for the verifier-in-VM"]
+fn test_recursion_sampled_flamegraph() {
+    use executor::elf::Elf;
+    use executor::flamegraph::FlamegraphGenerator;
+    use executor::vm::execution::Executor;
+    use std::io::BufWriter;
+
+    /// 1 in N logs is fed to `process_logs`, which both updates the call
+    /// stack and records a sample. At 1, every cycle goes through — the call
+    /// stack stays exactly in sync with execution so frame widths are
+    /// trustworthy, but the per-cycle cost (~57µs) limits how many cycles
+    /// we can cover within a wall-clock budget.
+    ///
+    /// At SAMPLE_RATE > 1, every CALL/RETURN that lands on a skipped cycle
+    /// silently desyncs the stack, producing the "stuck-in-visit_seq" effect
+    /// we saw at 1:1000. Use values > 1 only when stack accuracy is
+    /// expendable.
+    const SAMPLE_RATE: usize = 1;
+
+    /// Stop the executor early once we've covered this many cycles.
+    /// Set to 0 to run to completion (~67 M cycles, about an hour at SAMPLE_RATE=1).
+    /// At SAMPLE_RATE=1, ~57µs per cycle means 5M cycles ≈ 5 min wall time.
+    const CYCLE_BUDGET: u64 = 5_000_000;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("sampled-fg", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    eprintln!("[sampled-fg] executing recursion guest (sampling 1-in-{SAMPLE_RATE}) ...",);
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
+    let entry_point = program.entry_point;
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    // Build our own instruction cache from the same segments `Executor::new`
+    // decodes internally. Owning it (rather than reading `executor.instructions`
+    // mid-loop) is what lets the per-log closure call `process_logs` without
+    // borrowing `executor`, which `drive_executor` holds mutably for `resume()`.
+    let instructions = executor::vm::execution::InstructionCache::new(&program.data)
+        .expect("instruction cache build failed");
+
+    // RefCell so the per-log closure (`process_logs`, &mut self) and the
+    // progress closure (`write_folded`, &self) can both reach the generator —
+    // their calls never overlap, so the runtime borrow check never trips.
+    let generator = std::cell::RefCell::new(FlamegraphGenerator::new(symbols, entry_point));
+
+    // Path is defined here (not after the loop) so the periodic checkpoint
+    // writes below can target it. The final write at the end still happens.
+    let path = "/tmp/recursion_folded_sampled.txt";
+
+    let mut i = 0usize;
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |log| {
+            // 1-in-SAMPLE_RATE logs are fed to `process_logs`. At SAMPLE_RATE==1
+            // this is the identity filter (`_ % 1 == 0`); the `#[allow]` keeps
+            // the general form so SAMPLE_RATE can be bumped without touching the
+            // body. Skipped logs lose stack accuracy — acceptable diagnostic
+            // quality at higher rates.
+            #[allow(clippy::modulo_one)]
+            let take = i % SAMPLE_RATE == 0;
+            if take {
+                generator
+                    .borrow_mut()
+                    .process_logs(std::slice::from_ref(log), &instructions)
+                    .expect("flamegraph process_logs");
+            }
+            i += 1;
+
+            // Early exit once we've covered the cycle budget. The dominant hot
+            // kernels are ~uniform across the verifier's runtime, so a partial
+            // run still surfaces them. `#[allow]` lets CYCLE_BUDGET be const-0
+            // (full run) without tripping clippy.
+            #[allow(clippy::absurd_extreme_comparisons)]
+            if CYCLE_BUDGET > 0 && i as u64 >= CYCLE_BUDGET {
+                eprintln!("[sampled-fg] hit cycle budget ({CYCLE_BUDGET} cycles), stopping early");
+                ControlFlow::Break(())
+            } else {
+                ControlFlow::Continue(())
+            }
+        },
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(500) {
+                eprintln!(
+                    "[sampled-fg]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
+                );
+                // Checkpoint: re-write the folded file in place so a killed run
+                // still leaves a usable (if partial) flamegraph on disk.
+                let file = std::fs::File::create(path).expect("create output file");
+                let mut writer = BufWriter::new(file);
+                generator
+                    .borrow()
+                    .write_folded(&mut writer)
+                    .expect("write folded output");
+            }
+        },
+    );
+
+    let file = std::fs::File::create(path).expect("create output file");
+    let mut writer = BufWriter::new(file);
+    generator
+        .borrow()
+        .write_folded(&mut writer)
+        .expect("write folded output");
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  SAMPLED FLAMEGRAPH SUMMARY");
+    eprintln!("============================================================");
+    eprintln!("  Total cycles : {total_cycles}");
+    eprintln!("  Sample rate  : 1 in {SAMPLE_RATE}");
+    eprintln!("  Exec time    : {exec_time:?}");
+    eprintln!("  Output file  : {path}");
+    eprintln!("============================================================");
+    eprintln!();
+    eprintln!("  To render SVG (requires inferno):");
+    eprintln!("    cat {path} | inferno-flamegraph > /tmp/recursion_flamegraph_sampled.svg");
+    eprintln!("============================================================");
+}
+
+/// Diagnostic: host-side per-step timings for the verifier.
+///
+/// Runs an inner prove (empty guest, blowup=2, 1 query) and then verifies it
+/// on the host. When built with `--features stark/instruments`, the verifier
+/// prints `Time spent: ...` for each of the four steps (replay challenges,
+/// composition polynomial, FRI, DEEP openings) plus the step-1-replay it
+/// does before step 2. Lets us see the host-side split in seconds, without
+/// running anything inside the VM.
+///
+/// Usage:
+/// ```
+/// cargo test --release -p lambda-vm-prover --features stark/instruments \
+///   --lib test_host_verify_step_timings -- --ignored --nocapture
+/// ```
+#[test]
+#[ignore = "diagnostic: prints host-side verifier step timings"]
+fn test_host_verify_step_timings() {
+    let root = workspace_root();
+    let empty_path =
+        root.join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench");
+    let empty_elf_bytes = std::fs::read(&empty_path).expect("read empty-bench");
+
+    let inner_proof_options = MIN_PROOF_OPTIONS;
+
+    eprintln!("[host-verify] proving empty (blowup=2, fri_queries=1) ...");
+    let inner_proof = crate::prove_with_options_and_inputs(
+        &empty_elf_bytes,
+        &[],
+        &inner_proof_options,
+        &crate::MaxRowsConfig::default(),
+    )
+    .expect("inner prove should succeed");
+
+    eprintln!("[host-verify] verifying on host (with instruments) ...");
+    let ok = crate::verify_with_options(
+        &inner_proof,
+        &empty_elf_bytes,
+        &inner_proof_options,
+        None,
+        None,
+    )
+    .expect("verify errored");
+    assert!(ok, "proof must verify");
+    eprintln!("[host-verify] verified OK");
+}
+
+/// Diagnostic: cycle count for the **deserialize-only** counterpart of the
+/// recursion guest. Same input layout
+/// (`(VmProof, Vec<u8>, ProofOptions)`) and same proof, but
+/// the guest just postcard-decodes the blob and halts — it never calls
+/// `verify_with_options`.
+///
+/// The cycle delta between this and `test_recursion_cycle_count` is the
+/// actual cost of the STARK verifier inside the VM. Historically (40.5 B-cycle
+/// recursion guest) postcard decode was ~15.6 M cycles — negligible. Now that
+/// the recursion guest is ~67 M cycles, the same absolute cost would be ~23%
+/// of total; this test re-measures it.
+#[test]
+#[ignore = "diagnostic: runs the deserialize-only guest, prints cycle count"]
+fn test_deserialize_only_cycle_count() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let deser_elf_bytes = read_guest_elf(&root, "deserialize-only");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("deser-only", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    eprintln!("[deser-only] executing deserialize-only guest (streaming) ...");
+    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
+    eprintln!(
+        "[deser-only] ELF: {} bytes, entry_point=0x{:x}",
+        deser_elf_bytes.len(),
+        program.entry_point,
+    );
+    assert_ne!(
+        program.entry_point, 0,
+        "deserialize-only ELF has entry_point=0 — build artifact is malformed"
+    );
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |_log| ControlFlow::Continue(()),
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(50) {
+                eprintln!(
+                    "[deser-only]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
+                );
+            }
+        },
+    );
+    let cycle_count = total_cycles;
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  DESERIALIZE-ONLY GUEST EXECUTION SUMMARY");
+    eprintln!("============================================================");
+    eprintln!("  Cycle count           : {cycle_count}");
+    eprintln!("  Executor wall time    : {exec_time:?}");
+    eprintln!();
+    eprintln!("  Compare against test_recursion_cycle_count (same inner");
+    eprintln!("  proof, same options). Delta = verifier-in-VM cost.");
+    eprintln!("============================================================");
+}
+
+/// Diagnostic: PC histogram for the **deserialize-only** guest.
+///
+/// Sibling of the `test_recursion_pc_histogram_*` tests, but targeting the
+/// deserialize-only control guest so we can locate the hot kernel inside the
+/// 15.7 M-cycle postcard decode itself. Every cycle goes through the
+/// histogram (no sampling), so attribution is exact — the previous sampled
+/// flamegraph at 1:1000 had broken stack reconstruction on skipped
+/// CALL/RETURNs, which made it unreliable for a workload this small.
+///
+/// Each top PC is resolved to its enclosing function via the in-house
+/// `executor::elf::SymbolTable`, reading the guest ELF's symbol table directly
+/// (no external tool, no DWARF dependency).
+#[test]
+#[ignore = "diagnostic: ~1 min; PC histogram for the deserialize-only guest"]
+fn test_deserialize_only_pc_histogram() {
+    use executor::elf::Elf;
+    use executor::vm::execution::Executor;
+    use std::collections::HashMap;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let deser_elf_bytes = read_guest_elf(&root, "deserialize-only");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("deser-pc-hist", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    eprintln!("[deser-pc-hist] executing deserialize-only guest (building PC histogram) ...");
+    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    // ~50k unique PCs is plenty: the deserialize-only guest is ~74 KB of ELF
+    // (~18k 4-byte instructions); the hot inner loop is much smaller still.
+    let mut pc_hist: HashMap<u64, u64> = HashMap::with_capacity(50_000);
+    let unique = std::cell::Cell::new(0usize);
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |log| {
+            *pc_hist.entry(log.current_pc).or_insert(0) += 1;
+            unique.set(pc_hist.len());
+            ControlFlow::Continue(())
+        },
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(50) {
+                eprintln!(
+                    "[deser-pc-hist]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, {elapsed:?}",
+                    unique.get()
+                );
+            }
+        },
+    );
+
+    // Resolve PCs to functions directly from the ELF's symbol table.
+    let symbols = executor::elf::SymbolTable::parse(&deser_elf_bytes);
+    print_pc_histogram(
+        "DESERIALIZE-ONLY GUEST PC HISTOGRAM",
+        &symbols,
+        pc_hist,
+        total_cycles,
+        exec_time,
+    );
+}
+
+/// Diagnostic: bucket the recursion guest's cycles by which verifier step
+/// is currently executing.
+///
+/// The verifier's hot path is `verify_rounds_2_to_4`, which calls four
+/// sub-routines in a fixed order:
+///   1. `replay_rounds_after_round_1`               (recover challenges)
+///   2. `step_2_verify_claimed_composition_polynomial`
+///   3. `step_3_verify_fri`
+///   4. `step_4_verify_trace_and_composition_openings`
+///
+/// We resolve each executed PC to its symbol in the recursion ELF; a symbol
+/// whose name contains step N's function name moves a monotonic state machine
+/// to bucket N. The active bucket only advances 0 → 1 → 2 → 3 → 4 (never
+/// backwards), so cycles inside a step's callees stay attributed to that step.
+///
+/// Bucket 0 ("setup") captures everything before step 1 is entered — the
+/// allocator init, postcard decode, and `VmAirs::new` (which contains the
+/// expensive preprocessed-commitment FFTs).
+///
+/// Streams chunks via `Executor::resume()` so memory stays bounded.
+#[test]
+#[ignore = "diagnostic: ~13 min; buckets the 40B cycles by verifier step"]
+fn test_recursion_step_breakdown() {
+    use executor::elf::{Elf, SymbolTable};
+    use executor::vm::execution::Executor;
+
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+
+    let (_inner_proof, blob) =
+        prove_inner_and_encode_blob("step-bkd", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+
+    // Build a per-step "advance bucket to N" lookup. The verifier's step
+    // functions get inlined by LLVM in release mode, so we can't rely on
+    // matching their entry PCs directly. Instead we anchor on closures the
+    // compiler emits *inside* each step's body — iterator combinators like
+    // `.fold(|...|)` keep the step's method name as a substring in their
+    // mangled symbol. Any PC that resolves to a symbol containing step N's
+    // keyword advances the bucket to N (monotonically).
+    //
+    // If step N has no matching symbol at all (e.g. step 4 is fully inlined
+    // with no closure children of its own), its cycles get attributed to the
+    // previous bucket. We log that per step below, before execution starts.
+    let symbols = SymbolTable::parse(&recursion_elf_bytes);
+    assert!(
+        !symbols.is_empty(),
+        "recursion ELF has no symbol table — was it stripped?"
+    );
+
+    let step_keywords = [
+        "replay_rounds_after_round_1",
+        "step_2_verify_claimed_composition_polynomial",
+        "step_3_verify_fri",
+        "step_4_verify_trace_and_composition_openings",
+    ];
+    let step_found: [bool; 4] = std::array::from_fn(|i| {
+        symbols
+            .functions()
+            .iter()
+            .any(|f| f.name.contains(step_keywords[i]))
+    });
+    for (i, found) in step_found.iter().enumerate() {
+        let n_matches = symbols
+            .functions()
+            .iter()
+            .filter(|f| f.name.contains(step_keywords[i]))
+            .count();
+        eprintln!(
+            "[step-bkd] step {}: keyword={:?} -> {} symbol(s) {}",
+            i + 1,
+            step_keywords[i],
+            n_matches,
+            if *found {
+                ""
+            } else {
+                "(fully inlined; will merge into the previous bucket)"
+            }
+        );
+    }
+
+    // Monotonic state machine: 0=setup, 1..=4=inside step N (or its callees, or
+    // a later step's cycles that land here because that step has no symbol).
+    // `bucket` lives in a Cell so the per-log closure can advance it while the
+    // progress closure reads it for its live readout.
+    let bucket = std::cell::Cell::new(0u8);
+    let mut buckets = [0u64; 5];
+
+    eprintln!("[step-bkd] executing recursion guest (streaming) ...");
+    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
+    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+
+    // Cache the last symbol-table hit so we only do a binary search on
+    // function transitions, not on every cycle. Functions are typically
+    // long-running (>>1 instruction), so this cache hits ~all of the time.
+    let mut last_range: Option<(u64, u64)> = None;
+    let mut last_advance: u8 = 0;
+
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |log| {
+            let pc = log.current_pc;
+            let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
+            if !in_cached {
+                // Slow path: refresh the cache from the symbol table.
+                if let Some(sym) = symbols.lookup(pc) {
+                    // SymbolTable accepts size=0 symbols as "any address >="; for
+                    // those we'd need the next symbol's start for a real upper
+                    // bound. Cheapest workaround: set a tiny range so we re-resolve
+                    // soon enough that wrong attribution is bounded.
+                    let end = sym.address + sym.size.max(1);
+                    last_range = Some((sym.address, end));
+                    last_advance = 0;
+                    for (i, kw) in step_keywords.iter().enumerate() {
+                        if sym.name.contains(kw) {
+                            last_advance = (i + 1) as u8;
+                        }
+                    }
+                } else {
+                    last_range = None;
+                    last_advance = 0;
+                }
+            }
+            if bucket.get() < last_advance {
+                bucket.set(last_advance);
+            }
+            buckets[bucket.get() as usize] += 1;
+            ControlFlow::Continue(())
+        },
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(500) {
+                eprintln!(
+                    "[step-bkd]   ... {chunks} chunks, {cycles} cycles, bucket={}, {elapsed:?}",
+                    bucket.get()
+                );
+            }
+        },
+    );
+
+    let labels = [
+        "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
+        "1. step 1: replay_rounds_after_round_1",
+        "2. step 2: verify_claimed_composition_polynomial",
+        "3. step 3: verify_fri",
+        "4. step 4: verify_trace_and_composition_openings (+ wrap-up)",
+    ];
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!("  RECURSION GUEST PER-STEP CYCLE BREAKDOWN");
+    eprintln!("============================================================");
+    eprintln!("  Total cycles : {total_cycles}");
+    eprintln!("  Exec time    : {exec_time:?}");
+    eprintln!();
+    eprintln!("  {:<60}  {:>14}  {:>7}", "bucket", "cycles", "%");
+    for (label, cycles) in labels.iter().zip(buckets.iter()) {
+        let pct = if total_cycles > 0 {
+            100.0 * (*cycles as f64) / (total_cycles as f64)
+        } else {
+            0.0
+        };
+        eprintln!("  {:<60}  {:>14}  {:>6.2}%", label, cycles, pct);
+    }
+    eprintln!("============================================================");
+}
+
 /// Inner program: fibonacci(10).
 #[test]
 #[ignore = "slow: memory-bounded continuation prove of the verifier-in-VM"]

From 1cde708a0e8450095bff2e402e3c5e8ccab857ba Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 15:31:45 -0300
Subject: [PATCH 02/36] refactor(prover): drop per-address PC table from
 recursion profile

The top-100 per-address table carried bare PCs with no file:line, so it was
not actionable for optimization and the CI aggregator already discarded it.
Keep the per-function fold (the view that matters); terminate the aggregator's
function-table parse on the trailing rule instead of the removed PC header.
---
 .../scripts/aggregate_recursion_histogram.py  | 16 ++++-----
 prover/src/tests/recursion_smoke_test.rs      | 34 +++++--------------
 2 files changed, 16 insertions(+), 34 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index 8a12dc05e..1ae34ff70 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 """Format the recursion-guest per-function profile as a Markdown PR comment.
 
-`test_recursion_pc_histogram` prints a per-function summary table (cycles folded
-over each function's PCs, computed across the *full* histogram) followed by a
-per-address detail table. We extract the per-function table — the view that
-shows where the cycles actually go — and render it as Markdown.
+`test_recursion_pc_histogram` prints a per-function summary table: the cycles
+folded over each function's PCs, computed across the *full* histogram — the view
+that shows where the cycles actually go. We parse that table and render it as
+Markdown.
 
     Top 25 functions by cycle count (aggregated over their PCs):
-    rank          cycles        %    cum %    PCs  function (file:line)
+    rank          cycles        %    cum %    PCs  function
        1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
 Reads the test's captured output from argv[1]; writes the Markdown body to
@@ -18,12 +18,12 @@
 import sys
 
 # A per-function summary row: rank, cycles, pct%, cum%, pcs, function.
-# Distinguished from the per-PC detail rows by the absence of a 0x<pc> column.
 FN_ROW = re.compile(
     r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
 )
 FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
-PC_TABLE_START = re.compile(r"Top \d+ PCs by cycle count")
+# The "====" rule the test prints right after the (now sole) function table.
+TABLE_END = re.compile(r"^=+\s*$")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
 UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
 EXEC_TIME = re.compile(r"Exec time\s*:\s*(\S+)")
@@ -43,7 +43,7 @@ def parse(text):
         if FN_TABLE_START.search(line):
             in_fn_table = True
             continue
-        if PC_TABLE_START.search(line):
+        if in_fn_table and TABLE_END.match(line):
             in_fn_table = False
             continue
         if in_fn_table and (m := FN_ROW.match(line)):
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 4e810022a..5daa78f22 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -200,12 +200,14 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
     )
 }
 
-/// Print a PC histogram as two tables: a per-function summary (the cycles each
-/// resolved function accounts for, folded over all its PCs) followed by the
-/// top-100 per-address detail. `pc_hist` maps program counter → cycle count.
+/// Print a per-function PC-histogram summary: the cycles each resolved function
+/// accounts for, folded over all its PCs. `pc_hist` maps program counter →
+/// cycle count.
 ///
-/// The per-function view is the one that matters: an inlined kernel is spread
-/// across dozens of PCs, so the raw per-address table scatters its true cost.
+/// We fold by function deliberately: an inlined kernel is spread across dozens
+/// of PCs, so a raw per-address table scatters its true cost — and without
+/// file:line resolution a bare PC isn't actionable for optimization anyway, so
+/// there is no per-address detail table.
 fn print_pc_histogram(
     title: &str,
     symbols: &executor::elf::SymbolTable,
@@ -213,8 +215,7 @@ fn print_pc_histogram(
     total_cycles: u64,
     exec_time: std::time::Duration,
 ) {
-    let mut entries: Vec<(u64, u64)> = pc_hist.into_iter().collect();
-    entries.sort_unstable_by_key(|(_pc, count)| std::cmp::Reverse(*count));
+    let entries: Vec<(u64, u64)> = pc_hist.into_iter().collect();
 
     // Aggregate the full histogram by resolved function, resolving each PC once.
     let mut by_function: std::collections::HashMap<String, (u64, u64)> =
@@ -257,25 +258,6 @@ fn print_pc_histogram(
             name,
         );
     }
-    eprintln!();
-    eprintln!("  Top 100 PCs by cycle count (per-address detail):");
-    eprintln!(
-        "  {:>4}  {:>18}  {:>14}  {:>7}  {:>7}  {}",
-        "rank", "pc", "cycles", "%", "cum %", "function"
-    );
-    let mut cumulative: u64 = 0;
-    for (rank, (pc, count)) in entries.iter().take(100).enumerate() {
-        cumulative += count;
-        eprintln!(
-            "  {:>4}  {:#018x}  {:>14}  {:>6.2}%  {:>6.2}%  {}",
-            rank + 1,
-            pc,
-            count,
-            pct(*count),
-            pct(cumulative),
-            resolve_pc(symbols, *pc),
-        );
-    }
     eprintln!("============================================================");
 }
 

From bc86250d4b6948ccd74de439d73a28805d1cd6d7 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 15:40:41 -0300
Subject: [PATCH 03/36] refactor(prover): share setup/progress across recursion
 diagnostics

Extract setup_guest_run (blob build + ELF load + Executor::new), used by the
cycle-count, page-count, PC-histogram, sampled-flamegraph and step-breakdown
diagnostics, and a log_progress throttled-readout factory for the diagnostics
whose progress line needs no extra live state. Generalize the PC-histogram
runner over guest name + progress stride so the deserialize-only histogram is
a one-line caller instead of a near-duplicate.
---
 prover/src/tests/recursion_smoke_test.rs | 231 +++++++++--------------
 1 file changed, 88 insertions(+), 143 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 5daa78f22..422475533 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -188,6 +188,47 @@ fn drive_executor(
     (total_cycles, start.elapsed())
 }
 
+/// Shared preamble for every execute-only diagnostic below: build the standard
+/// recursion private-input blob (an `empty`-program inner proof produced under
+/// `opts`), load guest `guest_name`, and stand up an executor over it. Returns
+/// the guest's raw ELF bytes (callers that resolve PCs pass them to
+/// [`executor::elf::SymbolTable::parse`]), the loaded program, and the
+/// ready-to-drive executor.
+fn setup_guest_run(
+    label: &str,
+    guest_name: &str,
+    opts: &stark::proof::options::ProofOptions,
+) -> (
+    Vec<u8>,
+    executor::elf::Elf,
+    executor::vm::execution::Executor,
+) {
+    let root = workspace_root();
+    let empty_elf_bytes = read_guest_elf(&root, "empty");
+    let guest_elf_bytes = read_guest_elf(&root, guest_name);
+
+    let (_inner_proof, blob) = prove_inner_and_encode_blob(label, &empty_elf_bytes, &[], opts);
+
+    let program = executor::elf::Elf::load(&guest_elf_bytes).expect("ELF load failed");
+    let executor = executor::vm::execution::Executor::new(&program, blob).expect("Executor::new failed");
+    (guest_elf_bytes, program, executor)
+}
+
+/// A `drive_executor` progress callback that prints the throttled
+/// `[label]   ... N chunks, M cycles, T elapsed` line every `stride` chunks —
+/// the readout every counting diagnostic shares. Tests that need extra live
+/// state (unique PC count, active step bucket) keep their own closure instead.
+fn log_progress(
+    label: &'static str,
+    stride: usize,
+) -> impl FnMut(usize, u64, std::time::Duration) {
+    move |chunks, cycles, elapsed| {
+        if chunks.is_multiple_of(stride) {
+            eprintln!("[{label}]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed");
+        }
+    }
+}
+
 /// Resolve a guest PC to its (demangled) enclosing function name using the
 /// ELF's own symbol table — the same data `executor::flamegraph` resolves
 /// against. `<unknown>` when no function symbol covers the PC (e.g. PLT stubs
@@ -485,35 +526,20 @@ fn test_dump_recursion_input() {
 #[test]
 #[ignore = "diagnostic: runs the executor only, prints cycle counts"]
 fn test_recursion_cycle_count() {
-    use executor::elf::Elf;
-    use executor::vm::execution::Executor;
-
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
-
-    // Build the inner proof exactly as the smoke test does, with the
-    // absolute-minimum FRI params so the inner is as small as possible.
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("cycle-count", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+    // Build the inner proof with the absolute-minimum FRI params (smallest
+    // possible inner) and stand up the recursion guest over it.
+    let (_bytes, _program, mut executor) =
+        setup_guest_run("cycle-count", "recursion", &MIN_PROOF_OPTIONS);
 
     // Execute (NOT prove) the recursion guest. `drive_executor` streams chunks
     // and never accumulates logs in memory — this avoids the Vec<Log> blow-up
     // that OOMs even a 125 GB server (one Log is 40 B; a few billion of them is
     // hundreds of GB).
     eprintln!("[cycle-count] executing recursion guest (streaming counter only) ...");
-    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |_log| ControlFlow::Continue(()),
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(50) {
-                eprintln!(
-                    "[cycle-count]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
-                );
-            }
-        },
+        log_progress("cycle-count", 50),
     );
     let cycle_count = total_cycles as usize;
 
@@ -569,23 +595,16 @@ fn test_recursion_cycle_count() {
 #[test]
 #[ignore = "diagnostic: counts distinct 4 KB memory pages touched by the recursion guest"]
 fn test_recursion_page_count() {
-    use executor::elf::Elf;
-    use executor::vm::execution::Executor;
     use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
     use std::collections::HashSet;
 
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
-
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("page-count", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+    let (_bytes, program, mut executor) =
+        setup_guest_run("page-count", "recursion", &MIN_PROOF_OPTIONS);
 
     // Precompute the recursion ELF's PT_LOAD ranges so we can bucket code/
     // static pages separately from heap. `Elf::load` already expands BSS
     // (memsz > filesz) into zero-valued words, so these ranges cover
     // .text + .rodata + .data + .bss.
-    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
     let segment_ranges: Vec<(u64, u64)> = program
         .data
         .iter()
@@ -606,17 +625,10 @@ fn test_recursion_page_count() {
     // would accumulate ~67 M `Log` records (~2.7 GB) we don't need. We only
     // care about the *final* memory state.
     eprintln!("[page-count] executing recursion guest (streaming) ...");
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |_log| ControlFlow::Continue(()),
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(50) {
-                eprintln!(
-                    "[page-count]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
-                );
-            }
-        },
+        log_progress("page-count", 50),
     );
 
     // Collect the set of distinct 4 KB pages from every cell touched during
@@ -682,38 +694,33 @@ fn test_recursion_page_count() {
     eprintln!("============================================================");
 }
 
-/// Build a PC histogram of the recursion guest verifying an `empty`-program
+/// Build a PC histogram of guest `guest_name` run over an `empty`-program
 /// inner proof produced with `inner_proof_options`, and print it via
 /// [`print_pc_histogram`] under `title`.
 ///
-/// `blowup_factor` and `fri_number_of_queries` are coupled (the query count is
-/// derived from blowup for a fixed security target), so each `#[test]` below is
-/// just this runner with a different `ProofOptions` — a single query at low
-/// blowup, vs. the security-derived multi-query count at a higher blowup.
+/// For the recursion guest, `blowup_factor` and `fri_number_of_queries` are
+/// coupled (the query count is derived from blowup for a fixed security
+/// target), so each recursion `#[test]` is just this runner with a different
+/// `ProofOptions` — a single query at low blowup, vs. the security-derived
+/// multi-query count at a higher blowup. The deserialize-only control guest
+/// reuses the same runner with its own ELF name.
 ///
 /// Streams chunks of logs via `Executor::resume()` so memory stays bounded to
 /// the histogram itself. Each PC is resolved to its enclosing function via the
-/// in-house `executor::elf::SymbolTable` (reading the recursion ELF's symbol
-/// table directly — no external tool, no DWARF dependency).
-fn run_recursion_pc_histogram(
+/// in-house `executor::elf::SymbolTable` (reading the guest ELF's symbol table
+/// directly — no external tool, no DWARF dependency).
+fn run_pc_histogram(
     title: &str,
+    guest_name: &str,
+    progress_stride: usize,
     inner_proof_options: stark::proof::options::ProofOptions,
 ) {
-    use executor::elf::Elf;
-    use executor::vm::execution::Executor;
     use std::collections::HashMap;
 
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
-
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("pc-hist", &empty_elf_bytes, &[], &inner_proof_options);
-
-    eprintln!("[pc-hist] executing recursion guest (building PC histogram) ...");
-    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
+    let (guest_elf_bytes, _program, mut executor) =
+        setup_guest_run("pc-hist", guest_name, &inner_proof_options);
 
+    eprintln!("[pc-hist] executing {guest_name} guest (building PC histogram) ...");
     let mut pc_hist: HashMap<u64, u64> = HashMap::with_capacity(300_000);
     let unique = std::cell::Cell::new(0usize);
     let (total_cycles, exec_time) = drive_executor(
@@ -724,7 +731,7 @@ fn run_recursion_pc_histogram(
             ControlFlow::Continue(())
         },
         |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(500) {
+            if chunks.is_multiple_of(progress_stride) {
                 eprintln!(
                     "[pc-hist]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, {elapsed:?}",
                     unique.get()
@@ -734,7 +741,7 @@ fn run_recursion_pc_histogram(
     );
 
     // Resolve PCs to functions directly from the ELF's symbol table.
-    let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
+    let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
     print_pc_histogram(title, &symbols, pc_hist, total_cycles, exec_time);
 }
 
@@ -744,8 +751,10 @@ fn run_recursion_pc_histogram(
 #[test]
 #[ignore = "diagnostic: ~8 minutes; prints PC histogram of the verifier-in-VM"]
 fn test_recursion_pc_histogram_1query() {
-    run_recursion_pc_histogram(
+    run_pc_histogram(
         "RECURSION GUEST PC HISTOGRAM (blowup=2, 1 query)",
+        "recursion",
+        500,
         MIN_PROOF_OPTIONS,
     );
 }
@@ -759,11 +768,13 @@ fn test_recursion_pc_histogram_1query() {
 fn test_recursion_pc_histogram_multiquery() {
     let inner_proof_options =
         crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid");
-    run_recursion_pc_histogram(
+    run_pc_histogram(
         &format!(
             "RECURSION GUEST PC HISTOGRAM (blowup=8, {} queries, 128-bit)",
             inner_proof_options.fri_number_of_queries
         ),
+        "recursion",
+        500,
         inner_proof_options,
     );
 }
@@ -783,9 +794,7 @@ fn test_recursion_pc_histogram_multiquery() {
 #[test]
 #[ignore = "diagnostic: sampled flamegraph for the verifier-in-VM"]
 fn test_recursion_sampled_flamegraph() {
-    use executor::elf::Elf;
     use executor::flamegraph::FlamegraphGenerator;
-    use executor::vm::execution::Executor;
     use std::io::BufWriter;
 
     /// 1 in N logs is fed to `process_logs`, which both updates the call
@@ -805,18 +814,12 @@ fn test_recursion_sampled_flamegraph() {
     /// At SAMPLE_RATE=1, ~57µs per cycle means 5M cycles ≈ 5 min wall time.
     const CYCLE_BUDGET: u64 = 5_000_000;
 
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
-
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("sampled-fg", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+    let (recursion_elf_bytes, program, mut executor) =
+        setup_guest_run("sampled-fg", "recursion", &MIN_PROOF_OPTIONS);
 
     eprintln!("[sampled-fg] executing recursion guest (sampling 1-in-{SAMPLE_RATE}) ...",);
-    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
     let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
     let entry_point = program.entry_point;
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
 
     // Build our own instruction cache from the same segments `Executor::new`
     // decodes internally. Owning it (rather than reading `executor.instructions`
@@ -964,18 +967,9 @@ fn test_host_verify_step_timings() {
 #[test]
 #[ignore = "diagnostic: runs the deserialize-only guest, prints cycle count"]
 fn test_deserialize_only_cycle_count() {
-    use executor::elf::Elf;
-    use executor::vm::execution::Executor;
+    let (deser_elf_bytes, program, mut executor) =
+        setup_guest_run("deser-only", "deserialize-only", &MIN_PROOF_OPTIONS);
 
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let deser_elf_bytes = read_guest_elf(&root, "deserialize-only");
-
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("deser-only", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
-
-    eprintln!("[deser-only] executing deserialize-only guest (streaming) ...");
-    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
     eprintln!(
         "[deser-only] ELF: {} bytes, entry_point=0x{:x}",
         deser_elf_bytes.len(),
@@ -985,18 +979,12 @@ fn test_deserialize_only_cycle_count() {
         program.entry_point, 0,
         "deserialize-only ELF has entry_point=0 — build artifact is malformed"
     );
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
 
+    eprintln!("[deser-only] executing deserialize-only guest (streaming) ...");
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |_log| ControlFlow::Continue(()),
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(50) {
-                eprintln!(
-                    "[deser-only]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
-                );
-            }
-        },
+        log_progress("deser-only", 50),
     );
     let cycle_count = total_cycles;
 
@@ -1027,50 +1015,14 @@ fn test_deserialize_only_cycle_count() {
 #[test]
 #[ignore = "diagnostic: ~1 min; PC histogram for the deserialize-only guest"]
 fn test_deserialize_only_pc_histogram() {
-    use executor::elf::Elf;
-    use executor::vm::execution::Executor;
-    use std::collections::HashMap;
-
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let deser_elf_bytes = read_guest_elf(&root, "deserialize-only");
-
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("deser-pc-hist", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
-
-    eprintln!("[deser-pc-hist] executing deserialize-only guest (building PC histogram) ...");
-    let program = Elf::load(&deser_elf_bytes).expect("ELF load failed");
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
-
-    // ~50k unique PCs is plenty: the deserialize-only guest is ~74 KB of ELF
-    // (~18k 4-byte instructions); the hot inner loop is much smaller still.
-    let mut pc_hist: HashMap<u64, u64> = HashMap::with_capacity(50_000);
-    let unique = std::cell::Cell::new(0usize);
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |log| {
-            *pc_hist.entry(log.current_pc).or_insert(0) += 1;
-            unique.set(pc_hist.len());
-            ControlFlow::Continue(())
-        },
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(50) {
-                eprintln!(
-                    "[deser-pc-hist]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, {elapsed:?}",
-                    unique.get()
-                );
-            }
-        },
-    );
-
-    // Resolve PCs to functions directly from the ELF's symbol table.
-    let symbols = executor::elf::SymbolTable::parse(&deser_elf_bytes);
-    print_pc_histogram(
+    // Same runner as the recursion PC histograms, pointed at the deserialize-only
+    // control guest. Smaller workload (~16 M cycles, far fewer chunks), so use a
+    // tighter progress stride to still get periodic readouts.
+    run_pc_histogram(
         "DESERIALIZE-ONLY GUEST PC HISTOGRAM",
-        &symbols,
-        pc_hist,
-        total_cycles,
-        exec_time,
+        "deserialize-only",
+        50,
+        MIN_PROOF_OPTIONS,
     );
 }
 
@@ -1097,15 +1049,10 @@ fn test_deserialize_only_pc_histogram() {
 #[test]
 #[ignore = "diagnostic: ~13 min; buckets the 40B cycles by verifier step"]
 fn test_recursion_step_breakdown() {
-    use executor::elf::{Elf, SymbolTable};
-    use executor::vm::execution::Executor;
-
-    let root = workspace_root();
-    let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let recursion_elf_bytes = read_guest_elf(&root, "recursion");
+    use executor::elf::SymbolTable;
 
-    let (_inner_proof, blob) =
-        prove_inner_and_encode_blob("step-bkd", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
+    let (recursion_elf_bytes, _program, mut executor) =
+        setup_guest_run("step-bkd", "recursion", &MIN_PROOF_OPTIONS);
 
     // Build a per-step "advance bucket to N" lookup. The verifier's step
     // functions get inlined by LLVM in release mode, so we can't rely on
@@ -1163,8 +1110,6 @@ fn test_recursion_step_breakdown() {
     let mut buckets = [0u64; 5];
 
     eprintln!("[step-bkd] executing recursion guest (streaming) ...");
-    let program = Elf::load(&recursion_elf_bytes).expect("ELF load failed");
-    let mut executor = Executor::new(&program, blob).expect("Executor::new failed");
 
     // Cache the last symbol-table hit so we only do a binary search on
     // function transitions, not on every cycle. Functions are typically

From 8dbfe245f64afd620287a4c012543c30931deec2 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 15:49:08 -0300
Subject: [PATCH 04/36] cargo fmt

---
 prover/src/tests/recursion_smoke_test.rs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 422475533..ae2daf9b3 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -210,7 +210,8 @@ fn setup_guest_run(
     let (_inner_proof, blob) = prove_inner_and_encode_blob(label, &empty_elf_bytes, &[], opts);
 
     let program = executor::elf::Elf::load(&guest_elf_bytes).expect("ELF load failed");
-    let executor = executor::vm::execution::Executor::new(&program, blob).expect("Executor::new failed");
+    let executor =
+        executor::vm::execution::Executor::new(&program, blob).expect("Executor::new failed");
     (guest_elf_bytes, program, executor)
 }
 
@@ -218,10 +219,7 @@ fn setup_guest_run(
 /// `[label]   ... N chunks, M cycles, T elapsed` line every `stride` chunks —
 /// the readout every counting diagnostic shares. Tests that need extra live
 /// state (unique PC count, active step bucket) keep their own closure instead.
-fn log_progress(
-    label: &'static str,
-    stride: usize,
-) -> impl FnMut(usize, u64, std::time::Duration) {
+fn log_progress(label: &'static str, stride: usize) -> impl FnMut(usize, u64, std::time::Duration) {
     move |chunks, cycles, elapsed| {
         if chunks.is_multiple_of(stride) {
             eprintln!("[{label}]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed");

From 7e00788d9b2c9f9926c97c9603d5996847f7c45b Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 17:12:59 -0300
Subject: [PATCH 05/36] refactor(prover): unify recursion execute-only
 diagnostics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Collapse the cycle-count, PC-histogram and step-breakdown diagnostics into one
parameterized run_profile(guest, stride, opts, detailed). Total cycles, wall
time and the rough trace/LDE size estimate are always printed; the top-25
function table and the per-step breakdown are printed only when `detailed` is
set (both are built in the same streamed pass over the PC stream). Every
variant now comes in 1query and multiquery flavours for both the recursion
guest and the deserialize-only control guest. Also route
execute_outer_and_commit through drive_executor — the rebased streaming
finish() makes its hand-rolled drain loop redundant.
---
 prover/src/tests/recursion_smoke_test.rs | 623 ++++++++---------------
 1 file changed, 224 insertions(+), 399 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index ae2daf9b3..963f3d7c6 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -106,12 +106,8 @@ fn execute_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]
     let program = Elf::load(recursion_elf_bytes).expect("load recursion elf");
     let mut executor = Executor::new(&program, blob.to_vec()).expect("executor new");
 
-    // Drain chunks to completion without retaining logs or building a trace.
-    while executor
-        .resume()
-        .expect("recursion guest execution failed (verify panicked in-VM?)")
-        .is_some()
-    {}
+    let (total_cycles, exec_time) =
+        drive_executor(&mut executor, |_log| ControlFlow::Continue(()), |_, _, _| {});
 
     let committed = executor
         .finish()
@@ -119,7 +115,7 @@ fn execute_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]
         .memory_values;
 
     eprintln!(
-        "[{label}] committed {} bytes: {:?} (as str: {:?})",
+        "[{label}] {total_cycles} cycles in {exec_time:?}; committed {} bytes: {:?} (as str: {:?})",
         committed.len(),
         committed,
         String::from_utf8_lossy(&committed),
@@ -170,7 +166,7 @@ fn drive_executor(
     let start = std::time::Instant::now();
     let mut total_cycles: u64 = 0;
     let mut chunks: usize = 0;
-    while let Some(logs) = executor.resume().expect("executor resume failed") {
+    while let Some(logs) = executor.resume().expect("executor resume failed (guest panicked in-VM?)") {
         let mut stop = false;
         for log in logs {
             total_cycles += 1;
@@ -210,6 +206,10 @@ fn setup_guest_run(
     let (_inner_proof, blob) = prove_inner_and_encode_blob(label, &empty_elf_bytes, &[], opts);
 
     let program = executor::elf::Elf::load(&guest_elf_bytes).expect("ELF load failed");
+    assert_ne!(
+        program.entry_point, 0,
+        "{guest_name} ELF has entry_point=0 — build artifact is malformed"
+    );
     let executor =
         executor::vm::execution::Executor::new(&program, blob).expect("Executor::new failed");
     (guest_elf_bytes, program, executor)
@@ -217,9 +217,12 @@ fn setup_guest_run(
 
 /// A `drive_executor` progress callback that prints the throttled
 /// `[label]   ... N chunks, M cycles, T elapsed` line every `stride` chunks —
-/// the readout every counting diagnostic shares. Tests that need extra live
-/// state (unique PC count, active step bucket) keep their own closure instead.
-fn log_progress(label: &'static str, stride: usize) -> impl FnMut(usize, u64, std::time::Duration) {
+/// the readout the counting diagnostics share. Tests that need extra live state
+/// (unique PC count, active step bucket) keep their own closure instead. Takes
+/// `impl Into<String>` so it works with both `&'static` tags and a run's
+/// dynamic `label`.
+fn log_progress(label: impl Into<String>, stride: usize) -> impl FnMut(usize, u64, std::time::Duration) {
+    let label = label.into();
     move |chunks, cycles, elapsed| {
         if chunks.is_multiple_of(stride) {
             eprintln!("[{label}]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed");
@@ -239,30 +242,35 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
     )
 }
 
-/// Print a per-function PC-histogram summary: the cycles each resolved function
-/// accounts for, folded over all its PCs. `pc_hist` maps program counter →
-/// cycle count.
-///
-/// We fold by function deliberately: an inlined kernel is spread across dozens
-/// of PCs, so a raw per-address table scatters its true cost — and without
-/// file:line resolution a bare PC isn't actionable for optimization anyway, so
-/// there is no per-address detail table.
-fn print_pc_histogram(
-    title: &str,
+/// Verifier sub-routines in execution order. LLVM inlines the step bodies, but
+/// closures inside each keep the method name in their mangled symbol, so
+/// `run_profile` advances the step bucket by substring-matching the enclosing
+/// symbol. A step with no matching symbol merges into the previous bucket.
+const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
+    "replay_rounds_after_round_1",
+    "step_2_verify_claimed_composition_polynomial",
+    "step_3_verify_fri",
+    "step_4_verify_trace_and_composition_openings",
+];
+
+/// `blowup=8` inner-proof options: the security-derived multi-query count (tens
+/// of queries, 128-bit) used by every `multiquery` profiling variant.
+fn blowup8() -> stark::proof::options::ProofOptions {
+    crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid")
+}
+
+/// Fold the PC histogram by enclosing function and print the top-25 by cycles.
+/// Folded because an inlined kernel spreads across many PCs; no per-address
+/// table since a bare PC isn't actionable without file:line.
+fn print_function_table(
     symbols: &executor::elf::SymbolTable,
     pc_hist: std::collections::HashMap<u64, u64>,
     total_cycles: u64,
-    exec_time: std::time::Duration,
 ) {
-    let entries: Vec<(u64, u64)> = pc_hist.into_iter().collect();
-
-    // Aggregate the full histogram by resolved function, resolving each PC once.
     let mut by_function: std::collections::HashMap<String, (u64, u64)> =
         std::collections::HashMap::new();
-    for (pc, count) in &entries {
-        let entry = by_function
-            .entry(resolve_pc(symbols, *pc))
-            .or_insert((0, 0));
+    for (pc, count) in &pc_hist {
+        let entry = by_function.entry(resolve_pc(symbols, *pc)).or_insert((0, 0));
         entry.0 += *count; // cycles
         entry.1 += 1; // distinct PCs folded into this function
     }
@@ -270,20 +278,10 @@ fn print_pc_histogram(
     fn_entries.sort_unstable_by_key(|(_name, (cycles, _pcs))| std::cmp::Reverse(*cycles));
 
     let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
-
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  {title}");
-    eprintln!("============================================================");
-    eprintln!("  Total cycles : {total_cycles}");
-    eprintln!("  Unique PCs   : {}", entries.len());
-    eprintln!("  Exec time    : {exec_time:?}");
+    eprintln!("  Unique PCs   : {}", pc_hist.len());
     eprintln!();
     eprintln!("  Top 25 functions by cycle count (aggregated over their PCs):");
-    eprintln!(
-        "  {:>4}  {:>14}  {:>7}  {:>7}  {:>5}  {}",
-        "rank", "cycles", "%", "cum %", "PCs", "function"
-    );
+    eprintln!("  rank          cycles        %    cum %    PCs  function");
     let mut fn_cumulative: u64 = 0;
     for (rank, (name, (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
         fn_cumulative += cycles;
@@ -297,6 +295,149 @@ fn print_pc_histogram(
             name,
         );
     }
+}
+
+/// Print the monotonic per-verifier-step cycle bucketing. `buckets[0]` is
+/// pre-step-1 setup (alloc + postcard decode + `VmAirs::new`); `buckets[i]` is
+/// verifier step i (with a missing step's cycles merged into the previous one).
+fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
+    let labels = [
+        "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
+        "1. step 1: replay_rounds_after_round_1",
+        "2. step 2: verify_claimed_composition_polynomial",
+        "3. step 3: verify_fri",
+        "4. step 4: verify_trace_and_composition_openings (+ wrap-up)",
+    ];
+    eprintln!();
+    eprintln!("  Per-step cycle breakdown (monotonic state machine):");
+    eprintln!("  {:<60}  {:>14}  {:>7}", "bucket", "cycles", "%");
+    for (label, cycles) in labels.iter().zip(buckets.iter()) {
+        let pct = if total_cycles > 0 {
+            100.0 * (*cycles as f64) / (total_cycles as f64)
+        } else {
+            0.0
+        };
+        eprintln!("  {:<60}  {:>14}  {:>6.2}%", label, cycles, pct);
+    }
+}
+
+/// Single-pass execute-only profiler. Always prints total cycles + wall time +
+/// a rough trace/LDE size estimate. With `detailed`, the same pass also builds
+/// the PC histogram and verifier-step bucketing and prints the top-25 functions
+/// and the per-step breakdown (the two always come together); `!detailed` does
+/// no per-log work, so it's just a fast cycle counter. `progress_stride`
+/// throttles the readout (recursion large, the deserialize-only control small).
+fn run_profile(
+    guest_name: &str,
+    progress_stride: usize,
+    opts: stark::proof::options::ProofOptions,
+    detailed: bool,
+) {
+    use std::collections::HashMap;
+
+    let (guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
+    let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
+
+    let mut pc_hist: HashMap<u64, u64> = HashMap::new();
+    let mut buckets = [0u64; 5];
+    let mut last_range: Option<(u64, u64)> = None;
+    let mut last_advance: u8 = 0;
+    let bucket = std::cell::Cell::new(0u8);
+    let unique = std::cell::Cell::new(0usize);
+
+    if detailed {
+        assert!(
+            !symbols.is_empty(),
+            "{guest_name} ELF has no symbol table — was it stripped?"
+        );
+        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
+            let n = symbols.functions().iter().filter(|f| f.name.contains(kw)).count();
+            eprintln!(
+                "[profile] step {}: keyword={kw:?} -> {n} symbol(s) {}",
+                i + 1,
+                if n > 0 { "" } else { "(no match; merges into previous bucket)" },
+            );
+        }
+    }
+
+    eprintln!(
+        "[profile] executing {guest_name} guest ({}) ...",
+        if detailed { "histogram + steps" } else { "cycle counter" }
+    );
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |log| {
+            if detailed {
+                let pc = log.current_pc;
+                *pc_hist.entry(pc).or_insert(0) += 1;
+                unique.set(pc_hist.len());
+
+                let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
+                if !in_cached {
+                    if let Some(sym) = symbols.lookup(pc) {
+                        last_range = Some((sym.address, sym.address + sym.size.max(1)));
+                        last_advance = 0;
+                        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
+                            if sym.name.contains(kw) {
+                                last_advance = (i + 1) as u8;
+                            }
+                        }
+                    } else {
+                        last_range = None;
+                        last_advance = 0;
+                    }
+                }
+                if bucket.get() < last_advance {
+                    bucket.set(last_advance);
+                }
+                buckets[bucket.get() as usize] += 1;
+            }
+            ControlFlow::Continue(())
+        },
+        |chunks, cycles, elapsed| {
+            if chunks.is_multiple_of(progress_stride) {
+                if detailed {
+                    eprintln!(
+                        "[profile]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, bucket={}, {elapsed:?}",
+                        unique.get(),
+                        bucket.get(),
+                    );
+                } else {
+                    eprintln!("[profile]   ... {chunks} chunks, {cycles} cycles, {elapsed:?}");
+                }
+            }
+        },
+    );
+
+    eprintln!();
+    eprintln!("============================================================");
+    eprintln!(
+        "  {} GUEST PROFILE (blowup={}, {} queries)",
+        guest_name.to_uppercase(),
+        opts.blowup_factor,
+        opts.fri_number_of_queries,
+    );
+    eprintln!("============================================================");
+    eprintln!("  Total cycles : {total_cycles}");
+    eprintln!("  Exec time    : {exec_time:?}");
+    eprintln!();
+    eprintln!("  Rough trace/LDE size if this guest were proven:");
+    let approx_columns = 250u64;
+    let main_trace_bytes = total_cycles * approx_columns * 8;
+    eprintln!(
+        "    main trace          : ~{:.2} GB ({total_cycles} cycles × ~{approx_columns} cols × 8 B)",
+        main_trace_bytes as f64 / 1e9,
+    );
+    eprintln!(
+        "    main LDE (blowup=2) : ~{:.2} GB  (+aux ≈ 50% more → peak ≈ 2-3× LDE)",
+        (main_trace_bytes * 2) as f64 / 1e9,
+    );
+
+    if detailed {
+        eprintln!();
+        print_function_table(&symbols, pc_hist, total_cycles);
+        print_step_breakdown(&buckets, total_cycles);
+    }
     eprintln!("============================================================");
 }
 
@@ -514,59 +655,33 @@ fn test_dump_recursion_input() {
     eprintln!("[dump-input] wrote {} bytes to {path}", blob.len());
 }
 
-/// Diagnostic: build the inner proof + recursion guest input, then **execute
-/// only** the recursion guest (no STARK proving) and report cycle counts +
-/// trace size estimates.
-///
-/// This is the cheap way to find out how many RISC-V instructions the
-/// verifier actually executes inside the guest — a much faster signal than
-/// running the full outer prove (which can OOM on a 125 GB machine).
+/// Cycle count only of the recursion guest verifying a 1-query inner proof.
 #[test]
-#[ignore = "diagnostic: runs the executor only, prints cycle counts"]
-fn test_recursion_cycle_count() {
-    // Build the inner proof with the absolute-minimum FRI params (smallest
-    // possible inner) and stand up the recursion guest over it.
-    let (_bytes, _program, mut executor) =
-        setup_guest_run("cycle-count", "recursion", &MIN_PROOF_OPTIONS);
-
-    // Execute (NOT prove) the recursion guest. `drive_executor` streams chunks
-    // and never accumulates logs in memory — this avoids the Vec<Log> blow-up
-    // that OOMs even a 125 GB server (one Log is 40 B; a few billion of them is
-    // hundreds of GB).
-    eprintln!("[cycle-count] executing recursion guest (streaming counter only) ...");
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |_log| ControlFlow::Continue(()),
-        log_progress("cycle-count", 50),
-    );
-    let cycle_count = total_cycles as usize;
+#[ignore = "diagnostic: fast; recursion guest cycle count (1 query)"]
+fn test_recursion_cycles_1query() {
+    run_profile("recursion", 500, MIN_PROOF_OPTIONS, false);
+}
 
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  RECURSION GUEST EXECUTION SUMMARY");
-    eprintln!("============================================================");
-    eprintln!("  Cycle count           : {cycle_count}");
-    eprintln!("  Executor wall time    : {exec_time:?}");
-    eprintln!();
-    eprintln!("  Rough memory estimate for outer prove:");
-    let bytes_per_field = 8usize;
-    let approx_columns = 250usize; // CPU + MEMW + DECODE + bus columns combined
-    let main_trace_bytes = cycle_count * approx_columns * bytes_per_field;
-    let blowup = 2usize;
-    let lde_main_bytes = main_trace_bytes * blowup;
-    eprintln!(
-        "    main trace            : ~{:.2} GB ({} cycles × ~{} cols × 8 B)",
-        main_trace_bytes as f64 / 1e9,
-        cycle_count,
-        approx_columns
-    );
-    eprintln!(
-        "    main LDE (blowup={})   : ~{:.2} GB",
-        blowup,
-        lde_main_bytes as f64 / 1e9
-    );
-    eprintln!("  (aux trace adds roughly 50% more, so peak peak ≈ 2-3× LDE)");
-    eprintln!("============================================================");
+/// Cycle count only at 128-bit security: more FRI queries → more verifier cycles.
+#[test]
+#[ignore = "diagnostic: fast; recursion guest cycle count (multi-query)"]
+fn test_recursion_cycles_multiquery() {
+    run_profile("recursion", 500, blowup8(), false);
+}
+
+/// Full profile (top-25 functions + per-step breakdown) of the 1-query run —
+/// the cheapest verifier run, dominated by fixed setup.
+#[test]
+#[ignore = "diagnostic: ~8 min; recursion guest histogram + steps (1 query)"]
+fn test_recursion_profile_1query() {
+    run_profile("recursion", 500, MIN_PROOF_OPTIONS, true);
+}
+
+/// Full profile at 128-bit security: weight shifts toward per-query FRI/Merkle.
+#[test]
+#[ignore = "diagnostic: heavy; recursion guest histogram + steps (multi-query)"]
+fn test_recursion_profile_multiquery() {
+    run_profile("recursion", 500, blowup8(), true);
 }
 
 /// Diagnostic: count the distinct 4 KB memory pages the recursion guest
@@ -692,91 +807,6 @@ fn test_recursion_page_count() {
     eprintln!("============================================================");
 }
 
-/// Build a PC histogram of guest `guest_name` verifying an `empty`-program
-/// inner proof produced with `inner_proof_options`, and print it via
-/// [`print_pc_histogram`] under `title`.
-///
-/// For the recursion guest, `blowup_factor` and `fri_number_of_queries` are
-/// coupled (the query count is derived from blowup for a fixed security
-/// target), so each recursion `#[test]` is just this runner with a different
-/// `ProofOptions` — a single query at low blowup, vs. the security-derived
-/// multi-query count at a higher blowup. The deserialize-only control guest
-/// reuses the same runner with its own ELF name.
-///
-/// Streams chunks of logs via `Executor::resume()` so memory stays bounded to
-/// the histogram itself. Each PC is resolved to its enclosing function via the
-/// in-house `executor::elf::SymbolTable` (reading the guest ELF's symbol table
-/// directly — no external tool, no DWARF dependency).
-fn run_pc_histogram(
-    title: &str,
-    guest_name: &str,
-    progress_stride: usize,
-    inner_proof_options: stark::proof::options::ProofOptions,
-) {
-    use std::collections::HashMap;
-
-    let (guest_elf_bytes, _program, mut executor) =
-        setup_guest_run("pc-hist", guest_name, &inner_proof_options);
-
-    eprintln!("[pc-hist] executing {guest_name} guest (building PC histogram) ...");
-    let mut pc_hist: HashMap<u64, u64> = HashMap::with_capacity(300_000);
-    let unique = std::cell::Cell::new(0usize);
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |log| {
-            *pc_hist.entry(log.current_pc).or_insert(0) += 1;
-            unique.set(pc_hist.len());
-            ControlFlow::Continue(())
-        },
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(progress_stride) {
-                eprintln!(
-                    "[pc-hist]   ... {chunks} chunks, {cycles} cycles, {} unique PCs, {elapsed:?}",
-                    unique.get()
-                );
-            }
-        },
-    );
-
-    // Resolve PCs to functions directly from the ELF's symbol table.
-    let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
-    print_pc_histogram(title, &symbols, pc_hist, total_cycles, exec_time);
-}
-
-/// Diagnostic: PC histogram of the recursion guest with a **single** FRI query
-/// at blowup=2 — the cheapest verifier run, dominated by fixed setup cost
-/// (decode, allocator, postcard) rather than per-query FRI/Merkle work.
-#[test]
-#[ignore = "diagnostic: ~8 minutes; prints PC histogram of the verifier-in-VM"]
-fn test_recursion_pc_histogram_1query() {
-    run_pc_histogram(
-        "RECURSION GUEST PC HISTOGRAM (blowup=2, 1 query)",
-        "recursion",
-        500,
-        MIN_PROOF_OPTIONS,
-    );
-}
-
-/// Diagnostic: PC histogram of the recursion guest at **128-bit security**
-/// (blowup=8, FRI query count derived by the Johnson Bound Regime — tens of
-/// queries). Compared against the single-query runs, weight shifts toward the
-/// verifier's per-query FRI-layer / Merkle-opening and field arithmetic.
-#[test]
-#[ignore = "diagnostic: heavy; PC histogram of the multi-query verifier-in-VM"]
-fn test_recursion_pc_histogram_multiquery() {
-    let inner_proof_options =
-        crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid");
-    run_pc_histogram(
-        &format!(
-            "RECURSION GUEST PC HISTOGRAM (blowup=8, {} queries, 128-bit)",
-            inner_proof_options.fri_number_of_queries
-        ),
-        "recursion",
-        500,
-        inner_proof_options,
-    );
-}
-
 /// Diagnostic: build a **sampled** call-stack histogram of the recursion guest.
 ///
 /// Like `test_recursion_pc_histogram` but groups by full call stack (not PC).
@@ -951,236 +981,31 @@ fn test_host_verify_step_timings() {
     eprintln!("[host-verify] verified OK");
 }
 
-/// Diagnostic: cycle count for the **deserialize-only** counterpart of the
-/// recursion guest. Same input layout
-/// (`(VmProof, Vec<u8>, ProofOptions)`) and same proof, but
-/// the guest just postcard-decodes the blob and halts — it never calls
-/// `verify_with_options`.
-///
-/// The cycle delta between this and `test_recursion_cycle_count` is the
-/// actual cost of the STARK verifier inside the VM. Historically (40.5 B-cycle
-/// recursion guest) postcard decode was ~15.6 M cycles — negligible. Now that
-/// the recursion guest is ~67 M cycles, the same absolute cost would be ~23%
-/// of total; this test re-measures it.
-#[test]
-#[ignore = "diagnostic: runs the deserialize-only guest, prints cycle count"]
-fn test_deserialize_only_cycle_count() {
-    let (deser_elf_bytes, program, mut executor) =
-        setup_guest_run("deser-only", "deserialize-only", &MIN_PROOF_OPTIONS);
-
-    eprintln!(
-        "[deser-only] ELF: {} bytes, entry_point=0x{:x}",
-        deser_elf_bytes.len(),
-        program.entry_point,
-    );
-    assert_ne!(
-        program.entry_point, 0,
-        "deserialize-only ELF has entry_point=0 — build artifact is malformed"
-    );
+// Control guest: decodes the blob and halts. Its cycle count subtracted from
+// the matching recursion run isolates the in-VM verifier cost.
 
-    eprintln!("[deser-only] executing deserialize-only guest (streaming) ...");
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |_log| ControlFlow::Continue(()),
-        log_progress("deser-only", 50),
-    );
-    let cycle_count = total_cycles;
-
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  DESERIALIZE-ONLY GUEST EXECUTION SUMMARY");
-    eprintln!("============================================================");
-    eprintln!("  Cycle count           : {cycle_count}");
-    eprintln!("  Executor wall time    : {exec_time:?}");
-    eprintln!();
-    eprintln!("  Compare against test_recursion_cycle_count (~40.5B cycles");
-    eprintln!("  with the same proof). Delta = verifier-in-VM cost.");
-    eprintln!("============================================================");
+#[test]
+#[ignore = "diagnostic: fast; deserialize-only guest cycle count (1 query)"]
+fn test_deserialize_only_cycles_1query() {
+    run_profile("deserialize-only", 50, MIN_PROOF_OPTIONS, false);
 }
 
-/// Diagnostic: PC histogram for the **deserialize-only** guest.
-///
-/// Sibling of `test_recursion_pc_histogram`, but targeting the
-/// deserialize-only control guest so we can locate the hot kernel inside the
-/// 15.7 M-cycle postcard decode itself. Every cycle goes through the
-/// histogram (no sampling), so attribution is exact — the previous sampled
-/// flamegraph at 1:1000 had broken stack reconstruction on skipped
-/// CALL/RETURNs, which made it unreliable for a workload this small.
-///
-/// Each top PC is resolved to its enclosing function via the in-house
-/// `executor::elf::SymbolTable`, reading the guest ELF's symbol table directly
-/// (no external tool, no DWARF dependency).
 #[test]
-#[ignore = "diagnostic: ~1 min; PC histogram for the deserialize-only guest"]
-fn test_deserialize_only_pc_histogram() {
-    // Same runner as the recursion PC histograms, pointed at the deserialize-only
-    // control guest. Smaller workload (~16 M cycles, far fewer chunks), so use a
-    // tighter progress stride to still get periodic readouts.
-    run_pc_histogram(
-        "DESERIALIZE-ONLY GUEST PC HISTOGRAM",
-        "deserialize-only",
-        50,
-        MIN_PROOF_OPTIONS,
-    );
+#[ignore = "diagnostic: fast; deserialize-only guest cycle count (multi-query)"]
+fn test_deserialize_only_cycles_multiquery() {
+    run_profile("deserialize-only", 50, blowup8(), false);
 }
 
-/// Diagnostic: bucket the recursion guest's cycles by which verifier step
-/// is currently executing.
-///
-/// The verifier's hot path is `verify_rounds_2_to_4`, which calls four
-/// sub-routines in a fixed order:
-///   1. `replay_rounds_after_round_1`               (recover challenges)
-///   2. `step_2_verify_claimed_composition_polynomial`
-///   3. `step_3_verify_fri`
-///   4. `step_4_verify_trace_and_composition_openings`
-///
-/// We resolve each sub-routine's entry PC from the recursion ELF's symbol
-/// table, then run a monotonic state machine over the execution stream:
-/// the active bucket only advances 0 → 1 → 2 → 3 → 4 (never backwards),
-/// so cycles inside a step's callees stay attributed to that step.
-///
-/// Bucket 0 ("setup") captures everything before step 1 is entered — the
-/// allocator init, postcard decode, and `VmAirs::new` (which contains the
-/// expensive preprocessed-commitment FFTs).
-///
-/// Streams chunks via `Executor::resume()` so memory stays bounded.
 #[test]
-#[ignore = "diagnostic: ~13 min; buckets the 40B cycles by verifier step"]
-fn test_recursion_step_breakdown() {
-    use executor::elf::SymbolTable;
-
-    let (recursion_elf_bytes, _program, mut executor) =
-        setup_guest_run("step-bkd", "recursion", &MIN_PROOF_OPTIONS);
-
-    // Build a per-step "advance bucket to N" lookup. The verifier's step
-    // functions get inlined by LLVM in release mode, so we can't rely on
-    // matching their entry PCs directly. Instead we anchor on closures the
-    // compiler emits *inside* each step's body — iterator combinators like
-    // `.fold(|...|)` keep the step's method name as a substring in their
-    // mangled symbol. Any PC that resolves to a symbol containing step N's
-    // keyword advances the bucket to N (monotonically).
-    //
-    // If step N has no matching symbol at all (e.g. step 4 is fully inlined
-    // with no closure children of its own), its cycles get attributed to the
-    // previous bucket. We report that explicitly in the summary.
-    let symbols = SymbolTable::parse(&recursion_elf_bytes);
-    assert!(
-        !symbols.is_empty(),
-        "recursion ELF has no symbol table — was it stripped?"
-    );
-
-    let step_keywords = [
-        "replay_rounds_after_round_1",
-        "step_2_verify_claimed_composition_polynomial",
-        "step_3_verify_fri",
-        "step_4_verify_trace_and_composition_openings",
-    ];
-    let step_found: [bool; 4] = std::array::from_fn(|i| {
-        symbols
-            .functions()
-            .iter()
-            .any(|f| f.name.contains(step_keywords[i]))
-    });
-    for (i, found) in step_found.iter().enumerate() {
-        let n_matches = symbols
-            .functions()
-            .iter()
-            .filter(|f| f.name.contains(step_keywords[i]))
-            .count();
-        eprintln!(
-            "[step-bkd] step {}: keyword={:?} -> {} symbol(s) {}",
-            i + 1,
-            step_keywords[i],
-            n_matches,
-            if *found {
-                ""
-            } else {
-                "(fully inlined; will merge into the previous bucket)"
-            }
-        );
-    }
-
-    // Monotonic state machine: 0=setup, 1..=4=inside step N (or its callees /
-    // inlined-step-N-cycles attributed here because step N+1 is missing).
-    // `bucket` lives in a Cell so the per-log closure can advance it while the
-    // progress closure reads it for its live readout.
-    let bucket = std::cell::Cell::new(0u8);
-    let mut buckets = [0u64; 5];
-
-    eprintln!("[step-bkd] executing recursion guest (streaming) ...");
-
-    // Cache the last symbol-table hit so we only do a binary search on
-    // function transitions, not on every cycle. Functions are typically
-    // long-running (>>1 instruction), so this cache hits ~all of the time.
-    let mut last_range: Option<(u64, u64)> = None;
-    let mut last_advance: u8 = 0;
-
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |log| {
-            let pc = log.current_pc;
-            let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
-            if !in_cached {
-                // Slow path: refresh the cache from the symbol table.
-                if let Some(sym) = symbols.lookup(pc) {
-                    // SymbolTable accepts size=0 symbols as "any address >="; for
-                    // those we'd need the next symbol's start for a real upper
-                    // bound. Cheapest workaround: set a tiny range so we re-resolve
-                    // soon enough that wrong attribution is bounded.
-                    let end = sym.address + sym.size.max(1);
-                    last_range = Some((sym.address, end));
-                    last_advance = 0;
-                    for (i, kw) in step_keywords.iter().enumerate() {
-                        if sym.name.contains(kw) {
-                            last_advance = (i + 1) as u8;
-                        }
-                    }
-                } else {
-                    last_range = None;
-                    last_advance = 0;
-                }
-            }
-            if bucket.get() < last_advance {
-                bucket.set(last_advance);
-            }
-            buckets[bucket.get() as usize] += 1;
-            ControlFlow::Continue(())
-        },
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(500) {
-                eprintln!(
-                    "[step-bkd]   ... {chunks} chunks, {cycles} cycles, bucket={}, {elapsed:?}",
-                    bucket.get()
-                );
-            }
-        },
-    );
-
-    let labels = [
-        "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
-        "1. step 1: replay_rounds_after_round_1",
-        "2. step 2: verify_claimed_composition_polynomial",
-        "3. step 3: verify_fri",
-        "4. step 4: verify_trace_and_composition_openings (+ wrap-up)",
-    ];
+#[ignore = "diagnostic: ~1 min; deserialize-only guest histogram (1 query)"]
+fn test_deserialize_only_profile_1query() {
+    run_profile("deserialize-only", 50, MIN_PROOF_OPTIONS, true);
+}
 
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  RECURSION GUEST PER-STEP CYCLE BREAKDOWN");
-    eprintln!("============================================================");
-    eprintln!("  Total cycles : {total_cycles}");
-    eprintln!("  Exec time    : {exec_time:?}");
-    eprintln!();
-    eprintln!("  {:<60}  {:>14}  {:>7}", "bucket", "cycles", "%");
-    for (label, cycles) in labels.iter().zip(buckets.iter()) {
-        let pct = if total_cycles > 0 {
-            100.0 * (*cycles as f64) / (total_cycles as f64)
-        } else {
-            0.0
-        };
-        eprintln!("  {:<60}  {:>14}  {:>6.2}%", label, cycles, pct);
-    }
-    eprintln!("============================================================");
+#[test]
+#[ignore = "diagnostic: deserialize-only guest histogram (multi-query)"]
+fn test_deserialize_only_profile_multiquery() {
+    run_profile("deserialize-only", 50, blowup8(), true);
 }
 
 /// Inner program: fibonacci(10).

From 5aaae7a10da3628d4493e2a7f2197bdfe92d36e5 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 17:29:33 -0300
Subject: [PATCH 06/36] build: enable the deserialize-only recursion guest

Add deserialize-only to RECURSION_GUESTS and migrate the guest to the recursion
guest's std shape (lambda_vm_syscalls + build-std std), since the old no_std
panic handler collided with std. Add getrandom_backend="custom" to its cargo
config (transitive getrandom 0.3 needs it) and track its Cargo.lock. The
deserialize-only control guest now builds and its profile tests run.
---
 Makefile                                      |    2 +-
 .../deserialize-only/.cargo/config.toml       |    1 +
 bench_vs/lambda/deserialize-only/Cargo.lock   | 1199 +++++++++++++++++
 bench_vs/lambda/deserialize-only/Cargo.toml   |    6 +-
 bench_vs/lambda/deserialize-only/src/main.rs  |   99 +-
 5 files changed, 1222 insertions(+), 85 deletions(-)
 create mode 100644 bench_vs/lambda/deserialize-only/Cargo.lock

diff --git a/Makefile b/Makefile
index 30e3029da..60bb8a0c5 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ BENCH_ARTIFACTS := $(addprefix $(BENCH_ARTIFACTS_DIR)/, $(addsuffix .elf, $(BENC
 # rather than executor/programs/. The recursion guest is the in-VM STARK verifier.
 RECURSION_GUESTS_DIR=./bench_vs/lambda
 RECURSION_ARTIFACTS_DIR=./executor/program_artifacts/recursion
-RECURSION_GUESTS := empty fibonacci recursion
+RECURSION_GUESTS := empty fibonacci recursion deserialize-only
 RECURSION_ARTIFACTS := $(addprefix $(RECURSION_ARTIFACTS_DIR)/, $(addsuffix .elf, $(RECURSION_GUESTS)))
 
 # Override with: make ... SYSROOT_DIR=$HOME/.lambda-vm-sysroot
diff --git a/bench_vs/lambda/deserialize-only/.cargo/config.toml b/bench_vs/lambda/deserialize-only/.cargo/config.toml
index be730c3ec..f5ea686ff 100644
--- a/bench_vs/lambda/deserialize-only/.cargo/config.toml
+++ b/bench_vs/lambda/deserialize-only/.cargo/config.toml
@@ -2,5 +2,6 @@
 rustflags = [
   "-C", "link-arg=-e",
   "-C", "link-arg=main",
+  "--cfg", "getrandom_backend=\"custom\"",
   "-C", "passes=lower-atomic"
 ]
diff --git a/bench_vs/lambda/deserialize-only/Cargo.lock b/bench_vs/lambda/deserialize-only/Cargo.lock
new file mode 100644
index 000000000..9433fadb3
--- /dev/null
+++ b/bench_vs/lambda/deserialize-only/Cargo.lock
@@ -0,0 +1,1199 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "atomic-polyfill"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
+dependencies = [
+ "critical-section",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
+name = "base16ct"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
+
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "cobs"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
+dependencies = [
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "const-default"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa"
+
+[[package]]
+name = "const-oid"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crypto"
+version = "0.1.0"
+dependencies = [
+ "digest",
+ "math",
+ "rand 0.8.6",
+ "rand_chacha 0.3.1",
+ "serde",
+ "sha3",
+]
+
+[[package]]
+name = "crypto-bigint"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
+dependencies = [
+ "generic-array",
+ "rand_core 0.6.4",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "der"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
+dependencies = [
+ "const-oid",
+ "zeroize",
+]
+
+[[package]]
+name = "deserialize-only-bench"
+version = "0.1.0"
+dependencies = [
+ "lambda-vm-prover",
+ "lambda-vm-syscalls",
+ "postcard",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
+[[package]]
+name = "ecsm"
+version = "0.1.0"
+dependencies = [
+ "k256",
+ "num-bigint",
+ "num-traits",
+]
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+
+[[package]]
+name = "elliptic-curve"
+version = "0.13.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
+dependencies = [
+ "base16ct",
+ "crypto-bigint",
+ "ff",
+ "generic-array",
+ "group",
+ "rand_core 0.6.4",
+ "sec1",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "embedded-alloc"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd"
+dependencies = [
+ "const-default",
+ "critical-section",
+ "linked_list_allocator",
+ "rlsf",
+]
+
+[[package]]
+name = "embedded-hal"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
+
+[[package]]
+name = "embedded-io"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
+
+[[package]]
+name = "embedded-io"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+
+[[package]]
+name = "executor"
+version = "0.1.0"
+dependencies = [
+ "ecsm",
+ "rustc-demangle",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "ff"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
+dependencies = [
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2"
+dependencies = [
+ "typenum",
+ "version_check",
+ "zeroize",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "group"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
+dependencies = [
+ "ff",
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "half"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403"
+
+[[package]]
+name = "hash32"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
+dependencies = [
+ "byteorder",
+]
+
+[[package]]
+name = "heapless"
+version = "0.7.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
+dependencies = [
+ "atomic-polyfill",
+ "hash32",
+ "rustc_version",
+ "serde",
+ "spin",
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "js-sys"
+version = "0.3.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "k256"
+version = "0.13.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
+dependencies = [
+ "cfg-if",
+ "elliptic-curve",
+]
+
+[[package]]
+name = "keccak"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653"
+dependencies = [
+ "cpufeatures",
+]
+
+[[package]]
+name = "lambda-vm-prover"
+version = "0.1.0"
+dependencies = [
+ "crypto",
+ "ecsm",
+ "executor",
+ "log",
+ "math",
+ "serde",
+ "sha3",
+ "stark",
+ "sysinfo",
+]
+
+[[package]]
+name = "lambda-vm-syscalls"
+version = "0.1.0"
+dependencies = [
+ "embedded-alloc",
+ "getrandom 0.2.17",
+ "getrandom 0.3.4",
+ "lazy_static",
+ "rand 0.9.4",
+ "riscv",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "linked_list_allocator"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
+
+[[package]]
+name = "math"
+version = "0.1.0"
+dependencies = [
+ "getrandom 0.2.17",
+ "num-bigint",
+ "num-traits",
+ "rand 0.8.6",
+ "rayon",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "ntapi"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "postcard"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
+dependencies = [
+ "cobs",
+ "embedded-io 0.4.0",
+ "embedded-io 0.6.1",
+ "heapless",
+ "serde",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "rand"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
+dependencies = [
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "riscv"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25"
+dependencies = [
+ "critical-section",
+ "embedded-hal",
+ "paste",
+ "riscv-macros",
+ "riscv-pac",
+]
+
+[[package]]
+name = "riscv-macros"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "riscv-pac"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
+
+[[package]]
+name = "rlsf"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a"
+dependencies = [
+ "cfg-if",
+ "const-default",
+ "libc",
+ "rustversion",
+ "svgbobdoc",
+]
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
+
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "sec1"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
+dependencies = [
+ "base16ct",
+ "der",
+ "generic-array",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "semver"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
+
+[[package]]
+name = "serde"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_cbor"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
+dependencies = [
+ "half",
+ "serde",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.143"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "sha3"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77fd7028345d415a4034cf8777cd4f8ab1851274233b45f84e3d955502d93874"
+dependencies = [
+ "digest",
+ "keccak",
+]
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "stark"
+version = "0.1.0"
+dependencies = [
+ "crypto",
+ "itertools",
+ "log",
+ "math",
+ "serde",
+ "serde_cbor",
+ "sha3",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "svgbobdoc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50"
+dependencies = [
+ "base64",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "unicode-width",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "sysinfo"
+version = "0.31.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "355dbe4f8799b304b05e1b0f05fc59b2a18d36645cf169607da45bde2f69a1be"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+ "memchr",
+ "ntapi",
+ "windows",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "typenum"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.4+wasi-0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows"
+version = "0.57.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
+dependencies = [
+ "windows-core",
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.57.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-result",
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.57.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.57.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e"
diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml
index b4a4616f4..fac6a7628 100644
--- a/bench_vs/lambda/deserialize-only/Cargo.toml
+++ b/bench_vs/lambda/deserialize-only/Cargo.toml
@@ -7,7 +7,5 @@ edition = "2024"
 
 [dependencies]
 lambda-vm-prover = { path = "../../../prover", default-features = false }
-embedded-alloc = "0.6"
-riscv = { version = "0.15", features = ["critical-section-single-hart"] }
-serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] }
-postcard = { version = "1.0", default-features = false, features = ["alloc"] }
+lambda-vm-syscalls = { path = "../../../syscalls" }
+postcard = { version = "1.0", features = ["alloc"] }
diff --git a/bench_vs/lambda/deserialize-only/src/main.rs b/bench_vs/lambda/deserialize-only/src/main.rs
index 8627776a1..7ba9a9d93 100644
--- a/bench_vs/lambda/deserialize-only/src/main.rs
+++ b/bench_vs/lambda/deserialize-only/src/main.rs
@@ -1,93 +1,32 @@
 //! Deserialize-only counterpart to the recursion guest.
 //!
 //! Reads the same private-input blob as `recursion-bench`, postcard-decodes
-//! `(VmProof, Vec<u8>, ProofOptions)`, then commits success
-//! and halts — without ever calling `verify_with_options`. The cycle delta
-//! between this guest and `recursion-bench` is the actual cost of the STARK
-//! verifier inside the VM (everything else being equal).
+//! `(VmProof, Vec<u8>, ProofOptions)`, then commits and halts — without ever
+//! calling `verify_with_options`. The cycle delta between this guest and
+//! `recursion-bench` is the actual cost of the STARK verifier inside the VM.
+//!
+//! Mirrors the recursion guest's std setup (build-std + `lambda_vm_syscalls`)
+//! so the two differ only in the verify call.
 
-#![no_std]
 #![no_main]
 
-extern crate alloc;
-
-use alloc::vec::Vec;
-use core::arch::asm;
-use core::panic::PanicInfo;
-
-use embedded_alloc::TlsfHeap as Heap;
 use lambda_vm_prover::{ProofOptions, VmProof};
-// Required to pull in the riscv crate's critical-section implementation.
-use riscv as _;
-
-const PRIVATE_INPUT_START: usize = 0xFF000000;
-const SYSCALL_COMMIT: u64 = 64;
-const SYSCALL_HALT: u64 = 93;
-const MAX_MEMORY_SIZE: usize = 0xC000_0000;
-
-#[global_allocator]
-static HEAP: Heap = Heap::empty();
-
-#[panic_handler]
-fn panic(_info: &PanicInfo) -> ! {
-    loop {}
-}
-
-fn init_allocator() {
-    unsafe extern "C" {
-        static _end: u8;
-    }
-    let heap_pos = (&raw const _end) as usize;
-    unsafe { HEAP.init(heap_pos, MAX_MEMORY_SIZE - heap_pos) }
-}
 
-fn read_private_input() -> &'static [u8] {
-    let len = unsafe { core::ptr::read_volatile(PRIVATE_INPUT_START as *const u32) } as usize;
-    let data = (PRIVATE_INPUT_START + 4) as *const u8;
-    unsafe { core::slice::from_raw_parts(data, len) }
-}
-
-fn commit(bytes: &[u8]) {
-    unsafe {
-        asm!(
-            "ecall",
-            in("a0") 1u64,
-            in("a1") bytes.as_ptr(),
-            in("a2") bytes.len(),
-            in("a7") SYSCALL_COMMIT,
-        );
-    }
-}
-
-fn halt() -> ! {
-    unsafe {
-        asm!(
-            "ecall",
-            in("a0") 0u64,
-            in("a7") SYSCALL_HALT,
-            options(noreturn),
-        );
-    }
-}
-
-#[unsafe(no_mangle)]
+#[unsafe(export_name = "main")]
 pub fn main() -> ! {
-    init_allocator();
+    lambda_vm_syscalls::allocator::init_allocator();
 
-    let blob = read_private_input();
-    let decoded: (VmProof, Vec<u8>, ProofOptions) =
-        postcard::from_bytes(blob).expect("failed to deserialize");
+    const PANIC_MSG: &str = "PANICKED";
+    std::panic::set_hook(Box::new(|_| unsafe {
+        lambda_vm_syscalls::syscalls::sys_panic(PANIC_MSG.as_ptr(), PANIC_MSG.len())
+    }));
 
-    // Force the commit byte to depend on the actually-decoded value. Without
-    // this, LLVM at -O3 was eliding the postcard decode entirely — the only
-    // sinks for `decoded` were `black_box(&decoded)` (which only forces the
-    // *reference* to materialize, not the pointee) and `Drop`, neither of
-    // which require the decoded bytes to be real. With the commit byte tied
-    // to a deep field of the decoded value, the decode has to run.
-    let proof_options_byte = decoded.2.blowup_factor;
-    let inner_elf_byte = *decoded.1.first().unwrap_or(&0);
-    let marker = proof_options_byte ^ inner_elf_byte;
+    let blob = lambda_vm_syscalls::syscalls::get_private_input();
+    let decoded: (VmProof, Vec<u8>, ProofOptions) =
+        postcard::from_bytes(&blob).expect("failed to deserialize recursion input");
 
-    commit(&[marker]);
-    halt()
+    // Tie the committed byte to the decoded value so LLVM can't elide the decode.
+    let marker = decoded.2.blowup_factor ^ *decoded.1.first().unwrap_or(&0);
+    lambda_vm_syscalls::syscalls::commit(&[marker]);
+    lambda_vm_syscalls::syscalls::sys_halt();
 }

From 75a2421c8b001c0284a0b04d0af4df3b42e4a9ad Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:11:58 -0300
Subject: [PATCH 07/36] build: point profile-recursion make targets at renamed
 tests

---
 Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 60bb8a0c5..801845534 100644
--- a/Makefile
+++ b/Makefile
@@ -234,11 +234,11 @@ test-flamegraph:
 
 test-profile-recursion: test-profile-recursion-single test-profile-recursion-multi
 
-test-profile-recursion-single: compile-programs-rust
-	cargo test --package lambda-vm-prover --lib test_recursion_pc_histogram_1query -- --ignored --nocapture
+test-profile-recursion-single: compile-recursion-elfs
+	cargo test --package lambda-vm-prover --lib test_recursion_profile_1query -- --ignored --nocapture
 
-test-profile-recursion-multi: compile-programs-rust
-	cargo test --package lambda-vm-prover --lib test_recursion_pc_histogram_multiquery -- --ignored --nocapture
+test-profile-recursion-multi: compile-recursion-elfs
+	cargo test --package lambda-vm-prover --lib test_recursion_profile_multiquery -- --ignored --nocapture
 
 # Regenerate the committed ethrex block fixtures (see tooling/ethrex-fixtures).
 # Run after bumping the ethrex rev; README checksums are refreshed automatically.

From 914bdcc6ddb9991555377919ec6830b5aeee498b Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:20:16 -0300
Subject: [PATCH 08/36] docs: trim recursion smoke-test doc comments

---
 prover/src/tests/recursion_smoke_test.rs | 233 ++++++-----------------
 1 file changed, 53 insertions(+), 180 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 963f3d7c6..1679fe1e0 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -1,16 +1,7 @@
-//! End-to-end naive recursion pipeline smoke tests.
-//!
-//! Each test:
-//! 1. Proves an inner program on the host.
-//! 2. Serializes `(VmProof, inner_elf)` with postcard.
-//! 3. Hands that as private input to the recursion guest.
-//! 4. Either **proves** the recursion guest's execution (memory-bounded via
-//!    continuations) and verifies the outer proof (`OuterMode::Prove`), or
-//!    merely **executes** the guest in-VM and reads the committed marker
-//!    straight off the executor's memory (`OuterMode::ExecuteOnly`) — a cheaper
-//!    tier that skips the LDE/FRI that dominate the full pipeline.
-//!
-//! The guest ELFs are assumed built by `make compile-recursion-elfs`.
+//! End-to-end naive recursion pipeline smoke tests: prove an inner program,
+//! hand `(VmProof, elf, opts)` to the in-VM verifier guest, then either prove
+//! the guest's execution (`OuterMode::Prove`) or just execute it
+//! (`OuterMode::ExecuteOnly`). Guest ELFs come from `make compile-recursion-elfs`.
 
 use std::ops::ControlFlow;
 use std::path::PathBuf;
@@ -33,11 +24,8 @@ fn read_guest_elf(root: &std::path::Path, name: &str) -> Vec<u8> {
     })
 }
 
-/// Minimum-security FRI parameters: blowup=2, a single FRI query. Security is
-/// intentionally terrible — used by the capacity-probing test and every cheap
-/// diagnostic below, where the goal is the smallest possible inner proof, not
-/// a sound one. (`GoldilocksCubicProofOptions::with_blowup` derives a query
-/// count from a 128-bit target, far more than we want here.)
+/// Smallest possible inner proof (blowup=2, 1 query). Intentionally insecure —
+/// for the cheap diagnostics, not soundness.
 const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
     stark::proof::options::ProofOptions {
         blowup_factor: 2,
@@ -46,11 +34,8 @@ const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
         grinding_factor: 1,
     };
 
-/// Prove `inner_elf` (fed `inner_input`) under `opts`, then package
-/// `(proof, elf, opts)` into the postcard blob the recursion and
-/// deserialize-only guests consume as their private input. `tag` prefixes the
-/// progress lines. Returns the inner proof — callers that re-verify it on the
-/// host need it — next to the encoded blob.
+/// Prove `inner_elf` under `opts` and postcard-encode `(proof, elf, opts)` into
+/// the guest's private-input blob. Returns the proof and the blob.
 fn prove_inner_and_encode_blob(
     tag: &str,
     inner_elf: &[u8],
@@ -75,29 +60,17 @@ fn prove_inner_and_encode_blob(
     (inner_proof, blob)
 }
 
-/// How far to take the recursion guest after it has been handed the inner
-/// proof. The guest under test is the verifier either way — this only chooses
-/// whether we also prove the guest's own execution.
+/// Whether to also prove the guest's own execution after handing it the proof.
 #[derive(Clone, Copy, Debug)]
 enum OuterMode {
-    /// Execute the guest in-VM and read the committed marker straight off the
-    /// executor's memory. Streams logs via `Executor::resume()` and never
-    /// builds a `Traces`, so footprint stays bounded to the VM's touched
-    /// memory + instruction cache. Skips the LDE/FRI of the full pipeline entirely.
+    /// Execute in-VM, read the committed marker off memory; no LDE/FRI.
     ExecuteOnly,
-    /// Prove the guest's execution via continuations, then verify the outer
-    /// proof on the host. `prove_and_verify_continuation` retains every epoch's
-    /// STARK proof in the bundle before verifying, so peak RAM grows with epoch
-    /// count. Heavy — excluded from CI, run manually. A future verify-one-and-
-    /// discard API extension would make this memory-friendlier.
+    /// Prove the execution (memory-bounded via continuations) and verify on host.
     Prove,
 }
 
-/// Execute the recursion guest in-VM on `blob` and return the bytes it
-/// committed (the success marker the in-VM verifier emits).
-///
-/// Streams execution via `Executor::resume()`. The committed marker is
-/// read directly off the executor's memory. This avoids OOMs.
+/// Execute the recursion guest in-VM on `blob` and return its committed bytes,
+/// read straight off the executor's memory after a streamed run.
 fn execute_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]) -> Vec<u8> {
     use executor::elf::Elf;
     use executor::vm::execution::Executor;
@@ -127,9 +100,8 @@ fn execute_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]
 /// trace+LDE stays under the ~16GiB CI runners.
 const OUTER_EPOCH_SIZE_LOG2: u32 = 16;
 
-/// Prove the recursion guest's execution on `blob` memory-bounded via
-/// continuations and verify the bundle on the host, returning the bytes the
-/// guest committed.
+/// Prove the guest's execution via continuations, verify on host, return the
+/// committed bytes.
 fn prove_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]) -> Vec<u8> {
     let opts =
         crate::GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid");
@@ -149,15 +121,9 @@ fn prove_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8])
     committed
 }
 
-/// Stream a guest's execution via `Executor::resume()`, calling `on_log` for
-/// every `Log` without ever buffering the full log stream (`Executor::run`
-/// would accumulate tens of millions of `Log`s and OOM even a 125 GB box).
-/// `on_log` returns `ControlFlow::Break(())` to stop the run early (e.g. once a
-/// cycle budget is hit); `Continue(())` to keep going. `on_progress(chunks,
-/// total_cycles, elapsed)` fires once per resumed chunk; callers throttle and
-/// format their own progress lines. Returns `(total_cycles, wall_time)` —
-/// `total_cycles` counts logs actually visited, so it is exact even when a run
-/// breaks mid-chunk.
+/// Stream a guest's execution via `Executor::resume()` without buffering the log
+/// stream. `on_log` returns `Break` to stop early; `on_progress` fires per chunk.
+/// Returns `(total_cycles, wall_time)`, exact even on an early break.
 fn drive_executor(
     executor: &mut executor::vm::execution::Executor,
     mut on_log: impl FnMut(&executor::vm::logs::Log) -> ControlFlow<()>,
@@ -184,12 +150,8 @@ fn drive_executor(
     (total_cycles, start.elapsed())
 }
 
-/// Shared preamble for every execute-only diagnostic below: build the standard
-/// recursion private-input blob (an `empty`-program inner proof produced under
-/// `opts`), load guest `guest_name`, and stand up an executor over it. Returns
-/// the guest's raw ELF bytes (callers that resolve PCs pass them to
-/// [`executor::elf::SymbolTable::parse`]), the loaded program, and the
-/// ready-to-drive executor.
+/// Shared preamble: build the blob (an `empty` inner proof under `opts`), load
+/// `guest_name`, and stand up an executor. Returns `(elf_bytes, program, executor)`.
 fn setup_guest_run(
     label: &str,
     guest_name: &str,
@@ -215,12 +177,7 @@ fn setup_guest_run(
     (guest_elf_bytes, program, executor)
 }
 
-/// A `drive_executor` progress callback that prints the throttled
-/// `[label]   ... N chunks, M cycles, T elapsed` line every `stride` chunks —
-/// the readout the counting diagnostics share. Tests that need extra live state
-/// (unique PC count, active step bucket) keep their own closure instead. Takes
-/// `impl Into<String>` so it works with both `&'static` tags and a run's
-/// dynamic `label`.
+/// A `drive_executor` progress callback printing one line every `stride` chunks.
 fn log_progress(label: impl Into<String>, stride: usize) -> impl FnMut(usize, u64, std::time::Duration) {
     let label = label.into();
     move |chunks, cycles, elapsed| {
@@ -230,11 +187,8 @@ fn log_progress(label: impl Into<String>, stride: usize) -> impl FnMut(usize, u6
     }
 }
 
-/// Resolve a guest PC to its (demangled) enclosing function name using the
-/// ELF's own symbol table — the same data `executor::flamegraph` resolves
-/// against. `<unknown>` when no function symbol covers the PC (e.g. PLT stubs
-/// or a release build that dropped symbols). No file:line: the symbol table
-/// carries function ranges only, not DWARF line info.
+/// Demangled enclosing-function name for a PC via the ELF symbol table;
+/// `<unknown>` if none covers it. No file:line (symtab has no DWARF).
 fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
     symbols.lookup(pc).map_or_else(
         || "<unknown>".to_string(),
@@ -242,10 +196,8 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
     )
 }
 
-/// Verifier sub-routines in execution order. LLVM inlines the step bodies, but
-/// closures inside each keep the method name in their mangled symbol, so
-/// `run_profile` advances the step bucket by substring-matching the enclosing
-/// symbol. A step with no matching symbol merges into the previous bucket.
+/// Verifier sub-routines in execution order; `run_profile` buckets cycles by
+/// substring-matching the enclosing symbol (a missing step merges into the prior).
 const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "replay_rounds_after_round_1",
     "step_2_verify_claimed_composition_polynomial",
@@ -253,15 +205,12 @@ const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "step_4_verify_trace_and_composition_openings",
 ];
 
-/// `blowup=8` inner-proof options: the security-derived multi-query count (tens
-/// of queries, 128-bit) used by every `multiquery` profiling variant.
+/// `blowup=8` (128-bit, multi-query) options for the `multiquery` variants.
 fn blowup8() -> stark::proof::options::ProofOptions {
     crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid")
 }
 
-/// Fold the PC histogram by enclosing function and print the top-25 by cycles.
-/// Folded because an inlined kernel spreads across many PCs; no per-address
-/// table since a bare PC isn't actionable without file:line.
+/// Print the top-25 functions by cycles, folding the PC histogram by symbol.
 fn print_function_table(
     symbols: &executor::elf::SymbolTable,
     pc_hist: std::collections::HashMap<u64, u64>,
@@ -297,9 +246,7 @@ fn print_function_table(
     }
 }
 
-/// Print the monotonic per-verifier-step cycle bucketing. `buckets[0]` is
-/// pre-step-1 setup (alloc + postcard decode + `VmAirs::new`); `buckets[i]` is
-/// verifier step i (with a missing step's cycles merged into the previous one).
+/// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
 fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
     let labels = [
         "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
@@ -321,12 +268,9 @@ fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
     }
 }
 
-/// Single-pass execute-only profiler. Always prints total cycles + wall time +
-/// a rough trace/LDE size estimate. With `detailed`, the same pass also builds
-/// the PC histogram and verifier-step bucketing and prints the top-25 functions
-/// and the per-step breakdown (the two always come together); `!detailed` does
-/// no per-log work, so it's just a fast cycle counter. `progress_stride`
-/// throttles the readout (recursion large, the deserialize-only control small).
+/// Single-pass execute-only profiler. Always prints total cycles + a rough
+/// trace/LDE estimate; with `detailed`, also the top-25 functions + per-step
+/// breakdown (one streamed pass). `!detailed` does no per-log work.
 fn run_profile(
     guest_name: &str,
     progress_stride: usize,
@@ -441,10 +385,8 @@ fn run_profile(
     eprintln!("============================================================");
 }
 
-/// Core pipeline: prove an inner program with the given options, hand the
-/// proof+ELF+options to the recursion guest, then take the guest to `mode`
-/// (execute-only or full prove) and assert it committed the `[1]` success
-/// marker — i.e. the in-VM verifier accepted the inner proof.
+/// Core pipeline: prove the inner program, run the guest to `mode`, assert it
+/// committed `[1]` (the in-VM verifier accepted the proof).
 fn run_recursion_pipeline_with_options(
     label: &str,
     inner_elf_bytes: &[u8],
@@ -491,8 +433,7 @@ fn run_recursion_pipeline_with_options(
     eprintln!("[{label}] guest committed [1]: in-VM verify accepted ✓");
 }
 
-/// Convenience wrapper using `blowup=8` for the inner proof — the default for
-/// the `empty` and `fibonacci` cases, chosen to keep outer-prove memory tractable.
+/// `run_recursion_pipeline_with_options` with `blowup=8` (the `empty`/`fibonacci` default).
 fn run_recursion_pipeline(
     label: &str,
     inner_elf_bytes: &[u8],
@@ -510,9 +451,8 @@ fn run_recursion_pipeline(
     );
 }
 
-/// Reproduce the recursion guest's EXACT path on the host — decode the postcard
-/// blob into `(VmProof, Vec<u8>, ProofOptions)` and call `verify_with_options`.
-/// Cheap regression guard.
+/// Decode the blob on the host and verify — a cheap guard on the encode/decode
+/// contract without running the VM.
 #[test]
 #[ignore = "needs prebuilt guest ELF (make compile-recursion-elfs)"]
 fn test_recursion_blob_decodes_and_verifies_on_host() {
@@ -545,8 +485,7 @@ fn test_recursion_blob_decodes_and_verifies_on_host() {
 
 // === Execute-only tier ========================================================
 
-/// Execute-only mirror of `test_recursion_prove_empty`: verify a `blowup=8`
-/// proof of the empty program in-VM.
+/// Execute-only: verify a `blowup=8` proof of the empty program in-VM.
 #[test]
 #[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
 fn test_recursion_execute_empty() {
@@ -560,8 +499,7 @@ fn test_recursion_execute_empty() {
     );
 }
 
-/// Execute-only mirror of `test_recursion_prove_1query`: smallest possible
-/// inner proof (blowup=2, 1 query) → least guest work.
+/// Execute-only: smallest inner proof (blowup=2, 1 query) → least guest work.
 #[test]
 #[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
 fn test_recursion_execute_1query() {
@@ -576,8 +514,7 @@ fn test_recursion_execute_1query() {
     );
 }
 
-/// Execute-only mirror of `test_recursion_prove`: verify a `blowup=8` proof of
-/// fibonacci(10) in-VM.
+/// Execute-only: verify a `blowup=8` proof of fibonacci(10) in-VM.
 #[test]
 #[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
 fn test_recursion_execute() {
@@ -597,8 +534,7 @@ fn test_recursion_execute() {
 
 // === Full-prove tier ==========================================================
 
-/// Inner program: empty (halt immediately). Useful for measuring the
-/// verifier's intrinsic recursion overhead.
+/// Inner program: empty — the verifier's intrinsic recursion overhead.
 #[test]
 #[ignore = "slow: memory-bounded continuation prove of the verifier-in-VM"]
 fn test_recursion_prove_empty() {
@@ -612,8 +548,7 @@ fn test_recursion_prove_empty() {
     );
 }
 
-/// Inner program: empty, but with the absolute-minimum FRI parameters
-/// (blowup=2, **fri_number_of_queries=1**). For quick profiling only.
+/// Inner program: empty, blowup=2/1-query. Quick profiling only.
 #[test]
 #[ignore = "slow: memory-bounded continuation prove of the verifier-in-VM"]
 fn test_recursion_prove_1query() {
@@ -629,18 +564,8 @@ fn test_recursion_prove_1query() {
     );
 }
 
-/// Diagnostic: build the inner proof and dump the recursion guest's private-input
-/// blob to `/tmp/recursion_input.bin` so the CLI's `execute --flamegraph` can
-/// consume it.
-///
-/// Usage after running this test:
-/// ```
-/// cargo run -p cli --release -- execute \
-///     bench_vs/lambda/recursion/target/riscv64im-lambda-vm-elf/release/recursion-bench \
-///     --private-input /tmp/recursion_input.bin \
-///     --flamegraph /tmp/recursion_folded.txt
-/// cat /tmp/recursion_folded.txt | inferno-flamegraph > /tmp/recursion_flamegraph.svg
-/// ```
+/// Dump the guest's private-input blob to `/tmp/recursion_input.bin` for the
+/// CLI's `execute --flamegraph`.
 #[test]
 #[ignore = "diagnostic: writes recursion private input to /tmp/recursion_input.bin"]
 fn test_dump_recursion_input() {
@@ -669,8 +594,7 @@ fn test_recursion_cycles_multiquery() {
     run_profile("recursion", 500, blowup8(), false);
 }
 
-/// Full profile (top-25 functions + per-step breakdown) of the 1-query run —
-/// the cheapest verifier run, dominated by fixed setup.
+/// Full profile (top-25 + per-step) of the 1-query run.
 #[test]
 #[ignore = "diagnostic: ~8 min; recursion guest histogram + steps (1 query)"]
 fn test_recursion_profile_1query() {
@@ -684,27 +608,8 @@ fn test_recursion_profile_multiquery() {
     run_profile("recursion", 500, blowup8(), true);
 }
 
-/// Diagnostic: count the distinct 4 KB memory pages the recursion guest
-/// touches when verifying a small inner proof.
-///
-/// We suspect the outer prover's 125 GB OOM wall is dominated by per-page
-/// PAGE-table overhead. The number of PAGE tables the prover would build
-/// equals the number of distinct 4 KB pages the executor touches — code,
-/// heap, private input, and stack. This test surfaces that count without
-/// running the prover.
-///
-/// Layout (per `executor::constants` + `bench_vs/lambda/recursion/src/main.rs`):
-/// - Code/static: whatever PT_LOAD segments the recursion ELF carries.
-/// - Heap: `_end .. 0xC000_0000` (`MAX_MEMORY_SIZE`); `TlsfHeap` scatters
-///   allocations across this region.
-/// - Private input: starts at `PRIVATE_INPUT_START_INDEX = 0xFF000000`.
-/// - Stack: top of address space (down from `STACK_TOP = 0xFFFFFFFFFFFFFFF0`).
-///
-/// Interpretation (rough):
-/// - <1,000 pages: PAGE-table overhead is not the bottleneck.
-/// - 10k-100k pages: TLSF heap fragmentation; design a tighter bump allocator
-///   and re-measure.
-/// - >100k pages: postcard decode dominates; consider streaming decode.
+/// Count the distinct 4 KB pages the guest touches (code/heap/input/stack) — a
+/// proxy for the prover's per-page PAGE-table overhead, without running it.
 #[test]
 #[ignore = "diagnostic: counts distinct 4 KB memory pages touched by the recursion guest"]
 fn test_recursion_page_count() {
@@ -807,39 +712,19 @@ fn test_recursion_page_count() {
     eprintln!("============================================================");
 }
 
-/// Diagnostic: build a **sampled** call-stack histogram of the recursion guest.
-///
-/// Like `test_recursion_pc_histogram` but groups by full call stack (not PC).
-/// To stay fast, only every `SAMPLE_RATE`-th log is recorded into the histogram.
-/// The call stack itself is updated on every log (skipping would corrupt it).
-///
-/// Output is written to `/tmp/recursion_folded_sampled.txt` in
-/// inferno-flamegraph "folded stacks" format. Pipe it through:
-///
-///     cat /tmp/recursion_folded_sampled.txt | inferno-flamegraph > svg.svg
-///
-/// Expect ~10-20 minutes for SAMPLE_RATE=100 on a 40B-cycle guest.
+/// Sampled call-stack flamegraph of the recursion guest, written to
+/// `/tmp/recursion_folded_sampled.txt` (inferno "folded stacks" format).
 #[test]
 #[ignore = "diagnostic: sampled flamegraph for the verifier-in-VM"]
 fn test_recursion_sampled_flamegraph() {
     use executor::flamegraph::FlamegraphGenerator;
     use std::io::BufWriter;
 
-    /// 1 in N logs is fed to `process_logs`, which both updates the call
-    /// stack and records a sample. At 1, every cycle goes through — the call
-    /// stack stays exactly in sync with execution so frame widths are
-    /// trustworthy, but the per-cycle cost (~57µs) limits how many cycles
-    /// we can cover within a wall-clock budget.
-    ///
-    /// At SAMPLE_RATE > 1, every CALL/RETURN that lands on a skipped cycle
-    /// silently desyncs the stack, producing the "stuck-in-visit_seq" effect
-    /// we saw at 1:1000. Use values > 1 only when stack accuracy is
-    /// expendable.
+    /// 1-in-N logs sampled. >1 desyncs the call stack on skipped CALL/RETURNs,
+    /// so keep at 1 unless stack accuracy is expendable.
     const SAMPLE_RATE: usize = 1;
 
-    /// Stop the executor early once we've covered this many cycles.
-    /// Set to 0 to run to completion (40B+ cycles, hours at SAMPLE_RATE=1).
-    /// At SAMPLE_RATE=1, ~57µs per cycle means 5M cycles ≈ 5 min wall time.
+    /// Stop after this many cycles (0 = run to completion).
     const CYCLE_BUDGET: u64 = 5_000_000;
 
     let (recursion_elf_bytes, program, mut executor) =
@@ -935,20 +820,8 @@ fn test_recursion_sampled_flamegraph() {
     eprintln!("============================================================");
 }
 
-/// Diagnostic: host-side per-step timings for the verifier.
-///
-/// Runs an inner prove (empty guest, blowup=2, 1 query) and then verifies it
-/// on the host. When built with `--features stark/instruments`, the verifier
-/// prints `Time spent: ...` for each of the four steps (replay challenges,
-/// composition polynomial, FRI, DEEP openings) plus the step-1-replay it
-/// does before step 2. Lets us see the host-side split in seconds, without
-/// running anything inside the VM.
-///
-/// Usage:
-/// ```
-/// cargo test --release -p lambda-vm-prover --features stark/instruments \
-///   --lib test_host_verify_step_timings -- --ignored --nocapture
-/// ```
+/// Host-side per-step verifier timings (build with `--features stark/instruments`
+/// for the `Time spent:` lines). No VM execution.
 #[test]
 #[ignore = "diagnostic: prints host-side verifier step timings"]
 fn test_host_verify_step_timings() {

From 45fe99cb7dab265561bbf831a1b04bf06c93f93b Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:23:07 -0300
Subject: [PATCH 09/36] refactor(prover): drop test_host_verify_step_timings

The smoke pipelines already host-verify the inner proof, so building with
--features stark/instruments surfaces the per-step timings; the dedicated test
was just that host verification without the guest run. The flag is now
documented in the module doc.
---
 prover/src/tests/recursion_smoke_test.rs | 38 +++---------------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 1679fe1e0..b1c3f7e1f 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -2,6 +2,10 @@
 //! hand `(VmProof, elf, opts)` to the in-VM verifier guest, then either prove
 //! the guest's execution (`OuterMode::Prove`) or just execute it
 //! (`OuterMode::ExecuteOnly`). Guest ELFs come from `make compile-recursion-elfs`.
+//!
+//! Every pipeline host-verifies the inner proof, so building with
+//! `--features stark/instruments` makes any of these tests print the verifier's
+//! per-step `Time spent:` timings.
 
 use std::ops::ControlFlow;
 use std::path::PathBuf;
@@ -820,40 +824,6 @@ fn test_recursion_sampled_flamegraph() {
     eprintln!("============================================================");
 }
 
-/// Host-side per-step verifier timings (build with `--features stark/instruments`
-/// for the `Time spent:` lines). No VM execution.
-#[test]
-#[ignore = "diagnostic: prints host-side verifier step timings"]
-fn test_host_verify_step_timings() {
-    let root = workspace_root();
-    let empty_path =
-        root.join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench");
-    let empty_elf_bytes = std::fs::read(&empty_path).expect("read empty-bench");
-
-    let inner_proof_options = MIN_PROOF_OPTIONS;
-
-    eprintln!("[host-verify] proving empty (blowup=2, fri_queries=1) ...");
-    let inner_proof = crate::prove_with_options_and_inputs(
-        &empty_elf_bytes,
-        &[],
-        &inner_proof_options,
-        &crate::MaxRowsConfig::default(),
-    )
-    .expect("inner prove should succeed");
-
-    eprintln!("[host-verify] verifying on host (with instruments) ...");
-    let ok = crate::verify_with_options(
-        &inner_proof,
-        &empty_elf_bytes,
-        &inner_proof_options,
-        None,
-        None,
-    )
-    .expect("verify errored");
-    assert!(ok, "proof must verify");
-    eprintln!("[host-verify] verified OK");
-}
-
 // Control guest: decodes the blob and halts. Its cycle count subtracted from
 // the matching recursion run isolates the in-VM verifier cost.
 

From 5df25188abe8114901e34d1ee5149fc76d4d8046 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:29:58 -0300
Subject: [PATCH 10/36] Remove the unused SP1 verifier bench program

It was never wired into the bench harness or CI (run.sh uses sp1/fibonacci),
and its in-VM verifier-cost comparison is superseded by the recursion profile
tests in this PR.
---
 bench_vs/sp1/verifier/Cargo.toml          |  3 -
 bench_vs/sp1/verifier/program/Cargo.toml  | 10 ---
 bench_vs/sp1/verifier/program/src/main.rs | 34 ----------
 bench_vs/sp1/verifier/script/Cargo.toml   | 13 ----
 bench_vs/sp1/verifier/script/build.rs     |  5 --
 bench_vs/sp1/verifier/script/src/main.rs  | 83 -----------------------
 6 files changed, 148 deletions(-)
 delete mode 100644 bench_vs/sp1/verifier/Cargo.toml
 delete mode 100644 bench_vs/sp1/verifier/program/Cargo.toml
 delete mode 100644 bench_vs/sp1/verifier/program/src/main.rs
 delete mode 100644 bench_vs/sp1/verifier/script/Cargo.toml
 delete mode 100644 bench_vs/sp1/verifier/script/build.rs
 delete mode 100644 bench_vs/sp1/verifier/script/src/main.rs

diff --git a/bench_vs/sp1/verifier/Cargo.toml b/bench_vs/sp1/verifier/Cargo.toml
deleted file mode 100644
index fc24039c2..000000000
--- a/bench_vs/sp1/verifier/Cargo.toml
+++ /dev/null
@@ -1,3 +0,0 @@
-[workspace]
-members = ["program", "script"]
-resolver = "2"
diff --git a/bench_vs/sp1/verifier/program/Cargo.toml b/bench_vs/sp1/verifier/program/Cargo.toml
deleted file mode 100644
index 7fbc9c5ce..000000000
--- a/bench_vs/sp1/verifier/program/Cargo.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[package]
-name = "verifier-program"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-sp1-zkvm = "6.0.1"
-lambda-vm-prover = { path = "../../../../prover", default-features = false }
-serde = { version = "=1.0.219", default-features = false, features = ["derive", "alloc"] }
-postcard = { version = "1.0", default-features = false, features = ["alloc"] }
diff --git a/bench_vs/sp1/verifier/program/src/main.rs b/bench_vs/sp1/verifier/program/src/main.rs
deleted file mode 100644
index c63bb67ca..000000000
--- a/bench_vs/sp1/verifier/program/src/main.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-//! SP1 guest that runs lambda-vm's `verify_with_options` on a single proof.
-//!
-//! Input layout (postcard-encoded `Vec<u8>` written via `SP1Stdin::write_vec`):
-//!   `(VmProof, Vec<u8>, ProofOptions)`
-//! where the inner `Vec<u8>` is the inner program's ELF bytes.
-//!
-//! Output: commits `[1u8]` on successful verify; the guest panics otherwise.
-//!
-//! Caveats:
-//! - The verifier hashes through the `keccak` crate. SP1 has a Keccak
-//!   precompile but it patches `tiny-keccak`, not `keccak`. We don't patch
-//!   here, so Keccak runs as software inside the guest. Cycle counts will be
-//!   inflated by that overhead. Worth keeping in mind when interpreting the
-//!   number relative to lambda-vm's in-VM count.
-
-#![no_main]
-
-extern crate alloc;
-
-use alloc::vec::Vec;
-
-use lambda_vm_prover::{ProofOptions, VmProof};
-
-sp1_zkvm::entrypoint!(main);
-
-pub fn main() {
-    let blob = sp1_zkvm::io::read_vec();
-    let (vm_proof, inner_elf, options): (VmProof, Vec<u8>, ProofOptions) =
-        postcard::from_bytes(&blob).expect("failed to deserialize input");
-    let ok = lambda_vm_prover::verify_with_options(&vm_proof, &inner_elf, &options, None, None)
-        .expect("verify errored");
-    assert!(ok, "inner proof failed verification");
-    sp1_zkvm::io::commit_slice(&[1u8]);
-}
diff --git a/bench_vs/sp1/verifier/script/Cargo.toml b/bench_vs/sp1/verifier/script/Cargo.toml
deleted file mode 100644
index 3198059bd..000000000
--- a/bench_vs/sp1/verifier/script/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "verifier-script"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-sp1-sdk = { version = "6.0.1", features = ["blocking", "profiling"] }
-lambda-vm-prover = { path = "../../../../prover" }
-stark = { path = "../../../../crypto/stark" }
-postcard = { version = "1.0", features = ["alloc"] }
-
-[build-dependencies]
-sp1-build = "6.0.1"
diff --git a/bench_vs/sp1/verifier/script/build.rs b/bench_vs/sp1/verifier/script/build.rs
deleted file mode 100644
index d6cf925d6..000000000
--- a/bench_vs/sp1/verifier/script/build.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-use sp1_build::build_program_with_args;
-
-fn main() {
-    build_program_with_args("../program", Default::default());
-}
diff --git a/bench_vs/sp1/verifier/script/src/main.rs b/bench_vs/sp1/verifier/script/src/main.rs
deleted file mode 100644
index 86e46a710..000000000
--- a/bench_vs/sp1/verifier/script/src/main.rs
+++ /dev/null
@@ -1,83 +0,0 @@
-//! Host driver: prove an inner empty program on lambda-vm, then execute the
-//! lambda-vm verifier inside SP1's executor, printing the cycle count.
-//!
-//! Set `TRACE_FILE=profiles/verifier.json` to capture a DWARF-attributed
-//! profile (1 sample = 1 cycle). The output can be opened with
-//! `samply load profiles/verifier.json`.
-
-use std::path::PathBuf;
-
-use sp1_sdk::blocking::{Prover, ProverClient};
-use sp1_sdk::{SP1Stdin, include_elf};
-
-const VERIFIER_ELF: sp1_sdk::Elf = include_elf!("verifier-program");
-
-fn workspace_root() -> PathBuf {
-    // CARGO_MANIFEST_DIR for this crate is `<root>/bench_vs/sp1/verifier/script`.
-    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-        .ancestors()
-        .nth(4)
-        .expect("workspace root")
-        .to_path_buf()
-}
-
-fn main() {
-    sp1_sdk::utils::setup_logger();
-
-    let root = workspace_root();
-    let empty_elf_path = root
-        .join("bench_vs/lambda/empty/target/riscv64im-lambda-vm-elf/release/empty-bench");
-    assert!(
-        empty_elf_path.exists(),
-        "empty-bench ELF not found at {} — run `bash bench_vs/build_recursion_elfs.sh` first",
-        empty_elf_path.display(),
-    );
-    let inner_elf = std::fs::read(&empty_elf_path).expect("read empty-bench");
-
-    let options = stark::proof::options::ProofOptions {
-        blowup_factor: 2,
-        fri_number_of_queries: 1,
-        coset_offset: 3,
-        grinding_factor: 1,
-    };
-
-    println!("[sp1-verifier] proving inner (empty, blowup=2, 1 query) ...");
-    let inner_proof = lambda_vm_prover::prove_with_options_and_inputs(
-        &inner_elf,
-        &[],
-        &options,
-        &lambda_vm_prover::MaxRowsConfig::default(),
-    )
-    .expect("inner prove should succeed");
-
-    let blob = postcard::to_allocvec(&(&inner_proof, &inner_elf, &options))
-        .expect("postcard encode failed");
-    println!("[sp1-verifier] postcard blob: {} bytes", blob.len());
-
-    let client = ProverClient::from_env();
-    let mut stdin = SP1Stdin::new();
-    stdin.write_vec(blob);
-
-    println!("[sp1-verifier] executing verifier in SP1 ...");
-    let (_, report) = client
-        .execute(VERIFIER_ELF.clone(), stdin)
-        .run()
-        .expect("execute failed");
-
-    let cycles = report.total_instruction_count();
-    println!();
-    println!("============================================================");
-    println!("  SP1 EXECUTION SUMMARY — lambda-vm verifier inside SP1");
-    println!("============================================================");
-    println!("  Total cycles : {cycles}");
-    println!();
-    println!("  Compare against lambda-vm in-VM count (~40.5B for the same");
-    println!("  proof). Both VMs target riscv64im, so word width is symmetric.");
-    println!("  Main remaining asymmetry: lambda-vm's KeccakPermute precompile");
-    println!("  is patched on its guests but SP1 does not patch `keccak` (only");
-    println!("  `tiny-keccak`), so Keccak rounds run as software in SP1 here.");
-    println!();
-    println!("  If TRACE_FILE was set, the profile was written there.");
-    println!("  Render with: samply load <trace>");
-    println!("============================================================");
-}

From 83e467701399f9ac8533164f2afffebd31b487e9 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:41:40 -0300
Subject: [PATCH 11/36] cargo fmt

---
 prover/src/tests/recursion_smoke_test.rs | 39 +++++++++++++++++++-----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index b1c3f7e1f..9679bd2f9 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -83,8 +83,11 @@ fn execute_outer_and_commit(label: &str, recursion_elf_bytes: &[u8], blob: &[u8]
     let program = Elf::load(recursion_elf_bytes).expect("load recursion elf");
     let mut executor = Executor::new(&program, blob.to_vec()).expect("executor new");
 
-    let (total_cycles, exec_time) =
-        drive_executor(&mut executor, |_log| ControlFlow::Continue(()), |_, _, _| {});
+    let (total_cycles, exec_time) = drive_executor(
+        &mut executor,
+        |_log| ControlFlow::Continue(()),
+        |_, _, _| {},
+    );
 
     let committed = executor
         .finish()
@@ -136,7 +139,10 @@ fn drive_executor(
     let start = std::time::Instant::now();
     let mut total_cycles: u64 = 0;
     let mut chunks: usize = 0;
-    while let Some(logs) = executor.resume().expect("executor resume failed (guest panicked in-VM?)") {
+    while let Some(logs) = executor
+        .resume()
+        .expect("executor resume failed (guest panicked in-VM?)")
+    {
         let mut stop = false;
         for log in logs {
             total_cycles += 1;
@@ -182,7 +188,10 @@ fn setup_guest_run(
 }
 
 /// A `drive_executor` progress callback printing one line every `stride` chunks.
-fn log_progress(label: impl Into<String>, stride: usize) -> impl FnMut(usize, u64, std::time::Duration) {
+fn log_progress(
+    label: impl Into<String>,
+    stride: usize,
+) -> impl FnMut(usize, u64, std::time::Duration) {
     let label = label.into();
     move |chunks, cycles, elapsed| {
         if chunks.is_multiple_of(stride) {
@@ -223,7 +232,9 @@ fn print_function_table(
     let mut by_function: std::collections::HashMap<String, (u64, u64)> =
         std::collections::HashMap::new();
     for (pc, count) in &pc_hist {
-        let entry = by_function.entry(resolve_pc(symbols, *pc)).or_insert((0, 0));
+        let entry = by_function
+            .entry(resolve_pc(symbols, *pc))
+            .or_insert((0, 0));
         entry.0 += *count; // cycles
         entry.1 += 1; // distinct PCs folded into this function
     }
@@ -299,18 +310,30 @@ fn run_profile(
             "{guest_name} ELF has no symbol table — was it stripped?"
         );
         for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-            let n = symbols.functions().iter().filter(|f| f.name.contains(kw)).count();
+            let n = symbols
+                .functions()
+                .iter()
+                .filter(|f| f.name.contains(kw))
+                .count();
             eprintln!(
                 "[profile] step {}: keyword={kw:?} -> {n} symbol(s) {}",
                 i + 1,
-                if n > 0 { "" } else { "(no match; merges into previous bucket)" },
+                if n > 0 {
+                    ""
+                } else {
+                    "(no match; merges into previous bucket)"
+                },
             );
         }
     }
 
     eprintln!(
         "[profile] executing {guest_name} guest ({}) ...",
-        if detailed { "histogram + steps" } else { "cycle counter" }
+        if detailed {
+            "histogram + steps"
+        } else {
+            "cycle counter"
+        }
     );
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,

From 6312108fea3f9d09f2701e3b26cd375de4466ebb Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:33:55 -0300
Subject: [PATCH 12/36] ci: run the multi-query matrix entry with the multi test

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
---
 .github/workflows/profile-recursion.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/profile-recursion.yml b/.github/workflows/profile-recursion.yml
index 420cebfcb..d7fb12447 100644
--- a/.github/workflows/profile-recursion.yml
+++ b/.github/workflows/profile-recursion.yml
@@ -39,7 +39,7 @@ jobs:
             test: single
             title: "Single query (blowup=2, 1 query)"
           - name: multi-query
-            test: single
+            test: multi
             title: "Multi query (blowup=8, 128-bit)"
     steps:
       - name: React to comment

From c83dbcc69068938bbf93a725080b680eb59ea6cf Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:34:29 -0300
Subject: [PATCH 13/36] ci: tee the profile test output to /tmp/hist.log

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
---
 .github/workflows/profile-recursion.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/profile-recursion.yml b/.github/workflows/profile-recursion.yml
index d7fb12447..1e2f2ae0c 100644
--- a/.github/workflows/profile-recursion.yml
+++ b/.github/workflows/profile-recursion.yml
@@ -84,7 +84,7 @@ jobs:
           # test triggers picks this up via the Makefile's `SYSROOT_DIR ?=`.
           export SYSROOT_DIR="$HOME/.lambda-vm-sysroot"
           set -o pipefail
-          make test-profile-recursion-$TEST
+          make test-profile-recursion-$TEST 2>&1 | tee /tmp/hist.log
 
       - name: Aggregate into a per-function fragment
         if: always()

From 30c9d67bb1c515d7122d937213d768777637a488 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:43:55 -0300
Subject: [PATCH 14/36] ci: gate recursion-profile comment job on profile not
 being skipped

---
 .github/workflows/profile-recursion.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/profile-recursion.yml b/.github/workflows/profile-recursion.yml
index 1e2f2ae0c..0e614fcd8 100644
--- a/.github/workflows/profile-recursion.yml
+++ b/.github/workflows/profile-recursion.yml
@@ -106,7 +106,10 @@ jobs:
   # Stitch the matrix fragments into a single PR comment.
   comment:
     needs: profile
-    if: always() && github.event_name == 'issue_comment'
+    # always() so partial-matrix failures still post; skip when `profile` was
+    # skipped (non-/profile_recursion or non-member comment) so this job — and
+    # the self-hosted bench runner it spins up — doesn't fire on every comment.
+    if: always() && github.event_name == 'issue_comment' && needs.profile.result != 'skipped'
     runs-on: [self-hosted, bench]
     steps:
       - name: Get PR head ref

From ce36d78bff22484e2635366c1534a6bc6ce674b5 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 18:49:04 -0300
Subject: [PATCH 15/36] lint

---
 prover/src/tests/recursion_smoke_test.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 9679bd2f9..f0196b9c6 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -787,7 +787,7 @@ fn test_recursion_sampled_flamegraph() {
             // body. Skipped logs lose stack accuracy — acceptable diagnostic
             // quality at higher rates.
             #[allow(clippy::modulo_one)]
-            let take = i % SAMPLE_RATE == 0;
+            let take = i.is_multiple_of(SAMPLE_RATE);
             if take {
                 generator
                     .borrow_mut()

From 134f81c7d6efd333c4040a216590ddab01d24580 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 21:11:40 -0300
Subject: [PATCH 16/36] inline(never) for high-level steps to avoid missing
 symbols

---
 crypto/stark/src/verifier.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index 03119f617..d4186e563 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -97,6 +97,7 @@ pub trait IsStarkVerifier<
     /// Checks whether the purported evaluations of the composition polynomial parts and the trace
     /// polynomials at the out-of-domain challenge are consistent.
     /// See https://lambdaclass.github.io/lambdaworks/starks/protocol.html#step-2-verify-claimed-composition-polynomial
+    #[inline(never)]
     fn step_2_verify_claimed_composition_polynomial(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,
@@ -241,6 +242,7 @@ pub trait IsStarkVerifier<
     /// Reconstructs the Deep composition polynomial evaluations at the challenge indices values using the provided
     /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the
     /// FRI decommitments are valid and correspond to the Deep composition polynomial.
+    #[inline(never)]
     fn step_3_verify_fri(
         proof: &StarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
@@ -396,6 +398,7 @@ pub trait IsStarkVerifier<
     /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial
     /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query
     /// index challenges.
+    #[inline(never)]
     fn step_4_verify_trace_and_composition_openings(
         proof: &StarkProof<Field, FieldExtension, PI>,
         challenges: &Challenges<FieldExtension>,
@@ -903,6 +906,7 @@ pub trait IsStarkVerifier<
 
     /// Replays rounds 2, 3 and 4 of the protocol for a given proof, assuming round 1 has
     /// already been replayed and the RAP challenges are known.
+    #[inline(never)]
     fn replay_rounds_after_round_1(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,

From b4292f398e56fda3ace772b93800a2c9e7328d73 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Tue, 30 Jun 2026 21:23:55 -0300
Subject: [PATCH 17/36] Revert inline(never) for after_round_1

For some reason, that attribute alone appears to completely inhibit the
effects of the next PRs' pre-built commitments and vkey optimizations.
---
 crypto/stark/src/verifier.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index d4186e563..7764ea679 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -906,7 +906,6 @@ pub trait IsStarkVerifier<
 
     /// Replays rounds 2, 3 and 4 of the protocol for a given proof, assuming round 1 has
     /// already been replayed and the RAP challenges are known.
-    #[inline(never)]
     fn replay_rounds_after_round_1(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,

From 62c50cacf86c583c1a5379423ea576c9cdeea6d2 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 00:22:54 -0300
Subject: [PATCH 18/36] fix: reintroduce addr2line

Step detection was misbehaving because inlined functions do not emit
symbols. The options were either marking the steps `#[inline(never)]`,
which in the case of `replay_rounds_after_round_1` inhibits optimizations,
or resolving PCs through DWARF debug info with addr2line, so that inlined
steps still count towards their own function. We went with the latter.

Since the dependency is there anyway, we also use it to expand the
detailed profile with a per-PC file:line breakdown.
---
 .../scripts/aggregate_recursion_histogram.py  |  59 ++++-
 Cargo.lock                                    | 111 +++++++++
 bench_vs/lambda/deserialize-only/Cargo.toml   |   3 +
 bench_vs/lambda/empty/Cargo.toml              |   3 +
 bench_vs/lambda/fibonacci/Cargo.toml          |   3 +
 bench_vs/lambda/recursion/Cargo.toml          |   3 +
 crypto/stark/src/verifier.rs                  |   3 -
 prover/Cargo.toml                             |   1 +
 prover/src/tests/recursion_smoke_test.rs      | 220 ++++++++++++------
 9 files changed, 318 insertions(+), 88 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index 1ae34ff70..a46cab85e 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -1,15 +1,20 @@
 #!/usr/bin/env python3
-"""Format the recursion-guest per-function profile as a Markdown PR comment.
+"""Format the recursion-guest profile tables as a Markdown PR comment.
 
-`test_recursion_pc_histogram` prints a per-function summary table: the cycles
-folded over each function's PCs, computed across the *full* histogram — the view
-that shows where the cycles actually go. We parse that table and render it as
-Markdown.
+`test_recursion_profile_*` prints two tables: a per-function summary (cycles
+folded over each function's PCs, resolved via DWARF so inlined steps still
+count towards their own function) and a per-PC detail table (the hottest raw
+program counters, each resolved to file:line). We parse both and render them
+as Markdown.
 
     Top 25 functions by cycle count (aggregated over their PCs):
     rank          cycles        %    cum %    PCs  function
        1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
+    Top 100 PCs by cycle count:
+    rank          cycles        %    cum %  pc          location (function)
+       1          123456   12.34%   12.34%  0x00012ab4  src/verifier.rs:250  (...)
+
 Reads the test's captured output from argv[1]; writes the Markdown body to
 argv[2] (or stdout).
 """
@@ -22,7 +27,12 @@
     r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
 )
 FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
-# The "====" rule the test prints right after the (now sole) function table.
+# A per-PC detail row: rank, cycles, pct%, cum%, pc (hex), location (function).
+PC_ROW = re.compile(
+    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(0x[0-9a-fA-F]+)\s+(.*\S)\s*$"
+)
+PC_TABLE_START = re.compile(r"Top \d+ PCs by cycle count")
+# The "====" rule the test prints right after the tables.
 TABLE_END = re.compile(r"^=+\s*$")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
 UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
@@ -32,7 +42,9 @@
 def parse(text):
     total_cycles = unique_pcs = exec_time = None
     rows = []
+    pc_rows = []
     in_fn_table = False
+    in_pc_table = False
     for line in text.splitlines():
         if total_cycles is None and (m := TOTAL_CYCLES.search(line)):
             total_cycles = int(m.group(1))
@@ -43,8 +55,12 @@ def parse(text):
         if FN_TABLE_START.search(line):
             in_fn_table = True
             continue
-        if in_fn_table and TABLE_END.match(line):
+        if PC_TABLE_START.search(line):
+            in_pc_table = True
+            continue
+        if TABLE_END.match(line):
             in_fn_table = False
+            in_pc_table = False
             continue
         if in_fn_table and (m := FN_ROW.match(line)):
             rows.append(
@@ -56,14 +72,24 @@ def parse(text):
                     "fn": m.group(5),
                 }
             )
-    return total_cycles, unique_pcs, exec_time, rows
+        if in_pc_table and (m := PC_ROW.match(line)):
+            pc_rows.append(
+                {
+                    "cycles": int(m.group(1)),
+                    "pct": m.group(2),
+                    "cum": m.group(3),
+                    "pc": m.group(4),
+                    "loc": m.group(5),
+                }
+            )
+    return total_cycles, unique_pcs, exec_time, rows, pc_rows
 
 
 def short(name, width=90):
     return name if len(name) <= width else name[: width - 1] + "…"
 
 
-def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest profile"):
+def render(total_cycles, unique_pcs, exec_time, rows, pc_rows, title="Recursion guest profile"):
     if not rows:
         return (
             f"### {title}\n\n"
@@ -95,6 +121,21 @@ def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest pro
         f"across the full histogram; the top {len(rows)} cover {last_cum}% of total "
         f"cycles. Percentages are of total cycles.</sub>\n"
     )
+
+    if pc_rows:
+        body += (
+            f"\n<details><summary>Top {len(pc_rows)} individual PCs "
+            f"(unfolded, with file:line)</summary>\n\n"
+        )
+        body += "| Rank | Cycles | % | Cum % | PC | Location (function) |\n"
+        body += "|-----:|-------:|--:|------:|----|----------------------|\n"
+        for i, r in enumerate(pc_rows, 1):
+            body += (
+                f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
+                f"`{r['pc']}` | `{short(r['loc'])}` |\n"
+            )
+        body += "\n</details>\n"
+
     return body
 
 
diff --git a/Cargo.lock b/Cargo.lock
index 6a9cae1ef..8ceba41a8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,28 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "addr2line"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efe1709241908a54ef1925c6018f41d3f523d0cfe174719761eb39e7b7bf086a"
+dependencies = [
+ "cpp_demangle",
+ "fallible-iterator",
+ "gimli",
+ "memmap2",
+ "object",
+ "rustc-demangle",
+ "smallvec",
+ "typed-arena",
+]
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
 [[package]]
 name = "ahash"
 version = "0.8.12"
@@ -608,6 +630,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpp_demangle"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -1227,6 +1258,12 @@ dependencies = [
  "tiny-keccak",
 ]
 
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -1262,6 +1299,16 @@ dependencies = [
  "static_assertions",
 ]
 
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -1322,6 +1369,15 @@ dependencies = [
  "wasip2",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1033caf0b349c518623b5396bfb2cf0bddf44f0306d543a250e5743297aafd10"
+dependencies = [
+ "stable_deref_trait",
+]
+
 [[package]]
 name = "group"
 version = "0.13.0"
@@ -1678,6 +1734,7 @@ dependencies = [
 name = "lambda-vm-prover"
 version = "0.1.0"
 dependencies = [
+ "addr2line",
  "bincode",
  "criterion 0.5.1",
  "crypto",
@@ -1883,6 +1940,16 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
 [[package]]
 name = "munge"
 version = "0.4.7"
@@ -1955,6 +2022,17 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "object"
+version = "0.39.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e5a6c098c7a3b6547378093f5cc30bc54fd361ce711e05293a5cc589562739b"
+dependencies = [
+ "flate2",
+ "memchr",
+ "ruzstd",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -2505,6 +2583,15 @@ dependencies = [
  "wait-timeout",
 ]
 
+[[package]]
+name = "ruzstd"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7c1c839d570d835527c9a5e4db7cb2198683a988cb9d7293fc8674e6bd58fc8"
+dependencies = [
+ "twox-hash",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.21"
@@ -2740,12 +2827,24 @@ dependencies = [
  "rand_core 0.6.4",
 ]
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+
 [[package]]
 name = "simdutf8"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
 
+[[package]]
+name = "smallvec"
+version = "1.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
+
 [[package]]
 name = "spin"
 version = "0.9.8"
@@ -3145,6 +3244,18 @@ dependencies = [
  "tracing-log",
 ]
 
+[[package]]
+name = "twox-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
+
+[[package]]
+name = "typed-arena"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
+
 [[package]]
 name = "typenum"
 version = "1.19.0"
diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml
index fac6a7628..559b4e104 100644
--- a/bench_vs/lambda/deserialize-only/Cargo.toml
+++ b/bench_vs/lambda/deserialize-only/Cargo.toml
@@ -9,3 +9,6 @@ edition = "2024"
 lambda-vm-prover = { path = "../../../prover", default-features = false }
 lambda-vm-syscalls = { path = "../../../syscalls" }
 postcard = { version = "1.0", features = ["alloc"] }
+
+[profile.release]
+debug = true
diff --git a/bench_vs/lambda/empty/Cargo.toml b/bench_vs/lambda/empty/Cargo.toml
index a6e4a0530..7b79663d2 100644
--- a/bench_vs/lambda/empty/Cargo.toml
+++ b/bench_vs/lambda/empty/Cargo.toml
@@ -6,3 +6,6 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+
+[profile.release]
+debug = true
diff --git a/bench_vs/lambda/fibonacci/Cargo.toml b/bench_vs/lambda/fibonacci/Cargo.toml
index 8ce06fec5..43b192dfc 100644
--- a/bench_vs/lambda/fibonacci/Cargo.toml
+++ b/bench_vs/lambda/fibonacci/Cargo.toml
@@ -6,3 +6,6 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+
+[profile.release]
+debug = true
diff --git a/bench_vs/lambda/recursion/Cargo.toml b/bench_vs/lambda/recursion/Cargo.toml
index bdfeb38dc..8333fe80d 100644
--- a/bench_vs/lambda/recursion/Cargo.toml
+++ b/bench_vs/lambda/recursion/Cargo.toml
@@ -9,3 +9,6 @@ edition = "2024"
 lambda-vm-prover = { path = "../../../prover", default-features = false }
 lambda-vm-syscalls = { path = "../../../syscalls" }
 postcard = { version = "1.0", features = ["alloc"] }
+
+[profile.release]
+debug = true
diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index 7764ea679..03119f617 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -97,7 +97,6 @@ pub trait IsStarkVerifier<
     /// Checks whether the purported evaluations of the composition polynomial parts and the trace
     /// polynomials at the out-of-domain challenge are consistent.
     /// See https://lambdaclass.github.io/lambdaworks/starks/protocol.html#step-2-verify-claimed-composition-polynomial
-    #[inline(never)]
     fn step_2_verify_claimed_composition_polynomial(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,
@@ -242,7 +241,6 @@ pub trait IsStarkVerifier<
     /// Reconstructs the Deep composition polynomial evaluations at the challenge indices values using the provided
     /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the
     /// FRI decommitments are valid and correspond to the Deep composition polynomial.
-    #[inline(never)]
     fn step_3_verify_fri(
         proof: &StarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
@@ -398,7 +396,6 @@ pub trait IsStarkVerifier<
     /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial
     /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query
     /// index challenges.
-    #[inline(never)]
     fn step_4_verify_trace_and_composition_openings(
         proof: &StarkProof<Field, FieldExtension, PI>,
         challenges: &Challenges<FieldExtension>,
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index ff6922f63..6660b2050 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -36,6 +36,7 @@ tiny-keccak = { version = "2.0", features = ["keccak"] }
 # Enable stark's test-utils so cross-crate tests can reach
 # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench.
 stark = { path = "../crypto/stark", features = ["test-utils"] }
+addr2line = "0.27.0"
 
 [[bench]]
 name = "vm_prover_benchmark"
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index f0196b9c6..82736786b 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -7,6 +7,7 @@
 //! `--features stark/instruments` makes any of these tests print the verifier's
 //! per-step `Time spent:` timings.
 
+use std::collections::HashMap;
 use std::ops::ControlFlow;
 use std::path::PathBuf;
 
@@ -17,9 +18,14 @@ fn workspace_root() -> PathBuf {
         .to_path_buf()
 }
 
+/// Path to a recursion-suite guest ELF artifact, built by `make compile-recursion-elfs`.
+fn guest_elf_path(root: &std::path::Path, name: &str) -> PathBuf {
+    root.join(format!("executor/program_artifacts/recursion/{name}.elf"))
+}
+
 /// Read a recursion-suite guest ELF artifact, built by `make compile-recursion-elfs`.
 fn read_guest_elf(root: &std::path::Path, name: &str) -> Vec<u8> {
-    let path = root.join(format!("executor/program_artifacts/recursion/{name}.elf"));
+    let path = guest_elf_path(root, name);
     std::fs::read(&path).unwrap_or_else(|e| {
         panic!(
             "failed to read {} — run `make compile-recursion-elfs`: {e}",
@@ -200,17 +206,9 @@ fn log_progress(
     }
 }
 
-/// Demangled enclosing-function name for a PC via the ELF symbol table;
-/// `<unknown>` if none covers it. No file:line (symtab has no DWARF).
-fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
-    symbols.lookup(pc).map_or_else(
-        || "<unknown>".to_string(),
-        |s| executor::flamegraph::demangle(&s.name),
-    )
-}
-
-/// Verifier sub-routines in execution order; `run_profile` buckets cycles by
-/// substring-matching the enclosing symbol (a missing step merges into the prior).
+/// Verifier sub-routines in execution order; `DwarfSteps` buckets cycles by
+/// substring-matching the DWARF inline chain covering each PC (a missing step
+/// merges into the prior).
 const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "replay_rounds_after_round_1",
     "step_2_verify_claimed_composition_polynomial",
@@ -218,24 +216,106 @@ const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "step_4_verify_trace_and_composition_openings",
 ];
 
+/// DWARF-backed PC resolution for the recursion guest ELF. Reading debug info
+/// directly (rather than the flat symbol table) means an inlined step is still
+/// attributed correctly: LLVM still emits a `DW_TAG_inlined_subroutine` entry
+/// for it, so `find_frames` walks the full inline chain even when the step's
+/// code has been folded into its caller. This makes step detection immune to
+/// inlining decisions, so no `#[inline(never)]` is needed on the verifier.
+struct DwarfSteps {
+    loader: addr2line::Loader,
+    step_cache: HashMap<u64, u8>,
+}
+
+impl DwarfSteps {
+    fn open(elf_path: &std::path::Path) -> Self {
+        let loader = addr2line::Loader::new(elf_path).unwrap_or_else(|e| {
+            panic!(
+                "addr2line: failed to load debug info from {} \
+                 (guest must be built with debug info, e.g. CARGO_PROFILE_RELEASE_DEBUG=1): {e}",
+                elf_path.display()
+            )
+        });
+        Self {
+            loader,
+            step_cache: HashMap::new(),
+        }
+    }
+
+    /// Highest verifier step whose (possibly-inlined) frame covers `pc`, or 0.
+    fn step_for_pc(&mut self, pc: u64) -> u8 {
+        if let Some(&step) = self.step_cache.get(&pc) {
+            return step;
+        }
+        let mut step = 0u8;
+        if let Ok(mut frames) = self.loader.find_frames(pc) {
+            while let Ok(Some(frame)) = frames.next() {
+                let Some(name) = frame.function.as_ref().and_then(|f| f.demangle().ok()) else {
+                    continue;
+                };
+                for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
+                    if name.contains(kw) {
+                        step = step.max((i + 1) as u8);
+                    }
+                }
+            }
+        }
+        self.step_cache.insert(pc, step);
+        step
+    }
+
+    /// `(file:line, demangled function)` for the innermost frame at `pc`, from
+    /// the ELF's DWARF info. `location` is `None` if the frame has no line entry.
+    fn innermost_frame(&self, pc: u64) -> (Option<String>, String) {
+        let Ok(mut frames) = self.loader.find_frames(pc) else {
+            return (None, "<unknown>".to_string());
+        };
+        let Ok(Some(frame)) = frames.next() else {
+            return (None, "<unknown>".to_string());
+        };
+        let location = frame
+            .location
+            .as_ref()
+            .and_then(|loc| match (loc.file, loc.line) {
+                (Some(file), Some(line)) => Some(format!("{file}:{line}")),
+                (Some(file), None) => Some(file.to_string()),
+                _ => None,
+            });
+        let function = frame
+            .function
+            .as_ref()
+            .and_then(|f| f.demangle().ok())
+            .map_or_else(|| "<unknown>".to_string(), |c| c.into_owned());
+        (location, function)
+    }
+
+    /// Demangled name of the innermost frame at `pc`, used to fold the PC
+    /// histogram by function (same as the old symbol-table fold, but correct
+    /// under inlining since it reads the DWARF inline chain).
+    fn function_name(&self, pc: u64) -> String {
+        self.innermost_frame(pc).1
+    }
+
+    /// Best-effort "file:line  (function)" for the innermost frame at `pc`.
+    fn describe_pc(&self, pc: u64) -> String {
+        let (location, function) = self.innermost_frame(pc);
+        let location = location.unwrap_or_else(|| "<unknown location>".to_string());
+        format!("{location}  ({function})")
+    }
+}
+
 /// `blowup=8` (128-bit, multi-query) options for the `multiquery` variants.
 fn blowup8() -> stark::proof::options::ProofOptions {
     crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid")
 }
 
-/// Print the top-25 functions by cycles, folding the PC histogram by symbol.
-fn print_function_table(
-    symbols: &executor::elf::SymbolTable,
-    pc_hist: std::collections::HashMap<u64, u64>,
-    total_cycles: u64,
-) {
-    let mut by_function: std::collections::HashMap<String, (u64, u64)> =
-        std::collections::HashMap::new();
-    for (pc, count) in &pc_hist {
-        let entry = by_function
-            .entry(resolve_pc(symbols, *pc))
-            .or_insert((0, 0));
-        entry.0 += *count; // cycles
+/// Print the top-25 functions by cycles, folding the PC histogram by the
+/// DWARF-resolved innermost (possibly-inlined) function per PC.
+fn print_function_table(dwarf: &DwarfSteps, pc_hist: &HashMap<u64, u64>, total_cycles: u64) {
+    let mut by_function: HashMap<String, (u64, u64)> = HashMap::new();
+    for (&pc, &count) in pc_hist {
+        let entry = by_function.entry(dwarf.function_name(pc)).or_insert((0, 0));
+        entry.0 += count; // cycles
         entry.1 += 1; // distinct PCs folded into this function
     }
     let mut fn_entries: Vec<(String, (u64, u64))> = by_function.into_iter().collect();
@@ -261,6 +341,32 @@ fn print_function_table(
     }
 }
 
+/// Print the top-100 hottest individual PCs (not folded by function), each
+/// resolved to its DWARF file:line and (innermost, possibly-inlined) function.
+fn print_pc_table(dwarf: &DwarfSteps, pc_hist: &HashMap<u64, u64>, total_cycles: u64) {
+    let mut pc_entries: Vec<(u64, u64)> =
+        pc_hist.iter().map(|(&pc, &cycles)| (pc, cycles)).collect();
+    pc_entries.sort_unstable_by_key(|(_pc, cycles)| std::cmp::Reverse(*cycles));
+
+    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
+    eprintln!();
+    eprintln!("  Top 100 PCs by cycle count:");
+    eprintln!("  rank          cycles        %    cum %  pc          location (function)");
+    let mut cumulative: u64 = 0;
+    for (rank, (pc, cycles)) in pc_entries.iter().take(100).enumerate() {
+        cumulative += cycles;
+        eprintln!(
+            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  0x{:08x}  {}",
+            rank + 1,
+            cycles,
+            pct(*cycles),
+            pct(cumulative),
+            pc,
+            dwarf.describe_pc(*pc),
+        );
+    }
+}
+
 /// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
 fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
     let labels = [
@@ -284,49 +390,24 @@ fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
 }
 
 /// Single-pass execute-only profiler. Always prints total cycles + a rough
-/// trace/LDE estimate; with `detailed`, also the top-25 functions + per-step
-/// breakdown (one streamed pass). `!detailed` does no per-log work.
+/// trace/LDE estimate; with `detailed`, also the top-25 functions, top-100
+/// PCs, and per-step breakdown (one streamed pass). `!detailed` does no
+/// per-log work.
 fn run_profile(
     guest_name: &str,
     progress_stride: usize,
     opts: stark::proof::options::ProofOptions,
     detailed: bool,
 ) {
-    use std::collections::HashMap;
-
-    let (guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
-    let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
+    let root = workspace_root();
+    let (_guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
+    let mut dwarf = detailed.then(|| DwarfSteps::open(&guest_elf_path(&root, guest_name)));
 
     let mut pc_hist: HashMap<u64, u64> = HashMap::new();
     let mut buckets = [0u64; 5];
-    let mut last_range: Option<(u64, u64)> = None;
-    let mut last_advance: u8 = 0;
     let bucket = std::cell::Cell::new(0u8);
     let unique = std::cell::Cell::new(0usize);
 
-    if detailed {
-        assert!(
-            !symbols.is_empty(),
-            "{guest_name} ELF has no symbol table — was it stripped?"
-        );
-        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-            let n = symbols
-                .functions()
-                .iter()
-                .filter(|f| f.name.contains(kw))
-                .count();
-            eprintln!(
-                "[profile] step {}: keyword={kw:?} -> {n} symbol(s) {}",
-                i + 1,
-                if n > 0 {
-                    ""
-                } else {
-                    "(no match; merges into previous bucket)"
-                },
-            );
-        }
-    }
-
     eprintln!(
         "[profile] executing {guest_name} guest ({}) ...",
         if detailed {
@@ -338,28 +419,14 @@ fn run_profile(
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |log| {
-            if detailed {
+            if let Some(dwarf) = dwarf.as_mut() {
                 let pc = log.current_pc;
                 *pc_hist.entry(pc).or_insert(0) += 1;
                 unique.set(pc_hist.len());
 
-                let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
-                if !in_cached {
-                    if let Some(sym) = symbols.lookup(pc) {
-                        last_range = Some((sym.address, sym.address + sym.size.max(1)));
-                        last_advance = 0;
-                        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-                            if sym.name.contains(kw) {
-                                last_advance = (i + 1) as u8;
-                            }
-                        }
-                    } else {
-                        last_range = None;
-                        last_advance = 0;
-                    }
-                }
-                if bucket.get() < last_advance {
-                    bucket.set(last_advance);
+                let advance = dwarf.step_for_pc(pc);
+                if bucket.get() < advance {
+                    bucket.set(advance);
                 }
                 buckets[bucket.get() as usize] += 1;
             }
@@ -404,9 +471,10 @@ fn run_profile(
         (main_trace_bytes * 2) as f64 / 1e9,
     );
 
-    if detailed {
+    if let Some(dwarf) = dwarf.as_ref() {
         eprintln!();
-        print_function_table(&symbols, pc_hist, total_cycles);
+        print_function_table(dwarf, &pc_hist, total_cycles);
+        print_pc_table(dwarf, &pc_hist, total_cycles);
         print_step_breakdown(&buckets, total_cycles);
     }
     eprintln!("============================================================");

From f890fc01eb6ba01aa25c6a2573430e6f6dd62f57 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 12:00:36 -0300
Subject: [PATCH 19/36] Revert "fix: reintroduce addr2line"

This reverts commit 3df1e084f52e1ccfd8286b2a654c13da36427f73.
---
 .../scripts/aggregate_recursion_histogram.py  |  59 +----
 Cargo.lock                                    | 111 ---------
 bench_vs/lambda/deserialize-only/Cargo.toml   |   3 -
 bench_vs/lambda/empty/Cargo.toml              |   3 -
 bench_vs/lambda/fibonacci/Cargo.toml          |   3 -
 bench_vs/lambda/recursion/Cargo.toml          |   3 -
 crypto/stark/src/verifier.rs                  |   3 +
 prover/Cargo.toml                             |   1 -
 prover/src/tests/recursion_smoke_test.rs      | 220 ++++++------------
 9 files changed, 88 insertions(+), 318 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index a46cab85e..1ae34ff70 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -1,20 +1,15 @@
 #!/usr/bin/env python3
-"""Format the recursion-guest profile tables as a Markdown PR comment.
+"""Format the recursion-guest per-function profile as a Markdown PR comment.
 
-`test_recursion_profile_*` prints two tables: a per-function summary (cycles
-folded over each function's PCs, resolved via DWARF so inlined steps still
-count towards their own function) and a per-PC detail table (the hottest raw
-program counters, each resolved to file:line). We parse both and render them
-as Markdown.
+`test_recursion_pc_histogram` prints a per-function summary table: the cycles
+folded over each function's PCs, computed across the *full* histogram — the view
+that shows where the cycles actually go. We parse that table and render it as
+Markdown.
 
     Top 25 functions by cycle count (aggregated over their PCs):
     rank          cycles        %    cum %    PCs  function
        1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
-    Top 100 PCs by cycle count:
-    rank          cycles        %    cum %  pc          location (function)
-       1          123456   12.34%   12.34%  0x00012ab4  src/verifier.rs:250  (...)
-
 Reads the test's captured output from argv[1]; writes the Markdown body to
 argv[2] (or stdout).
 """
@@ -27,12 +22,7 @@
     r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
 )
 FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
-# A per-PC detail row: rank, cycles, pct%, cum%, pc (hex), location (function).
-PC_ROW = re.compile(
-    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(0x[0-9a-fA-F]+)\s+(.*\S)\s*$"
-)
-PC_TABLE_START = re.compile(r"Top \d+ PCs by cycle count")
-# The "====" rule the test prints right after the tables.
+# The "====" rule the test prints right after the (now sole) function table.
 TABLE_END = re.compile(r"^=+\s*$")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
 UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
@@ -42,9 +32,7 @@
 def parse(text):
     total_cycles = unique_pcs = exec_time = None
     rows = []
-    pc_rows = []
     in_fn_table = False
-    in_pc_table = False
     for line in text.splitlines():
         if total_cycles is None and (m := TOTAL_CYCLES.search(line)):
             total_cycles = int(m.group(1))
@@ -55,12 +43,8 @@ def parse(text):
         if FN_TABLE_START.search(line):
             in_fn_table = True
             continue
-        if PC_TABLE_START.search(line):
-            in_pc_table = True
-            continue
-        if TABLE_END.match(line):
+        if in_fn_table and TABLE_END.match(line):
             in_fn_table = False
-            in_pc_table = False
             continue
         if in_fn_table and (m := FN_ROW.match(line)):
             rows.append(
@@ -72,24 +56,14 @@ def parse(text):
                     "fn": m.group(5),
                 }
             )
-        if in_pc_table and (m := PC_ROW.match(line)):
-            pc_rows.append(
-                {
-                    "cycles": int(m.group(1)),
-                    "pct": m.group(2),
-                    "cum": m.group(3),
-                    "pc": m.group(4),
-                    "loc": m.group(5),
-                }
-            )
-    return total_cycles, unique_pcs, exec_time, rows, pc_rows
+    return total_cycles, unique_pcs, exec_time, rows
 
 
 def short(name, width=90):
     return name if len(name) <= width else name[: width - 1] + "…"
 
 
-def render(total_cycles, unique_pcs, exec_time, rows, pc_rows, title="Recursion guest profile"):
+def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest profile"):
     if not rows:
         return (
             f"### {title}\n\n"
@@ -121,21 +95,6 @@ def render(total_cycles, unique_pcs, exec_time, rows, pc_rows, title="Recursion
         f"across the full histogram; the top {len(rows)} cover {last_cum}% of total "
         f"cycles. Percentages are of total cycles.</sub>\n"
     )
-
-    if pc_rows:
-        body += (
-            f"\n<details><summary>Top {len(pc_rows)} individual PCs "
-            f"(unfolded, with file:line)</summary>\n\n"
-        )
-        body += "| Rank | Cycles | % | Cum % | PC | Location (function) |\n"
-        body += "|-----:|-------:|--:|------:|----|----------------------|\n"
-        for i, r in enumerate(pc_rows, 1):
-            body += (
-                f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
-                f"`{r['pc']}` | `{short(r['loc'])}` |\n"
-            )
-        body += "\n</details>\n"
-
     return body
 
 
diff --git a/Cargo.lock b/Cargo.lock
index 8ceba41a8..6a9cae1ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,28 +2,6 @@
 # It is not intended for manual editing.
 version = 4
 
-[[package]]
-name = "addr2line"
-version = "0.27.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efe1709241908a54ef1925c6018f41d3f523d0cfe174719761eb39e7b7bf086a"
-dependencies = [
- "cpp_demangle",
- "fallible-iterator",
- "gimli",
- "memmap2",
- "object",
- "rustc-demangle",
- "smallvec",
- "typed-arena",
-]
-
-[[package]]
-name = "adler2"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
-
 [[package]]
 name = "ahash"
 version = "0.8.12"
@@ -630,15 +608,6 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
-[[package]]
-name = "cpp_demangle"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def"
-dependencies = [
- "cfg-if",
-]
-
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -1258,12 +1227,6 @@ dependencies = [
  "tiny-keccak",
 ]
 
-[[package]]
-name = "fallible-iterator"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
-
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -1299,16 +1262,6 @@ dependencies = [
  "static_assertions",
 ]
 
-[[package]]
-name = "flate2"
-version = "1.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
-dependencies = [
- "crc32fast",
- "miniz_oxide",
-]
-
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -1369,15 +1322,6 @@ dependencies = [
  "wasip2",
 ]
 
-[[package]]
-name = "gimli"
-version = "0.34.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1033caf0b349c518623b5396bfb2cf0bddf44f0306d543a250e5743297aafd10"
-dependencies = [
- "stable_deref_trait",
-]
-
 [[package]]
 name = "group"
 version = "0.13.0"
@@ -1734,7 +1678,6 @@ dependencies = [
 name = "lambda-vm-prover"
 version = "0.1.0"
 dependencies = [
- "addr2line",
  "bincode",
  "criterion 0.5.1",
  "crypto",
@@ -1940,16 +1883,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "miniz_oxide"
-version = "0.8.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
-dependencies = [
- "adler2",
- "simd-adler32",
-]
-
 [[package]]
 name = "munge"
 version = "0.4.7"
@@ -2022,17 +1955,6 @@ dependencies = [
  "autocfg",
 ]
 
-[[package]]
-name = "object"
-version = "0.39.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e5a6c098c7a3b6547378093f5cc30bc54fd361ce711e05293a5cc589562739b"
-dependencies = [
- "flate2",
- "memchr",
- "ruzstd",
-]
-
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -2583,15 +2505,6 @@ dependencies = [
  "wait-timeout",
 ]
 
-[[package]]
-name = "ruzstd"
-version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7c1c839d570d835527c9a5e4db7cb2198683a988cb9d7293fc8674e6bd58fc8"
-dependencies = [
- "twox-hash",
-]
-
 [[package]]
 name = "ryu"
 version = "1.0.21"
@@ -2827,24 +2740,12 @@ dependencies = [
  "rand_core 0.6.4",
 ]
 
-[[package]]
-name = "simd-adler32"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
-
 [[package]]
 name = "simdutf8"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
 
-[[package]]
-name = "smallvec"
-version = "1.15.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
-
 [[package]]
 name = "spin"
 version = "0.9.8"
@@ -3244,18 +3145,6 @@ dependencies = [
  "tracing-log",
 ]
 
-[[package]]
-name = "twox-hash"
-version = "2.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
-
-[[package]]
-name = "typed-arena"
-version = "2.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
-
 [[package]]
 name = "typenum"
 version = "1.19.0"
diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml
index 559b4e104..fac6a7628 100644
--- a/bench_vs/lambda/deserialize-only/Cargo.toml
+++ b/bench_vs/lambda/deserialize-only/Cargo.toml
@@ -9,6 +9,3 @@ edition = "2024"
 lambda-vm-prover = { path = "../../../prover", default-features = false }
 lambda-vm-syscalls = { path = "../../../syscalls" }
 postcard = { version = "1.0", features = ["alloc"] }
-
-[profile.release]
-debug = true
diff --git a/bench_vs/lambda/empty/Cargo.toml b/bench_vs/lambda/empty/Cargo.toml
index 7b79663d2..a6e4a0530 100644
--- a/bench_vs/lambda/empty/Cargo.toml
+++ b/bench_vs/lambda/empty/Cargo.toml
@@ -6,6 +6,3 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-
-[profile.release]
-debug = true
diff --git a/bench_vs/lambda/fibonacci/Cargo.toml b/bench_vs/lambda/fibonacci/Cargo.toml
index 43b192dfc..8ce06fec5 100644
--- a/bench_vs/lambda/fibonacci/Cargo.toml
+++ b/bench_vs/lambda/fibonacci/Cargo.toml
@@ -6,6 +6,3 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-
-[profile.release]
-debug = true
diff --git a/bench_vs/lambda/recursion/Cargo.toml b/bench_vs/lambda/recursion/Cargo.toml
index 8333fe80d..bdfeb38dc 100644
--- a/bench_vs/lambda/recursion/Cargo.toml
+++ b/bench_vs/lambda/recursion/Cargo.toml
@@ -9,6 +9,3 @@ edition = "2024"
 lambda-vm-prover = { path = "../../../prover", default-features = false }
 lambda-vm-syscalls = { path = "../../../syscalls" }
 postcard = { version = "1.0", features = ["alloc"] }
-
-[profile.release]
-debug = true
diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index 03119f617..7764ea679 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -97,6 +97,7 @@ pub trait IsStarkVerifier<
     /// Checks whether the purported evaluations of the composition polynomial parts and the trace
     /// polynomials at the out-of-domain challenge are consistent.
     /// See https://lambdaclass.github.io/lambdaworks/starks/protocol.html#step-2-verify-claimed-composition-polynomial
+    #[inline(never)]
     fn step_2_verify_claimed_composition_polynomial(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,
@@ -241,6 +242,7 @@ pub trait IsStarkVerifier<
     /// Reconstructs the Deep composition polynomial evaluations at the challenge indices values using the provided
     /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the
     /// FRI decommitments are valid and correspond to the Deep composition polynomial.
+    #[inline(never)]
     fn step_3_verify_fri(
         proof: &StarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
@@ -396,6 +398,7 @@ pub trait IsStarkVerifier<
     /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial
     /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query
     /// index challenges.
+    #[inline(never)]
     fn step_4_verify_trace_and_composition_openings(
         proof: &StarkProof<Field, FieldExtension, PI>,
         challenges: &Challenges<FieldExtension>,
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 6660b2050..ff6922f63 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -36,7 +36,6 @@ tiny-keccak = { version = "2.0", features = ["keccak"] }
 # Enable stark's test-utils so cross-crate tests can reach
 # `compute_precomputed_commitment_for_testing`. Only active under cargo test/bench.
 stark = { path = "../crypto/stark", features = ["test-utils"] }
-addr2line = "0.27.0"
 
 [[bench]]
 name = "vm_prover_benchmark"
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 82736786b..f0196b9c6 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -7,7 +7,6 @@
 //! `--features stark/instruments` makes any of these tests print the verifier's
 //! per-step `Time spent:` timings.
 
-use std::collections::HashMap;
 use std::ops::ControlFlow;
 use std::path::PathBuf;
 
@@ -18,14 +17,9 @@ fn workspace_root() -> PathBuf {
         .to_path_buf()
 }
 
-/// Path to a recursion-suite guest ELF artifact, built by `make compile-recursion-elfs`.
-fn guest_elf_path(root: &std::path::Path, name: &str) -> PathBuf {
-    root.join(format!("executor/program_artifacts/recursion/{name}.elf"))
-}
-
 /// Read a recursion-suite guest ELF artifact, built by `make compile-recursion-elfs`.
 fn read_guest_elf(root: &std::path::Path, name: &str) -> Vec<u8> {
-    let path = guest_elf_path(root, name);
+    let path = root.join(format!("executor/program_artifacts/recursion/{name}.elf"));
     std::fs::read(&path).unwrap_or_else(|e| {
         panic!(
             "failed to read {} — run `make compile-recursion-elfs`: {e}",
@@ -206,9 +200,17 @@ fn log_progress(
     }
 }
 
-/// Verifier sub-routines in execution order; `DwarfSteps` buckets cycles by
-/// substring-matching the DWARF inline chain covering each PC (a missing step
-/// merges into the prior).
+/// Demangled enclosing-function name for a PC via the ELF symbol table;
+/// `<unknown>` if none covers it. No file:line (symtab has no DWARF).
+fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
+    symbols.lookup(pc).map_or_else(
+        || "<unknown>".to_string(),
+        |s| executor::flamegraph::demangle(&s.name),
+    )
+}
+
+/// Verifier sub-routines in execution order; `run_profile` buckets cycles by
+/// substring-matching the enclosing symbol (a missing step merges into the prior).
 const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "replay_rounds_after_round_1",
     "step_2_verify_claimed_composition_polynomial",
@@ -216,106 +218,24 @@ const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
     "step_4_verify_trace_and_composition_openings",
 ];
 
-/// DWARF-backed PC resolution for the recursion guest ELF. Reading debug info
-/// directly (rather than the flat symbol table) means an inlined step is still
-/// attributed correctly: LLVM still emits a `DW_TAG_inlined_subroutine` entry
-/// for it, so `find_frames` walks the full inline chain even when the step's
-/// code has been folded into its caller. This makes step detection immune to
-/// inlining decisions, so no `#[inline(never)]` is needed on the verifier.
-struct DwarfSteps {
-    loader: addr2line::Loader,
-    step_cache: HashMap<u64, u8>,
-}
-
-impl DwarfSteps {
-    fn open(elf_path: &std::path::Path) -> Self {
-        let loader = addr2line::Loader::new(elf_path).unwrap_or_else(|e| {
-            panic!(
-                "addr2line: failed to load debug info from {} \
-                 (guest must be built with debug info, e.g. CARGO_PROFILE_RELEASE_DEBUG=1): {e}",
-                elf_path.display()
-            )
-        });
-        Self {
-            loader,
-            step_cache: HashMap::new(),
-        }
-    }
-
-    /// Highest verifier step whose (possibly-inlined) frame covers `pc`, or 0.
-    fn step_for_pc(&mut self, pc: u64) -> u8 {
-        if let Some(&step) = self.step_cache.get(&pc) {
-            return step;
-        }
-        let mut step = 0u8;
-        if let Ok(mut frames) = self.loader.find_frames(pc) {
-            while let Ok(Some(frame)) = frames.next() {
-                let Some(name) = frame.function.as_ref().and_then(|f| f.demangle().ok()) else {
-                    continue;
-                };
-                for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-                    if name.contains(kw) {
-                        step = step.max((i + 1) as u8);
-                    }
-                }
-            }
-        }
-        self.step_cache.insert(pc, step);
-        step
-    }
-
-    /// `(file:line, demangled function)` for the innermost frame at `pc`, from
-    /// the ELF's DWARF info. `location` is `None` if the frame has no line entry.
-    fn innermost_frame(&self, pc: u64) -> (Option<String>, String) {
-        let Ok(mut frames) = self.loader.find_frames(pc) else {
-            return (None, "<unknown>".to_string());
-        };
-        let Ok(Some(frame)) = frames.next() else {
-            return (None, "<unknown>".to_string());
-        };
-        let location = frame
-            .location
-            .as_ref()
-            .and_then(|loc| match (loc.file, loc.line) {
-                (Some(file), Some(line)) => Some(format!("{file}:{line}")),
-                (Some(file), None) => Some(file.to_string()),
-                _ => None,
-            });
-        let function = frame
-            .function
-            .as_ref()
-            .and_then(|f| f.demangle().ok())
-            .map_or_else(|| "<unknown>".to_string(), |c| c.into_owned());
-        (location, function)
-    }
-
-    /// Demangled name of the innermost frame at `pc`, used to fold the PC
-    /// histogram by function (same as the old symbol-table fold, but correct
-    /// under inlining since it reads the DWARF inline chain).
-    fn function_name(&self, pc: u64) -> String {
-        self.innermost_frame(pc).1
-    }
-
-    /// Best-effort "file:line  (function)" for the innermost frame at `pc`.
-    fn describe_pc(&self, pc: u64) -> String {
-        let (location, function) = self.innermost_frame(pc);
-        let location = location.unwrap_or_else(|| "<unknown location>".to_string());
-        format!("{location}  ({function})")
-    }
-}
-
 /// `blowup=8` (128-bit, multi-query) options for the `multiquery` variants.
 fn blowup8() -> stark::proof::options::ProofOptions {
     crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid")
 }
 
-/// Print the top-25 functions by cycles, folding the PC histogram by the
-/// DWARF-resolved innermost (possibly-inlined) function per PC.
-fn print_function_table(dwarf: &DwarfSteps, pc_hist: &HashMap<u64, u64>, total_cycles: u64) {
-    let mut by_function: HashMap<String, (u64, u64)> = HashMap::new();
-    for (&pc, &count) in pc_hist {
-        let entry = by_function.entry(dwarf.function_name(pc)).or_insert((0, 0));
-        entry.0 += count; // cycles
+/// Print the top-25 functions by cycles, folding the PC histogram by symbol.
+fn print_function_table(
+    symbols: &executor::elf::SymbolTable,
+    pc_hist: std::collections::HashMap<u64, u64>,
+    total_cycles: u64,
+) {
+    let mut by_function: std::collections::HashMap<String, (u64, u64)> =
+        std::collections::HashMap::new();
+    for (pc, count) in &pc_hist {
+        let entry = by_function
+            .entry(resolve_pc(symbols, *pc))
+            .or_insert((0, 0));
+        entry.0 += *count; // cycles
         entry.1 += 1; // distinct PCs folded into this function
     }
     let mut fn_entries: Vec<(String, (u64, u64))> = by_function.into_iter().collect();
@@ -341,32 +261,6 @@ fn print_function_table(dwarf: &DwarfSteps, pc_hist: &HashMap<u64, u64>, total_c
     }
 }
 
-/// Print the top-100 hottest individual PCs (not folded by function), each
-/// resolved to its DWARF file:line and (innermost, possibly-inlined) function.
-fn print_pc_table(dwarf: &DwarfSteps, pc_hist: &HashMap<u64, u64>, total_cycles: u64) {
-    let mut pc_entries: Vec<(u64, u64)> =
-        pc_hist.iter().map(|(&pc, &cycles)| (pc, cycles)).collect();
-    pc_entries.sort_unstable_by_key(|(_pc, cycles)| std::cmp::Reverse(*cycles));
-
-    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
-    eprintln!();
-    eprintln!("  Top 100 PCs by cycle count:");
-    eprintln!("  rank          cycles        %    cum %  pc          location (function)");
-    let mut cumulative: u64 = 0;
-    for (rank, (pc, cycles)) in pc_entries.iter().take(100).enumerate() {
-        cumulative += cycles;
-        eprintln!(
-            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  0x{:08x}  {}",
-            rank + 1,
-            cycles,
-            pct(*cycles),
-            pct(cumulative),
-            pc,
-            dwarf.describe_pc(*pc),
-        );
-    }
-}
-
 /// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
 fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
     let labels = [
@@ -390,24 +284,49 @@ fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
 }
 
 /// Single-pass execute-only profiler. Always prints total cycles + a rough
-/// trace/LDE estimate; with `detailed`, also the top-25 functions, top-100
-/// PCs, and per-step breakdown (one streamed pass). `!detailed` does no
-/// per-log work.
+/// trace/LDE estimate; with `detailed`, also the top-25 functions + per-step
+/// breakdown (one streamed pass). `!detailed` does no per-log work.
 fn run_profile(
     guest_name: &str,
     progress_stride: usize,
     opts: stark::proof::options::ProofOptions,
     detailed: bool,
 ) {
-    let root = workspace_root();
-    let (_guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
-    let mut dwarf = detailed.then(|| DwarfSteps::open(&guest_elf_path(&root, guest_name)));
+    use std::collections::HashMap;
+
+    let (guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
+    let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
 
     let mut pc_hist: HashMap<u64, u64> = HashMap::new();
     let mut buckets = [0u64; 5];
+    let mut last_range: Option<(u64, u64)> = None;
+    let mut last_advance: u8 = 0;
     let bucket = std::cell::Cell::new(0u8);
     let unique = std::cell::Cell::new(0usize);
 
+    if detailed {
+        assert!(
+            !symbols.is_empty(),
+            "{guest_name} ELF has no symbol table — was it stripped?"
+        );
+        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
+            let n = symbols
+                .functions()
+                .iter()
+                .filter(|f| f.name.contains(kw))
+                .count();
+            eprintln!(
+                "[profile] step {}: keyword={kw:?} -> {n} symbol(s) {}",
+                i + 1,
+                if n > 0 {
+                    ""
+                } else {
+                    "(no match; merges into previous bucket)"
+                },
+            );
+        }
+    }
+
     eprintln!(
         "[profile] executing {guest_name} guest ({}) ...",
         if detailed {
@@ -419,14 +338,28 @@ fn run_profile(
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |log| {
-            if let Some(dwarf) = dwarf.as_mut() {
+            if detailed {
                 let pc = log.current_pc;
                 *pc_hist.entry(pc).or_insert(0) += 1;
                 unique.set(pc_hist.len());
 
-                let advance = dwarf.step_for_pc(pc);
-                if bucket.get() < advance {
-                    bucket.set(advance);
+                let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
+                if !in_cached {
+                    if let Some(sym) = symbols.lookup(pc) {
+                        last_range = Some((sym.address, sym.address + sym.size.max(1)));
+                        last_advance = 0;
+                        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
+                            if sym.name.contains(kw) {
+                                last_advance = (i + 1) as u8;
+                            }
+                        }
+                    } else {
+                        last_range = None;
+                        last_advance = 0;
+                    }
+                }
+                if bucket.get() < last_advance {
+                    bucket.set(last_advance);
                 }
                 buckets[bucket.get() as usize] += 1;
             }
@@ -471,10 +404,9 @@ fn run_profile(
         (main_trace_bytes * 2) as f64 / 1e9,
     );
 
-    if let Some(dwarf) = dwarf.as_ref() {
+    if detailed {
         eprintln!();
-        print_function_table(dwarf, &pc_hist, total_cycles);
-        print_pc_table(dwarf, &pc_hist, total_cycles);
+        print_function_table(&symbols, pc_hist, total_cycles);
         print_step_breakdown(&buckets, total_cycles);
     }
     eprintln!("============================================================");

From 600670d11d4692ea69f723669a7602effc1739d0 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 12:20:41 -0300
Subject: [PATCH 20/36] feat: guest-side step-profiling markers for the
 recursion verifier

Replace symbol/DWARF-based verifier-step detection with an explicit
`addi x0,x0,N` marker instruction, which is immune to inlining. Add a
`STEP_DECODE_DONE` marker in the recursion guest itself, making the
deserialize-only control guest (manual A/B subtraction) redundant.
---
 Makefile                                      |    2 +-
 .../deserialize-only/.cargo/config.toml       |    7 -
 bench_vs/lambda/deserialize-only/Cargo.lock   | 1199 -----------------
 bench_vs/lambda/deserialize-only/Cargo.toml   |   11 -
 bench_vs/lambda/deserialize-only/src/main.rs  |   32 -
 bench_vs/lambda/recursion/Cargo.toml          |    4 +-
 bench_vs/lambda/recursion/src/main.rs         |    3 +
 crypto/stark/Cargo.toml                       |    1 +
 crypto/stark/src/lib.rs                       |    1 +
 crypto/stark/src/profile_markers.rs           |   26 +
 crypto/stark/src/verifier.rs                  |   13 +-
 executor/src/vm/execution.rs                  |   16 +
 prover/Cargo.toml                             |    1 +
 prover/src/lib.rs                             |    1 +
 prover/src/tests/recursion_smoke_test.rs      |  194 +--
 15 files changed, 160 insertions(+), 1351 deletions(-)
 delete mode 100644 bench_vs/lambda/deserialize-only/.cargo/config.toml
 delete mode 100644 bench_vs/lambda/deserialize-only/Cargo.lock
 delete mode 100644 bench_vs/lambda/deserialize-only/Cargo.toml
 delete mode 100644 bench_vs/lambda/deserialize-only/src/main.rs
 create mode 100644 crypto/stark/src/profile_markers.rs

diff --git a/Makefile b/Makefile
index 801845534..d725ca2d7 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ BENCH_ARTIFACTS := $(addprefix $(BENCH_ARTIFACTS_DIR)/, $(addsuffix .elf, $(BENC
 # rather than executor/programs/. The recursion guest is the in-VM STARK verifier.
 RECURSION_GUESTS_DIR=./bench_vs/lambda
 RECURSION_ARTIFACTS_DIR=./executor/program_artifacts/recursion
-RECURSION_GUESTS := empty fibonacci recursion deserialize-only
+RECURSION_GUESTS := empty fibonacci recursion
 RECURSION_ARTIFACTS := $(addprefix $(RECURSION_ARTIFACTS_DIR)/, $(addsuffix .elf, $(RECURSION_GUESTS)))
 
 # Override with: make ... SYSROOT_DIR=$HOME/.lambda-vm-sysroot
diff --git a/bench_vs/lambda/deserialize-only/.cargo/config.toml b/bench_vs/lambda/deserialize-only/.cargo/config.toml
deleted file mode 100644
index f5ea686ff..000000000
--- a/bench_vs/lambda/deserialize-only/.cargo/config.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[target.riscv64im-lambda-vm-elf]
-rustflags = [
-  "-C", "link-arg=-e",
-  "-C", "link-arg=main",
-  "--cfg", "getrandom_backend=\"custom\"",
-  "-C", "passes=lower-atomic"
-]
diff --git a/bench_vs/lambda/deserialize-only/Cargo.lock b/bench_vs/lambda/deserialize-only/Cargo.lock
deleted file mode 100644
index 9433fadb3..000000000
--- a/bench_vs/lambda/deserialize-only/Cargo.lock
+++ /dev/null
@@ -1,1199 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "atomic-polyfill"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
-dependencies = [
- "critical-section",
-]
-
-[[package]]
-name = "autocfg"
-version = "1.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
-
-[[package]]
-name = "base16ct"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
-
-[[package]]
-name = "base64"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
-
-[[package]]
-name = "block-buffer"
-version = "0.10.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
-dependencies = [
- "generic-array",
-]
-
-[[package]]
-name = "bumpalo"
-version = "3.20.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
-
-[[package]]
-name = "byteorder"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
-
-[[package]]
-name = "cobs"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
-dependencies = [
- "thiserror 2.0.18",
-]
-
-[[package]]
-name = "const-default"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa"
-
-[[package]]
-name = "const-oid"
-version = "0.9.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
-
-[[package]]
-name = "core-foundation-sys"
-version = "0.8.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
-
-[[package]]
-name = "cpufeatures"
-version = "0.2.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
-dependencies = [
- "libc",
-]
-
-[[package]]
-name = "critical-section"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
-
-[[package]]
-name = "crossbeam-deque"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
-dependencies = [
- "crossbeam-epoch",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-epoch"
-version = "0.9.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
-dependencies = [
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-utils"
-version = "0.8.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
-
-[[package]]
-name = "crypto"
-version = "0.1.0"
-dependencies = [
- "digest",
- "math",
- "rand 0.8.6",
- "rand_chacha 0.3.1",
- "serde",
- "sha3",
-]
-
-[[package]]
-name = "crypto-bigint"
-version = "0.5.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
-dependencies = [
- "generic-array",
- "rand_core 0.6.4",
- "subtle",
- "zeroize",
-]
-
-[[package]]
-name = "crypto-common"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
-dependencies = [
- "generic-array",
- "typenum",
-]
-
-[[package]]
-name = "der"
-version = "0.7.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
-dependencies = [
- "const-oid",
- "zeroize",
-]
-
-[[package]]
-name = "deserialize-only-bench"
-version = "0.1.0"
-dependencies = [
- "lambda-vm-prover",
- "lambda-vm-syscalls",
- "postcard",
-]
-
-[[package]]
-name = "digest"
-version = "0.10.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
-dependencies = [
- "block-buffer",
- "crypto-common",
-]
-
-[[package]]
-name = "ecsm"
-version = "0.1.0"
-dependencies = [
- "k256",
- "num-bigint",
- "num-traits",
-]
-
-[[package]]
-name = "either"
-version = "1.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
-
-[[package]]
-name = "elliptic-curve"
-version = "0.13.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
-dependencies = [
- "base16ct",
- "crypto-bigint",
- "ff",
- "generic-array",
- "group",
- "rand_core 0.6.4",
- "sec1",
- "subtle",
- "zeroize",
-]
-
-[[package]]
-name = "embedded-alloc"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd"
-dependencies = [
- "const-default",
- "critical-section",
- "linked_list_allocator",
- "rlsf",
-]
-
-[[package]]
-name = "embedded-hal"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
-
-[[package]]
-name = "embedded-io"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
-
-[[package]]
-name = "embedded-io"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
-
-[[package]]
-name = "executor"
-version = "0.1.0"
-dependencies = [
- "ecsm",
- "rustc-demangle",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "ff"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
-dependencies = [
- "rand_core 0.6.4",
- "subtle",
-]
-
-[[package]]
-name = "futures-core"
-version = "0.3.32"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
-
-[[package]]
-name = "futures-task"
-version = "0.3.32"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
-
-[[package]]
-name = "futures-util"
-version = "0.3.32"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
-dependencies = [
- "futures-core",
- "futures-task",
- "pin-project-lite",
- "slab",
-]
-
-[[package]]
-name = "generic-array"
-version = "0.14.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2"
-dependencies = [
- "typenum",
- "version_check",
- "zeroize",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.2.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
-dependencies = [
- "cfg-if",
- "js-sys",
- "libc",
- "wasi",
- "wasm-bindgen",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.3.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasip2",
-]
-
-[[package]]
-name = "group"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
-dependencies = [
- "ff",
- "rand_core 0.6.4",
- "subtle",
-]
-
-[[package]]
-name = "half"
-version = "1.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403"
-
-[[package]]
-name = "hash32"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
-dependencies = [
- "byteorder",
-]
-
-[[package]]
-name = "heapless"
-version = "0.7.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
-dependencies = [
- "atomic-polyfill",
- "hash32",
- "rustc_version",
- "serde",
- "spin",
- "stable_deref_trait",
-]
-
-[[package]]
-name = "itertools"
-version = "0.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
-dependencies = [
- "either",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
-
-[[package]]
-name = "js-sys"
-version = "0.3.103"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102"
-dependencies = [
- "cfg-if",
- "futures-util",
- "wasm-bindgen",
-]
-
-[[package]]
-name = "k256"
-version = "0.13.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
-dependencies = [
- "cfg-if",
- "elliptic-curve",
-]
-
-[[package]]
-name = "keccak"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653"
-dependencies = [
- "cpufeatures",
-]
-
-[[package]]
-name = "lambda-vm-prover"
-version = "0.1.0"
-dependencies = [
- "crypto",
- "ecsm",
- "executor",
- "log",
- "math",
- "serde",
- "sha3",
- "stark",
- "sysinfo",
-]
-
-[[package]]
-name = "lambda-vm-syscalls"
-version = "0.1.0"
-dependencies = [
- "embedded-alloc",
- "getrandom 0.2.17",
- "getrandom 0.3.4",
- "lazy_static",
- "rand 0.9.4",
- "riscv",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "lazy_static"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
-
-[[package]]
-name = "libc"
-version = "0.2.186"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
-
-[[package]]
-name = "linked_list_allocator"
-version = "0.10.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897"
-
-[[package]]
-name = "lock_api"
-version = "0.4.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
-dependencies = [
- "scopeguard",
-]
-
-[[package]]
-name = "log"
-version = "0.4.33"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
-
-[[package]]
-name = "math"
-version = "0.1.0"
-dependencies = [
- "getrandom 0.2.17",
- "num-bigint",
- "num-traits",
- "rand 0.8.6",
- "rayon",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "memchr"
-version = "2.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
-
-[[package]]
-name = "ntapi"
-version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae"
-dependencies = [
- "winapi",
-]
-
-[[package]]
-name = "num-bigint"
-version = "0.4.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
-dependencies = [
- "num-integer",
- "num-traits",
-]
-
-[[package]]
-name = "num-integer"
-version = "0.1.46"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
-dependencies = [
- "num-traits",
-]
-
-[[package]]
-name = "num-traits"
-version = "0.2.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
-dependencies = [
- "autocfg",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.21.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
-
-[[package]]
-name = "paste"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
-
-[[package]]
-name = "pin-project-lite"
-version = "0.2.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
-
-[[package]]
-name = "postcard"
-version = "1.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
-dependencies = [
- "cobs",
- "embedded-io 0.4.0",
- "embedded-io 0.6.1",
- "heapless",
- "serde",
-]
-
-[[package]]
-name = "ppv-lite86"
-version = "0.2.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
-dependencies = [
- "zerocopy",
-]
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.106"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.46"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "r-efi"
-version = "5.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
-
-[[package]]
-name = "rand"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
-dependencies = [
- "rand_core 0.6.4",
-]
-
-[[package]]
-name = "rand"
-version = "0.9.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
-dependencies = [
- "rand_chacha 0.9.0",
- "rand_core 0.9.5",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.6.4",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.9.5",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
-
-[[package]]
-name = "rand_core"
-version = "0.9.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
-dependencies = [
- "getrandom 0.3.4",
-]
-
-[[package]]
-name = "rayon"
-version = "1.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
-dependencies = [
- "either",
- "rayon-core",
-]
-
-[[package]]
-name = "rayon-core"
-version = "1.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
-dependencies = [
- "crossbeam-deque",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "riscv"
-version = "0.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25"
-dependencies = [
- "critical-section",
- "embedded-hal",
- "paste",
- "riscv-macros",
- "riscv-pac",
-]
-
-[[package]]
-name = "riscv-macros"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "riscv-pac"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
-
-[[package]]
-name = "rlsf"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a"
-dependencies = [
- "cfg-if",
- "const-default",
- "libc",
- "rustversion",
- "svgbobdoc",
-]
-
-[[package]]
-name = "rustc-demangle"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
-
-[[package]]
-name = "rustc_version"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
-dependencies = [
- "semver",
-]
-
-[[package]]
-name = "rustversion"
-version = "1.0.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
-
-[[package]]
-name = "ryu"
-version = "1.0.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
-
-[[package]]
-name = "scopeguard"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
-
-[[package]]
-name = "sec1"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
-dependencies = [
- "base16ct",
- "der",
- "generic-array",
- "subtle",
- "zeroize",
-]
-
-[[package]]
-name = "semver"
-version = "1.0.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
-
-[[package]]
-name = "serde"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_cbor"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
-dependencies = [
- "half",
- "serde",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "serde_json"
-version = "1.0.143"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
-dependencies = [
- "itoa",
- "memchr",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "sha3"
-version = "0.10.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77fd7028345d415a4034cf8777cd4f8ab1851274233b45f84e3d955502d93874"
-dependencies = [
- "digest",
- "keccak",
-]
-
-[[package]]
-name = "slab"
-version = "0.4.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
-
-[[package]]
-name = "spin"
-version = "0.9.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]
-
-[[package]]
-name = "stable_deref_trait"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
-
-[[package]]
-name = "stark"
-version = "0.1.0"
-dependencies = [
- "crypto",
- "itertools",
- "log",
- "math",
- "serde",
- "serde_cbor",
- "sha3",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "subtle"
-version = "2.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
-
-[[package]]
-name = "svgbobdoc"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50"
-dependencies = [
- "base64",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
- "unicode-width",
-]
-
-[[package]]
-name = "syn"
-version = "1.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.118"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "sysinfo"
-version = "0.31.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "355dbe4f8799b304b05e1b0f05fc59b2a18d36645cf169607da45bde2f69a1be"
-dependencies = [
- "core-foundation-sys",
- "libc",
- "memchr",
- "ntapi",
- "windows",
-]
-
-[[package]]
-name = "thiserror"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
-dependencies = [
- "thiserror-impl 1.0.69",
-]
-
-[[package]]
-name = "thiserror"
-version = "2.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
-dependencies = [
- "thiserror-impl 2.0.18",
-]
-
-[[package]]
-name = "thiserror-impl"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "thiserror-impl"
-version = "2.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "typenum"
-version = "1.20.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.24"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
-
-[[package]]
-name = "unicode-width"
-version = "0.1.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
-
-[[package]]
-name = "version_check"
-version = "0.9.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
-
-[[package]]
-name = "wasi"
-version = "0.11.1+wasi-snapshot-preview1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
-
-[[package]]
-name = "wasip2"
-version = "1.0.4+wasi-0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487"
-dependencies = [
- "wit-bindgen",
-]
-
-[[package]]
-name = "wasm-bindgen"
-version = "0.2.126"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4"
-dependencies = [
- "cfg-if",
- "once_cell",
- "rustversion",
- "wasm-bindgen-macro",
- "wasm-bindgen-shared",
-]
-
-[[package]]
-name = "wasm-bindgen-macro"
-version = "0.2.126"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1"
-dependencies = [
- "quote",
- "wasm-bindgen-macro-support",
-]
-
-[[package]]
-name = "wasm-bindgen-macro-support"
-version = "0.2.126"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e"
-dependencies = [
- "bumpalo",
- "proc-macro2",
- "quote",
- "syn 2.0.118",
- "wasm-bindgen-shared",
-]
-
-[[package]]
-name = "wasm-bindgen-shared"
-version = "0.2.126"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "winapi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
-dependencies = [
- "winapi-i686-pc-windows-gnu",
- "winapi-x86_64-pc-windows-gnu",
-]
-
-[[package]]
-name = "winapi-i686-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
-
-[[package]]
-name = "winapi-x86_64-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-
-[[package]]
-name = "windows"
-version = "0.57.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
-dependencies = [
- "windows-core",
- "windows-targets",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.57.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
-dependencies = [
- "windows-implement",
- "windows-interface",
- "windows-result",
- "windows-targets",
-]
-
-[[package]]
-name = "windows-implement"
-version = "0.57.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "windows-interface"
-version = "0.57.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "windows-result"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
-dependencies = [
- "windows-targets",
-]
-
-[[package]]
-name = "windows-targets"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
-dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
-
-[[package]]
-name = "wit-bindgen"
-version = "0.57.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
-
-[[package]]
-name = "zerocopy"
-version = "0.8.52"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
-dependencies = [
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.8.52"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
-]
-
-[[package]]
-name = "zeroize"
-version = "1.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e"
diff --git a/bench_vs/lambda/deserialize-only/Cargo.toml b/bench_vs/lambda/deserialize-only/Cargo.toml
deleted file mode 100644
index fac6a7628..000000000
--- a/bench_vs/lambda/deserialize-only/Cargo.toml
+++ /dev/null
@@ -1,11 +0,0 @@
-[workspace]
-
-[package]
-name = "deserialize-only-bench"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-lambda-vm-prover = { path = "../../../prover", default-features = false }
-lambda-vm-syscalls = { path = "../../../syscalls" }
-postcard = { version = "1.0", features = ["alloc"] }
diff --git a/bench_vs/lambda/deserialize-only/src/main.rs b/bench_vs/lambda/deserialize-only/src/main.rs
deleted file mode 100644
index 7ba9a9d93..000000000
--- a/bench_vs/lambda/deserialize-only/src/main.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-//! Deserialize-only counterpart to the recursion guest.
-//!
-//! Reads the same private-input blob as `recursion-bench`, postcard-decodes
-//! `(VmProof, Vec<u8>, ProofOptions)`, then commits and halts — without ever
-//! calling `verify_with_options`. The cycle delta between this guest and
-//! `recursion-bench` is the actual cost of the STARK verifier inside the VM.
-//!
-//! Mirrors the recursion guest's std setup (build-std + `lambda_vm_syscalls`)
-//! so the two differ only in the verify call.
-
-#![no_main]
-
-use lambda_vm_prover::{ProofOptions, VmProof};
-
-#[unsafe(export_name = "main")]
-pub fn main() -> ! {
-    lambda_vm_syscalls::allocator::init_allocator();
-
-    const PANIC_MSG: &str = "PANICKED";
-    std::panic::set_hook(Box::new(|_| unsafe {
-        lambda_vm_syscalls::syscalls::sys_panic(PANIC_MSG.as_ptr(), PANIC_MSG.len())
-    }));
-
-    let blob = lambda_vm_syscalls::syscalls::get_private_input();
-    let decoded: (VmProof, Vec<u8>, ProofOptions) =
-        postcard::from_bytes(&blob).expect("failed to deserialize recursion input");
-
-    // Tie the committed byte to the decoded value so LLVM can't elide the decode.
-    let marker = decoded.2.blowup_factor ^ *decoded.1.first().unwrap_or(&0);
-    lambda_vm_syscalls::syscalls::commit(&[marker]);
-    lambda_vm_syscalls::syscalls::sys_halt();
-}
diff --git a/bench_vs/lambda/recursion/Cargo.toml b/bench_vs/lambda/recursion/Cargo.toml
index bdfeb38dc..1d2ddc808 100644
--- a/bench_vs/lambda/recursion/Cargo.toml
+++ b/bench_vs/lambda/recursion/Cargo.toml
@@ -6,6 +6,8 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-lambda-vm-prover = { path = "../../../prover", default-features = false }
+lambda-vm-prover = { path = "../../../prover", default-features = false, features = [
+    "profile-markers",
+] }
 lambda-vm-syscalls = { path = "../../../syscalls" }
 postcard = { version = "1.0", features = ["alloc"] }
diff --git a/bench_vs/lambda/recursion/src/main.rs b/bench_vs/lambda/recursion/src/main.rs
index c256a0732..f19271aac 100644
--- a/bench_vs/lambda/recursion/src/main.rs
+++ b/bench_vs/lambda/recursion/src/main.rs
@@ -31,6 +31,9 @@ pub fn main() -> ! {
     let blob = lambda_vm_syscalls::syscalls::get_private_input();
     let (vm_proof, inner_elf, options): (VmProof, Vec<u8>, ProofOptions) =
         postcard::from_bytes(&blob).expect("failed to deserialize recursion input");
+    lambda_vm_prover::profile_markers::step_marker::<
+        { lambda_vm_prover::profile_markers::STEP_DECODE_DONE },
+    >();
 
     let ok = lambda_vm_prover::verify_with_options(&vm_proof, &inner_elf, &options, None, None)
         .expect("verify errored");
diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml
index d0f6a51ef..3a3b95068 100644
--- a/crypto/stark/Cargo.toml
+++ b/crypto/stark/Cargo.toml
@@ -48,6 +48,7 @@ rand_chacha = "0.3.1"
 test-utils = []
 test_fiat_shamir = []
 instruments = []                                                     # This enables timing prints in prover and verifier
+profile-markers = []                                                 # Emits inlining-immune asm markers for guest step profiling
 debug-checks = []                                                    # Enables validate_trace + bus balance report in prover
 parallel = ["dep:rayon", "crypto/parallel"]
 cuda = ["dep:math-cuda"]
diff --git a/crypto/stark/src/lib.rs b/crypto/stark/src/lib.rs
index 87236c5f9..2b93f41ba 100644
--- a/crypto/stark/src/lib.rs
+++ b/crypto/stark/src/lib.rs
@@ -21,6 +21,7 @@ pub mod grinding;
 pub mod instruments;
 pub mod lookup;
 pub(crate) mod par;
+pub mod profile_markers;
 pub mod proof;
 pub mod prover;
 pub mod r4_denoms;
diff --git a/crypto/stark/src/profile_markers.rs b/crypto/stark/src/profile_markers.rs
new file mode 100644
index 000000000..e5fd15100
--- /dev/null
+++ b/crypto/stark/src/profile_markers.rs
@@ -0,0 +1,26 @@
+//! Inlining-immune markers for guest-side step profiling.
+//!
+//! Each marker emits `addi x0, x0, N` on the RISC-V guest: a real instruction (so
+//! it survives inlining and optimization, unlike a removed symbol) that writes to
+//! the zero register and is otherwise a no-op. Generated code never emits
+//! `addi x0, x0, N` for nonzero `N` on its own (writes to `x0` are discarded), so
+//! marker values can't collide with organic instructions. Do not reuse this
+//! immediate encoding space for anything other than step markers.
+//!
+//! Kept separate from `instruments`, whose `std::time::Instant::now()` panics on the guest.
+
+pub const STEP_DECODE_DONE: u32 = 1;
+pub const STEP_REPLAY_ROUNDS_AFTER_ROUND_1: u32 = 2;
+pub const STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL: u32 = 3;
+pub const STEP_VERIFY_FRI: u32 = 4;
+pub const STEP_VERIFY_TRACE_AND_COMPOSITION_OPENINGS: u32 = 5;
+
+/// Emits step marker `N` (<= 2047, `addi`'s immediate range); empty unless enabled on riscv64.
+#[inline(always)]
+pub fn step_marker<const N: u32>() {
+    #[cfg(all(feature = "profile-markers", target_arch = "riscv64"))]
+    // SAFETY: writes only to hardwired-zero `x0`; no memory, stack, or live register is touched.
+    unsafe {
+        core::arch::asm!("addi x0, x0, {n}", n = const N);
+    }
+}
diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index 7764ea679..5b512c37e 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -97,13 +97,15 @@ pub trait IsStarkVerifier<
     /// Checks whether the purported evaluations of the composition polynomial parts and the trace
     /// polynomials at the out-of-domain challenge are consistent.
     /// See https://lambdaclass.github.io/lambdaworks/starks/protocol.html#step-2-verify-claimed-composition-polynomial
-    #[inline(never)]
     fn step_2_verify_claimed_composition_polynomial(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
         proof: &StarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
         challenges: &Challenges<FieldExtension>,
     ) -> bool {
+        crate::profile_markers::step_marker::<
+            { crate::profile_markers::STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL },
+        >();
         let trace_length = proof.trace_length;
         let boundary_constraints = air.boundary_constraints(
             &proof.public_inputs,
@@ -242,7 +244,6 @@ pub trait IsStarkVerifier<
     /// Reconstructs the Deep composition polynomial evaluations at the challenge indices values using the provided
     /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the
     /// FRI decommitments are valid and correspond to the Deep composition polynomial.
-    #[inline(never)]
     fn step_3_verify_fri(
         proof: &StarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
@@ -252,6 +253,7 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
     {
+        crate::profile_markers::step_marker::<{ crate::profile_markers::STEP_VERIFY_FRI }>();
         let (deep_poly_evaluations, deep_poly_evaluations_sym) =
             match Self::reconstruct_deep_composition_poly_evaluations_for_all_queries(
                 challenges, domain, proof,
@@ -398,7 +400,6 @@ pub trait IsStarkVerifier<
     /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial
     /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query
     /// index challenges.
-    #[inline(never)]
     fn step_4_verify_trace_and_composition_openings(
         proof: &StarkProof<Field, FieldExtension, PI>,
         challenges: &Challenges<FieldExtension>,
@@ -407,6 +408,9 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
     {
+        crate::profile_markers::step_marker::<
+            { crate::profile_markers::STEP_VERIFY_TRACE_AND_COMPOSITION_OPENINGS },
+        >();
         challenges
             .iotas
             .iter()
@@ -917,6 +921,9 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes,
         FieldElement<FieldExtension>: AsBytes,
     {
+        crate::profile_markers::step_marker::<
+            { crate::profile_markers::STEP_REPLAY_ROUNDS_AFTER_ROUND_1 },
+        >();
         // ===================================
         // ==========|   Round 2   |==========
         // ===================================
diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs
index 99eb0a00f..fae8e774a 100644
--- a/executor/src/vm/execution.rs
+++ b/executor/src/vm/execution.rs
@@ -302,6 +302,22 @@ impl InstructionCache {
     }
 }
 
+/// Decode a `stark::profile_markers::step_marker` hit (`addi x0, x0, N`) at `pc`.
+/// Matches any `ArithImm` with `dst == 0` and `op == Add`; the source register and
+/// `imm` are not checked, so a plain NOP (`addi x0, x0, 0`), if it decodes to this
+/// variant, yields `Some(0)`. `None` if `instructions.get(pc)` is `None` or otherwise.
+pub fn decode_step_marker(instructions: &InstructionCache, pc: u64) -> Option<u32> {
+    match instructions.get(pc)? {
+        Instruction::ArithImm {
+            dst: 0,
+            op: crate::vm::instruction::decoding::ArithOp::Add,
+            imm,
+            ..
+        } => Some(*imm as u32),
+        _ => None,
+    }
+}
+
 #[derive(thiserror::Error, Debug)]
 pub enum ExecutorError {
     #[error("Failed to decode instruction: {0}")]
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index ff6922f63..3695689d6 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -11,6 +11,7 @@ cuda = ["stark/cuda"]
 test-cuda-faults = ["cuda", "stark/test-cuda-faults"]
 debug-checks = ["stark/debug-checks"]
 instruments = ["stark/instruments"]
+profile-markers = ["stark/profile-markers"]
 disk-spill = ["stark/disk-spill"]
 
 [dependencies]
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 6bbde8b84..f8b0752e7 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -19,6 +19,7 @@ mod debug_report;
 #[cfg(feature = "instruments")]
 pub mod instruments;
 mod paged_mem;
+pub use stark::profile_markers;
 mod statement;
 pub mod tables;
 pub mod test_utils;
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index f0196b9c6..807d53a0b 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -209,13 +209,17 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
     )
 }
 
-/// Verifier sub-routines in execution order; `run_profile` buckets cycles by
-/// substring-matching the enclosing symbol (a missing step merges into the prior).
-const VERIFIER_STEP_KEYWORDS: [&str; 4] = [
-    "replay_rounds_after_round_1",
-    "step_2_verify_claimed_composition_polynomial",
-    "step_3_verify_fri",
-    "step_4_verify_trace_and_composition_openings",
+/// Verifier sub-steps in execution order, indexed by `stark::profile_markers::STEP_*`
+/// value. `run_profile` buckets cycles by the highest marker observed so far (via
+/// `decode_step_marker`), so a missing marker leaves its bucket at 0 cycles and no
+/// symbol table is needed. Index 0 has no marker: `STEP_*` values start at 1.
+const STEP_LABELS: [&str; 6] = [
+    "0. setup (alloc, pre-decode)",
+    "1. decode (postcard decode -> Elf::load/VmAirs::new/transcript setup)",
+    "2. step 1: replay_rounds_after_round_1",
+    "3. step 2: verify_claimed_composition_polynomial",
+    "4. step 3: verify_fri",
+    "5. step 4: verify_trace_and_composition_openings (+ wrap-up)",
 ];
 
 /// `blowup=8` (128-bit, multi-query) options for the `multiquery` variants.
@@ -262,18 +266,11 @@ fn print_function_table(
 }
 
 /// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
-fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
-    let labels = [
-        "0. setup (alloc + postcard decode + VmAirs::new + pre-step-1)",
-        "1. step 1: replay_rounds_after_round_1",
-        "2. step 2: verify_claimed_composition_polynomial",
-        "3. step 3: verify_fri",
-        "4. step 4: verify_trace_and_composition_openings (+ wrap-up)",
-    ];
+fn print_step_breakdown(buckets: &[u64; 6], total_cycles: u64) {
     eprintln!();
     eprintln!("  Per-step cycle breakdown (monotonic state machine):");
-    eprintln!("  {:<60}  {:>14}  {:>7}", "bucket", "cycles", "%");
-    for (label, cycles) in labels.iter().zip(buckets.iter()) {
+    eprintln!("  {:<70}  {:>14}  {:>7}", "bucket", "cycles", "%");
+    for (label, cycles) in STEP_LABELS.iter().zip(buckets.iter()) {
         let pct = if total_cycles > 0 {
             100.0 * (*cycles as f64) / (total_cycles as f64)
         } else {
@@ -283,9 +280,10 @@ fn print_step_breakdown(buckets: &[u64; 5], total_cycles: u64) {
     }
 }
 
-/// Single-pass execute-only profiler. Always prints total cycles + a rough
-/// trace/LDE estimate; with `detailed`, also the top-25 functions + per-step
-/// breakdown (one streamed pass). `!detailed` does no per-log work.
+/// Single-pass execute-only profiler. Always prints total cycles, the
+/// per-step cycle breakdown (marker decode is cheap — one `InstructionCache`
+/// lookup per cycle), and a rough trace/LDE estimate; with `detailed`, also
+/// the top-25 functions table (needs a `pc_hist` HashMap, so gated).
 fn run_profile(
     guest_name: &str,
     progress_stride: usize,
@@ -294,75 +292,41 @@ fn run_profile(
 ) {
     use std::collections::HashMap;
 
-    let (guest_elf_bytes, _program, mut executor) = setup_guest_run("profile", guest_name, &opts);
+    let (guest_elf_bytes, program, mut executor) = setup_guest_run("profile", guest_name, &opts);
     let symbols = executor::elf::SymbolTable::parse(&guest_elf_bytes);
+    let instructions = executor::vm::execution::InstructionCache::new(&program.data)
+        .expect("instruction cache build failed");
 
     let mut pc_hist: HashMap<u64, u64> = HashMap::new();
-    let mut buckets = [0u64; 5];
-    let mut last_range: Option<(u64, u64)> = None;
-    let mut last_advance: u8 = 0;
+    let mut buckets = [0u64; 6];
     let bucket = std::cell::Cell::new(0u8);
     let unique = std::cell::Cell::new(0usize);
 
-    if detailed {
-        assert!(
-            !symbols.is_empty(),
-            "{guest_name} ELF has no symbol table — was it stripped?"
-        );
-        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-            let n = symbols
-                .functions()
-                .iter()
-                .filter(|f| f.name.contains(kw))
-                .count();
-            eprintln!(
-                "[profile] step {}: keyword={kw:?} -> {n} symbol(s) {}",
-                i + 1,
-                if n > 0 {
-                    ""
-                } else {
-                    "(no match; merges into previous bucket)"
-                },
-            );
-        }
-    }
-
     eprintln!(
         "[profile] executing {guest_name} guest ({}) ...",
         if detailed {
             "histogram + steps"
         } else {
-            "cycle counter"
+            "steps"
         }
     );
     let (total_cycles, exec_time) = drive_executor(
         &mut executor,
         |log| {
+            let pc = log.current_pc;
+
             if detailed {
-                let pc = log.current_pc;
                 *pc_hist.entry(pc).or_insert(0) += 1;
                 unique.set(pc_hist.len());
+            }
 
-                let in_cached = matches!(last_range, Some((s, e)) if pc >= s && pc < e);
-                if !in_cached {
-                    if let Some(sym) = symbols.lookup(pc) {
-                        last_range = Some((sym.address, sym.address + sym.size.max(1)));
-                        last_advance = 0;
-                        for (i, kw) in VERIFIER_STEP_KEYWORDS.iter().enumerate() {
-                            if sym.name.contains(kw) {
-                                last_advance = (i + 1) as u8;
-                            }
-                        }
-                    } else {
-                        last_range = None;
-                        last_advance = 0;
-                    }
-                }
-                if bucket.get() < last_advance {
-                    bucket.set(last_advance);
-                }
-                buckets[bucket.get() as usize] += 1;
+            if let Some(marker) = executor::vm::execution::decode_step_marker(&instructions, pc)
+                && bucket.get() < marker as u8
+            {
+                bucket.set(marker as u8);
             }
+            buckets[bucket.get() as usize] += 1;
+
             ControlFlow::Continue(())
         },
         |chunks, cycles, elapsed| {
@@ -374,7 +338,10 @@ fn run_profile(
                         bucket.get(),
                     );
                 } else {
-                    eprintln!("[profile]   ... {chunks} chunks, {cycles} cycles, {elapsed:?}");
+                    eprintln!(
+                        "[profile]   ... {chunks} chunks, {cycles} cycles, bucket={}, {elapsed:?}",
+                        bucket.get(),
+                    );
                 }
             }
         },
@@ -404,10 +371,11 @@ fn run_profile(
         (main_trace_bytes * 2) as f64 / 1e9,
     );
 
+    eprintln!();
+    print_step_breakdown(&buckets, total_cycles);
     if detailed {
         eprintln!();
         print_function_table(&symbols, pc_hist, total_cycles);
-        print_step_breakdown(&buckets, total_cycles);
     }
     eprintln!("============================================================");
 }
@@ -541,6 +509,65 @@ fn test_recursion_execute_1query() {
     );
 }
 
+/// Regression test for the marker mechanism itself: every `STEP_*` marker
+/// must be observed at least once during a full verifier run, and each
+/// transition between consecutive markers must be a valid step in the
+/// verifier's state machine.
+///
+/// `multi_verify` re-runs `replay_rounds_after_round_1 -> step_2 -> step_3 ->
+/// step_4` once per AIR table (see `crypto/stark/src/verifier.rs`), so the
+/// full marker sequence isn't monotonic overall — it's `STEP_DECODE_DONE`
+/// once, followed by N repetitions of the `2,3,4,5` cycle (one per table).
+/// A transition outside `{1->2, 2->3, 3->4, 4->5, 5->2}` means the marker
+/// convention broke — wrong immediate decoded, or a stale/mismatched build.
+#[test]
+#[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
+fn test_recursion_step_markers_observed_in_order() {
+    let (_bytes, program, mut executor) =
+        setup_guest_run("step-markers", "recursion", &MIN_PROOF_OPTIONS);
+    let instructions = executor::vm::execution::InstructionCache::new(&program.data)
+        .expect("instruction cache build failed");
+
+    let decode_done = stark::profile_markers::STEP_DECODE_DONE;
+    let replay = stark::profile_markers::STEP_REPLAY_ROUNDS_AFTER_ROUND_1;
+    let claimed = stark::profile_markers::STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL;
+    let fri = stark::profile_markers::STEP_VERIFY_FRI;
+    let openings = stark::profile_markers::STEP_VERIFY_TRACE_AND_COMPOSITION_OPENINGS;
+
+    let mut last_marker: Option<u32> = None;
+    let mut seen = std::collections::HashSet::new();
+    drive_executor(
+        &mut executor,
+        |log| {
+            if let Some(marker) =
+                executor::vm::execution::decode_step_marker(&instructions, log.current_pc)
+            {
+                let valid_transition = match last_marker {
+                    None => marker == decode_done,
+                    Some(last) if last == decode_done => marker == replay,
+                    Some(last) if last == replay => marker == claimed,
+                    Some(last) if last == claimed => marker == fri,
+                    Some(last) if last == fri => marker == openings,
+                    Some(last) if last == openings => marker == replay,
+                    Some(_) => false,
+                };
+                assert!(
+                    valid_transition,
+                    "invalid step marker transition: {last_marker:?} -> {marker}"
+                );
+                last_marker = Some(marker);
+                seen.insert(marker);
+            }
+            ControlFlow::Continue(())
+        },
+        |_, _, _| {},
+    );
+
+    for step in [decode_done, replay, claimed, fri, openings] {
+        assert!(seen.contains(&step), "marker {step} was never observed");
+    }
+}
+
 /// Execute-only: verify a `blowup=8` proof of fibonacci(10) in-VM.
 #[test]
 #[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
@@ -847,33 +874,6 @@ fn test_recursion_sampled_flamegraph() {
     eprintln!("============================================================");
 }
 
-// Control guest: decodes the blob and halts. Its cycle count subtracted from
-// the matching recursion run isolates the in-VM verifier cost.
-
-#[test]
-#[ignore = "diagnostic: fast; deserialize-only guest cycle count (1 query)"]
-fn test_deserialize_only_cycles_1query() {
-    run_profile("deserialize-only", 50, MIN_PROOF_OPTIONS, false);
-}
-
-#[test]
-#[ignore = "diagnostic: fast; deserialize-only guest cycle count (multi-query)"]
-fn test_deserialize_only_cycles_multiquery() {
-    run_profile("deserialize-only", 50, blowup8(), false);
-}
-
-#[test]
-#[ignore = "diagnostic: ~1 min; deserialize-only guest histogram (1 query)"]
-fn test_deserialize_only_profile_1query() {
-    run_profile("deserialize-only", 50, MIN_PROOF_OPTIONS, true);
-}
-
-#[test]
-#[ignore = "diagnostic: deserialize-only guest histogram (multi-query)"]
-fn test_deserialize_only_profile_multiquery() {
-    run_profile("deserialize-only", 50, blowup8(), true);
-}
-
 /// Inner program: fibonacci(10).
 #[test]
 #[ignore = "slow: memory-bounded continuation prove of the verifier-in-VM"]

From 2d5937e395397bdbbf1d538f5e18501785e13f63 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 12:24:02 -0300
Subject: [PATCH 21/36] test: drop recursion smoke-test flamegraph and
 page-count diagnostics

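Both diagnostics add too much reviewer overhead for their current
value. The sampled flamegraph will come back once the executor's
flamegraph tooling makes it simple to reimplement; the page-count
summary isn't interesting right now. The `log_progress` helper, which
only the page-count test used, is removed along with them.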
---
 prover/src/tests/recursion_smoke_test.rs | 225 -----------------------
 1 file changed, 225 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 807d53a0b..750651e36 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -187,19 +187,6 @@ fn setup_guest_run(
     (guest_elf_bytes, program, executor)
 }
 
-/// A `drive_executor` progress callback printing one line every `stride` chunks.
-fn log_progress(
-    label: impl Into<String>,
-    stride: usize,
-) -> impl FnMut(usize, u64, std::time::Duration) {
-    let label = label.into();
-    move |chunks, cycles, elapsed| {
-        if chunks.is_multiple_of(stride) {
-            eprintln!("[{label}]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed");
-        }
-    }
-}
-
 /// Demangled enclosing-function name for a PC via the ELF symbol table;
 /// `<unknown>` if none covers it. No file:line (symtab has no DWARF).
 fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
@@ -662,218 +649,6 @@ fn test_recursion_profile_multiquery() {
     run_profile("recursion", 500, blowup8(), true);
 }
 
-/// Count the distinct 4 KB pages the guest touches (code/heap/input/stack) — a
-/// proxy for the prover's per-page PAGE-table overhead, without running it.
-#[test]
-#[ignore = "diagnostic: counts distinct 4 KB memory pages touched by the recursion guest"]
-fn test_recursion_page_count() {
-    use executor::vm::memory::PRIVATE_INPUT_START_INDEX;
-    use std::collections::HashSet;
-
-    let (_bytes, program, mut executor) =
-        setup_guest_run("page-count", "recursion", &MIN_PROOF_OPTIONS);
-
-    // Precompute the recursion ELF's PT_LOAD ranges so we can bucket code/
-    // static pages separately from heap. `Elf::load` already expands BSS
-    // (memsz > filesz) into zero-valued words, so these ranges cover
-    // .text + .rodata + .data + .bss.
-    let segment_ranges: Vec<(u64, u64)> = program
-        .data
-        .iter()
-        .map(|seg| (seg.base_addr, seg.base_addr + (seg.values.len() as u64 * 4)))
-        .collect();
-    eprintln!(
-        "[page-count] recursion ELF: {} PT_LOAD segment(s)",
-        segment_ranges.len(),
-    );
-    for (i, (lo, hi)) in segment_ranges.iter().enumerate() {
-        eprintln!(
-            "[page-count]   segment[{i}]: 0x{lo:016x} .. 0x{hi:016x} ({} bytes)",
-            hi - lo,
-        );
-    }
-
-    // Stream through execution — running to completion via `Executor::run`
-    // would accumulate ~67 M `Log` records (~2.7 GB) we don't need. We only
-    // care about the *final* memory state.
-    eprintln!("[page-count] executing recursion guest (streaming) ...");
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |_log| ControlFlow::Continue(()),
-        log_progress("page-count", 50),
-    );
-
-    // Collect the set of distinct 4 KB pages from every cell touched during
-    // (a) program loading, (b) private-input loading, (c) execution.
-    const PAGE_MASK: u64 = !0xFFFu64;
-    let cells = executor.memory().cells();
-    let total_cells = cells.len();
-    let pages: HashSet<u64> = cells.keys().map(|&a| a & PAGE_MASK).collect();
-
-    // Bucket by region. A "code/static" page is any page that overlaps a
-    // PT_LOAD segment. Stack lives near the top of the 64-bit address
-    // space; private input lives in the [0xFF000000, ...) window above the
-    // 3 GB heap ceiling.
-    const HEAP_CEILING: u64 = 0xC000_0000;
-    const STACK_FLOOR: u64 = 0xFFFF_FFFF_0000_0000;
-
-    let mut code_pages = 0usize;
-    let mut heap_pages = 0usize;
-    let mut private_input_pages = 0usize;
-    let mut stack_pages = 0usize;
-    let mut other_pages = 0usize;
-
-    for &page in &pages {
-        let page_end = page.saturating_add(0x1000);
-        let in_code = segment_ranges
-            .iter()
-            .any(|&(lo, hi)| page < hi && lo < page_end);
-        if in_code {
-            code_pages += 1;
-        } else if page >= STACK_FLOOR {
-            stack_pages += 1;
-        } else if page >= PRIVATE_INPUT_START_INDEX {
-            private_input_pages += 1;
-        } else if page < HEAP_CEILING {
-            heap_pages += 1;
-        } else {
-            other_pages += 1;
-        }
-    }
-
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  RECURSION GUEST PAGE-COUNT SUMMARY");
-    eprintln!("============================================================");
-    eprintln!("  Total cycles                  : {total_cycles}");
-    eprintln!("  Executor wall time            : {exec_time:?}");
-    eprintln!("  Memory cells touched (4 B ea) : {total_cells}");
-    eprintln!("  Distinct 4 KB pages touched   : {}", pages.len());
-    eprintln!();
-    eprintln!("  Pages per region:");
-    eprintln!("    code/static (ELF segments)     : {code_pages}");
-    eprintln!("    heap (0..0xC000_0000)          : {heap_pages}");
-    eprintln!("    private input (0xFF000000..)   : {private_input_pages}");
-    eprintln!("    stack (>= 0xFFFFFFFF_00000000) : {stack_pages}");
-    if other_pages > 0 {
-        eprintln!("    other (unclassified)           : {other_pages}");
-    }
-    eprintln!();
-    eprintln!("  Interpretation (PAGE-table overhead):");
-    eprintln!("    <1k pages     → PAGE overhead is not the bottleneck.");
-    eprintln!("    10k-100k      → TLSF heap fragmentation; try a bump alloc.");
-    eprintln!("    >100k         → postcard decode dominates; stream-decode?");
-    eprintln!("============================================================");
-}
-
-/// Sampled call-stack flamegraph of the recursion guest, written to
-/// `/tmp/recursion_folded_sampled.txt` (inferno "folded stacks" format).
-#[test]
-#[ignore = "diagnostic: sampled flamegraph for the verifier-in-VM"]
-fn test_recursion_sampled_flamegraph() {
-    use executor::flamegraph::FlamegraphGenerator;
-    use std::io::BufWriter;
-
-    /// 1-in-N logs sampled. >1 desyncs the call stack on skipped CALL/RETURNs,
-    /// so keep at 1 unless stack accuracy is expendable.
-    const SAMPLE_RATE: usize = 1;
-
-    /// Stop after this many cycles (0 = run to completion).
-    const CYCLE_BUDGET: u64 = 5_000_000;
-
-    let (recursion_elf_bytes, program, mut executor) =
-        setup_guest_run("sampled-fg", "recursion", &MIN_PROOF_OPTIONS);
-
-    eprintln!("[sampled-fg] executing recursion guest (sampling 1-in-{SAMPLE_RATE}) ...",);
-    let symbols = executor::elf::SymbolTable::parse(&recursion_elf_bytes);
-    let entry_point = program.entry_point;
-
-    // Build our own instruction cache from the same segments `Executor::new`
-    // decodes internally. Owning it (rather than reading `executor.instructions`
-    // mid-loop) is what lets the per-log closure call `process_logs` without
-    // borrowing `executor`, which `drive_executor` holds mutably for `resume()`.
-    let instructions = executor::vm::execution::InstructionCache::new(&program.data)
-        .expect("instruction cache build failed");
-
-    // RefCell so the per-log closure (`process_logs`, &mut self) and the
-    // progress closure (`write_folded`, &self) can both reach the generator —
-    // their calls never overlap, so the runtime borrow check never trips.
-    let generator = std::cell::RefCell::new(FlamegraphGenerator::new(symbols, entry_point));
-
-    // Path is defined here (not after the loop) so the periodic checkpoint
-    // writes below can target it. The final write at the end still happens.
-    let path = "/tmp/recursion_folded_sampled.txt";
-
-    let mut i = 0usize;
-    let (total_cycles, exec_time) = drive_executor(
-        &mut executor,
-        |log| {
-            // 1-in-SAMPLE_RATE logs are fed to `process_logs`. At SAMPLE_RATE==1
-            // this is the identity filter (`_ % 1 == 0`); the `#[allow]` keeps
-            // the general form so SAMPLE_RATE can be bumped without touching the
-            // body. Skipped logs lose stack accuracy — acceptable diagnostic
-            // quality at higher rates.
-            #[allow(clippy::modulo_one)]
-            let take = i.is_multiple_of(SAMPLE_RATE);
-            if take {
-                generator
-                    .borrow_mut()
-                    .process_logs(std::slice::from_ref(log), &instructions)
-                    .expect("flamegraph process_logs");
-            }
-            i += 1;
-
-            // Early exit once we've covered the cycle budget. The dominant hot
-            // kernels are ~uniform across the verifier's runtime, so a partial
-            // run still surfaces them. `#[allow]` lets CYCLE_BUDGET be const-0
-            // (full run) without tripping clippy.
-            #[allow(clippy::absurd_extreme_comparisons)]
-            if CYCLE_BUDGET > 0 && i as u64 >= CYCLE_BUDGET {
-                eprintln!("[sampled-fg] hit cycle budget ({CYCLE_BUDGET} cycles), stopping early");
-                ControlFlow::Break(())
-            } else {
-                ControlFlow::Continue(())
-            }
-        },
-        |chunks, cycles, elapsed| {
-            if chunks.is_multiple_of(500) {
-                eprintln!(
-                    "[sampled-fg]   ... {chunks} chunks, {cycles} cycles, {elapsed:?} elapsed"
-                );
-                // Checkpoint: re-write the folded file in place so a killed run
-                // still leaves a usable (if partial) flamegraph on disk.
-                let file = std::fs::File::create(path).expect("create output file");
-                let mut writer = BufWriter::new(file);
-                generator
-                    .borrow()
-                    .write_folded(&mut writer)
-                    .expect("write folded output");
-            }
-        },
-    );
-
-    let file = std::fs::File::create(path).expect("create output file");
-    let mut writer = BufWriter::new(file);
-    generator
-        .borrow()
-        .write_folded(&mut writer)
-        .expect("write folded output");
-
-    eprintln!();
-    eprintln!("============================================================");
-    eprintln!("  SAMPLED FLAMEGRAPH SUMMARY");
-    eprintln!("============================================================");
-    eprintln!("  Total cycles : {total_cycles}");
-    eprintln!("  Sample rate  : 1 in {SAMPLE_RATE}");
-    eprintln!("  Exec time    : {exec_time:?}");
-    eprintln!("  Output file  : {path}");
-    eprintln!("============================================================");
-    eprintln!();
-    eprintln!("  To render SVG (requires inferno):");
-    eprintln!("    cat {path} | inferno-flamegraph > /tmp/recursion_flamegraph_sampled.svg");
-    eprintln!("============================================================");
-}
-
 /// Inner program: fibonacci(10).
 #[test]
 #[ignore = "slow: memory-bounded continuation prove of the verifier-in-VM"]

From c97c9d38d090793b6f2f572898915d5a7765b3ee Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 12:47:49 -0300
Subject: [PATCH 22/36] refactor: drop accessors only used by the removed
 diagnostics

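Neither accessor has a caller left. Memory::cells() was read only by
the page-count smoke test just deleted. SymbolTable::functions() was
used by run_profile's keyword-based step bucketing, which had already
been replaced by step markers.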
---
 executor/src/elf.rs       | 5 -----
 executor/src/vm/memory.rs | 7 -------
 2 files changed, 12 deletions(-)

diff --git a/executor/src/elf.rs b/executor/src/elf.rs
index da38cbbf1..ed79fb983 100644
--- a/executor/src/elf.rs
+++ b/executor/src/elf.rs
@@ -557,9 +557,4 @@ impl SymbolTable {
     pub fn len(&self) -> usize {
         self.functions.len()
     }
-
-    /// Borrow the full function list (sorted by address).
-    pub fn functions(&self) -> &[FunctionSymbol] {
-        &self.functions
-    }
 }
diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs
index f3a3e622c..f349eeae6 100644
--- a/executor/src/vm/memory.rs
+++ b/executor/src/vm/memory.rs
@@ -218,13 +218,6 @@ impl Memory {
         Ok(self.public_output.clone())
     }
 
-    /// Read-only access to the underlying 4-byte cell map. Exposed for
-    /// diagnostic tooling (e.g. counting the distinct 4 KB memory pages a
-    /// program touches) — not part of the normal execution interface.
-    pub fn cells(&self) -> &U64HashMap<[u8; 4]> {
-        &self.cells
-    }
-
     /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a
     /// 4-byte LE length prefix followed by the raw data. The guest reads these
     /// bytes directly via normal RISC-V loads (ZisK-style memory-mapped input).

From 0e68f301227ddf221cdac455ffa185ce8627021b Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 13:15:36 -0300
Subject: [PATCH 23/36] feat: split airs/bus-balance from decode in recursion
 step profiling

Add STEP_AIRS_AND_BUS_BALANCE_DONE marker so the verifier's preprocessed
FFT+Merkle commitment build (VmAirs::new) is bucketed separately from
postcard decode and from multi_verify's transcript replay. The new
marker takes value 2, which shifts the four per-table markers from 2..5
to 3..6; STEP_LABELS and the bucket array grow to 7 entries to match.
The top-25 cycle table now also tags each row with its verifier step,
so that, for example, how much of step4:openings is keccak is visible
at a glance. Update the CI histogram aggregator to parse and render the
new step column.
---
 .../scripts/aggregate_recursion_histogram.py  | 40 ++++----
 crypto/stark/src/profile_markers.rs           |  9 +-
 prover/src/lib.rs                             |  4 +
 prover/src/tests/recursion_smoke_test.rs      | 97 ++++++++++++-------
 4 files changed, 93 insertions(+), 57 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index 1ae34ff70..c438dc528 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -1,14 +1,15 @@
 #!/usr/bin/env python3
 """Format the recursion-guest per-function profile as a Markdown PR comment.
 
-`test_recursion_pc_histogram` prints a per-function summary table: the cycles
-folded over each function's PCs, computed across the *full* histogram — the view
-that shows where the cycles actually go. We parse that table and render it as
-Markdown.
+`test_recursion_profile_1query`/`_multiquery` print a per-(function, step)
+summary table: the cycles folded over each function's PCs *within a given
+verifier step*, computed across the full histogram — the view that shows both
+where the cycles go and which verifier step they belong to (e.g. how much of
+`step4:openings` is `keccak`). We parse that table and render it as Markdown.
 
-    Top 25 functions by cycle count (aggregated over their PCs):
-    rank          cycles        %    cum %    PCs  function
-       1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
+    Top 25 (function, step) pairs by cycle count (aggregated over their PCs):
+    rank          cycles        %    cum %    PCs  step              function
+       1         5335072   24.95%   24.95%     72  decode            <...>::visit_seq::<...>
 
 Reads the test's captured output from argv[1]; writes the Markdown body to
 argv[2] (or stdout).
@@ -17,11 +18,11 @@
 import re
 import sys
 
-# A per-function summary row: rank, cycles, pct%, cum%, pcs, function.
+# A per-(function, step) summary row: rank, cycles, pct%, cum%, pcs, step, function.
 FN_ROW = re.compile(
-    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
+    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(\S+)\s+(.*\S)\s*$"
 )
-FN_TABLE_START = re.compile(r"Top \d+ functions by cycle count")
+FN_TABLE_START = re.compile(r"Top \d+ \(function, step\) pairs by cycle count")
 # The "====" rule the test prints right after the (now sole) function table.
 TABLE_END = re.compile(r"^=+\s*$")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
@@ -53,7 +54,8 @@ def parse(text):
                     "pct": m.group(2),
                     "cum": m.group(3),
                     "pcs": int(m.group(4)),
-                    "fn": m.group(5),
+                    "step": m.group(5),
+                    "fn": m.group(6),
                 }
             )
     return total_cycles, unique_pcs, exec_time, rows
@@ -80,20 +82,22 @@ def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest pro
             body += f" · **Exec time:** {exec_time}"
         body += "\n\n"
 
-    body += f"#### Top {len(rows)} functions by cycles (folded over their PCs)\n\n"
-    body += "| Rank | Cycles | % | Cum % | PCs | Function |\n"
-    body += "|-----:|-------:|--:|------:|----:|----------|\n"
+    body += f"#### Top {len(rows)} (function, step) pairs by cycles (folded over their PCs)\n\n"
+    body += "| Rank | Cycles | % | Cum % | PCs | Step | Function |\n"
+    body += "|-----:|-------:|--:|------:|----:|------|----------|\n"
     for i, r in enumerate(rows, 1):
         body += (
             f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
-            f"{r['pcs']} | `{short(r['fn'])}` |\n"
+            f"{r['pcs']} | `{r['step']}` | `{short(r['fn'])}` |\n"
         )
 
     last_cum = rows[-1]["cum"]
     body += (
-        f"\n<sub>Each function's cycles are summed over all its program counters "
-        f"across the full histogram; the top {len(rows)} cover {last_cum}% of total "
-        f"cycles. Percentages are of total cycles.</sub>\n"
+        f"\n<sub>Each (function, step) pair's cycles are summed over all its program "
+        f"counters within that verifier step, across the full histogram; the top "
+        f"{len(rows)} cover {last_cum}% of total cycles. Percentages are of total "
+        f"cycles. See the workflow log for the full per-step cycle breakdown "
+        f"table.</sub>\n"
     )
     return body
 
diff --git a/crypto/stark/src/profile_markers.rs b/crypto/stark/src/profile_markers.rs
index e5fd15100..570b68641 100644
--- a/crypto/stark/src/profile_markers.rs
+++ b/crypto/stark/src/profile_markers.rs
@@ -12,10 +12,11 @@
 //! `std::time::Instant::now()`, which panics on the guest target.
 
 pub const STEP_DECODE_DONE: u32 = 1;
-pub const STEP_REPLAY_ROUNDS_AFTER_ROUND_1: u32 = 2;
-pub const STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL: u32 = 3;
-pub const STEP_VERIFY_FRI: u32 = 4;
-pub const STEP_VERIFY_TRACE_AND_COMPOSITION_OPENINGS: u32 = 5;
+pub const STEP_AIRS_AND_BUS_BALANCE_DONE: u32 = 2;
+pub const STEP_REPLAY_ROUNDS_AFTER_ROUND_1: u32 = 3;
+pub const STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL: u32 = 4;
+pub const STEP_VERIFY_FRI: u32 = 5;
+pub const STEP_VERIFY_TRACE_AND_COMPOSITION_OPENINGS: u32 = 6;
 
 #[inline(always)]
 pub fn step_marker<const N: u32>() {
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index f8b0752e7..c3fc39cca 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -1072,6 +1072,10 @@ pub fn verify_with_options(
         None => return Ok(false),
     };
 
+    stark::profile_markers::step_marker::<
+        { stark::profile_markers::STEP_AIRS_AND_BUS_BALANCE_DONE },
+    >();
+
     Ok(Verifier::multi_verify(
         &air_refs,
         &vm_proof.proof,
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 750651e36..7e1f9953d 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -200,13 +200,14 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
 /// value. `run_profile` buckets cycles by the highest marker observed so far
 /// (`decode_step_marker` — a missing marker just means that bucket stays at 0
 /// cycles, no substring matching or symbol table needed).
-const STEP_LABELS: [&str; 6] = [
-    "0. setup (alloc, pre-decode)",
-    "1. decode (postcard decode -> Elf::load/VmAirs::new/transcript setup)",
-    "2. step 1: replay_rounds_after_round_1",
-    "3. step 2: verify_claimed_composition_polynomial",
-    "4. step 3: verify_fri",
-    "5. step 4: verify_trace_and_composition_openings (+ wrap-up)",
+const STEP_LABELS: [&str; 7] = [
+    "0. setup (alloc init + postcard decode)",
+    "1. airs_and_bus_balance (Elf::load/VmAirs::new preprocessed FFT+Merkle/bus balance)",
+    "2. multi_verify setup (transcript replay phase A/B, per-table fork)",
+    "3. step 1: replay_rounds_after_round_1",
+    "4. step 2: verify_claimed_composition_polynomial",
+    "5. step 3: verify_fri",
+    "6. step 4: verify_trace_and_composition_openings (+ wrap-up)",
 ];
 
 /// `blowup=8` (128-bit, multi-query) options for the `multiquery` variants.
@@ -214,46 +215,69 @@ fn blowup8() -> stark::proof::options::ProofOptions {
     crate::GoldilocksCubicProofOptions::with_blowup(8).expect("blowup=8 is always valid")
 }
 
-/// Print the top-25 functions by cycles, folding the PC histogram by symbol.
+/// Short per-step tag for the function table, keyed by the same bucket index
+/// used in `STEP_LABELS`/`buckets`.
+fn step_tag(bucket: u8) -> &'static str {
+    match bucket {
+        0 => "setup",
+        1 => "airs_bus_balance",
+        2 => "multi_verify_setup",
+        3 => "step1:replay",
+        4 => "step2:claimed",
+        5 => "step3:fri",
+        6 => "step4:openings",
+        _ => "?",
+    }
+}
+
+/// Print the top-25 `(function, step)` pairs by cycles, folding the PC
+/// histogram by symbol and by the verifier step active when each cycle ran.
+/// This is what lets you see e.g. how much of `step4:openings` is spent in
+/// `keccak` at a glance, instead of only the function's total across all steps.
 fn print_function_table(
     symbols: &executor::elf::SymbolTable,
-    pc_hist: std::collections::HashMap<u64, u64>,
+    pc_hist: std::collections::HashMap<(u64, u8), u64>,
     total_cycles: u64,
 ) {
-    let mut by_function: std::collections::HashMap<String, (u64, u64)> =
+    let mut by_function_step: std::collections::HashMap<(String, u8), (u64, u64)> =
         std::collections::HashMap::new();
-    for (pc, count) in &pc_hist {
-        let entry = by_function
-            .entry(resolve_pc(symbols, *pc))
+    let mut unique_pcs: std::collections::HashSet<u64> = std::collections::HashSet::new();
+    for ((pc, bucket), count) in &pc_hist {
+        unique_pcs.insert(*pc);
+        let entry = by_function_step
+            .entry((resolve_pc(symbols, *pc), *bucket))
             .or_insert((0, 0));
         entry.0 += *count; // cycles
-        entry.1 += 1; // distinct PCs folded into this function
+        entry.1 += 1; // distinct PCs folded into this (function, step)
     }
-    let mut fn_entries: Vec<(String, (u64, u64))> = by_function.into_iter().collect();
-    fn_entries.sort_unstable_by_key(|(_name, (cycles, _pcs))| std::cmp::Reverse(*cycles));
+    let mut fn_entries: Vec<((String, u8), (u64, u64))> = by_function_step.into_iter().collect();
+    fn_entries.sort_unstable_by_key(|(_key, (cycles, _pcs))| std::cmp::Reverse(*cycles));
 
     let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
-    eprintln!("  Unique PCs   : {}", pc_hist.len());
+    eprintln!("  Unique PCs   : {}", unique_pcs.len());
     eprintln!();
-    eprintln!("  Top 25 functions by cycle count (aggregated over their PCs):");
-    eprintln!("  rank          cycles        %    cum %    PCs  function");
+    eprintln!("  Top 25 (function, step) pairs by cycle count (aggregated over their PCs):");
+    eprintln!(
+        "  rank          cycles        %    cum %    PCs  step              function"
+    );
     let mut fn_cumulative: u64 = 0;
-    for (rank, (name, (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
+    for (rank, ((name, bucket), (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
         fn_cumulative += cycles;
         eprintln!(
-            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  {:>5}  {}",
+            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  {:>5}  {:<16}  {}",
             rank + 1,
             cycles,
             pct(*cycles),
             pct(fn_cumulative),
             pcs,
+            step_tag(*bucket),
             name,
         );
     }
 }
 
 /// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
-fn print_step_breakdown(buckets: &[u64; 6], total_cycles: u64) {
+fn print_step_breakdown(buckets: &[u64; 7], total_cycles: u64) {
     eprintln!();
     eprintln!("  Per-step cycle breakdown (monotonic state machine):");
     eprintln!("  {:<70}  {:>14}  {:>7}", "bucket", "cycles", "%");
@@ -284,8 +308,8 @@ fn run_profile(
     let instructions = executor::vm::execution::InstructionCache::new(&program.data)
         .expect("instruction cache build failed");
 
-    let mut pc_hist: HashMap<u64, u64> = HashMap::new();
-    let mut buckets = [0u64; 6];
+    let mut pc_hist: HashMap<(u64, u8), u64> = HashMap::new();
+    let mut buckets = [0u64; 7];
     let bucket = std::cell::Cell::new(0u8);
     let unique = std::cell::Cell::new(0usize);
 
@@ -302,11 +326,6 @@ fn run_profile(
         |log| {
             let pc = log.current_pc;
 
-            if detailed {
-                *pc_hist.entry(pc).or_insert(0) += 1;
-                unique.set(pc_hist.len());
-            }
-
             if let Some(marker) = executor::vm::execution::decode_step_marker(&instructions, pc)
                 && bucket.get() < marker as u8
             {
@@ -314,6 +333,11 @@ fn run_profile(
             }
             buckets[bucket.get() as usize] += 1;
 
+            if detailed {
+                *pc_hist.entry((pc, bucket.get())).or_insert(0) += 1;
+                unique.set(pc_hist.len());
+            }
+
             ControlFlow::Continue(())
         },
         |chunks, cycles, elapsed| {
@@ -503,10 +527,11 @@ fn test_recursion_execute_1query() {
 ///
 /// `multi_verify` re-runs `replay_rounds_after_round_1 -> step_2 -> step_3 ->
 /// step_4` once per AIR table (see `crypto/stark/src/verifier.rs`), so the
-/// full marker sequence isn't monotonic overall — it's `STEP_DECODE_DONE`
-/// once, followed by N repetitions of the `2,3,4,5` cycle (one per table).
-/// A transition outside `{1->2, 2->3, 3->4, 4->5, 5->2}` means the marker
-/// convention broke — wrong immediate decoded, or a stale/mismatched build.
+/// full marker sequence isn't monotonic overall — it's `STEP_DECODE_DONE ->
+/// STEP_AIRS_AND_BUS_BALANCE_DONE` once each, followed by N repetitions of
+/// the `3,4,5,6` cycle (one per table). A transition outside
+/// `{1->2, 2->3, 3->4, 4->5, 5->6, 6->3}` means the marker convention broke —
+/// wrong immediate decoded, or a stale/mismatched build.
 #[test]
 #[ignore = "slow: runs the in-VM STARK verifier (minutes on CI)"]
 fn test_recursion_step_markers_observed_in_order() {
@@ -516,6 +541,7 @@ fn test_recursion_step_markers_observed_in_order() {
         .expect("instruction cache build failed");
 
     let decode_done = stark::profile_markers::STEP_DECODE_DONE;
+    let airs_ready = stark::profile_markers::STEP_AIRS_AND_BUS_BALANCE_DONE;
     let replay = stark::profile_markers::STEP_REPLAY_ROUNDS_AFTER_ROUND_1;
     let claimed = stark::profile_markers::STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL;
     let fri = stark::profile_markers::STEP_VERIFY_FRI;
@@ -531,7 +557,8 @@ fn test_recursion_step_markers_observed_in_order() {
             {
                 let valid_transition = match last_marker {
                     None => marker == decode_done,
-                    Some(last) if last == decode_done => marker == replay,
+                    Some(last) if last == decode_done => marker == airs_ready,
+                    Some(last) if last == airs_ready => marker == replay,
                     Some(last) if last == replay => marker == claimed,
                     Some(last) if last == claimed => marker == fri,
                     Some(last) if last == fri => marker == openings,
@@ -550,7 +577,7 @@ fn test_recursion_step_markers_observed_in_order() {
         |_, _, _| {},
     );
 
-    for step in [decode_done, replay, claimed, fri, openings] {
+    for step in [decode_done, airs_ready, replay, claimed, fri, openings] {
         assert!(seen.contains(&step), "marker {step} was never observed");
     }
 }

From 21f9e329a28b613515f8b7f4be4ea94405417d95 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 13:20:21 -0300
Subject: [PATCH 24/36] cargo fmt

---
 prover/src/lib.rs                        | 5 ++---
 prover/src/tests/recursion_smoke_test.rs | 4 +---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index c3fc39cca..41b7d4738 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -1072,9 +1072,8 @@ pub fn verify_with_options(
         None => return Ok(false),
     };
 
-    stark::profile_markers::step_marker::<
-        { stark::profile_markers::STEP_AIRS_AND_BUS_BALANCE_DONE },
-    >();
+    stark::profile_markers::step_marker::<{ stark::profile_markers::STEP_AIRS_AND_BUS_BALANCE_DONE }>(
+    );
 
     Ok(Verifier::multi_verify(
         &air_refs,
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 7e1f9953d..1e34d2d0e 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -257,9 +257,7 @@ fn print_function_table(
     eprintln!("  Unique PCs   : {}", unique_pcs.len());
     eprintln!();
     eprintln!("  Top 25 (function, step) pairs by cycle count (aggregated over their PCs):");
-    eprintln!(
-        "  rank          cycles        %    cum %    PCs  step              function"
-    );
+    eprintln!("  rank          cycles        %    cum %    PCs  step              function");
     let mut fn_cumulative: u64 = 0;
     for (rank, ((name, bucket), (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
         fn_cumulative += cycles;

From 9c4da0b60c59fc87340868f65732b91fc5740b97 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 13:37:30 -0300
Subject: [PATCH 25/36] fix: per-step top-25 tables instead of a single tagged
 table

The previous split added a step column to one combined top-25 table,
losing per-step rank/cum% fidelity. Print the global top-25 (all steps
folded together) plus a separate top-25 table per verifier step, so
each step's own hottest functions and their cumulative share are
visible directly. Update the CI aggregator to parse and render the new
multi-table output.
---
 .../scripts/aggregate_recursion_histogram.py  | 123 ++++++++++++------
 prover/src/tests/recursion_smoke_test.rs      |  86 ++++++++----
 2 files changed, 143 insertions(+), 66 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index c438dc528..ec53985d1 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -1,15 +1,19 @@
 #!/usr/bin/env python3
 """Format the recursion-guest per-function profile as a Markdown PR comment.
 
-`test_recursion_profile_1query`/`_multiquery` print a per-(function, step)
-summary table: the cycles folded over each function's PCs *within a given
-verifier step*, computed across the full histogram — the view that shows both
-where the cycles go and which verifier step they belong to (e.g. how much of
-`step4:openings` is `keccak`). We parse that table and render it as Markdown.
+`test_recursion_profile_1query`/`_multiquery` print a global top-25 functions
+table (folded over all verifier steps), followed by one top-25 table per
+verifier step — so e.g. how much of `step4:openings` is `keccak` is visible
+at a glance instead of only the function's total across all steps. We parse
+all of those tables and render them as Markdown.
 
-    Top 25 (function, step) pairs by cycle count (aggregated over their PCs):
-    rank          cycles        %    cum %    PCs  step              function
-       1         5335072   24.95%   24.95%     72  decode            <...>::visit_seq::<...>
+    Top 25 functions by cycle count (aggregated over their PCs, all steps):
+      rank          cycles        %    cum %    PCs  function
+         1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
+
+    Top 25 functions by cycle count — step airs_bus_balance:
+      rank          cycles        %    cum %    PCs  function
+         1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
 Reads the test's captured output from argv[1]; writes the Markdown body to
 argv[2] (or stdout).
@@ -17,23 +21,30 @@
 
 import re
 import sys
+from collections import OrderedDict
 
-# A per-(function, step) summary row: rank, cycles, pct%, cum%, pcs, step, function.
+# A per-function summary row: rank, cycles, pct%, cum%, pcs, function.
 FN_ROW = re.compile(
-    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(\S+)\s+(.*\S)\s*$"
+    r"^\s*\d+\s+(\d+)\s+([\d.]+)%\s+([\d.]+)%\s+(\d+)\s+(.*\S)\s*$"
+)
+HEADER_ROW = re.compile(r"^\s*rank\s+cycles")
+GLOBAL_TABLE_START = re.compile(
+    r"Top \d+ functions by cycle count \(aggregated over their PCs, all steps\)"
 )
-FN_TABLE_START = re.compile(r"Top \d+ \(function, step\) pairs by cycle count")
-# The "====" rule the test prints right after the (now sole) function table.
-TABLE_END = re.compile(r"^=+\s*$")
+STEP_TABLE_START = re.compile(r"Top \d+ functions by cycle count — step (\S+):")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
 UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
 EXEC_TIME = re.compile(r"Exec time\s*:\s*(\S+)")
 
+GLOBAL_KEY = "__global__"
+
 
 def parse(text):
     total_cycles = unique_pcs = exec_time = None
-    rows = []
-    in_fn_table = False
+    # GLOBAL_KEY -> rows, then one entry per step tag, in first-seen order.
+    tables = OrderedDict()
+    current = None
+    skip_header = False
     for line in text.splitlines():
         if total_cycles is None and (m := TOTAL_CYCLES.search(line)):
             total_cycles = int(m.group(1))
@@ -41,32 +52,68 @@ def parse(text):
             unique_pcs = int(m.group(1))
         if exec_time is None and (m := EXEC_TIME.search(line)):
             exec_time = m.group(1)
-        if FN_TABLE_START.search(line):
-            in_fn_table = True
+
+        if GLOBAL_TABLE_START.search(line):
+            current = GLOBAL_KEY
+            tables.setdefault(current, [])
+            skip_header = True
             continue
-        if in_fn_table and TABLE_END.match(line):
-            in_fn_table = False
+        if m := STEP_TABLE_START.search(line):
+            current = m.group(1)
+            tables.setdefault(current, [])
+            skip_header = True
             continue
-        if in_fn_table and (m := FN_ROW.match(line)):
-            rows.append(
+
+        if current is None:
+            continue
+        if skip_header:
+            # The header row right after a table-start line; anything else
+            # (e.g. a stray blank line) just ends the table early, which is
+            # fine — an empty table renders as "no rows".
+            skip_header = False
+            if HEADER_ROW.match(line):
+                continue
+        if m := FN_ROW.match(line):
+            tables[current].append(
                 {
                     "cycles": int(m.group(1)),
                     "pct": m.group(2),
                     "cum": m.group(3),
                     "pcs": int(m.group(4)),
-                    "step": m.group(5),
-                    "fn": m.group(6),
+                    "fn": m.group(5),
                 }
             )
-    return total_cycles, unique_pcs, exec_time, rows
+        else:
+            current = None
+
+    return total_cycles, unique_pcs, exec_time, tables
 
 
 def short(name, width=90):
     return name if len(name) <= width else name[: width - 1] + "…"
 
 
-def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest profile"):
+def render_table(rows):
     if not rows:
+        return "> _no rows_\n"
+    body = "| Rank | Cycles | % | Cum % | PCs | Function |\n"
+    body += "|-----:|-------:|--:|------:|----:|----------|\n"
+    for i, r in enumerate(rows, 1):
+        body += (
+            f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
+            f"{r['pcs']} | `{short(r['fn'])}` |\n"
+        )
+    last_cum = rows[-1]["cum"]
+    body += (
+        f"\n<sub>Each function's cycles are summed over all its program counters "
+        f"in this table's scope; the top {len(rows)} cover {last_cum}% of total "
+        f"cycles. Percentages are of total cycles.</sub>\n"
+    )
+    return body
+
+
+def render(total_cycles, unique_pcs, exec_time, tables, title="Recursion guest profile"):
+    if not tables.get(GLOBAL_KEY):
         return (
             f"### {title}\n\n"
             "> ⚠️ No per-function rows found in the test output — the run may "
@@ -82,23 +129,17 @@ def render(total_cycles, unique_pcs, exec_time, rows, title="Recursion guest pro
             body += f" · **Exec time:** {exec_time}"
         body += "\n\n"
 
-    body += f"#### Top {len(rows)} (function, step) pairs by cycles (folded over their PCs)\n\n"
-    body += "| Rank | Cycles | % | Cum % | PCs | Step | Function |\n"
-    body += "|-----:|-------:|--:|------:|----:|------|----------|\n"
-    for i, r in enumerate(rows, 1):
-        body += (
-            f"| {i} | {r['cycles']:,} | {r['pct']}% | {r['cum']}% | "
-            f"{r['pcs']} | `{r['step']}` | `{short(r['fn'])}` |\n"
-        )
+    global_rows = tables[GLOBAL_KEY]
+    body += f"#### Top {len(global_rows)} functions by cycles (all steps)\n\n"
+    body += render_table(global_rows)
+
+    for step, rows in tables.items():
+        if step == GLOBAL_KEY:
+            continue
+        body += f"\n<details><summary>Step <code>{step}</code> — top {len(rows)} functions</summary>\n\n"
+        body += render_table(rows)
+        body += "\n</details>\n"
 
-    last_cum = rows[-1]["cum"]
-    body += (
-        f"\n<sub>Each (function, step) pair's cycles are summed over all its program "
-        f"counters within that verifier step, across the full histogram; the top "
-        f"{len(rows)} cover {last_cum}% of total cycles. Percentages are of total "
-        f"cycles. See the workflow log for the full per-step cycle breakdown "
-        f"table.</sub>\n"
-    )
     return body
 
 
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 1e34d2d0e..cb88ae653 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -230,47 +230,83 @@ fn step_tag(bucket: u8) -> &'static str {
     }
 }
 
-/// Print the top-25 `(function, step)` pairs by cycles, folding the PC
-/// histogram by symbol and by the verifier step active when each cycle ran.
-/// This is what lets you see e.g. how much of `step4:openings` is spent in
-/// `keccak` at a glance, instead of only the function's total across all steps.
+/// Print one top-25 table: `rows` is `(name, cycles, distinct_pcs)`, already
+/// unsorted; `total_cycles` is the denominator for percentages (always the
+/// *global* total, so per-step tables' percentages are comparable to the
+/// global table's and to `print_step_breakdown`'s bucket percentages).
+fn print_top25_table(rows: &mut [(String, u64, u64)], total_cycles: u64) {
+    rows.sort_unstable_by_key(|(_name, cycles, _pcs)| std::cmp::Reverse(*cycles));
+    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
+    eprintln!("  rank          cycles        %    cum %    PCs  function");
+    let mut cumulative: u64 = 0;
+    for (rank, (name, cycles, pcs)) in rows.iter().take(25).enumerate() {
+        cumulative += cycles;
+        eprintln!(
+            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  {:>5}  {}",
+            rank + 1,
+            cycles,
+            pct(*cycles),
+            pct(cumulative),
+            pcs,
+            name,
+        );
+    }
+}
+
+/// Print the global top-25 functions by cycle count, then one top-25 table
+/// per verifier step — so e.g. how much of `step4:openings` is spent in
+/// `keccak` is visible at a glance, instead of only the function's total
+/// across all steps.
 fn print_function_table(
     symbols: &executor::elf::SymbolTable,
     pc_hist: std::collections::HashMap<(u64, u8), u64>,
     total_cycles: u64,
 ) {
-    let mut by_function_step: std::collections::HashMap<(String, u8), (u64, u64)> =
+    let mut by_function: std::collections::HashMap<String, (u64, u64)> =
+        std::collections::HashMap::new();
+    let mut by_function_per_step: std::collections::HashMap<u8, std::collections::HashMap<String, (u64, u64)>> =
         std::collections::HashMap::new();
     let mut unique_pcs: std::collections::HashSet<u64> = std::collections::HashSet::new();
     for ((pc, bucket), count) in &pc_hist {
         unique_pcs.insert(*pc);
-        let entry = by_function_step
-            .entry((resolve_pc(symbols, *pc), *bucket))
-            .or_insert((0, 0));
+        let name = resolve_pc(symbols, *pc);
+
+        let entry = by_function.entry(name.clone()).or_insert((0, 0));
         entry.0 += *count; // cycles
-        entry.1 += 1; // distinct PCs folded into this (function, step)
+        entry.1 += 1; // distinct PCs folded into this function
+
+        let step_entry = by_function_per_step
+            .entry(*bucket)
+            .or_default()
+            .entry(name)
+            .or_insert((0, 0));
+        step_entry.0 += *count;
+        step_entry.1 += 1;
     }
-    let mut fn_entries: Vec<((String, u8), (u64, u64))> = by_function_step.into_iter().collect();
-    fn_entries.sort_unstable_by_key(|(_key, (cycles, _pcs))| std::cmp::Reverse(*cycles));
 
-    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
     eprintln!("  Unique PCs   : {}", unique_pcs.len());
     eprintln!();
-    eprintln!("  Top 25 (function, step) pairs by cycle count (aggregated over their PCs):");
-    eprintln!("  rank          cycles        %    cum %    PCs  step              function");
-    let mut fn_cumulative: u64 = 0;
-    for (rank, ((name, bucket), (cycles, pcs))) in fn_entries.iter().take(25).enumerate() {
-        fn_cumulative += cycles;
+    eprintln!("  Top 25 functions by cycle count (aggregated over their PCs, all steps):");
+    let mut rows: Vec<(String, u64, u64)> = by_function
+        .into_iter()
+        .map(|(name, (cycles, pcs))| (name, cycles, pcs))
+        .collect();
+    print_top25_table(&mut rows, total_cycles);
+
+    for bucket in 0u8..STEP_LABELS.len() as u8 {
+        let Some(by_step_function) = by_function_per_step.remove(&bucket) else {
+            continue;
+        };
+        eprintln!();
         eprintln!(
-            "  {:>4}  {:>14}  {:>6.2}%  {:>6.2}%  {:>5}  {:<16}  {}",
-            rank + 1,
-            cycles,
-            pct(*cycles),
-            pct(fn_cumulative),
-            pcs,
-            step_tag(*bucket),
-            name,
+            "  Top 25 functions by cycle count — step {}:",
+            step_tag(bucket)
         );
+        let mut rows: Vec<(String, u64, u64)> = by_step_function
+            .into_iter()
+            .map(|(name, (cycles, pcs))| (name, cycles, pcs))
+            .collect();
+        print_top25_table(&mut rows, total_cycles);
     }
 }
 

From c709329b05809d4a2ca66102bdc28200a4e88fb0 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 13:43:32 -0300
Subject: [PATCH 26/36] fix: per-step top-25 percentages relative to step
 cycles, not total
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-step tables previously used the global cycle count as the pct
denominator, so a function dominating a cheap step (e.g. 90% of
step2:claimed) rendered as a near-zero percentage of the whole run —
useless for spotting what dominates within that step. Use each step's
own cycle total as the denominator for its table instead; the global
table still uses the run's total. Update the CI aggregator to parse
and surface the per-step denominator.
---
 .../scripts/aggregate_recursion_histogram.py  | 51 +++++++++++--------
 prover/src/tests/recursion_smoke_test.rs      | 24 +++++----
 2 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/.github/scripts/aggregate_recursion_histogram.py b/.github/scripts/aggregate_recursion_histogram.py
index ec53985d1..0be0a3010 100755
--- a/.github/scripts/aggregate_recursion_histogram.py
+++ b/.github/scripts/aggregate_recursion_histogram.py
@@ -2,16 +2,17 @@
 """Format the recursion-guest per-function profile as a Markdown PR comment.
 
 `test_recursion_profile_1query`/`_multiquery` print a global top-25 functions
-table (folded over all verifier steps), followed by one top-25 table per
-verifier step — so e.g. how much of `step4:openings` is `keccak` is visible
-at a glance instead of only the function's total across all steps. We parse
-all of those tables and render them as Markdown.
+table (folded over all verifier steps, % of total run cycles), followed by
+one top-25 table per verifier step (% of that step's own cycles, so the
+table shows what dominates *within* the step) — e.g. how much of
+`step4:openings` is `keccak`. We parse all of those tables and render them
+as Markdown.
 
-    Top 25 functions by cycle count (aggregated over their PCs, all steps):
+    Top 25 functions by cycle count (aggregated over their PCs, all steps; % of total cycles):
       rank          cycles        %    cum %    PCs  function
          1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
-    Top 25 functions by cycle count — step airs_bus_balance:
+    Top 25 functions by cycle count — step airs_bus_balance (% of this step's 5129138364 cycles):
       rank          cycles        %    cum %    PCs  function
          1         5335072   24.95%   24.95%     72  <...>::visit_seq::<...>
 
@@ -29,9 +30,11 @@
 )
 HEADER_ROW = re.compile(r"^\s*rank\s+cycles")
 GLOBAL_TABLE_START = re.compile(
-    r"Top \d+ functions by cycle count \(aggregated over their PCs, all steps\)"
+    r"Top \d+ functions by cycle count \(aggregated over their PCs, all steps"
+)
+STEP_TABLE_START = re.compile(
+    r"Top \d+ functions by cycle count — step (\S+) \(% of this step's (\d+) cycles\):"
 )
-STEP_TABLE_START = re.compile(r"Top \d+ functions by cycle count — step (\S+):")
 TOTAL_CYCLES = re.compile(r"Total cycles\s*:\s*(\d+)")
 UNIQUE_PCS = re.compile(r"Unique PCs\s*:\s*(\d+)")
 EXEC_TIME = re.compile(r"Exec time\s*:\s*(\S+)")
@@ -41,7 +44,8 @@
 
 def parse(text):
     total_cycles = unique_pcs = exec_time = None
-    # GLOBAL_KEY -> rows, then one entry per step tag, in first-seen order.
+    # GLOBAL_KEY -> {"denom": int|None, "rows": [...]}, then one entry per
+    # step tag in first-seen order.
     tables = OrderedDict()
     current = None
     skip_header = False
@@ -55,12 +59,12 @@ def parse(text):
 
         if GLOBAL_TABLE_START.search(line):
             current = GLOBAL_KEY
-            tables.setdefault(current, [])
+            tables[current] = {"denom": total_cycles, "rows": []}
             skip_header = True
             continue
         if m := STEP_TABLE_START.search(line):
             current = m.group(1)
-            tables.setdefault(current, [])
+            tables[current] = {"denom": int(m.group(2)), "rows": []}
             skip_header = True
             continue
 
@@ -74,7 +78,7 @@ def parse(text):
             if HEADER_ROW.match(line):
                 continue
         if m := FN_ROW.match(line):
-            tables[current].append(
+            tables[current]["rows"].append(
                 {
                     "cycles": int(m.group(1)),
                     "pct": m.group(2),
@@ -93,7 +97,7 @@ def short(name, width=90):
     return name if len(name) <= width else name[: width - 1] + "…"
 
 
-def render_table(rows):
+def render_table(rows, denom_label):
     if not rows:
         return "> _no rows_\n"
     body = "| Rank | Cycles | % | Cum % | PCs | Function |\n"
@@ -106,14 +110,14 @@ def render_table(rows):
     last_cum = rows[-1]["cum"]
     body += (
         f"\n<sub>Each function's cycles are summed over all its program counters "
-        f"in this table's scope; the top {len(rows)} cover {last_cum}% of total "
-        f"cycles. Percentages are of total cycles.</sub>\n"
+        f"in this table's scope; the top {len(rows)} cover {last_cum}% of "
+        f"{denom_label}.</sub>\n"
     )
     return body
 
 
 def render(total_cycles, unique_pcs, exec_time, tables, title="Recursion guest profile"):
-    if not tables.get(GLOBAL_KEY):
+    if not tables.get(GLOBAL_KEY, {}).get("rows"):
         return (
             f"### {title}\n\n"
             "> ⚠️ No per-function rows found in the test output — the run may "
@@ -129,15 +133,20 @@ def render(total_cycles, unique_pcs, exec_time, tables, title="Recursion guest p
             body += f" · **Exec time:** {exec_time}"
         body += "\n\n"
 
-    global_rows = tables[GLOBAL_KEY]
+    global_rows = tables[GLOBAL_KEY]["rows"]
     body += f"#### Top {len(global_rows)} functions by cycles (all steps)\n\n"
-    body += render_table(global_rows)
+    body += render_table(global_rows, "total cycles")
 
-    for step, rows in tables.items():
+    for step, table in tables.items():
         if step == GLOBAL_KEY:
             continue
-        body += f"\n<details><summary>Step <code>{step}</code> — top {len(rows)} functions</summary>\n\n"
-        body += render_table(rows)
+        rows, denom = table["rows"], table["denom"]
+        denom_note = f" of {denom:,} step cycles" if denom is not None else ""
+        body += (
+            f"\n<details><summary>Step <code>{step}</code>{denom_note} — "
+            f"top {len(rows)} functions</summary>\n\n"
+        )
+        body += render_table(rows, "this step's cycles")
         body += "\n</details>\n"
 
     return body
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index cb88ae653..9e8b13a60 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -231,12 +231,14 @@ fn step_tag(bucket: u8) -> &'static str {
 }
 
 /// Print one top-25 table: `rows` is `(name, cycles, distinct_pcs)`, already
-/// unsorted; `total_cycles` is the denominator for percentages (always the
-/// *global* total, so per-step tables' percentages are comparable to the
-/// global table's and to `print_step_breakdown`'s bucket percentages).
-fn print_top25_table(rows: &mut [(String, u64, u64)], total_cycles: u64) {
+/// unsorted; `denom_cycles` is the denominator for percentages — the global
+/// total for the all-steps table, but *that step's own total* for a per-step
+/// table, so `%`/`cum %` show what dominates within that step (a `keccak`
+/// that's 90% of a cheap step should read as 90%, not as a fraction of a
+/// percent of the whole run).
+fn print_top25_table(rows: &mut [(String, u64, u64)], denom_cycles: u64) {
     rows.sort_unstable_by_key(|(_name, cycles, _pcs)| std::cmp::Reverse(*cycles));
-    let pct = |n: u64| 100.0 * (n as f64) / (total_cycles as f64);
+    let pct = |n: u64| 100.0 * (n as f64) / (denom_cycles as f64);
     eprintln!("  rank          cycles        %    cum %    PCs  function");
     let mut cumulative: u64 = 0;
     for (rank, (name, cycles, pcs)) in rows.iter().take(25).enumerate() {
@@ -286,7 +288,9 @@ fn print_function_table(
 
     eprintln!("  Unique PCs   : {}", unique_pcs.len());
     eprintln!();
-    eprintln!("  Top 25 functions by cycle count (aggregated over their PCs, all steps):");
+    eprintln!(
+        "  Top 25 functions by cycle count (aggregated over their PCs, all steps; % of total cycles):"
+    );
     let mut rows: Vec<(String, u64, u64)> = by_function
         .into_iter()
         .map(|(name, (cycles, pcs))| (name, cycles, pcs))
@@ -297,16 +301,18 @@ fn print_function_table(
         let Some(by_step_function) = by_function_per_step.remove(&bucket) else {
             continue;
         };
+        let step_total: u64 = by_step_function.values().map(|(cycles, _pcs)| cycles).sum();
         eprintln!();
         eprintln!(
-            "  Top 25 functions by cycle count — step {}:",
-            step_tag(bucket)
+            "  Top 25 functions by cycle count — step {} (% of this step's {} cycles):",
+            step_tag(bucket),
+            step_total,
         );
         let mut rows: Vec<(String, u64, u64)> = by_step_function
             .into_iter()
             .map(|(name, (cycles, pcs))| (name, cycles, pcs))
             .collect();
-        print_top25_table(&mut rows, total_cycles);
+        print_top25_table(&mut rows, step_total);
     }
 }
 

From 5d27652f7fd1caf5f8d34444eb2ec1c5f4198f9d Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 14:32:01 -0300
Subject: [PATCH 27/36] ci: run comment job on ubuntu-latest, not bench runner

---
 .github/workflows/profile-recursion.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/profile-recursion.yml b/.github/workflows/profile-recursion.yml
index 0e614fcd8..680741f15 100644
--- a/.github/workflows/profile-recursion.yml
+++ b/.github/workflows/profile-recursion.yml
@@ -110,7 +110,7 @@ jobs:
     # skipped (non-/profile_recursion or non-member comment) so this job — and
     # the self-hosted bench runner it spins up — doesn't fire on every comment.
     if: always() && github.event_name == 'issue_comment' && needs.profile.result != 'skipped'
-    runs-on: [self-hosted, bench]
+    runs-on: ubuntu-latest
     steps:
       - name: Get PR head ref
         id: pr-ref

From 775c99737fe52f8feb0449a98877273e1af4b3b1 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Wed, 1 Jul 2026 14:40:22 -0300
Subject: [PATCH 28/36] fix: NOP/marker-0 collision and step-bucketing latch in
 recursion profiling

decode_step_marker only required dst==0 (with op==Add), so it accepted
any source register and decoded the canonical NOP (addi x0, x0, 0) as
marker 0. Also require src==0 and imm!=0, per the documented
addi x0, x0, N convention.

run_profile latched the step bucket at the highest marker ever seen,
so multi_verify's per-AIR-table 3,4,5,6 repetition folded every table
after the first into bucket 6. Track the latest marker instead.
---
 executor/src/vm/execution.rs             | 11 ++++++-----
 prover/src/tests/recursion_smoke_test.rs | 21 +++++++++++----------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs
index fae8e774a..a1a766127 100644
--- a/executor/src/vm/execution.rs
+++ b/executor/src/vm/execution.rs
@@ -303,17 +303,18 @@ impl InstructionCache {
 }
 
 /// Decode a `stark::profile_markers::step_marker` hit at `pc`: the marker
-/// convention is `addi x0, x0, N` (an `ArithImm` with `dst == 0`, `op ==
-/// Add`), which real code never emits spontaneously since writes to `x0` are
-/// always discarded. Returns the marker's `N` if `pc` decodes to one.
+/// convention is `addi x0, x0, N` (an `ArithImm` with `dst == 0`, `src == 0`,
+/// `op == Add`, `N != 0`), which real code never emits spontaneously since
+/// writes to `x0` are always discarded and the canonical NOP is `addi x0, x0,
+/// 0`. Returns the marker's `N` if `pc` decodes to one.
 pub fn decode_step_marker(instructions: &InstructionCache, pc: u64) -> Option<u32> {
     match instructions.get(pc)? {
         Instruction::ArithImm {
             dst: 0,
+            src: 0,
             op: crate::vm::instruction::decoding::ArithOp::Add,
             imm,
-            ..
-        } => Some(*imm as u32),
+        } if *imm != 0 => Some(*imm as u32),
         _ => None,
     }
 }
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 9e8b13a60..a32d44c7c 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -197,9 +197,10 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
 }
 
 /// Verifier sub-steps in execution order, keyed by `stark::profile_markers::STEP_*`
-/// value. `run_profile` buckets cycles by the highest marker observed so far
-/// (`decode_step_marker` — a missing marker just means that bucket stays at 0
-/// cycles, no substring matching or symbol table needed).
+/// value. `run_profile` buckets cycles by the latest marker observed so far
+/// (`decode_step_marker`, defaulting to bucket 0 until the first marker fires),
+/// so `multi_verify`'s per-table `3,4,5,6` repetition re-attributes cycles to
+/// the correct step on each table's `6->3` transition instead of latching at 6.
 const STEP_LABELS: [&str; 7] = [
     "0. setup (alloc init + postcard decode)",
     "1. airs_and_bus_balance (Elf::load/VmAirs::new preprocessed FFT+Merkle/bus balance)",
@@ -266,8 +267,10 @@ fn print_function_table(
 ) {
     let mut by_function: std::collections::HashMap<String, (u64, u64)> =
         std::collections::HashMap::new();
-    let mut by_function_per_step: std::collections::HashMap<u8, std::collections::HashMap<String, (u64, u64)>> =
-        std::collections::HashMap::new();
+    let mut by_function_per_step: std::collections::HashMap<
+        u8,
+        std::collections::HashMap<String, (u64, u64)>,
+    > = std::collections::HashMap::new();
     let mut unique_pcs: std::collections::HashSet<u64> = std::collections::HashSet::new();
     for ((pc, bucket), count) in &pc_hist {
         unique_pcs.insert(*pc);
@@ -316,10 +319,10 @@ fn print_function_table(
     }
 }
 
-/// Print the monotonic per-verifier-step cycle bucketing (`buckets[0]` = setup).
+/// Print the per-verifier-step cycle bucketing (`buckets[0]` = setup).
 fn print_step_breakdown(buckets: &[u64; 7], total_cycles: u64) {
     eprintln!();
-    eprintln!("  Per-step cycle breakdown (monotonic state machine):");
+    eprintln!("  Per-step cycle breakdown (latest-marker state machine):");
     eprintln!("  {:<70}  {:>14}  {:>7}", "bucket", "cycles", "%");
     for (label, cycles) in STEP_LABELS.iter().zip(buckets.iter()) {
         let pct = if total_cycles > 0 {
@@ -366,9 +369,7 @@ fn run_profile(
         |log| {
             let pc = log.current_pc;
 
-            if let Some(marker) = executor::vm::execution::decode_step_marker(&instructions, pc)
-                && bucket.get() < marker as u8
-            {
+            if let Some(marker) = executor::vm::execution::decode_step_marker(&instructions, pc) {
                 bucket.set(marker as u8);
             }
             buckets[bucket.get() as usize] += 1;

From 32319b1b568dd1012d8e051369e0cf6b14b0ce25 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Fri, 26 Jun 2026 18:03:52 -0300
Subject: [PATCH 29/36] feat: cache verifying key + commitments (recursion opt)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `VmVerifyingKey` (prover/src/vkey.rs): host-derived cache of the five
preprocessed-table Merkle commitments (BITWISE, DECODE, REGISTER,
KECCAK_RC, per-PAGE). `VmAirs::new_with_vkey` /
`verify_with_options_with_vkey` take the cached commitments instead of
recomputing them — recomputation is ~87% of verifier cycles inside the
recursion guest. Soundness is preserved by Fiat-Shamir.

The recursion and deserialize-only guests and the smoke test now encode
the vkey into the postcard blob `(VmProof, elf, opts, vkey)`.
---
 bench_vs/lambda/recursion/Cargo.lock     |   1 +
 bench_vs/lambda/recursion/src/main.rs    |  15 +-
 prover/Cargo.toml                        |   1 +
 prover/src/lib.rs                        | 106 +++++++++--
 prover/src/tables/page.rs                |  20 ++
 prover/src/tests/mod.rs                  |   2 +
 prover/src/tests/recursion_smoke_test.rs |  16 +-
 prover/src/tests/vkey_tests.rs           | 221 +++++++++++++++++++++++
 prover/src/vkey.rs                       | 128 +++++++++++++
 9 files changed, 486 insertions(+), 24 deletions(-)
 create mode 100644 prover/src/tests/vkey_tests.rs
 create mode 100644 prover/src/vkey.rs

diff --git a/bench_vs/lambda/recursion/Cargo.lock b/bench_vs/lambda/recursion/Cargo.lock
index 66048ba81..88a9fb605 100644
--- a/bench_vs/lambda/recursion/Cargo.lock
+++ b/bench_vs/lambda/recursion/Cargo.lock
@@ -412,6 +412,7 @@ dependencies = [
  "executor",
  "log",
  "math",
+ "postcard",
  "serde",
  "sha3",
  "stark",
diff --git a/bench_vs/lambda/recursion/src/main.rs b/bench_vs/lambda/recursion/src/main.rs
index f19271aac..4ea592b78 100644
--- a/bench_vs/lambda/recursion/src/main.rs
+++ b/bench_vs/lambda/recursion/src/main.rs
@@ -14,7 +14,7 @@
 
 #![no_main]
 
-use lambda_vm_prover::{ProofOptions, VmProof};
+use lambda_vm_prover::{ProofOptions, VmProof, VmVerifyingKey};
 
 #[unsafe(export_name = "main")]
 pub fn main() -> ! {
@@ -29,14 +29,21 @@ pub fn main() -> ! {
     }));
 
     let blob = lambda_vm_syscalls::syscalls::get_private_input();
-    let (vm_proof, inner_elf, options): (VmProof, Vec<u8>, ProofOptions) =
+    let (vm_proof, inner_elf, options, vkey): (VmProof, Vec<u8>, ProofOptions, VmVerifyingKey) =
         postcard::from_bytes(&blob).expect("failed to deserialize recursion input");
     lambda_vm_prover::profile_markers::step_marker::<
         { lambda_vm_prover::profile_markers::STEP_DECODE_DONE },
     >();
 
-    let ok = lambda_vm_prover::verify_with_options(&vm_proof, &inner_elf, &options, None, None)
-        .expect("verify errored");
+    let ok = lambda_vm_prover::verify_with_options_with_vkey(
+        &vm_proof,
+        &inner_elf,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify errored");
     assert!(ok, "inner proof failed verification");
 
     lambda_vm_syscalls::syscalls::commit(&[1u8]);
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 3695689d6..887d9c1c1 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -25,6 +25,7 @@ rayon = { version = "1.8.0", optional = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"] }
 log = "0.4"
 sha3 = { version = "0.10.8", default-features = false }
+postcard = { version = "1.0", default-features = false, features = ["alloc"] }
 
 [dev-dependencies]
 env_logger = "*"
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 41b7d4738..09d3e22fa 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -25,6 +25,9 @@ pub mod tables;
 pub mod test_utils;
 #[cfg(test)]
 pub mod tests;
+pub mod vkey;
+
+pub use vkey::VmVerifyingKey;
 
 use std::fmt;
 
@@ -453,6 +456,38 @@ impl VmAirs {
         register_init: Option<&[u32]>,
         page_commitments: Option<&[(u64, Commitment)]>,
         register_preprocessed: Option<(Commitment, usize)>,
+    ) -> Self {
+        Self::new_with_vkey(
+            elf,
+            proof_options,
+            minimal_bitwise,
+            page_configs,
+            table_counts,
+            decode_commitment,
+            include_halt,
+            register_init,
+            page_commitments,
+            register_preprocessed,
+            None,
+        )
+    }
+
+    /// Same as [`Self::new`] but accepts a precomputed [`VmVerifyingKey`].
+    /// When `vkey` is `Some`, the bitwise preprocessed commitment is taken
+    /// from it instead of being recomputed from `proof_options` — that
+    /// recomputation is ~87% of verifier cycles inside the recursion guest.
+    pub fn new_with_vkey(
+        elf: &Elf,
+        proof_options: &ProofOptions,
+        minimal_bitwise: bool,
+        page_configs: &[crate::tables::page::PageConfig],
+        table_counts: &TableCounts,
+        decode_commitment: Option<Commitment>,
+        include_halt: bool,
+        register_init: Option<&[u32]>,
+        page_commitments: Option<&[(u64, Commitment)]>,
+        register_preprocessed: Option<(Commitment, usize)>,
+        vkey: Option<&VmVerifyingKey>,
     ) -> Self {
         let cpus: Vec<_> = (0..table_counts.cpu)
             .map(|i| create_cpu_air(proof_options).with_name(&format!("CPU[{}]", i)))
@@ -460,10 +495,12 @@ impl VmAirs {
         let bitwise = if minimal_bitwise {
             create_bitwise_air(proof_options)
         } else {
-            create_bitwise_air(proof_options).with_preprocessed(
-                bitwise::preprocessed_commitment(proof_options),
-                bitwise::NUM_PRECOMPUTED_COLS,
-            )
+            let commitment = match vkey {
+                Some(vk) => vk.bitwise,
+                None => bitwise::preprocessed_commitment(proof_options),
+            };
+            create_bitwise_air(proof_options)
+                .with_preprocessed(commitment, bitwise::NUM_PRECOMPUTED_COLS)
         };
         let lts: Vec<_> = (0..table_counts.lt)
             .map(|i| create_lt_air(proof_options).with_name(&format!("LT[{}]", i)))
@@ -480,10 +517,12 @@ impl VmAirs {
         let loads: Vec<_> = (0..table_counts.load)
             .map(|i| create_load_air(proof_options).with_name(&format!("LOAD[{}]", i)))
             .collect();
-        let decode_root = decode_commitment.unwrap_or_else(|| {
-            decode::commitment_from_elf(elf, proof_options)
-                .expect("Failed to compute decode commitment")
-        });
+        let decode_root = decode_commitment
+            .or_else(|| vkey.map(|vk| vk.decode))
+            .unwrap_or_else(|| {
+                decode::commitment_from_elf(elf, proof_options)
+                    .expect("Failed to compute decode commitment")
+            });
         let decode = create_decode_air(proof_options)
             .with_preprocessed(decode_root, decode::NUM_PRECOMPUTED_COLS);
         let muls: Vec<_> = (0..table_counts.mul)
@@ -499,8 +538,11 @@ impl VmAirs {
         let commit = create_commit_air(proof_options);
         let keccak = create_keccak_air(proof_options);
         let keccak_rnd = create_keccak_rnd_air(proof_options);
+        let keccak_rc_commitment = vkey
+            .map(|vk| vk.keccak_rc)
+            .unwrap_or_else(|| tables::keccak_rc::preprocessed_commitment(proof_options));
         let keccak_rc = create_keccak_rc_air(proof_options).with_preprocessed(
-            tables::keccak_rc::preprocessed_commitment(proof_options),
+            keccak_rc_commitment,
             tables::keccak_rc::NUM_PRECOMPUTED_COLS,
         );
         let ecsm = create_ecsm_air(proof_options);
@@ -512,10 +554,11 @@ impl VmAirs {
             let register_init = register_init
                 .map(<[u32]>::to_vec)
                 .unwrap_or_else(|| register::register_init_from_entry_point(elf.entry_point));
-            create_register_air(proof_options).with_preprocessed(
-                register::preprocessed_commitment(proof_options, &register_init),
-                register::NUM_PREPROCESSED_COLS,
-            )
+            let register_commitment = vkey.map(|vk| vk.register).unwrap_or_else(|| {
+                register::preprocessed_commitment(proof_options, &register_init)
+            });
+            create_register_air(proof_options)
+                .with_preprocessed(register_commitment, register::NUM_PREPROCESSED_COLS)
         };
         // Every zero-init page shares one preprocessed commitment: OFFSET is
         // page-relative and INIT is all-zero, so it depends only on
@@ -527,7 +570,8 @@ impl VmAirs {
 
         let pages: Vec<_> = page_configs
             .iter()
-            .map(|config| {
+            .enumerate()
+            .map(|(index, config)| {
                 let air = create_page_air(proof_options, config.page_base);
                 if config.is_private_input {
                     // Private-input pages: all columns are main trace (not preprocessed).
@@ -536,16 +580,21 @@ impl VmAirs {
                     air
                 } else if config.init_values.is_none() {
                     // Zero-init pages: the shared commitment computed once above.
+                    // `vkey.pages` caches the same static value for these slots,
+                    // so the local lookup is equivalent and equally cheap.
                     air.with_preprocessed(zero_init_commitment, page::NUM_PREPROCESSED_COLS)
                 } else {
                     // ELF data pages: INIT is program-specific, so the commitment is
                     // per-page. Prefer a caller-supplied `(page_base, commitment)`
-                    // (recursion guest); otherwise recompute from the ELF.
+                    // (recursion guest), then the vkey's cached per-page root
+                    // (indexed parallel to `page_configs`); otherwise recompute
+                    // from the ELF.
                     let commitment = page_commitments
                         .unwrap_or(&[])
                         .iter()
                         .find(|(pb, _)| *pb == config.page_base)
                         .map(|(_, c)| *c)
+                        .or_else(|| vkey.map(|vk| vk.pages[index]))
                         .unwrap_or_else(|| {
                             page::compute_precomputed_commitment(config, proof_options)
                         });
@@ -984,6 +1033,30 @@ pub fn verify_with_options(
     proof_options: &ProofOptions,
     decode_commitment: Option<Commitment>,
     page_commitments: Option<&[(u64, Commitment)]>,
+) -> Result<bool, Error> {
+    verify_with_options_with_vkey(
+        vm_proof,
+        elf_bytes,
+        proof_options,
+        decode_commitment,
+        page_commitments,
+        None,
+    )
+}
+
+/// Same as [`verify_with_options`] but accepts a precomputed
+/// [`VmVerifyingKey`]. When `vkey` is `Some`, the cached preprocessed
+/// commitments are taken from it instead of being recomputed inside
+/// `VmAirs::new`. A tampered vkey is caught by Fiat-Shamir: the verifier
+/// feeds the supplied commitment into the transcript, derives different
+/// challenges from what the prover used, and the openings stop matching.
+pub fn verify_with_options_with_vkey(
+    vm_proof: &VmProof,
+    elf_bytes: &[u8],
+    proof_options: &ProofOptions,
+    decode_commitment: Option<Commitment>,
+    page_commitments: Option<&[(u64, Commitment)]>,
+    vkey: Option<&VmVerifyingKey>,
 ) -> Result<bool, Error> {
     // Validate table_counts before constructing AIRs.
     // A malicious prover could set counts to 0, removing entire constraint sets.
@@ -1024,7 +1097,7 @@ pub fn verify_with_options(
         )));
     }
 
-    let airs = VmAirs::new(
+    let airs = VmAirs::new_with_vkey(
         &program,
         proof_options,
         false,
@@ -1035,6 +1108,7 @@ pub fn verify_with_options(
         None,
         page_commitments,
         None,
+        vkey,
     );
 
     // Recompute the COMMIT output bus offset from VmProof.public_output.
diff --git a/prover/src/tables/page.rs b/prover/src/tables/page.rs
index 2d1059bcc..7e1f7dea5 100644
--- a/prover/src/tables/page.rs
+++ b/prover/src/tables/page.rs
@@ -328,6 +328,26 @@ pub fn compute_precomputed_commitment(config: &PageConfig, options: &ProofOption
     root
 }
 
+/// Returns a page's preprocessed commitment, preferring the cheap path.
+///
+/// Zero-init pages (INIT is all-zero) share a single commitment that depends
+/// only on `(blowup, coset)`, so they resolve to the static lookup in
+/// [`zero_init_preprocessed_commitment`] instead of rebuilding the FFT +
+/// Merkle tree. ELF data pages have program-specific INIT and fall through
+/// to [`compute_precomputed_commitment`]. This mirrors the per-page choice
+/// made in `VmAirs::new_with_vkey`, so a vkey built from this function caches
+/// exactly the commitments the verifier expects.
+///
+/// Private-input pages have no preprocessed commitment; callers must skip
+/// them before calling this.
+pub fn precomputed_commitment_cached(config: &PageConfig, options: &ProofOptions) -> Commitment {
+    if config.init_values.is_none() {
+        zero_init_preprocessed_commitment(options)
+    } else {
+        compute_precomputed_commitment(config, options)
+    }
+}
+
 /// Returns the zero-init PAGE preprocessed commitment.
 ///
 /// Looks up `blowup_factor` in [`static_zero_page_commitment`] when
diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs
index 9e650422f..d10336e8d 100644
--- a/prover/src/tests/mod.rs
+++ b/prover/src/tests/mod.rs
@@ -78,3 +78,5 @@ pub mod templates_tests;
 pub mod trace_builder_tests;
 #[cfg(test)]
 pub mod trace_test_helpers;
+#[cfg(test)]
+pub mod vkey_tests;
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index a32d44c7c..e2aae52ff 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -38,8 +38,8 @@ const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
         grinding_factor: 1,
     };
 
-/// Prove `inner_elf` under `opts` and postcard-encode `(proof, elf, opts)` into
-/// the guest's private-input blob. Returns the proof and the blob.
+/// Prove `inner_elf` under `opts` and postcard-encode `(proof, elf, opts, vkey)`
+/// into the guest's private-input blob. Returns the proof and the blob.
 fn prove_inner_and_encode_blob(
     tag: &str,
     inner_elf: &[u8],
@@ -58,8 +58,16 @@ fn prove_inner_and_encode_blob(
     )
     .expect("inner prove should succeed");
 
-    let blob =
-        postcard::to_allocvec(&(&inner_proof, &inner_elf, opts)).expect("postcard encode failed");
+    let elf_for_vkey = executor::elf::Elf::load(inner_elf).expect("ELF load failed");
+    let page_configs = crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime(
+        &elf_for_vkey,
+        &inner_proof.runtime_page_ranges,
+        inner_proof.num_private_input_pages,
+    );
+    let vkey =
+        crate::VmVerifyingKey::from_elf_and_options(&elf_for_vkey, opts, None, &page_configs);
+    let blob = postcard::to_allocvec(&(&inner_proof, &inner_elf, opts, &vkey))
+        .expect("postcard encode failed");
     eprintln!("[{tag}] postcard blob: {} bytes", blob.len());
     (inner_proof, blob)
 }
diff --git a/prover/src/tests/vkey_tests.rs b/prover/src/tests/vkey_tests.rs
new file mode 100644
index 000000000..fc8a0c0e0
--- /dev/null
+++ b/prover/src/tests/vkey_tests.rs
@@ -0,0 +1,221 @@
+//! Tests for [`crate::VmVerifyingKey`] and the vkey-aware verify path.
+
+use executor::elf::Elf;
+use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions};
+
+use crate::VmVerifyingKey;
+use crate::tables::page::PageConfig;
+use crate::tables::trace_builder::Traces;
+use crate::test_utils::asm_elf_bytes;
+use crate::vkey::VKEY_VERSION;
+use crate::{VmProof, prove};
+
+fn default_options() -> ProofOptions {
+    GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid")
+}
+
+/// Derive the same `page_configs` slice the verifier would reconstruct from
+/// `vm_proof`. This is exactly what `verify_with_options_with_vkey` does
+/// internally, lifted into the test so the test-side and verifier-side
+/// `vkey.pages` indexing line up.
+fn page_configs_from_proof(elf: &Elf, vm_proof: &VmProof) -> Vec<PageConfig> {
+    Traces::page_configs_from_elf_and_runtime(
+        elf,
+        &vm_proof.runtime_page_ranges,
+        vm_proof.num_private_input_pages,
+    )
+}
+
+#[test]
+fn test_vkey_roundtrip() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+
+    let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+    assert_eq!(vkey.version, VKEY_VERSION, "version field must be set");
+    assert_eq!(
+        vkey.pages.len(),
+        page_configs.len(),
+        "vkey.pages must have one entry per page config",
+    );
+    let digest_before = vkey.compute_digest();
+
+    // Two host derivations on the same inputs must produce the same vkey;
+    // the per-table commitment caches should not change between calls.
+    let vkey_again = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+    assert_eq!(vkey, vkey_again, "vkey derivation must be deterministic");
+
+    // postcard round-trip preserves every field.
+    let encoded = postcard::to_allocvec(&vkey).expect("postcard encode");
+    let decoded: VmVerifyingKey = postcard::from_bytes(&encoded).expect("postcard decode");
+    assert_eq!(vkey, decoded, "postcard round-trip must preserve the vkey");
+    assert_eq!(
+        decoded.compute_digest(),
+        digest_before,
+        "digest must be stable across serialization"
+    );
+}
+
+#[test]
+fn test_vkey_verify_equivalence() {
+    // Prove a tiny program once with the full (non-minimal) bitwise table,
+    // then verify it both ways: with and without a precomputed vkey.
+    // Both paths must accept the proof. This is the core correctness
+    // guarantee — the vkey shortcut produces identical results to the
+    // recompute-from-scratch path.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    let baseline = crate::verify_with_options(&vm_proof, &elf_bytes, &options, None, None)
+        .expect("baseline verify errored");
+    assert!(baseline, "baseline verify must accept the proof");
+
+    let with_vkey = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("vkey verify errored");
+    assert!(with_vkey, "vkey verify must accept the same proof");
+}
+
+#[test]
+fn test_vkey_mismatch_rejects() {
+    // Tamper with vkey.bitwise. Without an explicit `vk_digest` field on
+    // VmProof (deferred to a later PR), rejection comes from Fiat-Shamir:
+    // the verifier feeds the tampered commitment into the transcript,
+    // derives different challenges from what the prover used, and the
+    // proof's openings stop matching.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    vkey.bitwise[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered bitwise commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_page_mismatch_rejects() {
+    // Same shape as `test_vkey_mismatch_rejects`, but tampers with the page
+    // commitment in the first non-private-input slot. Fiat-Shamir rejects
+    // the same way: the page commitment is in the verifier's transcript
+    // exactly like the bitwise one.
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    let target = page_configs
+        .iter()
+        .position(|c| !c.is_private_input)
+        .expect("test ELF must produce at least one non-private-input page");
+    vkey.pages[target][0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered page commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_decode_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    vkey.decode[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered decode commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_register_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    vkey.register[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(!result, "tampered register commitment must cause rejection");
+}
+
+#[test]
+fn test_vkey_keccak_rc_mismatch_rejects() {
+    let elf_bytes = asm_elf_bytes("sub");
+    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let options = default_options();
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+
+    vkey.keccak_rc[0] ^= 0xFF;
+
+    let result = crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &elf_bytes,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )
+    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
+    assert!(
+        !result,
+        "tampered keccak_rc commitment must cause rejection"
+    );
+}
diff --git a/prover/src/vkey.rs b/prover/src/vkey.rs
new file mode 100644
index 000000000..1d85d1d28
--- /dev/null
+++ b/prover/src/vkey.rs
@@ -0,0 +1,128 @@
+//! Verifying key for the lambda-vm STARK verifier.
+//!
+//! Caches preprocessed-table Merkle commitments that the verifier would
+//! otherwise recompute on every call. Mirrors the SP1 `MachineVerifyingKey`
+//! pattern (preprocessed commitments derived once at setup, never recomputed
+//! per-proof) and the prover-side companion in
+//! <https://github.com/yetanotherco/lambda_vm/pull/282> (which caches the
+//! same data on the prover side).
+//!
+//! ## Current scope
+//!
+//! All five preprocessed tables — BITWISE, DECODE, REGISTER, KECCAK_RC, and
+//! every non-private-input PAGE — are cached here. `VmAirs::new_with_vkey`
+//! prefers the vkey-supplied commitment over recomputing when a vkey is
+//! provided. The `version` field exists so a vkey serialized against an
+//! older layout produces a different `compute_digest()` and stops
+//! validating.
+//!
+//! ## Security
+//!
+//! For this PR the verifying key is only a performance shortcut. The
+//! verifier still relies on Fiat-Shamir: every preprocessed commitment the
+//! prover used is bound into the proof's challenges, so a verifier that
+//! consumes a tampered `vkey` field derives different challenges, the
+//! openings stop matching, and verification fails. A future PR will
+//! additionally embed `vkey.compute_digest()` in `VmProof` so vkey
+//! substitution surfaces as an explicit error before any STARK work runs.
+
+use executor::elf::Elf;
+use sha3::{Digest, Keccak256};
+use stark::config::Commitment;
+use stark::proof::options::ProofOptions;
+
+use crate::tables::bitwise;
+use crate::tables::decode;
+use crate::tables::keccak_rc;
+use crate::tables::page::{self, PageConfig};
+use crate::tables::register;
+
+/// Current `VmVerifyingKey` layout version. Bump whenever fields are added,
+/// removed, or reordered so that vkeys serialized against an older layout
+/// produce a different `compute_digest()` and stop validating.
+pub const VKEY_VERSION: u32 = 3;
+
+/// Placeholder commitment stored in [`VmVerifyingKey::pages`] for
+/// private-input page slots, where there is no preprocessed commitment to
+/// cache. The verifier never reads these slots (private-input pages have no
+/// `with_preprocessed(...)` call in `VmAirs::new`).
+const PRIVATE_INPUT_PAGE_PLACEHOLDER: Commitment = [0u8; 32];
+
+/// Cached preprocessed-table commitments the verifier would otherwise
+/// recompute on every call.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct VmVerifyingKey {
+    /// Layout version. See [`VKEY_VERSION`].
+    pub version: u32,
+    /// Merkle root over the LDE of the bitwise preprocessed columns.
+    /// Program-independent; depends only on `ProofOptions`.
+    pub bitwise: Commitment,
+    /// Merkle root over the LDE of the decode preprocessed columns.
+    /// Program-dependent: derived from the inner ELF's instruction stream.
+    pub decode: Commitment,
+    /// Merkle root over the LDE of the register preprocessed columns.
+    /// Program-dependent via the ELF's entry point.
+    pub register: Commitment,
+    /// Merkle root over the LDE of the keccak round-constants preprocessed
+    /// columns. Program-independent; depends only on `ProofOptions`.
+    pub keccak_rc: Commitment,
+    /// Per-page preprocessed Merkle roots, indexed parallel to the
+    /// `page_configs` slice the verifier reconstructs from the proof via
+    /// [`crate::tables::trace_builder::Traces::page_configs_from_elf_and_runtime`].
+    /// Private-input slots hold a zero placeholder and are never read by the
+    /// verifier — they exist only to keep the index aligned with
+    /// `page_configs`, which interleaves preprocessed and private-input pages.
+    pub pages: Vec<Commitment>,
+}
+
+impl VmVerifyingKey {
+    /// Derive the verifying key on the host.
+    ///
+    /// `elf` is read to derive the program-dependent commitments (DECODE
+    /// from the instruction stream, REGISTER from `elf.entry_point`).
+    ///
+    /// `page_configs` must match exactly what the verifier will reconstruct
+    /// from the proof — i.e. the output of
+    /// `Traces::page_configs_from_elf_and_runtime(elf, runtime_page_ranges,
+    /// num_private_input_pages)`. The host can call that helper with the
+    /// values it already has after producing the inner proof.
+    pub fn from_elf_and_options(
+        elf: &Elf,
+        options: &ProofOptions,
+        register_init: Option<&[u32]>,
+        page_configs: &[PageConfig],
+    ) -> Self {
+        let pages = page_configs
+            .iter()
+            .map(|config| {
+                if config.is_private_input {
+                    PRIVATE_INPUT_PAGE_PLACEHOLDER
+                } else {
+                    page::precomputed_commitment_cached(config, options)
+                }
+            })
+            .collect();
+        let register_init = register_init
+            .map(<[u32]>::to_vec)
+            .unwrap_or_else(|| register::register_init_from_entry_point(elf.entry_point));
+        Self {
+            version: VKEY_VERSION,
+            bitwise: bitwise::preprocessed_commitment(options),
+            decode: decode::commitment_from_elf(elf, options)
+                .expect("decode commitment must compute"),
+            register: register::preprocessed_commitment(options, &register_init),
+            keccak_rc: keccak_rc::preprocessed_commitment(options),
+            pages,
+        }
+    }
+
+    /// Keccak256 fingerprint of the postcard-serialized vkey. Stable as long
+    /// as the field layout (and [`VKEY_VERSION`]) does not change.
+    pub fn compute_digest(&self) -> [u8; 32] {
+        let bytes = postcard::to_allocvec(self)
+            .expect("postcard serialization of VmVerifyingKey must succeed");
+        let mut hasher = Keccak256::new();
+        hasher.update(&bytes);
+        hasher.finalize().into()
+    }
+}

From dbaa04c8284a377fd879d5e7beee3e94dd23334d Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 15:57:38 -0300
Subject: [PATCH 30/36] chore: allow clippy::too_many_arguments on new_with_vkey

---
 prover/src/lib.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 09d3e22fa..7ca5f6e93 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -476,6 +476,7 @@ impl VmAirs {
     /// When `vkey` is `Some`, the bitwise preprocessed commitment is taken
     /// from it instead of being recomputed from `proof_options` — that
     /// recomputation is ~87% of verifier cycles inside the recursion guest.
+    #[allow(clippy::too_many_arguments)]
     pub fn new_with_vkey(
         elf: &Elf,
         proof_options: &ProofOptions,

From 0aee5b58da38a736f3a0de32ebb95a7e50582529 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 17:26:37 -0300
Subject: [PATCH 31/36] feat: bind vk_digest into proof statement and guest
 output

A prover-supplied vkey defeats the preprocessed-commitment check:
Fiat-Shamir only catches post-hoc tampering, not a coordinated
prover committing to a forged table with a matching vkey. Bind
keccak(vkey) as vk_digest: embed ProofOptions in the vkey (no
commitment pins the query count or grinding factor), stamp the
digest into VmProof and the statement transcript (V3), check it in
verify before STARK work, and commit vk_digest || inner output from
the recursion guest so the outer verifier can compare against a
digest derived from the trusted inner ELF.

Also validate vkey version/page-count instead of panicking on
short pages, and reject on options mismatch.
---
 bench_vs/lambda/recursion/src/main.rs    |  12 +-
 crypto/stark/src/proof/options.rs        |   2 +-
 prover/src/lib.rs                        |  81 ++++++++--
 prover/src/statement.rs                  |  24 ++-
 prover/src/tests/prove_elfs_tests.rs     |   3 +
 prover/src/tests/recursion_smoke_test.rs |  13 +-
 prover/src/tests/statement_tests.rs      |  28 +++-
 prover/src/tests/vkey_tests.rs           | 182 +++++++++--------------
 prover/src/vkey.rs                       |  34 +++--
 9 files changed, 230 insertions(+), 149 deletions(-)

diff --git a/bench_vs/lambda/recursion/src/main.rs b/bench_vs/lambda/recursion/src/main.rs
index 4ea592b78..00aac3f03 100644
--- a/bench_vs/lambda/recursion/src/main.rs
+++ b/bench_vs/lambda/recursion/src/main.rs
@@ -1,9 +1,12 @@
 //! Naive recursion guest: verifies an inner lambda-vm proof inside the VM.
 //!
 //! Private input layout (postcard-encoded):
-//!   `(VmProof, Vec<u8>, ProofOptions)`
+//!   `(VmProof, Vec<u8>, ProofOptions, VmVerifyingKey)`
 //! where the `Vec<u8>` holds the inner program's ELF bytes and `ProofOptions`
-//! specifies the parameters the inner prover used. Commits `[1]` on success.
+//! specifies the parameters the inner prover used. Commits
+//! `vk_digest ‖ inner public output` on success: every input here is
+//! prover-supplied, so soundness comes from the outer verifier checking
+//! the committed digest against one derived from the trusted inner ELF.
 //!
 //! Not `no_std` (std/alloc are available — `build-std` provides them, and the
 //! prover links as a normal std crate; its prove-side code is dead-code
@@ -46,6 +49,9 @@ pub fn main() -> ! {
     .expect("verify errored");
     assert!(ok, "inner proof failed verification");
 
-    lambda_vm_syscalls::syscalls::commit(&[1u8]);
+    let mut output = Vec::with_capacity(32 + vm_proof.public_output.len());
+    output.extend_from_slice(&vkey.compute_digest());
+    output.extend_from_slice(&vm_proof.public_output);
+    lambda_vm_syscalls::syscalls::commit(&output);
     lambda_vm_syscalls::syscalls::sys_halt();
 }
diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs
index 70976b993..2c91ef00c 100644
--- a/crypto/stark/src/proof/options.rs
+++ b/crypto/stark/src/proof/options.rs
@@ -39,7 +39,7 @@ impl fmt::Display for ProofOptionsError {
 /// - `coset_offset`: the offset for the coset
 /// - `grinding_factor`: the number of leading zeros that we want for the Hash(hash || nonce)
 #[cfg_attr(feature = "wasm", wasm_bindgen)]
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct ProofOptions {
     pub blowup_factor: u8,
     pub fri_number_of_queries: usize,
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 7ca5f6e93..b42a7d35e 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -175,6 +175,10 @@ pub struct VmProof {
     /// These pages are NOT preprocessed — the verifier reconstructs them
     /// as non-preprocessed tables starting at `PRIVATE_INPUT_START_INDEX`.
     pub num_private_input_pages: usize,
+    /// Digest of the [`VmVerifyingKey`] the proof was made against. Bound
+    /// into the Fiat-Shamir statement and checked by the verifier against
+    /// its own vkey's digest before any STARK work.
+    pub vk_digest: [u8; 32],
 }
 
 /// Error type for the prover crate.
@@ -192,6 +196,8 @@ pub enum Error {
     Prover(String),
     /// Proof contains invalid table_counts (e.g. zero for a required table)
     InvalidTableCounts(String),
+    /// Supplied verifying key is malformed (wrong version or page count)
+    InvalidVerifyingKey(String),
     /// Continuation epoch size exponent is invalid.
     InvalidContinuationEpochSize(String),
     /// Continuation proof construction hit an internal invariant failure.
@@ -212,6 +218,7 @@ impl fmt::Display for Error {
             Error::Execution(msg) => write!(f, "execution error: {msg}"),
             Error::Prover(msg) => write!(f, "proving error: {msg}"),
             Error::InvalidTableCounts(msg) => write!(f, "invalid table_counts: {msg}"),
+            Error::InvalidVerifyingKey(msg) => write!(f, "invalid verifying key: {msg}"),
             Error::InvalidContinuationEpochSize(msg) => {
                 write!(f, "invalid continuation epoch size: {msg}")
             }
@@ -595,7 +602,7 @@ impl VmAirs {
                         .iter()
                         .find(|(pb, _)| *pb == config.page_base)
                         .map(|(_, c)| *c)
-                        .or_else(|| vkey.map(|vk| vk.pages[index]))
+                        .or_else(|| vkey.and_then(|vk| vk.pages.get(index)).copied())
                         .unwrap_or_else(|| {
                             page::compute_precomputed_commitment(config, proof_options)
                         });
@@ -898,7 +905,12 @@ pub fn prove_with_options_and_inputs(
     let __sp = stark::instruments::span("air_construction");
 
     let table_counts = traces.table_counts();
-    let airs = VmAirs::new(
+    // Derive the vkey before AIR construction so each preprocessed
+    // commitment is computed once and reused.
+    let vkey =
+        VmVerifyingKey::from_elf_and_options(&program, proof_options, None, &traces.page_configs);
+    let vk_digest = vkey.compute_digest();
+    let airs = VmAirs::new_with_vkey(
         &program,
         proof_options,
         false,
@@ -909,6 +921,7 @@ pub fn prove_with_options_and_inputs(
         None,
         None,
         None,
+        Some(&vkey),
     );
 
     #[cfg(feature = "instruments")]
@@ -931,7 +944,7 @@ pub fn prove_with_options_and_inputs(
     let mut transcript = DefaultTranscript::<E>::new(&[]);
     absorb_statement(
         &mut transcript,
-        StatementKind::Monolithic,
+        StatementKind::Monolithic { vk_digest },
         elf_bytes,
         &traces.public_output_bytes,
         &table_counts,
@@ -982,6 +995,7 @@ pub fn prove_with_options_and_inputs(
         table_counts,
         public_output: traces.public_output_bytes.clone(),
         num_private_input_pages,
+        vk_digest,
     })
 }
 
@@ -1046,11 +1060,17 @@ pub fn verify_with_options(
 }
 
 /// Same as [`verify_with_options`] but accepts a precomputed
-/// [`VmVerifyingKey`]. When `vkey` is `Some`, the bitwise preprocessed
-/// commitment is taken from it instead of being recomputed inside
-/// `VmAirs::new`. A tampered vkey is caught by Fiat-Shamir: the verifier
-/// feeds the supplied commitment into the transcript, derives different
-/// challenges from what the prover used, and the openings stop matching.
+/// [`VmVerifyingKey`], skipping the preprocessed-commitment recomputation.
+///
+/// # Security
+///
+/// The vkey is TRUSTED input. Passing one supplied by the prover (as the
+/// recursion guest does) proves nothing by itself: a coordinated prover can
+/// commit to a forged preprocessed table and ship a matching vkey, and the
+/// transcript stays self-consistent. Soundness requires the caller's output
+/// to bind `vkey.compute_digest()` (the recursion guest commits it) so an
+/// outer verifier can compare it against a digest derived from trusted
+/// inputs. See `vkey.rs`.
 pub fn verify_with_options_with_vkey(
     vm_proof: &VmProof,
     elf_bytes: &[u8],
@@ -1084,6 +1104,47 @@ pub fn verify_with_options_with_vkey(
         vm_proof.num_private_input_pages,
     );
 
+    // Validate the vkey before constructing AIRs: `vk.pages` is indexed
+    // parallel to `page_configs` (a short vec would panic), and options are
+    // checked because query count / grinding factor affect soundness but no
+    // commitment. With no caller vkey, derive one from the trusted ELF.
+    let owned_vkey;
+    let vkey = match vkey {
+        Some(vk) => {
+            if vk.version != crate::vkey::VKEY_VERSION {
+                return Err(Error::InvalidVerifyingKey(format!(
+                    "vkey version {} != expected {}",
+                    vk.version,
+                    crate::vkey::VKEY_VERSION,
+                )));
+            }
+            if vk.pages.len() != page_configs.len() {
+                return Err(Error::InvalidVerifyingKey(format!(
+                    "vkey has {} page commitments but the proof requires {}",
+                    vk.pages.len(),
+                    page_configs.len(),
+                )));
+            }
+            if vk.options != *proof_options {
+                return Err(Error::InvalidVerifyingKey(
+                    "vkey options do not match the verification options".into(),
+                ));
+            }
+            vk
+        }
+        None => {
+            owned_vkey =
+                VmVerifyingKey::from_elf_and_options(&program, proof_options, None, &page_configs);
+            &owned_vkey
+        }
+    };
+    let vk_digest = vkey.compute_digest();
+    if vm_proof.vk_digest != vk_digest {
+        return Err(Error::InvalidVerifyingKey(
+            "proof vk_digest does not match the verifying key's digest".into(),
+        ));
+    }
+
     // Cross-check: table_counts must match the number of sub-proofs.
     // FIXED_TABLE_COUNT always-present tables, plus page tables.
     let expected_proof_count =
@@ -1109,7 +1170,7 @@ pub fn verify_with_options_with_vkey(
         None,
         page_commitments,
         None,
-        vkey,
+        Some(vkey),
     );
 
     // Recompute the COMMIT output bus offset from VmProof.public_output.
@@ -1123,7 +1184,7 @@ pub fn verify_with_options_with_vkey(
     let mut transcript = DefaultTranscript::<E>::new(&[]);
     absorb_statement(
         &mut transcript,
-        StatementKind::Monolithic,
+        StatementKind::Monolithic { vk_digest },
         elf_bytes,
         &vm_proof.public_output,
         &vm_proof.table_counts,
diff --git a/prover/src/statement.rs b/prover/src/statement.rs
index cca961be5..0a692647d 100644
--- a/prover/src/statement.rs
+++ b/prover/src/statement.rs
@@ -16,7 +16,8 @@ use crate::test_utils::E;
 use crate::{RuntimePageRange, TableCounts};
 
 /// Domain-separation tag. Bump the suffix (`_V2`, ...) on any encoding change.
-const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V2";
+/// V3: monolithic statements bind the verifying-key digest after the tag.
+const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V3";
 
 fn elf_digest(elf: &[u8]) -> [u8; 32] {
     let mut h = Keccak256::new();
@@ -24,14 +25,15 @@ fn elf_digest(elf: &[u8]) -> [u8; 32] {
     h.finalize().into()
 }
 
-/// Which statement is being bound. Selects the leading domain tag and whether an
-/// epoch label is appended, so monolithic and continuation-epoch proofs share one
-/// function while each starts with its own tag. `Monolithic` reproduces the
-/// original encoding byte-for-byte (no label), so existing proofs are unaffected.
+/// Which statement is being bound. Selects the leading domain tag and the
+/// kind-specific fields, so monolithic and continuation-epoch proofs share one
+/// function while each starts with its own tag.
 #[derive(Clone, Copy)]
 pub(crate) enum StatementKind {
-    /// Whole-program (monolithic) proof.
-    Monolithic,
+    /// Whole-program (monolithic) proof. Carries the digest of the
+    /// [`crate::VmVerifyingKey`] (preprocessed commitments + proof options)
+    /// so every challenge depends on which vkey the proof was made against.
+    Monolithic { vk_digest: [u8; 32] },
     /// One continuation epoch proof, pinned to its position by `epoch_label`.
     ContinuationEpoch { epoch_label: u64 },
 }
@@ -48,11 +50,17 @@ pub(crate) fn absorb_statement(
     // Leading domain tag — distinct per statement kind, so a monolithic proof and
     // a continuation epoch proof can never share a transcript prefix.
     let domain_tag = match kind {
-        StatementKind::Monolithic => DOMAIN_TAG,
+        StatementKind::Monolithic { .. } => DOMAIN_TAG,
         StatementKind::ContinuationEpoch { .. } => CONTINUATION_EPOCH_TAG,
     };
     t.append_bytes(domain_tag);
 
+    // Fixed 32 bytes, no length prefix needed; the per-kind tags keep
+    // kind-specific fields unambiguous.
+    if let StatementKind::Monolithic { vk_digest } = kind {
+        t.append_bytes(&vk_digest);
+    }
+
     // ELF: fixed 32-byte digest — no length prefix needed.
     t.append_bytes(&elf_digest(elf_bytes));
 
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index 10013b5ed..f4cefb97f 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -136,6 +136,9 @@ fn prove_vm_minimal(elf_bytes: &[u8], private_inputs: &[u8], max_rows: &MaxRowsC
         table_counts,
         public_output: traces.public_output_bytes.clone(),
         num_private_input_pages,
+        // Minimal proofs skip statement absorption; only verify_vm_minimal
+        // can check them, so the digest is never read.
+        vk_digest: [0u8; 32],
     }
 }
 
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index e2aae52ff..e87726535 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -441,7 +441,9 @@ fn run_profile(
 }
 
 /// Core pipeline: prove the inner program, run the guest to `mode`, assert it
-/// committed `[1]` (the in-VM verifier accepted the proof).
+/// committed `vk_digest ‖ inner public output` — the outer-verifier check:
+/// the digest of the vkey used in-guest must match one derived on the host
+/// from the trusted inner ELF.
 fn run_recursion_pipeline_with_options(
     label: &str,
     inner_elf_bytes: &[u8],
@@ -480,12 +482,13 @@ fn run_recursion_pipeline_with_options(
         OuterMode::Prove => prove_outer_and_commit(label, &recursion_elf_bytes, &blob),
     };
 
+    let mut expected = inner_proof.vk_digest.to_vec();
+    expected.extend_from_slice(&inner_proof.public_output);
     assert_eq!(
-        committed,
-        vec![1u8],
-        "recursion guest must commit the [1] success marker (in-VM verify accepted)"
+        committed, expected,
+        "recursion guest must commit vk_digest ‖ inner public output"
     );
-    eprintln!("[{label}] guest committed [1]: in-VM verify accepted ✓");
+    eprintln!("[{label}] guest committed vk_digest ‖ output: in-VM verify accepted ✓");
 }
 
 /// `run_recursion_pipeline_with_options` with `blowup=8` (the `empty`/`fibonacci` default).
diff --git a/prover/src/tests/statement_tests.rs b/prover/src/tests/statement_tests.rs
index 73944e262..df8236666 100644
--- a/prover/src/tests/statement_tests.rs
+++ b/prover/src/tests/statement_tests.rs
@@ -39,7 +39,8 @@ fn sample_ranges() -> Vec<RuntimePageRange> {
     ]
 }
 
-fn state_after_absorb(
+fn state_after_absorb_with_digest(
+    vk_digest: [u8; 32],
     elf: &[u8],
     out: &[u8],
     counts: &TableCounts,
@@ -49,7 +50,7 @@ fn state_after_absorb(
     let mut t = DefaultTranscript::<E>::new(&[]);
     absorb_statement(
         &mut t,
-        StatementKind::Monolithic,
+        StatementKind::Monolithic { vk_digest },
         elf,
         out,
         counts,
@@ -59,6 +60,16 @@ fn state_after_absorb(
     t.state()
 }
 
+fn state_after_absorb(
+    elf: &[u8],
+    out: &[u8],
+    counts: &TableCounts,
+    priv_pages: usize,
+    ranges: &[RuntimePageRange],
+) -> [u8; 32] {
+    state_after_absorb_with_digest([7u8; 32], elf, out, counts, priv_pages, ranges)
+}
+
 #[test]
 fn state_is_deterministic() {
     let a = state_after_absorb(b"elf", b"out", &sample_counts(), 3, &sample_ranges());
@@ -112,6 +123,19 @@ fn state_depends_on_every_field() {
         state_after_absorb(b"elf", b"out", &sample_counts(), 1, &[]),
         "state must depend on runtime_page_ranges",
     );
+
+    assert_ne!(
+        baseline,
+        state_after_absorb_with_digest(
+            [8u8; 32],
+            b"elf",
+            b"out",
+            &sample_counts(),
+            1,
+            &sample_ranges()
+        ),
+        "state must depend on vk_digest",
+    );
 }
 
 #[test]
diff --git a/prover/src/tests/vkey_tests.rs b/prover/src/tests/vkey_tests.rs
index fc8a0c0e0..941415f31 100644
--- a/prover/src/tests/vkey_tests.rs
+++ b/prover/src/tests/vkey_tests.rs
@@ -8,7 +8,7 @@ use crate::tables::page::PageConfig;
 use crate::tables::trace_builder::Traces;
 use crate::test_utils::asm_elf_bytes;
 use crate::vkey::VKEY_VERSION;
-use crate::{VmProof, prove};
+use crate::{Error, VmProof, prove};
 
 fn default_options() -> ProofOptions {
     GoldilocksCubicProofOptions::with_blowup(2).expect("blowup=2 is always valid")
@@ -26,22 +26,54 @@ fn page_configs_from_proof(elf: &Elf, vm_proof: &VmProof) -> Vec<PageConfig> {
     )
 }
 
-#[test]
-fn test_vkey_roundtrip() {
+/// Prove `sub`, and derive the honest vkey the same way the verifier would.
+fn proof_and_vkey() -> (Vec<u8>, VmProof, ProofOptions, VmVerifyingKey) {
     let elf_bytes = asm_elf_bytes("sub");
     let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
     let elf = Elf::load(&elf_bytes).expect("ELF load failed");
     let options = default_options();
     let page_configs = page_configs_from_proof(&elf, &vm_proof);
-
     let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+    (elf_bytes, vm_proof, options, vkey)
+}
+
+/// A tampered or malformed vkey must be rejected with an explicit
+/// `InvalidVerifyingKey` before any STARK work runs — either by the shape
+/// checks or by the `vk_digest` comparison against the proof.
+fn assert_rejects_vkey(
+    elf_bytes: &[u8],
+    vm_proof: &VmProof,
+    options: &ProofOptions,
+    vkey: &VmVerifyingKey,
+    what: &str,
+) {
+    let result =
+        crate::verify_with_options_with_vkey(vm_proof, elf_bytes, options, None, None, Some(vkey));
+    assert!(
+        matches!(result, Err(Error::InvalidVerifyingKey(_))),
+        "{what} must be rejected with InvalidVerifyingKey, got {result:?}"
+    );
+}
+
+#[test]
+fn test_vkey_roundtrip() {
+    let (_, vm_proof, options, vkey) = proof_and_vkey();
+    let elf_bytes = asm_elf_bytes("sub");
+    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
+    let page_configs = page_configs_from_proof(&elf, &vm_proof);
+
     assert_eq!(vkey.version, VKEY_VERSION, "version field must be set");
+    assert_eq!(vkey.options, options, "options must be embedded");
     assert_eq!(
         vkey.pages.len(),
         page_configs.len(),
         "vkey.pages must have one entry per page config",
     );
     let digest_before = vkey.compute_digest();
+    assert_eq!(
+        vm_proof.vk_digest, digest_before,
+        "prover must stamp the same digest the verifier derives"
+    );
 
     // Two host derivations on the same inputs must produce the same vkey;
     // the per-table commitment caches should not change between calls.
@@ -66,12 +98,7 @@ fn test_vkey_verify_equivalence() {
     // Both paths must accept the proof. This is the core correctness
     // guarantee — the vkey shortcut produces identical results to the
     // recompute-from-scratch path.
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
-    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
+    let (elf_bytes, vm_proof, options, vkey) = proof_and_vkey();
 
     let baseline = crate::verify_with_options(&vm_proof, &elf_bytes, &options, None, None)
         .expect("baseline verify errored");
@@ -91,131 +118,64 @@ fn test_vkey_verify_equivalence() {
 
 #[test]
 fn test_vkey_mismatch_rejects() {
-    // Tamper with vkey.bitwise. Without an explicit `vk_digest` field on
-    // VmProof (deferred to a later PR), rejection comes from Fiat-Shamir:
-    // the verifier feeds the tampered commitment into the transcript,
-    // derives different challenges from what the prover used, and the
-    // proof's openings stop matching.
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
-    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
-
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
     vkey.bitwise[0] ^= 0xFF;
-
-    let result = crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &elf_bytes,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
-    assert!(!result, "tampered bitwise commitment must cause rejection");
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "tampered bitwise");
 }
 
 #[test]
 fn test_vkey_page_mismatch_rejects() {
-    // Same shape as `test_vkey_mismatch_rejects`, but tampers with the page
-    // table that gets it first non-private-input slot. Fiat-Shamir rejects
-    // the same way: the page commitment is in the verifier's transcript
-    // exactly like the bitwise one.
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
     let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
-
-    let target = page_configs
+    let target = page_configs_from_proof(&elf, &vm_proof)
         .iter()
         .position(|c| !c.is_private_input)
         .expect("test ELF must produce at least one non-private-input page");
     vkey.pages[target][0] ^= 0xFF;
-
-    let result = crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &elf_bytes,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
-    assert!(!result, "tampered page commitment must cause rejection");
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "tampered page");
 }
 
 #[test]
 fn test_vkey_decode_mismatch_rejects() {
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
-    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
-
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
     vkey.decode[0] ^= 0xFF;
-
-    let result = crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &elf_bytes,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
-    assert!(!result, "tampered decode commitment must cause rejection");
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "tampered decode");
 }
 
 #[test]
 fn test_vkey_register_mismatch_rejects() {
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
-    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
-
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
     vkey.register[0] ^= 0xFF;
-
-    let result = crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &elf_bytes,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
-    assert!(!result, "tampered register commitment must cause rejection");
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "tampered register");
 }
 
 #[test]
 fn test_vkey_keccak_rc_mismatch_rejects() {
-    let elf_bytes = asm_elf_bytes("sub");
-    let vm_proof = prove(&elf_bytes).expect("inner prove should succeed");
-    let elf = Elf::load(&elf_bytes).expect("ELF load failed");
-    let options = default_options();
-    let page_configs = page_configs_from_proof(&elf, &vm_proof);
-    let mut vkey = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
-
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
     vkey.keccak_rc[0] ^= 0xFF;
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "tampered keccak_rc");
+}
 
-    let result = crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &elf_bytes,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify must not return Err — Fiat-Shamir mismatch is Ok(false)");
-    assert!(
-        !result,
-        "tampered keccak_rc commitment must cause rejection"
-    );
+#[test]
+fn test_vkey_short_pages_rejects() {
+    // A short pages vec must be a clean error, not an out-of-bounds panic.
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
+    vkey.pages.clear();
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "short pages vec");
+}
+
+#[test]
+fn test_vkey_options_mismatch_rejects() {
+    // Query count and grinding factor affect soundness but no commitment,
+    // so a weakened-options vkey must be caught by the explicit check.
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
+    vkey.options.fri_number_of_queries = 1;
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "weakened options");
+}
+
+#[test]
+fn test_vkey_wrong_version_rejects() {
+    let (elf_bytes, vm_proof, options, mut vkey) = proof_and_vkey();
+    vkey.version = VKEY_VERSION - 1;
+    assert_rejects_vkey(&elf_bytes, &vm_proof, &options, &vkey, "wrong version");
 }
diff --git a/prover/src/vkey.rs b/prover/src/vkey.rs
index 1d85d1d28..3de96eb29 100644
--- a/prover/src/vkey.rs
+++ b/prover/src/vkey.rs
@@ -10,7 +10,8 @@
 //! ## Current scope
 //!
 //! All five preprocessed tables — BITWISE, DECODE, REGISTER, KECCAK_RC, and
-//! every non-private-input PAGE — are cached here. `VmAirs::new_with_vkey`
+//! every non-private-input PAGE — are cached here, together with the
+//! [`ProofOptions`] the commitments were derived under. `VmAirs::new_with_vkey`
 //! prefers the vkey-supplied commitment over recomputing when a vkey is
 //! provided. The `version` field exists so a vkey serialized against an
 //! older layout produces a different `compute_digest()` and stops
@@ -18,13 +19,23 @@
 //!
 //! ## Security
 //!
-//! For this PR the verifying key is only a performance shortcut. The
-//! verifier still relies on Fiat-Shamir: every preprocessed commitment the
-//! prover used is bound into the proof's challenges, so a verifier that
-//! consumes a tampered `vkey` field derives different challenges, the
-//! openings stop matching, and verification fails. A future PR will
-//! additionally embed `vkey.compute_digest()` in `VmProof` so vkey
-//! substitution surfaces as an explicit error before any STARK work runs.
+//! The vkey is **trusted input**. Fiat-Shamir only detects a vkey that is
+//! inconsistent with the proof (post-hoc tampering); a coordinated prover
+//! can commit to a forged preprocessed table from the start and supply a
+//! matching vkey, and the transcript stays self-consistent. The binding
+//! that makes recursion sound is `compute_digest()`:
+//!
+//! - The prover stamps it into `VmProof::vk_digest` and binds it into the
+//!   Fiat-Shamir statement; the verifier recomputes it from its own vkey
+//!   and rejects on mismatch before any STARK work.
+//! - The recursion guest commits `vk_digest ‖ inner public output`, so the
+//!   *outer* verifier can check which vkey was used in-guest against a
+//!   digest derived from the trusted inner ELF. Without that outer check
+//!   the guest's result says nothing — every guest input is prover-supplied.
+//!
+//! The digest covers the embedded [`ProofOptions`]: query count and
+//! grinding factor affect soundness but no commitment, so nothing else
+//! would pin them.
 
 use executor::elf::Elf;
 use sha3::{Digest, Keccak256};
@@ -40,7 +51,7 @@ use crate::tables::register;
 /// Current `VmVerifyingKey` layout version. Bump whenever fields are added,
 /// removed, or reordered so that vkeys serialized against an older layout
 /// produce a different `compute_digest()` and stop validating.
-pub const VKEY_VERSION: u32 = 3;
+pub const VKEY_VERSION: u32 = 4;
 
 /// Placeholder commitment stored in [`VmVerifyingKey::pages`] for
 /// private-input page slots, where there is no preprocessed commitment to
@@ -54,6 +65,10 @@ const PRIVATE_INPUT_PAGE_PLACEHOLDER: Commitment = [0u8; 32];
 pub struct VmVerifyingKey {
     /// Layout version. See [`VKEY_VERSION`].
     pub version: u32,
+    /// The options every commitment below was derived under. In the digest
+    /// because query count and grinding factor affect soundness but no
+    /// commitment.
+    pub options: ProofOptions,
     /// Merkle root over the LDE of the bitwise preprocessed columns.
     /// Program-independent; depends only on `ProofOptions`.
     pub bitwise: Commitment,
@@ -107,6 +122,7 @@ impl VmVerifyingKey {
             .unwrap_or_else(|| register::register_init_from_entry_point(elf.entry_point));
         Self {
             version: VKEY_VERSION,
+            options: options.clone(),
             bitwise: bitwise::preprocessed_commitment(options),
             decode: decode::commitment_from_elf(elf, options)
                 .expect("decode commitment must compute"),

From efc2dbbe65ef0b1c3b1f828ec2ef540bc967929f Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 17:34:57 -0300
Subject: [PATCH 32/36] Revert comment on `DOMAIN_TAG`

The comment should still say when it needs bumping, not why it was bumped the last time.
---
 prover/src/statement.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prover/src/statement.rs b/prover/src/statement.rs
index 0a692647d..ca2ff2459 100644
--- a/prover/src/statement.rs
+++ b/prover/src/statement.rs
@@ -16,7 +16,7 @@ use crate::test_utils::E;
 use crate::{RuntimePageRange, TableCounts};
 
 /// Domain-separation tag. Bump the suffix (`_V2`, ...) on any encoding change.
-/// V3: monolithic statements bind the verifying-key digest after the tag.
+/// Domain-separation tag. Bump the suffix (`_V3`, ...) on any encoding change.
 const DOMAIN_TAG: &[u8] = b"LAMBDAVM_STARK_STATEMENT_V3";
 
 fn elf_digest(elf: &[u8]) -> [u8; 32] {

From 5c0a0c8e321f33293f0cb8bfe827469d852013ad Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 17:44:40 -0300
Subject: [PATCH 33/36] test: decode recursion blob as the 4-tuple the guest
 reads

The host roundtrip test still decoded (VmProof, elf, opts). postcard
discards trailing bytes, so the test silently skipped the vkey and the
vkey verify path that the guest actually exercises.
---
 prover/src/tests/recursion_smoke_test.rs | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index e87726535..1345050e3 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -1,5 +1,5 @@
 //! End-to-end naive recursion pipeline smoke tests: prove an inner program,
-//! hand `(VmProof, elf, opts)` to the in-VM verifier guest, then either prove
+//! hand `(VmProof, elf, opts, vkey)` to the in-VM verifier guest, then either prove
 //! the guest's execution (`OuterMode::Prove`) or just execute it
 //! (`OuterMode::ExecuteOnly`). Guest ELFs come from `make compile-recursion-elfs`.
 //!
@@ -520,9 +520,16 @@ fn test_recursion_blob_decodes_and_verifies_on_host() {
         prove_inner_and_encode_blob("roundtrip", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
 
     // Decode exactly as the guest does.
-    let decoded: Result<(crate::VmProof, Vec<u8>, crate::ProofOptions), _> =
-        postcard::from_bytes(&blob);
-    let (vm_proof, inner_elf, options) = match decoded {
+    let decoded: Result<
+        (
+            crate::VmProof,
+            Vec<u8>,
+            crate::ProofOptions,
+            crate::VmVerifyingKey,
+        ),
+        _,
+    > = postcard::from_bytes(&blob);
+    let (vm_proof, inner_elf, options, vkey) = match decoded {
         Ok(t) => t,
         Err(e) => panic!("[roundtrip] postcard DECODE failed (this is the guest panic): {e}"),
     };
@@ -532,7 +539,14 @@ fn test_recursion_blob_decodes_and_verifies_on_host() {
         options.blowup_factor
     );
 
-    match crate::verify_with_options(&vm_proof, &inner_elf, &options, None, None) {
+    match crate::verify_with_options_with_vkey(
+        &vm_proof,
+        &inner_elf,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    ) {
         Ok(true) => eprintln!("[roundtrip] verify ok=true — guest path is sound"),
         Ok(false) => panic!(
             "[roundtrip] verify returned FALSE (guest hits assert!(ok)) — proof did not survive the postcard round-trip"

From e6b54fe27958274848db2223bdc3c5ab81bd7348 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 17:48:41 -0300
Subject: [PATCH 34/36] docs: new_with_vkey covers five tables; note page-order
 lockstep

---
 prover/src/lib.rs  | 8 +++++---
 prover/src/vkey.rs | 3 +++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index b42a7d35e..07b275f6a 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -480,9 +480,11 @@ impl VmAirs {
     }
 
     /// Same as [`Self::new`] but accepts a precomputed [`VmVerifyingKey`].
-    /// When `vkey` is `Some`, the bitwise preprocessed commitment is taken
-    /// from it instead of being recomputed from `proof_options` — that
-    /// recomputation is ~87% of verifier cycles inside the recursion guest.
+    /// When `vkey` is `Some`, the bitwise, decode, register, keccak_rc and
+    /// per-page preprocessed commitments are taken from it instead of being
+    /// recomputed — recomputation (dominated by bitwise) is ~87% of verifier
+    /// cycles inside the recursion guest. Explicit `decode_commitment` /
+    /// `page_commitments` arguments still take precedence over the vkey.
     #[allow(clippy::too_many_arguments)]
     pub fn new_with_vkey(
         elf: &Elf,
diff --git a/prover/src/vkey.rs b/prover/src/vkey.rs
index 3de96eb29..e3afd85ef 100644
--- a/prover/src/vkey.rs
+++ b/prover/src/vkey.rs
@@ -87,6 +87,9 @@ pub struct VmVerifyingKey {
     /// Private-input slots hold a zero placeholder and are never read by the
     /// verifier — they exist only to keep the index aligned with
     /// `page_configs`, which interleaves preprocessed and private-input pages.
+    /// Prover (`traces.page_configs`) and verifier
+    /// (`page_configs_from_elf_and_runtime`) must derive the same page order
+    /// or the digests diverge.
     pub pages: Vec<Commitment>,
 }
 

From 7411b067bed7c27be743d4acc51f3e49edcbc186 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 21:11:27 -0300
Subject: [PATCH 35/36] perf(verifier): single-format rkyv, verify proofs in
 place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace postcard and serde with rkyv as the sole proof serialization.
The STARK verifier gets one implementation over ArchivedStarkProof,
reading the archive in place: archived field elements are bit-identical
to native ones on little-endian (ArchivedFieldElement transparent
newtype + slice_as_native), so the recursion guest verifies straight
from its input buffer with no deserialization pass and no per-field
allocation. Owned multi_verify becomes a serialize-then-delegate shim,
so every host verification also exercises the wire format.

- recursion blob: 12-byte LVMR magic/version prefix 16-aligns the
  archive at PRIVATE_INPUT_START+4+12 (const-asserted); guest borrows
  the input region (get_private_input_slice) and commits
  vk_digest ‖ public_output from verify_recursion_blob
- vkey digest: framework-free fixed-width canonical encoding
  (exhaustive destructure; injective), replacing postcard bytes
- CLI persistence: bincode -> validated rkyv from_bytes/to_bytes;
  proof files change format
- Table: manual rkyv impl under disk-spill reads via row_major_data,
  archive layout identical to the derive
- ethrex host-reference tests move to tooling/ethrex-tests (detached
  workspace): ethrex pins rkyv 'unaligned', a global archived-layout
  switch that must not feature-unify with the aligned proof format;
  our crates pin 'aligned' so any reintroduction is a compile error
- harden verifier for in-place reads: OOD dimensions_consistent +
  height>0 gate, deep-openings count guard, aux-width checked_sub,
  FRI decommitment length equality (fixes pre-existing skip of the
  fri_last_value check via over-long layers_evaluations_sym)
- verify_recursion_blob falls back to one aligned copy when the host
  buffer is misaligned (guest path stays zero-copy)

Recursion verifier guest, inner empty proof:
- 1 query (blowup=2): 115.26M -> 88.98M cycles (-22.8%)
- multi-query (blowup=8): 2.976B -> 2.211B cycles (-25.7%)
- setup step (was postcard decode): 21.89M -> ~170 cycles
---
 Cargo.lock                                    | 1727 +-----------
 Makefile                                      |    8 +-
 bench_vs/lambda/recursion/Cargo.lock          |  283 +-
 bench_vs/lambda/recursion/Cargo.toml          |    1 -
 bench_vs/lambda/recursion/src/main.rs         |   45 +-
 bin/cli/Cargo.toml                            |    2 +-
 bin/cli/src/main.rs                           |   12 +-
 crypto/crypto/Cargo.toml                      |    8 +-
 crypto/crypto/src/merkle_tree/proof.rs        |   48 +-
 crypto/math/Cargo.toml                        |    9 +
 crypto/math/src/field/element.rs              |  157 ++
 crypto/stark/Cargo.toml                       |   13 +-
 .../src/examples/fibonacci_2_cols_shifted.rs  |    2 +-
 .../src/examples/fibonacci_multi_column.rs    |    2 +-
 crypto/stark/src/examples/fibonacci_rap.rs    |    2 +-
 crypto/stark/src/examples/quadratic_air.rs    |    2 +-
 crypto/stark/src/examples/read_only_memory.rs |    2 +-
 .../src/examples/read_only_memory_logup.rs    |    2 +-
 crypto/stark/src/examples/simple_addition.rs  |    2 +-
 crypto/stark/src/examples/simple_fibonacci.rs |    2 +-
 .../src/examples/simple_periodic_cols.rs      |    2 +-
 crypto/stark/src/fri/fri_decommit.rs          |    3 +-
 crypto/stark/src/lookup.rs                    |   30 +-
 crypto/stark/src/proof/options.rs             |    2 +-
 crypto/stark/src/proof/stark.rs               |   12 +-
 crypto/stark/src/table.rs                     |  223 +-
 .../src/tests/bus_tests/completeness_tests.rs |    5 +-
 .../src/tests/prove_verify_roundtrip_tests.rs |    5 +-
 crypto/stark/src/verifier.rs                  |  370 ++-
 docs/continuations_design.md                  |    4 +-
 executor/Cargo.toml                           |    7 -
 executor/programs/rust/ef_io_demo/Cargo.lock  |  331 +++
 executor/tests/README.md                      |    4 +
 executor/tests/rust.rs                        |   66 -
 prover/Cargo.toml                             |    5 +-
 prover/src/continuation.rs                    |   29 +-
 prover/src/lib.rs                             |  323 ++-
 prover/src/tables/local_to_global.rs          |    6 +-
 prover/src/tests/prove_elfs_tests.rs          |   32 +-
 prover/src/tests/recursion_smoke_test.rs      |  100 +-
 prover/src/tests/vkey_tests.rs                |    9 +-
 prover/src/vkey.rs                            |   42 +-
 syscalls/src/syscalls.rs                      |   21 +
 tooling/ethrex-tests/Cargo.lock               | 2415 +++++++++++++++++
 tooling/ethrex-tests/Cargo.toml               |   24 +
 tooling/ethrex-tests/tests/ethrex.rs          |   88 +
 46 files changed, 4262 insertions(+), 2225 deletions(-)
 create mode 100644 executor/programs/rust/ef_io_demo/Cargo.lock
 create mode 100644 tooling/ethrex-tests/Cargo.lock
 create mode 100644 tooling/ethrex-tests/Cargo.toml
 create mode 100644 tooling/ethrex-tests/tests/ethrex.rs

diff --git a/Cargo.lock b/Cargo.lock
index 6a9cae1ef..7c351b9ee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,18 +2,6 @@
 # It is not intended for manual editing.
 version = 4
 
-[[package]]
-name = "ahash"
-version = "0.8.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
-dependencies = [
- "cfg-if",
- "once_cell",
- "version_check",
- "zerocopy",
-]
-
 [[package]]
 name = "aho-corasick"
 version = "1.1.4"
@@ -23,21 +11,6 @@ dependencies = [
  "memchr",
 ]
 
-[[package]]
-name = "allocator-api2"
-version = "0.2.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
-
-[[package]]
-name = "android_system_properties"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "anes"
 version = "0.1.6"
@@ -94,151 +67,6 @@ dependencies = [
  "windows-sys",
 ]
 
-[[package]]
-name = "anyhow"
-version = "1.0.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
-
-[[package]]
-name = "ark-bn254"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d69eab57e8d2663efa5c63135b2af4f396d66424f88954c21104125ab6b3e6bc"
-dependencies = [
- "ark-ec",
- "ark-ff",
- "ark-std",
-]
-
-[[package]]
-name = "ark-ec"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d68f2d516162846c1238e755a7c4d131b892b70cc70c471a8e3ca3ed818fce"
-dependencies = [
- "ahash",
- "ark-ff",
- "ark-poly",
- "ark-serialize",
- "ark-std",
- "educe",
- "fnv",
- "hashbrown 0.15.5",
- "itertools 0.13.0",
- "num-bigint",
- "num-integer",
- "num-traits",
- "zeroize",
-]
-
-[[package]]
-name = "ark-ff"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a177aba0ed1e0fbb62aa9f6d0502e9b46dad8c2eab04c14258a1212d2557ea70"
-dependencies = [
- "ark-ff-asm",
- "ark-ff-macros",
- "ark-serialize",
- "ark-std",
- "arrayvec",
- "digest",
- "educe",
- "itertools 0.13.0",
- "num-bigint",
- "num-traits",
- "paste",
- "zeroize",
-]
-
-[[package]]
-name = "ark-ff-asm"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62945a2f7e6de02a31fe400aa489f0e0f5b2502e69f95f853adb82a96c7a6b60"
-dependencies = [
- "quote",
- "syn",
-]
-
-[[package]]
-name = "ark-ff-macros"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09be120733ee33f7693ceaa202ca41accd5653b779563608f1234f78ae07c4b3"
-dependencies = [
- "num-bigint",
- "num-traits",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "ark-poly"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "579305839da207f02b89cd1679e50e67b4331e2f9294a57693e5051b7703fe27"
-dependencies = [
- "ahash",
- "ark-ff",
- "ark-serialize",
- "ark-std",
- "educe",
- "fnv",
- "hashbrown 0.15.5",
-]
-
-[[package]]
-name = "ark-serialize"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f4d068aaf107ebcd7dfb52bc748f8030e0fc930ac8e360146ca54c1203088f7"
-dependencies = [
- "ark-serialize-derive",
- "ark-std",
- "arrayvec",
- "digest",
- "num-bigint",
-]
-
-[[package]]
-name = "ark-serialize-derive"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "213888f660fddcca0d257e88e54ac05bca01885f258ccdf695bafd77031bb69d"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "ark-std"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a"
-dependencies = [
- "num-traits",
- "rand 0.8.5",
-]
-
-[[package]]
-name = "arrayvec"
-version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
-
-[[package]]
-name = "atomic-polyfill"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
-dependencies = [
- "critical-section",
-]
-
 [[package]]
 name = "atty"
 version = "0.2.14"
@@ -262,18 +90,6 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
 
-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
-[[package]]
-name = "base64ct"
-version = "1.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
-
 [[package]]
 name = "bincode"
 version = "1.3.3"
@@ -298,22 +114,6 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
 
-[[package]]
-name = "bitcoin-io"
-version = "0.1.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11301df0b06f22dea7bb1916403fdd88a371031e495c49b8f96931b28189e175"
-
-[[package]]
-name = "bitcoin_hashes"
-version = "0.14.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c9901a56e133a1fc86eeb1113e2591f45f4682451ca893bff494d2f88918e3f"
-dependencies = [
- "bitcoin-io",
- "hex-conservative",
-]
-
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -326,18 +126,6 @@ version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
-[[package]]
-name = "bitvec"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
-dependencies = [
- "funty",
- "radium",
- "tap",
- "wyz",
-]
-
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -347,31 +135,12 @@ dependencies = [
  "generic-array",
 ]
 
-[[package]]
-name = "bls12_381"
-version = "0.8.0"
-source = "git+https://github.com/lambdaclass/bls12_381?branch=expose-affine-constructors#78cad0378b17fc3157b83f514be192bf46edf9a1"
-dependencies = [
- "digest",
- "ff",
- "group",
- "pairing",
- "rand_core 0.6.4",
- "subtle",
-]
-
 [[package]]
 name = "bumpalo"
 version = "3.19.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
 
-[[package]]
-name = "byte-slice-cast"
-version = "1.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d"
-
 [[package]]
 name = "bytecheck"
 version = "0.8.2"
@@ -395,27 +164,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "bytemuck"
-version = "1.24.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
-
-[[package]]
-name = "byteorder"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
-
-[[package]]
-name = "bytes"
-version = "1.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
-dependencies = [
- "serde",
-]
-
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -438,18 +186,6 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
-[[package]]
-name = "chrono"
-version = "0.4.42"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
-dependencies = [
- "iana-time-zone",
- "num-traits",
- "serde",
- "windows-link",
-]
-
 [[package]]
 name = "ciborium"
 version = "0.2.2"
@@ -474,7 +210,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
 dependencies = [
  "ciborium-io",
- "half 2.7.1",
+ "half",
 ]
 
 [[package]]
@@ -485,7 +221,7 @@ checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
 dependencies = [
  "bitflags 1.3.2",
  "clap_lex 0.2.4",
- "indexmap 1.9.3",
+ "indexmap",
  "textwrap",
 ]
 
@@ -542,25 +278,16 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 name = "cli"
 version = "0.1.0"
 dependencies = [
- "bincode",
  "clap 4.5.53",
  "env_logger",
  "executor",
  "lambda-vm-prover",
+ "rkyv",
  "stark",
  "tikv-jemalloc-ctl",
  "tikv-jemallocator",
 ]
 
-[[package]]
-name = "cobs"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
-dependencies = [
- "thiserror 2.0.17",
-]
-
 [[package]]
 name = "colorchoice"
 version = "1.0.4"
@@ -573,35 +300,6 @@ version = "0.9.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
 
-[[package]]
-name = "const_format"
-version = "0.2.35"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
-dependencies = [
- "const_format_proc_macros",
-]
-
-[[package]]
-name = "const_format_proc_macros"
-version = "0.2.34"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-xid",
-]
-
-[[package]]
-name = "convert_case"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
-dependencies = [
- "unicode-segmentation",
-]
-
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -617,15 +315,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "crc32fast"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
-dependencies = [
- "cfg-if",
-]
-
 [[package]]
 name = "criterion"
 version = "0.4.0"
@@ -686,34 +375,6 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
-[[package]]
-name = "critical-section"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
-
-[[package]]
-name = "crossbeam"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
-dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-channel"
-version = "0.5.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.6"
@@ -733,15 +394,6 @@ dependencies = [
  "crossbeam-utils",
 ]
 
-[[package]]
-name = "crossbeam-queue"
-version = "0.3.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.21"
@@ -766,6 +418,7 @@ dependencies = [
  "rand 0.8.5",
  "rand_chacha 0.3.1",
  "rayon",
+ "rkyv",
  "serde",
  "sha2",
  "sha3",
@@ -804,140 +457,39 @@ dependencies = [
 ]
 
 [[package]]
-name = "darling"
-version = "0.21.3"
+name = "der"
+version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
 dependencies = [
- "darling_core",
- "darling_macro",
+ "const-oid",
+ "zeroize",
 ]
 
 [[package]]
-name = "darling_core"
-version = "0.21.3"
+name = "digest"
+version = "0.10.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
- "fnv",
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim",
- "syn",
+ "block-buffer",
+ "crypto-common",
 ]
 
 [[package]]
-name = "darling_macro"
-version = "0.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
+name = "ecsm"
+version = "0.1.0"
 dependencies = [
- "darling_core",
- "quote",
- "syn",
+ "k256",
+ "num-bigint",
+ "num-traits",
 ]
 
 [[package]]
-name = "der"
-version = "0.7.10"
+name = "either"
+version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
-dependencies = [
- "const-oid",
- "zeroize",
-]
-
-[[package]]
-name = "deranged"
-version = "0.5.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
-dependencies = [
- "powerfmt",
- "serde_core",
-]
-
-[[package]]
-name = "derive_more"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05"
-dependencies = [
- "derive_more-impl",
-]
-
-[[package]]
-name = "derive_more-impl"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22"
-dependencies = [
- "convert_case",
- "proc-macro2",
- "quote",
- "syn",
- "unicode-xid",
-]
-
-[[package]]
-name = "digest"
-version = "0.10.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
-dependencies = [
- "block-buffer",
- "const-oid",
- "crypto-common",
- "subtle",
-]
-
-[[package]]
-name = "dyn-clone"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
-
-[[package]]
-name = "ecdsa"
-version = "0.16.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
-dependencies = [
- "der",
- "digest",
- "elliptic-curve",
- "rfc6979",
- "signature",
- "spki",
-]
-
-[[package]]
-name = "ecsm"
-version = "0.1.0"
-dependencies = [
- "k256",
- "num-bigint",
- "num-traits",
-]
-
-[[package]]
-name = "educe"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
-dependencies = [
- "enum-ordinalize",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "either"
-version = "1.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
 [[package]]
 name = "elliptic-curve"
@@ -947,49 +499,15 @@ checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
 dependencies = [
  "base16ct",
  "crypto-bigint",
- "digest",
  "ff",
  "generic-array",
  "group",
- "pkcs8",
  "rand_core 0.6.4",
  "sec1",
  "subtle",
  "zeroize",
 ]
 
-[[package]]
-name = "embedded-io"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
-
-[[package]]
-name = "embedded-io"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
-
-[[package]]
-name = "enum-ordinalize"
-version = "4.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0"
-dependencies = [
- "enum-ordinalize-derive",
-]
-
-[[package]]
-name = "enum-ordinalize-derive"
-version = "4.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "env_filter"
 version = "0.1.4"
@@ -1013,12 +531,6 @@ dependencies = [
  "log",
 ]
 
-[[package]]
-name = "equivalent"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
-
 [[package]]
 name = "errno"
 version = "0.3.14"
@@ -1029,201 +541,15 @@ dependencies = [
  "windows-sys",
 ]
 
-[[package]]
-name = "ethbloom"
-version = "0.14.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c321610643004cf908ec0f5f2aa0d8f1f8e14b540562a2887a1111ff1ecbf7b"
-dependencies = [
- "crunchy",
- "fixed-hash",
- "impl-rlp",
- "impl-serde",
- "tiny-keccak",
-]
-
-[[package]]
-name = "ethereum-types"
-version = "0.15.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ab15ed80916029f878e0267c3a9f92b67df55e79af370bf66199059ae2b4ee3"
-dependencies = [
- "ethbloom",
- "fixed-hash",
- "impl-rlp",
- "impl-serde",
- "primitive-types",
- "uint",
-]
-
-[[package]]
-name = "ethrex-common"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "crc32fast",
- "ethereum-types",
- "ethrex-crypto",
- "ethrex-rlp",
- "ethrex-trie",
- "hex",
- "hex-literal",
- "hex-simd",
- "indexmap 2.12.1",
- "lazy_static",
- "libc",
- "lru",
- "once_cell",
- "rayon",
- "rkyv",
- "rustc-hash",
- "secp256k1",
- "serde",
- "serde_json",
- "sha2",
- "thiserror 2.0.17",
- "tracing",
-]
-
-[[package]]
-name = "ethrex-crypto"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "ark-bn254",
- "ark-ec",
- "ark-ff",
- "bls12_381",
- "ethereum-types",
- "ff",
- "hex-literal",
- "k256",
- "malachite",
- "num-bigint",
- "p256",
- "ripemd",
- "secp256k1",
- "sha2",
- "thiserror 2.0.17",
- "tiny-keccak",
-]
-
-[[package]]
-name = "ethrex-guest-program"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "ethereum-types",
- "ethrex-common",
- "ethrex-crypto",
- "ethrex-l2-common",
- "ethrex-rlp",
- "ethrex-vm",
- "hex",
- "rkyv",
- "serde",
- "serde_with",
- "thiserror 2.0.17",
-]
-
-[[package]]
-name = "ethrex-l2-common"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "ethereum-types",
- "ethrex-common",
- "ethrex-crypto",
- "k256",
- "lambdaworks-crypto",
- "rkyv",
- "secp256k1",
- "serde",
- "serde_with",
- "thiserror 2.0.17",
- "tracing",
-]
-
-[[package]]
-name = "ethrex-levm"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "derive_more",
- "ethrex-common",
- "ethrex-crypto",
- "ethrex-rlp",
- "malachite",
- "rayon",
- "rustc-hash",
- "serde",
- "strum",
- "thiserror 2.0.17",
-]
-
-[[package]]
-name = "ethrex-rlp"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "ethereum-types",
- "thiserror 2.0.17",
-]
-
-[[package]]
-name = "ethrex-trie"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "anyhow",
- "bytes",
- "crossbeam",
- "ethereum-types",
- "ethrex-crypto",
- "ethrex-rlp",
- "lazy_static",
- "rayon",
- "rkyv",
- "rustc-hash",
- "serde",
- "thiserror 2.0.17",
-]
-
-[[package]]
-name = "ethrex-vm"
-version = "13.0.0"
-source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
-dependencies = [
- "bytes",
- "derive_more",
- "dyn-clone",
- "ethrex-common",
- "ethrex-crypto",
- "ethrex-levm",
- "ethrex-rlp",
- "rayon",
- "rustc-hash",
- "serde",
- "thiserror 2.0.17",
- "tracing",
-]
-
 [[package]]
 name = "executor"
 version = "0.1.0"
 dependencies = [
  "ecsm",
- "ethrex-guest-program",
- "rkyv",
  "rustc-demangle",
  "serde",
  "serde_json",
- "thiserror 1.0.69",
+ "thiserror",
  "tiny-keccak",
 ]
 
@@ -1239,7 +565,6 @@ version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
 dependencies = [
- "bitvec",
  "rand_core 0.6.4",
  "subtle",
 ]
@@ -1248,292 +573,104 @@ dependencies = [
 name = "find-msvc-tools"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
-
-[[package]]
-name = "fixed-hash"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "835c052cb0c08c1acf6ffd71c022172e18723949c8282f2b9f27efbc51e64534"
-dependencies = [
- "byteorder",
- "rand 0.8.5",
- "rustc-hex",
- "static_assertions",
-]
-
-[[package]]
-name = "fnv"
-version = "1.0.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
-
-[[package]]
-name = "foldhash"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
-
-[[package]]
-name = "foldhash"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
-
-[[package]]
-name = "funty"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
-
-[[package]]
-name = "generic-array"
-version = "0.14.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
-dependencies = [
- "typenum",
- "version_check",
- "zeroize",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.2.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
-dependencies = [
- "cfg-if",
- "js-sys",
- "libc",
- "wasi",
- "wasm-bindgen",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.3.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasip2",
-]
-
-[[package]]
-name = "group"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
-dependencies = [
- "ff",
- "rand_core 0.6.4",
- "subtle",
-]
-
-[[package]]
-name = "half"
-version = "1.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403"
-
-[[package]]
-name = "half"
-version = "2.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
-dependencies = [
- "cfg-if",
- "crunchy",
- "zerocopy",
-]
-
-[[package]]
-name = "hash32"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
-dependencies = [
- "byteorder",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
-
-[[package]]
-name = "hashbrown"
-version = "0.15.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
-dependencies = [
- "allocator-api2",
- "foldhash 0.1.5",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.16.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
-dependencies = [
- "allocator-api2",
- "equivalent",
- "foldhash 0.2.0",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.17.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
-
-[[package]]
-name = "heapless"
-version = "0.7.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
-dependencies = [
- "atomic-polyfill",
- "hash32",
- "rustc_version",
- "serde",
- "spin",
- "stable_deref_trait",
-]
-
-[[package]]
-name = "heck"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
-
-[[package]]
-name = "hermit-abi"
-version = "0.1.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
-dependencies = [
- "libc",
-]
-
-[[package]]
-name = "hermit-abi"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
 
 [[package]]
-name = "hex"
-version = "0.4.3"
+name = "fnv"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 
 [[package]]
-name = "hex-conservative"
-version = "0.2.2"
+name = "generic-array"
+version = "0.14.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fda06d18ac606267c40c04e41b9947729bf8b9efe74bd4e82b61a5f26a510b9f"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
 dependencies = [
- "arrayvec",
+ "typenum",
+ "version_check",
+ "zeroize",
 ]
 
 [[package]]
-name = "hex-literal"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
-
-[[package]]
-name = "hex-simd"
-version = "0.8.0"
+name = "getrandom"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f7685beb53fc20efc2605f32f5d51e9ba18b8ef237961d1760169d2290d3bee"
+checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
- "outref",
- "vsimd",
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
 ]
 
 [[package]]
-name = "hmac"
-version = "0.12.1"
+name = "getrandom"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
- "digest",
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
 ]
 
 [[package]]
-name = "iana-time-zone"
-version = "0.1.64"
+name = "group"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
+checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
 dependencies = [
- "android_system_properties",
- "core-foundation-sys",
- "iana-time-zone-haiku",
- "js-sys",
- "log",
- "wasm-bindgen",
- "windows-core",
+ "ff",
+ "rand_core 0.6.4",
+ "subtle",
 ]
 
 [[package]]
-name = "iana-time-zone-haiku"
-version = "0.1.2"
+name = "half"
+version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
 dependencies = [
- "cc",
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
 ]
 
 [[package]]
-name = "ident_case"
-version = "1.0.1"
+name = "hashbrown"
+version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
 [[package]]
-name = "impl-codec"
-version = "0.7.1"
+name = "hashbrown"
+version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d40b9d5e17727407e55028eafc22b2dc68781786e6d7eb8a21103f5058e3a14"
-dependencies = [
- "parity-scale-codec",
-]
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
 
 [[package]]
-name = "impl-rlp"
-version = "0.4.0"
+name = "heck"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54ed8ad1f3877f7e775b8cbf30ed1bd3209a95401817f19a0eb4402d13f8cf90"
-dependencies = [
- "rlp",
-]
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
-name = "impl-serde"
-version = "0.5.0"
+name = "hermit-abi"
+version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a143eada6a1ec4aefa5049037a26a6d597bfd64f8c026d07b77133e02b7dd0b"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
 dependencies = [
- "serde",
+ "libc",
 ]
 
 [[package]]
-name = "impl-trait-for-tuples"
-version = "0.2.3"
+name = "hermit-abi"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
 
 [[package]]
 name = "indexmap"
@@ -1543,19 +680,6 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg",
  "hashbrown 0.12.3",
- "serde",
-]
-
-[[package]]
-name = "indexmap"
-version = "2.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
-dependencies = [
- "equivalent",
- "hashbrown 0.16.1",
- "serde",
- "serde_core",
 ]
 
 [[package]]
@@ -1593,24 +717,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
-dependencies = [
- "either",
-]
-
-[[package]]
-name = "itertools"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itoa"
 version = "1.0.16"
@@ -1658,11 +764,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
 dependencies = [
  "cfg-if",
- "ecdsa",
  "elliptic-curve",
- "once_cell",
- "sha2",
- "signature",
 ]
 
 [[package]]
@@ -1678,7 +780,6 @@ dependencies = [
 name = "lambda-vm-prover"
 version = "0.1.0"
 dependencies = [
- "bincode",
  "criterion 0.5.1",
  "crypto",
  "ecsm",
@@ -1686,9 +787,8 @@ dependencies = [
  "executor",
  "log",
  "math",
- "postcard",
  "rayon",
- "serde",
+ "rkyv",
  "sha3",
  "stark",
  "sysinfo",
@@ -1697,34 +797,6 @@ dependencies = [
  "tiny-keccak",
 ]
 
-[[package]]
-name = "lambdaworks-crypto"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58b1a1c1102a5a7fbbda117b79fb3a01e033459c738a3c1642269603484fd1c1"
-dependencies = [
- "lambdaworks-math",
- "rand 0.8.5",
- "rand_chacha 0.3.1",
- "serde",
- "sha2",
- "sha3",
-]
-
-[[package]]
-name = "lambdaworks-math"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "018a95aa873eb49896a858dee0d925c33f3978d073c64b08dd4f2c9b35a017c6"
-dependencies = [
- "getrandom 0.2.16",
- "num-bigint",
- "num-traits",
- "rand 0.8.5",
- "serde",
- "serde_json",
-]
-
 [[package]]
 name = "lazy_static"
 version = "1.5.0"
@@ -1747,88 +819,18 @@ dependencies = [
  "windows-link",
 ]
 
-[[package]]
-name = "libm"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
-
 [[package]]
 name = "linux-raw-sys"
 version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
 
-[[package]]
-name = "lock_api"
-version = "0.4.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
-dependencies = [
- "scopeguard",
-]
-
 [[package]]
 name = "log"
 version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
-[[package]]
-name = "lru"
-version = "0.16.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593"
-dependencies = [
- "hashbrown 0.16.1",
-]
-
-[[package]]
-name = "malachite"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec410515e231332b14cd986a475d1c3323bcfa4c7efc038bfa1d5b410b1c57e4"
-dependencies = [
- "malachite-base",
- "malachite-nz",
- "malachite-q",
-]
-
-[[package]]
-name = "malachite-base"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c738d3789301e957a8f7519318fcbb1b92bb95863b28f6938ae5a05be6259f34"
-dependencies = [
- "hashbrown 0.15.5",
- "itertools 0.14.0",
- "libm",
- "ryu",
-]
-
-[[package]]
-name = "malachite-nz"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1707c9a1fa36ce21749b35972bfad17bbf34cf5a7c96897c0491da321e387d3b"
-dependencies = [
- "itertools 0.14.0",
- "libm",
- "malachite-base",
- "wide",
-]
-
-[[package]]
-name = "malachite-q"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d764801aa4e96bbb69b389dcd03b50075345131cd63ca2e380bca71cc37a3675"
-dependencies = [
- "itertools 0.14.0",
- "malachite-base",
- "malachite-nz",
-]
-
 [[package]]
 name = "matchers"
 version = "0.2.0"
@@ -1850,6 +852,7 @@ dependencies = [
  "rand 0.8.5",
  "rand_chacha 0.3.1",
  "rayon",
+ "rkyv",
  "serde",
  "serde_json",
 ]
@@ -1931,12 +934,6 @@ dependencies = [
  "num-traits",
 ]
 
-[[package]]
-name = "num-conv"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
-
 [[package]]
 name = "num-integer"
 version = "0.1.46"
@@ -1979,61 +976,6 @@ version = "6.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
 
-[[package]]
-name = "outref"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
-
-[[package]]
-name = "p256"
-version = "0.13.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
-dependencies = [
- "ecdsa",
- "elliptic-curve",
- "primeorder",
- "sha2",
-]
-
-[[package]]
-name = "pairing"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81fec4625e73cf41ef4bb6846cafa6d44736525f442ba45e407c4a000a13996f"
-dependencies = [
- "group",
-]
-
-[[package]]
-name = "parity-scale-codec"
-version = "3.7.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "799781ae679d79a948e13d4824a40970bfa500058d245760dd857301059810fa"
-dependencies = [
- "arrayvec",
- "bitvec",
- "byte-slice-cast",
- "const_format",
- "impl-trait-for-tuples",
- "parity-scale-codec-derive",
- "rustversion",
- "serde",
-]
-
-[[package]]
-name = "parity-scale-codec-derive"
-version = "3.7.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34b4653168b563151153c9e4c08ebed57fb8262bebfa79711552fa983c623e7a"
-dependencies = [
- "proc-macro-crate",
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "paste"
 version = "1.0.15"
@@ -2046,16 +988,6 @@ version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
 
-[[package]]
-name = "pkcs8"
-version = "0.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
-dependencies = [
- "der",
- "spki",
-]
-
 [[package]]
 name = "plotters"
 version = "0.3.7"
@@ -2099,25 +1031,6 @@ dependencies = [
  "portable-atomic",
 ]
 
-[[package]]
-name = "postcard"
-version = "1.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
-dependencies = [
- "cobs",
- "embedded-io 0.4.0",
- "embedded-io 0.6.1",
- "heapless",
- "serde",
-]
-
-[[package]]
-name = "powerfmt"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
-
 [[package]]
 name = "ppv-lite86"
 version = "0.2.21"
@@ -2127,37 +1040,6 @@ dependencies = [
  "zerocopy",
 ]
 
-[[package]]
-name = "primeorder"
-version = "0.13.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
-dependencies = [
- "elliptic-curve",
-]
-
-[[package]]
-name = "primitive-types"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d15600a7d856470b7d278b3fe0e311fe28c2526348549f8ef2ff7db3299c87f5"
-dependencies = [
- "fixed-hash",
- "impl-codec",
- "impl-rlp",
- "impl-serde",
- "uint",
-]
-
-[[package]]
-name = "proc-macro-crate"
-version = "3.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
-dependencies = [
- "toml_edit",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.103"
@@ -2227,12 +1109,6 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
-[[package]]
-name = "radium"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
-
 [[package]]
 name = "rancor"
 version = "0.1.1"
@@ -2326,28 +1202,8 @@ version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
 dependencies = [
- "crossbeam-deque",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "ref-cast"
-version = "1.0.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
-dependencies = [
- "ref-cast-impl",
-]
-
-[[package]]
-name = "ref-cast-impl"
-version = "1.0.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
+ "crossbeam-deque",
+ "crossbeam-utils",
 ]
 
 [[package]]
@@ -2388,25 +1244,6 @@ dependencies = [
  "bytecheck",
 ]
 
-[[package]]
-name = "rfc6979"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
-dependencies = [
- "hmac",
- "subtle",
-]
-
-[[package]]
-name = "ripemd"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd124222d17ad93a644ed9d011a40f4fb64aa54275c08cc216524a9ea82fb09f"
-dependencies = [
- "digest",
-]
-
 [[package]]
 name = "rkyv"
 version = "0.8.16"
@@ -2414,16 +1251,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "73389e0c99e664f919275ab5b5b0471391fe9a8de61e1dff9b1eaf56a90f16e3"
 dependencies = [
  "bytecheck",
- "bytes",
  "hashbrown 0.17.1",
- "indexmap 2.12.1",
  "munge",
  "ptr_meta",
  "rancor",
  "rend",
  "rkyv_derive",
  "tinyvec",
- "uuid",
 ]
 
 [[package]]
@@ -2437,43 +1271,12 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "rlp"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa24e92bb2a83198bb76d661a71df9f7076b8c420b8696e4d3d97d50d94479e3"
-dependencies = [
- "bytes",
- "rustc-hex",
-]
-
 [[package]]
 name = "rustc-demangle"
 version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
 
-[[package]]
-name = "rustc-hash"
-version = "2.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
-
-[[package]]
-name = "rustc-hex"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6"
-
-[[package]]
-name = "rustc_version"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
-dependencies = [
- "semver",
-]
-
 [[package]]
 name = "rustix"
 version = "1.1.3"
@@ -2511,15 +1314,6 @@ version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea"
 
-[[package]]
-name = "safe_arch"
-version = "0.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323"
-dependencies = [
- "bytemuck",
-]
-
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -2529,36 +1323,6 @@ dependencies = [
  "winapi-util",
 ]
 
-[[package]]
-name = "schemars"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
-dependencies = [
- "dyn-clone",
- "ref-cast",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "schemars"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2"
-dependencies = [
- "dyn-clone",
- "ref-cast",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "scopeguard"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
-
 [[package]]
 name = "sec1"
 version = "0.7.3"
@@ -2568,37 +1332,10 @@ dependencies = [
  "base16ct",
  "der",
  "generic-array",
- "pkcs8",
  "subtle",
  "zeroize",
 ]
 
-[[package]]
-name = "secp256k1"
-version = "0.30.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b50c5943d326858130af85e049f2661ba3c78b26589b8ab98e65e80ae44a1252"
-dependencies = [
- "bitcoin_hashes",
- "rand 0.8.5",
- "secp256k1-sys",
-]
-
-[[package]]
-name = "secp256k1-sys"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4387882333d3aa8cb20530a17c69a3752e97837832f34f6dccc760e715001d9"
-dependencies = [
- "cc",
-]
-
-[[package]]
-name = "semver"
-version = "1.0.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
-
 [[package]]
 name = "serde"
 version = "1.0.228"
@@ -2609,27 +1346,6 @@ dependencies = [
  "serde_derive",
 ]
 
-[[package]]
-name = "serde-wasm-bindgen"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e"
-dependencies = [
- "js-sys",
- "serde",
- "wasm-bindgen",
-]
-
-[[package]]
-name = "serde_cbor"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
-dependencies = [
- "half 1.8.3",
- "serde",
-]
-
 [[package]]
 name = "serde_core"
 version = "1.0.228"
@@ -2663,37 +1379,6 @@ dependencies = [
  "serde_core",
 ]
 
-[[package]]
-name = "serde_with"
-version = "3.16.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7"
-dependencies = [
- "base64",
- "chrono",
- "hex",
- "indexmap 1.9.3",
- "indexmap 2.12.1",
- "schemars 0.9.0",
- "schemars 1.2.0",
- "serde_core",
- "serde_json",
- "serde_with_macros",
- "time",
-]
-
-[[package]]
-name = "serde_with_macros"
-version = "3.16.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c"
-dependencies = [
- "darling",
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -2730,47 +1415,12 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
-[[package]]
-name = "signature"
-version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
-dependencies = [
- "digest",
- "rand_core 0.6.4",
-]
-
 [[package]]
 name = "simdutf8"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
 
-[[package]]
-name = "spin"
-version = "0.9.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]
-
-[[package]]
-name = "spki"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
-dependencies = [
- "base64ct",
- "der",
-]
-
-[[package]]
-name = "stable_deref_trait"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
-
 [[package]]
 name = "stark"
 version = "0.1.0"
@@ -2788,50 +1438,21 @@ dependencies = [
  "rand 0.8.5",
  "rand_chacha 0.3.1",
  "rayon",
- "serde",
- "serde-wasm-bindgen",
- "serde_cbor",
+ "rkyv",
  "sha3",
  "tempfile",
  "test-log",
- "thiserror 1.0.69",
+ "thiserror",
  "wasm-bindgen",
  "web-sys",
 ]
 
-[[package]]
-name = "static_assertions"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
-
 [[package]]
 name = "strsim"
 version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
-[[package]]
-name = "strum"
-version = "0.27.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
-dependencies = [
- "strum_macros",
-]
-
-[[package]]
-name = "strum_macros"
-version = "0.27.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
-dependencies = [
- "heck",
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "subtle"
 version = "2.6.1"
@@ -2862,12 +1483,6 @@ dependencies = [
  "windows",
 ]
 
-[[package]]
-name = "tap"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
-
 [[package]]
 name = "tempfile"
 version = "3.23.0"
@@ -2915,16 +1530,7 @@ version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
 dependencies = [
- "thiserror-impl 1.0.69",
-]
-
-[[package]]
-name = "thiserror"
-version = "2.0.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
-dependencies = [
- "thiserror-impl 2.0.17",
+ "thiserror-impl",
 ]
 
 [[package]]
@@ -2938,17 +1544,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "thiserror-impl"
-version = "2.0.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "thread_local"
 version = "1.1.9"
@@ -2989,37 +1584,6 @@ dependencies = [
  "tikv-jemalloc-sys",
 ]
 
-[[package]]
-name = "time"
-version = "0.3.45"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd"
-dependencies = [
- "deranged",
- "itoa",
- "num-conv",
- "powerfmt",
- "serde_core",
- "time-core",
- "time-macros",
-]
-
-[[package]]
-name = "time-core"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca"
-
-[[package]]
-name = "time-macros"
-version = "0.2.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd"
-dependencies = [
- "num-conv",
- "time-core",
-]
-
 [[package]]
 name = "tiny-keccak"
 version = "2.0.2"
@@ -3054,59 +1618,16 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
-[[package]]
-name = "toml_datetime"
-version = "0.7.5+spec-1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
-dependencies = [
- "serde_core",
-]
-
-[[package]]
-name = "toml_edit"
-version = "0.23.10+spec-1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
-dependencies = [
- "indexmap 2.12.1",
- "toml_datetime",
- "toml_parser",
- "winnow",
-]
-
-[[package]]
-name = "toml_parser"
-version = "1.0.6+spec-1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
-dependencies = [
- "winnow",
-]
-
 [[package]]
 name = "tracing"
 version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
 dependencies = [
- "log",
  "pin-project-lite",
- "tracing-attributes",
  "tracing-core",
 ]
 
-[[package]]
-name = "tracing-attributes"
-version = "0.1.31"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "tracing-core"
 version = "0.1.36"
@@ -3151,18 +1672,6 @@ version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
 
-[[package]]
-name = "uint"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "909988d098b2f738727b161a106cfc7cab00c539c2687a8836f8e565976fb53e"
-dependencies = [
- "byteorder",
- "crunchy",
- "hex",
- "static_assertions",
-]
-
 [[package]]
 name = "unarray"
 version = "0.1.4"
@@ -3175,34 +1684,12 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
 
-[[package]]
-name = "unicode-segmentation"
-version = "1.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
-
-[[package]]
-name = "unicode-xid"
-version = "0.2.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
-
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
-[[package]]
-name = "uuid"
-version = "1.19.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
-dependencies = [
- "js-sys",
- "wasm-bindgen",
-]
-
 [[package]]
 name = "valuable"
 version = "0.1.1"
@@ -3215,12 +1702,6 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
-[[package]]
-name = "vsimd"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
-
 [[package]]
 name = "wait-timeout"
 version = "0.2.1"
@@ -3310,16 +1791,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "wide"
-version = "0.7.33"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03"
-dependencies = [
- "bytemuck",
- "safe_arch",
-]
-
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -3483,30 +1954,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
-[[package]]
-name = "winnow"
-version = "0.7.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
-dependencies = [
- "memchr",
-]
-
 [[package]]
 name = "wit-bindgen"
 version = "0.46.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
 
-[[package]]
-name = "wyz"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
-dependencies = [
- "tap",
-]
-
 [[package]]
 name = "zerocopy"
 version = "0.8.31"
@@ -3532,17 +1985,3 @@ name = "zeroize"
 version = "1.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
-dependencies = [
- "zeroize_derive",
-]
-
-[[package]]
-name = "zeroize_derive"
-version = "1.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
diff --git a/Makefile b/Makefile
index d725ca2d7..5fbe41f2c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 compile-programs compile-recursion-elfs clean-asm clean-rust clean-bench clean-shared \
 clean-recursion-elfs clean test test-asm \
 test-rust test-executor test-flamegraph flamegraph-prover test-profile-recursion test-profile-recursion-single test-profile-recursion-multi \
-test-fast test-prover test-prover-all test-disk-spill test-math-cuda test-cuda-integration \
+test-fast test-prover test-prover-all test-disk-spill test-math-cuda test-cuda-integration test-ethrex \
 bench-math-cuda bench-prover bench-prover-cuda build check clippy fmt lint regen-ethrex-fixtures \
 update-ethrex-fixture-checksums check-ethrex-fixture-checksums
 
@@ -255,8 +255,14 @@ update-ethrex-fixture-checksums:
 check-ethrex-fixture-checksums:
 	python3 tooling/ethrex-fixtures/update_readme_checksums.py --check
 
+# Detached workspace: ethrex pins rkyv `unaligned`, which must not feature-unify
+# with the main workspace's aligned proof format (see tooling/ethrex-tests).
+test-ethrex: compile-programs-rust
+	cd tooling/ethrex-tests && cargo test
+
 test: compile-programs
 	cargo test
+	$(MAKE) test-ethrex
 
 # === Quick test shortcuts ===
 
diff --git a/bench_vs/lambda/recursion/Cargo.lock b/bench_vs/lambda/recursion/Cargo.lock
index 88a9fb605..0e0cd0796 100644
--- a/bench_vs/lambda/recursion/Cargo.lock
+++ b/bench_vs/lambda/recursion/Cargo.lock
@@ -2,15 +2,6 @@
 # It is not intended for manual editing.
 version = 4
 
-[[package]]
-name = "atomic-polyfill"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
-dependencies = [
- "critical-section",
-]
-
 [[package]]
 name = "autocfg"
 version = "1.5.1"
@@ -45,25 +36,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
 
 [[package]]
-name = "byteorder"
-version = "1.5.0"
+name = "bytecheck"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b"
+dependencies = [
+ "bytecheck_derive",
+ "ptr_meta",
+ "rancor",
+ "simdutf8",
+]
 
 [[package]]
-name = "cfg-if"
-version = "1.0.4"
+name = "bytecheck_derive"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
 
 [[package]]
-name = "cobs"
-version = "0.3.0"
+name = "cfg-if"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
-dependencies = [
- "thiserror 2.0.18",
-]
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
 [[package]]
 name = "const-default"
@@ -131,6 +130,7 @@ dependencies = [
  "math",
  "rand 0.8.6",
  "rand_chacha 0.3.1",
+ "rkyv",
  "serde",
  "sha3",
 ]
@@ -227,25 +227,13 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
 
-[[package]]
-name = "embedded-io"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
-
-[[package]]
-name = "embedded-io"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
-
 [[package]]
 name = "executor"
 version = "0.1.0"
 dependencies = [
  "ecsm",
  "rustc-demangle",
- "thiserror 1.0.69",
+ "thiserror",
 ]
 
 [[package]]
@@ -330,33 +318,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "half"
-version = "1.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403"
-
-[[package]]
-name = "hash32"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
-dependencies = [
- "byteorder",
-]
-
-[[package]]
-name = "heapless"
-version = "0.7.17"
+name = "hashbrown"
+version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
-dependencies = [
- "atomic-polyfill",
- "hash32",
- "rustc_version",
- "serde",
- "spin",
- "stable_deref_trait",
-]
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
 
 [[package]]
 name = "itertools"
@@ -412,8 +377,7 @@ dependencies = [
  "executor",
  "log",
  "math",
- "postcard",
- "serde",
+ "rkyv",
  "sha3",
  "stark",
  "sysinfo",
@@ -429,7 +393,7 @@ dependencies = [
  "lazy_static",
  "rand 0.9.4",
  "riscv",
- "thiserror 1.0.69",
+ "thiserror",
 ]
 
 [[package]]
@@ -450,15 +414,6 @@ version = "0.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897"
 
-[[package]]
-name = "lock_api"
-version = "0.4.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
-dependencies = [
- "scopeguard",
-]
-
 [[package]]
 name = "log"
 version = "0.4.33"
@@ -474,6 +429,7 @@ dependencies = [
  "num-traits",
  "rand 0.8.6",
  "rayon",
+ "rkyv",
  "serde",
  "serde_json",
 ]
@@ -484,6 +440,26 @@ version = "2.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
 
+[[package]]
+name = "munge"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c"
+dependencies = [
+ "munge_macro",
+]
+
+[[package]]
+name = "munge_macro"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
 [[package]]
 name = "ntapi"
 version = "0.4.3"
@@ -539,19 +515,6 @@ version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
 
-[[package]]
-name = "postcard"
-version = "1.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
-dependencies = [
- "cobs",
- "embedded-io 0.4.0",
- "embedded-io 0.6.1",
- "heapless",
- "serde",
-]
-
 [[package]]
 name = "ppv-lite86"
 version = "0.2.21"
@@ -570,6 +533,26 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "ptr_meta"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79"
+dependencies = [
+ "ptr_meta_derive",
+]
+
+[[package]]
+name = "ptr_meta_derive"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.46"
@@ -585,6 +568,15 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "rancor"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daff8b7b3ccf5f7ba270b3e7a0a4d4c701c5797e38dec27c7e2c3dbb830fed1c"
+dependencies = [
+ "ptr_meta",
+]
+
 [[package]]
 name = "rand"
 version = "0.8.6"
@@ -665,7 +657,15 @@ version = "0.1.0"
 dependencies = [
  "lambda-vm-prover",
  "lambda-vm-syscalls",
- "postcard",
+]
+
+[[package]]
+name = "rend"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "663ba70707f96e871406fe10d68128412e619b06d1d47cb91c3a4c6501176240"
+dependencies = [
+ "bytecheck",
 ]
 
 [[package]]
@@ -698,6 +698,33 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
 
+[[package]]
+name = "rkyv"
+version = "0.8.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "815cc8a37159a463064825246cadb07961e25cd9885908606f6d08a98d8f8874"
+dependencies = [
+ "bytecheck",
+ "hashbrown",
+ "munge",
+ "ptr_meta",
+ "rancor",
+ "rend",
+ "rkyv_derive",
+ "tinyvec",
+]
+
+[[package]]
+name = "rkyv_derive"
+version = "0.8.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0ed1a78a1b19d184b0daa629dd9a024573173ec7d485b287cb369fb3607cc1c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
 [[package]]
 name = "rlsf"
 version = "0.2.2"
@@ -717,27 +744,12 @@ version = "0.1.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
 
-[[package]]
-name = "rustc_version"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
-dependencies = [
- "semver",
-]
-
 [[package]]
 name = "rustversion"
 version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
 
-[[package]]
-name = "scopeguard"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
-
 [[package]]
 name = "sec1"
 version = "0.7.3"
@@ -751,12 +763,6 @@ dependencies = [
  "zeroize",
 ]
 
-[[package]]
-name = "semver"
-version = "1.0.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
-
 [[package]]
 name = "serde"
 version = "1.0.228"
@@ -767,16 +773,6 @@ dependencies = [
  "serde_derive",
 ]
 
-[[package]]
-name = "serde_cbor"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
-dependencies = [
- "half",
- "serde",
-]
-
 [[package]]
 name = "serde_core"
 version = "1.0.228"
@@ -821,25 +817,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "slab"
-version = "0.4.12"
+name = "simdutf8"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
 
 [[package]]
-name = "spin"
-version = "0.9.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]
-
-[[package]]
-name = "stable_deref_trait"
-version = "1.2.1"
+name = "slab"
+version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
 
 [[package]]
 name = "stark"
@@ -849,10 +836,9 @@ dependencies = [
  "itertools",
  "log",
  "math",
- "serde",
- "serde_cbor",
+ "rkyv",
  "sha3",
- "thiserror 1.0.69",
+ "thiserror",
 ]
 
 [[package]]
@@ -915,16 +901,7 @@ version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
 dependencies = [
- "thiserror-impl 1.0.69",
-]
-
-[[package]]
-name = "thiserror"
-version = "2.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
-dependencies = [
- "thiserror-impl 2.0.18",
+ "thiserror-impl",
 ]
 
 [[package]]
@@ -939,16 +916,20 @@ dependencies = [
 ]
 
 [[package]]
-name = "thiserror-impl"
-version = "2.0.18"
+name = "tinyvec"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
 dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.118",
+ "tinyvec_macros",
 ]
 
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
 [[package]]
 name = "typenum"
 version = "1.20.1"
diff --git a/bench_vs/lambda/recursion/Cargo.toml b/bench_vs/lambda/recursion/Cargo.toml
index 1d2ddc808..6a926546f 100644
--- a/bench_vs/lambda/recursion/Cargo.toml
+++ b/bench_vs/lambda/recursion/Cargo.toml
@@ -10,4 +10,3 @@ lambda-vm-prover = { path = "../../../prover", default-features = false, feature
     "profile-markers",
 ] }
 lambda-vm-syscalls = { path = "../../../syscalls" }
-postcard = { version = "1.0", features = ["alloc"] }
diff --git a/bench_vs/lambda/recursion/src/main.rs b/bench_vs/lambda/recursion/src/main.rs
index 00aac3f03..11e64f90c 100644
--- a/bench_vs/lambda/recursion/src/main.rs
+++ b/bench_vs/lambda/recursion/src/main.rs
@@ -1,12 +1,16 @@
 //! Naive recursion guest: verifies an inner lambda-vm proof inside the VM.
 //!
-//! Private input layout (postcard-encoded):
-//!   `(VmProof, Vec<u8>, ProofOptions, VmVerifyingKey)`
-//! where the `Vec<u8>` holds the inner program's ELF bytes and `ProofOptions`
-//! specifies the parameters the inner prover used. Commits
-//! `vk_digest ‖ inner public output` on success: every input here is
-//! prover-supplied, so soundness comes from the outer verifier checking
-//! the committed digest against one derived from the trusted inner ELF.
+//! Private input layout: a 12-byte `"LVMR" + version + reserved` prefix
+//! followed by an rkyv archive of `lambda_vm_prover::RecursionInput`
+//! `{ vm_proof, inner_elf, options, vkey }`. The prefix 16-aligns the archive
+//! in guest memory (the executor maps the payload at `PRIVATE_INPUT_START + 4`,
+//! which is only 4-aligned) and tags the format so the guest rejects a
+//! wrong-format blob before the unsafe access. The proof is verified **in
+//! place** via `verify_recursion_blob` — no deserialization pass, no owned
+//! `VmProof`. Commits `vk_digest ‖ inner public output` on success: every
+//! input here is prover-supplied, so soundness comes from the outer verifier
+//! checking the committed digest against one derived from the trusted inner
+//! ELF.
 //!
 //! Not `no_std` (std/alloc are available — `build-std` provides them, and the
 //! prover links as a normal std crate; its prove-side code is dead-code
@@ -17,8 +21,6 @@
 
 #![no_main]
 
-use lambda_vm_prover::{ProofOptions, VmProof, VmVerifyingKey};
-
 #[unsafe(export_name = "main")]
 pub fn main() -> ! {
     lambda_vm_syscalls::allocator::init_allocator();
@@ -31,27 +33,20 @@ pub fn main() -> ! {
         lambda_vm_syscalls::syscalls::sys_panic(PANIC_MSG.as_ptr(), PANIC_MSG.len())
     }));
 
-    let blob = lambda_vm_syscalls::syscalls::get_private_input();
-    let (vm_proof, inner_elf, options, vkey): (VmProof, Vec<u8>, ProofOptions, VmVerifyingKey) =
-        postcard::from_bytes(&blob).expect("failed to deserialize recursion input");
+    // Zero-copy: borrow the blob straight from the mapped private-input region.
+    // The 12-byte prefix puts the archive at a 16-aligned guest address, so the
+    // verifier's in-place doubleword loads don't trap.
+    let blob = lambda_vm_syscalls::syscalls::get_private_input_slice();
     lambda_vm_prover::profile_markers::step_marker::<
         { lambda_vm_prover::profile_markers::STEP_DECODE_DONE },
     >();
 
-    let ok = lambda_vm_prover::verify_with_options_with_vkey(
-        &vm_proof,
-        &inner_elf,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    )
-    .expect("verify errored");
-    assert!(ok, "inner proof failed verification");
+    let verification = lambda_vm_prover::verify_recursion_blob(blob).expect("verify errored");
+    assert!(verification.ok, "inner proof failed verification");
 
-    let mut output = Vec::with_capacity(32 + vm_proof.public_output.len());
-    output.extend_from_slice(&vkey.compute_digest());
-    output.extend_from_slice(&vm_proof.public_output);
+    let mut output = Vec::with_capacity(32 + verification.public_output.len());
+    output.extend_from_slice(&verification.vk_digest);
+    output.extend_from_slice(verification.public_output);
     lambda_vm_syscalls::syscalls::commit(&output);
     lambda_vm_syscalls::syscalls::sys_halt();
 }
diff --git a/bin/cli/Cargo.toml b/bin/cli/Cargo.toml
index 87bb1c8fc..c0179c74d 100644
--- a/bin/cli/Cargo.toml
+++ b/bin/cli/Cargo.toml
@@ -9,7 +9,7 @@ executor = { path = "../../executor" }
 prover = { path = "../../prover", package = "lambda-vm-prover" }
 stark = { path = "../../crypto/stark" }
 clap = { version = "4.3.10", features = ["derive"] }
-bincode = "1"
+rkyv = { version = "0.8.10", default-features = false, features = ["alloc", "bytecheck", "aligned"] }
 tikv-jemallocator = "0.6"
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true }
 env_logger = "0.11"
diff --git a/bin/cli/src/main.rs b/bin/cli/src/main.rs
index 2b053755c..cf43863b8 100644
--- a/bin/cli/src/main.rs
+++ b/bin/cli/src/main.rs
@@ -475,7 +475,7 @@ fn cmd_prove(
     };
     let mut writer = BufWriter::new(file);
 
-    let bytes = match bincode::serialize(&proof) {
+    let bytes = match rkyv::to_bytes::<rkyv::rancor::Error>(&proof) {
         Ok(b) => b,
         Err(e) => {
             eprintln!("Failed to serialize proof: {}", e);
@@ -526,7 +526,7 @@ fn cmd_verify(proof_path: PathBuf, elf_path: PathBuf, blowup: u8, time: bool) ->
         }
     };
 
-    let proof: VmProof = match bincode::deserialize(&proof_bytes) {
+    let proof: VmProof = match rkyv::from_bytes::<VmProof, rkyv::rancor::Error>(&proof_bytes) {
         Ok(p) => p,
         Err(e) => {
             eprintln!("Failed to deserialize proof: {}", e);
@@ -647,7 +647,7 @@ fn cmd_prove_continuation(
         }
     };
     let mut writer = BufWriter::new(file);
-    let bytes = match bincode::serialize(&bundle) {
+    let bytes = match rkyv::to_bytes::<rkyv::rancor::Error>(&bundle) {
         Ok(b) => b,
         Err(e) => {
             eprintln!("Failed to serialize proof: {}", e);
@@ -693,7 +693,11 @@ fn cmd_verify_continuation(
             return ExitCode::FAILURE;
         }
     };
-    let bundle: prover::continuation::ContinuationProof = match bincode::deserialize(&proof_bytes) {
+    let bundle: prover::continuation::ContinuationProof = match rkyv::from_bytes::<
+        prover::continuation::ContinuationProof,
+        rkyv::rancor::Error,
+    >(&proof_bytes)
+    {
         Ok(p) => p,
         Err(e) => {
             eprintln!("Failed to deserialize proof: {}", e);
diff --git a/crypto/crypto/Cargo.toml b/crypto/crypto/Cargo.toml
index 6e3731beb..c8f7bc6a5 100644
--- a/crypto/crypto/Cargo.toml
+++ b/crypto/crypto/Cargo.toml
@@ -22,6 +22,11 @@ rand_chacha = { version = "0.3.1", default-features = false }
 memmap2 = { version = "0.9", optional = true }
 tempfile = { version = "3", optional = true }
 libc = { version = "0.2", optional = true }
+rkyv = { version = "0.8.10", default-features = false, features = [
+    "alloc",
+    "bytecheck",
+    "aligned",
+], optional = true }
 
 [dev-dependencies]
 math = { path = "../math", features = ["test-utils"] }
@@ -37,4 +42,5 @@ std = ["math/std", "sha3/std", "serde?/std"]
 serde = ["dep:serde"]
 parallel = ["dep:rayon"]
 disk-spill = ["std", "dep:memmap2", "dep:tempfile", "dep:libc"]
-alloc = []
\ No newline at end of file
+alloc = []
+rkyv = ["dep:rkyv", "math/rkyv"]
\ No newline at end of file
diff --git a/crypto/crypto/src/merkle_tree/proof.rs b/crypto/crypto/src/merkle_tree/proof.rs
index 20d5452a2..2bbcfb3c5 100644
--- a/crypto/crypto/src/merkle_tree/proof.rs
+++ b/crypto/crypto/src/merkle_tree/proof.rs
@@ -15,29 +15,49 @@ use super::{
 /// when verifying.
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(
+    feature = "rkyv",
+    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
+)]
 pub struct Proof<T: PartialEq + Eq> {
     pub merkle_path: Vec<T>,
 }
 
+/// Verifies a Merkle inclusion proof given the authentication path as a borrowed
+/// slice. Shared by [`Proof::verify`] (owned) and the zero-copy verifier (which
+/// reads the path straight from an rkyv-archived proof buffer) so both compute
+/// the identical root.
+pub fn verify_merkle_path<B>(
+    merkle_path: &[B::Node],
+    root_hash: &B::Node,
+    mut index: usize,
+    value: &B::Data,
+) -> bool
+where
+    B: IsMerkleTreeBackend,
+{
+    let mut hashed_value = B::hash_data(value);
+
+    for sibling_node in merkle_path.iter() {
+        if index.is_multiple_of(2) {
+            hashed_value = B::hash_new_parent(&hashed_value, sibling_node);
+        } else {
+            hashed_value = B::hash_new_parent(sibling_node, &hashed_value);
+        }
+
+        index >>= 1;
+    }
+
+    root_hash == &hashed_value
+}
+
 impl<T: PartialEq + Eq> Proof<T> {
     /// Verifies a Merkle inclusion proof for the value contained at leaf index.
-    pub fn verify<B>(&self, root_hash: &B::Node, mut index: usize, value: &B::Data) -> bool
+    pub fn verify<B>(&self, root_hash: &B::Node, index: usize, value: &B::Data) -> bool
     where
         B: IsMerkleTreeBackend<Node = T>,
     {
-        let mut hashed_value = B::hash_data(value);
-
-        for sibling_node in self.merkle_path.iter() {
-            if index.is_multiple_of(2) {
-                hashed_value = B::hash_new_parent(&hashed_value, sibling_node);
-            } else {
-                hashed_value = B::hash_new_parent(sibling_node, &hashed_value);
-            }
-
-            index >>= 1;
-        }
-
-        root_hash == &hashed_value
+        verify_merkle_path::<B>(&self.merkle_path, root_hash, index, value)
     }
 }
 
diff --git a/crypto/math/Cargo.toml b/crypto/math/Cargo.toml
index 85979a7c4..df43ea975 100644
--- a/crypto/math/Cargo.toml
+++ b/crypto/math/Cargo.toml
@@ -23,6 +23,14 @@ rayon = { version = "1.7", optional = true }
 num-bigint = { version = "0.4.6", default-features = false }
 num-traits = { version = "0.2.19", default-features = false }
 
+# rkyv zero-copy (de)serialization. Optional; used by the recursion verifier to
+# read a proof straight from its byte buffer with no deserialization pass.
+rkyv = { version = "0.8.10", default-features = false, features = [
+    "alloc",
+    "bytecheck",
+    "aligned",
+], optional = true }
+
 [dev-dependencies]
 rand_chacha = "0.3.1"
 criterion = "0.5.1"
@@ -39,6 +47,7 @@ lambdaworks-serde-string = ["dep:serde", "dep:serde_json", "alloc"]
 proptest = ["dep:proptest"]
 instruments = []
 test-utils = []
+rkyv = ["dep:rkyv"]
 
 [target.wasm32-unknown-unknown.dependencies]
 getrandom = { version = "0.2.15", features = ["js"] }
diff --git a/crypto/math/src/field/element.rs b/crypto/math/src/field/element.rs
index 0eb0aef96..d5aa07e57 100644
--- a/crypto/math/src/field/element.rs
+++ b/crypto/math/src/field/element.rs
@@ -850,3 +850,160 @@ impl<'de, F: IsPrimeField> Deserialize<'de> for FieldElement<F> {
         deserializer.deserialize_struct("FieldElement", FIELDS, FieldElementVisitor(PhantomData))
     }
 }
+
+// ============================================================================
+// rkyv zero-copy (de)serialization
+// ============================================================================
+//
+// `FieldElement<F>` is `#[repr(transparent)]` over `F::BaseType`. Its archived
+// form is a local `#[repr(transparent)]` newtype wrapping the archived form of
+// `F::BaseType` (e.g. archived `u64` for Goldilocks, `[ArchivedFieldElement; 3]`
+// for the cubic extension). Keeping it a LOCAL type (rather than reusing
+// `<F::BaseType as Archive>::Archived` directly) is what lets us implement
+// `Deserialize` without colliding with rkyv's blanket impls — while the
+// transparent repr keeps the archived bytes identical to the base type, so the
+// recursion verifier still reads field elements straight from the proof buffer.
+
+/// Archived form of [`FieldElement<F>`]: a transparent newtype over the archived `F::BaseType`.
+#[cfg(feature = "rkyv")]
+#[repr(transparent)]
+pub struct ArchivedFieldElement<F: IsField>
+where
+    F::BaseType: rkyv::Archive,
+{
+    value: <F::BaseType as rkyv::Archive>::Archived,
+}
+
+#[cfg(feature = "rkyv")]
+const _: () = {
+    use rkyv::{Archive, Deserialize, Place, Portable, Serialize};
+
+    // SAFETY: `ArchivedFieldElement<F>` is `#[repr(transparent)]` over the base
+    // type's archived form, which is itself `Portable` (required by `Archive`).
+    // A transparent wrapper over a `Portable` type is position-independent and
+    // valid for the same byte patterns, so it is `Portable` too.
+    unsafe impl<F> Portable for ArchivedFieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Archive,
+        <F::BaseType as Archive>::Archived: Portable,
+    {
+    }
+
+    impl<F> Archive for FieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Archive,
+    {
+        type Archived = ArchivedFieldElement<F>;
+        type Resolver = <F::BaseType as Archive>::Resolver;
+
+        #[inline]
+        fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
+            // SAFETY: `ArchivedFieldElement` is `#[repr(transparent)]` over the
+            // base type's archived form, so a place for the newtype is also a
+            // valid place for its single inner field.
+            let inner = unsafe { out.cast_unchecked::<<F::BaseType as Archive>::Archived>() };
+            self.value.resolve(resolver, inner);
+        }
+    }
+
+    impl<F, S> Serialize<S> for FieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Serialize<S>,
+        S: rkyv::rancor::Fallible + ?Sized,
+    {
+        #[inline]
+        fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
+            self.value.serialize(serializer)
+        }
+    }
+
+    impl<F, D> Deserialize<FieldElement<F>, D> for ArchivedFieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Archive,
+        <F::BaseType as Archive>::Archived: Deserialize<F::BaseType, D>,
+        D: rkyv::rancor::Fallible + ?Sized,
+    {
+        #[inline]
+        fn deserialize(&self, deserializer: &mut D) -> Result<FieldElement<F>, D::Error> {
+            Ok(FieldElement {
+                value: self.value.deserialize(deserializer)?,
+            })
+        }
+    }
+
+    impl<F> ArchivedFieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Archive,
+    {
+        /// Borrow the archived base-type value (for zero-copy reads).
+        #[inline]
+        pub fn archived_value(&self) -> &<F::BaseType as Archive>::Archived {
+            &self.value
+        }
+    }
+
+    // SAFETY: `#[repr(transparent)]` over the inner archived value, so checking
+    // the inner type's bytes checks the whole newtype.
+    unsafe impl<F, C> rkyv::bytecheck::CheckBytes<C> for ArchivedFieldElement<F>
+    where
+        F: IsField,
+        F::BaseType: Archive,
+        <F::BaseType as Archive>::Archived: rkyv::bytecheck::CheckBytes<C>,
+        C: rkyv::rancor::Fallible + ?Sized,
+    {
+        unsafe fn check_bytes(value: *const Self, context: &mut C) -> Result<(), C::Error> {
+            unsafe {
+                <<F::BaseType as Archive>::Archived as rkyv::bytecheck::CheckBytes<C>>::check_bytes(
+                    value as *const <F::BaseType as Archive>::Archived,
+                    context,
+                )
+            }
+        }
+    }
+};
+
+// ----------------------------------------------------------------------------
+// Zero-copy native views (little-endian only)
+// ----------------------------------------------------------------------------
+//
+// rkyv archives integers as `rend::*_le` types, which are `#[repr(C, align(N))]`
+// and bit-identical to the native little-endian primitive. `FieldElement<F>` is
+// `#[repr(transparent)]` over `F::BaseType` and `ArchivedFieldElement<F>` is
+// `#[repr(transparent)]` over `<F::BaseType as Archive>::Archived`. So on a
+// little-endian target the two types share size, alignment, and bit layout —
+// an archived field element *is* a native field element. These views let the
+// verifier read field elements straight out of the proof buffer with no copy
+// and no allocation.
+//
+// Restricted to `target_endian = "little"` (the lambda-vm guest target). On a
+// big-endian host these would be wrong, so they simply don't exist there.
+#[cfg(all(feature = "rkyv", target_endian = "little"))]
+impl<F: IsField> ArchivedFieldElement<F>
+where
+    F::BaseType: rkyv::Archive,
+{
+    /// Reinterpret this archived element as a native [`FieldElement`] (no copy).
+    ///
+    /// Relies on the archived and native layouts coinciding on little-endian targets.
+    #[inline]
+    pub fn as_native(&self) -> &FieldElement<F> {
+        // SAFETY: identical size/align/bit-layout on little-endian.
+        unsafe { &*(self as *const Self as *const FieldElement<F>) }
+    }
+
+    /// Reinterpret a slice of archived elements as a slice of native
+    /// [`FieldElement`]s (no copy, no allocation).
+    #[inline]
+    pub fn slice_as_native(slice: &[Self]) -> &[FieldElement<F>] {
+        // SAFETY: element-wise identical layout on little-endian, so the slice
+        // (same length, same element stride) reinterprets directly.
+        unsafe {
+            core::slice::from_raw_parts(slice.as_ptr() as *const FieldElement<F>, slice.len())
+        }
+    }
+}
diff --git a/crypto/stark/Cargo.toml b/crypto/stark/Cargo.toml
index 3a3b95068..e205810ae 100644
--- a/crypto/stark/Cargo.toml
+++ b/crypto/stark/Cargo.toml
@@ -9,16 +9,13 @@ crate-type = ["cdylib", "rlib"]
 
 
 [dependencies]
-math = { path = "../math", features = [
-    "std",
-    "lambdaworks-serde-binary",
-] }
-crypto = { path = "../crypto", features = ["std", "serde"] }
+math = { path = "../math", features = ["std", "rkyv"] }
+crypto = { path = "../crypto", features = ["std", "rkyv"] }
 thiserror = "1.0.38"
 log = "0.4.17"
 sha3 = "0.10.8"
-serde = { version = "1.0", features = ["derive"] }
 itertools = "0.11.0"
+rkyv = { version = "0.8.10", default-features = false, features = ["alloc", "bytecheck", "aligned"] }
 
 # Parallelization crates
 rayon = { version = "1.8.0", optional = true }
@@ -32,9 +29,7 @@ math-cuda = { path = "../math-cuda", optional = true }
 
 # wasm
 wasm-bindgen = { version = "0.2", optional = true }
-serde-wasm-bindgen = { version = "0.5", optional = true }
 web-sys = { version = "0.3.64", features = ['console'], optional = true }
-serde_cbor = { version = "0.11.1" }
 
 [dev-dependencies]
 criterion = { version = "0.4", default-features = false }
@@ -53,7 +48,7 @@ debug-checks = []                                                    # Enables v
 parallel = ["dep:rayon", "crypto/parallel"]
 cuda = ["dep:math-cuda"]
 test-cuda-faults = ["cuda", "math-cuda/test-faults"]
-wasm = ["dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:web-sys"]
+wasm = ["dep:wasm-bindgen", "dep:web-sys"]
 disk-spill = ["dep:memmap2", "dep:tempfile", "dep:libc", "crypto/disk-spill"]
 
 
diff --git a/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs b/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs
index 76c8ea11f..fae17e5ba 100644
--- a/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs
+++ b/crypto/stark/src/examples/fibonacci_2_cols_shifted.rs
@@ -137,7 +137,7 @@ where
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct PublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/fibonacci_multi_column.rs b/crypto/stark/src/examples/fibonacci_multi_column.rs
index ac6069ece..43efa8566 100644
--- a/crypto/stark/src/examples/fibonacci_multi_column.rs
+++ b/crypto/stark/src/examples/fibonacci_multi_column.rs
@@ -113,7 +113,7 @@ where
 
 /// Public inputs for the multi-column Fibonacci AIR.
 /// Contains the initial values (first two elements) for each column.
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct FibonacciMultiColumnPublicInputs<F: IsFFTField> {
     /// Initial values for each column: (a0, a1) pairs
     pub initial_values: Vec<(FieldElement<F>, FieldElement<F>)>,
diff --git a/crypto/stark/src/examples/fibonacci_rap.rs b/crypto/stark/src/examples/fibonacci_rap.rs
index 10f1827d2..9d359bf26 100644
--- a/crypto/stark/src/examples/fibonacci_rap.rs
+++ b/crypto/stark/src/examples/fibonacci_rap.rs
@@ -164,7 +164,7 @@ where
     transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct FibonacciRAPPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/quadratic_air.rs b/crypto/stark/src/examples/quadratic_air.rs
index d49b0050d..6a7fcc3da 100644
--- a/crypto/stark/src/examples/quadratic_air.rs
+++ b/crypto/stark/src/examples/quadratic_air.rs
@@ -81,7 +81,7 @@ where
     constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct QuadraticPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/read_only_memory.rs b/crypto/stark/src/examples/read_only_memory.rs
index 8c3e9efac..eef4b47bc 100644
--- a/crypto/stark/src/examples/read_only_memory.rs
+++ b/crypto/stark/src/examples/read_only_memory.rs
@@ -232,7 +232,7 @@ where
     transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct ReadOnlyPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/read_only_memory_logup.rs b/crypto/stark/src/examples/read_only_memory_logup.rs
index e4f25c16c..3e90dc89d 100644
--- a/crypto/stark/src/examples/read_only_memory_logup.rs
+++ b/crypto/stark/src/examples/read_only_memory_logup.rs
@@ -360,7 +360,7 @@ where
     transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, E>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct LogReadOnlyPublicInputs<F>
 where
     F: IsFFTField + Send + Sync,
diff --git a/crypto/stark/src/examples/simple_addition.rs b/crypto/stark/src/examples/simple_addition.rs
index 78f938838..a78e71eb2 100644
--- a/crypto/stark/src/examples/simple_addition.rs
+++ b/crypto/stark/src/examples/simple_addition.rs
@@ -83,7 +83,7 @@ where
     constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct SimpleAdditionPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/simple_fibonacci.rs b/crypto/stark/src/examples/simple_fibonacci.rs
index a39064258..7e7dab641 100644
--- a/crypto/stark/src/examples/simple_fibonacci.rs
+++ b/crypto/stark/src/examples/simple_fibonacci.rs
@@ -82,7 +82,7 @@ where
     constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct FibonacciPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/examples/simple_periodic_cols.rs b/crypto/stark/src/examples/simple_periodic_cols.rs
index 70f5da3b4..0f5b74af8 100644
--- a/crypto/stark/src/examples/simple_periodic_cols.rs
+++ b/crypto/stark/src/examples/simple_periodic_cols.rs
@@ -100,7 +100,7 @@ where
     transition_constraints: Vec<Box<dyn TransitionConstraintEvaluator<F, F>>>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct SimplePeriodicPublicInputs<F>
 where
     F: IsFFTField,
diff --git a/crypto/stark/src/fri/fri_decommit.rs b/crypto/stark/src/fri/fri_decommit.rs
index f398096d5..baab2cd4d 100644
--- a/crypto/stark/src/fri/fri_decommit.rs
+++ b/crypto/stark/src/fri/fri_decommit.rs
@@ -4,8 +4,7 @@ use math::field::traits::IsField;
 
 use crate::config::Commitment;
 
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "")]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct FriDecommitment<F: IsField> {
     pub layers_auth_paths: Vec<Proof<Commitment>>,
     pub layers_evaluations_sym: Vec<FieldElement<F>>,
diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs
index 5174bf66c..8404e9ed3 100644
--- a/crypto/stark/src/lookup.rs
+++ b/crypto/stark/src/lookup.rs
@@ -1394,8 +1394,7 @@ impl BusInteraction {
 ///
 /// For the circular constraint, `table_contribution / N` is the per-row offset
 /// that makes the accumulated column wrap to zero at row N-1.
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "")]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct BusPublicInputs<E>
 where
     E: IsField,
@@ -1403,20 +1402,45 @@ where
     /// Total sum of all LogUp terms across all rows (L).
     /// Used for bus balance check and to derive the per-row offset L/N.
     pub table_contribution: FieldElement<E>,
-    /// Per-bus sums for this table (bus_id → sum) - for debug aggregation
+    /// Per-bus sums for this table (bus_id → sum) - for debug aggregation.
+    /// Debug-only aggregation state; not part of the archived proof (`Skip`).
     #[cfg(feature = "debug-checks")]
+    #[rkyv(with = rkyv::with::Skip)]
     pub per_bus_sums: HashMap<u64, FieldElement<E>>,
     /// Per-bus sender sums (bus_id → sum) - positive contributions
     #[cfg(feature = "debug-checks")]
+    #[rkyv(with = rkyv::with::Skip)]
     pub per_bus_sender_sums: HashMap<u64, FieldElement<E>>,
     /// Per-bus receiver sums (bus_id → sum) - absolute value (before negation)
     #[cfg(feature = "debug-checks")]
+    #[rkyv(with = rkyv::with::Skip)]
     pub per_bus_receiver_sums: HashMap<u64, FieldElement<E>>,
     /// Table name for debug output
     #[cfg(feature = "debug-checks")]
+    #[rkyv(with = rkyv::with::Skip)]
     pub table_name: String,
 }
 
+impl<E: IsField> BusPublicInputs<E> {
+    /// Build a `BusPublicInputs` carrying just the table contribution `L`.
+    /// The debug-only per-bus aggregation fields are defaulted (empty). Used by
+    /// the zero-copy verifier, which reads only `table_contribution` from the
+    /// archived proof.
+    pub fn from_contribution(table_contribution: FieldElement<E>) -> Self {
+        Self {
+            table_contribution,
+            #[cfg(feature = "debug-checks")]
+            per_bus_sums: HashMap::new(),
+            #[cfg(feature = "debug-checks")]
+            per_bus_sender_sums: HashMap::new(),
+            #[cfg(feature = "debug-checks")]
+            per_bus_receiver_sums: HashMap::new(),
+            #[cfg(feature = "debug-checks")]
+            table_name: String::new(),
+        }
+    }
+}
+
 /// Trait representing boundary constraint building behaviour.
 ///  Should be defined when creating an `AirWithBuses` if the AIR requires its own boundary constraints aside from the lookup ones
 pub trait BoundaryConstraintBuilder<
diff --git a/crypto/stark/src/proof/options.rs b/crypto/stark/src/proof/options.rs
index 2c91ef00c..2342d5a14 100644
--- a/crypto/stark/src/proof/options.rs
+++ b/crypto/stark/src/proof/options.rs
@@ -39,7 +39,7 @@ impl fmt::Display for ProofOptionsError {
 /// - `coset_offset`: the offset for the coset
 /// - `grinding_factor`: the number of leading zeros that we want for the Hash(hash || nonce)
 #[cfg_attr(feature = "wasm", wasm_bindgen)]
-#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Debug, PartialEq, Eq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct ProofOptions {
     pub blowup_factor: u8,
     pub fri_number_of_queries: usize,
diff --git a/crypto/stark/src/proof/stark.rs b/crypto/stark/src/proof/stark.rs
index 851c0b37a..4229b2bdb 100644
--- a/crypto/stark/src/proof/stark.rs
+++ b/crypto/stark/src/proof/stark.rs
@@ -8,8 +8,7 @@ use crate::{
     config::Commitment, fri::fri_decommit::FriDecommitment, lookup::BusPublicInputs, table::Table,
 };
 
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "")]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 /// Opening of a bit-reversed, row-paired commitment at one FRI query.
 ///
 /// The queried row and its symmetric counterpart (LDE positions `2·iota`,
@@ -22,8 +21,7 @@ pub struct PolynomialOpenings<F: IsField> {
     pub evaluations_sym: Vec<FieldElement<F>>,
 }
 
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "")]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct DeepPolynomialOpening<F: IsSubFieldOf<E>, E: IsField> {
     pub composition_poly: PolynomialOpenings<E>,
     pub main_trace_polys: PolynomialOpenings<F>,
@@ -35,8 +33,7 @@ pub struct DeepPolynomialOpening<F: IsSubFieldOf<E>, E: IsField> {
 
 pub type DeepPolynomialOpenings<F, E> = Vec<DeepPolynomialOpening<F, E>>;
 
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "PI: serde::Serialize + serde::de::DeserializeOwned")]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct StarkProof<F: IsSubFieldOf<E>, E: IsField, PI> {
     // Length of the execution trace
     pub trace_length: usize,
@@ -78,8 +75,7 @@ pub struct StarkProof<F: IsSubFieldOf<E>, E: IsField, PI> {
 /// A collection of STARK proofs for multiple AIRs.
 /// Used for multi-table proving where tables are linked via bus (LogUp).
 /// Returned by `Prover::multi_prove` and verified by `Verifier::multi_verify`.
-#[derive(Debug, serde::Serialize, serde::Deserialize)]
-#[serde(bound = "PI: serde::Serialize + serde::de::DeserializeOwned")]
+#[derive(Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct MultiProof<F: IsSubFieldOf<E>, E: IsField, PI> {
     pub proofs: Vec<StarkProof<F, E, PI>>,
 }
diff --git a/crypto/stark/src/table.rs b/crypto/stark/src/table.rs
index dc188d690..93541c65b 100644
--- a/crypto/stark/src/table.rs
+++ b/crypto/stark/src/table.rs
@@ -41,12 +41,18 @@ impl std::fmt::Debug for TableMmapBacking {
 /// the STARK protocol implementation, such as the `TraceTable` and the `EvaluationFrame`.
 /// Since this struct is a representation of a two-dimensional table, all rows should have the same
 /// length.
-#[derive(Default, Debug, serde::Deserialize)]
+#[derive(Default, Debug)]
 #[cfg_attr(
     not(feature = "disk-spill"),
-    derive(serde::Serialize, Clone, PartialEq, Eq)
+    derive(
+        Clone,
+        PartialEq,
+        Eq,
+        rkyv::Archive,
+        rkyv::Serialize,
+        rkyv::Deserialize
+    )
 )]
-#[serde(bound = "")]
 pub struct Table<F: IsField> {
     /// Row-major backing store. Crate-private: external callers must go through
     /// the spill-safe accessors (`get`/`get_row`/`set`) rather than indexing the
@@ -55,47 +61,140 @@ pub struct Table<F: IsField> {
     pub width: usize,
     pub height: usize,
     #[cfg(feature = "disk-spill")]
-    #[serde(skip)]
     pub(crate) mmap_backing: Option<TableMmapBacking>,
 }
 
+// Manual rkyv impl under disk-spill: the derive can't handle `mmap_backing`,
+// and serialization must read through `row_major_data()` so a spilled table
+// archives its mmap contents (deserializing always yields an unspilled table).
+// The archived layout matches what the derive generates without disk-spill, so
+// both configurations produce byte-identical archives.
 #[cfg(feature = "disk-spill")]
-impl<F: IsField> serde::Serialize for Table<F>
-where
-    FieldElement<F>: serde::Serialize,
-{
-    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
-        use serde::ser::SerializeStruct;
-        let mut s = serializer.serialize_struct("Table", 3)?;
-        if self.mmap_backing.is_some() {
-            s.serialize_field("data", &MmapDataSeq(self))?;
-        } else {
-            s.serialize_field("data", &self.data)?;
+mod archived_table {
+    use super::{FieldElement, IsField, Table};
+    use math::field::element::ArchivedFieldElement;
+    use rkyv::rancor::Fallible;
+    use rkyv::ser::{Allocator, Writer};
+    use rkyv::vec::{ArchivedVec, VecResolver};
+    use rkyv::{Archive, Deserialize, Place, Portable, Serialize};
+
+    #[derive(Portable, rkyv::bytecheck::CheckBytes)]
+    #[bytecheck(crate = rkyv::bytecheck)]
+    #[repr(C)]
+    pub struct ArchivedTable<F: IsField>
+    where
+        F::BaseType: Archive,
+    {
+        pub data: ArchivedVec<ArchivedFieldElement<F>>,
+        pub width: rkyv::primitive::ArchivedUsize,
+        pub height: rkyv::primitive::ArchivedUsize,
+    }
+
+    pub struct TableResolver {
+        data: VecResolver,
+    }
+
+    impl<F: IsField> Archive for Table<F>
+    where
+        F::BaseType: Archive,
+    {
+        type Archived = ArchivedTable<F>;
+        type Resolver = TableResolver;
+        fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
+            rkyv::munge::munge!(let ArchivedTable { data, width, height } = out);
+            // Use the length of the slice `serialize` wrote, not `width * height`.
+            ArchivedVec::resolve_from_len(self.row_major_data().len(), resolver.data, data);
+            self.width.resolve((), width);
+            self.height.resolve((), height);
+        }
+    }
+
+    impl<F: IsField, S> Serialize<S> for Table<F>
+    where
+        F::BaseType: Archive,
+        FieldElement<F>: Serialize<S>,
+        S: Fallible + Allocator + Writer + ?Sized,
+    {
+        fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
+            Ok(TableResolver {
+                data: ArchivedVec::serialize_from_slice(self.row_major_data(), serializer)?,
+            })
+        }
+    }
+
+    impl<F: IsField, D> Deserialize<Table<F>, D> for ArchivedTable<F>
+    where
+        F::BaseType: Archive,
+        ArchivedFieldElement<F>: Deserialize<FieldElement<F>, D>,
+        D: Fallible + ?Sized,
+    {
+        fn deserialize(&self, deserializer: &mut D) -> Result<Table<F>, D::Error> {
+            Ok(Table {
+                data: self.data.deserialize(deserializer)?,
+                width: self.width.to_native() as usize,
+                height: self.height.to_native() as usize,
+                mmap_backing: None,
+            })
         }
-        s.serialize_field("width", &self.width)?;
-        s.serialize_field("height", &self.height)?;
-        s.end()
     }
 }
 
 #[cfg(feature = "disk-spill")]
-struct MmapDataSeq<'a, F: IsField>(&'a Table<F>);
+pub use archived_table::ArchivedTable;
 
-#[cfg(feature = "disk-spill")]
-impl<F: IsField> serde::Serialize for MmapDataSeq<'_, F>
+/// Read API over an rkyv-archived [`Table`], used by the verifier to consume
+/// the out-of-domain evaluations straight from the proof buffer. On
+/// little-endian targets the element data is viewed in place with no copy.
+#[cfg(target_endian = "little")]
+impl<F: IsField> ArchivedTable<F>
 where
-    FieldElement<F>: serde::Serialize,
+    F::BaseType: rkyv::Archive,
 {
-    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
-        use serde::ser::SerializeSeq;
-        let table = self.0;
-        let mut seq = serializer.serialize_seq(Some(table.width * table.height))?;
-        for r in 0..table.height {
-            for elem in table.get_row(r) {
-                seq.serialize_element(elem)?;
-            }
-        }
-        seq.end()
+    #[inline]
+    pub fn width(&self) -> usize {
+        self.width.to_native() as usize
+    }
+
+    #[inline]
+    pub fn height(&self) -> usize {
+        self.height.to_native() as usize
+    }
+
+    /// Full row-major element data, viewed in place.
+    #[inline]
+    pub fn row_major_data(&self) -> &[FieldElement<F>] {
+        math::field::element::ArchivedFieldElement::slice_as_native(self.data.as_slice())
+    }
+
+    /// `true` iff the backing data holds exactly `width × height` elements —
+    /// the invariant `get_row` indexing relies on. A malformed archive can
+    /// advertise dimensions that disagree with the data length; callers must
+    /// reject such tables before row access.
+    #[inline]
+    pub fn dimensions_consistent(&self) -> bool {
+        self.width()
+            .checked_mul(self.height())
+            .is_some_and(|n| n == self.data.len())
+    }
+
+    /// Row `row_idx` as a native slice (no copy). Panics if the row is out of bounds.
+    #[inline]
+    pub fn get_row(&self, row_idx: usize) -> &[FieldElement<F>] {
+        let width = self.width();
+        let start = row_idx * width;
+        &self.row_major_data()[start..start + width]
+    }
+
+    /// Build a [`Frame`] over this table, identical to [`Table::into_frame`].
+    /// Copies this table's rows into the frame (`height × width` elements in
+    /// total), never the whole proof.
+    pub fn into_frame(&self, main_trace_columns: usize, step_size: usize) -> Frame<F, F>
+    where
+        F: IsSubFieldOf<F>,
+    {
+        frame_from_rows(self.height(), step_size, main_trace_columns, |row_idx| {
+            self.get_row(row_idx)
+        })
     }
 }
 
@@ -361,29 +460,49 @@ impl<F: IsField> Table<F> {
     /// Given a step size, converts the given table into a `Frame`.
     /// Clones row data into owned Vecs (only used by verifier on small OOD tables).
     pub fn into_frame(&self, main_trace_columns: usize, step_size: usize) -> Frame<F, F> {
-        debug_assert!(self.height.is_multiple_of(step_size));
-        let steps = (0..self.height)
-            .step_by(step_size)
-            .map(|initial_row_idx| {
-                let end_row_idx = initial_row_idx + step_size;
-
-                let mut step_main_data: Vec<Vec<FieldElement<F>>> = Vec::new();
-                let mut step_aux_data: Vec<Vec<FieldElement<F>>> = Vec::new();
-
-                (initial_row_idx..end_row_idx).for_each(|row_idx| {
-                    let row = self.get_row(row_idx);
-                    step_main_data.push(row[..main_trace_columns].to_vec());
-                    step_aux_data.push(row[main_trace_columns..].to_vec());
-                });
-
-                TableView::new(step_main_data, step_aux_data)
-            })
-            .collect();
-
-        Frame::new(steps)
+        frame_from_rows(self.height, step_size, main_trace_columns, |row_idx| {
+            self.get_row(row_idx)
+        })
     }
 }
 
+/// Build a [`Frame`] from `height` rows accessed via `get_row`, splitting each
+/// row at `main_trace_columns` into main/aux. Shared by [`Table::into_frame`]
+/// and the zero-copy `ArchivedTable::into_frame` so both produce identical frames.
+///
+/// Copies all `height` rows into owned `Vec`s. `height` must be a multiple of
+/// `step_size` (debug-asserted only); panics if `step_size` is 0.
+pub fn frame_from_rows<'a, F>(
+    height: usize,
+    step_size: usize,
+    main_trace_columns: usize,
+    get_row: impl Fn(usize) -> &'a [FieldElement<F>],
+) -> Frame<F, F>
+where
+    F: IsSubFieldOf<F> + IsField + 'a,
+{
+    debug_assert!(height.is_multiple_of(step_size));
+    let steps = (0..height)
+        .step_by(step_size)
+        .map(|initial_row_idx| {
+            let end_row_idx = initial_row_idx + step_size;
+
+            let mut step_main_data: Vec<Vec<FieldElement<F>>> = Vec::new();
+            let mut step_aux_data: Vec<Vec<FieldElement<F>>> = Vec::new();
+
+            (initial_row_idx..end_row_idx).for_each(|row_idx| {
+                let row = get_row(row_idx);
+                step_main_data.push(row[..main_trace_columns].to_vec());
+                step_aux_data.push(row[main_trace_columns..].to_vec());
+            });
+
+            TableView::new(step_main_data, step_aux_data)
+        })
+        .collect();
+
+    Frame::new(steps)
+}
+
 /// A view of a contiguous subset of rows of a table.
 ///
 /// Owns its row data (Vec per row) so it can be built from either row-major Tables
diff --git a/crypto/stark/src/tests/bus_tests/completeness_tests.rs b/crypto/stark/src/tests/bus_tests/completeness_tests.rs
index 83f8ac391..51f6ef50c 100644
--- a/crypto/stark/src/tests/bus_tests/completeness_tests.rs
+++ b/crypto/stark/src/tests/bus_tests/completeness_tests.rs
@@ -377,9 +377,10 @@ fn test_serialization_roundtrip() {
         multi_prove_ram(air_trace_pairs, &mut DefaultTranscript::<E>::new(&[])).unwrap();
 
     // Serialize and deserialize
-    let serialized = serde_cbor::to_vec(&multi_proof).expect("serialization failed");
+    let serialized =
+        rkyv::to_bytes::<rkyv::rancor::Error>(&multi_proof).expect("serialization failed");
     let deserialized: crate::proof::stark::MultiProof<F, E, ()> =
-        serde_cbor::from_slice(&serialized).expect("deserialization failed");
+        rkyv::from_bytes::<_, rkyv::rancor::Error>(&serialized).expect("deserialization failed");
 
     let airs: Vec<&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>> =
         vec![&cpu_air, &add_air, &mul_air];
diff --git a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs
index 4059ed481..032e68ea1 100644
--- a/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs
+++ b/crypto/stark/src/tests/prove_verify_roundtrip_tests.rs
@@ -142,13 +142,14 @@ fn test_verify_serialized_multi_table_proofs() {
-    // NETWORK TRANSMISSION - Serialize and deserialize (using CBOR binary format)
+    // NETWORK TRANSMISSION - Serialize and deserialize (using the rkyv archive format)
     // =========================================================================
 
-    let serialized = serde_cbor::to_vec(&proofs).expect("Failed to serialize proofs");
+    let serialized =
+        rkyv::to_bytes::<rkyv::rancor::Error>(&proofs).expect("Failed to serialize proofs");
 
     // At this point, the prover's data is dropped (out of scope above)
     // The verifier only has the serialized data
 
     let received_proofs: MultiProof<F, E, ()> =
-        serde_cbor::from_slice(&serialized).expect("Failed to deserialize proofs");
+        rkyv::from_bytes::<_, rkyv::rancor::Error>(&serialized).expect("Failed to deserialize proofs");
 
     // =========================================================================
     // VERIFIER SIDE - Reconstruct AIRs and verify
diff --git a/crypto/stark/src/verifier.rs b/crypto/stark/src/verifier.rs
index 5b512c37e..7ab1843a2 100644
--- a/crypto/stark/src/verifier.rs
+++ b/crypto/stark/src/verifier.rs
@@ -1,7 +1,7 @@
 use super::{
     config::BatchedMerkleTreeBackend,
     domain::VerifierDomain,
-    fri::fri_decommit::FriDecommitment,
+    fri::fri_decommit::ArchivedFriDecommitment,
     grinding,
     proof::stark::StarkProof,
     traits::{AIR, TransitionEvaluationContext},
@@ -9,10 +9,15 @@ use super::{
 use crate::{
     config::Commitment,
     domain::new_verifier_domain,
-    lookup::{LOGUP_CHALLENGE_ALPHA, LOGUP_NUM_CHALLENGES, PackingShifts, compute_alpha_powers},
-    proof::stark::{DeepPolynomialOpening, MultiProof, PolynomialOpenings},
+    lookup::{BusPublicInputs, LOGUP_CHALLENGE_ALPHA, LOGUP_NUM_CHALLENGES, PackingShifts, compute_alpha_powers},
+    proof::stark::{
+        ArchivedDeepPolynomialOpening, ArchivedMultiProof, ArchivedPolynomialOpenings,
+        ArchivedStarkProof, MultiProof,
+    },
 };
-use crypto::{fiat_shamir::is_transcript::IsStarkTranscript, merkle_tree::proof::Proof};
+use crypto::fiat_shamir::is_transcript::IsStarkTranscript;
+use crypto::merkle_tree::proof::verify_merkle_path;
+use math::field::element::ArchivedFieldElement;
 #[cfg(not(feature = "test_fiat_shamir"))]
 use log::error;
 #[cfg(feature = "debug-checks")]
@@ -44,6 +49,11 @@ impl<
     FieldExtension: IsField + Send + Sync,
     PI,
 > IsStarkVerifier<Field, FieldExtension, PI> for Verifier<Field, FieldExtension, PI>
+where
+    Field::BaseType: rkyv::Archive,
+    FieldExtension::BaseType: rkyv::Archive,
+    PI: rkyv::Archive,
+    <PI as rkyv::Archive>::Archived: rkyv::Deserialize<PI, PiDeserializer>,
 {
 }
 
@@ -75,6 +85,23 @@ where
 
 pub type DeepPolynomialEvaluations<F> = (Vec<FieldElement<F>>, Vec<FieldElement<F>>);
 
+// The verifier reads proofs in place from their rkyv archive; archived field
+// elements are viewed as native ones, which is only valid on little-endian.
+#[cfg(not(target_endian = "little"))]
+compile_error!("the zero-copy STARK verifier requires a little-endian target");
+
+/// Deserializer used to materialize the (tiny) per-proof `PI` public inputs.
+pub type PiDeserializer = rkyv::api::high::HighDeserializer<rkyv::rancor::Error>;
+
+/// `&[FieldElement<G>]` view over an archived field-element vector (no copy).
+#[inline]
+fn evals<G: IsField>(v: &rkyv::vec::ArchivedVec<ArchivedFieldElement<G>>) -> &[FieldElement<G>]
+where
+    G::BaseType: rkyv::Archive,
+{
+    ArchivedFieldElement::slice_as_native(v.as_slice())
+}
+
 /// The functionality of a STARK verifier providing methods to run the STARK Verify protocol
 /// https://lambdaclass.github.io/lambdaworks/starks/protocol.html
 pub trait IsStarkVerifier<
@@ -82,6 +109,11 @@ pub trait IsStarkVerifier<
     FieldExtension: Send + Sync + IsField,
     PI,
 >
+where
+    Field::BaseType: rkyv::Archive,
+    FieldExtension::BaseType: rkyv::Archive,
+    PI: rkyv::Archive,
+    <PI as rkyv::Archive>::Archived: rkyv::Deserialize<PI, PiDeserializer>,
 {
     fn sample_query_indexes(
         number_of_queries: usize,
@@ -99,18 +131,25 @@ pub trait IsStarkVerifier<
     /// See https://lambdaclass.github.io/lambdaworks/starks/protocol.html#step-2-verify-claimed-composition-polynomial
     fn step_2_verify_claimed_composition_polynomial(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
+        public_inputs: &PI,
         domain: &VerifierDomain<Field>,
         challenges: &Challenges<FieldExtension>,
     ) -> bool {
         crate::profile_markers::step_marker::<
             { crate::profile_markers::STEP_VERIFY_CLAIMED_COMPOSITION_POLYNOMIAL },
         >();
-        let trace_length = proof.trace_length;
+        let trace_length = proof.trace_length.to_native() as usize;
+        // Owned `BusPublicInputs` (just the table contribution L — one field
+        // element) reconstructed from the archive for the AIR boundary call.
+        let bus_public_inputs = proof
+            .bus_public_inputs
+            .as_ref()
+            .map(|bpi| BusPublicInputs::from_contribution(bpi.table_contribution.as_native().clone()));
         let boundary_constraints = air.boundary_constraints(
-            &proof.public_inputs,
+            public_inputs,
             &challenges.rap_challenges,
-            proof.bus_public_inputs.as_ref(),
+            bus_public_inputs.as_ref(),
             trace_length,
         );
         // Precompute g^step once per distinct step to avoid the prior O(B^2)
@@ -173,8 +212,16 @@ pub trait IsStarkVerifier<
             .map(|poly| poly.evaluate(&challenges.z))
             .collect::<Vec<FieldElement<FieldExtension>>>();
 
-        let num_main_trace_columns =
-            proof.trace_ood_evaluations.width - air.num_auxiliary_rap_columns();
+        // A malformed archive can advertise fewer OOD columns than the AIR's
+        // aux count; reject instead of underflowing.
+        let num_main_trace_columns = match proof
+            .trace_ood_evaluations
+            .width()
+            .checked_sub(air.num_auxiliary_rap_columns())
+        {
+            Some(n) => n,
+            None => return false,
+        };
 
         let logup_alpha_powers: Vec<FieldElement<FieldExtension>> =
             if challenges.rap_challenges.len() > LOGUP_CHALLENGE_ALPHA {
@@ -186,11 +233,11 @@ pub trait IsStarkVerifier<
                 Vec::new()
             };
 
-        let logup_table_offset = match &proof.bus_public_inputs {
+        let logup_table_offset = match proof.bus_public_inputs.as_ref() {
             Some(bpi) => {
                 let n = FieldElement::<Field>::from(trace_length as u64);
                 match n.inv() {
-                    Ok(n_inv) => n_inv * &bpi.table_contribution,
+                    Ok(n_inv) => n_inv * bpi.table_contribution.as_native(),
                     Err(_) => return false, // trace_length == 0 is invalid
                 }
             }
@@ -230,8 +277,7 @@ pub trait IsStarkVerifier<
         let composition_poly_ood_evaluation =
             &boundary_quotient_ood_evaluation + transition_c_i_evaluations_sum;
 
-        let composition_poly_claimed_ood_evaluation = proof
-            .composition_poly_parts_ood_evaluation
+        let composition_poly_claimed_ood_evaluation = evals(&proof.composition_poly_parts_ood_evaluation)
             .iter()
             .rev()
             .fold(FieldElement::zero(), |acc, coeff| {
@@ -245,7 +291,7 @@ pub trait IsStarkVerifier<
     /// openings of the trace polynomials and the composition polynomial parts. It then uses these to verify that the
     /// FRI decommitments are valid and correspond to the Deep composition polynomial.
     fn step_3_verify_fri(
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
         domain: &VerifierDomain<Field>,
         challenges: &Challenges<FieldExtension>,
     ) -> bool
@@ -275,6 +321,7 @@ pub trait IsStarkVerifier<
 
         proof
             .query_list
+            .as_slice()
             .iter()
             .zip(&challenges.iotas)
             .zip(evaluation_point_inverse)
@@ -311,7 +358,7 @@ pub trait IsStarkVerifier<
     /// `evaluations ‖ evaluations_sym` and verify once. (Same as the composition
     /// opening check.)
     fn verify_opening_pair<E>(
-        opening: &PolynomialOpenings<E>,
+        opening: &ArchivedPolynomialOpenings<E>,
         root: &Commitment,
         iota: usize,
     ) -> bool
@@ -319,20 +366,24 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<E>: AsBytes + Sync + Send,
         E: IsField,
+        E::BaseType: rkyv::Archive,
         Field: IsSubFieldOf<E>,
     {
-        let mut value = opening.evaluations.clone();
-        value.extend_from_slice(&opening.evaluations_sym);
-        opening
-            .proof
-            .verify::<BatchedMerkleTreeBackend<E>>(root, iota, &value)
+        let mut value = evals(&opening.evaluations).to_vec();
+        value.extend_from_slice(evals(&opening.evaluations_sym));
+        verify_merkle_path::<BatchedMerkleTreeBackend<E>>(
+            opening.proof.merkle_path.as_slice(),
+            root,
+            iota,
+            &value,
+        )
     }
 
     /// Verify opening Open(tⱼ(D_LDE), 𝜐) and Open(tⱼ(D_LDE), -𝜐) for all trace polynomials tⱼ,
     /// where 𝜐 and -𝜐 are the elements corresponding to the index challenge `iota`.
     fn verify_trace_openings(
-        proof: &StarkProof<Field, FieldExtension, PI>,
-        deep_poly_openings: &DeepPolynomialOpening<Field, FieldExtension>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
+        deep_poly_openings: &ArchivedDeepPolynomialOpening<Field, FieldExtension>,
         iota: usize,
     ) -> bool
     where
@@ -350,8 +401,8 @@ pub trait IsStarkVerifier<
         // unreachable in practice (multi_verify rejects such proofs upstream),
         // but a defensive check keeps this function self-contained.
         ok &= match (
-            &proof.lde_trace_precomputed_merkle_root,
-            &deep_poly_openings.precomputed_trace_polys,
+            proof.lde_trace_precomputed_merkle_root.as_ref(),
+            deep_poly_openings.precomputed_trace_polys.as_ref(),
         ) {
             (Some(root), Some(opening)) => Self::verify_opening_pair::<Field>(opening, root, iota),
             (None, None) => true,
@@ -360,11 +411,11 @@ pub trait IsStarkVerifier<
 
         // Auxiliary trace.
         ok &= match (
-            proof.lde_trace_aux_merkle_root,
-            &deep_poly_openings.aux_trace_polys,
+            proof.lde_trace_aux_merkle_root.as_ref(),
+            deep_poly_openings.aux_trace_polys.as_ref(),
         ) {
             (Some(root), Some(opening)) => {
-                Self::verify_opening_pair::<FieldExtension>(opening, &root, iota)
+                Self::verify_opening_pair::<FieldExtension>(opening, root, iota)
             }
             (None, None) => true,
             _ => false,
@@ -376,7 +427,7 @@ pub trait IsStarkVerifier<
     /// Verify opening Open(Hᵢ(D_LDE), 𝜐) and Open(Hᵢ(D_LDE), -𝜐) for all parts Hᵢof the composition
     /// polynomial, where 𝜐 and -𝜐 are the elements corresponding to the index challenge `iota`.
     fn verify_composition_poly_opening(
-        deep_poly_openings: &DeepPolynomialOpening<Field, FieldExtension>,
+        deep_poly_openings: &ArchivedDeepPolynomialOpening<Field, FieldExtension>,
         composition_poly_merkle_root: &Commitment,
         iota: &usize,
     ) -> bool
@@ -384,24 +435,22 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
     {
-        let mut value = deep_poly_openings.composition_poly.evaluations.clone();
-        value.extend_from_slice(&deep_poly_openings.composition_poly.evaluations_sym);
-
-        deep_poly_openings
-            .composition_poly
-            .proof
-            .verify::<BatchedMerkleTreeBackend<FieldExtension>>(
-                composition_poly_merkle_root,
-                *iota,
-                &value,
-            )
+        let mut value = evals(&deep_poly_openings.composition_poly.evaluations).to_vec();
+        value.extend_from_slice(evals(&deep_poly_openings.composition_poly.evaluations_sym));
+
+        verify_merkle_path::<BatchedMerkleTreeBackend<FieldExtension>>(
+            deep_poly_openings.composition_poly.proof.merkle_path.as_slice(),
+            composition_poly_merkle_root,
+            *iota,
+            &value,
+        )
     }
 
     /// Verifies the validity of the purported values of the trace polynomials and the composition polynomial
     /// parts at the domain elements and their symmetric counterparts corresponding to all the FRI query
     /// index challenges.
     fn step_4_verify_trace_and_composition_openings(
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
         challenges: &Challenges<FieldExtension>,
     ) -> bool
     where
@@ -414,7 +463,7 @@ pub trait IsStarkVerifier<
         challenges
             .iotas
             .iter()
-            .zip(&proof.deep_poly_openings)
+            .zip(proof.deep_poly_openings.as_slice())
             .all(|(iota_n, deep_poly_opening)| {
                 Self::verify_composition_poly_opening(
                     deep_poly_opening,
@@ -427,7 +476,7 @@ pub trait IsStarkVerifier<
     /// Verifies the openings of a fold polynomial of an inner layer of FRI.
     fn verify_fri_layer_openings(
         merkle_root: &Commitment,
-        auth_path_sym: &Proof<Commitment>,
+        auth_path_sym: &[Commitment],
         evaluation: &FieldElement<FieldExtension>,
         evaluation_sym: &FieldElement<FieldExtension>,
         iota: usize,
@@ -442,7 +491,8 @@ pub trait IsStarkVerifier<
             vec![evaluation.clone(), evaluation_sym.clone()]
         };
 
-        auth_path_sym.verify::<BatchedMerkleTreeBackend<FieldExtension>>(
+        verify_merkle_path::<BatchedMerkleTreeBackend<FieldExtension>>(
+            auth_path_sym,
             merkle_root,
             iota >> 1,
             &evaluations,
@@ -458,10 +508,10 @@ pub trait IsStarkVerifier<
     /// `deep_composition_evaluation`: precomputed value of p₀(𝜐), where p₀ is the deep composition polynomial.
     /// `deep_composition_evaluation_sym`: precomputed value of p₀(-𝜐), where p₀ is the deep composition polynomial.
     fn verify_query_and_sym_openings(
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
         zetas: &[FieldElement<FieldExtension>],
         iota: usize,
-        fri_decommitment: &FriDecommitment<FieldExtension>,
+        fri_decommitment: &ArchivedFriDecommitment<FieldExtension>,
         evaluation_point_inv: FieldElement<Field>,
         deep_composition_evaluation: &FieldElement<FieldExtension>,
         deep_composition_evaluation_sym: &FieldElement<FieldExtension>,
@@ -470,7 +520,19 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
     {
-        let fri_layers_merkle_roots = &proof.fri_layers_merkle_roots;
+        let fri_layers_merkle_roots = proof.fri_layers_merkle_roots.as_slice();
+        let fri_last_value = proof.fri_last_value.as_native();
+
+        // Both per-layer decommitment vectors must line up with the committed
+        // roots: the fold below iterates their zip (min length) and only compares
+        // the final value on the last evaluation index, so an over-long
+        // `layers_evaluations_sym` would otherwise skip the `fri_last_value`
+        // check entirely.
+        if fri_decommitment.layers_auth_paths.len() != fri_layers_merkle_roots.len()
+            || fri_decommitment.layers_evaluations_sym.len() != fri_layers_merkle_roots.len()
+        {
+            return false;
+        }
         let evaluation_point_vec: Vec<FieldElement<Field>> =
             core::iter::successors(Some(evaluation_point_inv.square()), |evaluation_point| {
                 Some(evaluation_point.square())
@@ -490,7 +552,7 @@ pub trait IsStarkVerifier<
         // In this case, the fold loop below doesn't iterate, so we need to verify
         // the final value directly here.
         if fri_layers_merkle_roots.is_empty() {
-            return v == proof.fri_last_value;
+            return v == *fri_last_value;
         }
 
         // For each FRI layer, starting from the layer 1: use the proof to verify the validity of values pᵢ(−𝜐^(2ⁱ)) (given by the prover) and
@@ -499,8 +561,8 @@ pub trait IsStarkVerifier<
         fri_layers_merkle_roots
             .iter()
             .enumerate()
-            .zip(&fri_decommitment.layers_auth_paths)
-            .zip(&fri_decommitment.layers_evaluations_sym)
+            .zip(fri_decommitment.layers_auth_paths.as_slice())
+            .zip(evals(&fri_decommitment.layers_evaluations_sym))
             .zip(evaluation_point_vec)
             .fold(
                 true,
@@ -514,7 +576,7 @@ pub trait IsStarkVerifier<
                     // `evaluation_sym` is pᵢ(−𝜐^(2ⁱ)).
                     let openings_ok = Self::verify_fri_layer_openings(
                         merkle_root,
-                        auth_path_sym,
+                        auth_path_sym.merkle_path.as_slice(),
                         &v,
                         evaluation_sym,
                         index,
@@ -532,7 +594,7 @@ pub trait IsStarkVerifier<
                         result & openings_ok
                     } else {
                         // Check that final value is the given by the prover
-                        result & (v == proof.fri_last_value) & openings_ok
+                        result & (v == *fri_last_value) & openings_ok
                     }
                 },
             )
@@ -541,8 +603,13 @@ pub trait IsStarkVerifier<
     fn reconstruct_deep_composition_poly_evaluations_for_all_queries(
         challenges: &Challenges<FieldExtension>,
         domain: &VerifierDomain<Field>,
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
     ) -> Option<DeepPolynomialEvaluations<FieldExtension>> {
+        // A malformed proof can carry fewer openings than query challenges;
+        // reject instead of panicking on the indexed access below.
+        if proof.deep_poly_openings.len() < challenges.iotas.len() {
+            return None;
+        }
         let num_queries = challenges.iotas.len();
         let mut deep_poly_evaluations = Vec::with_capacity(num_queries);
         let mut deep_poly_evaluations_sym = Vec::with_capacity(num_queries);
@@ -554,19 +621,19 @@ pub trait IsStarkVerifier<
             .expect("verifier domain root_order is a valid power of two");
 
         for (i, iota) in challenges.iotas.iter().enumerate() {
-            let opening = &proof.deep_poly_openings[i];
+            let opening = &proof.deep_poly_openings.as_slice()[i];
 
             // Base-field portion: precomputed columns FIRST, then main trace columns.
             let mut lde_base: Vec<FieldElement<Field>> = Vec::new();
-            if let Some(p) = &opening.precomputed_trace_polys {
-                lde_base.extend_from_slice(&p.evaluations);
+            if let Some(p) = opening.precomputed_trace_polys.as_ref() {
+                lde_base.extend_from_slice(evals(&p.evaluations));
             }
-            lde_base.extend_from_slice(&opening.main_trace_polys.evaluations);
+            lde_base.extend_from_slice(evals(&opening.main_trace_polys.evaluations));
 
             let lde_aux: &[FieldElement<FieldExtension>] = opening
                 .aux_trace_polys
                 .as_ref()
-                .map(|a| a.evaluations.as_slice())
+                .map(|a| evals(&a.evaluations))
                 .unwrap_or(&[]);
 
             let evaluation_point = Self::query_challenge_to_evaluation_point(*iota, false, domain);
@@ -577,20 +644,20 @@ pub trait IsStarkVerifier<
                 challenges,
                 &lde_base,
                 lde_aux,
-                &opening.composition_poly.evaluations,
+                evals(&opening.composition_poly.evaluations),
             )?);
 
             // Mirror for the symmetric query point.
             let mut lde_base_sym: Vec<FieldElement<Field>> = Vec::new();
-            if let Some(p) = &opening.precomputed_trace_polys {
-                lde_base_sym.extend_from_slice(&p.evaluations_sym);
+            if let Some(p) = opening.precomputed_trace_polys.as_ref() {
+                lde_base_sym.extend_from_slice(evals(&p.evaluations_sym));
             }
-            lde_base_sym.extend_from_slice(&opening.main_trace_polys.evaluations_sym);
+            lde_base_sym.extend_from_slice(evals(&opening.main_trace_polys.evaluations_sym));
 
             let lde_aux_sym: &[FieldElement<FieldExtension>] = opening
                 .aux_trace_polys
                 .as_ref()
-                .map(|a| a.evaluations_sym.as_slice())
+                .map(|a| evals(&a.evaluations_sym))
                 .unwrap_or(&[]);
 
             let evaluation_point = Self::query_challenge_to_evaluation_point(*iota, true, domain);
@@ -601,14 +668,14 @@ pub trait IsStarkVerifier<
                 challenges,
                 &lde_base_sym,
                 lde_aux_sym,
-                &opening.composition_poly.evaluations_sym,
+                evals(&opening.composition_poly.evaluations_sym),
             )?);
         }
         Some((deep_poly_evaluations, deep_poly_evaluations_sym))
     }
 
     fn reconstruct_deep_composition_poly_evaluation(
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
         evaluation_point: &FieldElement<Field>,
         primitive_root: &FieldElement<Field>,
         challenges: &Challenges<FieldExtension>,
@@ -616,8 +683,11 @@ pub trait IsStarkVerifier<
         lde_trace_aux_evaluations: &[FieldElement<FieldExtension>],
         lde_composition_poly_parts_evaluation: &[FieldElement<FieldExtension>],
     ) -> Option<FieldElement<FieldExtension>> {
-        let ood_evaluations_table_height = proof.trace_ood_evaluations.height;
-        let ood_evaluations_table_width = proof.trace_ood_evaluations.width;
+        let ood_evaluations_table_height = proof.trace_ood_evaluations.height();
+        let ood_evaluations_table_width = proof.trace_ood_evaluations.width();
+        // Hot loop below: resolve the archived OOD data to one flat slice once
+        // instead of re-deriving a row slice per element.
+        let ood_data = proof.trace_ood_evaluations.row_major_data();
         let trace_term_coeffs = &challenges.trace_term_coeffs;
 
         // Runtime guard: a malformed proof may supply opening evaluations whose
@@ -652,7 +722,7 @@ pub trait IsStarkVerifier<
                 let trace_i = (0..ood_evaluations_table_height).zip(coeff_row).fold(
                     FieldElement::zero(),
                     |trace_t, (row_idx, coeff)| {
-                        let ood_val = &proof.trace_ood_evaluations.get_row(row_idx)[col_idx];
+                        let ood_val = &ood_data[row_idx * ood_evaluations_table_width + col_idx];
                         // Stay in base when we can: F: IsSubFieldOf<E> gives F - E -> E.
                         let diff: FieldElement<FieldExtension> = if col_idx < num_base {
                             &lde_trace_base_evaluations[col_idx] - ood_val
@@ -666,6 +736,7 @@ pub trait IsStarkVerifier<
                 trace_terms + trace_i
             });
 
+        let composition_parts_ood = evals(&proof.composition_poly_parts_ood_evaluation);
         let number_of_parts = lde_composition_poly_parts_evaluation.len();
         let z_pow = &challenges.z.pow(number_of_parts);
 
@@ -675,7 +746,7 @@ pub trait IsStarkVerifier<
         for (j, h_i_upsilon) in lde_composition_poly_parts_evaluation.iter().enumerate() {
             // Bounds-check via `.get(j)?`: a malformed opening may have more
             // parts than the proof header advertises.
-            let h_i_zpower = proof.composition_poly_parts_ood_evaluation.get(j)?;
+            let h_i_zpower = composition_parts_ood.get(j)?;
             let gamma = challenges.gammas.get(j)?;
             let h_i_term = (h_i_upsilon - h_i_zpower) * gamma;
             h_terms += h_i_term;
@@ -713,12 +784,53 @@ pub trait IsStarkVerifier<
     where
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
+        MultiProof<Field, FieldExtension, PI>: for<'a> rkyv::Serialize<
+                rkyv::api::high::HighSerializer<
+                    rkyv::util::AlignedVec,
+                    rkyv::ser::allocator::ArenaHandle<'a>,
+                    rkyv::rancor::Error,
+                >,
+            >,
     {
-        if airs.len() != multi_proof.proofs.len() {
+        let bytes = match rkyv::to_bytes::<rkyv::rancor::Error>(multi_proof) {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                error!("failed to archive proof for verification: {e}");
+                return false;
+            }
+        };
+        // SAFETY: `bytes` was just produced by `rkyv::to_bytes` from this same
+        // `multi_proof`, so it is a valid, aligned archive of this exact type.
+        let archived = unsafe {
+            rkyv::access_unchecked::<ArchivedMultiProof<Field, FieldExtension, PI>>(&bytes)
+        };
+        Self::multi_verify_archived(
+            airs,
+            archived.proofs.as_slice(),
+            transcript,
+            expected_bus_balance,
+        )
+    }
+
+    /// Verifies one or more rkyv-archived STARK proofs read **in place** from
+    /// their archive buffer — no proof deserialization, no per-field allocation.
+    /// This is the single verification implementation; [`Self::multi_verify`]
+    /// is a thin shim that archives an owned [`MultiProof`] and delegates here.
+    fn multi_verify_archived(
+        airs: &[&dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>],
+        proofs: &[ArchivedStarkProof<Field, FieldExtension, PI>],
+        transcript: &mut (impl IsStarkTranscript<FieldExtension, Field> + Clone),
+        expected_bus_balance: &FieldElement<FieldExtension>,
+    ) -> bool
+    where
+        FieldElement<Field>: AsBytes + Sync + Send,
+        FieldElement<FieldExtension>: AsBytes + Sync + Send,
+    {
+        if airs.len() != proofs.len() {
             error!(
                 "AIR count ({}) does not match proof count ({})",
                 airs.len(),
-                multi_proof.proofs.len()
+                proofs.len()
             );
             return false;
         }
@@ -732,15 +844,24 @@ pub trait IsStarkVerifier<
         // For preprocessed tables, use the hardcoded commitment (verifier cannot
         // trust the prover). For normal tables, use the commitment from the proof.
 
-        for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() {
+        for (idx, (air, proof)) in airs.iter().zip(proofs).enumerate() {
             // Soundness: the number of composition-poly parts is fixed by the AIR's
             // degree bound, NOT chosen by the prover. Deriving it from the proof would
             // let a malicious prover inflate the part count, widening the composition
             // polynomial's degree space and weakening the low-degree test. Reject any
             // proof whose advertised part count disagrees with the AIR.
-            if proof.trace_length == 0
+            let trace_length = proof.trace_length.to_native() as usize;
+            if trace_length == 0
                 || proof.composition_poly_parts_ood_evaluation.len()
-                    != air.composition_poly_degree_bound(proof.trace_length) / proof.trace_length
+                    != air.composition_poly_degree_bound(trace_length) / trace_length
+            {
+                return false;
+            }
+            // The archive is read in place without validation; reject an OOD
+            // table whose advertised dimensions disagree with its data length
+            // (or that has no rows) before any row access indexes into it.
+            if !proof.trace_ood_evaluations.dimensions_consistent()
+                || proof.trace_ood_evaluations.height() == 0
             {
                 return false;
             }
@@ -748,7 +869,7 @@ pub trait IsStarkVerifier<
                 // Preprocessed table: VERIFY precomputed commitment matches hardcoded.
                 // This is the critical soundness check - ensures prover used correct precomputed values.
                 let expected_precomputed = air.precomputed_commitment();
-                match &proof.lde_trace_precomputed_merkle_root {
+                match proof.lde_trace_precomputed_merkle_root.as_ref() {
                     Some(actual) if *actual == expected_precomputed => {
                         // OK - commitment matches hardcoded
                     }
@@ -797,7 +918,7 @@ pub trait IsStarkVerifier<
         // boundary constraints on LogUp columns, so the bus balance check is
         // the only cross-table validation.
 
-        for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() {
+        for (idx, (air, proof)) in airs.iter().zip(proofs).enumerate() {
             if air.has_trace_interaction() && proof.bus_public_inputs.is_none() {
                 error!(
                     "Table {idx}: AIR has LogUp interactions but proof is missing bus_public_inputs"
@@ -819,7 +940,7 @@ pub trait IsStarkVerifier<
         // state after Phase B, domain-separated by table index). This matches
         // the prover's forking and makes per-table verification independent.
 
-        for (idx, (air, proof)) in airs.iter().zip(&multi_proof.proofs).enumerate() {
+        for (idx, (air, proof)) in airs.iter().zip(proofs).enumerate() {
             // Must match prover: fork with domain separator for multi-table,
             // use original transcript directly for single-table.
             let num_tables = airs.len();
@@ -829,19 +950,28 @@ pub trait IsStarkVerifier<
             }
 
             // Phase C: replay aux commitment
-            if let Some(root) = proof.lde_trace_aux_merkle_root {
-                table_transcript.append_bytes(&root);
+            if let Some(root) = proof.lde_trace_aux_merkle_root.as_ref() {
+                table_transcript.append_bytes(root);
             }
 
             // Bind table_contribution (L) to transcript, matching prover.
-            if let Some(ref bpi) = proof.bus_public_inputs {
-                table_transcript.append_field_element(&bpi.table_contribution);
+            if let Some(bpi) = proof.bus_public_inputs.as_ref() {
+                table_transcript.append_field_element(bpi.table_contribution.as_native());
             }
 
+            // The AIR API takes owned public inputs; materialize the (tiny) PI.
+            // For the VM verifier `PI = ()` and this is a no-op.
+            let public_inputs: PI =
+                match rkyv::deserialize::<PI, rkyv::rancor::Error>(&proof.public_inputs) {
+                    Ok(pi) => pi,
+                    Err(_) => return false,
+                };
+
             // Rounds 2-4: verify
             if !Self::verify_rounds_2_to_4(
                 *air,
                 proof,
+                &public_inputs,
                 &mut table_transcript,
                 lookup_challenges.clone(),
             ) {
@@ -867,11 +997,11 @@ pub trait IsStarkVerifier<
 
         if needs_lookup_challenges {
             let mut total = FieldElement::<FieldExtension>::zero();
-            for (air, proof) in airs.iter().zip(&multi_proof.proofs) {
+            for (air, proof) in airs.iter().zip(proofs) {
                 if air.has_trace_interaction()
-                    && let Some(interaction) = &proof.bus_public_inputs
+                    && let Some(interaction) = proof.bus_public_inputs.as_ref()
                 {
-                    total = total + &interaction.table_contribution;
+                    total = total + interaction.table_contribution.as_native();
                 }
             }
 
@@ -901,6 +1031,13 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
         PI: Clone,
+        MultiProof<Field, FieldExtension, PI>: for<'a> rkyv::Serialize<
+                rkyv::api::high::HighSerializer<
+                    rkyv::util::AlignedVec,
+                    rkyv::ser::allocator::ArenaHandle<'a>,
+                    rkyv::rancor::Error,
+                >,
+            >,
     {
         let multi_proof = MultiProof {
             proofs: vec![proof.clone()],
@@ -912,7 +1049,8 @@ pub trait IsStarkVerifier<
     /// already been replayed and the RAP challenges are known.
     fn replay_rounds_after_round_1(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
+        public_inputs: &PI,
         domain: &VerifierDomain<Field>,
         transcript: &mut impl IsStarkTranscript<FieldExtension, Field>,
         rap_challenges: Vec<FieldElement<FieldExtension>>,
@@ -930,12 +1068,16 @@ pub trait IsStarkVerifier<
 
         // <<<< Receive challenge: 𝛽
         let beta = transcript.sample_field_element();
-        let trace_length = proof.trace_length;
+        let trace_length = proof.trace_length.to_native() as usize;
+        let bus_public_inputs = proof
+            .bus_public_inputs
+            .as_ref()
+            .map(|bpi| BusPublicInputs::from_contribution(bpi.table_contribution.as_native().clone()));
         let num_boundary_constraints = air
             .boundary_constraints(
-                &proof.public_inputs,
+                public_inputs,
                 &rap_challenges,
-                proof.bus_public_inputs.as_ref(),
+                bus_public_inputs.as_ref(),
                 trace_length,
             )
             .constraints
@@ -964,14 +1106,16 @@ pub trait IsStarkVerifier<
         );
 
         // <<<< Receive values: tⱼ(zgᵏ)
-        let trace_ood_evaluations_columns = proof.trace_ood_evaluations.columns();
-        for col in trace_ood_evaluations_columns.iter() {
-            for elem in col.iter() {
-                transcript.append_field_element(elem);
+        // Column-major append (matches `Table::columns()` order) reading the
+        // archived rows in place, without materializing transposed columns.
+        let ood = &proof.trace_ood_evaluations;
+        for col_idx in 0..ood.width() {
+            for row_idx in 0..ood.height() {
+                transcript.append_field_element(&ood.get_row(row_idx)[col_idx]);
             }
         }
         // <<<< Receive value: Hᵢ(z^N)
-        for element in proof.composition_poly_parts_ood_evaluation.iter() {
+        for element in evals(&proof.composition_poly_parts_ood_evaluation).iter() {
             transcript.append_field_element(element);
         }
 
@@ -980,6 +1124,7 @@ pub trait IsStarkVerifier<
         // ===================================
 
         let num_terms_composition_poly = proof.composition_poly_parts_ood_evaluation.len();
+
         let num_terms_trace =
             air.context().transition_offsets.len() * air.step_size() * air.context().trace_columns;
         let gamma = transcript.sample_field_element();
@@ -1001,7 +1146,7 @@ pub trait IsStarkVerifier<
         let gammas = deep_composition_coefficients;
 
         // FRI commit phase
-        let merkle_roots = &proof.fri_layers_merkle_roots;
+        let merkle_roots = proof.fri_layers_merkle_roots.as_slice();
         let mut zetas = merkle_roots
             .iter()
             .map(|root| {
@@ -1017,16 +1162,16 @@ pub trait IsStarkVerifier<
         zetas.push(transcript.sample_field_element());
 
         // <<<< Receive value: pₙ
-        transcript.append_field_element(&proof.fri_last_value);
+        transcript.append_field_element(proof.fri_last_value.as_native());
 
         // Receive grinding value
         let security_bits = air.context().proof_options.grinding_factor;
         let mut grinding_seed = [0u8; 32];
         if security_bits > 0
-            && let Some(nonce_value) = proof.nonce
+            && let Some(nonce_value) = proof.nonce.as_ref()
         {
             grinding_seed = transcript.state();
-            transcript.append_bytes(&nonce_value.to_be_bytes());
+            transcript.append_bytes(&nonce_value.to_native().to_be_bytes());
         }
 
         // FRI query phase
@@ -1050,7 +1195,8 @@ pub trait IsStarkVerifier<
     /// Verifies a single table after round 1 has been replayed.
     fn verify_rounds_2_to_4(
         air: &dyn AIR<Field = Field, FieldExtension = FieldExtension, PublicInputs = PI>,
-        proof: &StarkProof<Field, FieldExtension, PI>,
+        proof: &ArchivedStarkProof<Field, FieldExtension, PI>,
+        public_inputs: &PI,
         transcript: &mut impl IsStarkTranscript<FieldExtension, Field>,
         rap_challenges: Vec<FieldElement<FieldExtension>>,
     ) -> bool
@@ -1058,7 +1204,7 @@ pub trait IsStarkVerifier<
         FieldElement<Field>: AsBytes + Sync + Send,
         FieldElement<FieldExtension>: AsBytes + Sync + Send,
     {
-        let domain = new_verifier_domain(air, proof.trace_length);
+        let domain = new_verifier_domain(air, proof.trace_length.to_native() as usize);
 
         // Verify there are enough queries
         if proof.query_list.len() < air.options().fri_number_of_queries {
@@ -1070,14 +1216,24 @@ pub trait IsStarkVerifier<
         #[cfg(feature = "instruments")]
         let timer1 = Instant::now();
 
-        let challenges =
-            Self::replay_rounds_after_round_1(air, proof, &domain, transcript, rap_challenges);
+        let challenges = Self::replay_rounds_after_round_1(
+            air,
+            proof,
+            public_inputs,
+            &domain,
+            transcript,
+            rap_challenges,
+        );
 
         // verify grinding
         let security_bits = air.context().proof_options.grinding_factor;
         if security_bits > 0 {
-            let nonce_is_valid = proof.nonce.is_some_and(|nonce_value| {
-                grinding::is_valid_nonce(&challenges.grinding_seed, nonce_value, security_bits)
+            let nonce_is_valid = proof.nonce.as_ref().is_some_and(|nonce_value| {
+                grinding::is_valid_nonce(
+                    &challenges.grinding_seed,
+                    nonce_value.to_native(),
+                    security_bits,
+                )
             });
 
             if !nonce_is_valid {
@@ -1097,7 +1253,13 @@ pub trait IsStarkVerifier<
         #[cfg(feature = "instruments")]
         let timer2 = Instant::now();
 
-        if !Self::step_2_verify_claimed_composition_polynomial(air, proof, &domain, &challenges) {
+        if !Self::step_2_verify_claimed_composition_polynomial(
+            air,
+            proof,
+            public_inputs,
+            &domain,
+            &challenges,
+        ) {
             #[cfg(not(feature = "test_fiat_shamir"))]
             error!("Composition Polynomial verification failed");
             return false;
diff --git a/docs/continuations_design.md b/docs/continuations_design.md
index 9c3f54747..5ff60e1d6 100644
--- a/docs/continuations_design.md
+++ b/docs/continuations_design.md
@@ -499,7 +499,7 @@ fresh agents) of the register/x254 chain, the L2G root binding, and
 completeness-by-enumeration found no false-accept: each forgery is caught by a
 Merkle/hash collision, a bus imbalance, or a Fiat-Shamir divergence.
 
-The bundle derives serde and round-trips through `bincode` (exactly like a
+The bundle derives the rkyv traits and round-trips through the rkyv archive format (exactly like a
 monolithic `VmProof`); the CLI drives it via `prove --continuations` (writes the
 bundle) and `verify --continuations` (checks bundle + ELF only). `prove` picks the
 epoch size from `--epoch-size-log2 N` (`N=20` means 1,048,576 cycles), defaulting
@@ -521,7 +521,7 @@ recursion/aggregation layer (deferred).
   ordering check (§3.3), the `MU` selector (§3.4), the **power-of-two epoch size**
   (§3.5), **cross-epoch registers** (§6), the **commit index x254** across epochs
   (§6), the **Fiat-Shamir statement binding** (§7), and the **standalone split
-  prover/verifier** (§8) — bundle serialized with `bincode` and driven from the CLI
+  prover/verifier** (§8) — bundle serialized with rkyv and driven from the CLI
   (`prove`/`verify --continuations`).
 - **The committed code implements Design X** (`MU` gates every L2G interaction),
   which is the sound design. Design Y was implemented briefly, then found unsound
diff --git a/executor/Cargo.toml b/executor/Cargo.toml
index 5d1e4ae49..3f278e1c6 100644
--- a/executor/Cargo.toml
+++ b/executor/Cargo.toml
@@ -13,10 +13,3 @@ ecsm = { path = "../crypto/ecsm" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 tiny-keccak = { version = "2.0", features = ["keccak"] }
-# Exact pin: must match the fixture writer + guest so the rkyv ProgramInput
-# layout the executor tests read stays consistent (see tooling/ethrex-fixtures).
-rkyv = { version = "=0.8.16", features = ["std", "unaligned"] }
-# Native reference for the ethrex tests (host-side `execution_program` with
-# `NativeCrypto`). Pinned to the same ethrex rev as the guest ELF
-# (executor/programs/rust/ethrex) — the open LambdaVM-backend PR branch.
-ethrex-guest-program = { git = "https://github.com/lambdaclass/ethrex.git", rev = "156cb8d6a3974f411d71622eecd1b249ee37ff1c", package = "ethrex-guest-program" }
diff --git a/executor/programs/rust/ef_io_demo/Cargo.lock b/executor/programs/rust/ef_io_demo/Cargo.lock
new file mode 100644
index 000000000..c2993d8c6
--- /dev/null
+++ b/executor/programs/rust/ef_io_demo/Cargo.lock
@@ -0,0 +1,331 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "const-default"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa"
+
+[[package]]
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
+[[package]]
+name = "ef_io_demo"
+version = "0.1.0"
+dependencies = [
+ "lambda-vm-syscalls",
+]
+
+[[package]]
+name = "embedded-alloc"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd"
+dependencies = [
+ "const-default",
+ "critical-section",
+ "linked_list_allocator",
+ "rlsf",
+]
+
+[[package]]
+name = "embedded-hal"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "lambda-vm-syscalls"
+version = "0.1.0"
+dependencies = [
+ "embedded-alloc",
+ "getrandom 0.2.17",
+ "getrandom 0.3.4",
+ "lazy_static",
+ "rand",
+ "riscv",
+ "thiserror",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "linked_list_allocator"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897"
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "riscv"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25"
+dependencies = [
+ "critical-section",
+ "embedded-hal",
+ "paste",
+ "riscv-macros",
+ "riscv-pac",
+]
+
+[[package]]
+name = "riscv-macros"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "riscv-pac"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
+
+[[package]]
+name = "rlsf"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a"
+dependencies = [
+ "cfg-if",
+ "const-default",
+ "libc",
+ "rustversion",
+ "svgbobdoc",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "svgbobdoc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50"
+dependencies = [
+ "base64",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "unicode-width",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.4+wasi-0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
diff --git a/executor/tests/README.md b/executor/tests/README.md
index eddd3b525..39fd13813 100644
--- a/executor/tests/README.md
+++ b/executor/tests/README.md
@@ -5,6 +5,10 @@
 The `ethrex_*.bin` files are rkyv-serialized `ethrex_guest_program::l1::ProgramInput`
 values consumed by the ethrex guest (`executor/programs/rust/ethrex`).
 
+The native-reference tests live in `tooling/ethrex-tests` (a detached
+workspace: ethrex pins rkyv `unaligned`, which must not feature-unify with the
+main workspace's aligned proof format).
+
 The ethrex guest, the native test reference, and the fixture generator are all
 pinned to the same ethrex revision (the open LambdaVM-backend PR branch, until it
 merges to `main`):
diff --git a/executor/tests/rust.rs b/executor/tests/rust.rs
index 458a0bd6c..7dcb88e29 100644
--- a/executor/tests/rust.rs
+++ b/executor/tests/rust.rs
@@ -267,72 +267,6 @@ fn test_args_panics() {
     }
 }
 
-/// Larger-block smoke test: a synthetic ethrex block with 10 ETH transfers.
-/// (Replaces the old `ethrex_hoodi.bin` real-block fixture, which was in the
-/// pre-Crypto-trait ethrex format and no longer deserializes.) Fixture is
-/// generated by `tooling/ethrex-fixtures`; see `tests/README.md`.
-#[ignore = "heavier synthetic block (10 txs); run in the dedicated --ignored CI step"]
-#[test]
-fn test_ethrex() {
-    use ethrex_guest_program::crypto::NativeCrypto;
-    use ethrex_guest_program::l1::{ProgramInput, execution_program};
-    use rkyv::rancor::Error;
-    use std::fs;
-    use std::sync::Arc;
-    let inputs = fs::read("tests/ethrex_10_transfers.bin").unwrap();
-    let input = rkyv::from_bytes::<ProgramInput, Error>(&inputs).unwrap();
-    let output = execution_program(input, Arc::new(NativeCrypto)).unwrap();
-    run_program_and_check_public_output(
-        "./program_artifacts/rust/ethrex.elf",
-        output.encode(),
-        inputs,
-    );
-}
-
-/// Executes a stateless ethrex block containing a single (plain ETH transfer)
-/// transaction. Execution only — no proving — against the ethrex guest ELF
-/// built from the same pinned ethrex revision as the native reference. The
-/// fixture is a serialized `ProgramInput`; see `tests/README.md` for provenance.
-///
-/// The fixture is generated by `tooling/ethrex-fixtures` at the same ethrex rev
-/// as the guest (see `tests/README.md`).
-#[test]
-fn test_ethrex_simple_tx() {
-    use ethrex_guest_program::crypto::NativeCrypto;
-    use ethrex_guest_program::l1::{ProgramInput, execution_program};
-    use rkyv::rancor::Error;
-    use std::sync::Arc;
-    let inputs = std::fs::read("tests/ethrex_simple_tx.bin").unwrap();
-    let input = rkyv::from_bytes::<ProgramInput, Error>(&inputs).unwrap();
-    let output = execution_program(input, Arc::new(NativeCrypto)).unwrap();
-    run_program_and_check_public_output(
-        "./program_artifacts/rust/ethrex.elf",
-        output.encode(),
-        inputs,
-    );
-}
-
-/// Executes a stateless ethrex block with NO transactions (empty block).
-/// Execution only — no proving. Pins the committed `ethrex_empty_block.bin`
-/// fixture into the default suite so its rkyv `ProgramInput` layout (the 0-tx
-/// edge case) is exercised and stays consistent with the guest across ethrex
-/// rev bumps. Mirrors `test_ethrex_simple_tx`; see `tests/README.md`.
-#[test]
-fn test_ethrex_empty_block() {
-    use ethrex_guest_program::crypto::NativeCrypto;
-    use ethrex_guest_program::l1::{ProgramInput, execution_program};
-    use rkyv::rancor::Error;
-    use std::sync::Arc;
-    let inputs = std::fs::read("tests/ethrex_empty_block.bin").unwrap();
-    let input = rkyv::from_bytes::<ProgramInput, Error>(&inputs).unwrap();
-    let output = execution_program(input, Arc::new(NativeCrypto)).unwrap();
-    run_program_and_check_public_output(
-        "./program_artifacts/rust/ethrex.elf",
-        output.encode(),
-        inputs,
-    );
-}
-
 #[ignore = "Ignored until the vm is fast enough to run this test"]
 #[test]
 fn test_ckzg() {
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 887d9c1c1..18f2b12d4 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -20,18 +20,15 @@ crypto = { path = "../crypto/crypto" }
 math = { path = "../crypto/math" }
 executor = { path = "../executor" }
 ecsm = { path = "../crypto/ecsm" }
-serde = { version = "1.0", features = ["derive"] }
 rayon = { version = "1.8.0", optional = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"] }
 log = "0.4"
 sha3 = { version = "0.10.8", default-features = false }
-postcard = { version = "1.0", default-features = false, features = ["alloc"] }
+rkyv = { version = "0.8.10", default-features = false, features = ["alloc", "bytecheck", "aligned"] }
 
 [dev-dependencies]
 env_logger = "*"
 criterion = { version = "0.5", default-features = false }
-bincode = "1"
-postcard = { version = "1.0", features = ["alloc"] }
 tikv-jemallocator = "0.6"
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tiny-keccak = { version = "2.0", features = ["keccak"] }
diff --git a/prover/src/continuation.rs b/prover/src/continuation.rs
index ccdd5a6f9..ea9f0c948 100644
--- a/prover/src/continuation.rs
+++ b/prover/src/continuation.rs
@@ -247,7 +247,7 @@ struct EpochStart<'a> {
 /// Note: continuation epochs use the L2G memory bookend, so PAGE is skipped and the
 /// per-epoch page config set is empty — the verifier builds the AIRs with no PAGE
 /// tables rather than trusting any prover-supplied page config.
-#[derive(serde::Serialize, serde::Deserialize)]
+#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 struct EpochProof {
     /// The epoch's STARK proof (its tables + the epoch-local L2G sub-table last).
     proof: MultiProof<F, E, ()>,
@@ -278,8 +278,8 @@ struct EpochProof {
 /// rebuild the genesis image — bound by the global proof's genesis-from-ELF check).
 ///
 /// `verify_continuation` checks this using only the bundle and the ELF. It derives
-/// serde, so it round-trips through `bincode` exactly like a monolithic `VmProof`.
-#[derive(serde::Serialize, serde::Deserialize)]
+/// rkyv, so it round-trips through the same archive format as a monolithic `VmProof`.
+#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct ContinuationProof {
     epochs: Vec<EpochProof>,
     global: MultiProof<F, E, ()>,
@@ -496,9 +496,19 @@ fn verify_epoch(
         .copied()
         .unwrap_or(0) as u64;
 
+    let proof_bytes = match rkyv::to_bytes::<rkyv::rancor::Error>(&epoch.proof) {
+        Ok(bytes) => bytes,
+        Err(_) => return false,
+    };
+    // SAFETY: `proof_bytes` was produced by `rkyv::to_bytes` on the line above.
+    let archived = unsafe {
+        rkyv::access_unchecked::<stark::proof::stark::ArchivedMultiProof<F, E, ()>>(&proof_bytes)
+    };
+    let archived_proofs = archived.proofs.as_slice();
+
     let expected = match compute_expected_commit_bus_balance(
         &refs,
-        &epoch.proof,
+        archived_proofs,
         &epoch.public_output,
         commit_start_index,
         &mut seed(),
@@ -507,7 +517,7 @@ fn verify_epoch(
         None => return false,
     };
 
-    if !Verifier::multi_verify(&refs, &epoch.proof, &mut seed(), &expected) {
+    if !Verifier::multi_verify_archived(&refs, archived_proofs, &mut seed(), &expected) {
         return false;
     }
 
@@ -1030,17 +1040,18 @@ mod tests {
         assert_eq!(out.as_deref(), Some(&[0xAA, 0xBB, 0xCC, 0xDD][..]));
     }
 
-    // A bundle survives a bincode round-trip and still verifies to the same output —
+    // A bundle survives an rkyv round-trip and still verifies to the same output —
     // the serialization path the CLI's `prove`/`verify --continuations` relies on.
     #[test]
-    fn test_continuation_bincode_roundtrip() {
+    fn test_continuation_rkyv_roundtrip() {
         let _ = env_logger::builder().is_test(true).try_init();
         let elf_bytes = asm_elf_bytes("test_commit_split");
         let bundle =
             prove_continuation(&elf_bytes, &[], 4, &ProofOptions::default_test_options()).unwrap();
 
-        let bytes = bincode::serialize(&bundle).unwrap();
-        let restored: ContinuationProof = bincode::deserialize(&bytes).unwrap();
+        let bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&bundle).unwrap();
+        let restored: ContinuationProof =
+            rkyv::from_bytes::<_, rkyv::rancor::Error>(&bytes).unwrap();
 
         let out = verify_continuation(&elf_bytes, &restored, &ProofOptions::default_test_options())
             .unwrap();
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 07b275f6a..18734a7d7 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -65,13 +65,13 @@ use crate::test_utils::{
 // Re-exported so downstream verifier guests (e.g. the in-VM recursion guest) can
 // name the proof-options type carried in their private input alongside `VmProof`.
 pub use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions};
-use stark::proof::stark::MultiProof;
+use stark::proof::stark::{ArchivedMultiProof, ArchivedStarkProof, MultiProof};
 
 /// A run-length encoded range of contiguous zero-initialized 4KB pages.
 ///
 /// Represents `count` contiguous pages starting at `base`, used for
 /// runtime-allocated memory (stack, heap) not covered by ELF segments.
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct RuntimePageRange {
     /// Base address of the first page (4KB-aligned).
     pub base: u64,
@@ -86,7 +86,7 @@ pub const FIXED_TABLE_COUNT: usize = 11;
 
 /// Number of chunks for each split table.
 /// The verifier needs this to reconstruct matching AIRs.
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct TableCounts {
     pub cpu: usize,
     pub lt: usize,
@@ -158,7 +158,7 @@ impl TableCounts {
 
 /// A complete VM proof bundle containing the STARK proof and metadata
 /// needed by the verifier to reconstruct the AIR configuration.
-#[derive(Debug, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct VmProof {
     /// The multi-table STARK proof.
     pub proof: MultiProof<F, E, ()>,
@@ -181,6 +181,93 @@ pub struct VmProof {
     pub vk_digest: [u8; 32],
 }
 
+/// The private-input bundle the recursion verifier guest consumes: an inner
+/// proof plus everything needed to verify it (inner ELF, the inner prover's
+/// options, and the host-derived verifying key).
+///
+/// Archived as one rkyv blob (see [`encode_recursion_input`]) so the guest reads
+/// every field straight from the input buffer with no deserialization pass.
+#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
+pub struct RecursionInput {
+    pub vm_proof: VmProof, // the inner proof to verify
+    pub inner_elf: Vec<u8>, // the inner ELF
+    pub options: ProofOptions, // the inner prover's options
+    pub vkey: VmVerifyingKey, // the host-derived verifying key
+}
+
+// ============================================================================
+// Recursion-input wire format: aligning magic prefix + rkyv archive
+// ============================================================================
+//
+// The guest reads the archive in place with naturally-aligned loads (archived
+// field elements are 8-aligned; we require 16 for headroom), and the executor
+// traps unaligned doubleword loads. The executor maps the private input as
+// `[u32 len][payload...]` with the payload at `PRIVATE_INPUT_START + 4`, which
+// is only 4-aligned. A fixed prefix pads the payload so the archive that
+// follows lands 16-aligned, and doubles as a magic + version tag the guest
+// validates before the unsafe access.
+
+/// 4-byte magic identifying a lambda-vm recursion input blob ("LVMR").
+pub const RECURSION_INPUT_MAGIC: [u8; 4] = *b"LVMR";
+
+/// Wire-format version of the recursion input blob (little-endian `u32` after the magic).
+pub const RECURSION_INPUT_VERSION: u32 = 1;
+
+/// Required alignment (bytes) of the archive's first byte in guest memory.
+pub const RECURSION_INPUT_ALIGN: usize = 16;
+
+/// Aligning prefix length: `magic(4) + version(4) + reserved(4) = 12` bytes,
+/// chosen so the archive starts 16-aligned given the executor's
+/// `PRIVATE_INPUT_START_INDEX + 4` payload base. Enforced at compile time just below.
+pub const RECURSION_INPUT_PREFIX_LEN: usize = 12;
+
+const _: () = {
+    let payload_base = (executor::vm::memory::PRIVATE_INPUT_START_INDEX as usize) + 4;
+    let pad =
+        (RECURSION_INPUT_ALIGN - (payload_base % RECURSION_INPUT_ALIGN)) % RECURSION_INPUT_ALIGN;
+    assert!(
+        RECURSION_INPUT_PREFIX_LEN == pad,
+        "prefix length must align the archive to RECURSION_INPUT_ALIGN given the private-input payload base",
+    );
+    assert!(
+        (payload_base + RECURSION_INPUT_PREFIX_LEN).is_multiple_of(RECURSION_INPUT_ALIGN),
+        "archive must start at a RECURSION_INPUT_ALIGN-aligned guest address",
+    );
+};
+
+/// Encode a [`RecursionInput`] into the on-wire blob: a 12-byte `magic + version + reserved`
+/// prefix followed by the rkyv archive. The prefix both aligns the archive in guest memory (so
+/// in-place reads don't trap) and tags the format/version so the guest can validate before the
+/// unsafe access. Returns [`Error::Execution`] if rkyv serialization fails.
+pub fn encode_recursion_input(input: &RecursionInput) -> Result<Vec<u8>, Error> {
+    let archive = rkyv::to_bytes::<rkyv::rancor::Error>(input)
+        .map_err(|e| Error::Execution(format!("rkyv encode failed: {e}")))?;
+    let mut blob = Vec::with_capacity(RECURSION_INPUT_PREFIX_LEN + archive.len());
+    blob.extend_from_slice(&RECURSION_INPUT_MAGIC);
+    blob.extend_from_slice(&RECURSION_INPUT_VERSION.to_le_bytes());
+    blob.extend_from_slice(&[0u8; 4]); // reserved, written as zeros
+    debug_assert_eq!(blob.len(), RECURSION_INPUT_PREFIX_LEN);
+    blob.extend_from_slice(&archive);
+    Ok(blob)
+}
+
+/// Validate the wire prefix and return the archive bytes (zero-copy slice).
+/// Returns `None` if the blob is shorter than the prefix or the magic or version
+/// doesn't match — the caller should halt cleanly, not `access_unchecked`.
+pub fn recursion_archive_bytes(blob: &[u8]) -> Option<&[u8]> {
+    if blob.len() < RECURSION_INPUT_PREFIX_LEN {
+        return None;
+    }
+    if blob[0..4] != RECURSION_INPUT_MAGIC {
+        return None;
+    }
+    let version = u32::from_le_bytes([blob[4], blob[5], blob[6], blob[7]]);
+    if version != RECURSION_INPUT_VERSION {
+        return None;
+    }
+    Some(&blob[RECURSION_INPUT_PREFIX_LEN..]) // reserved bytes are not inspected
+}
+
 /// Error type for the prover crate.
 #[derive(Debug)]
 pub enum Error {
@@ -673,10 +760,10 @@ impl VmAirs {
 /// challenge elements.
 pub(crate) fn replay_transcript_phase_a(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
-    multi_proof: &MultiProof<F, E, ()>,
+    proofs: &[ArchivedStarkProof<F, E, ()>],
     transcript: &mut DefaultTranscript<E>,
 ) -> (FieldElement<E>, FieldElement<E>) {
-    for (air, proof) in airs.iter().zip(&multi_proof.proofs) {
+    for (air, proof) in airs.iter().zip(proofs) {
         if air.is_preprocessed() {
             transcript.append_bytes(&air.precomputed_commitment());
         }
@@ -743,15 +830,38 @@ pub(crate) fn compute_commit_bus_offset(
 /// and before `multi_verify`.
 pub(crate) fn compute_expected_commit_bus_balance(
     airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
-    proof: &MultiProof<F, E, ()>,
+    proofs: &[ArchivedStarkProof<F, E, ()>],
     public_output_bytes: &[u8],
     start_index: u64,
     transcript: &mut DefaultTranscript<E>,
 ) -> Option<FieldElement<E>> {
-    let (z, alpha) = replay_transcript_phase_a(airs, proof, transcript);
+    let (z, alpha) = replay_transcript_phase_a(airs, proofs, transcript);
     compute_commit_bus_offset(public_output_bytes, start_index, &z, &alpha)
 }
 
+/// Owned-proof convenience over [`compute_expected_commit_bus_balance`]: archives the proof
+/// and replays from the archived view. `None` if archiving or the replay fails. Test helper.
+#[cfg(test)]
+pub(crate) fn compute_expected_commit_bus_balance_owned(
+    airs: &[&dyn AIR<Field = F, FieldExtension = E, PublicInputs = ()>],
+    proof: &MultiProof<F, E, ()>,
+    public_output_bytes: &[u8],
+    start_index: u64,
+    transcript: &mut DefaultTranscript<E>,
+) -> Option<FieldElement<E>> {
+    let proof_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(proof).ok()?;
+    // SAFETY: `proof_bytes` was just produced by `rkyv::to_bytes` from a `MultiProof<F, E, ()>`.
+    let archived =
+        unsafe { rkyv::access_unchecked::<ArchivedMultiProof<F, E, ()>>(&proof_bytes) };
+    compute_expected_commit_bus_balance(
+        airs,
+        archived.proofs.as_slice(),
+        public_output_bytes,
+        start_index,
+        transcript,
+    )
+}
+
 /// Bind the final cross-epoch GlobalMemory proof to the per-epoch proofs.
 ///
 /// The final proof commits one local-to-global sub-table per epoch as its first
@@ -1081,9 +1191,154 @@ pub fn verify_with_options_with_vkey(
     page_commitments: Option<&[(u64, Commitment)]>,
     vkey: Option<&VmVerifyingKey>,
 ) -> Result<bool, Error> {
+    // The verifier has a single implementation over the rkyv wire format
+    // (`verify_archived_parts`); an owned in-memory proof is archived once here
+    // and read back in place. Proofs arriving as bytes skip this entirely.
+    let proof_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&vm_proof.proof)
+        .map_err(|e| Error::Execution(format!("rkyv encode of proof failed: {e}")))?;
+    // SAFETY: `proof_bytes` was produced by `rkyv::to_bytes` on the line above,
+    // so it is a valid, aligned archive of this exact type.
+    let archived =
+        unsafe { rkyv::access_unchecked::<ArchivedMultiProof<F, E, ()>>(&proof_bytes) };
+
+    verify_archived_parts(
+        archived.proofs.as_slice(),
+        &vm_proof.table_counts,
+        &vm_proof.runtime_page_ranges,
+        vm_proof.num_private_input_pages,
+        &vm_proof.public_output,
+        &vm_proof.vk_digest,
+        elf_bytes,
+        proof_options,
+        decode_commitment,
+        page_commitments,
+        vkey,
+    )
+    .map(|v| v.ok)
+}
+
+/// Result of a recursion-blob verification: the verdict plus the values the
+/// recursion guest commits (`vk_digest ‖ public_output`), read from the blob.
+pub struct RecursionVerification<'a> {
+    /// Whether the inner proof verified.
+    pub ok: bool,
+    /// Digest of the verifying key that was actually used, recomputed from the
+    /// supplied vkey (and checked against the proof's embedded `vk_digest`).
+    pub vk_digest: [u8; 32],
+    /// The inner proof's committed public output (zero-copy from the blob).
+    pub public_output: &'a [u8],
+}
+
+/// Verify a recursion-input blob produced by [`encode_recursion_input`].
+///
+/// The archive is read **in place**: the STARK proof is verified straight from
+/// the blob with no deserialization and no per-field allocation. Only tiny
+/// metadata (options, vkey, table counts, page ranges) is materialized.
+pub fn verify_recursion_blob(blob: &[u8]) -> Result<RecursionVerification<'_>, Error> {
+    use rkyv::rancor::Error as RkyvError;
+
+    // Validate + strip the aligning magic/version prefix. In the guest the
+    // returned slice starts at the 16-aligned archive base (the prefix exists
+    // precisely so the archive lands aligned at
+    // `PRIVATE_INPUT_START + 4 + PREFIX_LEN`), so the in-place doubleword
+    // loads do not trap.
+    let archive_bytes = recursion_archive_bytes(blob).ok_or_else(|| {
+        Error::Execution(String::from("recursion blob: bad magic or version"))
+    })?;
+
+    // A host caller's buffer carries no alignment guarantee (`Vec<u8>` is
+    // align-1, so `&blob[12..]` is typically only 4-aligned) — in-place access
+    // there would be UB. Fall back to one aligned copy when the base is
+    // misaligned; the guest path is aligned by construction and stays
+    // zero-copy.
+    let mut aligned_fallback = rkyv::util::AlignedVec::<{ RECURSION_INPUT_ALIGN }>::new();
+    let archive: &[u8] =
+        if (archive_bytes.as_ptr() as usize).is_multiple_of(RECURSION_INPUT_ALIGN) {
+            archive_bytes
+        } else {
+            aligned_fallback.extend_from_slice(archive_bytes);
+            &aligned_fallback
+        };
+
+    // SAFETY: `archive` is RECURSION_INPUT_ALIGN-aligned (checked above). No
+    // structural validation is performed. On the host, callers must pass blobs
+    // produced by our own `encode_recursion_input`. In the recursion guest the
+    // blob is PROVER-SUPPLIED: a malformed archive makes the in-place reads
+    // hit arbitrary guest addresses, which the VM memory model defines (reads
+    // of unmapped cells return 0), so the outcome is garbage field elements
+    // that fail the cryptographic checks or an in-guest panic — verification
+    // failure either way, never a false accept.
+    let archived = unsafe { rkyv::access_unchecked::<ArchivedRecursionInput>(archive) };
+
+    // Materialize only the small metadata; the proof stays in the buffer.
+    let options: ProofOptions = rkyv::deserialize::<ProofOptions, RkyvError>(&archived.options)
+        .map_err(|e| Error::Execution(format!("rkyv deserialize options failed: {e}")))?;
+    let vkey: VmVerifyingKey = rkyv::deserialize::<VmVerifyingKey, RkyvError>(&archived.vkey)
+        .map_err(|e| Error::Execution(format!("rkyv deserialize vkey failed: {e}")))?;
+    let table_counts: TableCounts =
+        rkyv::deserialize::<TableCounts, RkyvError>(&archived.vm_proof.table_counts)
+            .map_err(|e| Error::Execution(format!("rkyv deserialize table_counts failed: {e}")))?;
+    let runtime_page_ranges: Vec<RuntimePageRange> =
+        rkyv::deserialize::<Vec<RuntimePageRange>, RkyvError>(&archived.vm_proof.runtime_page_ranges)
+            .map_err(|e| Error::Execution(format!("rkyv deserialize page ranges failed: {e}")))?;
+    let num_private_input_pages = archived.vm_proof.num_private_input_pages.to_native() as usize;
+    // Bytes read straight from the archived buffer (zero-copy).
+    let inner_elf: &[u8] = archived.inner_elf.as_slice();
+    let public_output: &[u8] = archived.vm_proof.public_output.as_slice();
+    // Rebase the returned output slice onto the caller's buffer: `archive` may
+    // be the aligned fallback copy, whose lifetime ends with this call. Same
+    // bytes at the same offsets in both buffers.
+    let public_output_offset = public_output.as_ptr() as usize - archive.as_ptr() as usize;
+
+    let verification = verify_archived_parts(
+        archived.vm_proof.proof.proofs.as_slice(),
+        &table_counts,
+        &runtime_page_ranges,
+        num_private_input_pages,
+        public_output,
+        &archived.vm_proof.vk_digest,
+        inner_elf,
+        &options,
+        None,
+        None,
+        Some(&vkey),
+    )?;
+
+    Ok(RecursionVerification {
+        ok: verification.ok,
+        vk_digest: verification.vk_digest,
+        public_output: &archive_bytes[public_output_offset..public_output_offset + public_output.len()],
+    })
+}
+
+/// Internal verdict of [`verify_archived_parts`].
+struct ArchivedVerification {
+    ok: bool,
+    vk_digest: [u8; 32],
+}
+
+/// The single VM-proof verification implementation: verifies a STARK proof
+/// whose sub-proofs are read in place from an rkyv archive, given the proof's
+/// metadata fields. Both [`verify_with_options_with_vkey`] (owned proof,
+/// archived once) and [`verify_recursion_blob`] (guest blob, zero-copy) funnel
+/// here.
+#[allow(clippy::too_many_arguments)]
+fn verify_archived_parts(
+    archived_proofs: &[ArchivedStarkProof<F, E, ()>],
+    table_counts: &TableCounts,
+    runtime_page_ranges: &[RuntimePageRange],
+    num_private_input_pages: usize,
+    public_output: &[u8],
+    vk_digest_in_proof: &[u8; 32],
+    elf_bytes: &[u8],
+    proof_options: &ProofOptions,
+    decode_commitment: Option<Commitment>,
+    page_commitments: Option<&[(u64, Commitment)]>,
+    vkey: Option<&VmVerifyingKey>,
+) -> Result<ArchivedVerification, Error> {
     // Validate table_counts before constructing AIRs.
     // A malicious prover could set counts to 0, removing entire constraint sets.
-    vm_proof.table_counts.validate()?;
+    table_counts.validate()?;
 
     // Bound num_private_input_pages before allocating PageConfigs.
     // MAX_PRIVATE_INPUT_SIZE fits in ~257 pages of DEFAULT_PAGE_SIZE.
@@ -1091,10 +1346,9 @@ pub fn verify_with_options_with_vkey(
         use crate::tables::page::DEFAULT_PAGE_SIZE;
         use executor::vm::memory::MAX_PRIVATE_INPUT_SIZE;
         let max_pages = (MAX_PRIVATE_INPUT_SIZE as usize + 4).div_ceil(DEFAULT_PAGE_SIZE) + 1;
-        if vm_proof.num_private_input_pages > max_pages {
+        if num_private_input_pages > max_pages {
             return Err(Error::InvalidTableCounts(format!(
-                "num_private_input_pages ({}) exceeds max ({max_pages})",
-                vm_proof.num_private_input_pages,
+                "num_private_input_pages ({num_private_input_pages}) exceeds max ({max_pages})",
             )));
         }
     }
@@ -1102,8 +1356,8 @@ pub fn verify_with_options_with_vkey(
     let program = Elf::load(elf_bytes).map_err(|e| Error::ElfLoad(format!("{e}")))?;
     let page_configs = Traces::page_configs_from_elf_and_runtime(
         &program,
-        &vm_proof.runtime_page_ranges,
-        vm_proof.num_private_input_pages,
+        runtime_page_ranges,
+        num_private_input_pages,
     );
 
     // Validate the vkey before constructing AIRs: `vk.pages` is indexed
@@ -1141,7 +1395,7 @@ pub fn verify_with_options_with_vkey(
         }
     };
     let vk_digest = vkey.compute_digest();
-    if vm_proof.vk_digest != vk_digest {
+    if *vk_digest_in_proof != vk_digest {
         return Err(Error::InvalidVerifyingKey(
             "proof vk_digest does not match the verifying key's digest".into(),
         ));
@@ -1149,15 +1403,14 @@ pub fn verify_with_options_with_vkey(
 
     // Cross-check: table_counts must match the number of sub-proofs.
     // FIXED_TABLE_COUNT always-present tables, plus page tables.
-    let expected_proof_count =
-        vm_proof.table_counts.total() + FIXED_TABLE_COUNT + page_configs.len();
-    if expected_proof_count != vm_proof.proof.proofs.len() {
+    let expected_proof_count = table_counts.total() + FIXED_TABLE_COUNT + page_configs.len();
+    if expected_proof_count != archived_proofs.len() {
         return Err(Error::InvalidTableCounts(format!(
             "table_counts total ({}) + {FIXED_TABLE_COUNT} fixed + {} pages = {}, but proof contains {} sub-proofs",
-            vm_proof.table_counts.total(),
+            table_counts.total(),
             page_configs.len(),
             expected_proof_count,
-            vm_proof.proof.proofs.len(),
+            archived_proofs.len(),
         )));
     }
 
@@ -1166,7 +1419,7 @@ pub fn verify_with_options_with_vkey(
         proof_options,
         false,
         &page_configs,
-        &vm_proof.table_counts,
+        table_counts,
         decode_commitment,
         true,
         None,
@@ -1175,7 +1428,7 @@ pub fn verify_with_options_with_vkey(
         Some(vkey),
     );
 
-    // Recompute the COMMIT output bus offset from VmProof.public_output.
+    // Recompute the COMMIT output bus offset from the public output.
     // If public_output was tampered, the recomputed offset won't match the
     // actual bus total in the proof, and multi_verify will reject.
     let air_refs = airs.air_refs();
@@ -1188,10 +1441,10 @@ pub fn verify_with_options_with_vkey(
         &mut transcript,
         StatementKind::Monolithic { vk_digest },
         elf_bytes,
-        &vm_proof.public_output,
-        &vm_proof.table_counts,
-        vm_proof.num_private_input_pages,
-        &vm_proof.runtime_page_ranges,
+        public_output,
+        table_counts,
+        num_private_input_pages,
+        runtime_page_ranges,
     );
 
     // Fork the post-absorb state: the replay helper advances through Phase A
@@ -1200,25 +1453,31 @@ pub fn verify_with_options_with_vkey(
     let mut transcript_for_replay = transcript.clone();
     let expected_bus_balance = match compute_expected_commit_bus_balance(
         &air_refs,
-        &vm_proof.proof,
-        &vm_proof.public_output,
+        archived_proofs,
+        public_output,
         // Monolithic proof: commits are indexed from 0.
         0,
         &mut transcript_for_replay,
     ) {
         Some(balance) => balance,
-        None => return Ok(false),
+        None => {
+            return Ok(ArchivedVerification {
+                ok: false,
+                vk_digest,
+            });
+        }
     };
 
     stark::profile_markers::step_marker::<{ stark::profile_markers::STEP_AIRS_AND_BUS_BALANCE_DONE }>(
     );
 
-    Ok(Verifier::multi_verify(
+    let ok = Verifier::multi_verify_archived(
         &air_refs,
-        &vm_proof.proof,
+        archived_proofs,
         &mut transcript,
         &expected_bus_balance,
-    ))
+    );
+    Ok(ArchivedVerification { ok, vk_digest })
 }
 
 /// Prove and verify in one call (convenience).
diff --git a/prover/src/tables/local_to_global.rs b/prover/src/tables/local_to_global.rs
index ada19baf5..193374056 100644
--- a/prover/src/tables/local_to_global.rs
+++ b/prover/src/tables/local_to_global.rs
@@ -83,7 +83,7 @@ pub const GENESIS_EPOCH: u64 = 0;
 pub const MAX_EPOCHS: u64 = 1 << 20;
 
 /// A cell's state when an epoch first touches it.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct InitClaim {
     /// Value the cell held when this epoch first touched it.
     pub value: u64,
@@ -94,7 +94,7 @@ pub struct InitClaim {
 }
 
 /// A cell's state at the end of the epoch that touched it.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct FiniClaim {
     /// Value the cell holds at this epoch's end.
     pub value: u64,
@@ -105,7 +105,7 @@ pub struct FiniClaim {
 }
 
 /// The init/fini boundary claims for a single touched cell.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct CellBoundary {
     pub address: u64,
     pub init: InitClaim,
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index f4cefb97f..98d10e5dc 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -76,7 +76,7 @@ fn prove_and_verify_vm_minimal(elf: &Elf, traces: &mut Traces) -> bool {
 
     // Compute the verifier-side expected COMMIT bus balance from public output bytes
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
@@ -167,7 +167,7 @@ fn verify_vm_minimal(vm_proof: &VmProof, elf_bytes: &[u8]) -> bool {
     );
     let air_refs = airs.air_refs();
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &air_refs,
         &vm_proof.proof,
         &vm_proof.public_output,
@@ -1378,7 +1378,7 @@ fn test_prove_elfs_test_commit_4_wrong_pages_rejected() {
     );
     let verifier_air_refs = verifier_airs.air_refs();
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
@@ -2133,7 +2133,7 @@ fn test_deep_stack_runtime_pages_roundtrip() {
     );
     let verifier_air_refs = verifier_airs.air_refs();
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
@@ -2206,7 +2206,7 @@ fn test_deep_stack_missing_pages_rejected() {
     );
     let verifier_air_refs = verifier_airs.air_refs();
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
@@ -2314,7 +2314,7 @@ fn test_heap_alloc_runtime_pages_roundtrip() {
     );
     let verifier_air_refs = verifier_airs.air_refs();
     let mut replay_transcript = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = crate::compute_expected_commit_bus_balance(
+    let expected_bus_balance = crate::compute_expected_commit_bus_balance_owned(
         &verifier_air_refs,
         &proof,
         &traces.public_output_bytes,
@@ -2914,7 +2914,7 @@ fn test_count_elements_nonzero() {
 /// not terminate, so it is proven with the HALT table excluded (`include_halt = false`).
 #[test]
 fn test_prove_first_epoch_without_halt() {
-    use crate::compute_expected_commit_bus_balance;
+    use crate::compute_expected_commit_bus_balance_owned;
     use crate::tables::trace_builder::build_initial_image;
     use crate::test_utils::asm_elf_bytes;
 
@@ -2972,7 +2972,7 @@ fn test_prove_first_epoch_without_halt() {
     .expect("first epoch failed to prove");
 
     let mut replay = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = compute_expected_commit_bus_balance(
+    let expected_bus_balance = compute_expected_commit_bus_balance_owned(
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
@@ -2997,7 +2997,7 @@ fn test_prove_first_epoch_without_halt() {
 /// does not terminate (HALT excluded).
 #[test]
 fn test_prove_second_epoch_from_snapshot() {
-    use crate::compute_expected_commit_bus_balance;
+    use crate::compute_expected_commit_bus_balance_owned;
     use crate::tables::register;
     use crate::test_utils::asm_elf_bytes;
 
@@ -3056,7 +3056,7 @@ fn test_prove_second_epoch_from_snapshot() {
     .expect("second epoch failed to prove");
 
     let mut replay = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = compute_expected_commit_bus_balance(
+    let expected_bus_balance = compute_expected_commit_bus_balance_owned(
         &airs.air_refs(),
         &multi_proof,
         &traces.public_output_bytes,
@@ -3082,7 +3082,7 @@ fn test_prove_second_epoch_from_snapshot() {
 /// will bind to. The cross-epoch GlobalMemory matching is proven separately.
 #[test]
 fn test_epoch_proof_commits_l2g() {
-    use crate::compute_expected_commit_bus_balance;
+    use crate::compute_expected_commit_bus_balance_owned;
     use crate::tables::local_to_global;
     use crate::tables::register;
     use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells};
@@ -3163,7 +3163,7 @@ fn test_epoch_proof_commits_l2g() {
     refs.push(&inert_l2g_air);
 
     let mut replay = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = compute_expected_commit_bus_balance(
+    let expected_bus_balance = compute_expected_commit_bus_balance_owned(
         &refs,
         &multi_proof,
         &traces.public_output_bytes,
@@ -3205,7 +3205,7 @@ fn test_epoch_proof_commits_l2g() {
 /// argument.
 #[test]
 fn test_continuation_pipeline_end_to_end() {
-    use crate::compute_expected_commit_bus_balance;
+    use crate::compute_expected_commit_bus_balance_owned;
     use crate::tables::local_to_global;
     use crate::tables::register;
     use crate::tables::trace_builder::{build_initial_image, epoch_touched_cells};
@@ -3315,7 +3315,7 @@ fn test_continuation_pipeline_end_to_end() {
         let mut refs = airs.air_refs();
         refs.push(&inert_l2g_air);
         let mut replay = DefaultTranscript::<E>::new(&[]);
-        let expected_bus_balance = compute_expected_commit_bus_balance(
+        let expected_bus_balance = compute_expected_commit_bus_balance_owned(
             &refs,
             &multi_proof,
             &traces.public_output_bytes,
@@ -3363,7 +3363,7 @@ fn test_continuation_pipeline_end_to_end() {
 /// `Memory` bus still nets to zero — L2G has replaced PAGE as the bookend.
 #[test]
 fn test_epoch_memory_bus_with_l2g_bookend() {
-    use crate::compute_expected_commit_bus_balance;
+    use crate::compute_expected_commit_bus_balance_owned;
     use crate::tables::local_to_global;
     use crate::tables::register;
     use crate::tables::trace_builder::build_initial_image;
@@ -3446,7 +3446,7 @@ fn test_epoch_memory_bus_with_l2g_bookend() {
     let mut refs = airs.air_refs();
     refs.push(&l2g_air);
     let mut replay = DefaultTranscript::<E>::new(&[]);
-    let expected_bus_balance = compute_expected_commit_bus_balance(
+    let expected_bus_balance = compute_expected_commit_bus_balance_owned(
         &refs,
         &multi_proof,
         &traces.public_output_bytes,
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 1345050e3..68b9e26c3 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -38,8 +38,9 @@ const MIN_PROOF_OPTIONS: stark::proof::options::ProofOptions =
         grinding_factor: 1,
     };
 
-/// Prove `inner_elf` under `opts` and postcard-encode `(proof, elf, opts, vkey)`
-/// into the guest's private-input blob. Returns the proof and the blob.
+/// Prove `inner_elf` under `opts` and rkyv-encode a `RecursionInput` into the
+/// guest's private-input blob (magic/version prefix + archive). Returns the
+/// proof and the blob.
 fn prove_inner_and_encode_blob(
     tag: &str,
     inner_elf: &[u8],
@@ -66,10 +67,15 @@ fn prove_inner_and_encode_blob(
     );
     let vkey =
         crate::VmVerifyingKey::from_elf_and_options(&elf_for_vkey, opts, None, &page_configs);
-    let blob = postcard::to_allocvec(&(&inner_proof, &inner_elf, opts, &vkey))
-        .expect("postcard encode failed");
-    eprintln!("[{tag}] postcard blob: {} bytes", blob.len());
-    (inner_proof, blob)
+    let input = crate::RecursionInput {
+        vm_proof: inner_proof,
+        inner_elf: inner_elf.to_vec(),
+        options: opts.clone(),
+        vkey,
+    };
+    let blob = crate::encode_recursion_input(&input).expect("encode recursion input");
+    eprintln!("[{tag}] rkyv blob: {} bytes", blob.len());
+    (input.vm_proof, blob)
 }
 
 /// Whether to also prove the guest's own execution after handing it the proof.
@@ -210,7 +216,7 @@ fn resolve_pc(symbols: &executor::elf::SymbolTable, pc: u64) -> String {
 /// so `multi_verify`'s per-table `3,4,5,6` repetition re-attributes cycles to
 /// the correct step on each table's `6->3` transition instead of latching at 6.
 const STEP_LABELS: [&str; 7] = [
-    "0. setup (alloc init + postcard decode)",
+    "0. setup (alloc init + blob prefix check)",
     "1. airs_and_bus_balance (Elf::load/VmAirs::new preprocessed FFT+Merkle/bus balance)",
     "2. multi_verify setup (transcript replay phase A/B, per-table fork)",
     "3. step 1: replay_rounds_after_round_1",
@@ -509,50 +515,60 @@ fn run_recursion_pipeline(
     );
 }
 
-/// Decode the blob on the host and verify — a cheap guard on the encode/decode
-/// contract without running the VM.
+/// Verify the blob on the host exactly as the guest does (zero-copy through
+/// `verify_recursion_blob`) — a cheap guard on the encode/verify contract
+/// without running the VM. Also checks that the blob is accepted at different
+/// base alignments and that a bad magic or a tampered blob is rejected.
 #[test]
 #[ignore = "needs prebuilt guest ELF (make compile-recursion-elfs)"]
 fn test_recursion_blob_decodes_and_verifies_on_host() {
     let root = workspace_root();
     let empty_elf_bytes = read_guest_elf(&root, "empty");
-    let (_inner, blob) =
+    let (inner_proof, blob) =
         prove_inner_and_encode_blob("roundtrip", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
 
-    // Decode exactly as the guest does.
-    let decoded: Result<
-        (
-            crate::VmProof,
-            Vec<u8>,
-            crate::ProofOptions,
-            crate::VmVerifyingKey,
-        ),
-        _,
-    > = postcard::from_bytes(&blob);
-    let (vm_proof, inner_elf, options, vkey) = match decoded {
-        Ok(t) => t,
-        Err(e) => panic!("[roundtrip] postcard DECODE failed (this is the guest panic): {e}"),
-    };
-    eprintln!(
-        "[roundtrip] decode ok: elf {} bytes, blowup {}",
-        inner_elf.len(),
-        options.blowup_factor
+    let verification = crate::verify_recursion_blob(&blob).expect("verify_recursion_blob errored");
+    assert!(verification.ok, "zero-copy path must accept a valid proof");
+    assert_eq!(
+        verification.public_output, inner_proof.public_output,
+        "public output must round-trip through the blob"
+    );
+    assert_eq!(
+        verification.vk_digest, inner_proof.vk_digest,
+        "vk digest must match the proof's"
     );
 
-    match crate::verify_with_options_with_vkey(
-        &vm_proof,
-        &inner_elf,
-        &options,
-        None,
-        None,
-        Some(&vkey),
-    ) {
-        Ok(true) => eprintln!("[roundtrip] verify ok=true — guest path is sound"),
-        Ok(false) => panic!(
-            "[roundtrip] verify returned FALSE (guest hits assert!(ok)) — proof did not survive the postcard round-trip"
-        ),
-        Err(e) => panic!("[roundtrip] verify ERRORED (guest hits .expect): {e:?}"),
-    }
+    // Host buffers carry no alignment guarantee, so `verify_recursion_blob`
+    // must accept the blob at any base alignment (falling back to an aligned
+    // copy when needed). The plain call above already exercises the common
+    // misaligned case (`Vec` base + 12-byte prefix → 4-aligned archive);
+    // shifting the base by 4 covers another residue class.
+    let mut padded: Vec<u8> = Vec::with_capacity(blob.len() + 4);
+    padded.extend_from_slice(&[0u8; 4]);
+    padded.extend_from_slice(&blob);
+    let ok_shifted = crate::verify_recursion_blob(&padded[4..])
+        .expect("verify_recursion_blob errored on shifted buffer")
+        .ok;
+    assert!(ok_shifted, "path must accept the proof from a shifted buffer");
+
+    // A bad magic must be rejected before the unsafe access.
+    let mut bad_magic = blob.clone();
+    bad_magic[0] ^= 0xFF;
+    assert!(
+        crate::verify_recursion_blob(&bad_magic).is_err(),
+        "bad magic must be rejected"
+    );
+
+    // Soundness: a single-byte tamper in the proof payload must make the
+    // zero-copy verifier reject (Fiat-Shamir / Merkle openings stop matching).
+    let mut tampered = blob.clone();
+    let tamper_idx = tampered.len() - 64;
+    tampered[tamper_idx] ^= 0x01;
+    let tampered_result = crate::verify_recursion_blob(&tampered);
+    assert!(
+        !matches!(tampered_result.map(|v| v.ok), Ok(true)),
+        "zero-copy verifier must NOT accept a tampered proof"
+    );
 }
 
 // === Execute-only tier ========================================================
diff --git a/prover/src/tests/vkey_tests.rs b/prover/src/tests/vkey_tests.rs
index 941415f31..932ef9058 100644
--- a/prover/src/tests/vkey_tests.rs
+++ b/prover/src/tests/vkey_tests.rs
@@ -80,10 +80,11 @@ fn test_vkey_roundtrip() {
     let vkey_again = VmVerifyingKey::from_elf_and_options(&elf, &options, None, &page_configs);
     assert_eq!(vkey, vkey_again, "vkey derivation must be deterministic");
 
-    // postcard round-trip preserves every field.
-    let encoded = postcard::to_allocvec(&vkey).expect("postcard encode");
-    let decoded: VmVerifyingKey = postcard::from_bytes(&encoded).expect("postcard decode");
-    assert_eq!(vkey, decoded, "postcard round-trip must preserve the vkey");
+    // rkyv round-trip preserves every field.
+    let encoded = rkyv::to_bytes::<rkyv::rancor::Error>(&vkey).expect("rkyv encode");
+    let decoded: VmVerifyingKey =
+        rkyv::from_bytes::<_, rkyv::rancor::Error>(&encoded).expect("rkyv decode");
+    assert_eq!(vkey, decoded, "rkyv round-trip must preserve the vkey");
     assert_eq!(
         decoded.compute_digest(),
         digest_before,
diff --git a/prover/src/vkey.rs b/prover/src/vkey.rs
index e3afd85ef..e07d9b74a 100644
--- a/prover/src/vkey.rs
+++ b/prover/src/vkey.rs
@@ -61,7 +61,7 @@ const PRIVATE_INPUT_PAGE_PLACEHOLDER: Commitment = [0u8; 32];
 
 /// Cached preprocessed-table commitments the verifier would otherwise
 /// recompute on every call.
-#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
 pub struct VmVerifyingKey {
     /// Layout version. See [`VKEY_VERSION`].
     pub version: u32,
@@ -135,13 +135,43 @@ impl VmVerifyingKey {
         }
     }
 
-    /// Keccak256 fingerprint of the postcard-serialized vkey. Stable as long
-    /// as the field layout (and [`VKEY_VERSION`]) does not change.
+    /// Keccak256 fingerprint of a canonical, framework-free encoding of the
+    /// vkey: every field is absorbed fixed-width (integers as little-endian
+    /// u32/u64/u8, commitments raw, `pages` length-prefixed), so the encoding is
+    /// injective and stable as long as the field layout (and [`VKEY_VERSION`])
+    /// does not change. The exhaustive destructure makes any field added to
+    /// `VmVerifyingKey` a compile error here — the signal to extend the
+    /// absorption below and bump [`VKEY_VERSION`].
     pub fn compute_digest(&self) -> [u8; 32] {
-        let bytes = postcard::to_allocvec(self)
-            .expect("postcard serialization of VmVerifyingKey must succeed");
+        let Self {
+            version,
+            options:
+                ProofOptions {
+                    blowup_factor,
+                    fri_number_of_queries,
+                    coset_offset,
+                    grinding_factor,
+                },
+            bitwise,
+            decode,
+            register,
+            keccak_rc,
+            pages,
+        } = self;
         let mut hasher = Keccak256::new();
-        hasher.update(&bytes);
+        hasher.update(version.to_le_bytes());
+        hasher.update([*blowup_factor]);
+        hasher.update((*fri_number_of_queries as u64).to_le_bytes());
+        hasher.update(coset_offset.to_le_bytes());
+        hasher.update([*grinding_factor]);
+        hasher.update(bitwise);
+        hasher.update(decode);
+        hasher.update(register);
+        hasher.update(keccak_rc);
+        hasher.update((pages.len() as u64).to_le_bytes());
+        for page in pages {
+            hasher.update(page);
+        }
         hasher.finalize().into()
     }
 }
diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs
index 491315ecb..fa4e43edd 100644
--- a/syscalls/src/syscalls.rs
+++ b/syscalls/src/syscalls.rs
@@ -97,6 +97,27 @@ pub fn get_private_input() -> Vec<u8> {
     unimplemented!("syscalls are only implemented for riscv64 targets");
 }
 
+/// Borrow the private input bytes in place from the memory-mapped region —
+/// no copy, no allocation. Same layout as [`get_private_input`]; the returned
+/// slice starts at `PRIVATE_INPUT_START + 4` (a 4-aligned address) and lives
+/// for the whole execution (the host never remaps the region).
+#[cfg(target_arch = "riscv64")]
+pub fn get_private_input_slice() -> &'static [u8] {
+    // SAFETY: The host pre-loads private input at PRIVATE_INPUT_START before
+    // execution. The 4-byte LE length prefix is always valid (written by the
+    // executor). The data pointer and length are within the memory-mapped
+    // region, which stays mapped and unmodified for the whole execution.
+    let len_ptr = PRIVATE_INPUT_START as *const u32;
+    let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize;
+    let data_ptr = (PRIVATE_INPUT_START + 4) as *const u8;
+    unsafe { core::slice::from_raw_parts(data_ptr, len) }
+}
+
+#[cfg(not(target_arch = "riscv64"))]
+pub fn get_private_input_slice() -> &'static [u8] {
+    unimplemented!("syscalls are only implemented for riscv64 targets");
+}
+
 #[cfg(target_arch = "riscv64")]
 pub fn sys_halt() -> ! {
     // NOTE: no print_string here — the Print ecall is unmatched on the Ecall bus
diff --git a/tooling/ethrex-tests/Cargo.lock b/tooling/ethrex-tests/Cargo.lock
new file mode 100644
index 000000000..7d24708ed
--- /dev/null
+++ b/tooling/ethrex-tests/Cargo.lock
@@ -0,0 +1,2415 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a4385e2e34eb35d6b3efe798b9eb88096925d87726c0798709bf56d9ed84af3"
+
+[[package]]
+name = "ark-bn254"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d69eab57e8d2663efa5c63135b2af4f396d66424f88954c21104125ab6b3e6bc"
+dependencies = [
+ "ark-ec",
+ "ark-ff",
+ "ark-std",
+]
+
+[[package]]
+name = "ark-ec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d68f2d516162846c1238e755a7c4d131b892b70cc70c471a8e3ca3ed818fce"
+dependencies = [
+ "ahash",
+ "ark-ff",
+ "ark-poly",
+ "ark-serialize",
+ "ark-std",
+ "educe",
+ "fnv",
+ "hashbrown 0.15.5",
+ "itertools 0.13.0",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+ "zeroize",
+]
+
+[[package]]
+name = "ark-ff"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a177aba0ed1e0fbb62aa9f6d0502e9b46dad8c2eab04c14258a1212d2557ea70"
+dependencies = [
+ "ark-ff-asm",
+ "ark-ff-macros",
+ "ark-serialize",
+ "ark-std",
+ "arrayvec",
+ "digest",
+ "educe",
+ "itertools 0.13.0",
+ "num-bigint",
+ "num-traits",
+ "paste",
+ "zeroize",
+]
+
+[[package]]
+name = "ark-ff-asm"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62945a2f7e6de02a31fe400aa489f0e0f5b2502e69f95f853adb82a96c7a6b60"
+dependencies = [
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "ark-ff-macros"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09be120733ee33f7693ceaa202ca41accd5653b779563608f1234f78ae07c4b3"
+dependencies = [
+ "num-bigint",
+ "num-traits",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "ark-poly"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "579305839da207f02b89cd1679e50e67b4331e2f9294a57693e5051b7703fe27"
+dependencies = [
+ "ahash",
+ "ark-ff",
+ "ark-serialize",
+ "ark-std",
+ "educe",
+ "fnv",
+ "hashbrown 0.15.5",
+]
+
+[[package]]
+name = "ark-serialize"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f4d068aaf107ebcd7dfb52bc748f8030e0fc930ac8e360146ca54c1203088f7"
+dependencies = [
+ "ark-serialize-derive",
+ "ark-std",
+ "arrayvec",
+ "digest",
+ "num-bigint",
+]
+
+[[package]]
+name = "ark-serialize-derive"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "213888f660fddcca0d257e88e54ac05bca01885f258ccdf695bafd77031bb69d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "ark-std"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a"
+dependencies = [
+ "num-traits",
+ "rand",
+]
+
+[[package]]
+name = "arrayvec"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3fb67a6e08acf24fdeccbac2cb6ac4305825bd1f117462e0e6f2f193345ad56"
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
+name = "base16ct"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "base64ct"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
+
+[[package]]
+name = "bitcoin-consensus-encoding"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2d6094e2a1ba3c93b5a596fe5a10d1a10c3c6e06785cde89f693a044c01aa40"
+dependencies = [
+ "bitcoin-internals",
+]
+
+[[package]]
+name = "bitcoin-internals"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a30a22d1f112dde8e16be7b45c63645dc165cef254f835b3e1e9553e485cfa64"
+dependencies = [
+ "hex-conservative 0.3.2",
+]
+
+[[package]]
+name = "bitcoin-io"
+version = "0.1.101"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb5de036369d1ac59d3c1819ebc4d850f89466f5401c571a285b6ed564a4cb78"
+dependencies = [
+ "bitcoin-consensus-encoding",
+]
+
+[[package]]
+name = "bitcoin_hashes"
+version = "0.14.101"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bca4c7abb40c8817d77403c880988cfd484f23ab2365726afb2f798363e2c4a2"
+dependencies = [
+ "bitcoin-io",
+ "hex-conservative 0.2.2",
+]
+
+[[package]]
+name = "bitvec"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddcec3d12c579d40898fe0a9a358a803c23e9c52ca3c425707f81c9436211837"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "bls12_381"
+version = "0.8.0"
+source = "git+https://github.com/lambdaclass/bls12_381?branch=expose-affine-constructors#78cad0378b17fc3157b83f514be192bf46edf9a1"
+dependencies = [
+ "digest",
+ "ff",
+ "group",
+ "pairing",
+ "rand_core",
+ "subtle",
+]
+
+[[package]]
+name = "bs58"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
+
+[[package]]
+name = "byte-slice-cast"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d"
+
+[[package]]
+name = "bytecheck"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b"
+dependencies = [
+ "bytecheck_derive",
+ "ptr_meta",
+ "rancor",
+ "simdutf8",
+]
+
+[[package]]
+name = "bytecheck_derive"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ae3f5d315924270530207e2a68396c3cc547f6dca3fbdca317cfb1a51edb593"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96"
+dependencies = [
+ "find-msvc-tools",
+ "shlex",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "chrono"
+version = "0.4.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327"
+dependencies = [
+ "iana-time-zone",
+ "num-traits",
+ "serde",
+ "windows-link",
+]
+
+[[package]]
+name = "const-oid"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+
+[[package]]
+name = "const_format"
+version = "0.2.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e"
+dependencies = [
+ "const_format_proc_macros",
+ "konst",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "convert_case"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "crossbeam-epoch",
+ "crossbeam-queue",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "crypto-bigint"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
+dependencies = [
+ "generic-array",
+ "rand_core",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "darling"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
+dependencies = [
+ "darling_core",
+ "darling_macro",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
+dependencies = [
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
+dependencies = [
+ "darling_core",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "der"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
+dependencies = [
+ "const-oid",
+ "zeroize",
+]
+
+[[package]]
+name = "deranged"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "derive_more"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05"
+dependencies = [
+ "derive_more-impl",
+]
+
+[[package]]
+name = "derive_more-impl"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22"
+dependencies = [
+ "convert_case",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "unicode-xid",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "const-oid",
+ "crypto-common",
+ "subtle",
+]
+
+[[package]]
+name = "dyn-clone"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
+
+[[package]]
+name = "ecdsa"
+version = "0.16.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
+dependencies = [
+ "der",
+ "digest",
+ "elliptic-curve",
+ "rfc6979",
+ "signature",
+ "spki",
+]
+
+[[package]]
+name = "ecsm"
+version = "0.1.0"
+dependencies = [
+ "k256",
+ "num-bigint",
+ "num-traits",
+]
+
+[[package]]
+name = "educe"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
+dependencies = [
+ "enum-ordinalize",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+
+[[package]]
+name = "elliptic-curve"
+version = "0.13.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
+dependencies = [
+ "base16ct",
+ "crypto-bigint",
+ "digest",
+ "ff",
+ "generic-array",
+ "group",
+ "pkcs8",
+ "rand_core",
+ "sec1",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "enum-ordinalize"
+version = "4.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07f808d588c10e464ea6f7d3eaed500049eff30aaac103460f61828c2d65b3eb"
+dependencies = [
+ "enum-ordinalize-derive",
+]
+
+[[package]]
+name = "enum-ordinalize-derive"
+version = "4.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e528e2d34ba8a67a1a650b86beae8ef69fc5fdb638016f386b973226590432"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "ethbloom"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c321610643004cf908ec0f5f2aa0d8f1f8e14b540562a2887a1111ff1ecbf7b"
+dependencies = [
+ "crunchy",
+ "fixed-hash",
+ "impl-rlp",
+ "impl-serde",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "ethereum-types"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ab15ed80916029f878e0267c3a9f92b67df55e79af370bf66199059ae2b4ee3"
+dependencies = [
+ "ethbloom",
+ "fixed-hash",
+ "impl-rlp",
+ "impl-serde",
+ "primitive-types",
+ "uint",
+]
+
+[[package]]
+name = "ethrex-common"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "crc32fast",
+ "ethereum-types",
+ "ethrex-crypto",
+ "ethrex-rlp",
+ "ethrex-trie",
+ "hex",
+ "hex-literal",
+ "hex-simd",
+ "indexmap 2.14.0",
+ "lazy_static",
+ "libc",
+ "lru",
+ "once_cell",
+ "rayon",
+ "rkyv",
+ "rustc-hash",
+ "secp256k1",
+ "serde",
+ "serde_json",
+ "sha2",
+ "thiserror 2.0.18",
+ "tracing",
+]
+
+[[package]]
+name = "ethrex-crypto"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "ark-bn254",
+ "ark-ec",
+ "ark-ff",
+ "bls12_381",
+ "ethereum-types",
+ "ff",
+ "hex-literal",
+ "k256",
+ "malachite",
+ "num-bigint",
+ "p256",
+ "ripemd",
+ "secp256k1",
+ "sha2",
+ "thiserror 2.0.18",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "ethrex-guest-program"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "ethereum-types",
+ "ethrex-common",
+ "ethrex-crypto",
+ "ethrex-l2-common",
+ "ethrex-rlp",
+ "ethrex-vm",
+ "hex",
+ "rkyv",
+ "serde",
+ "serde_with",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "ethrex-l2-common"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "ethereum-types",
+ "ethrex-common",
+ "ethrex-crypto",
+ "k256",
+ "lambdaworks-crypto",
+ "rkyv",
+ "secp256k1",
+ "serde",
+ "serde_with",
+ "thiserror 2.0.18",
+ "tracing",
+]
+
+[[package]]
+name = "ethrex-levm"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "derive_more",
+ "ethrex-common",
+ "ethrex-crypto",
+ "ethrex-rlp",
+ "malachite",
+ "rayon",
+ "rustc-hash",
+ "serde",
+ "strum",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "ethrex-rlp"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "ethereum-types",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "ethrex-tests"
+version = "0.1.0"
+dependencies = [
+ "ethrex-guest-program",
+ "executor",
+ "rkyv",
+]
+
+[[package]]
+name = "ethrex-trie"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "anyhow",
+ "bytes",
+ "crossbeam",
+ "ethereum-types",
+ "ethrex-crypto",
+ "ethrex-rlp",
+ "lazy_static",
+ "rayon",
+ "rkyv",
+ "rustc-hash",
+ "serde",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "ethrex-vm"
+version = "13.0.0"
+source = "git+https://github.com/lambdaclass/ethrex.git?rev=156cb8d6a3974f411d71622eecd1b249ee37ff1c#156cb8d6a3974f411d71622eecd1b249ee37ff1c"
+dependencies = [
+ "bytes",
+ "derive_more",
+ "dyn-clone",
+ "ethrex-common",
+ "ethrex-crypto",
+ "ethrex-levm",
+ "ethrex-rlp",
+ "rayon",
+ "rustc-hash",
+ "serde",
+ "thiserror 2.0.18",
+ "tracing",
+]
+
+[[package]]
+name = "executor"
+version = "0.1.0"
+dependencies = [
+ "ecsm",
+ "rustc-demangle",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "ff"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
+dependencies = [
+ "bitvec",
+ "rand_core",
+ "subtle",
+]
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "fixed-hash"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "835c052cb0c08c1acf6ffd71c022172e18723949c8282f2b9f27efbc51e64534"
+dependencies = [
+ "byteorder",
+ "rand",
+ "rustc-hex",
+ "static_assertions",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2"
+dependencies = [
+ "typenum",
+ "version_check",
+ "zeroize",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "group"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
+dependencies = [
+ "ff",
+ "rand_core",
+ "subtle",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "allocator-api2",
+ "foldhash 0.1.5",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash 0.2.0",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "hex-conservative"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fda06d18ac606267c40c04e41b9947729bf8b9efe74bd4e82b61a5f26a510b9f"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
+name = "hex-conservative"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830e599c2904b08f0834ee6337d8fe8f0ed4a63b5d9e7a7f49c0ffa06d08d360"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
+name = "hex-literal"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
+
+[[package]]
+name = "hex-simd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f7685beb53fc20efc2605f32f5d51e9ba18b8ef237961d1760169d2290d3bee"
+dependencies = [
+ "outref",
+ "vsimd",
+]
+
+[[package]]
+name = "hmac"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
+[[package]]
+name = "impl-codec"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d40b9d5e17727407e55028eafc22b2dc68781786e6d7eb8a21103f5058e3a14"
+dependencies = [
+ "parity-scale-codec",
+]
+
+[[package]]
+name = "impl-rlp"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54ed8ad1f3877f7e775b8cbf30ed1bd3209a95401817f19a0eb4402d13f8cf90"
+dependencies = [
+ "rlp",
+]
+
+[[package]]
+name = "impl-serde"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a143eada6a1ec4aefa5049037a26a6d597bfd64f8c026d07b77133e02b7dd0b"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "impl-trait-for-tuples"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+ "serde",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.17.1",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "js-sys"
+version = "0.3.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53b44bfcdb3f8d5837a46dae1ca9660a837176eee74a28b229bc626816589102"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "k256"
+version = "0.13.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
+dependencies = [
+ "cfg-if",
+ "ecdsa",
+ "elliptic-curve",
+ "once_cell",
+ "sha2",
+ "signature",
+]
+
+[[package]]
+name = "keccak"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653"
+dependencies = [
+ "cpufeatures",
+]
+
+[[package]]
+name = "konst"
+version = "0.2.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb"
+dependencies = [
+ "konst_macro_rules",
+]
+
+[[package]]
+name = "konst_macro_rules"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37"
+
+[[package]]
+name = "lambdaworks-crypto"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "58b1a1c1102a5a7fbbda117b79fb3a01e033459c738a3c1642269603484fd1c1"
+dependencies = [
+ "lambdaworks-math",
+ "rand",
+ "rand_chacha",
+ "serde",
+ "sha2",
+ "sha3",
+]
+
+[[package]]
+name = "lambdaworks-math"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "018a95aa873eb49896a858dee0d925c33f3978d073c64b08dd4f2c9b35a017c6"
+dependencies = [
+ "getrandom",
+ "num-bigint",
+ "num-traits",
+ "rand",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
+[[package]]
+name = "log"
+version = "0.4.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
+
+[[package]]
+name = "lru"
+version = "0.16.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39"
+dependencies = [
+ "hashbrown 0.16.1",
+]
+
+[[package]]
+name = "malachite"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec410515e231332b14cd986a475d1c3323bcfa4c7efc038bfa1d5b410b1c57e4"
+dependencies = [
+ "malachite-base",
+ "malachite-nz",
+ "malachite-q",
+]
+
+[[package]]
+name = "malachite-base"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c738d3789301e957a8f7519318fcbb1b92bb95863b28f6938ae5a05be6259f34"
+dependencies = [
+ "hashbrown 0.15.5",
+ "itertools 0.14.0",
+ "libm",
+ "ryu",
+]
+
+[[package]]
+name = "malachite-nz"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1707c9a1fa36ce21749b35972bfad17bbf34cf5a7c96897c0491da321e387d3b"
+dependencies = [
+ "itertools 0.14.0",
+ "libm",
+ "malachite-base",
+ "wide",
+]
+
+[[package]]
+name = "malachite-q"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d764801aa4e96bbb69b389dcd03b50075345131cd63ca2e380bca71cc37a3675"
+dependencies = [
+ "itertools 0.14.0",
+ "malachite-base",
+ "malachite-nz",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "munge"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c"
+dependencies = [
+ "munge_macro",
+]
+
+[[package]]
+name = "munge_macro"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c863e9ab5e7bf9c99ba75e1050f1e4d624ae87ed3532d6238ffbdc7b585dbbe6"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-conv"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441"
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "outref"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
+
+[[package]]
+name = "p256"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
+dependencies = [
+ "ecdsa",
+ "elliptic-curve",
+ "primeorder",
+ "sha2",
+]
+
+[[package]]
+name = "pairing"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81fec4625e73cf41ef4bb6846cafa6d44736525f442ba45e407c4a000a13996f"
+dependencies = [
+ "group",
+]
+
+[[package]]
+name = "parity-scale-codec"
+version = "3.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "799781ae679d79a948e13d4824a40970bfa500058d245760dd857301059810fa"
+dependencies = [
+ "arrayvec",
+ "bitvec",
+ "byte-slice-cast",
+ "const_format",
+ "impl-trait-for-tuples",
+ "parity-scale-codec-derive",
+ "rustversion",
+ "serde",
+]
+
+[[package]]
+name = "parity-scale-codec-derive"
+version = "3.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34b4653168b563151153c9e4c08ebed57fb8262bebfa79711552fa983c623e7a"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "pkcs8"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
+dependencies = [
+ "der",
+ "spki",
+]
+
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "primeorder"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
+dependencies = [
+ "elliptic-curve",
+]
+
+[[package]]
+name = "primitive-types"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d15600a7d856470b7d278b3fe0e311fe28c2526348549f8ef2ff7db3299c87f5"
+dependencies = [
+ "fixed-hash",
+ "impl-codec",
+ "impl-rlp",
+ "impl-serde",
+ "uint",
+]
+
+[[package]]
+name = "proc-macro-crate"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
+dependencies = [
+ "toml_edit",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "ptr_meta"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79"
+dependencies = [
+ "ptr_meta_derive",
+]
+
+[[package]]
+name = "ptr_meta_derive"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
+[[package]]
+name = "rancor"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daff8b7b3ccf5f7ba270b3e7a0a4d4c701c5797e38dec27c7e2c3dbb830fed1c"
+dependencies = [
+ "ptr_meta",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "ref-cast"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
+dependencies = [
+ "ref-cast-impl",
+]
+
+[[package]]
+name = "ref-cast-impl"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "rend"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "663ba70707f96e871406fe10d68128412e619b06d1d47cb91c3a4c6501176240"
+dependencies = [
+ "bytecheck",
+]
+
+[[package]]
+name = "rfc6979"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
+dependencies = [
+ "hmac",
+ "subtle",
+]
+
+[[package]]
+name = "ripemd"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd124222d17ad93a644ed9d011a40f4fb64aa54275c08cc216524a9ea82fb09f"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "rkyv"
+version = "0.8.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73389e0c99e664f919275ab5b5b0471391fe9a8de61e1dff9b1eaf56a90f16e3"
+dependencies = [
+ "bytecheck",
+ "bytes",
+ "hashbrown 0.17.1",
+ "indexmap 2.14.0",
+ "munge",
+ "ptr_meta",
+ "rancor",
+ "rend",
+ "rkyv_derive",
+ "tinyvec",
+ "uuid",
+]
+
+[[package]]
+name = "rkyv_derive"
+version = "0.8.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d2ed0b54125315fb36bd021e82d314d1c126548f871634b483f46b31d13cac6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "rlp"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa24e92bb2a83198bb76d661a71df9f7076b8c420b8696e4d3d97d50d94479e3"
+dependencies = [
+ "bytes",
+ "rustc-hex",
+]
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b1e7f9a428571be2dc5bc0505c13fb6bf936822b894ec87abf8a08a4e51742d"
+
+[[package]]
+name = "rustc-hex"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6"
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
+[[package]]
+name = "safe_arch"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "schemars"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "sec1"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
+dependencies = [
+ "base16ct",
+ "der",
+ "generic-array",
+ "pkcs8",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "secp256k1"
+version = "0.30.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b50c5943d326858130af85e049f2661ba3c78b26589b8ab98e65e80ae44a1252"
+dependencies = [
+ "bitcoin_hashes",
+ "rand",
+ "secp256k1-sys",
+]
+
+[[package]]
+name = "secp256k1-sys"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4387882333d3aa8cb20530a17c69a3752e97837832f34f6dccc760e715001d9"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.150"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "serde_with"
+version = "3.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a5c54c7310e7b8b9577c286d7e399ddd876c3e12b3ed917a8aabc4b96e9e8c"
+dependencies = [
+ "base64",
+ "bs58",
+ "chrono",
+ "hex",
+ "indexmap 1.9.3",
+ "indexmap 2.14.0",
+ "schemars 0.9.0",
+ "schemars 1.2.1",
+ "serde_core",
+ "serde_json",
+ "serde_with_macros",
+ "time",
+]
+
+[[package]]
+name = "serde_with_macros"
+version = "3.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84d57bc0c8b9a17920c178daa6bb924850d54a9c97ab45194bb8c17ad66bb660"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "sha3"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77fd7028345d415a4034cf8777cd4f8ab1851274233b45f84e3d955502d93874"
+dependencies = [
+ "digest",
+ "keccak",
+]
+
+[[package]]
+name = "shlex"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
+
+[[package]]
+name = "signature"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
+dependencies = [
+ "digest",
+ "rand_core",
+]
+
+[[package]]
+name = "simdutf8"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "spki"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
+dependencies = [
+ "base64ct",
+ "der",
+]
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "syn"
+version = "2.0.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "time"
+version = "0.3.53"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18dfaaeddcb932337b5e7866ee7d0ce9b76d2fd092997146f187ec09b4558a50"
+dependencies = [
+ "deranged",
+ "num-conv",
+ "powerfmt",
+ "serde_core",
+ "time-core",
+ "time-macros",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1c906769ad99c88eaa54e728060edef082f8e358ff32030cb7c7d315e81109"
+
+[[package]]
+name = "time-macros"
+version = "0.2.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c431b87111666e491a90baa837f914fb45cd5dc3c268591b0220ff5057f2085f"
+dependencies = [
+ "num-conv",
+ "time-core",
+]
+
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "toml_datetime"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.25.12+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7"
+dependencies = [
+ "indexmap 2.14.0",
+ "toml_datetime",
+ "toml_parser",
+ "winnow",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "typenum"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
+
+[[package]]
+name = "uint"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "909988d098b2f738727b161a106cfc7cab00c539c2687a8836f8e565976fb53e"
+dependencies = [
+ "byteorder",
+ "crunchy",
+ "hex",
+ "static_assertions",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
+[[package]]
+name = "uuid"
+version = "1.23.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf80a72845275afea99e7f2b434723d3bc7e38470fcd1c7ed39a599c73319a53"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "vsimd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b067c0c11094aef6b7a801c1e34a26affafdf3d051dba08456b868789aaf9a4"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "167ce5e579f6bcf889c4f7175a8a5a585de84e8ff93976ce393efa5f2837aab1"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3997c7839262f4ef12cf90b818d6340c18e80f263f1a94bf157d0ec4420380e"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc1b4cb0cc549fcf58d7dfc081778139b3d283a081644e833e84682ad71cea24"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "wide"
+version = "0.7.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03"
+dependencies = [
+ "bytemuck",
+ "safe_arch",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "winnow"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e"
+dependencies = [
+ "zeroize_derive",
+]
+
+[[package]]
+name = "zeroize_derive"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c50655cbb0fe3fc43170059e702f1ce5e19b84cec58dc87b037a09935c2f328"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/tooling/ethrex-tests/Cargo.toml b/tooling/ethrex-tests/Cargo.toml
new file mode 100644
index 000000000..f4774278d
--- /dev/null
+++ b/tooling/ethrex-tests/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "ethrex-tests"
+version = "0.1.0"
+edition = "2024"
+
+# Detached workspace: ethrex pins rkyv with the `unaligned` feature (a global
+# archived-layout switch), which must never feature-unify with the main
+# workspace's aligned rkyv proof format. See tests/ethrex.rs.
+[workspace]
+
+[dev-dependencies]
+executor = { path = "../../executor" }
+# Pinned to the SAME ethrex rev as the guest (open LambdaVM-backend PR branch)
+# so the native reference reads the same ProgramInput rkyv layout.
+ethrex-guest-program = { git = "https://github.com/lambdaclass/ethrex.git", rev = "156cb8d6a3974f411d71622eecd1b249ee37ff1c", package = "ethrex-guest-program" }
+# Exact pin: the fixture writer and the guest/executor readers must agree on the
+# rkyv layout. Keep this in sync with tooling/ethrex-fixtures and
+# executor/programs/rust/ethrex/Cargo.toml.
+rkyv = { version = "=0.8.16", features = ["std", "unaligned"] }
+
+# Match the root workspace's optimized dev profile: these tests execute the VM.
+[profile.dev]
+opt-level = 3
+debug = true
diff --git a/tooling/ethrex-tests/tests/ethrex.rs b/tooling/ethrex-tests/tests/ethrex.rs
new file mode 100644
index 000000000..c87ccceba
--- /dev/null
+++ b/tooling/ethrex-tests/tests/ethrex.rs
@@ -0,0 +1,99 @@
+//! Host-reference execution tests for the ethrex guest, relocated out of the
+//! `executor` test suite: `ethrex-guest-program` pins rkyv with the
+//! `unaligned` feature (a global archived-layout switch), which would
+//! feature-unify with the main workspace's aligned rkyv and silently change
+//! the proof wire format. This crate is a detached workspace so the two rkyv
+//! configurations never meet.
+//!
+//! Fixtures are generated by `tooling/ethrex-fixtures`; the guest ELF comes
+//! from `make compile-programs`. See `executor/tests/README.md`.
+
+use std::sync::Arc;
+
+use ethrex_guest_program::crypto::NativeCrypto;
+use ethrex_guest_program::l1::{ProgramInput, execution_program};
+use executor::elf::Elf;
+use executor::vm::execution::{Executor, ExecutorError, ReturnValues};
+
+/// Guest ELF under test, given relative to this crate's root.
+const ELF_PATH: &str = "../../executor/program_artifacts/rust/ethrex.elf";
+/// Directory holding the rkyv `ProgramInput` fixtures.
+const FIXTURES_DIR: &str = "../../executor/tests";
+
+/// Loads the ELF at `elf_path` and runs it to completion with `private_inputs`
+/// as the guest's private input.
+///
+/// Executor errors are returned to the caller. An unreadable or unparsable ELF
+/// panics with the offending path, which points at missing or stale build
+/// artifacts rather than at guest behavior.
+fn run_program_without_expect(
+    elf_path: &str,
+    private_inputs: Vec<u8>,
+) -> Result<ReturnValues, ExecutorError> {
+    println!("Testing {elf_path}");
+    let elf_data = std::fs::read(elf_path).unwrap_or_else(|e| {
+        panic!("failed to read guest ELF {elf_path}: {e} (run `make compile-programs`)")
+    });
+    let program = Elf::load(&elf_data)
+        .unwrap_or_else(|e| panic!("failed to parse guest ELF {elf_path}: {e:?}"));
+    println!("Program entry: 0x{:016x}", program.entry_point);
+    let mut executor = Executor::new(&program, private_inputs)?;
+    // Keep resuming until the executor yields `None`; the returned logs are unused.
+    while let Some(_logs) = executor.resume()? {}
+    executor.finish()
+}
+
+/// Runs the guest at `elf_path` and asserts that its `memory_values` (the
+/// committed public output) equal `expected_output`.
+fn run_program_and_check_public_output(
+    elf_path: &str,
+    expected_output: Vec<u8>,
+    private_inputs: Vec<u8>,
+) {
+    let result =
+        run_program_without_expect(elf_path, private_inputs).expect("Failed to run program");
+
+    assert_eq!(result.memory_values, expected_output);
+}
+
+/// Differential check for one fixture: decodes `fixture` (a file name under
+/// [`FIXTURES_DIR`]) as an rkyv `ProgramInput`, executes it natively with
+/// `execution_program`, then feeds the same bytes to the guest ELF and asserts
+/// that the guest's public output equals the encoded native output. Every
+/// failure names the fixture path so a missing or stale file is obvious.
+fn assert_guest_matches_native(fixture: &str) {
+    let path = format!("{FIXTURES_DIR}/{fixture}");
+    let inputs =
+        std::fs::read(&path).unwrap_or_else(|e| panic!("failed to read fixture {path}: {e}"));
+    let input = rkyv::from_bytes::<ProgramInput, rkyv::rancor::Error>(&inputs)
+        .unwrap_or_else(|e| panic!("fixture {path} is not a valid ProgramInput archive: {e}"));
+    let output = execution_program(input, Arc::new(NativeCrypto))
+        .unwrap_or_else(|e| panic!("native execution of {path} failed: {e:?}"));
+    run_program_and_check_public_output(ELF_PATH, output.encode(), inputs);
+}
+
+/// Larger-block smoke test: a synthetic ethrex block with 10 ETH transfers.
+/// (Replaces the old `ethrex_hoodi.bin` real-block fixture, which was in the
+/// pre-Crypto-trait ethrex format and no longer deserializes.)
+#[ignore = "heavier synthetic block (10 txs); run in the dedicated --ignored CI step"]
+#[test]
+fn test_ethrex() {
+    assert_guest_matches_native("ethrex_10_transfers.bin");
+}
+
+/// Executes a stateless ethrex block containing a single (plain ETH transfer)
+/// transaction. Execution only — no proving — against the ethrex guest ELF
+/// built from the same pinned ethrex revision as the native reference.
+#[test]
+fn test_ethrex_simple_tx() {
+    assert_guest_matches_native("ethrex_simple_tx.bin");
+}
+
+/// Executes a stateless ethrex block with NO transactions (empty block).
+/// Execution only — no proving. Pins the committed `ethrex_empty_block.bin`
+/// fixture so its rkyv `ProgramInput` layout (the 0-tx edge case) is
+/// exercised and stays consistent with the guest across ethrex rev bumps.
+#[test]
+fn test_ethrex_empty_block() {
+    assert_guest_matches_native("ethrex_empty_block.bin");
+}

From ea14eebdfc32a2f6f778d3a24b2d691d091f05f6 Mon Sep 17 00:00:00 2001
From: Mario Rugiero <mrugiero@gmail.com>
Date: Thu, 2 Jul 2026 23:52:21 -0300
Subject: [PATCH 36/36] feat(executor)!: 16-align the private-input payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Widen the private-input header from 4 to 16 bytes ([len: u32 LE] +
12 reserved), moving the payload base to PRIVATE_INPUT_START + 16 —
16-aligned, so guests can read structured (rkyv-archived) input in
place with naturally-aligned loads instead of working around a
4-aligned base (the recursion blob's pad arithmetic; ethrex's rkyv
'unaligned' pin exists for the same reason and can eventually be
dropped upstream).

- executor: PRIVATE_INPUT_PAYLOAD_OFFSET = 16 (const-asserted
  16-aligned); store_private_inputs writes payload at +16
- syscalls: get_private_input(_slice) and ef_io read at +16
- prover: private_input_bytes mirrors the header in the PAGE/genesis
  image; verifier page bound uses the offset
- recursion prefix: 16 bytes (magic+version+reserved(8)) — pure
  framing now, sized to a multiple of the alignment;
  encode_recursion_input serializes directly after the prefix into an
  AlignedVec (no archive copy), so the host path is aligned by
  construction and the misaligned fallback only guards foreign buffers
- fixtures: fibonacci bench guest and test_private_input_xpage read
  the payload at +16; xpage now commits payload[0..8]

BREAKING: guests built against the +4 payload base read garbage;
rebuild all guest ELFs (make compile-programs).
---
 bench_vs/lambda/fibonacci/src/main.rs         |  4 +-
 bench_vs/lambda/recursion/src/main.rs         | 14 ++--
 .../programs/asm/test_private_input_xpage.s   | 10 +--
 executor/src/vm/memory.rs                     | 27 ++++++-
 executor/tests/asm.rs                         |  4 +-
 prover/src/lib.rs                             | 79 ++++++++++---------
 prover/src/tables/trace_builder.rs            | 15 ++--
 prover/src/tests/local_to_global_bus_tests.rs | 18 ++---
 prover/src/tests/prove_elfs_tests.rs          |  8 +-
 prover/src/tests/recursion_smoke_test.rs      | 16 ++--
 syscalls/src/ef_io.rs                         |  4 +-
 syscalls/src/syscalls.rs                      | 15 +++-
 12 files changed, 124 insertions(+), 90 deletions(-)

diff --git a/bench_vs/lambda/fibonacci/src/main.rs b/bench_vs/lambda/fibonacci/src/main.rs
index 9ef9ef69c..d3c595e37 100644
--- a/bench_vs/lambda/fibonacci/src/main.rs
+++ b/bench_vs/lambda/fibonacci/src/main.rs
@@ -15,7 +15,7 @@ fn panic(_info: &PanicInfo) -> ! {
 
 fn read_n() -> u64 {
     // Layout matches `syscalls::get_private_input`: 4-byte LE length prefix at
-    // PRIVATE_INPUT_START, payload at +4. We only need the first 8 bytes (u64).
+    // PRIVATE_INPUT_START, then 12 reserved bytes; payload at +16. We read its first 8 bytes (u64).
     let mut n_bytes = [0u8; 8];
 
     debug_assert!(
@@ -23,7 +23,7 @@ fn read_n() -> u64 {
         "private input too short to contain a u64"
     );
 
-    let input_data = (PRIVATE_INPUT_START + 4) as *const u8;
+    let input_data = (PRIVATE_INPUT_START + 16) as *const u8;
     n_bytes.copy_from_slice(unsafe { core::slice::from_raw_parts(input_data, 8) });
 
     u64::from_le_bytes(n_bytes)
diff --git a/bench_vs/lambda/recursion/src/main.rs b/bench_vs/lambda/recursion/src/main.rs
index 11e64f90c..f9d3215db 100644
--- a/bench_vs/lambda/recursion/src/main.rs
+++ b/bench_vs/lambda/recursion/src/main.rs
@@ -1,11 +1,11 @@
 //! Naive recursion guest: verifies an inner lambda-vm proof inside the VM.
 //!
-//! Private input layout: a 12-byte `"LVMR" + version + reserved` prefix
+//! Private input layout: a 16-byte `"LVMR" + version + reserved` prefix
 //! followed by an rkyv archive of `lambda_vm_prover::RecursionInput`
-//! `{ vm_proof, inner_elf, options, vkey }`. The prefix 16-aligns the archive
-//! in guest memory (the executor maps the payload at `PRIVATE_INPUT_START + 4`,
-//! which is only 4-aligned) and tags the format so the guest rejects a
-//! wrong-format blob before the unsafe access. The proof is verified **in
+//! `{ vm_proof, inner_elf, options, vkey }`. The prefix tags the format so the
+//! guest rejects a wrong-format blob before the unsafe access; sized to a
+//! multiple of 16, it keeps the archive 16-aligned at the executor's aligned
+//! payload base (`PRIVATE_INPUT_START + 16`). The proof is verified **in
 //! place** via `verify_recursion_blob` — no deserialization pass, no owned
 //! `VmProof`. Commits `vk_digest ‖ inner public output` on success: every
 //! input here is prover-supplied, so soundness comes from the outer verifier
@@ -34,8 +34,8 @@ pub fn main() -> ! {
     }));
 
     // Zero-copy: borrow the blob straight from the mapped private-input region.
-    // The 12-byte prefix puts the archive at a 16-aligned guest address, so the
-    // verifier's in-place doubleword loads don't trap.
+    // The payload base and prefix are both 16-aligned, so the archive sits at a
+    // 16-aligned guest address and the verifier's in-place loads don't trap.
     let blob = lambda_vm_syscalls::syscalls::get_private_input_slice();
     lambda_vm_prover::profile_markers::step_marker::<
         { lambda_vm_prover::profile_markers::STEP_DECODE_DONE },
diff --git a/executor/programs/asm/test_private_input_xpage.s b/executor/programs/asm/test_private_input_xpage.s
index abe58452c..a78bfdd1c 100644
--- a/executor/programs/asm/test_private_input_xpage.s
+++ b/executor/programs/asm/test_private_input_xpage.s
@@ -2,7 +2,7 @@
 	.globl	main
 main:
 	# Read private input directly from 0xFF000000 (memory-mapped).
-	# Layout: [len:u32 LE] [data...]
+	# Layout: [len:u32 LE] [12 reserved bytes] [data at +16]
 	# Commits 8 bytes of data.
 	#
 	# Note: lui in RV64 sign-extends to 64 bits. lui with 0xFF000 would give
@@ -16,11 +16,11 @@ main:
 	# Read length at 0xFF000000
 	lw	t3, 0(t0)		# 2: t3 = length
 
-	# Load 8 bytes of data at 0xFF000008 (aligned, 4 bytes into data region)
-	ld	t1, 8(t0)		# 3
+	# Load 8 bytes of data at 0xFF000010 (aligned, start of data region)
+	ld	t1, 16(t0)		# 3
 
-	# Commit 8 bytes from 0xFF000008
-	addi	a1, t0, 8		# 4: buf_addr = 0xFF000008
+	# Commit 8 bytes from 0xFF000010
+	addi	a1, t0, 16		# 4: buf_addr = 0xFF000010
 	li	a0, 1			# 5: fd = 1
 	li	a2, 8			# 6: count = 8
 	li	a7, 64			# 7: syscall = Commit
diff --git a/executor/src/vm/memory.rs b/executor/src/vm/memory.rs
index f349eeae6..8ad0a9fb6 100644
--- a/executor/src/vm/memory.rs
+++ b/executor/src/vm/memory.rs
@@ -48,10 +48,25 @@ pub const MAX_PUBLIC_OUTPUT_TOTAL_SIZE: u64 = 1024 * 1024;
 pub const MAX_PRIVATE_INPUT_SIZE: u64 = 64 * 1024 * 1024;
 /// Fixed high address where private input is mapped. Guest programs can read
 /// directly from this address (ZisK-style memory-mapped input).
-/// Layout: 4-byte LE length prefix at `PRIVATE_INPUT_START_INDEX`, then data at +4.
+/// Layout: a 16-byte header — 4-byte LE length prefix at
+/// `PRIVATE_INPUT_START_INDEX`, then 12 reserved bytes — followed by the
+/// payload at `+PRIVATE_INPUT_PAYLOAD_OFFSET`. The payload base is 16-aligned
+/// so guests can read structured (e.g. rkyv-archived) input in place with
+/// naturally-aligned loads.
 /// Must match `PRIVATE_INPUT_START` in `syscalls/src/syscalls.rs`.
 pub const PRIVATE_INPUT_START_INDEX: u64 = 0xFF000000;
 
+/// Byte offset of the private-input payload from [`PRIVATE_INPUT_START_INDEX`]:
+/// the size of the `[len: u32 LE][reserved: 12 bytes]` header. A multiple of 16
+/// so the payload base stays 16-aligned.
+/// Must match `PRIVATE_INPUT_PAYLOAD_OFFSET` in `syscalls/src/syscalls.rs`.
+pub const PRIVATE_INPUT_PAYLOAD_OFFSET: u64 = 16;
+
+const _: () = assert!(
+    (PRIVATE_INPUT_START_INDEX + PRIVATE_INPUT_PAYLOAD_OFFSET).is_multiple_of(16),
+    "private-input payload base must be 16-aligned",
+);
+
 #[derive(Default, Debug, Clone)]
 pub struct Memory {
     cells: U64HashMap<[u8; 4]>,
@@ -218,8 +233,9 @@ impl Memory {
         Ok(self.public_output.clone())
     }
 
-    /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX` as a
-    /// 4-byte LE length prefix followed by the raw data. The guest reads these
+    /// Pre-loads private input bytes at `PRIVATE_INPUT_START_INDEX`: a 4-byte
+    /// LE length prefix, 12 reserved (zero) bytes, then the raw data at
+    /// `+PRIVATE_INPUT_PAYLOAD_OFFSET` (16-aligned). The guest reads these
     /// bytes directly via normal RISC-V loads (ZisK-style memory-mapped input).
     pub fn store_private_inputs(&mut self, inputs: Vec<u8>) -> Result<(), MemoryError> {
         if inputs.is_empty() {
@@ -231,7 +247,10 @@ impl Memory {
         let len_u32 =
             u32::try_from(inputs.len()).map_err(|_| MemoryError::PrivateInputSizeExceeded)?;
         self.store_word(PRIVATE_INPUT_START_INDEX, len_u32)?;
-        self.set_bytes_aligned(PRIVATE_INPUT_START_INDEX + 4, &inputs)?;
+        self.set_bytes_aligned(
+            PRIVATE_INPUT_START_INDEX + PRIVATE_INPUT_PAYLOAD_OFFSET,
+            &inputs,
+        )?;
         Ok(())
     }
 
diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs
index a1c9baf2b..11a2fc644 100644
--- a/executor/tests/asm.rs
+++ b/executor/tests/asm.rs
@@ -33,8 +33,8 @@ fn test_private_input_memory_mapped() {
     let input: Vec<u8> = (0u8..16).collect();
     let executor = Executor::new(&program, input.clone()).unwrap();
     let result = executor.run().unwrap();
-    // Committed bytes are at 0xFF000008 = data bytes [4..12]
-    assert_eq!(result.return_values.memory_values, input[4..12].to_vec());
+    // Committed bytes are at 0xFF000010 = data bytes [0..8]
+    assert_eq!(result.return_values.memory_values, input[0..8].to_vec());
 }
 
 #[test]
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index 18734a7d7..cbee1f51a 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -196,16 +196,16 @@ pub struct RecursionInput {
 }
 
 // ============================================================================
-// Recursion-input wire format: aligning magic prefix + rkyv archive
+// Recursion-input wire format: magic/version prefix + rkyv archive
 // ============================================================================
 //
-// The guest reads the archive in place with naturally-aligned loads (archived
-// field elements are 8-aligned; we require 16 for headroom), and the executor
-// traps unaligned doubleword loads. The executor maps the private input as
-// `[u32 len][payload...]` with the payload at `PRIVATE_INPUT_START + 4`, which
-// is only 4-aligned. A fixed prefix pads the payload so the archive that
-// follows lands 16-aligned, and doubles as a magic + version tag the guest
-// validates before the unsafe access.
+// The guest reads the archive in place with naturally-aligned loads, so the
+// archive's first byte must sit 16-aligned in guest memory. The executor maps
+// the private-input payload at a 16-aligned base
+// (`PRIVATE_INPUT_START + PRIVATE_INPUT_PAYLOAD_OFFSET`), so the prefix is
+// pure framing — a magic + version tag the guest validates before the unsafe
+// access — sized to a multiple of the alignment so the archive after it stays
+// aligned. Asserted below against the executor ABI.
 
 /// 4-byte magic identifying a lambda-vm recursion input blob ("LVMR").
 pub const RECURSION_INPUT_MAGIC: [u8; 4] = *b"LVMR";
@@ -216,38 +216,44 @@ pub const RECURSION_INPUT_VERSION: u32 = 1;
 /// Required alignment (bytes) of the archive's first byte in guest memory.
 pub const RECURSION_INPUT_ALIGN: usize = 16;
 
-/// Aligning prefix length: `magic(4) + version(4) + reserved(4) = 12` bytes,
-/// chosen so the archive starts 16-aligned given the executor's
-/// `PRIVATE_INPUT_START + 4` payload base. Asserted below.
-pub const RECURSION_INPUT_PREFIX_LEN: usize = 12;
+/// Prefix length: `magic(4) + version(4) + reserved(8) = 16` bytes — a
+/// multiple of [`RECURSION_INPUT_ALIGN`] so the archive after it stays
+/// aligned. Asserted below.
+pub const RECURSION_INPUT_PREFIX_LEN: usize = 16;
 
 const _: () = {
-    let payload_base = (executor::vm::memory::PRIVATE_INPUT_START_INDEX as usize) + 4;
-    let pad =
-        (RECURSION_INPUT_ALIGN - (payload_base % RECURSION_INPUT_ALIGN)) % RECURSION_INPUT_ALIGN;
-    assert!(
-        RECURSION_INPUT_PREFIX_LEN == pad,
-        "prefix length must align the archive to RECURSION_INPUT_ALIGN given the private-input payload base",
-    );
+    let payload_base = (executor::vm::memory::PRIVATE_INPUT_START_INDEX
+        + executor::vm::memory::PRIVATE_INPUT_PAYLOAD_OFFSET) as usize;
     assert!(
         (payload_base + RECURSION_INPUT_PREFIX_LEN).is_multiple_of(RECURSION_INPUT_ALIGN),
         "archive must start at a RECURSION_INPUT_ALIGN-aligned guest address",
     );
+    assert!(
+        RECURSION_INPUT_PREFIX_LEN >= 8,
+        "prefix must hold at least magic + version",
+    );
 };
 
-/// Encode a [`RecursionInput`] into the on-wire blob: a 12-byte
-/// `magic + version + reserved` prefix followed by the rkyv archive. The prefix
-/// both aligns the archive in guest memory (so in-place reads don't trap) and
-/// tags the format/version so the guest can validate before the unsafe access.
-pub fn encode_recursion_input(input: &RecursionInput) -> Result<Vec<u8>, Error> {
-    let archive = rkyv::to_bytes::<rkyv::rancor::Error>(input)
-        .map_err(|e| Error::Execution(format!("rkyv encode failed: {e}")))?;
-    let mut blob = Vec::with_capacity(RECURSION_INPUT_PREFIX_LEN + archive.len());
+/// Encode a [`RecursionInput`] into the on-wire blob: the 16-byte
+/// `magic + version + reserved` prefix followed by the rkyv archive,
+/// serialized directly after the prefix (no archive copy). The returned
+/// buffer is 16-aligned and the prefix length is a multiple of the alignment,
+/// so the archive is aligned both here and — because the guest payload base
+/// is 16-aligned — in guest memory.
+pub fn encode_recursion_input(
+    input: &RecursionInput,
+) -> Result<rkyv::util::AlignedVec<{ RECURSION_INPUT_ALIGN }>, Error> {
+    let mut blob = rkyv::util::AlignedVec::<{ RECURSION_INPUT_ALIGN }>::new();
     blob.extend_from_slice(&RECURSION_INPUT_MAGIC);
     blob.extend_from_slice(&RECURSION_INPUT_VERSION.to_le_bytes());
-    blob.extend_from_slice(&[0u8; 4]); // reserved
+    blob.extend_from_slice(&[0u8; 8]); // reserved
     debug_assert_eq!(blob.len(), RECURSION_INPUT_PREFIX_LEN);
-    blob.extend_from_slice(&archive);
+    // rkyv computes alignment padding against positions from the buffer
+    // start; the prefix is a multiple of RECURSION_INPUT_ALIGN, so archive
+    // offsets keep the same residues and `&blob[PREFIX_LEN..]` is a valid
+    // aligned archive.
+    let blob = rkyv::api::high::to_bytes_in::<_, rkyv::rancor::Error>(input, blob)
+        .map_err(|e| Error::Execution(format!("rkyv encode failed: {e}")))?;
     Ok(blob)
 }
 
@@ -1237,11 +1243,9 @@ pub struct RecursionVerification<'a> {
 pub fn verify_recursion_blob(blob: &[u8]) -> Result<RecursionVerification<'_>, Error> {
     use rkyv::rancor::Error as RkyvError;
 
-    // Validate + strip the aligning magic/version prefix. In the guest the
-    // returned slice starts at the 16-aligned archive base (the prefix exists
-    // precisely so the archive lands aligned at
-    // `PRIVATE_INPUT_START + 4 + PREFIX_LEN`), so the in-place doubleword
-    // loads do not trap.
+    // Validate + strip the magic/version prefix. In the guest the returned
+    // slice starts at the 16-aligned archive base (aligned payload base +
+    // 16-byte prefix), so the in-place doubleword loads do not trap.
     let archive_bytes = recursion_archive_bytes(blob).ok_or_else(|| {
         Error::Execution(String::from("recursion blob: bad magic or version"))
     })?;
@@ -1344,8 +1348,11 @@ fn verify_archived_parts(
     // MAX_PRIVATE_INPUT_SIZE fits in ~257 pages of DEFAULT_PAGE_SIZE.
     {
         use crate::tables::page::DEFAULT_PAGE_SIZE;
-        use executor::vm::memory::MAX_PRIVATE_INPUT_SIZE;
-        let max_pages = (MAX_PRIVATE_INPUT_SIZE as usize + 4).div_ceil(DEFAULT_PAGE_SIZE) + 1;
+        use executor::vm::memory::{MAX_PRIVATE_INPUT_SIZE, PRIVATE_INPUT_PAYLOAD_OFFSET};
+        let max_pages = (MAX_PRIVATE_INPUT_SIZE as usize
+            + PRIVATE_INPUT_PAYLOAD_OFFSET as usize)
+            .div_ceil(DEFAULT_PAGE_SIZE)
+            + 1;
         if num_private_input_pages > max_pages {
             return Err(Error::InvalidTableCounts(format!(
                 "num_private_input_pages ({num_private_input_pages}) exceeds max ({max_pages})",
diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs
index 93f3ba563..73a42bbfc 100644
--- a/prover/src/tables/trace_builder.rs
+++ b/prover/src/tables/trace_builder.rs
@@ -1842,12 +1842,15 @@ fn collect_byte_check_ops_for_padding(num_padding_rows: usize) -> Vec<BitwiseOpe
-/// Encode private input as `[len_u32_LE][data]` — the canonical wire format.
+/// Encode private input as `[len_u32_LE][12 zero bytes][data]` — the canonical wire format.
 /// Must match `executor::vm::memory::Memory::store_private_inputs`.
 fn private_input_bytes(private_input: &[u8]) -> Vec<u8> {
-    let len_bytes = (private_input.len() as u32).to_le_bytes();
-    len_bytes
-        .iter()
-        .chain(private_input.iter())
-        .copied()
-        .collect()
+    // Mirrors `Memory::store_private_inputs`: `[len: u32 LE][reserved: 12
+    // zero bytes][payload]` — the payload base is 16-aligned.
+    use executor::vm::memory::PRIVATE_INPUT_PAYLOAD_OFFSET;
+    let mut bytes =
+        Vec::with_capacity(PRIVATE_INPUT_PAYLOAD_OFFSET as usize + private_input.len());
+    bytes.extend_from_slice(&(private_input.len() as u32).to_le_bytes());
+    bytes.resize(PRIVATE_INPUT_PAYLOAD_OFFSET as usize, 0);
+    bytes.extend_from_slice(private_input);
+    bytes
 }
 
 /// Build the initial-memory image (byte address -> value) from the ELF segments
diff --git a/prover/src/tests/local_to_global_bus_tests.rs b/prover/src/tests/local_to_global_bus_tests.rs
index 263e3d938..6416b4514 100644
--- a/prover/src/tests/local_to_global_bus_tests.rs
+++ b/prover/src/tests/local_to_global_bus_tests.rs
@@ -1015,7 +1015,8 @@ fn test_l2g_design_y_orphan_mu_zero_rejects() {
 /// epoch boundaries) and the resulting multi-epoch L2G chain verifies end-to-end.
 ///
 /// The fixture reads 16 bytes of private input from 0xFF000000, then commits
-/// bytes 4..12 (8 bytes after the 4-byte length prefix). With `epoch_size_log2=2`
+/// payload bytes 0..8 (the payload starts at +16, after the input header).
+/// With `epoch_size_log2=2`
 /// (4 cycles) the 11-cycle program spans three epochs: epoch 0 reads the private-input
 /// page (touching 0xFF000000..), epoch 1 performs the commit syscall, epoch 2
 /// halts. The private-input page's L2G entry (epoch 0 fini → epoch 1+ init)
@@ -1028,15 +1029,12 @@ fn test_l2g_design_y_orphan_mu_zero_rejects() {
 fn test_continuation_private_input_spans_epochs() {
     let elf_bytes = crate::test_utils::asm_elf_bytes("test_private_input_xpage");
 
-    // 16-byte private input: 4-byte length prefix (=16) + 8 bytes of payload
-    // that will be committed + 4 padding bytes (the fixture commits bytes 4..12).
+    // 16-byte private input: the fixture commits payload bytes 0..8.
     let mut input: Vec<u8> = Vec::with_capacity(16);
-    // Length prefix: 16 as little-endian u32.
-    input.extend_from_slice(&16u32.to_le_bytes());
     // 8-byte payload that will be committed.
     input.extend_from_slice(&[0x11u8, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]);
-    // 4 trailing padding bytes (not committed).
-    input.extend_from_slice(&[0x00u8, 0x00, 0x00, 0x00]);
+    // 8 trailing padding bytes (not committed).
+    input.extend_from_slice(&[0u8; 8]);
     assert_eq!(input.len(), 16);
 
     let result = crate::continuation::prove_and_verify_continuation(
@@ -1049,11 +1047,11 @@ fn test_continuation_private_input_spans_epochs() {
     // The continuation must prove and verify without error.
     let output = result.expect("prove_and_verify_continuation must not error");
 
-    // The fixture commits bytes 4..12 of private input (the 8-byte payload).
+    // The fixture commits payload bytes 0..8 of the private input.
     assert_eq!(
         output.as_deref(),
-        Some(&input[4..12]),
-        "committed output must equal private input bytes 4..12"
+        Some(&input[0..8]),
+        "committed output must equal private input bytes 0..8"
     );
 }
 
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index 98d10e5dc..4b5f2cbad 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -2666,7 +2666,7 @@ fn test_prove_private_input_xpage() {
     let input: Vec<u8> = (0u8..16).collect();
     let proof = prove_vm_minimal(&elf_bytes, &input, &Default::default());
     assert!(verify_vm_minimal(&proof, &elf_bytes), "proof should verify");
-    assert_eq!(proof.public_output, input[4..12].to_vec());
+    assert_eq!(proof.public_output, input[0..8].to_vec());
 }
 
 /// Same ASM, different input values — output depends on input.
@@ -2679,7 +2679,7 @@ fn test_prove_private_input_different_values() {
     ];
     let proof = prove_vm_minimal(&elf_bytes, &input, &Default::default());
     assert!(verify_vm_minimal(&proof, &elf_bytes), "proof should verify");
-    assert_eq!(proof.public_output, input[4..12].to_vec());
+    assert_eq!(proof.public_output, input[0..8].to_vec());
 }
 
 /// End-to-end: EF zkVM IO interface — demo guest reads its private input via
@@ -2868,8 +2868,8 @@ fn test_proof_does_not_contain_private_input_field() {
     // not the actual bytes. Verify the proof's public fields don't contain them.
     assert_eq!(vm_proof.num_private_input_pages, 1);
     // public_output is the committed output, NOT the private input.
-    // It should contain bytes [4..12] of the input (what the ASM program commits).
-    assert_eq!(vm_proof.public_output, input[4..12].to_vec());
+    // It should contain bytes [0..8] of the input (what the ASM program commits).
+    assert_eq!(vm_proof.public_output, input[0..8].to_vec());
     // No `private_input` field exists — this is enforced by the type system,
     // but explicitly document that the proof carries only the page count.
     assert!(
diff --git a/prover/src/tests/recursion_smoke_test.rs b/prover/src/tests/recursion_smoke_test.rs
index 68b9e26c3..bc7a0596a 100644
--- a/prover/src/tests/recursion_smoke_test.rs
+++ b/prover/src/tests/recursion_smoke_test.rs
@@ -46,7 +46,7 @@ fn prove_inner_and_encode_blob(
     inner_elf: &[u8],
     inner_input: &[u8],
     opts: &stark::proof::options::ProofOptions,
-) -> (crate::VmProof, Vec<u8>) {
+) -> (crate::VmProof, rkyv::util::AlignedVec<16>) {
     eprintln!(
         "[{tag}] proving inner (blowup={}, fri_queries={}) ...",
         opts.blowup_factor, opts.fri_number_of_queries
@@ -197,7 +197,7 @@ fn setup_guest_run(
         "{guest_name} ELF has entry_point=0 — build artifact is malformed"
     );
     let executor =
-        executor::vm::execution::Executor::new(&program, blob).expect("Executor::new failed");
+        executor::vm::execution::Executor::new(&program, blob.to_vec()).expect("Executor::new failed");
     (guest_elf_bytes, program, executor)
 }
 
@@ -538,11 +538,11 @@ fn test_recursion_blob_decodes_and_verifies_on_host() {
         "vk digest must match the proof's"
     );
 
-    // Host buffers carry no alignment guarantee, so `verify_recursion_blob`
-    // must accept the blob at any base alignment (falling back to an aligned
-    // copy when needed). The plain call above already exercises the common
-    // misaligned case (`Vec` base + 12-byte prefix → 4-aligned archive);
-    // shifting the base by 4 covers another residue class.
+    // The encoder returns a 16-aligned buffer whose archive offset is a
+    // multiple of 16, so the plain call above exercises the zero-copy aligned
+    // host path. `verify_recursion_blob` must also accept arbitrary base
+    // alignment (falling back to one aligned copy): shift the base by 4 to
+    // exercise the misaligned fallback.
     let mut padded: Vec<u8> = Vec::with_capacity(blob.len() + 4);
     padded.extend_from_slice(&[0u8; 4]);
     padded.extend_from_slice(&blob);
@@ -726,7 +726,7 @@ fn test_dump_recursion_input() {
         prove_inner_and_encode_blob("dump-input", &empty_elf_bytes, &[], &MIN_PROOF_OPTIONS);
 
     let path = "/tmp/recursion_input.bin";
-    std::fs::write(path, &blob).expect("write blob");
+    std::fs::write(path, &blob[..]).expect("write blob");
     eprintln!("[dump-input] wrote {} bytes to {path}", blob.len());
 }
 
diff --git a/syscalls/src/ef_io.rs b/syscalls/src/ef_io.rs
index dabf7818d..4e21fd84e 100644
--- a/syscalls/src/ef_io.rs
+++ b/syscalls/src/ef_io.rs
@@ -18,7 +18,7 @@
 use core::arch::asm;
 
 #[cfg(target_arch = "riscv64")]
-use crate::syscalls::{PRIVATE_INPUT_START, SyscallNumbers};
+use crate::syscalls::{PRIVATE_INPUT_PAYLOAD_OFFSET, PRIVATE_INPUT_START, SyscallNumbers};
 
 /// EF IO: return a zero-copy pointer and size for the private input.
 ///
@@ -36,7 +36,7 @@ pub unsafe extern "C" fn read_input(buf_ptr: *mut *const u8, buf_size: *mut usiz
     unsafe {
         let len_ptr = PRIVATE_INPUT_START as *const u32;
         let len = core::ptr::read_volatile(len_ptr) as usize;
-        *buf_ptr = (PRIVATE_INPUT_START + 4) as *const u8;
+        *buf_ptr = (PRIVATE_INPUT_START + PRIVATE_INPUT_PAYLOAD_OFFSET) as *const u8;
         *buf_size = len;
     }
 }
diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs
index fa4e43edd..3a5a1dfc5 100644
--- a/syscalls/src/syscalls.rs
+++ b/syscalls/src/syscalls.rs
@@ -8,6 +8,12 @@ use core::arch::asm;
 #[cfg(target_arch = "riscv64")]
 pub const PRIVATE_INPUT_START: usize = 0xFF000000;
 
+/// Byte offset of the private-input payload from `PRIVATE_INPUT_START`: the size
+/// of the `[len: u32 LE][reserved: 12 bytes]` header. The payload base is 16-aligned
+/// so structured (e.g. rkyv-archived) input can be read in place.
+/// Must match `executor::vm::memory::PRIVATE_INPUT_PAYLOAD_OFFSET`.
+pub const PRIVATE_INPUT_PAYLOAD_OFFSET: usize = 16;
+
 #[cfg(target_arch = "riscv64")]
 pub enum SyscallNumbers {
     Print = 1,
@@ -87,7 +93,7 @@ pub fn get_private_input() -> Vec<u8> {
     // executor). The data pointer and length are within the memory-mapped region.
     let len_ptr = PRIVATE_INPUT_START as *const u32;
     let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize;
-    let data_ptr = (PRIVATE_INPUT_START + 4) as *const u8;
+    let data_ptr = (PRIVATE_INPUT_START + PRIVATE_INPUT_PAYLOAD_OFFSET) as *const u8;
     let slice = unsafe { core::slice::from_raw_parts(data_ptr, len) };
     slice.to_vec()
 }
@@ -99,8 +105,9 @@ pub fn get_private_input() -> Vec<u8> {
 
 /// Borrow the private input bytes in place from the memory-mapped region —
 /// no copy, no allocation. Same layout as [`get_private_input`]; the returned
-/// slice starts at `PRIVATE_INPUT_START + 4` (a 4-aligned address) and lives
-/// for the whole execution (the host never remaps the region).
+/// slice starts at `PRIVATE_INPUT_START + PRIVATE_INPUT_PAYLOAD_OFFSET` (a
+/// 16-aligned address) and lives for the whole execution (the host never
+/// remaps the region).
 #[cfg(target_arch = "riscv64")]
 pub fn get_private_input_slice() -> &'static [u8] {
     // SAFETY: The host pre-loads private input at PRIVATE_INPUT_START before
@@ -109,7 +116,7 @@ pub fn get_private_input_slice() -> &'static [u8] {
     // region, which stays mapped and unmodified for the whole execution.
     let len_ptr = PRIVATE_INPUT_START as *const u32;
     let len = unsafe { core::ptr::read_volatile(len_ptr) } as usize;
-    let data_ptr = (PRIVATE_INPUT_START + 4) as *const u8;
+    let data_ptr = (PRIVATE_INPUT_START + PRIVATE_INPUT_PAYLOAD_OFFSET) as *const u8;
     unsafe { core::slice::from_raw_parts(data_ptr, len) }
 }