BlockRunAI · VickyXAI · Jun 26, 2026 · Jun 26, 2026
diff --git a/src/payment-preauth.test.ts b/src/payment-preauth.test.ts
@@ -0,0 +1,151 @@
+/**
+ * Pre-auth cache correctness under per-request (token-based) pricing.
+ *
+ * BlockRun prices each call on input + max_tokens, so the same model can cost
+ * different amounts. These tests pin the guarantees that keep that from
+ * underpaying via a stale cached authorization:
+ *  - pre-auth is reused only when an up-front estimate proves it still covers
+ *    the request (fires on a same/cheaper repeat, skipped when the request grows),
+ *  - a rejected pre-auth is discarded and the request re-fetched cleanly — the
+ *    rejection is never treated as a fresh challenge (no "Failed to parse…"),
+ *  - with no estimator, pre-auth is disabled rather than risking an underpay.
+ */
+import { describe, it, expect, vi } from "vitest";
+import { x402Client } from "@x402/fetch";
+import { registerExactEvmScheme } from "@x402/evm/exact/client";
+import { toClientEvmSigner } from "@x402/evm";
+import { createPublicClient, http } from "viem";
+import { base } from "viem/chains";
+import { privateKeyToAccount } from "viem/accounts";
+import { deriveAllKeys } from "./wallet.js";
+import { createPayFetchWithPreAuth } from "./payment-preauth.js";
+
+const MNEMONIC = // hard-coded test seed phrase; testClient() derives its signing key from it
+  "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art";
+
+function testClient(): x402Client { // builds a new x402Client with the exact-EVM scheme registered
+  const keys = deriveAllKeys(MNEMONIC);
+  const account = privateKeyToAccount(keys.evmPrivateKey); // viem account that backs the signer
+  const pc = createPublicClient({ chain: base, transport: http() }); // http() is given no RPC URL here
+  const client = new x402Client();
+  registerExactEvmScheme(client, { signer: toClientEvmSigner(account, pc) }); // signer = account + public client
+  return client;
+}
+
+const CHALLENGE = { // payment-requirements payload that challenge402() encodes into its headers
+  x402Version: 2,
+  accepts: [
+    {
+      scheme: "exact",
+      network: "eip155:8453", // presumably the same chain as the viem `base` import — confirm
+      amount: "1000", // same figure the tests' estimators return for the small request
+      asset: "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913", // token contract; labelled by `extra` below
+      payTo: "0xe9030014F5DAe217d0A152f02A043567b16c1aBf",
+      maxTimeoutSeconds: 300,
+      extra: { name: "USD Coin", version: "2" },
+    },
+  ],
+  resource: { url: "https://gw/api", description: "t", mimeType: "application/json" },
+};
+
+function challenge402(): Response { // builds a new 402 Response that carries CHALLENGE
+  const b64 = Buffer.from(JSON.stringify(CHALLENGE)).toString("base64"); // CHALLENGE as base64 JSON
+  return new Response(JSON.stringify({ error: "Payment Required" }), { // body holds only an error message
+    status: 402,
+    headers: {
+      "payment-required": b64, // the same encoded payload is exposed through both headers
+      "www-authenticate": `X402 requirements="${b64}"`,
+      "content-type": "application/json",
+    },
+  });
+}
+
+/** Fake gateway. Returns `{ fn, calls, ctl }`: `fn` answers 200 to a request that has a
+ *  `payment-signature` header and challenge402() otherwise; `calls` logs `paid` for each
+ *  call; `ctl.rejectNextPaid` makes exactly one later paid request get a 402 instead. */
+function fakeGateway() {
+  const calls: Array<{ paid: boolean }> = [];
+  const ctl = { rejectNextPaid: false }; // flip AFTER seeding to reject a pre-auth
+  const fn = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
+    const req = new Request(input, init);
+    const paid = req.headers.has("payment-signature"); // "paid" = header present; its value is not checked
+    calls.push({ paid });
+    if (paid) {
+      if (ctl.rejectNextPaid) {
+        ctl.rejectNextPaid = false; // one-shot: only the next paid call is rejected
+        return challenge402(); // underpayment rejected — same well-formed payload as a fresh challenge
+      }
+      return new Response(JSON.stringify({ ok: true }), { status: 200 });
+    }
+    return challenge402();
+  });
+  return { fn: fn as unknown as typeof fetch, calls, ctl }; // mock is cast to fetch's type for the caller
+}
+
+const URL = "https://gw/api/v1/chat/completions"; // NOTE(review): shadows the global URL constructor in this module
+function body(maxTokens = 10) { // JSON request body; only max_tokens varies between calls
+  return JSON.stringify({ model: "test/model", max_tokens: maxTokens, messages: [] });
+}
+
+describe("payment pre-auth — per-request pricing safety", () => {
+  it("reuses pre-auth when the estimate proves the cache still covers it (no extra 402)", async () => {
+    const est = vi.fn(() => "1000"); // every request estimated equal (and equal to CHALLENGE.amount)
+    const gw = fakeGateway();
+    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est }); // undefined ttl → default
+
+    await pay(URL, { method: "POST", body: body() }); // seed: [unpaid→402, paid→200]
+    const seeded = gw.calls.length; // gateway calls consumed by the seed request
+    expect(gw.calls.map((c) => c.paid)).toEqual([false, true]);
+
+    const res = await pay(URL, { method: "POST", body: body() }); // identical → pre-auth
+    expect(res.status).toBe(200);
+    expect(gw.calls.length - seeded).toBe(1); // one round-trip, no 402
+    expect(gw.calls[seeded].paid).toBe(true); // it pre-paid
+  });
+
+  it("skips pre-auth (clean fresh 402) when the request grows beyond the cached amount", async () => {
+    let big = false; // flipped after seeding to raise the estimate
+    const est = vi.fn(() => (big ? "5000" : "1000")); // ignores its arguments; `big` alone picks the estimate
+    const gw = fakeGateway();
+    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est });
+
+    await pay(URL, { method: "POST", body: body(10) }); // seed cover=1000
+    const seeded = gw.calls.length; // gateway calls consumed by the seed request
+
+    big = true;
+    const res = await pay(URL, { method: "POST", body: body(9000) }); // needs 5000 > 1000
+    expect(res.status).toBe(200); // NOT a 500 "Failed to parse payment requirements"
+    // Skipped pre-auth → clean unpaid request first, then the paid retry.
+    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]);
+  });
+
+  it("discards a rejected pre-auth and re-fetches cleanly (no parse error)", async () => {
+    const est = vi.fn(() => "1000");
+    const gw = fakeGateway();
+    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est });
+
+    await pay(URL, { method: "POST", body: body() }); // seed (cache warm, cover=1000)
+    gw.calls.length = 0; // clear the log so the indexes below refer to the second request only
+    // Now make the next PAID request (the pre-auth) get rejected by the gateway.
+    gw.ctl.rejectNextPaid = true;
+    // pre-auth fires (covered) → rejected 402 → clean refetch → paid retry → 200
+    const res = await pay(URL, { method: "POST", body: body() });
+    expect(res.status).toBe(200);
+    const seq = gw.calls.map((c) => c.paid); // only first / contains / last are pinned below, not the full sequence
+    expect(seq[0]).toBe(true); // pre-auth attempt (got rejected)
+    expect(seq).toContain(false); // a CLEAN refetch followed (rejection not reused)
+    expect(seq[seq.length - 1]).toBe(true); // then paid correctly
+  });
+
+  it("disables pre-auth entirely when no estimator is provided (never underpays)", async () => {
+    const gw = fakeGateway();
+    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, {}); // no estimateAmount
+
+    await pay(URL, { method: "POST", body: body() }); // first request; would seed the cache if pre-auth were enabled
+    const seeded = gw.calls.length; // gateway calls consumed by the first request
+    const res = await pay(URL, { method: "POST", body: body() }); // identical
+    expect(res.status).toBe(200);
+    // No pre-auth → still a fresh 402 + paid retry, never a pre-signed first call.
+    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]);
+  });
+});
diff --git a/src/payment-preauth.ts b/src/payment-preauth.ts
@@ -6,7 +6,19 @@
  * On subsequent requests, pre-signs payment and attaches it to the first
  * request, skipping the 402 round trip (~200ms savings per request).
  *
- * Falls back to normal 402 flow if pre-signed payment is rejected.
+ * IMPORTANT — pricing is per-request, not per-model. BlockRun prices each call
+ * on (input tokens + max_tokens reservation), so two calls to the SAME model
+ * can cost different amounts. A cached payment authorizes one EXACT amount, so
+ * blindly reusing it for a larger request underpays — the gateway then rejects
+ * it with a 402 that is NOT a fresh x402 challenge, and parsing that throws
+ * "Failed to parse payment requirements". To stay correct we:
+ *   1. only reuse a cached pre-auth when an up-front cost estimate proves the
+ *      cached amount still covers this request (never knowingly underpay), and
+ *   2. if a pre-auth is rejected anyway, discard it and re-request WITHOUT
+ *      payment to obtain a fresh, canonical challenge — never treat the
+ *      rejection response itself as the challenge.
+ *
+ * Falls back to the normal 402 flow whenever pre-auth can't be proven safe.
  */
 
 import type { x402Client } from "@x402/fetch";
@@ -17,8 +29,17 @@ type PaymentRequired = Parameters<InstanceType<typeof x402Client>["createPayment
 interface CachedEntry {
   paymentRequired: PaymentRequired;
   cachedAt: number;
+  /** Client-side estimate (USDC micro-units) for the request that created this
+   *  entry; the entry is reused only when a later request's estimate is <= it.
+   *  `undefined` (no estimator, no model, or no estimate) disables reuse.
+   *  NOTE(review): not visibly compared with the amount in `paymentRequired` — confirm. */
+  coverMicros: number | undefined;
 }
 
+/** Cost estimator, e.g. proxy.ts#estimateAmount: USDC micro-units as a numeric string, or
+ *  undefined for "no estimate". Gets the body's string length and max_tokens (0 if absent). */
+type EstimateFn = (modelId: string, bodyLength: number, maxTokens: number) => string | undefined;
+
 const DEFAULT_TTL_MS = 3_600_000; // 1 hour
 
 type FetchFn = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;
@@ -27,7 +48,7 @@ export function createPayFetchWithPreAuth(
   baseFetch: FetchFn,
   client: x402Client,
   ttlMs = DEFAULT_TTL_MS,
-  options?: { skipPreAuth?: boolean },
+  options?: { skipPreAuth?: boolean; estimateAmount?: EstimateFn },
 ): FetchFn {
   const httpClient = new x402HTTPClient(client);
   const cache = new Map<string, CachedEntry>();
@@ -36,11 +57,13 @@ export function createPayFetchWithPreAuth(
     const request = new Request(input, init);
     const urlPath = new URL(request.url).pathname;
 
-    // Extract model from request body to create model-specific cache keys.
-    // Without this, a cached payment from a paid model (e.g. sonnet) would be
-    // incorrectly applied to a free model (nvidia/gpt-oss-120b), causing
-    // payment errors even when the server wouldn't charge for the request.
+    // Extract model + size from the request body. Model gives a per-model cache
+    // key (a cached sonnet payment must not be applied to a free model); body
+    // length + max_tokens drive the up-front cost estimate used to decide
+    // whether a cached pre-auth still covers this (possibly larger) request.
     let requestModel = "";
+    let bodyLength = 0;
+    let maxTokens = 0;
     if (init?.body) {
       try {
         const bodyStr =
@@ -50,21 +73,38 @@ export function createPayFetchWithPreAuth(
               ? init.body
               : "";
         if (bodyStr) {
-          const parsed = JSON.parse(bodyStr) as { model?: string };
+          bodyLength = bodyStr.length;
+          const parsed = JSON.parse(bodyStr) as { model?: string; max_tokens?: number };
           requestModel = parsed.model ?? "";
+          maxTokens = Number(parsed.max_tokens) || 0;
         }
       } catch {
         /* not JSON, use empty model */
       }
     }
     const cacheKey = `${urlPath}:${requestModel}`;
 
-    // Try pre-auth if we have cached payment requirements
-    // Skip for Solana: payments use per-tx blockhashes that expire ~60-90s,
-    // making cached requirements useless and causing double charges.
+    // Up-front estimate of what THIS request will cost (USDC micro-units), used
+    // both to gate pre-auth reuse and to record what a new cache entry covers.
+    const estimateMicros = (): number | undefined => {
+      if (!options?.estimateAmount || !requestModel) return undefined;
+      const est = options.estimateAmount(requestModel, bodyLength, maxTokens);
+      return est === undefined ? undefined : Number(est);
+    };
+    const needMicros = estimateMicros();
+
+    // Try pre-auth only when we can PROVE the cached payment still covers this
+    // request (needMicros <= what the cached entry covered). Skip for Solana:
+    // payments use per-tx blockhashes that expire ~60-90s, making cached
+    // requirements useless and causing double charges.
     const cached = !options?.skipPreAuth ? cache.get(cacheKey) : undefined;
-    let rejected402: Response | undefined;
-    if (cached && Date.now() - cached.cachedAt < ttlMs) {
+    const preAuthCovers =
+      cached !== undefined &&
+      Date.now() - cached.cachedAt < ttlMs &&
+      cached.coverMicros !== undefined &&
+      needMicros !== undefined &&
+      needMicros <= cached.coverMicros;
+    if (preAuthCovers) {
       try {
         const payload = await client.createPaymentPayload(cached.paymentRequired);
         const headers = httpClient.encodePaymentSignatureHeader(payload);
@@ -76,20 +116,19 @@ export function createPayFetchWithPreAuth(
         if (response.status !== 402) {
           return response; // Pre-auth worked — saved ~200ms
         }
-        // Pre-auth rejected (params may have changed) — invalidate and reuse
-        // this 402 below: it already carries the fresh payment requirements,
-        // so re-requesting without payment would just buy the same 402 again.
+        // Rejected despite our estimate (server priced it higher than we did).
+        // The rejection 402 is NOT a reusable challenge, so drop it and fall
+        // through to a clean, un-paid request that yields a fresh challenge.
         cache.delete(cacheKey);
-        rejected402 = response;
       } catch {
-        // Pre-auth signing failed — invalidate and fall through
+        // Pre-auth signing failed — invalidate and fall through.
         cache.delete(cacheKey);
       }
     }
 
-    // Normal flow: make request (or reuse the rejected pre-auth 402), handle 402 if needed
+    // Normal flow: make a clean (un-paid) request and handle the 402 if needed.
     const clonedRequest = request.clone();
-    const response = rejected402 ?? (await baseFetch(request));
+    const response = await baseFetch(request);
     if (response.status !== 402) {
       return response;
     }
@@ -111,7 +150,9 @@ export function createPayFetchWithPreAuth(
         /* empty body is fine */
       }
       paymentRequired = httpClient.getPaymentRequiredResponse(getHeader, body);
-      cache.set(cacheKey, { paymentRequired, cachedAt: Date.now() });
+      // Tag the entry with this request's own estimate (undefined when none was
+      // available, which disables reuse); it is reused only for estimates <= it.
+      cache.set(cacheKey, { paymentRequired, cachedAt: Date.now(), coverMicros: needMicros });
     } catch (error) {
       throw new Error(
         `Failed to parse payment requirements: ${error instanceof Error ? error.message : "Unknown error"}`,

diff --git a/src/proxy.ts b/src/proxy.ts
@@ -2024,6 +2024,10 @@ export async function startProxy(options: ProxyOptions): Promise<ProxyHandle> {
 
   const payFetch = createPayFetchWithPreAuth(fetch, x402, undefined, {
     skipPreAuth: paymentChain === "solana",
+    // Per-request cost estimate so pre-auth is only reused when the cached
+    // payment still covers the (possibly larger) request — BlockRun prices per
+    // token, so one model can cost different amounts across requests.
+    estimateAmount,
   });
 
   // Create balance monitor for pre-request checks (lazy import to avoid loading @solana/kit on Base chain)