Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions src/payment-preauth.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/**
* Pre-auth cache correctness under per-request (token-based) pricing.
*
* BlockRun prices each call on input + max_tokens, so the same model can cost
* different amounts. These tests pin the guarantees that keep that from
* underpaying via a stale cached authorization:
* - pre-auth is reused only when an up-front estimate proves it still covers
* the request (fires on a same/cheaper repeat, skipped when the request grows),
* - a rejected pre-auth is discarded and the request re-fetched cleanly — the
* rejection is never treated as a fresh challenge (no "Failed to parse…"),
* - with no estimator, pre-auth is disabled rather than risking an underpay.
*/
import { describe, it, expect, vi } from "vitest";
import { x402Client } from "@x402/fetch";
import { registerExactEvmScheme } from "@x402/evm/exact/client";
import { toClientEvmSigner } from "@x402/evm";
import { createPublicClient, http } from "viem";
import { base } from "viem/chains";
import { privateKeyToAccount } from "viem/accounts";
import { deriveAllKeys } from "./wallet.js";
import { createPayFetchWithPreAuth } from "./payment-preauth.js";

const MNEMONIC =
"abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art";

/**
 * Builds an x402 client that can sign "exact" EVM payments for the tests.
 *
 * The signer is derived from the fixed test MNEMONIC, so every call yields a
 * client backed by the same account.
 *
 * @returns a fresh x402Client with the exact-EVM scheme registered.
 */
function testClient(): x402Client {
  const { evmPrivateKey } = deriveAllKeys(MNEMONIC);
  const signer = toClientEvmSigner(
    privateKeyToAccount(evmPrivateKey),
    createPublicClient({ chain: base, transport: http() }),
  );

  const x402 = new x402Client();
  registerExactEvmScheme(x402, { signer });
  return x402;
}

// Payment-required payload the fake gateway advertises. challenge402() below
// base64-encodes this object into the 402 response headers.
const CHALLENGE = {
x402Version: 2,
// A single accepted payment option using the "exact" scheme.
accepts: [
{
scheme: "exact",
network: "eip155:8453",
// Amount the gateway asks for; the tests' estimators return "1000" to match.
amount: "1000",
// Token contract and recipient; `extra` names the token "USD Coin".
asset: "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913",
payTo: "0xe9030014F5DAe217d0A152f02A043567b16c1aBf",
maxTimeoutSeconds: 300,
extra: { name: "USD Coin", version: "2" },
},
],
resource: { url: "https://gw/api", description: "t", mimeType: "application/json" },
};

/**
 * Creates a fresh 402 response carrying the CHALLENGE payload.
 *
 * The payload is JSON-serialized and base64-encoded, then exposed in both the
 * `payment-required` and `www-authenticate` headers. The JSON body only holds
 * a generic error message.
 *
 * @returns a new Response with status 402 (a new instance on every call).
 */
function challenge402(): Response {
  const encoded = Buffer.from(JSON.stringify(CHALLENGE)).toString("base64");
  const headers = {
    "payment-required": encoded,
    "www-authenticate": `X402 requirements="${encoded}"`,
    "content-type": "application/json",
  };
  const payload = JSON.stringify({ error: "Payment Required" });
  return new Response(payload, { status: 402, headers });
}

/**
 * In-memory stand-in for the payment gateway.
 *
 * Behavior per call:
 *  - no `payment-signature` header → a fresh 402 challenge;
 *  - `payment-signature` present   → 200 `{ ok: true }`, unless
 *    `ctl.rejectNextPaid` is set, in which case that one paid call gets a 402
 *    (simulating an underpayment) and the flag resets itself.
 *
 * Every call is appended to `calls` with whether it carried a payment.
 *
 * @returns `fn` (the mock, typed as fetch), the `calls` log, and the `ctl` switch.
 */
function fakeGateway() {
  const ctl = { rejectNextPaid: false }; // flip AFTER seeding to reject a pre-auth
  const calls: Array<{ paid: boolean }> = [];

  const handler = async (input: RequestInfo | URL, init?: RequestInit) => {
    const paid = new Request(input, init).headers.has("payment-signature");
    calls.push({ paid });

    // Unpaid requests always get a fresh challenge.
    if (!paid) return challenge402();

    // One-shot rejection of a paid request (underpayment rejected).
    if (ctl.rejectNextPaid) {
      ctl.rejectNextPaid = false;
      return challenge402();
    }

    return new Response(JSON.stringify({ ok: true }), { status: 200 });
  };

  const fn = vi.fn(handler);
  return { fn: fn as unknown as typeof fetch, calls, ctl };
}

/** Gateway endpoint that every test request targets. */
const URL = "https://gw/api/v1/chat/completions";

/**
 * Serializes a minimal chat-completion request body for the tests.
 *
 * @param maxTokens value written to `max_tokens` (defaults to 10).
 * @returns the JSON string with keys in the order model, max_tokens, messages.
 */
function body(maxTokens = 10): string {
  const payload: { model: string; max_tokens: number; messages: unknown[] } = {
    model: "test/model",
    max_tokens: maxTokens,
    messages: [],
  };
  return JSON.stringify(payload);
}

/**
 * Pins the pre-auth safety guarantees against the fake gateway.
 *
 * Each test seeds the cache with one normal request (unpaid → 402, paid → 200)
 * and then inspects ONLY the gateway calls made by the follow-up request, as
 * an exact ordered list of "did this call carry a payment?" flags. Exact
 * sequences (rather than loose contains/first/last checks) ensure an extra
 * round-trip or a duplicate paid call fails the test.
 */
describe("payment pre-auth — per-request pricing safety", () => {
  it("reuses pre-auth when the estimate proves the cache still covers it (no extra 402)", async () => {
    const est = vi.fn(() => "1000"); // every request estimated equal
    const gw = fakeGateway();
    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est });

    await pay(URL, { method: "POST", body: body() }); // seed: [unpaid→402, paid→200]
    const seeded = gw.calls.length;
    expect(gw.calls.map((c) => c.paid)).toEqual([false, true]);

    const res = await pay(URL, { method: "POST", body: body() }); // identical → pre-auth
    expect(res.status).toBe(200);
    // Exactly one round-trip, and it was pre-paid (no 402 in between).
    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([true]);
  });

  it("skips pre-auth (clean fresh 402) when the request grows beyond the cached amount", async () => {
    let big = false;
    const est = vi.fn(() => (big ? "5000" : "1000"));
    const gw = fakeGateway();
    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est });

    await pay(URL, { method: "POST", body: body(10) }); // seed cover=1000
    const seeded = gw.calls.length;

    big = true;
    const res = await pay(URL, { method: "POST", body: body(9000) }); // needs 5000 > 1000
    expect(res.status).toBe(200); // NOT a 500 "Failed to parse payment requirements"
    // Skipped pre-auth → clean unpaid request first, then the paid retry.
    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]);
  });

  it("discards a rejected pre-auth and re-fetches cleanly (no parse error)", async () => {
    const est = vi.fn(() => "1000");
    const gw = fakeGateway();
    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est });

    await pay(URL, { method: "POST", body: body() }); // seed (cache warm, cover=1000)
    const seeded = gw.calls.length;
    // Now make the next PAID request (the pre-auth) get rejected by the gateway.
    gw.ctl.rejectNextPaid = true;

    const res = await pay(URL, { method: "POST", body: body() });
    expect(res.status).toBe(200);
    // pre-auth fires (covered) → rejected 402 → clean unpaid refetch → paid retry.
    // Pinning the exact sequence also rules out extra round-trips or a second
    // paid attempt sneaking in between.
    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([true, false, true]);
  });

  it("disables pre-auth entirely when no estimator is provided (never underpays)", async () => {
    const gw = fakeGateway();
    const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, {}); // no estimateAmount

    await pay(URL, { method: "POST", body: body() });
    const seeded = gw.calls.length;

    const res = await pay(URL, { method: "POST", body: body() }); // identical
    expect(res.status).toBe(200);
    // No pre-auth → still a fresh 402 + paid retry, never a pre-signed first call.
    expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]);
  });
});
81 changes: 61 additions & 20 deletions src/payment-preauth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,19 @@
* On subsequent requests, pre-signs payment and attaches it to the first
* request, skipping the 402 round trip (~200ms savings per request).
*
* Falls back to normal 402 flow if pre-signed payment is rejected.
* IMPORTANT — pricing is per-request, not per-model. BlockRun prices each call
* on (input tokens + max_tokens reservation), so two calls to the SAME model
* can cost different amounts. A cached payment authorizes one EXACT amount, so
* blindly reusing it for a larger request underpays — the gateway then rejects
* it with a 402 that is NOT a fresh x402 challenge, and parsing that throws
* "Failed to parse payment requirements". To stay correct we:
* 1. only reuse a cached pre-auth when an up-front cost estimate proves the
* cached amount still covers this request (never knowingly underpay), and
* 2. if a pre-auth is rejected anyway, discard it and re-request WITHOUT
* payment to obtain a fresh, canonical challenge — never treat the
* rejection response itself as the challenge.
*
* Falls back to the normal 402 flow whenever pre-auth can't be proven safe.
*/

import type { x402Client } from "@x402/fetch";
Expand All @@ -17,8 +29,17 @@ type PaymentRequired = Parameters<InstanceType<typeof x402Client>["createPayment
/** One cache slot, keyed by `${urlPath}:${requestModel}`. */
interface CachedEntry {
/** Payment requirements parsed from a 402 response; re-signed for each
* pre-auth attempt. */
paymentRequired: PaymentRequired;
/** `Date.now()` at the moment the entry was stored; compared against the TTL. */
cachedAt: number;
/** Client-side cost ESTIMATE (USDC micro-units) of the request that
* established this entry. Pre-auth is attempted only when a new request's
* estimate is <= this value. `undefined` when the cost couldn't be
* estimated — in which case pre-auth is skipped.
* NOTE(review): this is the estimator's figure, not the amount carried in
* `paymentRequired`. Reuse therefore assumes a request whose estimate is not
* larger is also not priced higher by the server — confirm against the
* estimator. A wrong guess is recovered by the rejected-pre-auth refetch. */
coverMicros: number | undefined;
}

/**
* Up-front per-request cost estimator (USDC micro-units as a string), e.g.
* proxy.ts#estimateAmount. Returns undefined when the model/cost is unknown.
* The caller converts the returned string with `Number(...)`.
*
* @param modelId   `model` field parsed from the JSON request body.
* @param bodyLength `.length` of the request body string (not a token count).
* @param maxTokens `max_tokens` from the request body, or 0 when absent/non-numeric.
*/
type EstimateFn = (modelId: string, bodyLength: number, maxTokens: number) => string | undefined;

// Default lifetime of a cache entry when the caller passes no `ttlMs`.
const DEFAULT_TTL_MS = 3_600_000; // 1 hour

// Fetch-compatible call signature: the shape of the wrapped `baseFetch` and of
// the function returned by createPayFetchWithPreAuth.
type FetchFn = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;
Expand All @@ -27,7 +48,7 @@ export function createPayFetchWithPreAuth(
baseFetch: FetchFn,
client: x402Client,
ttlMs = DEFAULT_TTL_MS,
options?: { skipPreAuth?: boolean },
options?: { skipPreAuth?: boolean; estimateAmount?: EstimateFn },
): FetchFn {
const httpClient = new x402HTTPClient(client);
const cache = new Map<string, CachedEntry>();
Expand All @@ -36,11 +57,13 @@ export function createPayFetchWithPreAuth(
const request = new Request(input, init);
const urlPath = new URL(request.url).pathname;

// Extract model from request body to create model-specific cache keys.
// Without this, a cached payment from a paid model (e.g. sonnet) would be
// incorrectly applied to a free model (nvidia/gpt-oss-120b), causing
// payment errors even when the server wouldn't charge for the request.
// Extract model + size from the request body. Model gives a per-model cache
// key (a cached sonnet payment must not be applied to a free model); body
// length + max_tokens drive the up-front cost estimate used to decide
// whether a cached pre-auth still covers this (possibly larger) request.
let requestModel = "";
let bodyLength = 0;
let maxTokens = 0;
if (init?.body) {
try {
const bodyStr =
Expand All @@ -50,21 +73,38 @@ export function createPayFetchWithPreAuth(
? init.body
: "";
if (bodyStr) {
const parsed = JSON.parse(bodyStr) as { model?: string };
bodyLength = bodyStr.length;
const parsed = JSON.parse(bodyStr) as { model?: string; max_tokens?: number };
requestModel = parsed.model ?? "";
maxTokens = Number(parsed.max_tokens) || 0;
}
} catch {
/* not JSON, use empty model */
}
}
const cacheKey = `${urlPath}:${requestModel}`;

// Try pre-auth if we have cached payment requirements
// Skip for Solana: payments use per-tx blockhashes that expire ~60-90s,
// making cached requirements useless and causing double charges.
// Up-front estimate of what THIS request will cost (USDC micro-units), used
// both to gate pre-auth reuse and to record what a new cache entry covers.
const estimateMicros = (): number | undefined => {
if (!options?.estimateAmount || !requestModel) return undefined;
const est = options.estimateAmount(requestModel, bodyLength, maxTokens);
return est === undefined ? undefined : Number(est);
};
const needMicros = estimateMicros();

// Try pre-auth only when we can PROVE the cached payment still covers this
// request (needMicros <= what the cached entry covered). Skip for Solana:
// payments use per-tx blockhashes that expire ~60-90s, making cached
// requirements useless and causing double charges.
const cached = !options?.skipPreAuth ? cache.get(cacheKey) : undefined;
let rejected402: Response | undefined;
if (cached && Date.now() - cached.cachedAt < ttlMs) {
const preAuthCovers =
cached !== undefined &&
Date.now() - cached.cachedAt < ttlMs &&
cached.coverMicros !== undefined &&
needMicros !== undefined &&
needMicros <= cached.coverMicros;
if (preAuthCovers) {
try {
const payload = await client.createPaymentPayload(cached.paymentRequired);
const headers = httpClient.encodePaymentSignatureHeader(payload);
Expand All @@ -76,20 +116,19 @@ export function createPayFetchWithPreAuth(
if (response.status !== 402) {
return response; // Pre-auth worked — saved ~200ms
}
// Pre-auth rejected (params may have changed) — invalidate and reuse
// this 402 below: it already carries the fresh payment requirements,
// so re-requesting without payment would just buy the same 402 again.
// Rejected despite our estimate (server priced it higher than we did).
// The rejection 402 is NOT a reusable challenge, so drop it and fall
// through to a clean, un-paid request that yields a fresh challenge.
cache.delete(cacheKey);
rejected402 = response;
} catch {
// Pre-auth signing failed — invalidate and fall through
// Pre-auth signing failed — invalidate and fall through.
cache.delete(cacheKey);
}
}

// Normal flow: make request (or reuse the rejected pre-auth 402), handle 402 if needed
// Normal flow: make a clean (un-paid) request and handle the 402 if needed.
const clonedRequest = request.clone();
const response = rejected402 ?? (await baseFetch(request));
const response = await baseFetch(request);
if (response.status !== 402) {
return response;
}
Expand All @@ -111,7 +150,9 @@ export function createPayFetchWithPreAuth(
/* empty body is fine */
}
paymentRequired = httpClient.getPaymentRequiredResponse(getHeader, body);
cache.set(cacheKey, { paymentRequired, cachedAt: Date.now() });
// Record what this cached payment covers (this request's estimate). It is
// only reused later when a new request's estimate is <= this value.
cache.set(cacheKey, { paymentRequired, cachedAt: Date.now(), coverMicros: needMicros });
} catch (error) {
throw new Error(
`Failed to parse payment requirements: ${error instanceof Error ? error.message : "Unknown error"}`,
Expand Down
4 changes: 4 additions & 0 deletions src/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2024,6 +2024,10 @@ export async function startProxy(options: ProxyOptions): Promise<ProxyHandle> {

const payFetch = createPayFetchWithPreAuth(fetch, x402, undefined, {
skipPreAuth: paymentChain === "solana",
// Per-request cost estimate so pre-auth is only reused when the cached
// payment still covers the (possibly larger) request — BlockRun prices per
// token, so one model can cost different amounts across requests.
estimateAmount,
});

// Create balance monitor for pre-request checks (lazy import to avoid loading @solana/kit on Base chain)
Expand Down
Loading