Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 46 additions & 35 deletions src/tools/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import { formatError } from "../utils/errors.js";
import type { BudgetState } from "../types.js";
import { getChain, getImageClient } from "../utils/wallet.js";
import { readFile } from "node:fs/promises";
import { shouldInline, buildInlineImageBlock } from "../utils/inline-image.js";
import { confirmSpend } from "../utils/confirm-spend.js";

// The gateway's /v1/images/image2image only accepts base64 data URIs for the
// source image(s). Callers naturally have local file paths or http(s) URLs, so
Expand Down Expand Up @@ -144,10 +146,11 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil
mask: z.string().optional().describe("Inpaint mask for edit action (openai/gpt-image-* only): a base64 data URI, http(s) URL, or local file path. Transparent areas of the mask are regenerated. Cannot be combined with multiple source images."),
size: z.string().optional().default("1024x1024").describe("Image size. Common values: 1024x1024 (all models), 1536x1024 / 1024x1536 (gpt-image-*), 2048x2048 / 4096x4096 (nano-banana-pro)"),
quality: z.enum(["standard", "hd"]).optional().default("standard"),
inline: z.boolean().optional().describe("Return a small inline image preview (thumbnail) the client can render in-conversation, in addition to the full-resolution URL. Defaults to the BLOCKRUN_INLINE_IMAGES env setting (off unless set). Rich clients (e.g. the VS Code extension) render it; plain terminals ignore it. Off keeps responses lightweight."),
agent_id: z.string().optional().describe("Agent identifier for budget tracking and enforcement."),
},
},
async ({ prompt, action, model, image, mask, size, quality, agent_id }) => {
async ({ prompt, action, model, image, mask, size, quality, inline, agent_id }) => {
try {
if (getChain() !== "base") {
return {
Expand All @@ -157,8 +160,13 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil
}

const selectedModel = model || "openai/gpt-image-2";
let response;

// Edit-mode inputs, normalized to data URIs in the edit branch below and
// consumed at the shared call site after the spend confirmation.
let normalizedImage: string | string[] | undefined;
let normalizedMask: string | undefined;

// Validate the edit action up front (before estimating/charging).
if (action === "edit") {
if (!image) {
return {
Expand Down Expand Up @@ -194,8 +202,6 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil
};
}
}
let normalizedImage: string | string[];
let normalizedMask: string | undefined;
try {
const dataUris = await Promise.all(sourceImages.map(toImageDataUri));
normalizedImage = dataUris.length === 1 ? dataUris[0] : dataUris;
Expand All @@ -206,37 +212,37 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil
isError: true,
};
}
const estimatedCost = estimateCost(selectedModel, size);
const budgetCheck = checkBudget(budget, agent_id, estimatedCost);
if (!budgetCheck.allowed) {
return {
content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }],
isError: true,
};
}
response = await getImageClient().edit(prompt, normalizedImage, {
model: selectedModel,
size,
...(normalizedMask ? { mask: normalizedMask } : {}),
});
recordSpending(budget, estimatedCost, agent_id);
} else {
const estimatedCost = estimateCost(selectedModel, size);
const budgetCheck = checkBudget(budget, agent_id, estimatedCost);
if (!budgetCheck.allowed) {
return {
content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }],
isError: true,
};
}
response = await getImageClient().generate(prompt, {
model: selectedModel,
size,
quality: quality as "standard" | "hd",
});
recordSpending(budget, estimatedCost, agent_id);
}

const estimatedCost = estimateCost(selectedModel, size);
const budgetCheck = checkBudget(budget, agent_id, estimatedCost);
if (!budgetCheck.allowed) {
return {
content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }],
isError: true,
};
}

// Confirm the spend before charging (elicitation; user can approve once,
// approve all for the session, or decline to abort). No-ops on clients
// without elicitation or when disabled via env.
const confirm = await confirmSpend(server, {
usd: estimatedCost,
label: `${action === "edit" ? "image edit" : "image"} · ${selectedModel}`,
});
if (!confirm.ok) {
return { content: [{ type: "text", text: confirm.reason || "Charge cancelled." }] };
}

const response = action === "edit"
? await getImageClient().edit(prompt, normalizedImage!, {
model: selectedModel,
size,
...(normalizedMask ? { mask: normalizedMask } : {}),
})
: await getImageClient().generate(prompt, { model: selectedModel, size, quality: quality as "standard" | "hd" });
recordSpending(budget, estimatedCost, agent_id);

const imageUrl = response.data?.[0]?.url;

if (!imageUrl) {
Expand All @@ -246,9 +252,14 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil
};
}

const textBlock = { type: "text" as const, text: `Image: ${imageUrl}\nPrompt: ${prompt}\nModel: ${selectedModel}` };
// Optional inline preview (thumbnail) for rich clients. Best-effort:
// on failure or if disabled, fall back to the URL-only text block.
const previewBlock = shouldInline(inline) ? await buildInlineImageBlock(imageUrl) : null;

return {
content: [{ type: "text", text: `Image: ${imageUrl}\nPrompt: ${prompt}\nModel: ${selectedModel}` }],
structuredContent: { url: imageUrl, prompt, model: selectedModel },
content: previewBlock ? [previewBlock, textBlock] : [textBlock],
structuredContent: { url: imageUrl, prompt, model: selectedModel, inlined: Boolean(previewBlock) },
};
} catch (err) {
const errMsg = err instanceof Error ? err.message : String(err);
Expand Down
99 changes: 99 additions & 0 deletions src/utils/confirm-spend.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// src/utils/confirm-spend.ts
//
// Server-side spend confirmation via MCP elicitation. Before a paid call, the
// server asks the client to render a confirm dialog showing the estimated cost.
// The dialog carries an "approve all this session" checkbox — when ticked, the
// server skips every later prompt for the rest of the session (in-memory flag,
// scoped to this server process = this session). This is reliable across
// clients that support elicitation (e.g. Claude Code), unlike PreToolUse hooks.
//
// Off by default — opt in with BLOCKRUN_CONFIRM_SPEND=on (or 1/true/yes). This
// avoids double-prompting when a plugin already gates spend via a PreToolUse
// hook (the hook renders the cost and is honored on more clients), and keeps
// the bare MCP from surprising users with an extra dialog.
//
// Controls (env):
// BLOCKRUN_CONFIRM_SPEND=on enable elicitation-based spend confirmation
// BLOCKRUN_CONFIRM_THRESHOLD=0.05 only confirm calls estimated above this USD
//
// Degradation: if the client doesn't advertise elicitation, we proceed without
// prompting (the tool's cost footer still tells the user what was charged) —
// better than failing the call on clients that can't ask.

import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";

// "Approve all" latch for the session. The MCP server is one process per
// session, so module-level state lives exactly as long as the session does.
let sessionAutoApprove = false;

// Env gates, evaluated once when this module is loaded.
const confirmSwitch = process.env.BLOCKRUN_CONFIRM_SPEND ?? "";
const CONFIRM_ON = /^(1|true|on|yes)$/i.test(confirmSwitch);
const thresholdSetting = process.env.BLOCKRUN_CONFIRM_THRESHOLD || 0;
const THRESHOLD = Number(thresholdSetting);

/** Outcome of a spend-confirmation check, as returned by `confirmSpend`. */
export interface ConfirmResult {
// True when the caller may go ahead with the paid call.
ok: boolean;
// Explanation for the user; `confirmSpend` sets it when it returns ok:false.
reason?: string;
}

/**
 * Ask the user to confirm a charge before a paid call is made.
 *
 * Resolves to `{ ok: true }` without prompting when confirmation is switched
 * off, the call is free (`usd <= 0`), the session has already been approved,
 * the estimate is at or below the configured threshold, or the client does not
 * advertise elicitation. Otherwise an elicitation dialog is shown.
 *
 * @param server MCP server whose underlying connection is used to elicit input.
 * @param opts.usd Estimated cost of the call in USD.
 * @param opts.label Short description of the charge shown in the dialog.
 * @param opts.balanceNote Optional extra text appended to the estimate line.
 * @returns `{ ok: false, reason }` only when the user explicitly declines (the
 *   caller should abort without charging); `{ ok: true }` in every other case,
 *   including when the prompt itself fails.
 */
export async function confirmSpend(
  server: McpServer,
  opts: { usd: number; label: string; balanceNote?: string },
): Promise<ConfirmResult> {
  const { usd, label, balanceNote } = opts;

  if (!CONFIRM_ON) return { ok: true }; // off by default (opt-in)
  if (usd <= 0) return { ok: true }; // free
  if (sessionAutoApprove) return { ok: true }; // user already approved all
  if (usd <= THRESHOLD) return { ok: true }; // cheap enough to skip

  const caps = server.server.getClientCapabilities?.();
  if (!caps?.elicitation) return { ok: true }; // client can't be asked → proceed

  try {
    const result = await server.server.elicitInput({
      message:
        `💸 BlockRun charge — ${label}\n` +
        `Estimated: $${usd.toFixed(4)}${balanceNote ? ` · ${balanceNote}` : ""}\n` +
        `Approve this spend? (USDC is debited per call.)\n` +
        `To stop the charge, choose Decline — Cancel/ESC lets it proceed.`,
      requestedSchema: {
        type: "object",
        properties: {
          approve_all_session: {
            type: "boolean",
            title: "Approve all BlockRun charges for the rest of this session (don't ask again)",
            default: false,
          },
        },
      },
    });

    // Only an EXPLICIT decline stops the charge. Some clients (e.g. the desktop
    // app) return action "cancel" even when the user confirms a form dialog, and
    // the client's own tool-permission prompt is already the real gate — so a
    // non-"accept" action is not treated as a cancellation. Honor "decline" only.
    //
    // This check runs BEFORE the "approve all" latch is touched: a declined
    // dialog must never switch on blanket approval for the rest of the session,
    // even if the form came back with the checkbox ticked.
    if (result.action === "decline") {
      return { ok: false, reason: "Charge declined — nothing was generated or charged." };
    }

    // Latch session-wide approval only on an explicit boolean true, so a stray
    // truthy value (e.g. the string "false") cannot silence future prompts.
    const content = result.content as { approve_all_session?: unknown } | undefined;
    if (content?.approve_all_session === true) sessionAutoApprove = true;

    return { ok: true };
  } catch {
    // Couldn't render the prompt (e.g. client advertises elicitation but not
    // the form mode). Fail open — proceed rather than block a legitimate call.
    // Only an explicit user decline (above) stops the charge.
    return { ok: true };
  }
}

/**
 * Test/escape hatch: clear the session-wide "approve all" flag so that later
 * `confirmSpend` calls are no longer short-circuited by it (not used in the
 * normal flow).
 */
export function resetSpendApproval(): void {
sessionAutoApprove = false;
}
77 changes: 77 additions & 0 deletions src/utils/inline-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// src/utils/inline-image.ts
//
// Optional inline image preview. When enabled, a generated image is fetched,
// downscaled to a small JPEG thumbnail, and returned as an MCP `type:"image"`
// content block so rich clients (e.g. the VS Code extension) render it inline.
// The full-resolution URL is always kept in the text block — the thumbnail is
// a preview, not a replacement.
//
// Off by default to avoid context/token bloat. Enable globally with
// BLOCKRUN_INLINE_IMAGES=1 (or true/yes/on), or per call with the tool's
// `inline` param (which takes precedence over the env default).

import sharp from "sharp";

/**
 * Read an integer tuning knob from the environment.
 *
 * Returns `fallback` when the variable is unset, blank, not a finite number,
 * or (after dropping any fractional part) outside `[min, max]`. A bare
 * `Number(env || fallback)` would instead yield NaN or an out-of-range value
 * for a mistyped setting.
 */
function intFromEnv(name: string, fallback: number, min: number, max: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return fallback;
  const whole = Math.trunc(parsed);
  return whole >= min && whole <= max ? whole : fallback;
}

// Thumbnail bounds — small enough that the base64 stays cheap in context.
// Both are passed to sharp below, so they are kept to positive integers
// (quality additionally to 1–100).
const MAX_DIM = intFromEnv("BLOCKRUN_INLINE_MAX_DIM", 512, 1, Number.MAX_SAFE_INTEGER);
const JPEG_QUALITY = intFromEnv("BLOCKRUN_INLINE_QUALITY", 70, 1, 100);
// Hard ceiling on the BASE64-encoded thumbnail (the string that actually lands
// in the context window — base64 inflates the raw JPEG ~33%). Above this we
// skip inlining entirely (URL-only) so a single image can't blow up context.
// Validated so a garbage value cannot become NaN, which would make every
// `length > MAX_BYTES` comparison false and silently remove the ceiling.
const MAX_BYTES = intFromEnv("BLOCKRUN_INLINE_MAX_BYTES", 900_000, 0, Number.MAX_SAFE_INTEGER);
// Defensive caps on the SOURCE download/decode. Upstream is the trusted
// blockrun-hosted asset, but bounding the buffer + decode keeps a pathological
// response from ballooning memory before the thumbnail step runs.
const MAX_SOURCE_BYTES = 25_000_000; // 25 MB ceiling on the fetched image
const MAX_INPUT_PIXELS = 100_000_000; // ~100 MP decode guard for sharp

/**
 * True when `v` is set and, ignoring surrounding whitespace, spells one of
 * 1 / true / yes / on in any letter case.
 */
function truthy(v: string | undefined): boolean {
  if (v == null) {
    return false;
  }
  const candidate = v.trim();
  return /^(1|true|yes|on)$/i.test(candidate);
}

/**
 * Decide whether an inline preview should be produced. An explicit boolean
 * `param` is returned as-is; anything else defers to the
 * BLOCKRUN_INLINE_IMAGES environment variable, which counts as off when unset.
 */
export function shouldInline(param?: boolean): boolean {
  return typeof param === "boolean" ? param : truthy(process.env.BLOCKRUN_INLINE_IMAGES);
}

/** Image content block produced by `buildInlineImageBlock` for an inline preview. */
export interface InlineImageBlock {
// Literal tag identifying this as an image block.
type: "image";
data: string; // base64 (no data: prefix, per MCP ImageContent)
// MIME type of `data`; `buildInlineImageBlock` sets it to "image/jpeg".
mimeType: string;
}

/**
 * Read a response body into memory, giving up as soon as more than `limit`
 * bytes have arrived. Returns null when the cap is exceeded.
 */
async function readBodyCapped(resp: Response, limit: number): Promise<Buffer | null> {
  const stream = resp.body;
  if (!stream) {
    // No readable stream exposed (e.g. a stubbed Response): buffer, then check.
    const whole = Buffer.from(await resp.arrayBuffer());
    return whole.byteLength > limit ? null : whole;
  }

  const reader = stream.getReader();
  const chunks: Uint8Array[] = [];
  let received = 0;
  for (;;) {
    const step = await reader.read();
    if (step.done) break;
    received += step.value.byteLength;
    if (received > limit) {
      // Stop the transfer instead of buffering the rest of an oversized body.
      await reader.cancel();
      return null;
    }
    chunks.push(step.value);
  }
  return Buffer.concat(chunks, received);
}

/**
 * Fetch the image at `url`, downscale to a JPEG thumbnail, and return an MCP
 * image content block.
 *
 * Inlining is best-effort and must never break the tool call, so this never
 * throws: it returns null (caller falls back to URL-only) on any failure, on a
 * non-OK response, when the source exceeds MAX_SOURCE_BYTES, or when the
 * base64-encoded thumbnail exceeds MAX_BYTES.
 *
 * @param url Location of the full-resolution image.
 * @returns The thumbnail block, or null when no preview could be produced.
 */
export async function buildInlineImageBlock(url: string): Promise<InlineImageBlock | null> {
  try {
    const resp = await fetch(url, { signal: AbortSignal.timeout(15_000) });
    if (!resp.ok) {
      // Release the connection rather than leaving the body unread.
      await resp.body?.cancel();
      return null;
    }
    // Cap the download: reject early on a too-large Content-Length...
    const declared = Number(resp.headers.get("content-length") || 0);
    if (declared > MAX_SOURCE_BYTES) {
      await resp.body?.cancel();
      return null;
    }
    // ...and enforce the same cap while streaming, so a missing or lying
    // header cannot make us buffer an arbitrarily large body first.
    const input = await readBodyCapped(resp, MAX_SOURCE_BYTES);
    if (input === null) return null;

    const thumb = await sharp(input, { limitInputPixels: MAX_INPUT_PIXELS })
      .rotate()
      .resize(MAX_DIM, MAX_DIM, { fit: "inside", withoutEnlargement: true })
      .jpeg({ quality: JPEG_QUALITY })
      .toBuffer();

    const data = thumb.toString("base64");
    if (data.length > MAX_BYTES) return null; // measure the encoded size

    return { type: "image", data, mimeType: "image/jpeg" };
  } catch {
    // Deliberate swallow: any fetch/decode/resize error means "no preview".
    return null;
  }
}
75 changes: 75 additions & 0 deletions test/confirm-spend.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Run with: npm test (tsx --test)
//
// Exercises confirmSpend with confirmation ENABLED. The module reads its env
// gates at import time, so we set them before the dynamic import below. Node's
// test runner runs each file in its own process, so this env doesn't leak.
process.env.BLOCKRUN_CONFIRM_SPEND = "on";
process.env.BLOCKRUN_CONFIRM_THRESHOLD = "0.05";

import { test } from "node:test";
import assert from "node:assert/strict";

const { confirmSpend, resetSpendApproval } = await import("../src/utils/confirm-spend.js");

// Bare-bones McpServer substitute exposing just the two members confirmSpend
// reads: `server.getClientCapabilities` and `server.elicitInput`.
//   elicitation: false → capabilities come back empty (no elicitation support)
//   result             → value elicitInput resolves with
//   throws             → elicitInput rejects instead of resolving
function fakeServer(opts: { elicitation?: boolean; result?: unknown; throws?: boolean }) {
  function getClientCapabilities() {
    if (opts.elicitation === false) {
      return {};
    }
    return { elicitation: {} };
  }

  async function elicitInput() {
    if (opts.throws) {
      throw new Error("client has no form mode");
    }
    return opts.result;
  }

  const stub = { server: { getClientCapabilities, elicitInput } };
  return stub as never;
}

// Each case starts by calling resetSpendApproval() so the module-level
// "approve all" latch set by one case cannot leak into the next.
// Where a case expects NO prompt, the stub is primed to decline: if
// elicitInput were consulted, confirmSpend would return ok:false and the
// assertion would fail.

// usd of 0 is handled before any elicitation.
test("free call (usd <= 0) proceeds without prompting", async () => {
resetSpendApproval();
const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 0, label: "x" });
assert.equal(r.ok, true);
});

// The threshold is set to 0.05 via env at the top of this file.
test("sub-threshold call proceeds without prompting", async () => {
resetSpendApproval();
// 0.04 <= 0.05 threshold → never reaches elicitInput, even a decline-stub is allowed.
const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 0.04, label: "x" });
assert.equal(r.ok, true);
});

// Capabilities without an `elicitation` entry → confirmSpend proceeds.
test("client without elicitation proceeds (fail-open)", async () => {
resetSpendApproval();
const r = await confirmSpend(fakeServer({ elicitation: false }), { usd: 1, label: "x" });
assert.equal(r.ok, true);
});

// The only outcome that blocks the charge.
test("explicit decline aborts the charge", async () => {
resetSpendApproval();
const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 1, label: "x" });
assert.equal(r.ok, false);
});

// A "cancel" action is not treated as a refusal.
test("cancel/ESC is fail-open — the charge still proceeds", async () => {
resetSpendApproval();
const r = await confirmSpend(fakeServer({ result: { action: "cancel" } }), { usd: 1, label: "x" });
assert.equal(r.ok, true);
});

// Accepting with approve_all_session ticked sets the session latch; the
// trailing resetSpendApproval() clears it again for any later case.
test("approve_all silences subsequent prompts for the session", async () => {
resetSpendApproval();
const first = await confirmSpend(
fakeServer({ result: { action: "accept", content: { approve_all_session: true } } }),
{ usd: 1, label: "x" },
);
assert.equal(first.ok, true);
// A server that WOULD decline is never consulted now — session auto-approved.
const later = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 5, label: "y" });
assert.equal(later.ok, true);
resetSpendApproval();
});

// A rejected elicitInput promise is swallowed and the call proceeds.
test("elicitInput throwing fails open", async () => {
resetSpendApproval();
const r = await confirmSpend(fakeServer({ throws: true }), { usd: 1, label: "x" });
assert.equal(r.ok, true);
});
Loading