diff --git a/src/tools/image.ts b/src/tools/image.ts index d61758f..59b8652 100644 --- a/src/tools/image.ts +++ b/src/tools/image.ts @@ -7,6 +7,8 @@ import { formatError } from "../utils/errors.js"; import type { BudgetState } from "../types.js"; import { getChain, getImageClient } from "../utils/wallet.js"; import { readFile } from "node:fs/promises"; +import { shouldInline, buildInlineImageBlock } from "../utils/inline-image.js"; +import { confirmSpend } from "../utils/confirm-spend.js"; // The gateway's /v1/images/image2image only accepts base64 data URIs for the // source image(s). Callers naturally have local file paths or http(s) URLs, so @@ -144,10 +146,11 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil mask: z.string().optional().describe("Inpaint mask for edit action (openai/gpt-image-* only): a base64 data URI, http(s) URL, or local file path. Transparent areas of the mask are regenerated. Cannot be combined with multiple source images."), size: z.string().optional().default("1024x1024").describe("Image size. Common values: 1024x1024 (all models), 1536x1024 / 1024x1536 (gpt-image-*), 2048x2048 / 4096x4096 (nano-banana-pro)"), quality: z.enum(["standard", "hd"]).optional().default("standard"), + inline: z.boolean().optional().describe("Return a small inline image preview (thumbnail) the client can render in-conversation, in addition to the full-resolution URL. Defaults to the BLOCKRUN_INLINE_IMAGES env setting (off unless set). Rich clients (e.g. the VS Code extension) render it; plain terminals ignore it. 
Off keeps responses lightweight."), agent_id: z.string().optional().describe("Agent identifier for budget tracking and enforcement."), }, }, - async ({ prompt, action, model, image, mask, size, quality, agent_id }) => { + async ({ prompt, action, model, image, mask, size, quality, inline, agent_id }) => { try { if (getChain() !== "base") { return { @@ -157,8 +160,13 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil } const selectedModel = model || "openai/gpt-image-2"; - let response; + // Edit-mode inputs, normalized to data URIs in the edit branch below and + // consumed at the shared call site after the spend confirmation. + let normalizedImage: string | string[] | undefined; + let normalizedMask: string | undefined; + + // Validate the edit action up front (before estimating/charging). if (action === "edit") { if (!image) { return { @@ -194,8 +202,6 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil }; } } - let normalizedImage: string | string[]; - let normalizedMask: string | undefined; try { const dataUris = await Promise.all(sourceImages.map(toImageDataUri)); normalizedImage = dataUris.length === 1 ? dataUris[0] : dataUris; @@ -206,37 +212,37 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil isError: true, }; } - const estimatedCost = estimateCost(selectedModel, size); - const budgetCheck = checkBudget(budget, agent_id, estimatedCost); - if (!budgetCheck.allowed) { - return { - content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }], - isError: true, - }; - } - response = await getImageClient().edit(prompt, normalizedImage, { - model: selectedModel, - size, - ...(normalizedMask ? 
{ mask: normalizedMask } : {}), - }); - recordSpending(budget, estimatedCost, agent_id); - } else { - const estimatedCost = estimateCost(selectedModel, size); - const budgetCheck = checkBudget(budget, agent_id, estimatedCost); - if (!budgetCheck.allowed) { - return { - content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }], - isError: true, - }; - } - response = await getImageClient().generate(prompt, { - model: selectedModel, - size, - quality: quality as "standard" | "hd", - }); - recordSpending(budget, estimatedCost, agent_id); } + const estimatedCost = estimateCost(selectedModel, size); + const budgetCheck = checkBudget(budget, agent_id, estimatedCost); + if (!budgetCheck.allowed) { + return { + content: [{ type: "text", text: `${budgetCheck.reason}. Use blockrun_wallet action:"report" to see usage or action:"delegate" to increase agent budget.` }], + isError: true, + }; + } + + // Confirm the spend before charging (elicitation; user can approve once, + // approve all for the session, or decline to abort). No-ops on clients + // without elicitation or when disabled via env. + const confirm = await confirmSpend(server, { + usd: estimatedCost, + label: `${action === "edit" ? "image edit" : "image"} · ${selectedModel}`, + }); + if (!confirm.ok) { + return { content: [{ type: "text", text: confirm.reason || "Charge cancelled." }] }; + } + + const response = action === "edit" + ? await getImageClient().edit(prompt, normalizedImage!, { + model: selectedModel, + size, + ...(normalizedMask ? 
{ mask: normalizedMask } : {}), + }) + : await getImageClient().generate(prompt, { model: selectedModel, size, quality: quality as "standard" | "hd" }); + recordSpending(budget, estimatedCost, agent_id); + const imageUrl = response.data?.[0]?.url; if (!imageUrl) { @@ -246,9 +252,14 @@ Source images and masks accept a base64 data URI, an http(s) URL, or a local fil }; } + const textBlock = { type: "text" as const, text: `Image: ${imageUrl}\nPrompt: ${prompt}\nModel: ${selectedModel}` }; + // Optional inline preview (thumbnail) for rich clients. Best-effort: + // on failure or if disabled, fall back to the URL-only text block. + const previewBlock = shouldInline(inline) ? await buildInlineImageBlock(imageUrl) : null; + return { - content: [{ type: "text", text: `Image: ${imageUrl}\nPrompt: ${prompt}\nModel: ${selectedModel}` }], - structuredContent: { url: imageUrl, prompt, model: selectedModel }, + content: previewBlock ? [previewBlock, textBlock] : [textBlock], + structuredContent: { url: imageUrl, prompt, model: selectedModel, inlined: Boolean(previewBlock) }, }; } catch (err) { const errMsg = err instanceof Error ? err.message : String(err); diff --git a/src/utils/confirm-spend.ts b/src/utils/confirm-spend.ts new file mode 100644 index 0000000..4e57ac1 --- /dev/null +++ b/src/utils/confirm-spend.ts @@ -0,0 +1,99 @@ +// src/utils/confirm-spend.ts +// +// Server-side spend confirmation via MCP elicitation. Before a paid call, the +// server asks the client to render a confirm dialog showing the estimated cost. +// The dialog carries an "approve all this session" checkbox — when ticked, the +// server skips every later prompt for the rest of the session (in-memory flag, +// scoped to this server process = this session). This is reliable across +// clients that support elicitation (e.g. Claude Code), unlike PreToolUse hooks. +// +// Off by default — opt in with BLOCKRUN_CONFIRM_SPEND=on (or 1/true/yes). 
// This avoids double-prompting when a plugin already gates spend via a PreToolUse
// hook (the hook renders the cost and is honored on more clients), and keeps
// the bare MCP from surprising users with an extra dialog.
//
// Controls (env):
//   BLOCKRUN_CONFIRM_SPEND=on        enable elicitation-based spend confirmation
//   BLOCKRUN_CONFIRM_THRESHOLD=0.05  only confirm calls estimated above this USD
//
// Degradation: if the client doesn't advertise elicitation, we proceed without
// prompting (the tool's cost footer still tells the user what was charged) —
// better than failing the call on clients that can't ask.

import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";

// Session-scoped "approve all" flag. The MCP server is one process per session,
// so a module-level flag is exactly session lifetime.
let sessionAutoApprove = false;

// Both gates are read once, when this module is first loaded.
const CONFIRM_ON = /^(1|true|on|yes)$/i.test(process.env.BLOCKRUN_CONFIRM_SPEND ?? "");
const THRESHOLD = Number(process.env.BLOCKRUN_CONFIRM_THRESHOLD || 0);

/** Outcome of a spend confirmation: `ok` says whether the caller may charge. */
export interface ConfirmResult {
  ok: boolean;
  /** Human-readable explanation; set when `ok` is false. */
  reason?: string;
}

/**
 * Ask the user to confirm a charge. Returns { ok:true } to proceed, or
 * { ok:false, reason } when the user declines (caller should abort without
 * charging). Free calls, an active session approval, the off switch, the
 * sub-threshold case, and clients without elicitation all return ok:true.
 *
 * @param server MCP server whose underlying `server` is used to read the
 *   client capabilities and to send the elicitation request.
 * @param opts `usd` is the estimated charge, `label` names what is being
 *   bought, and the optional `balanceNote` is appended to the estimate line.
 * @returns `{ ok: false, reason }` only for an explicit "decline"; every other
 *   outcome, including a prompt that throws, resolves to `{ ok: true }`.
 */
export async function confirmSpend(
  server: McpServer,
  opts: { usd: number; label: string; balanceNote?: string },
): Promise<ConfirmResult> {
  const { usd, label, balanceNote } = opts;

  if (!CONFIRM_ON) return { ok: true }; // off by default (opt-in)
  if (usd <= 0) return { ok: true }; // free
  if (sessionAutoApprove) return { ok: true }; // user already approved all
  if (usd <= THRESHOLD) return { ok: true }; // cheap enough to skip

  const caps = server.server.getClientCapabilities?.();
  if (!caps?.elicitation) return { ok: true }; // client can't be asked → proceed

  try {
    const result = await server.server.elicitInput({
      message:
        `💸 BlockRun charge — ${label}\n` +
        `Estimated: $${usd.toFixed(4)}${balanceNote ? ` · ${balanceNote}` : ""}\n` +
        `Approve this spend? (USDC is debited per call.)\n` +
        `To stop the charge, choose Decline — Cancel/ESC lets it proceed.`,
      requestedSchema: {
        type: "object",
        properties: {
          approve_all_session: {
            type: "boolean",
            title: "Approve all BlockRun charges for the rest of this session (don't ask again)",
            default: false,
          },
        },
      },
    });

    // Only an EXPLICIT decline stops the charge. Some clients (e.g. the desktop
    // app) return action "cancel" even when the user confirms a form dialog, and
    // the client's own tool-permission prompt is already the real gate — so we
    // must not treat a non-"accept" action as a cancellation, or a confirmed
    // generation gets wrongly aborted ("you cancelled"). Honor "decline" only.
    //
    // The decline check runs BEFORE the checkbox is read: a declined dialog
    // must never switch on session-wide auto-approval, even if the response
    // happens to carry approve_all_session.
    if (result.action === "decline") {
      return { ok: false, reason: "Charge declined — nothing was generated or charged." };
    }

    // Require a literal `true` so a malformed value (e.g. the string "false")
    // cannot enable auto-approval by being merely truthy.
    const content = result.content as { approve_all_session?: boolean } | undefined;
    if (content?.approve_all_session === true) sessionAutoApprove = true;

    return { ok: true };
  } catch {
    // Couldn't render the prompt (e.g. client advertises elicitation but not
    // the form mode). Fail open — proceed rather than block a legitimate call.
    // Only an explicit user decline (above) stops the charge.
    return { ok: true };
  }
}

/** Test/escape hatch: reset the session approval (not used in normal flow). */
export function resetSpendApproval(): void {
  sessionAutoApprove = false;
}

// diff --git a/src/utils/inline-image.ts b/src/utils/inline-image.ts new file mode 100644 index 0000000..2e14670 --- /dev/null +++ b/src/utils/inline-image.ts @@ -0,0 +1,77 @@
// src/utils/inline-image.ts
//
// Optional inline image preview. When enabled, a generated image is fetched,
// downscaled to a small JPEG thumbnail, and returned as an MCP `type:"image"`
// content block so rich clients (e.g. the VS Code extension) render it inline.
// The full-resolution URL is always kept in the text block — the thumbnail is
// a preview, not a replacement.
//
// Off by default to avoid context/token bloat. Enable globally with
// BLOCKRUN_INLINE_IMAGES=1 (or true/yes/on), or per call with the tool's
// `inline` param (which takes precedence over the env default).

import sharp from "sharp";

// Thumbnail bounds — small enough that the base64 stays cheap in context.
const MAX_DIM = Number(process.env.BLOCKRUN_INLINE_MAX_DIM || 512);
const JPEG_QUALITY = Number(process.env.BLOCKRUN_INLINE_QUALITY || 70);
// Hard ceiling on the BASE64-encoded thumbnail (the string that actually lands
// in the context window — base64 inflates the raw JPEG ~33%). Above this we
// skip inlining entirely (URL-only) so a single image can't blow up context.
const MAX_BYTES = Number(process.env.BLOCKRUN_INLINE_MAX_BYTES || 900_000);
// Defensive caps on the SOURCE download/decode. Upstream is the trusted
// blockrun-hosted asset, but bounding the buffer + decode keeps a pathological
// response from ballooning memory before the thumbnail step runs.
const MAX_SOURCE_BYTES = 25_000_000; // 25 MB ceiling on the fetched image
const MAX_INPUT_PIXELS = 100_000_000; // ~100 MP decode guard for sharp

/** True when `v` is one of 1/true/yes/on (case-insensitive, surrounding whitespace ignored). */
function truthy(v: string | undefined): boolean {
  return v != null && /^(1|true|yes|on)$/i.test(v.trim());
}

/**
 * Resolve whether to inline a preview. Per-call `param` wins; otherwise the
 * BLOCKRUN_INLINE_IMAGES env default; otherwise off.
 */
export function shouldInline(param?: boolean): boolean {
  if (typeof param === "boolean") return param;
  return truthy(process.env.BLOCKRUN_INLINE_IMAGES);
}

export interface InlineImageBlock {
  type: "image";
  data: string; // base64 (no data: prefix, per MCP ImageContent)
  mimeType: string;
}

/**
 * Read a response body into memory, giving up as soon as more than `maxBytes`
 * have arrived, so an oversized body is never fully buffered.
 *
 * @returns the collected bytes, or null once the cap is exceeded.
 */
async function readBodyCapped(resp: Response, maxBytes: number): Promise<Buffer | null> {
  if (!resp.body) {
    // No stream exposed (e.g. a minimal fetch stand-in): fall back to a
    // whole-body read and check the size afterwards.
    const whole = Buffer.from(await resp.arrayBuffer());
    return whole.byteLength > maxBytes ? null : whole;
  }

  const reader = resp.body.getReader();
  const chunks: Uint8Array[] = [];
  let received = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    received += value.byteLength;
    if (received > maxBytes) {
      // Stop pulling data; a failure to cancel is irrelevant to the outcome.
      await reader.cancel().catch(() => undefined);
      return null;
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, received);
}

/**
 * Fetch the image at `url`, downscale to a JPEG thumbnail, and return an MCP
 * image content block. Returns null (caller falls back to URL-only) on any
 * failure or if the thumbnail exceeds MAX_BYTES — inlining is best-effort and
 * must never break the tool call.
 *
 * @param url Location of the full-resolution image to preview.
 * @returns A base64 JPEG image block, or null when no preview could be built.
 */
export async function buildInlineImageBlock(url: string): Promise<InlineImageBlock | null> {
  try {
    const resp = await fetch(url, { signal: AbortSignal.timeout(15_000) });
    if (!resp.ok) return null;

    // Cap the download: reject early on a too-large Content-Length, then
    // enforce the same cap while streaming in case the header lied or was
    // absent — the body is abandoned as soon as it crosses the ceiling.
    const declared = Number(resp.headers.get("content-length") || 0);
    if (declared > MAX_SOURCE_BYTES) return null;
    const input = await readBodyCapped(resp, MAX_SOURCE_BYTES);
    if (input === null) return null;

    const thumb = await sharp(input, { limitInputPixels: MAX_INPUT_PIXELS })
      .rotate()
      .resize(MAX_DIM, MAX_DIM, { fit: "inside", withoutEnlargement: true })
      .jpeg({ quality: JPEG_QUALITY })
      .toBuffer();

    const data = thumb.toString("base64");
    if (data.length > MAX_BYTES) return null; // measure the encoded size

    return { type: "image", data, mimeType: "image/jpeg" };
  } catch {
    // Best-effort by design: any fetch/decode/encode failure means "no preview".
    return null;
  }
}

// diff --git a/test/confirm-spend.test.ts b/test/confirm-spend.test.ts new file mode 100644 index 0000000..a836a4e --- /dev/null +++ b/test/confirm-spend.test.ts @@ -0,0 +1,75 @@
// Run with: npm test (tsx --test)
//
// Exercises confirmSpend with confirmation ENABLED. The module reads its env
// gates at import time, so we set them before the dynamic import below. Node's
// test runner runs each file in its own process, so this env doesn't leak.
process.env.BLOCKRUN_CONFIRM_SPEND = "on";
process.env.BLOCKRUN_CONFIRM_THRESHOLD = "0.05";

import { test } from "node:test";
import assert from "node:assert/strict";

const { confirmSpend, resetSpendApproval } = await import("../src/utils/confirm-spend.js");

// Minimal McpServer stand-in: only the two members confirmSpend touches.
function fakeServer(opts: { elicitation?: boolean; result?: unknown; throws?: boolean }) {
  return {
    server: {
      getClientCapabilities: () => (opts.elicitation === false ? {} : { elicitation: {} }),
      elicitInput: async () => {
        if (opts.throws) throw new Error("client has no form mode");
        return opts.result;
      },
    },
  } as never;
}

test("free call (usd <= 0) proceeds without prompting", async () => {
  resetSpendApproval();
  const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 0, label: "x" });
  assert.equal(r.ok, true);
});

test("sub-threshold call proceeds without prompting", async () => {
  resetSpendApproval();
  // 0.04 <= 0.05 threshold → never reaches elicitInput, even a decline-stub is allowed.
  const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 0.04, label: "x" });
  assert.equal(r.ok, true);
});

test("client without elicitation proceeds (fail-open)", async () => {
  resetSpendApproval();
  const r = await confirmSpend(fakeServer({ elicitation: false }), { usd: 1, label: "x" });
  assert.equal(r.ok, true);
});

test("explicit decline aborts the charge", async () => {
  resetSpendApproval();
  const r = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 1, label: "x" });
  assert.equal(r.ok, false);
});

test("cancel/ESC is fail-open — the charge still proceeds", async () => {
  resetSpendApproval();
  const r = await confirmSpend(fakeServer({ result: { action: "cancel" } }), { usd: 1, label: "x" });
  assert.equal(r.ok, true);
});

test("approve_all silences subsequent prompts for the session", async () => {
  resetSpendApproval();
  const first = await confirmSpend(
    fakeServer({ result: { action: "accept", content: { approve_all_session: true } } }),
    { usd: 1, label: "x" },
  );
  assert.equal(first.ok, true);
  // A server that WOULD decline is never consulted now — session auto-approved.
  const later = await confirmSpend(fakeServer({ result: { action: "decline" } }), { usd: 5, label: "y" });
  assert.equal(later.ok, true);
  resetSpendApproval();
});

test("elicitInput throwing fails open", async () => {
  resetSpendApproval();
  const r = await confirmSpend(fakeServer({ throws: true }), { usd: 1, label: "x" });
  assert.equal(r.ok, true);
});

// diff --git a/test/inline-image.test.ts b/test/inline-image.test.ts new file mode 100644 index 0000000..6fe5957 --- /dev/null +++ b/test/inline-image.test.ts @@ -0,0 +1,65 @@
// Run with: npm test (tsx --test)
import { test } from "node:test";
import assert from "node:assert/strict";
import { shouldInline, buildInlineImageBlock } from "../src/utils/inline-image.js";

// 1x1 transparent PNG.
const PNG = Buffer.from(
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
  "base64",
);

test("shouldInline: explicit param wins over env", () => {
  process.env.BLOCKRUN_INLINE_IMAGES = "1";
  assert.equal(shouldInline(false), false); // param beats a truthy env
  assert.equal(shouldInline(true), true);
  delete process.env.BLOCKRUN_INLINE_IMAGES;
});

test("shouldInline: falls back to env when param is undefined", () => {
  delete process.env.BLOCKRUN_INLINE_IMAGES;
  assert.equal(shouldInline(undefined), false);
  process.env.BLOCKRUN_INLINE_IMAGES = "true";
  assert.equal(shouldInline(undefined), true);
  process.env.BLOCKRUN_INLINE_IMAGES = "off"; // only 1/true/yes/on are truthy
  assert.equal(shouldInline(undefined), false);
  delete process.env.BLOCKRUN_INLINE_IMAGES;
});

test("buildInlineImageBlock: null on non-ok fetch", async () => {
  const orig = globalThis.fetch;
  globalThis.fetch = (async () => new Response(null, { status: 404 })) as typeof fetch;
  try {
    assert.equal(await buildInlineImageBlock("https://example.com/x.png"), null);
  } finally {
    globalThis.fetch = orig;
  }
});

test("buildInlineImageBlock: null when fetch throws", async () => {
  const orig = globalThis.fetch;
  globalThis.fetch = (async () => {
    throw new Error("network down");
  }) as typeof fetch;
  try {
    assert.equal(await buildInlineImageBlock("https://example.com/x.png"), null);
  } finally {
    globalThis.fetch = orig;
  }
});

test("buildInlineImageBlock: encodes a small image into a base64 JPEG block", async () => {
  const orig = globalThis.fetch;
  globalThis.fetch = (async () =>
    new Response(PNG, { status: 200, headers: { "content-type": "image/png" } })) as typeof fetch;
  try {
    const block = await buildInlineImageBlock("https://example.com/x.png");
    assert.ok(block, "expected an image block");
    assert.equal(block.type, "image");
    assert.equal(block.mimeType, "image/jpeg");
    assert.ok(block.data.length > 0);
    assert.doesNotThrow(() => Buffer.from(block.data, "base64")); // valid base64
  } finally {
    globalThis.fetch = orig;
  }
});