Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions apps/code/scripts/download-binaries.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import {
createWriteStream,
existsSync,
mkdirSync,
readFileSync,
realpathSync,
rmSync,
writeFileSync,
} from "node:fs";
import { dirname, join } from "node:path";
import { pipeline } from "node:stream/promises";
Expand All @@ -21,7 +23,7 @@ const DEST_DIR = join(__dirname, "..", "resources", "codex-acp");
const BINARIES = [
{
name: "codex-acp",
version: "0.14.0",
version: "0.16.0",
getUrl: (version, target) => {
const ext = target.includes("windows") ? "zip" : "tar.gz";
return `https://github.com/zed-industries/codex-acp/releases/download/v${version}/codex-acp-${version}-${target}.${ext}`;
Expand Down Expand Up @@ -140,12 +142,22 @@ async function downloadBinary(binary) {
const binaryName =
process.platform === "win32" ? `${binary.name}.exe` : binary.name;
const binaryPath = join(DEST_DIR, binaryName);
const versionPath = join(DEST_DIR, `${binary.name}.version`);

console.log(`\n[${binary.name}] v${binary.version}`);

if (existsSync(binaryPath)) {
console.log(` Already exists: ${binaryPath}`);
return;
const currentVersion = existsSync(versionPath)
? readFileSync(versionPath, "utf8").trim()
: null;
if (currentVersion === binary.version) {
console.log(` Already exists: ${binaryPath}`);
return;
}
console.log(
` Replacing existing ${binary.name}${currentVersion ? ` v${currentVersion}` : ""} with v${binary.version}`,
);
rmSync(binaryPath, { force: true });
}

const target = binary.getTarget();
Expand All @@ -170,6 +182,7 @@ async function downloadBinary(binary) {
if (process.platform === "darwin") {
signForMacOS(binaryPath);
}
writeFileSync(versionPath, `${binary.version}\n`);

console.log(` Ready: ${binaryPath}`);
}
Expand Down
2 changes: 2 additions & 0 deletions apps/code/scripts/download-binaries.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ vi.mock("node:fs", () => {
createWriteStream: vi.fn(() => ({})),
existsSync: vi.fn(() => true),
mkdirSync: vi.fn(),
readFileSync: vi.fn(() => ""),
realpathSync: vi.fn(() => "/not/the/entrypoint"),
rmSync: vi.fn(),
writeFileSync: vi.fn(),
};
return { ...fns, default: fns };
});
Expand Down
8 changes: 8 additions & 0 deletions packages/agent/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
"types": "./dist/gateway-models.d.ts",
"import": "./dist/gateway-models.js"
},
"./mcp-store/tool-keys": {
"types": "./dist/mcp-store/tool-keys.d.ts",
"import": "./dist/mcp-store/tool-keys.js"
},
"./posthog-api": {
"types": "./dist/posthog-api.d.ts",
"import": "./dist/posthog-api.js"
Expand Down Expand Up @@ -68,6 +72,10 @@
"types": "./dist/adapters/claude/mcp/tool-metadata.d.ts",
"import": "./dist/adapters/claude/mcp/tool-metadata.js"
},
"./adapters/codex/mcp-approval-hook": {
"types": "./dist/adapters/codex/mcp-approval-hook.d.ts",
"import": "./dist/adapters/codex/mcp-approval-hook.js"
},
"./execution-mode": {
"types": "./dist/execution-mode.d.ts",
"import": "./dist/execution-mode.js"
Expand Down
6 changes: 3 additions & 3 deletions packages/agent/src/acp-extensions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ export const POSTHOG_NOTIFICATIONS = {
export const POSTHOG_METHODS = {
/**
* Client requests a session refresh between turns. Payload may include
* `mcpServers` to trigger a resume-with-new-options reinit; future fields
* can extend this without adding new methods. Returns once the refresh has
* completed so the caller can safely send the next prompt.
* `mcpServers` to trigger a resume-with-new-options reinit or
* `mcpToolApprovals` to refresh the in-memory approval gate. Returns once
* the refresh has completed so the caller can safely send the next prompt.
*/
REFRESH_SESSION: "_posthog/refresh_session",
} as const;
Expand Down
23 changes: 22 additions & 1 deletion packages/agent/src/adapters/claude/claude-agent.refresh.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
const fetchMcpToolMetadataMock = vi.fn().mockResolvedValue(undefined);
const clearMcpToolMetadataCacheMock = vi.fn();
const clearMcpToolApprovalCacheMock = vi.fn();
const setMcpToolApprovalStatesMock = vi.fn();
vi.mock("./mcp/tool-metadata", () => ({
fetchMcpToolMetadata: fetchMcpToolMetadataMock,
getConnectedMcpServerNames: vi.fn().mockReturnValue([]),
getCachedMcpTools: vi.fn().mockReturnValue([]),
clearMcpToolMetadataCache: clearMcpToolMetadataCacheMock,
clearMcpToolApprovalCache: clearMcpToolApprovalCacheMock,
setMcpToolApprovalStates: vi.fn(),
setMcpToolApprovalStates: setMcpToolApprovalStatesMock,
}));

// Import after the mocks so ClaudeAcpAgent resolves the mocked SDK
Expand Down Expand Up @@ -173,6 +174,7 @@ describe("ClaudeAcpAgent.extMethod refresh_session", () => {
fetchMcpToolMetadataMock.mockClear();
clearMcpToolMetadataCacheMock.mockClear();
clearMcpToolApprovalCacheMock.mockClear();
setMcpToolApprovalStatesMock.mockClear();
});

it("returns methodNotFound for unknown extension methods", async () => {
Expand All @@ -191,6 +193,25 @@ describe("ClaudeAcpAgent.extMethod refresh_session", () => {
).rejects.toThrow(/requires at least one refreshable field/);
});

it("updates MCP approval state without rebuilding when only approvals changed", async () => {
const agent = makeAgent();
const { oldQuery, endSpy } = installFakeSession(agent, "s-approval");

const result = await agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
mcpToolApprovals: {
mcp__Linear__search: "needs_approval",
},
});

expect(result).toEqual({ refreshed: true });
expect(setMcpToolApprovalStatesMock).toHaveBeenCalledWith({
mcp__Linear__search: "needs_approval",
});
expect(oldQuery.interrupt).not.toHaveBeenCalled();
expect(endSpy).not.toHaveBeenCalled();
expect(createdQueries).toHaveLength(0);
});

it("rejects when mcpServers is not an array", async () => {
const agent = makeAgent();
installFakeSession(agent, "s-malformed");
Expand Down
22 changes: 19 additions & 3 deletions packages/agent/src/adapters/claude/claude-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import {
POSTHOG_PRODUCTS,
type PostHogProductId,
} from "../../posthog-products";
import { mcpToolApprovalsSchema } from "../../server/schemas";
import type { PostHogAPIConfig } from "../../types";
import {
isCloudRun,
Expand Down Expand Up @@ -1231,10 +1232,25 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
throw RequestError.methodNotFound(method);
}

// Trust boundary: refresh is only safe when the caller is trusted infra
// (e.g. the sandbox agent-server). Do not route this method from
// untrusted clients — parseMcpServers does no URL/command validation.
const approvalResult =
params.mcpToolApprovals === undefined
? undefined
: mcpToolApprovalsSchema.safeParse(params.mcpToolApprovals);
if (approvalResult && !approvalResult.success) {
throw new RequestError(
-32602,
"refresh_session: mcpToolApprovals must be an object of approval states",
);
}
if (approvalResult) {
setMcpToolApprovalStates(approvalResult.data);
}

// Trust boundary: server refresh is only safe when the caller is trusted
// infra. Do not route mcpServers from untrusted clients — parseMcpServers
// does no URL/command validation.
if (params.mcpServers === undefined) {
if (approvalResult) return { refreshed: true };
throw new RequestError(
-32602,
"refresh_session requires at least one refreshable field (e.g. mcpServers)",
Expand Down
4 changes: 2 additions & 2 deletions packages/agent/src/adapters/claude/hooks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ describe("createPreToolUseHook", () => {
clearMcpToolApprovalCache();
});

test("respects an explicit allow rule for a needs_approval MCP Store tool", async () => {
test("routes needs_approval MCP Store tools to ask even with an old allow rule", async () => {
clearMcpToolMetadataCache();
clearMcpToolApprovalCache();
setMcpToolApprovalStates({
Expand All @@ -388,7 +388,7 @@ describe("createPreToolUseHook", () => {
);

expect(result).toMatchObject({
hookSpecificOutput: { permissionDecision: "allow" },
hookSpecificOutput: { permissionDecision: "ask" },
});
clearMcpToolMetadataCache();
clearMcpToolApprovalCache();
Expand Down
5 changes: 2 additions & 3 deletions packages/agent/src/adapters/claude/hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ export const createPreToolUseHook =
// gate that runs for these tools. Force a decision here: deny blocked
// tools outright, and route needs_approval tools back through canUseTool
// (via "ask") so handleMcpApprovalFlow relays the approval dialog to the
// desktop and persists the result to the backend. An explicit allow rule
// (e.g. a prior "always allow") takes precedence so the prompt sticks.
// desktop and persists the result to the backend. A later Settings change
// back to needs_approval must override any old local allow rule.
const mcpApprovalState = getMcpToolApprovalState(toolName);
if (mcpApprovalState === "do_not_use") {
logger.info(`[PreToolUseHook] Blocking do_not_use MCP tool: ${toolName}`);
Expand All @@ -433,7 +433,6 @@ export const createPreToolUseHook =
}
if (
mcpApprovalState === "needs_approval" &&
permissionCheck.decision !== "allow" &&
permissionCheck.decision !== "deny"
) {
logger.info(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,35 @@ describe("canUseTool MCP approval enforcement", () => {
);
});

it("updates the approval cache after the user approves a tool", async () => {
it("keeps needs_approval after the user allows a tool once", async () => {
const toolName = "mcp__Linear__search";
setMcpToolApprovalStates({
[toolName]: "needs_approval",
});

const result = await canUseTool(createContext(toolName));

expect(result.behavior).toBe("allow");
expect(getMcpToolApprovalState(toolName)).toBe("needs_approval");
});

it("updates the approval cache after the user always allows a tool", async () => {
const toolName = "mcp__Linear__search";
setMcpToolApprovalStates({
[toolName]: "needs_approval",
});

const result = await canUseTool(
createContext(toolName, {
client: {
sessionUpdate: vi.fn().mockResolvedValue(undefined),
requestPermission: vi.fn().mockResolvedValue({
outcome: { outcome: "selected", optionId: "allow_always" },
}),
},
}),
);

expect(result.behavior).toBe("allow");
expect(getMcpToolApprovalState(toolName)).toBe("approved");
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,8 @@ async function handleMcpApprovalFlow(
(response.outcome.optionId === "allow" ||
response.outcome.optionId === "allow_always")
) {
setMcpToolApprovalStates({ [approvalToolName]: "approved" });
if (response.outcome.optionId === "allow_always") {
setMcpToolApprovalStates({ [approvalToolName]: "approved" });
return {
behavior: "allow",
updatedInput: toolInput as Record<string, unknown>,
Expand Down
19 changes: 19 additions & 0 deletions packages/agent/src/adapters/codex/codex-agent.refresh.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ type PrivateAgent = {
};
configOptions: unknown[];
taskRunId?: string;
mcpToolApprovals?: Record<string, string>;
};
codexProcess: SpawnHandle;
codexConnection: MockCodexConnection;
Expand Down Expand Up @@ -188,6 +189,24 @@ describe("CodexAcpAgent.extMethod refresh_session", () => {
).rejects.toThrow(/at least one refreshable field/);
});

it("updates MCP approval state without respawning when only approvals changed", async () => {
const agent = makeAgent();
const { priv } = primeSession(agent, "s-approval");

const result = await agent.extMethod(POSTHOG_METHODS.REFRESH_SESSION, {
mcpToolApprovals: {
mcp__Linear__search: "needs_approval",
},
});

expect(result).toEqual({ refreshed: true });
expect(priv.sessionState.mcpToolApprovals).toEqual({
mcp__Linear__search: "needs_approval",
});
expect(spawnedProcesses).toHaveLength(1);
expect(createdConnections).toHaveLength(1);
});

it("rejects when mcpServers is not an array", async () => {
const agent = makeAgent();
await expect(
Expand Down
32 changes: 25 additions & 7 deletions packages/agent/src/adapters/codex/codex-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ import {
isCodexNativeMode,
type PermissionMode,
} from "../../execution-mode";
import type {
McpToolApprovalsConfig,
McpToolInstallationsConfig,
import {
type McpToolApprovalsConfig,
type McpToolInstallationsConfig,
mcpToolApprovalsSchema,
} from "../../server/schemas";
import type { PostHogAPIConfig, ProcessSpawnedCallback } from "../../types";
import { isCloudRun } from "../../utils/common";
Expand Down Expand Up @@ -881,11 +882,28 @@ export class CodexAcpAgent extends BaseAcpAgent {
throw RequestError.methodNotFound(method);
}

// Trust boundary: refresh is only safe when the caller is trusted infra
// (e.g. the sandbox agent-server). Do not route this method from
// untrusted clients — mcpServers contents are forwarded verbatim to
// codex-acp with no URL/command validation.
const approvalResult =
params.mcpToolApprovals === undefined
? undefined
: mcpToolApprovalsSchema.safeParse(params.mcpToolApprovals);
if (approvalResult && !approvalResult.success) {
throw new RequestError(
-32602,
"refresh_session: mcpToolApprovals must be an object of approval states",
);
}
if (approvalResult) {
this.sessionState.mcpToolApprovals = {
...this.sessionState.mcpToolApprovals,
...approvalResult.data,
};
}

// Trust boundary: server refresh is only safe when the caller is trusted
// infra. Do not route mcpServers from untrusted clients — contents are
// forwarded verbatim to codex-acp with no URL/command validation.
if (params.mcpServers === undefined) {
if (approvalResult) return { refreshed: true };
throw new RequestError(
-32602,
"refresh_session requires at least one refreshable field (e.g. mcpServers)",
Expand Down
10 changes: 6 additions & 4 deletions packages/agent/src/adapters/codex/codex-client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ describe("createCodexClient MCP Store permissions", () => {
};
}

test("relays needs_approval MCP tools by bare title even in full-access mode", async () => {
test("relays needs_approval MCP tools by bare title even in full-access mode without promoting allow once", async () => {
const toolName = "mcp__Granola__query_granola_meetings";
const upstream = makePermissionUpstream();
const sessionState = createSessionState("sess", "/tmp", {
Expand Down Expand Up @@ -178,12 +178,14 @@ describe("createCodexClient MCP Store permissions", () => {
}),
}),
);
expect(sessionState.mcpToolApprovals?.[toolName]).toBe("approved");
expect(sessionState.mcpToolApprovals?.[toolName]).toBe("needs_approval");
});

test("relays needs_approval MCP tools with unsanitized MCP server names", async () => {
test("promotes needs_approval MCP tools only when the user chooses always allow", async () => {
const toolName = "mcp__Granola_Meetings__query_granola_meetings";
const upstream = makePermissionUpstream();
const upstream = makePermissionUpstream({
outcome: { outcome: "selected" as const, optionId: "allow_always" },
});
const sessionState = createSessionState("sess", "/tmp", {
permissionMode: "full-access",
mcpToolApprovals: { [toolName]: "needs_approval" },
Expand Down
7 changes: 3 additions & 4 deletions packages/agent/src/adapters/codex/codex-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,10 @@ function withMcpToolName(
};
}

function isAllowResponse(response: RequestPermissionResponse): boolean {
function isAlwaysAllowResponse(response: RequestPermissionResponse): boolean {
return (
response.outcome?.outcome === "selected" &&
(response.outcome.optionId === "allow" ||
response.outcome.optionId === "allow_always")
response.outcome.optionId === "allow_always"
);
}

Expand Down Expand Up @@ -217,7 +216,7 @@ export function createCodexClient(
const response = await upstreamClient.requestPermission(
withMcpToolName(params, mcpToolName),
);
if (isAllowResponse(response)) {
if (isAlwaysAllowResponse(response)) {
sessionState.mcpToolApprovals = {
...sessionState.mcpToolApprovals,
[mcpToolName]: "approved",
Expand Down
Loading
Loading