diff --git a/src/gradient.ts b/src/gradient.ts
index a39ae59..6afda2c 100644
--- a/src/gradient.ts
+++ b/src/gradient.ts
@@ -341,6 +341,183 @@ function toolStripAnnotation(toolName: string, output: string): string {
   return annotation;
 }
 
+// ---------------------------------------------------------------------------
+// Content-aware deduplication
+// ---------------------------------------------------------------------------
+// Inspired by Dirac's ContextManager file-read deduplication: detects when the
+// same content appears multiple times in the conversation (e.g., the same file
+// read multiple times, or the same command output repeated) and replaces earlier
+// occurrences with compact annotations. This reduces token pressure before layer
+// selection, potentially keeping sessions at lower (less lossy) gradient layers.
+
+// Minimum output size (chars) to consider for dedup — annotations for smaller
+// outputs would cost more tokens than the original content.
+const DEDUP_MIN_CHARS = 600;
+
+// Tools whose outputs are file reads; these are additionally deduplicated per
+// read request, because the content of a re-read file may differ after edits.
+const READ_TOOLS = new Set(["read_file", "read"]);
+
+/**
+ * 32-bit FNV-1a hash over the UTF-16 code units of `str`, for content comparison.
+ * Math.imul keeps the multiply in exact 32-bit integer arithmetic; a plain `*`
+ * can exceed 2^53 and silently round away the low bits of the product.
+ */
+function simpleHash(str: string): number {
+  let hash = 0x811c9dc5;
+  for (let i = 0; i < str.length; i++) {
+    hash ^= str.charCodeAt(i);
+    hash = Math.imul(hash, 0x01000193) >>> 0;
+  }
+  return hash;
+}
+
+/**
+ * Map key identifying a tool output by content. The length is part of the key so
+ * that two outputs must agree on both length and hash to be treated as equal.
+ */
+function contentKeyOf(toolName: string, output: string): string {
+  return `${toolName}:${output.length}:${simpleHash(output)}`;
+}
+
+/**
+ * Extract a file path from a tool's input.
+ * Handles JSON objects such as {"path": "/foo.ts"}, {"filePath": "/foo.ts"} or
+ * {"file": "/foo.ts"}; any other input (plain text, JSON scalars) falls back to
+ * the first path-like token. Only non-empty string values are returned.
+ */
+function extractFilePath(input: string): string | undefined {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(input);
+  } catch {
+    parsed = undefined; // not JSON — handled by the plain-text fallback below
+  }
+  if (typeof parsed === "object" && parsed !== null) {
+    const fields = parsed as Record<string, unknown>;
+    for (const key of ["path", "filePath", "file"]) {
+      const value = fields[key];
+      if (typeof value === "string" && value.length > 0) return value;
+    }
+    return undefined;
+  }
+  // Plain text — try to extract a path-like string
+  const match = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
+  return match?.[0];
+}
+
+/**
+ * Dedup identity of a read-tool call, or undefined when `toolName` is not a read
+ * tool or no path can be extracted from `input`.
+ * The key covers the whole serialized input rather than just the path, so reads
+ * with different arguments (for example different ranges of one file) never
+ * supersede each other — only a repeat of the same request does.
+ */
+function readIdentity(
+  toolName: string,
+  input: unknown,
+): { key: string; filePath: string } | undefined {
+  if (!READ_TOOLS.has(toolName)) return undefined;
+  // JSON.stringify yields undefined (not a string) for an undefined input.
+  const inputStr: unknown = typeof input === "string" ? input : JSON.stringify(input);
+  if (typeof inputStr !== "string") return undefined;
+  const filePath = extractFilePath(inputStr);
+  return filePath ? { key: `read:${inputStr}`, filePath } : undefined;
+}
+
+/**
+ * Annotation for deduplicated tool output — follows the toolStripAnnotation() pattern.
+ * With `filePath` the text points at the later read of that file; without it the
+ * text points at the later identical output of `toolName`.
+ */
+function dedupAnnotation(toolName: string, filePath?: string): string {
+  if (filePath) {
+    return `[earlier version of ${filePath} — see latest read below for current content]`;
+  }
+  return `[duplicate output — same content as later ${toolName} in this session — use recall for details]`;
+}
+
+/**
+ * Replace duplicate tool outputs with compact back-references, keeping only
+ * the latest occurrence of each unique output. Reduces context token usage
+ * without information loss — the model sees the most recent content intact.
+ *
+ * Deduplicates by:
+ *   1. Exact content: identical outputs of the same tool (matched on length + hash)
+ *   2. Same read request: read_file/read calls with identical input (content may
+ *      differ due to edits); reads with different arguments are kept apart
+ *
+ * Only completed tool parts with at least DEDUP_MIN_CHARS of output are considered.
+ * The current turn (from currentTurnIdx onward) is never touched — the model
+ * needs full context for its active work. Tool parts are never removed entirely;
+ * only state.output is replaced with a compact annotation.
+ *
+ * @param messages - Conversation messages in order. Not mutated.
+ * @param currentTurnIdx - Index of the first message of the current turn.
+ * @returns The original array reference (not a copy) when no duplicates exist,
+ *   otherwise a new array in which only the changed messages are copied.
+ */
+export function deduplicateToolOutputs(
+  messages: MessageWithParts[],
+  currentTurnIdx: number,
+): MessageWithParts[] {
+  // Content key → index of the last message carrying that exact output
+  const contentLatest = new Map<string, number>();
+  // Read key → index of the last message carrying that read request
+  const fileLatest = new Map<string, number>();
+
+  // First pass: record the latest occurrences. The current turn is included so
+  // that earlier outputs are recognized as duplicates of current-turn content.
+  for (let i = 0; i < messages.length; i++) {
+    for (const part of messages[i].parts) {
+      if (part.type !== "tool" || part.state.status !== "completed") continue;
+      const output = part.state.output;
+      if (!output || output.length < DEDUP_MIN_CHARS) continue;
+
+      contentLatest.set(contentKeyOf(part.tool, output), i);
+      const read = readIdentity(part.tool, part.state.input);
+      if (read) fileLatest.set(read.key, i);
+    }
+  }
+
+  // Second pass: replace earlier occurrences (but never touch the current turn)
+  let changed = false;
+  const result = messages.map((msg, msgIdx) => {
+    if (msgIdx >= currentTurnIdx) return msg; // sacred boundary
+
+    let partsChanged = false;
+    const parts = msg.parts.map((part) => {
+      if (part.type !== "tool" || part.state.status !== "completed") return part;
+      const output = part.state.output;
+      if (!output || output.length < DEDUP_MIN_CHARS) return part;
+
+      const isLatestContent = contentLatest.get(contentKeyOf(part.tool, output)) === msgIdx;
+      const read = readIdentity(part.tool, part.state.input);
+      const isLatestRead = read === undefined || fileLatest.get(read.key) === msgIdx;
+
+      // Keep if this is both the latest content AND the latest read (or not a read tool)
+      if (isLatestContent && isLatestRead) return part;
+
+      // This is a duplicate — replace with compact annotation. The file-specific
+      // wording is used only when a later read of the same request really exists.
+      partsChanged = true;
+      return {
+        ...part,
+        state: {
+          ...part.state,
+          output: dedupAnnotation(part.tool, isLatestRead ? undefined : read?.filePath),
+        },
+      } as Part;
+    });
+
+    if (!partsChanged) return msg;
+    changed = true;
+    return { ...msg, parts };
+  });
+
+  return changed ? result : messages;
+}
+
 // Ensure every tool part in the window has a terminal state (completed or error).
 // Pending/running tool parts produce tool_use blocks at the API level but have no
 // output to generate a matching tool_result — causing Anthropic to reject the request
@@ -993,6 +1170,13 @@ function transformInner(input: {
 
   // --- Gradient mode: context exhausted (or force-escalated), compress older messages ---
 
+  // Pre-pass: deduplicate repeated tool outputs before layer selection.
+  // Keeps only the latest occurrence of each unique output, replacing earlier
+  // ones with compact annotations. This can save thousands of tokens for sessions
+  // with repeated file reads, potentially avoiding escalation to higher layers.
+  const turnStart = currentTurnStart(input.messages);
+  const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
+
   const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
 
   // Layer 1 uses the append-only cached prefix (Approach C) to keep the
@@ -1014,7 +1198,7 @@ function transformInner(input: {
   if (effectiveMinLayer <= 1) {
     const layer1 = sid
       ? tryFitStable({
-          messages: input.messages,
+          messages: dedupMessages,
           prefix: cached.messages,
           prefixTokens: cached.tokens,
           distilledBudget,
@@ -1023,7 +1207,7 @@ function transformInner(input: {
           sessState,
         })
       : tryFit({
-          messages: input.messages,
+          messages: dedupMessages,
           prefix: cached.messages,
           prefixTokens: cached.tokens,
           distilledBudget,
@@ -1041,7 +1225,7 @@ function transformInner(input: {
   // Skipped when force-escalated to layer 3+.
   if (effectiveMinLayer <= 2) {
     const layer2 = tryFit({
-      messages: input.messages,
+      messages: dedupMessages,
       prefix: cached.messages,
       prefixTokens: cached.tokens,
       distilledBudget,
@@ -1063,7 +1247,7 @@ function transformInner(input: {
     0,
   );
   const layer3 = tryFit({
-    messages: input.messages,
+    messages: dedupMessages,
     prefix: trimmedPrefix,
     prefixTokens: trimmedPrefixTokens,
     distilledBudget: Math.floor(usable * 0.15),
diff --git a/test/gradient.test.ts b/test/gradient.test.ts
index 21c5905..5db12e1 100644
--- a/test/gradient.test.ts
+++ b/test/gradient.test.ts
@@ -13,6 +13,7 @@ import {
   setForceMinLayer,
   getLastLayer,
   estimateMessages,
+  deduplicateToolOutputs,
 } from "../src/gradient";
 import type { Message, Part } from "@opencode-ai/sdk";
 
@@ -1333,3 +1334,221 @@ describe("gradient — calibration oscillation fix", () => {
     expect(r2.layer).toBeGreaterThanOrEqual(1);
   });
 });
+
+// ---------------------------------------------------------------------------
+// Content-aware deduplication tests
+// ---------------------------------------------------------------------------
+
+function makeMsgWithTool(
+  id: string,
+  role: "user" | "assistant",
+  toolName: string,
+  input: string,
+  output: string,
+  sessionID = "dedup-sess",
+): { info: Message; parts: Part[] } {
+  const base = makeMsg(id, role, "", sessionID);
+  return {
+    info: base.info,
+    parts: [
+      {
+        id: `tool-${id}`,
+        sessionID,
+        messageID: id,
+        type: "tool",
+        tool: toolName,
+        callID: `call-${id}`,
+        title: toolName,
+        state: {
+          status: "completed" as const,
+          input: JSON.parse(input) as { [key: string]: unknown },
+          output,
+          title: toolName,
+          metadata: {},
+          time: { start: Date.now(), end: Date.now() },
+        },
+        time: { start: Date.now(), end: Date.now() },
+      } as Part,
+    ],
+  };
+}
+
+/** Helper to extract output from a completed tool part. */
+function getToolOutput(part: Part): string | undefined {
+  if (part.type === "tool" && part.state.status === "completed") {
+    return part.state.output;
+  }
+  return undefined;
+}
+
+describe("deduplicateToolOutputs", () => {
+  const LARGE_CONTENT = "x".repeat(800); // above DEDUP_MIN_CHARS (600)
+
+  test("deduplicates identical tool outputs, keeps latest", () => {
+    const msgs = [
+      makeMsg("u1", "user", "read file A"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT),
+      makeMsg("u2", "user", "now edit"),
+      makeMsg("a2", "assistant", "done editing"),
+      makeMsg("u3", "user", "read file A again"),
+      makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT),
+      makeMsg("u4", "user", "looks good"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 6);
+
+    // First read (index 1) should be deduplicated
+    expect(getToolOutput(result[1].parts[0])).toContain("earlier version of src/foo.ts");
+
+    // Latest read (index 5) should be intact
+    expect(getToolOutput(result[5].parts[0])).toBe(LARGE_CONTENT);
+  });
+
+  test("deduplicates same-file reads with different content", () => {
+    const oldContent = "old version " + "y".repeat(800);
+    const newContent = "new version " + "z".repeat(800);
+    const msgs = [
+      makeMsg("u1", "user", "read file"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/bar.ts"}', oldContent),
+      makeMsg("u2", "user", "edit it"),
+      makeMsg("a2", "assistant", "edited"),
+      makeMsg("u3", "user", "read it again"),
+      makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/bar.ts"}', newContent),
+      makeMsg("u4", "user", "verify"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 6);
+
+    // First read (old content) should be replaced — same file, not latest
+    expect(getToolOutput(result[1].parts[0])).toContain("earlier version of src/bar.ts");
+
+    // Latest read (new content) should be intact
+    expect(getToolOutput(result[5].parts[0])).toBe(newContent);
+  });
+
+  test("does not touch current turn messages", () => {
+    const msgs = [
+      makeMsg("u1", "user", "first"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT),
+      makeMsg("u2", "user", "read again"), // current turn starts here (index 2)
+      makeMsgWithTool("a2", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT),
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 2);
+
+    // Earlier read (index 1) should be deduped since latest is in current turn
+    expect(getToolOutput(result[1].parts[0])).toContain("earlier version");
+
+    // Current-turn read (index 3) should NOT be touched
+    expect(getToolOutput(result[3].parts[0])).toBe(LARGE_CONTENT);
+  });
+
+  test("skips small outputs (below threshold)", () => {
+    const smallContent = "short"; // well below DEDUP_MIN_CHARS
+    const msgs = [
+      makeMsg("u1", "user", "read"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"small.txt"}', smallContent),
+      makeMsg("u2", "user", "read again"),
+      makeMsgWithTool("a2", "assistant", "read_file", '{"path":"small.txt"}', smallContent),
+      makeMsg("u3", "user", "done"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 4);
+
+    // Both small outputs should be untouched
+    expect(getToolOutput(result[1].parts[0])).toBe(smallContent);
+    expect(getToolOutput(result[3].parts[0])).toBe(smallContent);
+  });
+
+  test("returns same array reference when no duplicates", () => {
+    const msgs = [
+      makeMsg("u1", "user", "hello"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"a.ts"}', LARGE_CONTENT),
+      makeMsg("u2", "user", "read different"),
+      makeMsgWithTool("a2", "assistant", "read_file", '{"path":"b.ts"}', "different " + LARGE_CONTENT),
+      makeMsg("u3", "user", "done"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 4);
+    expect(result).toBe(msgs); // same reference — no copy
+  });
+
+  test("deduplicates non-read tools by exact content hash", () => {
+    const bashOutput = "npm test\n" + "PASS ".repeat(200); // large enough
+    const msgs = [
+      makeMsg("u1", "user", "run tests"),
+      makeMsgWithTool("a1", "assistant", "bash", '{"command":"npm test"}', bashOutput),
+      makeMsg("u2", "user", "run tests again"),
+      makeMsgWithTool("a2", "assistant", "bash", '{"command":"npm test"}', bashOutput),
+      makeMsg("u3", "user", "ok"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 4);
+
+    // First bash (index 1) should be deduped — exact same output
+    const firstOut = getToolOutput(result[1].parts[0])!;
+    expect(firstOut).toContain("duplicate output");
+    expect(firstOut).toContain("bash");
+
+    // Latest bash (index 3) should be intact
+    expect(getToolOutput(result[3].parts[0])).toBe(bashOutput);
+  });
+
+  test("handles three reads of the same file — only latest survives", () => {
+    const msgs = [
+      makeMsg("u1", "user", "read"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT),
+      makeMsg("u2", "user", "read again"),
+      makeMsgWithTool("a2", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT),
+      makeMsg("u3", "user", "read third time"),
+      makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT),
+      makeMsg("u4", "user", "done"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 6);
+
+    // First two reads should be deduped
+    expect(getToolOutput(result[1].parts[0])).toContain("earlier version");
+    expect(getToolOutput(result[3].parts[0])).toContain("earlier version");
+
+    // Third (latest) should be intact
+    expect(getToolOutput(result[5].parts[0])).toBe(LARGE_CONTENT);
+  });
+
+  test("keeps reads of the same file made with different arguments", () => {
+    const firstRange = "lines 1-200 " + "a".repeat(800);
+    const secondRange = "lines 201-400 " + "b".repeat(800);
+    const msgs = [
+      makeMsg("u1", "user", "read the top"),
+      makeMsgWithTool("a1", "assistant", "read", '{"filePath":"src/big.ts","offset":0,"limit":200}', firstRange),
+      makeMsg("u2", "user", "read the rest"),
+      makeMsgWithTool("a2", "assistant", "read", '{"filePath":"src/big.ts","offset":200,"limit":200}', secondRange),
+      makeMsg("u3", "user", "done"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 4);
+
+    // Neither range supersedes the other — nothing is replaced
+    expect(result).toBe(msgs);
+  });
+
+  test("uses the generic annotation when the duplicate is a different file", () => {
+    const msgs = [
+      makeMsg("u1", "user", "read a"),
+      makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/a.ts"}', LARGE_CONTENT),
+      makeMsg("u2", "user", "read b"),
+      makeMsgWithTool("a2", "assistant", "read_file", '{"path":"src/b.ts"}', LARGE_CONTENT),
+      makeMsg("u3", "user", "done"), // current turn
+    ];
+
+    const result = deduplicateToolOutputs(msgs, 4);
+
+    // src/a.ts has no later read, so the annotation must not promise one
+    const firstOut = getToolOutput(result[1].parts[0])!;
+    expect(firstOut).toContain("duplicate output");
+    expect(firstOut).not.toContain("earlier version");
+
+    // Latest identical output stays intact
+    expect(getToolOutput(result[3].parts[0])).toBe(LARGE_CONTENT);
+  });
+});