From 8c8e2de2d0ab5a4deb6aa5fe2e88e0f442ee6b0d Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Apr 2026 18:39:53 +0000 Subject: [PATCH] feat: content-aware deduplication pre-pass in gradient transform Add deduplicateToolOutputs() as a pre-pass before gradient layer selection. Detects repeated tool outputs (same file read multiple times, identical command results) and replaces earlier occurrences with compact annotations, keeping only the latest. Two dedup levels: exact content hash match and same-file-path reads with different content (edit between reads). This reduces token pressure before layer selection, potentially keeping sessions at lower (less lossy) gradient layers. A 500-line file read appearing 3 times costs ~15K tokens; after dedup: ~5.1K tokens. Inspired by Dirac's ContextManager file-read deduplication approach. --- src/gradient.ts | 154 ++++++++++++++++++++++++++++++++++- test/gradient.test.ts | 182 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 332 insertions(+), 4 deletions(-) diff --git a/src/gradient.ts b/src/gradient.ts index a39ae59..6afda2c 100644 --- a/src/gradient.ts +++ b/src/gradient.ts @@ -341,6 +341,145 @@ function toolStripAnnotation(toolName: string, output: string): string { return annotation; } +// --------------------------------------------------------------------------- +// Content-aware deduplication +// --------------------------------------------------------------------------- +// Inspired by Dirac's ContextManager file-read deduplication: detects when the +// same content appears multiple times in the conversation (e.g., the same file +// read multiple times, or the same command output repeated) and replaces earlier +// occurrences with compact annotations. This reduces token pressure before layer +// selection, potentially keeping sessions at lower (less lossy) gradient layers. 
// Minimum output size (chars) to consider for dedup — annotations for smaller
// outputs would cost more tokens than the original content.
const DEDUP_MIN_CHARS = 600;

// Tool names whose outputs are treated as file reads for same-path dedup.
const READ_TOOL_NAMES: readonly string[] = ["read_file", "read"];

// Input fields that may carry the file path, in lookup order.
const PATH_FIELDS: readonly string[] = ["path", "filePath", "file"];

/** Identity of a file read: the path (for annotations) and the dedup key. */
interface ReadTarget {
  path: string;
  key: string;
}

/** Dedup keys computed once per eligible tool part and reused by both passes. */
interface DedupKeys {
  content: string;
  read: ReadTarget | undefined;
}

/**
 * 32-bit FNV-1a hash over the string's UTF-16 code units.
 *
 * Uses Math.imul for the multiply: a plain `hash * 0x01000193` exceeds 2^53,
 * so the low bits of the product are rounded away before `>>> 0` truncates it,
 * which weakens the hash and makes it diverge from real FNV-1a.
 *
 * @returns an unsigned 32-bit integer
 */
function simpleHash(str: string): number {
  let hash = 0x811c9dc5;
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193) >>> 0;
  }
  return hash;
}

/**
 * Extract a file path from a tool's input.
 *
 * JSON object input: returns the first non-empty string among `path`,
 * `filePath`, `file`. Non-string values in those fields are ignored.
 * Input that is not valid JSON: falls back to the first path-like substring.
 *
 * @returns the path, or undefined when none can be found
 */
function extractFilePath(input: string): string | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(input);
  } catch {
    // Plain text — try to extract a path-like string
    return input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/)?.[0];
  }
  if (typeof parsed !== "object" || parsed === null) return undefined;
  const fields = parsed as Record<string, unknown>;
  for (const name of PATH_FIELDS) {
    const value = fields[name];
    if (typeof value === "string" && value.length > 0) return value;
  }
  return undefined;
}

/**
 * Canonical suffix built from every read argument other than the path.
 * Returns "" for non-JSON input or when there are no extra arguments.
 */
function readArgsKey(input: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(input);
  } catch {
    return "";
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return "";
  const extras = Object.entries(parsed as Record<string, unknown>)
    .filter(([name]) => !PATH_FIELDS.includes(name))
    .map(([name, value]) => `${name}=${JSON.stringify(value)}`)
    .sort();
  return extras.length > 0 ? `?${extras.join("&")}` : "";
}

/**
 * Identify what a read tool call read.
 *
 * The key combines the path with the remaining arguments so that reads of
 * different portions of one file are not treated as versions of each other.
 * NOTE(review): assumes range-style arguments (if the read tool has any) live
 * alongside the path in the input object — confirm against the tool schema.
 */
function readTarget(input: unknown): ReadTarget | undefined {
  // JSON.stringify(undefined) yields undefined, not a string — guard it.
  const raw = typeof input === "string" ? input : (JSON.stringify(input) ?? "");
  const path = extractFilePath(raw);
  if (!path) return undefined;
  return { path, key: `read:${path}${readArgsKey(raw)}` };
}

/** Annotation for deduplicated tool output — follows the toolStripAnnotation() pattern. */
function dedupAnnotation(toolName: string, filePath?: string): string {
  if (filePath) {
    return `[earlier version of ${filePath} — see latest read below for current content]`;
  }
  return `[duplicate output — same content as later ${toolName} in this session — use recall for details]`;
}

/**
 * Replace repeated tool outputs with compact annotations, keeping only the
 * latest occurrence intact, to reduce context token usage.
 *
 * A completed tool part whose output is at least DEDUP_MIN_CHARS long is
 * replaced when either:
 * 1. Exact duplicate: a later message holds an output from the same tool with
 *    the same length and content hash. Nothing is lost; the later copy stays.
 * 2. Superseded read: a later message holds a `read_file`/`read` of the same
 *    path with the same remaining arguments. The earlier content may differ
 *    (e.g. an edit between reads) and is dropped in favour of the latest read.
 *
 * Occurrences are compared by message index, so parts inside the same message
 * never dedup each other. Messages from `currentTurnIdx` onward are never
 * modified, but they do count as "latest" when deciding whether earlier parts
 * are duplicates. Tool parts are never removed; only `state.output` changes.
 * Input messages and parts are not mutated.
 *
 * @param messages conversation messages in order
 * @param currentTurnIdx index of the first message of the current turn
 * @returns the original array reference when nothing was replaced, otherwise
 *   a new array in which only the changed messages/parts are copies
 */
export function deduplicateToolOutputs(
  messages: MessageWithParts[],
  currentTurnIdx: number,
): MessageWithParts[] {
  // contentKey → index of the latest message carrying that output
  const contentLatest = new Map<string, number>();
  // read key → index of the latest message carrying that read
  const fileLatest = new Map<string, number>();
  // Keys per eligible part, so pass two does not re-hash or re-parse.
  const keysByPart = new Map<Part, DedupKeys>();

  // Pass one: scan everything, including the current turn, so earlier parts
  // are recognised as duplicates of current-turn content.
  messages.forEach((msg, msgIdx) => {
    for (const part of msg.parts) {
      if (part.type !== "tool" || part.state.status !== "completed") continue;
      const output = part.state.output;
      if (!output || output.length < DEDUP_MIN_CHARS) continue;

      const keys: DedupKeys = {
        // Length is part of the key to make 32-bit hash collisions harmless
        // unless the colliding outputs also have identical length.
        content: `${part.tool}:${output.length}:${simpleHash(output)}`,
        read: READ_TOOL_NAMES.includes(part.tool) ? readTarget(part.state.input) : undefined,
      };
      keysByPart.set(part, keys);
      contentLatest.set(keys.content, msgIdx);
      if (keys.read) fileLatest.set(keys.read.key, msgIdx);
    }
  });

  if (keysByPart.size === 0) return messages;

  // Pass two: replace earlier occurrences (but never touch the current turn).
  let changed = false;
  const result = messages.map((msg, msgIdx) => {
    if (msgIdx >= currentTurnIdx) return msg; // sacred boundary

    let partsChanged = false;
    const parts = msg.parts.map((part) => {
      if (part.type !== "tool" || part.state.status !== "completed") return part;
      const keys = keysByPart.get(part);
      if (!keys) return part; // below threshold or empty output

      const read = keys.read;
      // Path is reported only when a later read of the same target exists;
      // otherwise "see latest read below" would point at nothing.
      const supersededPath =
        read !== undefined && fileLatest.get(read.key) !== msgIdx ? read.path : undefined;
      const isDuplicate = contentLatest.get(keys.content) !== msgIdx;
      if (!isDuplicate && supersededPath === undefined) return part;

      partsChanged = true;
      return {
        ...part,
        state: {
          ...part.state,
          output: dedupAnnotation(part.tool, supersededPath),
        },
      } as Part;
    });

    if (!partsChanged) return msg;
    changed = true;
    return { ...msg, parts };
  });

  return changed ? result : messages;
}

// Call site (transformInner, gradient mode): run as a pre-pass before layer
// selection, and pass the result as `messages` to the tryFit/tryFitStable calls:
//   const turnStart = currentTurnStart(input.messages);
//   const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
tryFitStable({ - messages: input.messages, + messages: dedupMessages, prefix: cached.messages, prefixTokens: cached.tokens, distilledBudget, @@ -1023,7 +1169,7 @@ function transformInner(input: { sessState, }) : tryFit({ - messages: input.messages, + messages: dedupMessages, prefix: cached.messages, prefixTokens: cached.tokens, distilledBudget, @@ -1041,7 +1187,7 @@ function transformInner(input: { // Skipped when force-escalated to layer 3+. if (effectiveMinLayer <= 2) { const layer2 = tryFit({ - messages: input.messages, + messages: dedupMessages, prefix: cached.messages, prefixTokens: cached.tokens, distilledBudget, @@ -1063,7 +1209,7 @@ function transformInner(input: { 0, ); const layer3 = tryFit({ - messages: input.messages, + messages: dedupMessages, prefix: trimmedPrefix, prefixTokens: trimmedPrefixTokens, distilledBudget: Math.floor(usable * 0.15), diff --git a/test/gradient.test.ts b/test/gradient.test.ts index 21c5905..5db12e1 100644 --- a/test/gradient.test.ts +++ b/test/gradient.test.ts @@ -13,6 +13,7 @@ import { setForceMinLayer, getLastLayer, estimateMessages, + deduplicateToolOutputs, } from "../src/gradient"; import type { Message, Part } from "@opencode-ai/sdk"; @@ -1333,3 +1334,184 @@ describe("gradient — calibration oscillation fix", () => { expect(r2.layer).toBeGreaterThanOrEqual(1); }); }); + +// --------------------------------------------------------------------------- +// Content-aware deduplication tests +// --------------------------------------------------------------------------- + +function makeMsgWithTool( + id: string, + role: "user" | "assistant", + toolName: string, + input: string, + output: string, + sessionID = "dedup-sess", +): { info: Message; parts: Part[] } { + const base = makeMsg(id, role, "", sessionID); + return { + info: base.info, + parts: [ + { + id: `tool-${id}`, + sessionID, + messageID: id, + type: "tool", + tool: toolName, + callID: `call-${id}`, + title: toolName, + state: { + status: "completed" as const, + 
input: JSON.parse(input) as { [key: string]: unknown }, + output, + title: toolName, + metadata: {}, + time: { start: Date.now(), end: Date.now() }, + }, + time: { start: Date.now(), end: Date.now() }, + } as Part, + ], + }; +} + +/** Helper to extract output from a completed tool part. */ +function getToolOutput(part: Part): string | undefined { + if (part.type === "tool" && part.state.status === "completed") { + return part.state.output; + } + return undefined; +} + +describe("deduplicateToolOutputs", () => { + const LARGE_CONTENT = "x".repeat(800); // above DEDUP_MIN_CHARS (600) + + test("deduplicates identical tool outputs, keeps latest", () => { + const msgs = [ + makeMsg("u1", "user", "read file A"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT), + makeMsg("u2", "user", "now edit"), + makeMsg("a2", "assistant", "done editing"), + makeMsg("u3", "user", "read file A again"), + makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT), + makeMsg("u4", "user", "looks good"), // current turn + ]; + + const result = deduplicateToolOutputs(msgs, 6); + + // First read (index 1) should be deduplicated + expect(getToolOutput(result[1].parts[0])).toContain("earlier version of src/foo.ts"); + + // Latest read (index 5) should be intact + expect(getToolOutput(result[5].parts[0])).toBe(LARGE_CONTENT); + }); + + test("deduplicates same-file reads with different content", () => { + const oldContent = "old version " + "y".repeat(800); + const newContent = "new version " + "z".repeat(800); + const msgs = [ + makeMsg("u1", "user", "read file"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/bar.ts"}', oldContent), + makeMsg("u2", "user", "edit it"), + makeMsg("a2", "assistant", "edited"), + makeMsg("u3", "user", "read it again"), + makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/bar.ts"}', newContent), + makeMsg("u4", "user", "verify"), // current turn + ]; + + const result 
= deduplicateToolOutputs(msgs, 6); + + // First read (old content) should be replaced — same file, not latest + expect(getToolOutput(result[1].parts[0])).toContain("earlier version of src/bar.ts"); + + // Latest read (new content) should be intact + expect(getToolOutput(result[5].parts[0])).toBe(newContent); + }); + + test("does not touch current turn messages", () => { + const msgs = [ + makeMsg("u1", "user", "first"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT), + makeMsg("u2", "user", "read again"), // current turn starts here (index 2) + makeMsgWithTool("a2", "assistant", "read_file", '{"path":"src/foo.ts"}', LARGE_CONTENT), + ]; + + const result = deduplicateToolOutputs(msgs, 2); + + // Earlier read (index 1) should be deduped since latest is in current turn + expect(getToolOutput(result[1].parts[0])).toContain("earlier version"); + + // Current-turn read (index 3) should NOT be touched + expect(getToolOutput(result[3].parts[0])).toBe(LARGE_CONTENT); + }); + + test("skips small outputs (below threshold)", () => { + const smallContent = "short"; // well below DEDUP_MIN_CHARS + const msgs = [ + makeMsg("u1", "user", "read"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"small.txt"}', smallContent), + makeMsg("u2", "user", "read again"), + makeMsgWithTool("a2", "assistant", "read_file", '{"path":"small.txt"}', smallContent), + makeMsg("u3", "user", "done"), // current turn + ]; + + const result = deduplicateToolOutputs(msgs, 4); + + // Both small outputs should be untouched + expect(getToolOutput(result[1].parts[0])).toBe(smallContent); + expect(getToolOutput(result[3].parts[0])).toBe(smallContent); + }); + + test("returns same array reference when no duplicates", () => { + const msgs = [ + makeMsg("u1", "user", "hello"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"a.ts"}', LARGE_CONTENT), + makeMsg("u2", "user", "read different"), + makeMsgWithTool("a2", "assistant", "read_file", 
'{"path":"b.ts"}', "different " + LARGE_CONTENT), + makeMsg("u3", "user", "done"), // current turn + ]; + + const result = deduplicateToolOutputs(msgs, 4); + expect(result).toBe(msgs); // same reference — no copy + }); + + test("deduplicates non-read tools by exact content hash", () => { + const bashOutput = "npm test\n" + "PASS ".repeat(200); // large enough + const msgs = [ + makeMsg("u1", "user", "run tests"), + makeMsgWithTool("a1", "assistant", "bash", '{"command":"npm test"}', bashOutput), + makeMsg("u2", "user", "run tests again"), + makeMsgWithTool("a2", "assistant", "bash", '{"command":"npm test"}', bashOutput), + makeMsg("u3", "user", "ok"), // current turn + ]; + + const result = deduplicateToolOutputs(msgs, 4); + + // First bash (index 1) should be deduped — exact same output + const firstOut = getToolOutput(result[1].parts[0])!; + expect(firstOut).toContain("duplicate output"); + expect(firstOut).toContain("bash"); + + // Latest bash (index 3) should be intact + expect(getToolOutput(result[3].parts[0])).toBe(bashOutput); + }); + + test("handles three reads of the same file — only latest survives", () => { + const msgs = [ + makeMsg("u1", "user", "read"), + makeMsgWithTool("a1", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT), + makeMsg("u2", "user", "read again"), + makeMsgWithTool("a2", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT), + makeMsg("u3", "user", "read third time"), + makeMsgWithTool("a3", "assistant", "read_file", '{"path":"src/x.ts"}', LARGE_CONTENT), + makeMsg("u4", "user", "done"), // current turn + ]; + + const result = deduplicateToolOutputs(msgs, 6); + + // First two reads should be deduped + expect(getToolOutput(result[1].parts[0])).toContain("earlier version"); + expect(getToolOutput(result[3].parts[0])).toContain("earlier version"); + + // Third (latest) should be intact + expect(getToolOutput(result[5].parts[0])).toBe(LARGE_CONTENT); + }); +});