From 5eddb553e264f4d3baf760c66f1f60ffc0ef70dc Mon Sep 17 00:00:00 2001 From: zerob13 Date: Mon, 18 May 2026 16:50:37 +0800 Subject: [PATCH] fix(agent): bypass chat budget for image routes --- .../plan.md | 24 ++++ .../spec.md | 41 +++++++ .../tasks.md | 7 ++ .../presenter/agentRuntimePresenter/index.ts | 92 +++++++++++----- .../agentRuntimePresenter.test.ts | 103 ++++++++++++++++++ 5 files changed, 242 insertions(+), 25 deletions(-) create mode 100644 docs/issues/image-generation-context-budget-bypass/plan.md create mode 100644 docs/issues/image-generation-context-budget-bypass/spec.md create mode 100644 docs/issues/image-generation-context-budget-bypass/tasks.md diff --git a/docs/issues/image-generation-context-budget-bypass/plan.md b/docs/issues/image-generation-context-budget-bypass/plan.md new file mode 100644 index 000000000..8557e630b --- /dev/null +++ b/docs/issues/image-generation-context-budget-bypass/plan.md @@ -0,0 +1,24 @@ +# Image Generation Context Budget Bypass Plan + +## Approach + +- Add a model-aware Agent runtime helper that returns true only when DeepChat should use its chat + context budget. +- Keep ACP bypass behavior, and also bypass when the model config explicitly identifies + `ImageGeneration`, `TTS`, a non-chat API endpoint, or `endpointType === 'image-generation'`. +- Treat missing legacy model metadata as chat-compatible. + +## Runtime Changes + +- Use the helper in new user turns and resume/retry context construction before deciding whether to + compact, trim, or use a finite chat context length. +- Use the helper inside the provider-call wrapper before running preflight/recovery or shrinking the + per-call `maxTokens`. +- Leave `contextBudget.ts`, public contracts, IPC, and renderer code unchanged. + +## Test Strategy + +- Add an Agent runtime regression for an image endpoint request that would fail chat-budget + preflight, asserting the provider is still called and max tokens are preserved. +- Keep chat-model pressure tests verifying the existing budget preflight path still runs. +- Run the targeted Agent runtime/context budget tests plus repository format, i18n, and lint checks. diff --git a/docs/issues/image-generation-context-budget-bypass/spec.md b/docs/issues/image-generation-context-budget-bypass/spec.md new file mode 100644 index 000000000..cc61002f3 --- /dev/null +++ b/docs/issues/image-generation-context-budget-bypass/spec.md @@ -0,0 +1,41 @@ +# Image Generation Context Budget Bypass Spec + +> Status: Draft +> Date: 2026-05-18 + +## Background + +DeepChat Agent applies a chat-oriented provider-call context preflight before sending model +requests. The check estimates message tokens, tool schemas, and output tokens, then blocks requests +that cannot fit inside the configured model context window. + +That check is valid for chat models, but image generation and other non-chat routes do not use the +same request shape. Image requests can therefore fail before reaching the provider with: + +`Request was not sent because it cannot fit within the model context window after applying the safety margin.` + +## Goals + +- Only apply DeepChat's chat context budget to chat model requests. +- Skip the chat budget preflight, compaction recovery, and temporary max-token shrink for explicit + image generation and other non-chat model routes. +- Preserve current behavior for chat models and ACP provider bypasses. + +## Acceptance Criteria + +- Image generation models or image endpoints reach the provider even when chat-budget estimation + would fail. +- Non-chat requests do not trigger the DeepChat context-pressure compaction path solely because of + chat message/tool-schema estimates. +- Non-chat request max tokens are not reduced by the chat preflight safety margin. +- Chat models keep the existing preflight, recovery, and overflow failure behavior. +- Existing legacy model configs without explicit type or endpoint metadata continue to be treated as + chat requests. +- No public API, IPC, schema, or renderer UI changes are introduced. + +## Non-Goals + +- Redesign image generation request construction. +- Change the agent image generation tool behavior for chat models. +- Change `contextBudget.ts` budgeting math. +- Add renderer UI for non-chat routing diagnostics. diff --git a/docs/issues/image-generation-context-budget-bypass/tasks.md b/docs/issues/image-generation-context-budget-bypass/tasks.md new file mode 100644 index 000000000..727a38d9b --- /dev/null +++ b/docs/issues/image-generation-context-budget-bypass/tasks.md @@ -0,0 +1,7 @@ +# Image Generation Context Budget Bypass Tasks + +- [x] Document the issue and intended behavior. +- [x] Add model-aware chat budget gating in the Agent runtime. +- [x] Add regression coverage for image endpoint bypass. +- [x] Verify chat context budget behavior remains unchanged. +- [x] Run targeted tests and repository quality checks. diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts index ac7198b10..af5bcbd04 100644 --- a/src/main/presenter/agentRuntimePresenter/index.ts +++ b/src/main/presenter/agentRuntimePresenter/index.ts @@ -57,7 +57,7 @@ import { normalizeImageGenerationOptions, supportsOpenAIImageGenerationSettings } from '@shared/imageGenerationSettings' -import { isDeepSeekSeriesModelId } from '@shared/model' +import { ApiEndpointType, ModelType, isDeepSeekSeriesModelId } from '@shared/model' import { nanoid } from 'nanoid' import type { SQLitePresenter } from '../sqlitePresenter' import { eventBus, SendTarget } from '@/eventbus' @@ -621,6 +621,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { try { this.throwIfAbortRequested(preStreamAbortSignal) const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId) + const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) + const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig) this.throwIfAbortRequested(preStreamAbortSignal) const interleavedReasoning = this.resolveInterleavedReasoningConfig( state.providerId, @@ -629,7 +631,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { ) const contextBudgetLength = this.resolveDeepChatContextBudgetLength( state.providerId, - generationSettings.contextLength + generationSettings.contextLength, + modelConfig ) const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength) const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId) @@ -656,9 +659,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { think: false } - const compactionIntent = this.shouldBypassDeepChatContextBudget(state.providerId) - ? null - : await this.compactionService.prepareForNextUserTurn({ + const compactionIntent = useContextBudget + ? await this.compactionService.prepareForNextUserTurn({ sessionId, providerId: state.providerId, modelId: state.modelId, @@ -674,6 +676,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { newUserContent: normalizedInput, signal: preStreamAbortSignal }) + : null let summaryState: SessionSummaryState if (compactionIntent) { @@ -1428,15 +1431,46 @@ export class AgentRuntimePresenter implements IAgentImplementation { return resolvedProviderId === 'acp' } - private shouldBypassDeepChatContextBudget(providerId?: string | null): boolean { - return providerId?.trim() === 'acp' + private shouldUseDeepChatContextBudget( + providerId?: string | null, + modelConfig?: Pick | null + ): boolean { + if (providerId?.trim() === 'acp') { + return false + } + + if (!modelConfig) { + return true + } + + if (modelConfig.type === ModelType.ImageGeneration || modelConfig.type === ModelType.TTS) { + return false + } + + if (modelConfig.apiEndpoint && modelConfig.apiEndpoint !== ApiEndpointType.Chat) { + return false + } + + if (modelConfig.endpointType === 'image-generation') { + return false + } + + return true + } + + private shouldBypassDeepChatContextBudget( + providerId?: string | null, + modelConfig?: Pick | null + ): boolean { + return !this.shouldUseDeepChatContextBudget(providerId, modelConfig) } private resolveDeepChatContextBudgetLength( providerId: string | null | undefined, - contextLength: number + contextLength: number, + modelConfig?: Pick | null ): number { - return this.shouldBypassDeepChatContextBudget(providerId) + return this.shouldBypassDeepChatContextBudget(providerId, modelConfig) ? Number.MAX_SAFE_INTEGER : contextLength } @@ -1619,7 +1653,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { if (!state) { throw new Error(`Session ${sessionId} not found`) } - if (this.shouldBypassDeepChatContextBudget(state.providerId)) { + const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) + if (this.shouldBypassDeepChatContextBudget(state.providerId, modelConfig)) { throw new Error('Manual compaction is only available for DeepChat agent sessions.') } if (state.status !== 'idle') { @@ -1639,7 +1674,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { ) const contextBudgetLength = this.resolveDeepChatContextBudgetLength( state.providerId, - generationSettings.contextLength + generationSettings.contextLength, + modelConfig ) const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength) const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId) @@ -1854,15 +1890,15 @@ export class AgentRuntimePresenter implements IAgentImplementation { ).getProviderInstance(state.providerId) const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId) + const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) const interleavedReasoning = providedInterleavedReasoning ?? this.resolveInterleavedReasoningConfig(state.providerId, state.modelId, generationSettings) - const bypassContextBudget = this.shouldBypassDeepChatContextBudget(state.providerId) const contextBudgetLength = this.resolveDeepChatContextBudgetLength( state.providerId, - generationSettings.contextLength + generationSettings.contextLength, + baseModelConfig ) - const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) const capabilityProviderId = this.resolveCapabilityProviderId(state.providerId, state.modelId) const reasoningPortrait = this.getReasoningPortrait(state.providerId, state.modelId) const modelConfig: ModelConfig = { @@ -1886,6 +1922,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { const traceEnabled = this.configPresenter.getSetting('traceDebugEnabled') === true const llmProviderPresenter = this.llmProviderPresenter const pendingInputCoordinator = this.pendingInputCoordinator + const shouldBypassContextBudget = this.shouldBypassDeepChatContextBudget.bind(this) const injectSteerInputsIntoRequest = this.injectSteerInputsIntoRequest.bind(this) const recoverContextPressure = this.recoverRequestContextPressure.bind(this) const replaceLeadingSystemPromptInPlace = this.replaceLeadingSystemPromptInPlace.bind(this) @@ -1947,13 +1984,19 @@ export class AgentRuntimePresenter implements IAgentImplementation { requestMaxTokens, requestTools ) { + const requestBypassesContextBudget = shouldBypassContextBudget( + state.providerId, + requestModelConfig + ) const claimedSteerBatch = pendingInputCoordinator.claimSteerBatchForNextLoop(sessionId) const injectedMessages = injectSteerInputsIntoRequest( requestMessages, claimedSteerBatch, supportsVision, supportsAudioInput, - bypassContextBudget ? Number.MAX_SAFE_INTEGER : requestModelConfig.contextLength, + requestBypassesContextBudget + ? Number.MAX_SAFE_INTEGER + : requestModelConfig.contextLength, requestMaxTokens ) @@ -1964,7 +2007,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { let providerMessages = injectedMessages let providerMaxTokens = requestMaxTokens - if (!bypassContextBudget) { + if (!requestBypassesContextBudget) { const protectedSteerTailCount = claimedSteerBatch.length > 0 ? claimedSteerBatch.length + (requestMessages.at(-1)?.role === 'user' ? 1 : 0) @@ -2543,6 +2586,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { preStreamAbortSignal = preStreamAbortController.signal this.throwIfAbortRequested(preStreamAbortSignal) const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId) + const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) + const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig) this.throwIfAbortRequested(preStreamAbortSignal) const interleavedReasoning = this.resolveInterleavedReasoningConfig( state.providerId, @@ -2551,7 +2596,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { ) const contextBudgetLength = this.resolveDeepChatContextBudgetLength( state.providerId, - generationSettings.contextLength + generationSettings.contextLength, + modelConfig ) const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength) const projectDir = this.resolveProjectDir(sessionId) @@ -2570,9 +2616,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { activeSkillNames ) this.throwIfAbortRequested(preStreamAbortSignal) - const summaryState = this.shouldBypassDeepChatContextBudget(state.providerId) - ? this.sessionStore.getSummaryState(sessionId) - : await this.resolveCompactionStateForResumeTurn({ + const summaryState = useContextBudget + ? await this.resolveCompactionStateForResumeTurn({ sessionId, messageId, providerId: state.providerId, @@ -2588,6 +2633,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { interleavedReasoning.preserveEmptyReasoningContent === true, signal: preStreamAbortSignal }) + : this.sessionStore.getSummaryState(sessionId) this.throwIfAbortRequested(preStreamAbortSignal) const systemPrompt = appendSummarySection(baseSystemPrompt, summaryState.summaryText) let resumeContext = buildResumeContext( @@ -2608,11 +2654,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { interleavedReasoning.preserveEmptyReasoningContent === true } ) - if ( - budgetToolCall?.id && - budgetToolCall.name && - !this.shouldBypassDeepChatContextBudget(state.providerId) - ) { + if (budgetToolCall?.id && budgetToolCall.name && useContextBudget) { const resumeBudget = this.fitResumeBudgetForToolCall({ resumeContext, toolDefinitions: tools, diff --git a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts index 8aebcc8c3..466e3425b 100644 --- a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts +++ b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts @@ -4,6 +4,7 @@ import os from 'os' import path from 'path' import { app } from 'electron' import type { DeepChatSessionState } from '@shared/types/agent-interface' +import { ApiEndpointType, ModelType } from '@shared/model' import { AgentRuntimePresenter } from '@/presenter/agentRuntimePresenter/index' import { NewSessionHooksBridge } from '@/presenter/hooksNotifications/newSessionBridge' import { estimateMessagesTokens } from '@/presenter/agentRuntimePresenter/contextBuilder' @@ -3471,6 +3472,108 @@ describe('AgentRuntimePresenter', () => { ) }) + it('bypasses chat context preflight for image generation endpoints', async () => { + const imageModelConfig = { + temperature: 0.7, + maxTokens: 4096, + contextLength: 8192, + thinkingBudget: 512, + reasoningEffort: 'medium', + verbosity: 'medium', + vision: false, + functionCall: false, + reasoning: false, + type: ModelType.ImageGeneration, + apiEndpoint: ApiEndpointType.Image, + endpointType: 'image-generation' as const + } + configPresenter.getModelConfig.mockImplementation((modelId: string) => + modelId === 'gpt-image-2' + ? imageModelConfig + : { + temperature: 0.7, + maxTokens: 4096, + contextLength: 128000, + thinkingBudget: 512, + reasoningEffort: 'medium', + verbosity: 'medium', + vision: false + } + ) + const prepareSpy = vi.spyOn( + (agent as unknown as { compactionService: { prepareForNextUserTurn: () => unknown } }) + .compactionService, + 'prepareForNextUserTurn' + ) + + await agent.initSession('s1', { + providerId: 'openai', + modelId: 'gpt-image-2', + generationSettings: { + contextLength: 8192, + maxTokens: 4096 + } + }) + await agent.processMessage('s1', 'draw a mountain') + + const callArgs = (processStream as ReturnType).mock.calls[0][0] + expect(callArgs.maxTokens).toBe(4096) + expect(prepareSpy).not.toHaveBeenCalled() + + const providerCoreStream = llmProvider.getProviderInstance.mock.results[0].value.coreStream + providerCoreStream.mockClear() + llmProvider.generateText.mockClear() + const oversizedTools = [ + { + type: 'function', + function: { + name: 'large_schema', + description: makeTextWithEstimatedTokens(10000), + parameters: { + type: 'object', + properties: { + prompt: { + type: 'string', + description: makeTextWithEstimatedTokens(10000) + } + }, + required: ['prompt'] + } + }, + server: { + name: 'test', + icons: '', + description: 'large schema' + } + } + ] + const requestMessages = [ + { role: 'user' as const, content: makeTextWithEstimatedTokens(9000) } + ] + + for await (const _event of callArgs.coreStream( + requestMessages, + callArgs.modelId, + callArgs.modelConfig, + callArgs.temperature, + 4096, + oversizedTools + )) { + } + + expect(providerCoreStream).toHaveBeenCalledTimes(1) + expect(providerCoreStream.mock.calls[0][0]).toEqual(requestMessages) + expect(providerCoreStream.mock.calls[0][4]).toBe(4096) + expect(providerCoreStream.mock.calls[0][5]).toEqual(oversizedTools) + expect(llmProvider.generateText).not.toHaveBeenCalled() + expect( + JSON.stringify((eventBus.sendToRenderer as ReturnType).mock.calls) + ).not.toContain('Request was not sent') + expect( + JSON.stringify(sqlitePresenter.deepchatMessagesTable.updateContentAndStatus.mock.calls) + ).not.toContain('Request was not sent') + }) + it('preflights provider calls with a safety margin and compacts before low-output pressure calls', async () => { await agent.initSession('s1', { providerId: 'openai',