From 5eddb553e264f4d3baf760c66f1f60ffc0ef70dc Mon Sep 17 00:00:00 2001
From: zerob13 <zerob13@gmail.com>
Date: Mon, 18 May 2026 16:50:37 +0800
Subject: [PATCH] fix(agent): bypass chat budget for image routes

---
 .../plan.md                                   |  24 ++++
 .../spec.md                                   |  41 +++++++
 .../tasks.md                                  |   7 ++
 .../presenter/agentRuntimePresenter/index.ts  |  92 +++++++++++-----
 .../agentRuntimePresenter.test.ts             | 103 ++++++++++++++++++
 5 files changed, 242 insertions(+), 25 deletions(-)
 create mode 100644 docs/issues/image-generation-context-budget-bypass/plan.md
 create mode 100644 docs/issues/image-generation-context-budget-bypass/spec.md
 create mode 100644 docs/issues/image-generation-context-budget-bypass/tasks.md

diff --git a/docs/issues/image-generation-context-budget-bypass/plan.md b/docs/issues/image-generation-context-budget-bypass/plan.md
new file mode 100644
index 000000000..8557e630b
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/plan.md
@@ -0,0 +1,24 @@
+# Image Generation Context Budget Bypass Plan
+
+## Approach
+
+- Add a model-aware Agent runtime helper that returns true only when DeepChat should use its chat
+  context budget.
+- Keep ACP bypass behavior, and also bypass when the model config explicitly identifies
+  `ImageGeneration`, `TTS`, a non-chat API endpoint, or `endpointType === 'image-generation'`.
+- Treat legacy model configs that lack type or endpoint metadata as chat-compatible.
+
+## Runtime Changes
+
+- Use the helper in new user turns and resume/retry context construction before deciding whether to
+  compact, trim, or use a finite chat context length.
+- Use the helper inside the provider-call wrapper before running preflight/recovery or shrinking the
+  per-call `maxTokens`.
+- Leave `contextBudget.ts`, public contracts, IPC, and renderer code unchanged.
+
+## Test Strategy
+
+- Add an Agent runtime regression test for an image endpoint request that would fail chat-budget
+  preflight, asserting the provider is still called and max tokens are preserved.
+- Keep chat-model pressure tests verifying the existing budget preflight path still runs.
+- Run the targeted Agent runtime/context budget tests plus repository format, i18n, and lint checks.
diff --git a/docs/issues/image-generation-context-budget-bypass/spec.md b/docs/issues/image-generation-context-budget-bypass/spec.md
new file mode 100644
index 000000000..cc61002f3
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/spec.md
@@ -0,0 +1,41 @@
+# Image Generation Context Budget Bypass Spec
+
+> Status: Draft
+> Date: 2026-05-18
+
+## Background
+
+DeepChat Agent applies a chat-oriented provider-call context preflight before sending model
+requests. The check estimates message tokens, tool schemas, and output tokens, then blocks requests
+that cannot fit inside the configured model context window.
+
+That check is valid for chat models, but image generation and other non-chat routes do not use the
+same request shape. Image requests can therefore fail before reaching the provider, with the error:
+
+`Request was not sent because it cannot fit within the model context window after applying the safety margin.`
+
+## Goals
+
+- Only apply DeepChat's chat context budget to chat model requests.
+- Skip the chat budget preflight, compaction recovery, and temporary max-token shrink for explicit
+  image generation and other non-chat model routes.
+- Preserve current behavior for chat models and ACP provider bypasses.
+
+## Acceptance Criteria
+
+- Image generation models or image endpoints reach the provider even when chat-budget estimation
+  would fail.
+- Non-chat requests do not trigger the DeepChat context-pressure compaction path solely because of
+  chat message/tool-schema estimates.
+- Non-chat request max tokens are not reduced by the chat preflight safety margin.
+- Chat models keep the existing preflight, recovery, and overflow failure behavior.
+- Existing legacy model configs without explicit type or endpoint metadata continue to be treated as
+  chat models.
+- No public API, IPC, schema, or renderer UI changes are introduced.
+
+## Non-Goals
+
+- Redesign image generation request construction.
+- Change the agent image generation tool behavior for chat models.
+- Change `contextBudget.ts` budgeting math.
+- Add renderer UI for non-chat routing diagnostics.
diff --git a/docs/issues/image-generation-context-budget-bypass/tasks.md b/docs/issues/image-generation-context-budget-bypass/tasks.md
new file mode 100644
index 000000000..727a38d9b
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/tasks.md
@@ -0,0 +1,7 @@
+# Image Generation Context Budget Bypass Tasks
+
+- [x] Document the issue and intended behavior.
+- [x] Add model-aware chat budget gating in the Agent runtime.
+- [x] Add regression coverage for image endpoint bypass.
+- [x] Verify chat context budget behavior remains unchanged.
+- [x] Run targeted tests and repository quality checks.
diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts
index ac7198b10..af5bcbd04 100644
--- a/src/main/presenter/agentRuntimePresenter/index.ts
+++ b/src/main/presenter/agentRuntimePresenter/index.ts
@@ -57,7 +57,7 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
-import { isDeepSeekSeriesModelId } from '@shared/model'
+import { ApiEndpointType, ModelType, isDeepSeekSeriesModelId } from '@shared/model'
 import { nanoid } from 'nanoid'
 import type { SQLitePresenter } from '../sqlitePresenter'
 import { eventBus, SendTarget } from '@/eventbus'
@@ -621,6 +621,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     try {
       this.throwIfAbortRequested(preStreamAbortSignal)
       const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+      const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+      const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const interleavedReasoning = this.resolveInterleavedReasoningConfig(
         state.providerId,
@@ -629,7 +631,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId)
@@ -656,9 +659,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         think: false
       }
 
-      const compactionIntent = this.shouldBypassDeepChatContextBudget(state.providerId)
-        ? null
-        : await this.compactionService.prepareForNextUserTurn({
+      const compactionIntent = useContextBudget
+        ? await this.compactionService.prepareForNextUserTurn({
             sessionId,
             providerId: state.providerId,
             modelId: state.modelId,
@@ -674,6 +676,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
             newUserContent: normalizedInput,
             signal: preStreamAbortSignal
           })
+        : null
       let summaryState: SessionSummaryState
 
       if (compactionIntent) {
@@ -1428,15 +1431,46 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     return resolvedProviderId === 'acp'
   }
 
-  private shouldBypassDeepChatContextBudget(providerId?: string | null): boolean {
-    return providerId?.trim() === 'acp'
+  /**
+   * Reports whether DeepChat's chat context budget applies to a request.
+   *
+   * @param providerId Provider id; `acp` (after trimming) never uses the budget.
+   * @param modelConfig Model metadata; when absent the model is treated as a chat model.
+   * @returns `false` for ACP, image-generation/TTS model types, a non-chat `apiEndpoint`, or
+   *   the `image-generation` endpoint type; `true` otherwise.
+   */
+  private shouldUseDeepChatContextBudget(
+    providerId?: string | null,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
+  ): boolean {
+    if (providerId?.trim() === 'acp') {
+      return false
+    }
+    if (!modelConfig) {
+      return true
+    }
+
+    const { apiEndpoint, endpointType, type: modelType } = modelConfig
+    const hasNonChatType = modelType === ModelType.ImageGeneration || modelType === ModelType.TTS
+    // An unset apiEndpoint is not evidence of a non-chat route.
+    const hasNonChatEndpoint = Boolean(apiEndpoint) && apiEndpoint !== ApiEndpointType.Chat
+    const isImageEndpointType = endpointType === 'image-generation'
+    return !(hasNonChatType || hasNonChatEndpoint || isImageEndpointType)
+  }
+
+  private shouldBypassDeepChatContextBudget(
+    providerId?: string | null,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
+  ): boolean {
+    return !this.shouldUseDeepChatContextBudget(providerId, modelConfig) // negated chat-budget gate
   }
 
   private resolveDeepChatContextBudgetLength(
     providerId: string | null | undefined,
-    contextLength: number
+    contextLength: number,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
   ): number {
-    return this.shouldBypassDeepChatContextBudget(providerId)
+    return this.shouldBypassDeepChatContextBudget(providerId, modelConfig)
       ? Number.MAX_SAFE_INTEGER
       : contextLength
   }
@@ -1619,7 +1653,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     if (!state) {
       throw new Error(`Session ${sessionId} not found`)
     }
-    if (this.shouldBypassDeepChatContextBudget(state.providerId)) {
+    const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+    if (this.shouldBypassDeepChatContextBudget(state.providerId, modelConfig)) {
       throw new Error('Manual compaction is only available for DeepChat agent sessions.')
     }
     if (state.status !== 'idle') {
@@ -1639,7 +1674,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId)
@@ -1854,15 +1890,15 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     ).getProviderInstance(state.providerId)
 
     const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+    const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
     const interleavedReasoning =
       providedInterleavedReasoning ??
       this.resolveInterleavedReasoningConfig(state.providerId, state.modelId, generationSettings)
-    const bypassContextBudget = this.shouldBypassDeepChatContextBudget(state.providerId)
     const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
       state.providerId,
-      generationSettings.contextLength
+      generationSettings.contextLength,
+      baseModelConfig
     )
-    const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
     const capabilityProviderId = this.resolveCapabilityProviderId(state.providerId, state.modelId)
     const reasoningPortrait = this.getReasoningPortrait(state.providerId, state.modelId)
     const modelConfig: ModelConfig = {
@@ -1886,6 +1922,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     const traceEnabled = this.configPresenter.getSetting<boolean>('traceDebugEnabled') === true
     const llmProviderPresenter = this.llmProviderPresenter
     const pendingInputCoordinator = this.pendingInputCoordinator
+    const shouldBypassContextBudget = this.shouldBypassDeepChatContextBudget.bind(this)
     const injectSteerInputsIntoRequest = this.injectSteerInputsIntoRequest.bind(this)
     const recoverContextPressure = this.recoverRequestContextPressure.bind(this)
     const replaceLeadingSystemPromptInPlace = this.replaceLeadingSystemPromptInPlace.bind(this)
@@ -1947,13 +1984,19 @@ export class AgentRuntimePresenter implements IAgentImplementation {
           requestMaxTokens,
           requestTools
         ) {
+          const requestBypassesContextBudget = shouldBypassContextBudget(
+            state.providerId,
+            requestModelConfig
+          )
           const claimedSteerBatch = pendingInputCoordinator.claimSteerBatchForNextLoop(sessionId)
           const injectedMessages = injectSteerInputsIntoRequest(
             requestMessages,
             claimedSteerBatch,
             supportsVision,
             supportsAudioInput,
-            bypassContextBudget ? Number.MAX_SAFE_INTEGER : requestModelConfig.contextLength,
+            requestBypassesContextBudget
+              ? Number.MAX_SAFE_INTEGER
+              : requestModelConfig.contextLength,
             requestMaxTokens
           )
 
@@ -1964,7 +2007,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
             let providerMessages = injectedMessages
             let providerMaxTokens = requestMaxTokens
 
-            if (!bypassContextBudget) {
+            if (!requestBypassesContextBudget) {
               const protectedSteerTailCount =
                 claimedSteerBatch.length > 0
                   ? claimedSteerBatch.length + (requestMessages.at(-1)?.role === 'user' ? 1 : 0)
@@ -2543,6 +2586,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       preStreamAbortSignal = preStreamAbortController.signal
       this.throwIfAbortRequested(preStreamAbortSignal)
       const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+      const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+      const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const interleavedReasoning = this.resolveInterleavedReasoningConfig(
         state.providerId,
@@ -2551,7 +2596,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const projectDir = this.resolveProjectDir(sessionId)
@@ -2570,9 +2616,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         activeSkillNames
       )
       this.throwIfAbortRequested(preStreamAbortSignal)
-      const summaryState = this.shouldBypassDeepChatContextBudget(state.providerId)
-        ? this.sessionStore.getSummaryState(sessionId)
-        : await this.resolveCompactionStateForResumeTurn({
+      const summaryState = useContextBudget
+        ? await this.resolveCompactionStateForResumeTurn({
             sessionId,
             messageId,
             providerId: state.providerId,
@@ -2588,6 +2633,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
               interleavedReasoning.preserveEmptyReasoningContent === true,
             signal: preStreamAbortSignal
           })
+        : this.sessionStore.getSummaryState(sessionId)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const systemPrompt = appendSummarySection(baseSystemPrompt, summaryState.summaryText)
       let resumeContext = buildResumeContext(
@@ -2608,11 +2654,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
             interleavedReasoning.preserveEmptyReasoningContent === true
         }
       )
-      if (
-        budgetToolCall?.id &&
-        budgetToolCall.name &&
-        !this.shouldBypassDeepChatContextBudget(state.providerId)
-      ) {
+      if (budgetToolCall?.id && budgetToolCall.name && useContextBudget) {
         const resumeBudget = this.fitResumeBudgetForToolCall({
           resumeContext,
           toolDefinitions: tools,
diff --git a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
index 8aebcc8c3..466e3425b 100644
--- a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
+++ b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
@@ -4,6 +4,7 @@ import os from 'os'
 import path from 'path'
 import { app } from 'electron'
 import type { DeepChatSessionState } from '@shared/types/agent-interface'
+import { ApiEndpointType, ModelType } from '@shared/model'
 import { AgentRuntimePresenter } from '@/presenter/agentRuntimePresenter/index'
 import { NewSessionHooksBridge } from '@/presenter/hooksNotifications/newSessionBridge'
 import { estimateMessagesTokens } from '@/presenter/agentRuntimePresenter/contextBuilder'
@@ -3471,6 +3472,108 @@ describe('AgentRuntimePresenter', () => {
       )
     })
 
+    it('bypasses chat context preflight for image generation endpoints', async () => {
+      // Every non-chat marker (type, apiEndpoint, endpointType) is set, and contextLength is
+      // kept small relative to the request issued further down.
+      const imageModelConfig = {
+        temperature: 0.7,
+        maxTokens: 4096,
+        contextLength: 8192,
+        thinkingBudget: 512,
+        reasoningEffort: 'medium',
+        verbosity: 'medium',
+        vision: false,
+        functionCall: false,
+        reasoning: false,
+        type: ModelType.ImageGeneration,
+        apiEndpoint: ApiEndpointType.Image,
+        endpointType: 'image-generation' as const
+      }
+      configPresenter.getModelConfig.mockImplementation((modelId: string) =>
+        modelId === 'gpt-image-2'
+          ? imageModelConfig
+          : {
+              temperature: 0.7,
+              maxTokens: 4096,
+              contextLength: 128000,
+              thinkingBudget: 512,
+              reasoningEffort: 'medium',
+              verbosity: 'medium',
+              vision: false
+            }
+      )
+      // Observe whether the compaction service is asked to prepare the next user turn.
+      const prepareTurnSpy = vi.spyOn(
+        (agent as unknown as { compactionService: { prepareForNextUserTurn: () => unknown } })
+          .compactionService,
+        'prepareForNextUserTurn'
+      )
+
+      await agent.initSession('s1', {
+        providerId: 'openai',
+        modelId: 'gpt-image-2',
+        generationSettings: { contextLength: 8192, maxTokens: 4096 }
+      })
+      await agent.processMessage('s1', 'draw a mountain')
+
+      // Turn setup: max tokens stay at the configured value and compaction is never prepared.
+      const streamArgs = (processStream as ReturnType<typeof vi.fn>).mock.calls[0][0]
+      expect(streamArgs.maxTokens).toBe(4096)
+      expect(prepareTurnSpy).not.toHaveBeenCalled()
+
+      // Clear recorded calls so the assertions below only see the direct coreStream call.
+      const providerStream = llmProvider.getProviderInstance.mock.results[0].value.coreStream
+      providerStream.mockClear()
+      llmProvider.generateText.mockClear()
+
+      // Tool schema and prompt sizes (10000/9000) both exceed the 8192 contextLength.
+      const oversizedTools = [
+        {
+          type: 'function',
+          function: {
+            name: 'large_schema',
+            description: makeTextWithEstimatedTokens(10000),
+            parameters: {
+              type: 'object',
+              properties: {
+                prompt: { type: 'string', description: makeTextWithEstimatedTokens(10000) }
+              },
+              required: ['prompt']
+            }
+          },
+          server: { name: 'test', icons: '', description: 'large schema' }
+        }
+      ]
+      const requestMessages = [
+        { role: 'user' as const, content: makeTextWithEstimatedTokens(9000) }
+      ]
+
+      for await (const _chunk of streamArgs.coreStream(
+        requestMessages,
+        streamArgs.modelId,
+        streamArgs.modelConfig,
+        streamArgs.temperature,
+        4096,
+        oversizedTools
+      )) {
+        // Drain the stream; only the arguments forwarded to the provider are inspected.
+      }
+
+      // The provider is called exactly once with the messages, max tokens and tools unchanged,
+      // and generateText is never invoked.
+      expect(providerStream).toHaveBeenCalledTimes(1)
+      expect(providerStream.mock.calls[0][0]).toEqual(requestMessages)
+      expect(providerStream.mock.calls[0][4]).toBe(4096)
+      expect(providerStream.mock.calls[0][5]).toEqual(oversizedTools)
+      expect(llmProvider.generateText).not.toHaveBeenCalled()
+
+      // The preflight failure text must not reach the renderer or the stored message.
+      const rendererCalls = (eventBus.sendToRenderer as ReturnType<typeof vi.fn>).mock.calls
+      const savedUpdates = sqlitePresenter.deepchatMessagesTable.updateContentAndStatus.mock.calls
+      expect(JSON.stringify(rendererCalls)).not.toContain('Request was not sent')
+      expect(JSON.stringify(savedUpdates)).not.toContain('Request was not sent')
+    })
+
     it('preflights provider calls with a safety margin and compacts before low-output pressure calls', async () => {
       await agent.initSession('s1', {
         providerId: 'openai',