ThinkInAIXYZ · zerob13 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/docs/issues/image-generation-context-budget-bypass/plan.md b/docs/issues/image-generation-context-budget-bypass/plan.md
@@ -0,0 +1,24 @@
+# Image Generation Context Budget Bypass Plan
+
+## Approach
+
+- Add a model-aware Agent runtime helper that returns true only when DeepChat should use its chat
+  context budget.
+- Keep ACP bypass behavior, and also bypass when the model config explicitly sets `type` to
+  `ImageGeneration` or `TTS`, a non-chat `apiEndpoint`, or `endpointType === 'image-generation'`.
+- Treat legacy model configs that lack type or endpoint metadata as chat-compatible.
+
+## Runtime Changes
+
+- Use the helper in new user turns and resume/retry context construction before deciding whether to
+  compact, trim, or use a finite chat context length.
+- Use the helper inside the provider-call wrapper before running preflight/recovery or shrinking the
+  per-call `maxTokens`.
+- Leave `contextBudget.ts`, public contracts, IPC, and renderer code unchanged.
+
+## Test Strategy
+
+- Add an Agent runtime regression test for an image endpoint request that would fail chat-budget
+  preflight, asserting the provider is still called and max tokens are preserved.
+- Keep chat-model pressure tests verifying the existing budget preflight path still runs.
+- Run the targeted Agent runtime/context budget tests plus repository format, i18n, and lint checks.
diff --git a/docs/issues/image-generation-context-budget-bypass/spec.md b/docs/issues/image-generation-context-budget-bypass/spec.md
@@ -0,0 +1,41 @@
+# Image Generation Context Budget Bypass Spec
+
+> Status: Draft
+> Date: 2026-05-18
+
+## Background
+
+DeepChat Agent applies a chat-oriented provider-call context preflight before sending model
+requests. The check estimates message tokens, tool schemas, and output tokens, then blocks requests
+that cannot fit inside the configured model context window.
+
+That check is valid for chat models, but image generation and other non-chat routes do not use the
+same request shape. Image requests can therefore fail before reaching the provider with this error:
+
+`Request was not sent because it cannot fit within the model context window after applying the safety margin.`
+
+## Goals
+
+- Only apply DeepChat's chat context budget to chat model requests.
+- Skip the chat budget preflight, compaction recovery, and temporary max-token shrink for explicit
+  image generation and other non-chat model routes.
+- Preserve current behavior for chat models and ACP provider bypasses.
+
+## Acceptance Criteria
+
+- Image generation models or image endpoints reach the provider even when chat-budget estimation
+  would fail.
+- Non-chat requests do not trigger the DeepChat context-pressure compaction path solely because of
+  chat message/tool-schema estimates.
+- Max tokens for non-chat requests are not reduced by the chat preflight safety margin.
+- Chat models keep the existing preflight, recovery, and overflow failure behavior.
+- Existing legacy model configs without explicit type or endpoint metadata continue to be treated as
+  chat requests.
+- No public API, IPC, schema, or renderer UI changes are introduced.
+
+## Non-Goals
+
+- Redesign image generation request construction.
+- Change the agent image generation tool behavior for chat models.
+- Change `contextBudget.ts` budgeting math.
+- Add renderer UI for non-chat routing diagnostics.
diff --git a/docs/issues/image-generation-context-budget-bypass/tasks.md b/docs/issues/image-generation-context-budget-bypass/tasks.md
@@ -0,0 +1,7 @@
+# Image Generation Context Budget Bypass Tasks
+
+- [x] Document the issue and intended behavior.
+- [x] Add model-aware chat budget gating in the Agent runtime.
+- [x] Add regression coverage for image endpoint bypass.
+- [x] Verify chat context budget behavior remains unchanged.
+- [x] Run targeted tests and repository quality checks.
diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts
@@ -57,7 +57,7 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
-import { isDeepSeekSeriesModelId } from '@shared/model'
+import { ApiEndpointType, ModelType, isDeepSeekSeriesModelId } from '@shared/model'
 import { isTtsModelConfig, isTtsModelId } from '@shared/ttsSettings'
 import { nanoid } from 'nanoid'
 import type { SQLitePresenter } from '../sqlitePresenter'
@@ -622,6 +622,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     try {
       this.throwIfAbortRequested(preStreamAbortSignal)
       const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+      const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+      const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const interleavedReasoning = this.resolveInterleavedReasoningConfig(
         state.providerId,
@@ -630,7 +632,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId)
@@ -657,9 +660,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         think: false
       }
 
-      const compactionIntent = this.shouldBypassDeepChatContextBudget(state.providerId)
-        ? null
-        : await this.compactionService.prepareForNextUserTurn({
+      const compactionIntent = useContextBudget
+        ? await this.compactionService.prepareForNextUserTurn({
             sessionId,
             providerId: state.providerId,
             modelId: state.modelId,
@@ -675,6 +677,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
             newUserContent: normalizedInput,
             signal: preStreamAbortSignal
           })
+        : null
       let summaryState: SessionSummaryState
 
       if (compactionIntent) {
@@ -1429,15 +1432,46 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     return resolvedProviderId === 'acp'
   }
 
-  private shouldBypassDeepChatContextBudget(providerId?: string | null): boolean {
-    return providerId?.trim() === 'acp'
+  private shouldUseDeepChatContextBudget(
+    providerId?: string | null,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
+  ): boolean {
+    if (providerId?.trim() === 'acp') {
+      return false // ACP provider: never apply the chat context budget
+    }
+
+    if (!modelConfig) {
+      return true // no model config supplied: fall back to chat budgeting
+    }
+
+    if (modelConfig.type === ModelType.ImageGeneration || modelConfig.type === ModelType.TTS) {
+      return false // model type is image generation or TTS
+    }
+
+    if (modelConfig.apiEndpoint && modelConfig.apiEndpoint !== ApiEndpointType.Chat) {
+      return false // apiEndpoint is set and is not the chat endpoint
+    }
+
+    if (modelConfig.endpointType === 'image-generation') {
+      return false // endpointType marks an image-generation route
+    }
+
+    return true // nothing marks the request as non-chat, so budget it as chat
+  }
+
+  private shouldBypassDeepChatContextBudget(
+    providerId?: string | null,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
+  ): boolean {
+    return !this.shouldUseDeepChatContextBudget(providerId, modelConfig)
   }
 
   private resolveDeepChatContextBudgetLength(
     providerId: string | null | undefined,
-    contextLength: number
+    contextLength: number,
+    modelConfig?: Pick<ModelConfig, 'apiEndpoint' | 'endpointType' | 'type'> | null
   ): number {
-    return this.shouldBypassDeepChatContextBudget(providerId)
+    return this.shouldBypassDeepChatContextBudget(providerId, modelConfig)
       ? Number.MAX_SAFE_INTEGER
       : contextLength
   }
@@ -1620,7 +1654,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     if (!state) {
       throw new Error(`Session ${sessionId} not found`)
     }
-    if (this.shouldBypassDeepChatContextBudget(state.providerId)) {
+    const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+    if (this.shouldBypassDeepChatContextBudget(state.providerId, modelConfig)) {
       throw new Error('Manual compaction is only available for DeepChat agent sessions.')
     }
     if (state.status !== 'idle') {
@@ -1640,7 +1675,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId)
@@ -1855,15 +1891,15 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     ).getProviderInstance(state.providerId)
 
     const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+    const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
     const interleavedReasoning =
       providedInterleavedReasoning ??
       this.resolveInterleavedReasoningConfig(state.providerId, state.modelId, generationSettings)
-    const bypassContextBudget = this.shouldBypassDeepChatContextBudget(state.providerId)
     const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
       state.providerId,
-      generationSettings.contextLength
+      generationSettings.contextLength,
+      baseModelConfig
     )
-    const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
     const capabilityProviderId = this.resolveCapabilityProviderId(state.providerId, state.modelId)
     const reasoningPortrait = this.getReasoningPortrait(state.providerId, state.modelId)
     const modelConfig: ModelConfig = {
@@ -1887,6 +1923,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     const traceEnabled = this.configPresenter.getSetting<boolean>('traceDebugEnabled') === true
     const llmProviderPresenter = this.llmProviderPresenter
     const pendingInputCoordinator = this.pendingInputCoordinator
+    const shouldBypassContextBudget = this.shouldBypassDeepChatContextBudget.bind(this)
     const injectSteerInputsIntoRequest = this.injectSteerInputsIntoRequest.bind(this)
     const recoverContextPressure = this.recoverRequestContextPressure.bind(this)
     const replaceLeadingSystemPromptInPlace = this.replaceLeadingSystemPromptInPlace.bind(this)
@@ -1948,13 +1985,19 @@ export class AgentRuntimePresenter implements IAgentImplementation {
           requestMaxTokens,
           requestTools
         ) {
+          const requestBypassesContextBudget = shouldBypassContextBudget(
+            state.providerId,
+            requestModelConfig
+          )
           const claimedSteerBatch = pendingInputCoordinator.claimSteerBatchForNextLoop(sessionId)
           const injectedMessages = injectSteerInputsIntoRequest(
             requestMessages,
             claimedSteerBatch,
             supportsVision,
             supportsAudioInput,
-            bypassContextBudget ? Number.MAX_SAFE_INTEGER : requestModelConfig.contextLength,
+            requestBypassesContextBudget
+              ? Number.MAX_SAFE_INTEGER
+              : requestModelConfig.contextLength,
             requestMaxTokens
           )
 
@@ -1968,7 +2011,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
               isTtsModelConfig(requestModelConfig) || isTtsModelId(requestModelId)
             const effectiveRequestTools: MCPToolDefinition[] = isTtsRequest ? [] : requestTools
 
-            if (!bypassContextBudget) {
+            if (!requestBypassesContextBudget) {
               const protectedSteerTailCount =
                 claimedSteerBatch.length > 0
                   ? claimedSteerBatch.length + (requestMessages.at(-1)?.role === 'user' ? 1 : 0)
@@ -2547,6 +2590,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       preStreamAbortSignal = preStreamAbortController.signal
       this.throwIfAbortRequested(preStreamAbortSignal)
       const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId)
+      const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
+      const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const interleavedReasoning = this.resolveInterleavedReasoningConfig(
         state.providerId,
@@ -2555,7 +2600,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
-        generationSettings.contextLength
+        generationSettings.contextLength,
+        modelConfig
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
       const projectDir = this.resolveProjectDir(sessionId)
@@ -2574,9 +2620,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         activeSkillNames
       )
       this.throwIfAbortRequested(preStreamAbortSignal)
-      const summaryState = this.shouldBypassDeepChatContextBudget(state.providerId)
-        ? this.sessionStore.getSummaryState(sessionId)
-        : await this.resolveCompactionStateForResumeTurn({
+      const summaryState = useContextBudget
+        ? await this.resolveCompactionStateForResumeTurn({
             sessionId,
             messageId,
             providerId: state.providerId,
@@ -2592,6 +2637,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
               interleavedReasoning.preserveEmptyReasoningContent === true,
             signal: preStreamAbortSignal
           })
+        : this.sessionStore.getSummaryState(sessionId)
       this.throwIfAbortRequested(preStreamAbortSignal)
       const systemPrompt = appendSummarySection(baseSystemPrompt, summaryState.summaryText)
       let resumeContext = buildResumeContext(
@@ -2612,11 +2658,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
             interleavedReasoning.preserveEmptyReasoningContent === true
         }
       )
-      if (
-        budgetToolCall?.id &&
-        budgetToolCall.name &&
-        !this.shouldBypassDeepChatContextBudget(state.providerId)
-      ) {
+      if (budgetToolCall?.id && budgetToolCall.name && useContextBudget) {
         const resumeBudget = this.fitResumeBudgetForToolCall({
           resumeContext,
           toolDefinitions: tools,