From 60099cdd7ba454257dfb5209aff8e90343b0c492 Mon Sep 17 00:00:00 2001
From: zhangmo8 <zhangmo8@users.noreply.github.com>
Date: Mon, 18 May 2026 16:50:45 +0800
Subject: [PATCH 1/3] feat: implement OpenAI-compatible video generation
 features and settings

---
 .../plan.md                                   |  42 ++
 .../spec.md                                   |  32 ++
 .../tasks.md                                  |  25 +
 .../plan.md                                   |  21 +
 .../spec.md                                   |  25 +
 .../tasks.md                                  |  11 +
 .../presenter/agentRuntimePresenter/index.ts  |  98 +++-
 src/main/presenter/configPresenter/index.ts   |  12 +
 .../presenter/configPresenter/modelConfig.ts  |  20 +-
 .../configPresenter/providerModelHelper.ts    |  21 +-
 src/main/presenter/index.ts                   |   8 +
 .../llmProviderPresenter/aiSdk/runtime.ts     | 507 ++++++++++++++++++
 .../presenter/llmProviderPresenter/index.ts   |  88 +++
 .../providers/aiSdkProvider.ts                |  69 ++-
 .../tables/deepchatSessions.ts                |  52 +-
 .../settings/components/ProviderModelList.vue |   5 +-
 src/renderer/src/components/ChatConfig.vue    |  14 +-
 .../src/components/chat/ChatStatusBar.vue     |  89 ++-
 .../src/components/chat/messageListItems.ts   |   1 +
 .../components/message/MessageBlockVideo.vue  | 132 +++++
 .../message/MessageItemAssistant.vue          |  24 +-
 .../components/settings/ModelConfigDialog.vue | 112 +++-
 .../OpenAIVideoGenerationSettingsFields.vue   | 175 ++++++
 .../src/composables/useModelTypeDetection.ts  |  13 +-
 src/renderer/src/i18n/da-DK/model.json        |   3 +-
 src/renderer/src/i18n/da-DK/settings.json     |  40 +-
 src/renderer/src/i18n/en-US/model.json        |   3 +-
 src/renderer/src/i18n/en-US/settings.json     |  40 +-
 src/renderer/src/i18n/fa-IR/model.json        |   3 +-
 src/renderer/src/i18n/fa-IR/settings.json     |  40 +-
 src/renderer/src/i18n/fr-FR/model.json        |   3 +-
 src/renderer/src/i18n/fr-FR/settings.json     |  40 +-
 src/renderer/src/i18n/he-IL/model.json        |   3 +-
 src/renderer/src/i18n/he-IL/settings.json     |  40 +-
 src/renderer/src/i18n/ja-JP/model.json        |   3 +-
 src/renderer/src/i18n/ja-JP/settings.json     |  40 +-
 src/renderer/src/i18n/ko-KR/model.json        |   3 +-
 src/renderer/src/i18n/ko-KR/settings.json     |  40 +-
 src/renderer/src/i18n/pt-BR/model.json        |   3 +-
 src/renderer/src/i18n/pt-BR/settings.json     |  40 +-
 src/renderer/src/i18n/ru-RU/model.json        |   3 +-
 src/renderer/src/i18n/ru-RU/settings.json     |  40 +-
 src/renderer/src/i18n/zh-CN/model.json        |   3 +-
 src/renderer/src/i18n/zh-CN/settings.json     |  40 +-
 src/renderer/src/i18n/zh-HK/model.json        |   3 +-
 src/renderer/src/i18n/zh-HK/settings.json     |  40 +-
 src/renderer/src/i18n/zh-TW/model.json        |   3 +-
 src/renderer/src/i18n/zh-TW/settings.json     |  40 +-
 src/renderer/src/pages/NewThreadPage.vue      |   1 +
 src/renderer/src/stores/modelStore.ts         |  26 +-
 src/renderer/src/stores/ui/draft.ts           |  19 +
 src/shared/contracts/common.ts                |  36 +-
 src/shared/contracts/domainSchemas.ts         |   2 +
 src/shared/model.ts                           |  17 +-
 src/shared/types/agent-interface.d.ts         |   2 +
 src/shared/types/model-db.ts                  |  17 +-
 src/shared/types/presenters/index.d.ts        |   1 +
 .../types/presenters/legacy.presenters.d.ts   |  16 +
 .../presenters/llmprovider.presenter.d.ts     |  16 +
 src/shared/videoGenerationSettings.ts         | 339 ++++++++++++
 .../llmProviderPresenter/aiSdkRuntime.test.ts | 208 +++++++
 .../aihubmixProvider.test.ts                  |  33 ++
 62 files changed, 2720 insertions(+), 125 deletions(-)
 create mode 100644 docs/features/openai-compatible-video-generation/plan.md
 create mode 100644 docs/features/openai-compatible-video-generation/spec.md
 create mode 100644 docs/features/openai-compatible-video-generation/tasks.md
 create mode 100644 docs/issues/openai-compatible-video-prompt-duration-fallback/plan.md
 create mode 100644 docs/issues/openai-compatible-video-prompt-duration-fallback/spec.md
 create mode 100644 docs/issues/openai-compatible-video-prompt-duration-fallback/tasks.md
 create mode 100644 src/renderer/src/components/message/MessageBlockVideo.vue
 create mode 100644 src/renderer/src/components/settings/OpenAIVideoGenerationSettingsFields.vue
 create mode 100644 src/shared/videoGenerationSettings.ts

diff --git a/docs/features/openai-compatible-video-generation/plan.md b/docs/features/openai-compatible-video-generation/plan.md
new file mode 100644
index 000000000..37b2c5b4f
--- /dev/null
+++ b/docs/features/openai-compatible-video-generation/plan.md
@@ -0,0 +1,42 @@
+# Plan
+
+## Approach
+Treat video generation as a first-class model capability parallel to image generation and TTS:
+- Extend shared model/type enums and model-db parsing to include `videoGeneration`.
+- Add a shared video compatibility helper that can recover video intent from model metadata, endpoint hints, modalities, or known model ID patterns when upstream data is incomplete.
+- Add an OpenAI-compatible video runtime path that sends requests to `/v1/videos`, normalizes provider responses, and emits media output into the assistant stream.
+- Reuse the current assistant media block transport by carrying video payloads through the existing message block structure with video MIME detection on the renderer side.
+
+## Affected Areas
+- Shared types/contracts:
+  - `src/shared/model.ts`
+  - `src/shared/types/model-db.ts`
+  - `src/shared/types/presenters/llmprovider.presenter.d.ts`
+  - `src/shared/types/presenters/legacy.presenters.d.ts`
+  - `src/shared/videoGenerationSettings.ts` (new)
+- Main runtime/provider:
+  - `src/main/presenter/configPresenter/index.ts`
+  - `src/main/presenter/configPresenter/modelConfig.ts`
+  - `src/main/presenter/llmProviderPresenter/index.ts`
+  - `src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts`
+  - `src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts`
+- Renderer:
+  - `src/renderer/src/composables/useModelTypeDetection.ts`
+  - `src/renderer/src/components/chat/messageListItems.ts`
+  - `src/renderer/src/components/message/MessageItemAssistant.vue`
+  - `src/renderer/src/components/message/MessageBlockVideo.vue` (new)
+  - `src/renderer/settings/components/ProviderModelList.vue`
+- Model DB:
+  - `resources/model-db/providers.json`
+
+## Compatibility
+- Existing text, image, and TTS paths remain unchanged.
+- Existing assistant block persistence remains compatible by reusing the current media payload field rather than changing the storage shape.
+- Future video models can plug in through shared detection helpers or explicit `videoGeneration` metadata.
+
+## Verification Strategy
+Run:
+- `pnpm run typecheck`
+- `pnpm run format`
+- `pnpm run i18n`
+- `pnpm run lint`
diff --git a/docs/features/openai-compatible-video-generation/spec.md b/docs/features/openai-compatible-video-generation/spec.md
new file mode 100644
index 000000000..66550c902
--- /dev/null
+++ b/docs/features/openai-compatible-video-generation/spec.md
@@ -0,0 +1,32 @@
+# OpenAI-Compatible Video Generation
+
+## User Need
+Users need DeepChat to recognize and run video generation models such as `doubao-seedance-2-0-fast-260128` through the same model-driven provider flow used by text and audio generation, without hardcoding one-off provider logic for each future video model.
+
+## Goal
+Enable first-class video generation routing in DeepChat for OpenAI-compatible providers, starting with AIHubMix Seedance models and leaving a compatibility layer for future video models.
+
+## Acceptance Criteria
+1. Shared model/type contracts support `videoGeneration` and preserve compatibility with existing model metadata.
+2. DeepChat can recognize `doubao-seedance-2-0-fast-260128` as a video generation model even when upstream metadata is incomplete or still marked as `chat`.
+3. Main runtime can route video generation requests through an OpenAI-compatible `/v1/videos` flow.
+4. Video generation responses are normalized into a stable internal result shape that future providers/models can reuse.
+5. Generated video output reaches the existing assistant message pipeline and renders in the chat UI.
+6. Validation commands pass:
+- `pnpm run typecheck`
+- `pnpm run format`
+- `pnpm run i18n`
+- `pnpm run lint`
+
+## Constraints
+- Keep the provider integration generic for OpenAI-compatible video endpoints.
+- Reuse the current assistant media block pipeline where practical instead of introducing a parallel storage format.
+- Do not scope in advanced video editing controls or provider-specific parameter UIs for this change.
+
+## Non-Goals
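+- Dedicated standalone video generation settings panels (basic options are exposed only through the existing chat status bar and model config dialog).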
+- Agent-level video generation tool configuration.
+- Non-OpenAI-compatible video provider protocols.
+
+## Open Questions
+- None for current scope.
diff --git a/docs/features/openai-compatible-video-generation/tasks.md b/docs/features/openai-compatible-video-generation/tasks.md
new file mode 100644
index 000000000..d27f8ab8f
--- /dev/null
+++ b/docs/features/openai-compatible-video-generation/tasks.md
@@ -0,0 +1,25 @@
+# Tasks
+
+## Shared Types + Detection
+- [x] Add `ModelType.VideoGeneration` and extend model-db parsing/schema for `videoGeneration`.
+- [x] Add shared video detection/compatibility helpers for endpoint hints, modalities, and known model IDs.
+- [x] Update model config inference to classify video models consistently in main and renderer flows.
+- [x] Extend session generation settings/contracts and draft state to carry `videoGeneration` options.
+
+## Runtime + Provider
+- [x] Add `generateVideoStandalone` presenter contracts and implementation.
+- [x] Add OpenAI-compatible `/v1/videos` request/response normalization in the AI SDK runtime/provider path.
+- [x] Persist and sanitize session-level video generation settings through agent runtime and sqlite storage.
+- [ ] Mark Seedance built-in model metadata as `videoGeneration` where available.
+
+## Renderer
+- [x] Expose video model detection for UI behavior alignment.
+- [x] Add assistant message rendering for generated video media.
+- [x] Update model list/type display for video generation models.
+- [x] Expose video generation settings in chat status bar and model config dialog flows.
+
+## Validation
+- [x] Run `pnpm run typecheck`.
+- [x] Run `pnpm run format`.
+- [x] Run `pnpm run i18n`.
+- [x] Run `pnpm run lint`.
diff --git a/docs/issues/openai-compatible-video-prompt-duration-fallback/plan.md b/docs/issues/openai-compatible-video-prompt-duration-fallback/plan.md
new file mode 100644
index 000000000..2f7ed4812
--- /dev/null
+++ b/docs/issues/openai-compatible-video-prompt-duration-fallback/plan.md
@@ -0,0 +1,21 @@
+# Plan
+
+## Approach
+Add a small runtime helper that extracts an integer duration from obvious prompt hints only when structured video settings are absent and the parsed value is supported by the active model, then reuse that helper for both request tracing and the actual `/videos` request body.
+
+## Implementation
+- Add a focused runtime test that exercises the OpenAI-compatible `/videos` flow and asserts `duration: 2` is sent for prompts like `... 2s`.
+- Add a conservative prompt-duration extractor for `Ns`, `N sec`, `N seconds`, and `N秒`.
+- Enforce model-specific validity before injecting the derived duration (for Seedance, 4–15 seconds inclusive).
+- Apply the fallback only when `videoGeneration.duration` and `videoGeneration.seconds` are both unset.
+
+## Affected Files
+- `src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts`
+- `test/main/presenter/llmProviderPresenter/aiSdkRuntime.test.ts`
+- `docs/issues/openai-compatible-video-prompt-duration-fallback/tasks.md`
+
+## Validation
+- Focused AI SDK runtime tests for video request bodies.
+- `pnpm run format`
+- `pnpm run i18n`
+- `pnpm run lint`
diff --git a/docs/issues/openai-compatible-video-prompt-duration-fallback/spec.md b/docs/issues/openai-compatible-video-prompt-duration-fallback/spec.md
new file mode 100644
index 000000000..04f28d04b
--- /dev/null
+++ b/docs/issues/openai-compatible-video-prompt-duration-fallback/spec.md
@@ -0,0 +1,25 @@
+# OpenAI-Compatible Video Prompt Duration Fallback
+
+## User Need
+When users send prompts such as `生成 马斯克 喝酒的视频 2s` to OpenAI-compatible video models, DeepChat should preserve the obvious structured duration hint instead of sending only the raw prompt body.
+
+## Goal
+Infer an explicit video duration from clear prompt suffixes like `5s` or `5秒` when the session has no structured video duration configured and the parsed value is valid for the target model.
+
+## Acceptance Criteria
+1. OpenAI-compatible video requests derive `duration` from obvious prompt hints when neither `duration` nor `seconds` is already configured and the parsed value is valid for the current model.
+2. Explicit structured video settings still take precedence over any prompt-derived fallback.
+3. The emitted request trace matches the actual `/videos` body for this fallback.
+4. Focused validation passes for the touched runtime slice.
+
+## Constraints
+- Keep the fallback narrow and conservative; do not attempt broad natural-language parameter parsing.
+- Preserve existing request-shape compatibility and polling behavior.
+
+## Non-Goals
+- Adding or changing video settings UI.
+- Parsing arbitrary style, ratio, or resolution hints from prompts.
+- Changing provider safety or moderation behavior.
+
+## Open Questions
+- None.
diff --git a/docs/issues/openai-compatible-video-prompt-duration-fallback/tasks.md b/docs/issues/openai-compatible-video-prompt-duration-fallback/tasks.md
new file mode 100644
index 000000000..bed18d1c6
--- /dev/null
+++ b/docs/issues/openai-compatible-video-prompt-duration-fallback/tasks.md
@@ -0,0 +1,11 @@
+# Tasks
+
+## Runtime Fallback
+- [x] Add a runtime regression test for prompt-derived video duration.
+- [x] Apply a conservative prompt duration fallback before building `/videos` requests.
+
+## Validation
+- [x] Run focused AI SDK runtime tests.
+- [x] Run `pnpm run format`.
+- [x] Run `pnpm run i18n`.
+- [x] Run `pnpm run lint`.
diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts
index 73dc22488..d5cd9a559 100644
--- a/src/main/presenter/agentRuntimePresenter/index.ts
+++ b/src/main/presenter/agentRuntimePresenter/index.ts
@@ -57,8 +57,13 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
-import { isDeepSeekSeriesModelId } from '@shared/model'
+import { ModelType, isDeepSeekSeriesModelId } from '@shared/model'
 import { isTtsModelConfig, isTtsModelId } from '@shared/ttsSettings'
+import {
+  isVideoGenerationModelConfig,
+  normalizeVideoGenerationOptions,
+  supportsOpenAICompatibleVideoGeneration
+} from '@shared/videoGenerationSettings'
 import { nanoid } from 'nanoid'
 import type { SQLitePresenter } from '../sqlitePresenter'
 import { eventBus, SendTarget } from '@/eventbus'
@@ -630,6 +635,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
+        state.modelId,
         generationSettings.contextLength
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
@@ -657,7 +663,10 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         think: false
       }
 
-      const compactionIntent = this.shouldBypassDeepChatContextBudget(state.providerId)
+      const compactionIntent = this.shouldBypassDeepChatContextBudget(
+        state.providerId,
+        state.modelId
+      )
         ? null
         : await this.compactionService.prepareForNextUserTurn({
             sessionId,
@@ -1429,15 +1438,34 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     return resolvedProviderId === 'acp'
   }
 
-  private shouldBypassDeepChatContextBudget(providerId?: string | null): boolean {
-    return providerId?.trim() === 'acp'
+  private shouldBypassDeepChatContextBudget(
+    providerId?: string | null,
+    modelId?: string | null
+  ): boolean { // True for the 'acp' provider and for image-generation, TTS, or video-generation models.
+    const normalizedProviderId = providerId?.trim()
+    if (normalizedProviderId === 'acp') {
+      return true
+    }
+
+    const normalizedModelId = modelId?.trim()
+    if (!normalizedProviderId || !normalizedModelId) {
+      return false // missing provider or model id: no config lookup is attempted and nothing is bypassed
+    }
+
+    const modelConfig = this.configPresenter.getModelConfig(normalizedModelId, normalizedProviderId)
+    return ( // NOTE(review): the manual-compaction guard throws when this is true, so media models now get its "DeepChat agent sessions" error
+      modelConfig.type === ModelType.ImageGeneration ||
+      modelConfig.type === ModelType.TTS ||
+      isVideoGenerationModelConfig(modelConfig, normalizedModelId) // helper also receives the model id, not just the config
+    )
   }
 
   private resolveDeepChatContextBudgetLength(
     providerId: string | null | undefined,
+    modelId: string | null | undefined,
     contextLength: number
   ): number {
-    return this.shouldBypassDeepChatContextBudget(providerId)
+    return this.shouldBypassDeepChatContextBudget(providerId, modelId) // bypassed sessions get MAX_SAFE_INTEGER instead of contextLength
       ? Number.MAX_SAFE_INTEGER
       : contextLength
   }
@@ -1620,7 +1648,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     if (!state) {
       throw new Error(`Session ${sessionId} not found`)
     }
-    if (this.shouldBypassDeepChatContextBudget(state.providerId)) {
+    if (this.shouldBypassDeepChatContextBudget(state.providerId, state.modelId)) {
       throw new Error('Manual compaction is only available for DeepChat agent sessions.')
     }
     if (state.status !== 'idle') {
@@ -1640,6 +1668,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
+        state.modelId,
         generationSettings.contextLength
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
@@ -1858,9 +1887,13 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     const interleavedReasoning =
       providedInterleavedReasoning ??
       this.resolveInterleavedReasoningConfig(state.providerId, state.modelId, generationSettings)
-    const bypassContextBudget = this.shouldBypassDeepChatContextBudget(state.providerId)
+    const bypassContextBudget = this.shouldBypassDeepChatContextBudget(
+      state.providerId,
+      state.modelId
+    )
     const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
       state.providerId,
+      state.modelId,
       generationSettings.contextLength
     )
     const baseModelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId)
@@ -1877,6 +1910,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       reasoningVisibility: generationSettings.reasoningVisibility,
       verbosity: generationSettings.verbosity,
       imageGeneration: generationSettings.imageGeneration,
+      videoGeneration: generationSettings.videoGeneration,
       reasoning: getReasoningEffectiveEnabledForProvider(capabilityProviderId, reasoningPortrait, {
         reasoning: baseModelConfig.reasoning,
         reasoningEffort: generationSettings.reasoningEffort ?? baseModelConfig.reasoningEffort
@@ -2555,6 +2589,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       )
       const contextBudgetLength = this.resolveDeepChatContextBudgetLength(
         state.providerId,
+        state.modelId,
         generationSettings.contextLength
       )
       const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength)
@@ -2574,7 +2609,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
         activeSkillNames
       )
       this.throwIfAbortRequested(preStreamAbortSignal)
-      const summaryState = this.shouldBypassDeepChatContextBudget(state.providerId)
+      const summaryState = this.shouldBypassDeepChatContextBudget(state.providerId, state.modelId)
         ? this.sessionStore.getSummaryState(sessionId)
         : await this.resolveCompactionStateForResumeTurn({
             sessionId,
@@ -2615,7 +2650,7 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       if (
         budgetToolCall?.id &&
         budgetToolCall.name &&
-        !this.shouldBypassDeepChatContextBudget(state.providerId)
+        !this.shouldBypassDeepChatContextBudget(state.providerId, state.modelId)
       ) {
         const resumeBudget = this.fitResumeBudgetForToolCall({
           resumeContext,
@@ -3393,6 +3428,22 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       }
     }
 
+    if (
+      supportsOpenAICompatibleVideoGeneration({
+        providerId,
+        providerApiType: this.resolveProviderApiType(providerId),
+        modelId,
+        apiEndpoint: modelConfig.apiEndpoint,
+        endpointType: modelConfig.endpointType,
+        type: modelConfig.type
+      })
+    ) {
+      const videoGeneration = normalizeVideoGenerationOptions(modelConfig.videoGeneration)
+      if (videoGeneration) {
+        defaults.videoGeneration = videoGeneration
+      }
+    }
+
     const supportsReasoning =
       this.configPresenter.supportsReasoningCapability?.(providerId, modelId) === true
     if (supportsReasoning) {
@@ -3637,6 +3688,35 @@ export class AgentRuntimePresenter implements IAgentImplementation {
       delete next.imageGeneration
     }
 
+    if (
+      supportsOpenAICompatibleVideoGeneration({
+        providerId,
+        providerApiType: this.resolveProviderApiType(providerId),
+        modelId,
+        apiEndpoint: modelConfig.apiEndpoint,
+        endpointType: modelConfig.endpointType,
+        type: modelConfig.type
+      })
+    ) {
+      if (Object.prototype.hasOwnProperty.call(patch, 'videoGeneration')) {
+        const videoGeneration = normalizeVideoGenerationOptions(patch.videoGeneration)
+        if (videoGeneration) {
+          next.videoGeneration = videoGeneration
+        } else {
+          delete next.videoGeneration
+        }
+      } else {
+        const videoGeneration = normalizeVideoGenerationOptions(next.videoGeneration)
+        if (videoGeneration) {
+          next.videoGeneration = videoGeneration
+        } else {
+          delete next.videoGeneration
+        }
+      }
+    } else {
+      delete next.videoGeneration
+    }
+
     if (fixedTemperatureKimi) {
       next.temperature = fixedTemperatureKimi.temperature
     }
diff --git a/src/main/presenter/configPresenter/index.ts b/src/main/presenter/configPresenter/index.ts
index b06bde6ec..35512639e 100644
--- a/src/main/presenter/configPresenter/index.ts
+++ b/src/main/presenter/configPresenter/index.ts
@@ -27,6 +27,7 @@ import {
   resolveProviderCapabilityProviderId,
   type NewApiEndpointType
 } from '@shared/model'
+import { resolveVideoGenerationCompatType } from '@shared/videoGenerationSettings'
 import {
   DEFAULT_MODEL_CAPABILITY_FALLBACKS,
   resolveDerivedModelMaxTokens,
@@ -973,6 +974,15 @@ export class ConfigPresenter implements IConfigPresenter {
   }
 
   private inferProviderDbModelType(model: ProviderModel): ModelType {
+    const videoGenerationType = resolveVideoGenerationCompatType({
+      modelId: model.id,
+      type: model.type,
+      modalities: model.modalities
+    })
+    if (videoGenerationType) {
+      return videoGenerationType
+    }
+
     if (Array.isArray(model.modalities?.output) && model.modalities.output.includes('image')) {
       return ModelType.ImageGeneration
     }
@@ -984,6 +994,8 @@ export class ConfigPresenter implements IConfigPresenter {
         return ModelType.Rerank
       case 'imageGeneration':
         return ModelType.ImageGeneration
+      case 'videoGeneration':
+        return ModelType.VideoGeneration
       case 'tts':
         return ModelType.TTS
       case 'chat':
diff --git a/src/main/presenter/configPresenter/modelConfig.ts b/src/main/presenter/configPresenter/modelConfig.ts
index c6fe4fe04..fcd2c709c 100644
--- a/src/main/presenter/configPresenter/modelConfig.ts
+++ b/src/main/presenter/configPresenter/modelConfig.ts
@@ -13,6 +13,7 @@ import {
   resolveModelFunctionCall
 } from '@shared/modelConfigDefaults'
 import { applyMoonshotKimiReasoningTemperaturePolicy } from '@shared/moonshotKimiPolicy'
+import { resolveVideoGenerationCompatType } from '@shared/videoGenerationSettings'
 import ElectronStore from 'electron-store'
 import { providerDbLoader } from './providerDbLoader'
 import {
@@ -105,6 +106,15 @@ export class ModelConfigHelper {
    * Priority: 1. modalities.output includes image 2. model.type (from provider.json) 3. default Chat
    */
   private inferModelType(model: ProviderModel): ModelType {
+    const videoGenerationType = resolveVideoGenerationCompatType({
+      modelId: model.id,
+      type: model.type,
+      modalities: model.modalities
+    })
+    if (videoGenerationType) {
+      return videoGenerationType
+    }
+
     // Priority 1: Output modality indicates image generation
     if (Array.isArray(model.modalities?.output) && model.modalities.output.includes('image')) {
       return ModelType.ImageGeneration
@@ -121,6 +131,8 @@ export class ModelConfigHelper {
           return ModelType.Rerank
         case 'imageGeneration':
           return ModelType.ImageGeneration
+        case 'videoGeneration':
+          return ModelType.VideoGeneration
         case 'tts':
           return ModelType.TTS
         default:
@@ -180,9 +192,11 @@ export class ModelConfigHelper {
       apiEndpoint:
         modelType === ModelType.ImageGeneration
           ? ApiEndpointType.Image
-          : modelType === ModelType.TTS
-            ? ApiEndpointType.AudioSpeech
-            : ApiEndpointType.Chat,
+          : modelType === ModelType.VideoGeneration
+            ? ApiEndpointType.Video
+            : modelType === ModelType.TTS
+              ? ApiEndpointType.AudioSpeech
+              : ApiEndpointType.Chat,
       thinkingBudget,
       forceInterleavedThinkingCompat,
       reasoningEffort,
diff --git a/src/main/presenter/configPresenter/providerModelHelper.ts b/src/main/presenter/configPresenter/providerModelHelper.ts
index e71a2833d..6ded4e17d 100644
--- a/src/main/presenter/configPresenter/providerModelHelper.ts
+++ b/src/main/presenter/configPresenter/providerModelHelper.ts
@@ -2,6 +2,7 @@ import { eventBus, SendTarget } from '@/eventbus'
 import { CONFIG_EVENTS } from '@/events'
 import { ModelConfig, MODEL_META } from '@shared/presenter'
 import { ModelType } from '@shared/model'
+import { resolveVideoGenerationCompatType } from '@shared/videoGenerationSettings'
 import ElectronStore from 'electron-store'
 import path from 'path'
 import type { StoreLike } from './storeLike'
@@ -143,16 +144,30 @@ export class ProviderModelHelper {
         normalizedModel.reasoning !== undefined
           ? normalizedModel.reasoning
           : config.reasoning || false
-      normalizedModel.type =
-        normalizedModel.type !== undefined ? normalizedModel.type : config.type || ModelType.Chat
       normalizedModel.endpointType = config.endpointType ?? normalizedModel.endpointType
+      normalizedModel.type =
+        resolveVideoGenerationCompatType({
+          modelId: normalizedModel.id,
+          type: config.type ?? normalizedModel.type,
+          apiEndpoint: config.apiEndpoint,
+          endpointType: normalizedModel.endpointType,
+          supportedEndpointTypes: normalizedModel.supportedEndpointTypes
+        }) ??
+        (normalizedModel.type !== undefined ? normalizedModel.type : config.type || ModelType.Chat)
       return normalizedModel
     }
 
     normalizedModel.vision = normalizedModel.vision || false
     normalizedModel.functionCall = normalizedModel.functionCall || false
     normalizedModel.reasoning = normalizedModel.reasoning || false
-    normalizedModel.type = normalizedModel.type || ModelType.Chat
+    normalizedModel.type =
+      resolveVideoGenerationCompatType({
+        modelId: normalizedModel.id,
+        type: normalizedModel.type,
+        endpointType: normalizedModel.endpointType,
+        supportedEndpointTypes: normalizedModel.supportedEndpointTypes
+      }) ??
+      (normalizedModel.type || ModelType.Chat)
     return normalizedModel
   }
 
diff --git a/src/main/presenter/index.ts b/src/main/presenter/index.ts
index dfb631d9d..5c3a65308 100644
--- a/src/main/presenter/index.ts
+++ b/src/main/presenter/index.ts
@@ -388,6 +388,14 @@ export class Presenter implements IPresenter {
             modelId,
             imageOptions,
             options
+          ),
+        generateVideoStandalone: (providerId, prompt, modelId, videoOptions, options) =>
+          this.llmproviderPresenter.generateVideoStandalone(
+            providerId,
+            prompt,
+            modelId,
+            videoOptions,
+            options
           )
       }),
       cacheImage: (data) => this.devicePresenter.cacheImage(data),
diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts
index 56cff686e..bf5cf959b 100644
--- a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts
+++ b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts
@@ -19,6 +19,13 @@ import {
   supportsOpenAIImageGenerationSettings,
   type ImageGenerationOptions
 } from '@shared/imageGenerationSettings'
+import {
+  isVideoGenerationModelConfig,
+  normalizeVideoGenerationOptions,
+  resolveOpenAICompatibleVideoRequestBodyShape,
+  type VideoGenerationOptions,
+  type VideoGenerationReference
+} from '@shared/videoGenerationSettings'
 import {
   isChatAudioTtsModel,
   isGeminiGenerateContentTtsModel,
@@ -48,9 +55,40 @@ type ImageGenerationRequestOptions = {
   providerOptions?: Record<string, ImageGenerationProviderPayload>
 }
 
+type VideoGenerationRequestBody = { // Video generation request payload; only `model` and `prompt` are mandatory.
+  model: string
+  prompt: string
+  seconds?: string // string-typed duration; NOTE(review): presumably an alternative to numeric `duration`, chosen by body shape — confirm
+  size?: string
+  input_reference?: string | { mime_type?: string; data: string } // either a plain string or an inline { mime_type?, data } object
+  content?: Array<Record<string, unknown>>
+  ratio?: string
+  duration?: number // numeric duration — presumably seconds; confirm against the provider API
+  resolution?: string
+  watermark?: boolean
+  generate_audio?: boolean
+  extra_body?: Record<string, unknown> // free-form object; its contents are not constrained by this type
+}
+
+type VideoGenerationTaskResponse = { // Video task response shape; every field is optional.
+  id?: string
+  status?: string
+  url?: string | null
+  error?: // may be a bare string, an object carrying an optional message, or null
+    | string
+    | {
+        message?: string
+      }
+    | null
+}
+
 const DEFAULT_GEMINI_TTS_VOICE = 'Kore'
 const DEFAULT_GEMINI_PCM_SAMPLE_RATE = 24000
 const DEFAULT_GEMINI_PCM_BITS_PER_SAMPLE = 16
+const VIDEO_GENERATION_POLL_INTERVAL_MS = 3000
+const PROMPT_VIDEO_DURATION_EN_PATTERN = // "5s" / "5 sec" / "5 seconds"; a preceding "." is rejected so "1.5s" is not read as 5
+  /(^|[^0-9a-z.])(?<duration>\d{1,2})\s*(?:s|sec|secs|second|seconds)\b/i
+const PROMPT_VIDEO_DURATION_ZH_PATTERN = /(?<![0-9.])(?<duration>\d{1,2})\s*秒/u // lookbehind stops "115秒" matching as 15 and "2.5秒" as 5
 
 export interface AiSdkRuntimeContext {
   providerKind: AiSdkProviderKind
@@ -71,6 +109,7 @@ export interface AiSdkRuntimeContext {
   cleanHeaders?: boolean
   supportsNativeTools?: (modelId: string, modelConfig: ModelConfig) => boolean
   shouldUseImageGeneration?: (modelId: string, modelConfig: ModelConfig) => boolean
+  shouldUseVideoGeneration?: (modelId: string, modelConfig: ModelConfig) => boolean
   shouldUseTts?: (modelId: string, modelConfig: ModelConfig) => boolean
 }
 
@@ -146,6 +185,63 @@ function normalizePromptValue(value: unknown): string {
   return ''
 }
 
+/** Prompt-derived durations are limited to 4–15 for `doubao-seedance-*` ids; other ids allow any. */
+function supportsPromptDerivedVideoDuration(modelId: string, duration: number): boolean {
+  const isSeedance = modelId.trim().toLowerCase().startsWith('doubao-seedance-')
+  if (!isSeedance) {
+    return true
+  }
+
+  return duration >= 4 && duration <= 15
+}
+
+/**
+ * Extracts a clip length from the prompt text (English pattern first, then Chinese).
+ * Returns undefined when nothing matches, the value is not positive, or the model disallows it.
+ */
+function resolvePromptVideoDuration(prompt: string, modelId: string): number | undefined {
+  const text = prompt.trim()
+  if (text.length === 0) {
+    return undefined
+  }
+
+  const englishHit = text.match(PROMPT_VIDEO_DURATION_EN_PATTERN)?.groups?.duration
+  const rawDuration = englishHit || text.match(PROMPT_VIDEO_DURATION_ZH_PATTERN)?.groups?.duration
+  const seconds = rawDuration ? Number.parseInt(rawDuration, 10) : Number.NaN
+
+  // NaN fails the comparison, so "no match" and "zero" both fall out here.
+  if (!(seconds > 0)) {
+    return undefined
+  }
+
+  return supportsPromptDerivedVideoDuration(modelId, seconds) ? seconds : undefined
+}
+
+/**
+ * Normalizes the configured video options and, only when neither `duration` nor a non-blank
+ * `seconds` is configured, fills `duration` from a length mentioned in the prompt.
+ */
+function resolveVideoGenerationRequestOptions(
+  prompt: string,
+  modelId: string,
+  options: VideoGenerationOptions | undefined
+): VideoGenerationOptions | undefined {
+  const configured = normalizeVideoGenerationOptions(options)
+  const hasDuration = typeof configured?.duration === 'number'
+  const hasSeconds =
+    typeof configured?.seconds === 'string' && configured.seconds.trim().length > 0
+
+  // Explicit settings always win over anything inferred from the prompt.
+  if (hasDuration || hasSeconds) {
+    return configured
+  }
+
+  const inferred = resolvePromptVideoDuration(prompt, modelId)
+  return inferred === undefined
+    ? configured
+    : normalizeVideoGenerationOptions({ ...configured, duration: inferred })
+}
+
 function extractImagePrompt(messages: ChatMessage[]): string {
   return messages
     .map((message) => (message.role === 'user' ? normalizePromptValue(message.content) : ''))
@@ -153,6 +249,10 @@ function extractImagePrompt(messages: ChatMessage[]): string {
     .join('\n\n')
 }
 
+function extractVideoPrompt(messages: ChatMessage[]): string {
+  return extractImagePrompt(messages) // video prompts reuse the image prompt extraction as-is
+}
+
 function resolveSupportsNativeTools(
   context: AiSdkRuntimeContext,
   modelId: string,
@@ -177,6 +277,21 @@ function shouldUseImageGenerationRuntime(
   return modelConfig.apiEndpoint === ApiEndpointType.Image
 }
 
+/** Decides the video route: the context hook wins when present, else endpoint/model detection. */
+function shouldUseVideoGenerationRuntime(
+  context: AiSdkRuntimeContext,
+  modelId: string,
+  modelConfig: ModelConfig
+): boolean {
+  if (!context.shouldUseVideoGeneration) {
+    const targetsVideoEndpoint = modelConfig.apiEndpoint === ApiEndpointType.Video
+    return targetsVideoEndpoint || isVideoGenerationModelConfig(modelConfig, modelId)
+  }
+
+  // Called as a method so the hook keeps `context` as its receiver.
+  return context.shouldUseVideoGeneration(modelId, modelConfig)
+}
+
 function shouldUseTtsRuntime(
   context: AiSdkRuntimeContext,
   modelId: string,
@@ -626,6 +741,355 @@ function resolveRuntimeTemperature(
   }
 }
 
+/** Strips trailing slashes and ensures a `/v1` or `/v1betaN` suffix; blank input → OpenAI URL. */
+function normalizeOpenAICompatibleBaseUrl(baseUrl: string | undefined): string {
+  const fallback = 'https://api.openai.com/v1'
+  const trimmed = (baseUrl || fallback).trim().replace(/\/+$/, '')
+  if (trimmed.length === 0) return fallback
+
+  return /\/v1(?:beta\d+)?$/i.test(trimmed) ? trimmed : `${trimmed}/v1`
+}
+
+/**
+ * Turns a reference into a URL string for the request body.
+ * Prefers the trimmed `url`; otherwise uses `data`, wrapping raw base64 in a data URL with the
+ * reference's MIME type or a per-type default. Returns undefined when both are blank.
+ */
+function normalizeVideoReferenceDataUrl(reference: VideoGenerationReference): string | undefined {
+  const directUrl = reference.url?.trim()
+  if (directUrl) {
+    return directUrl
+  }
+
+  const payload = reference.data?.trim()
+  // Nothing to send, or already a complete data URL.
+  if (!payload || payload.startsWith('data:')) {
+    return payload || undefined
+  }
+
+  // Raw base64: pick a MIME type so it can be wrapped as a data URL.
+  const typeDefault =
+    reference.type === 'image' ? 'image/png' : reference.type === 'audio' ? 'audio/mpeg' : 'video/mp4'
+  const mimeType = reference.mimeType?.trim() || typeDefault
+
+  return `data:${mimeType};base64,${payload}`
+}
+
+/**
+ * Builds the `content` array describing reference media for a video request.
+ *
+ * Each reference that resolves to a URL becomes an `image_url`, `audio_url` or `video_url` entry
+ * tagged with the matching `reference_*` role; references without a URL are skipped.
+ *
+ * @param options video options whose `references` are converted
+ * @returns the entries, or undefined when there are no options or no usable references
+ */
+function buildVideoGenerationContent(
+  options: VideoGenerationOptions | undefined
+): Array<Record<string, unknown>> | undefined {
+  if (!options) {
+    return undefined
+  }
+
+  const entries = (options.references ?? []).flatMap((reference): Record<string, unknown>[] => {
+    const url = normalizeVideoReferenceDataUrl(reference)
+    // A reference with neither a url nor data has nothing to send.
+    if (!url) {
+      return []
+    }
+
+    switch (reference.type) {
+      case 'image':
+        return [
+          { type: 'image_url', image_url: { url }, role: 'reference_image' }
+        ]
+      case 'audio':
+        return [
+          { type: 'audio_url', audio_url: { url }, role: 'reference_audio' }
+        ]
+      default:
+        // Every remaining reference type is sent as a video reference.
+        return [
+          { type: 'video_url', video_url: { url }, role: 'reference_video' }
+        ]
+    }
+  })
+
+  return entries.length > 0 ? entries : undefined
+}
+
+/**
+ * Collects the video options that travel inside `extra_body`.
+ *
+ * Included when set: finite `duration`, non-blank trimmed `ratio` / `resolution`, boolean
+ * `watermark` / `generate_audio`, and the reference `content` entries.
+ *
+ * @returns the object, or undefined when no field ended up being set
+ */
+function buildVideoGenerationExtraBody(
+  options: VideoGenerationOptions | undefined
+): Record<string, unknown> | undefined {
+  if (!options) {
+    return undefined
+  }
+
+  const { duration, ratio, resolution, watermark, generateAudio } = options
+  // Blank strings count as unset.
+  const trimmedRatio = typeof ratio === 'string' ? ratio.trim() : ''
+  const trimmedResolution = typeof resolution === 'string' ? resolution.trim() : ''
+  const content = buildVideoGenerationContent(options)
+
+  const extraBody: Record<string, unknown> = {
+    ...(typeof duration === 'number' && Number.isFinite(duration) ? { duration } : {}),
+    ...(trimmedRatio ? { ratio: trimmedRatio } : {}),
+    ...(trimmedResolution ? { resolution: trimmedResolution } : {}),
+    ...(typeof watermark === 'boolean' ? { watermark } : {}),
+    ...(typeof generateAudio === 'boolean' ? { generate_audio: generateAudio } : {}),
+    ...(content ? { content } : {})
+  }
+
+  return Object.keys(extraBody).length > 0 ? extraBody : undefined
+}
+
+/** Integer `duration` for flat request bodies: rounded `duration`, else parsed `seconds`; min -1. */
+function resolveFlatTopLevelVideoDuration(
+  options: VideoGenerationOptions | undefined
+): number | undefined {
+  const duration = options?.duration
+  const seconds = options?.seconds
+  let candidate = Number.NaN
+  if (typeof duration === 'number' && Number.isFinite(duration)) {
+    candidate = Math.round(duration)
+  } else if (typeof seconds === 'string') {
+    candidate = Number.parseInt(seconds.trim(), 10)
+  }
+  return Number.isFinite(candidate) ? Math.max(-1, candidate) : undefined
+}
+
+/**
+ * Builds the JSON body for the video creation request.
+ *
+ * `seconds`, `size` and `input_reference` are always top-level. All other options are placed by
+ * the shape resolved for the provider/model: directly on the body for `flat-top-level`,
+ * otherwise nested under `extra_body`.
+ *
+ * Blank (whitespace-only) `ratio` / `resolution` values are omitted from the flat shape, the
+ * same way `buildVideoGenerationExtraBody` omits them from the nested shape.
+ */
+function buildVideoGenerationRequestBody(
+  provider: LLM_PROVIDER,
+  modelId: string,
+  prompt: string,
+  options: VideoGenerationOptions | undefined
+): VideoGenerationRequestBody {
+  const body: VideoGenerationRequestBody = { model: modelId, prompt }
+
+  if (options?.seconds) {
+    body.seconds = options.seconds
+  }
+  if (options?.size) {
+    body.size = options.size
+  }
+
+  const inputReference = options?.inputReference
+  if (inputReference) {
+    body.input_reference =
+      typeof inputReference === 'string'
+        ? inputReference
+        : {
+            data: inputReference.data,
+            ...(inputReference.mimeType ? { mime_type: inputReference.mimeType } : {})
+          }
+  }
+
+  const requestBodyShape = resolveOpenAICompatibleVideoRequestBodyShape({
+    providerId: provider.id,
+    providerApiType: provider.apiType,
+    baseUrl: provider.baseUrl,
+    modelId
+  })
+
+  // Nested shape: everything else travels inside `extra_body`.
+  if (requestBodyShape !== 'flat-top-level') {
+    const extraBody = buildVideoGenerationExtraBody(options)
+    if (extraBody) {
+      body.extra_body = extraBody
+    }
+    return body
+  }
+
+  // Flat shape: the options go directly on the body.
+  const content = buildVideoGenerationContent(options)
+  if (content) body.content = content
+  // Blank strings are skipped rather than sent as ''.
+  const ratio = typeof options?.ratio === 'string' ? options.ratio.trim() : ''
+  if (ratio) body.ratio = ratio
+  const duration = resolveFlatTopLevelVideoDuration(options)
+  if (duration !== undefined) body.duration = duration
+  const resolution = typeof options?.resolution === 'string' ? options.resolution.trim() : ''
+  if (resolution) body.resolution = resolution
+  if (typeof options?.watermark === 'boolean') body.watermark = options.watermark
+  if (typeof options?.generateAudio === 'boolean') body.generate_audio = options.generateAudio
+
+  return body
+}
+
+/**
+ * Extracts a human-readable failure message from a video task response.
+ *
+ * `error` may be a plain string or an object carrying `message`; blank or missing values fall
+ * back to a generic message.
+ */
+function extractVideoTaskError(response: VideoGenerationTaskResponse | null | undefined): string {
+  const error = response?.error
+
+  // Both supported shapes reduce to a single candidate string.
+  const raw = typeof error === 'string' ? error : error?.message
+  if (typeof raw !== 'string') {
+    return 'Video generation failed'
+  }
+
+  return raw.trim() || 'Video generation failed'
+}
+
+function resolveVideoTaskStatus(response: VideoGenerationTaskResponse | null | undefined): string {
+  return typeof response?.status === 'string' ? response.status.trim().toLowerCase() : '' // '' when absent or not a string
+}
+
+/**
+ * Waits `ms` milliseconds; rejects instead if `signal` is or becomes aborted before the timer fires.
+ */
+function delayWithAbort(ms: number, signal: AbortSignal): Promise<void> {
+  const abortError = (): Error =>
+    signal.reason instanceof Error ? signal.reason : new Error('Aborted')
+
+  return new Promise<void>((resolve, reject) => {
+    if (signal.aborted) return reject(abortError())
+    const handleAbort = (): void => {
+      clearTimeout(timer)
+      signal.removeEventListener('abort', handleAbort)
+      reject(abortError())
+    }
+    const timer = setTimeout(() => {
+      signal.removeEventListener('abort', handleAbort)
+      resolve()
+    }, ms)
+    signal.addEventListener('abort', handleAbort, { once: true })
+  })
+}
+
+/**
+ * Runs an OpenAI-compatible video job end to end: creates the task with `POST {baseUrl}/videos`,
+ * polls `GET {baseUrl}/videos/{id}` until it is `completed`, then downloads the video.
+ *
+ * Polling stops with an error for `failed` and for other terminal states (`cancelled`,
+ * `canceled`, `expired`, `error`) instead of spinning until the timeout.
+ * The video is read from `task.url` when present, else from `{baseUrl}/videos/{id}/content`;
+ * credentials and default headers are only sent when that URL is on the API's own origin, so
+ * the API key is never forwarded to third-party storage or CDN hosts.
+ *
+ * @param timeout overall budget in ms for the whole job; when falsy no abort timer is armed
+ * @returns the video as base64 plus its MIME type (`video/mp4` if the response names none)
+ * @throws Error on a non-2xx response, missing task id, terminal failure state, or abort
+ */
+async function executeOpenAICompatibleVideoGeneration(
+  provider: LLM_PROVIDER,
+  defaultHeaders: Record<string, string>,
+  modelId: string,
+  prompt: string,
+  modelConfig: ModelConfig,
+  timeout: number | undefined
+): Promise<{ base64: string; mimeType: string }> {
+  const normalizedOptions = resolveVideoGenerationRequestOptions(
+    prompt,
+    modelId,
+    modelConfig.videoGeneration
+  )
+  const baseUrl = normalizeOpenAICompatibleBaseUrl(provider.baseUrl)
+  const createUrl = `${baseUrl}/videos`
+  const body = buildVideoGenerationRequestBody(provider, modelId, prompt, normalizedOptions)
+  const controller = new AbortController()
+  const timeoutId = timeout ? setTimeout(() => controller.abort(), timeout) : undefined
+  const proxyUrl = proxyConfig.getProxyUrl()
+  const dispatcher = proxyUrl ? new ProxyAgent(proxyUrl) : undefined
+  const authHeaders: Record<string, string> = {
+    ...defaultHeaders,
+    Authorization: `Bearer ${provider.oauthToken || provider.apiKey || ''}`
+  }
+  const failureStatuses = new Set(['failed', 'cancelled', 'canceled', 'expired', 'error'])
+
+  // True only when `url` parses and shares scheme, host and port with the API endpoint.
+  const isApiOrigin = (url: string): boolean => {
+    try {
+      return new URL(url).origin === new URL(createUrl).origin
+    } catch {
+      return false
+    }
+  }
+
+  // Shared fetch wrapper: applies abort signal and proxy, converts non-2xx into an Error.
+  const request = async (url: string, init: RequestInit, failure: string): Promise<Response> => {
+    const fetchInit: RequestInit & { dispatcher?: ProxyAgent } = { ...init, signal: controller.signal }
+    if (dispatcher) fetchInit.dispatcher = dispatcher
+
+    const response = await fetch(url, fetchInit)
+    if (!response.ok) {
+      const errorText = await response.text().catch(() => '')
+      throw new Error(`${failure} (${response.status}): ${errorText}`)
+    }
+    return response
+  }
+
+  const fetchJson = async <T>(url: string, init: RequestInit): Promise<T> => {
+    const headers = { ...authHeaders, ...(init.headers as Record<string, string> | undefined) }
+    const response = await request(url, { ...init, headers }, 'Video request failed')
+    return (await response.json()) as T
+  }
+
+  const fetchBinary = async (url: string): Promise<{ buffer: ArrayBuffer; mimeType: string }> => {
+    // Foreign hosts (e.g. pre-signed storage links) must not receive the provider credentials.
+    const init = { method: 'GET', headers: isApiOrigin(url) ? authHeaders : {} }
+    const response = await request(url, init, 'Video content download failed')
+    return {
+      buffer: await response.arrayBuffer(),
+      mimeType: response.headers.get('content-type')?.split(';')[0]?.trim() || 'video/mp4'
+    }
+  }
+
+  try {
+    let task = await fetchJson<VideoGenerationTaskResponse>(createUrl, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body)
+    })
+
+    const taskId = typeof task.id === 'string' ? task.id.trim() : ''
+    if (!taskId) {
+      throw new Error('Video generation response missing task id')
+    }
+    const taskUrl = `${createUrl}/${encodeURIComponent(taskId)}`
+
+    let status = resolveVideoTaskStatus(task)
+    while (status !== 'completed') {
+      if (failureStatuses.has(status)) {
+        throw new Error(extractVideoTaskError(task))
+      }
+      await delayWithAbort(VIDEO_GENERATION_POLL_INTERVAL_MS, controller.signal)
+      task = await fetchJson<VideoGenerationTaskResponse>(taskUrl, { method: 'GET' })
+      status = resolveVideoTaskStatus(task)
+    }
+
+    const providedUrl = typeof task.url === 'string' ? task.url.trim() : ''
+    const { buffer, mimeType } = await fetchBinary(providedUrl || `${taskUrl}/content`)
+
+    return { base64: Buffer.from(buffer).toString('base64'), mimeType }
+  } finally {
+    if (timeoutId !== undefined) {
+      clearTimeout(timeoutId)
+    }
+  }
+}
+
 async function buildPromptRuntime(
   context: AiSdkRuntimeContext,
   messages: ChatMessage[],
@@ -801,6 +1265,49 @@ export async function* runAiSdkCoreStream(
     return
   }
 
+  if (shouldUseVideoGenerationRuntime(context, modelId, normalizedModelConfig)) {
+    const prompt = extractVideoPrompt(messages)
+    const normalizedVideoOptions = resolveVideoGenerationRequestOptions(
+      prompt,
+      modelId,
+      normalizedModelConfig.videoGeneration
+    )
+    const requestBody = buildVideoGenerationRequestBody(
+      context.provider,
+      modelId,
+      prompt,
+      normalizedVideoOptions
+    )
+
+    await context.emitRequestTrace?.(normalizedModelConfig, {
+      endpoint: `${normalizeOpenAICompatibleBaseUrl(context.provider.baseUrl)}/videos`,
+      headers: context.buildTraceHeaders?.() ?? context.defaultHeaders,
+      body: requestBody
+    })
+
+    const { base64, mimeType } = await executeOpenAICompatibleVideoGeneration(
+      context.provider,
+      context.defaultHeaders,
+      modelId,
+      prompt,
+      normalizedModelConfig,
+      timeout
+    )
+
+    yield {
+      type: 'image_data',
+      image_data: {
+        data: `data:${mimeType};base64,${base64}`,
+        mimeType
+      }
+    }
+    yield {
+      type: 'stop',
+      stop_reason: 'complete'
+    }
+    return
+  }
+
   if (shouldUseImageGenerationRuntime(context, modelId, normalizedModelConfig)) {
     const prompt = extractImagePrompt(messages)
 
diff --git a/src/main/presenter/llmProviderPresenter/index.ts b/src/main/presenter/llmProviderPresenter/index.ts
index a1c100a2c..b7bab7c04 100644
--- a/src/main/presenter/llmProviderPresenter/index.ts
+++ b/src/main/presenter/llmProviderPresenter/index.ts
@@ -9,6 +9,7 @@ import {
   KeyStatus,
   LLM_EMBEDDING_ATTRS,
   StandaloneImageGenerationResult,
+  StandaloneVideoGenerationResult,
   ModelScopeMcpSyncOptions,
   ModelScopeMcpSyncResult,
   IConfigPresenter,
@@ -24,6 +25,10 @@ import {
   normalizeImageGenerationOptions,
   type ImageGenerationOptions
 } from '@shared/imageGenerationSettings'
+import {
+  normalizeVideoGenerationOptions,
+  type VideoGenerationOptions
+} from '@shared/videoGenerationSettings'
 import { ProviderChange, ProviderBatchUpdate } from '@shared/provider-operations'
 import { isProviderDbBackedProvider } from '@shared/providerDbCatalog'
 import { eventBus } from '@/eventbus'
@@ -538,6 +543,89 @@ export class LLMProviderPresenter implements ILlmProviderPresenter {
     }
   }
 
+  async generateVideoStandalone( // generates video(s) from one prompt by draining the provider stream
+    providerId: string,
+    prompt: string,
+    modelId: string,
+    videoOptions?: VideoGenerationOptions,
+    options?: { signal?: AbortSignal }
+  ): Promise<StandaloneVideoGenerationResult> {
+    const normalizedPrompt = prompt.trim()
+    if (!normalizedPrompt) {
+      throw new Error('Video generation prompt is required')
+    }
+
+    const signal = options?.signal
+    if (signal?.aborted) { // fail fast before any rate-limit wait or provider work
+      throw createAbortError()
+    }
+
+    await this.executeWithRateLimit(providerId, { signal })
+
+    const provider = this.getProviderInstance(providerId)
+    const modelConfig = this.configPresenter.getModelConfig(modelId, providerId)
+    const mergedVideoOptions = normalizeVideoGenerationOptions({
+      ...modelConfig.videoGeneration,
+      ...videoOptions // per-call options override the ones stored on the model config
+    })
+    const resolvedModelConfig: ModelConfig = {
+      ...modelConfig,
+      type: ModelType.VideoGeneration, // force the video type and endpoint for this call
+      apiEndpoint: ApiEndpointType.Video,
+      videoGeneration: mergedVideoOptions
+    }
+    const stream = provider.coreStream(
+      [{ role: 'user', content: normalizedPrompt }],
+      modelId,
+      resolvedModelConfig,
+      modelConfig.temperature ?? 0.7,
+      modelConfig.maxTokens ?? 1024,
+      [] // presumably the tool list (none are passed) — confirm against coreStream's signature
+    )
+    const videos: StandaloneVideoGenerationResult['videos'] = []
+    const abort = createAbortPromise(signal, () => {
+      void stream.return?.(undefined as never) // asks the generator to finish; presumably run on abort
+    })
+
+    const collect = async () => {
+      for await (const event of stream) {
+        if (signal?.aborted) {
+          throw createAbortError()
+        }
+
+        if (
+          event.type === 'image_data' && // video results arrive as image_data events
+          event.image_data.mimeType.trim().toLowerCase().startsWith('video/') // keep video/* only
+        ) {
+          videos.push({
+            data: event.image_data.data,
+            mimeType: event.image_data.mimeType
+          })
+        }
+        if (event.type === 'error') { // an error event from the stream ends the collection
+          throw new Error(event.error_message)
+        }
+      }
+    }
+
+    try {
+      await (abort.promise ? Promise.race([collect(), abort.promise]) : collect()) // first to settle wins
+    } finally {
+      abort.cleanup()
+    }
+
+    if (videos.length === 0) { // a stream that ended without any video payload is an error
+      throw new Error('Video generation completed without video output')
+    }
+
+    return {
+      providerId,
+      modelId,
+      ...(mergedVideoOptions ? { options: mergedVideoOptions } : {}), // omitted when nothing was set
+      videos
+    }
+  }
+
   // 配置相关方法
   setMaxConcurrentStreams(max: number): void {
     this.config.maxConcurrentStreams = max
diff --git a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts
index 0cb7ec041..37efbd9a9 100644
--- a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts
+++ b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts
@@ -8,6 +8,7 @@ import {
   type NewApiEndpointType
 } from '@shared/model'
 import { isTtsModelConfig, isTtsModelId } from '@shared/ttsSettings'
+import { isVideoGenerationModelConfig } from '@shared/videoGenerationSettings'
 import {
   DEFAULT_MODEL_CONTEXT_LENGTH,
   DEFAULT_MODEL_MAX_TOKENS,
@@ -96,6 +97,10 @@ const shouldUseOpenAIImageGenerationRoute = (modelId: string, modelConfig: Model
   modelConfig.apiEndpoint === ApiEndpointType.Image ||
   modelConfig.type === ModelType.ImageGeneration
 
+const shouldUseOpenAIVideoGenerationRoute = (modelId: string, modelConfig: ModelConfig): boolean =>
+  modelConfig.apiEndpoint === ApiEndpointType.Video || // endpoint explicitly set to video
+  isVideoGenerationModelConfig(modelConfig, modelId) // or the shared helper classifies config/id as video
+
 const shouldUseOpenAITtsRoute = (modelId: string, modelConfig: ModelConfig): boolean =>
   isTtsModelConfig(modelConfig) ||
   modelConfig.apiEndpoint === ApiEndpointType.AudioSpeech ||
@@ -351,6 +356,27 @@ export class AiSdkProvider extends BaseLLMProvider {
               endpointType: 'image-generation'
             }
           }
+        case 'video-generation':
+          return {
+            providerKind: 'openai-compatible',
+            endpointType,
+            providerPatch: {
+              apiType: 'openai-completions',
+              baseUrl: `${host}/v1`,
+              capabilityProviderId: resolveProviderCapabilityProviderId(
+                this.provider.id,
+                {
+                  endpointType
+                },
+                modelId
+              )
+            },
+            modelConfigPatch: {
+              apiEndpoint: ApiEndpointType.Video,
+              type: ModelType.VideoGeneration,
+              endpointType: 'video-generation'
+            }
+          }
         case 'openai':
         default:
           return {
@@ -575,6 +601,17 @@ export class AiSdkProvider extends BaseLLMProvider {
                     isOpenAIImageGenerationModel(runtimeModelId) ||
                     runtimeModelConfig.apiEndpoint === ApiEndpointType.Image
 
+    const shouldUseVideoGeneration =
+      this.isAzureOpenAI(decision, runtimeProvider) ||
+      decision.providerKind === 'gemini' ||
+      decision.providerKind === 'vertex' ||
+      decision.providerKind === 'anthropic'
+        ? undefined
+        : decision.endpointType === 'video-generation'
+          ? () => true
+          : (runtimeModelId: string, runtimeModelConfig: ModelConfig) =>
+              shouldUseOpenAIVideoGenerationRoute(runtimeModelId, runtimeModelConfig)
+
     // TTS route: only applicable for OpenAI-compatible providers (not Azure, Gemini, Vertex)
     const shouldUseTts =
       this.isAzureOpenAI(decision, runtimeProvider) ||
@@ -602,6 +639,7 @@ export class AiSdkProvider extends BaseLLMProvider {
         supportsNativeTools: (_runtimeModelId, runtimeModelConfig) =>
           runtimeModelConfig.functionCall === true,
         shouldUseImageGeneration,
+        shouldUseVideoGeneration,
         shouldUseTts
       }
     }
@@ -1673,17 +1711,22 @@ export class AiSdkProvider extends BaseLLMProvider {
           normalizedRawType === 'image' ||
           supportedEndpointTypes.includes('image-generation')
             ? ModelType.ImageGeneration
-            : normalizedRawType === 'tts' ||
-                normalizedRawType === 'audio-speech' ||
-                normalizedRawType === 'audiospeech'
-              ? ModelType.TTS
-              : normalizedRawType === 'embedding' ||
-                  normalizedRawType === 'embeddings' ||
-                  normalizedModelId.includes('embedding')
-                ? ModelType.Embedding
-                : normalizedRawType === 'rerank' || normalizedModelId.includes('rerank')
-                  ? ModelType.Rerank
-                  : undefined
+            : normalizedRawType === 'videogeneration' ||
+                normalizedRawType === 'video-generation' ||
+                normalizedRawType === 'video' ||
+                supportedEndpointTypes.includes('video-generation')
+              ? ModelType.VideoGeneration
+              : normalizedRawType === 'tts' ||
+                  normalizedRawType === 'audio-speech' ||
+                  normalizedRawType === 'audiospeech'
+                ? ModelType.TTS
+                : normalizedRawType === 'embedding' ||
+                    normalizedRawType === 'embeddings' ||
+                    normalizedModelId.includes('embedding')
+                  ? ModelType.Embedding
+                  : normalizedRawType === 'rerank' || normalizedModelId.includes('rerank')
+                    ? ModelType.Rerank
+                    : undefined
 
         const contextLengthCandidate = [
           rawModel.context_length,
@@ -1708,7 +1751,9 @@ export class AiSdkProvider extends BaseLLMProvider {
           supportedEndpointTypes.length === 0
             ? type === ModelType.ImageGeneration
               ? 'image-generation'
-              : undefined
+              : type === ModelType.VideoGeneration
+                ? 'video-generation'
+                : undefined
             : resolveNewApiEndpointTypeFromRoute(
                 {
                   supportedEndpointTypes,
diff --git a/src/main/presenter/sqlitePresenter/tables/deepchatSessions.ts b/src/main/presenter/sqlitePresenter/tables/deepchatSessions.ts
index 779dfa71f..7993f9742 100644
--- a/src/main/presenter/sqlitePresenter/tables/deepchatSessions.ts
+++ b/src/main/presenter/sqlitePresenter/tables/deepchatSessions.ts
@@ -12,6 +12,10 @@ import {
   normalizeImageGenerationOptions,
   type ImageGenerationOptions
 } from '@shared/imageGenerationSettings'
+import {
+  normalizeVideoGenerationOptions,
+  type VideoGenerationOptions
+} from '@shared/videoGenerationSettings'
 
 type DeepChatSessionGenerationSettings = Pick<
   SessionGenerationSettings,
@@ -26,6 +30,7 @@ type DeepChatSessionGenerationSettings = Pick<
   | 'verbosity'
   | 'forceInterleavedThinkingCompat'
   | 'imageGeneration'
+  | 'videoGeneration'
 >
 
 export interface DeepChatSessionRow {
@@ -44,6 +49,7 @@ export interface DeepChatSessionRow {
   verbosity: 'low' | 'medium' | 'high' | null
   force_interleaved_thinking_compat: number | null
   image_generation_options_json: string | null
+  video_generation_options_json: string | null
   summary_text: string | null
   summary_cursor_order_seq: number | null
   summary_updated_at: number | null
@@ -109,6 +115,10 @@ export class DeepChatSessionsTable extends BaseTable {
       columns.push('image_generation_options_json TEXT')
     }
 
+    if (version >= 28) {
+      columns.push('video_generation_options_json TEXT')
+    }
+
     if (version >= 14) {
       columns.push(
         'summary_text TEXT',
@@ -187,6 +197,11 @@ export class DeepChatSessionsTable extends BaseTable {
         'ALTER TABLE deepchat_sessions ADD COLUMN image_generation_options_json TEXT;'
       )
     }
+    if (!this.hasColumn('video_generation_options_json')) {
+      statements.push(
+        'ALTER TABLE deepchat_sessions ADD COLUMN video_generation_options_json TEXT;'
+      )
+    }
 
     return statements
   }
@@ -230,11 +245,14 @@ export class DeepChatSessionsTable extends BaseTable {
     if (version === 27) {
       return 'ALTER TABLE deepchat_sessions ADD COLUMN image_generation_options_json TEXT;'
     }
+    if (version === 28) {
+      return 'ALTER TABLE deepchat_sessions ADD COLUMN video_generation_options_json TEXT;'
+    }
     return null
   }
 
   getLatestVersion(): number {
-    return 27
+    return 28
   }
 
   private serializeImageGenerationOptions(
@@ -257,6 +275,26 @@ export class DeepChatSessionsTable extends BaseTable {
     }
   }
 
+  private serializeVideoGenerationOptions(
+    value: VideoGenerationOptions | undefined
+  ): string | null {
+    const normalized = normalizeVideoGenerationOptions(value) // same normalizer as the read path
+    return normalized ? JSON.stringify(normalized) : null // falsy normalized options become null
+  }
+
+  /** Reads stored video options JSON; empty or unparsable values yield undefined. */
+  private parseVideoGenerationOptions(value: string | null): VideoGenerationOptions | undefined {
+    if (!value) {
+      return undefined
+    }
+    try {
+      return normalizeVideoGenerationOptions(JSON.parse(value) as VideoGenerationOptions)
+    } catch {
+      // Corrupt JSON is treated the same as "no options stored".
+      return undefined
+    }
+  }
+
   create(
     id: string,
     providerId: string,
@@ -282,11 +320,12 @@ export class DeepChatSessionsTable extends BaseTable {
            verbosity,
            force_interleaved_thinking_compat,
            image_generation_options_json,
+           video_generation_options_json,
            summary_text,
            summary_cursor_order_seq,
            summary_updated_at
          )
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
       )
       .run(
         id,
@@ -308,6 +347,7 @@ export class DeepChatSessionsTable extends BaseTable {
             ? 1
             : 0,
         this.serializeImageGenerationOptions(generationSettings?.imageGeneration),
+        this.serializeVideoGenerationOptions(generationSettings?.videoGeneration),
         null,
         1,
         null
@@ -362,6 +402,10 @@ export class DeepChatSessionsTable extends BaseTable {
     if (imageGeneration) {
       settings.imageGeneration = imageGeneration
     }
+    const videoGeneration = this.parseVideoGenerationOptions(row.video_generation_options_json)
+    if (videoGeneration) {
+      settings.videoGeneration = videoGeneration
+    }
 
     return settings
   }
@@ -430,6 +474,10 @@ export class DeepChatSessionsTable extends BaseTable {
       updates.push('image_generation_options_json = ?')
       params.push(this.serializeImageGenerationOptions(settings.imageGeneration))
     }
+    if (Object.prototype.hasOwnProperty.call(settings, 'videoGeneration')) {
+      updates.push('video_generation_options_json = ?')
+      params.push(this.serializeVideoGenerationOptions(settings.videoGeneration))
+    }
 
     if (updates.length === 0) {
       return
diff --git a/src/renderer/settings/components/ProviderModelList.vue b/src/renderer/settings/components/ProviderModelList.vue
index d4b45ff76..3aa5f83b2 100644
--- a/src/renderer/settings/components/ProviderModelList.vue
+++ b/src/renderer/settings/components/ProviderModelList.vue
@@ -376,7 +376,9 @@ const TYPE_ORDER: ModelType[] = [
   ModelType.Chat,
   ModelType.Embedding,
   ModelType.Rerank,
-  ModelType.ImageGeneration
+  ModelType.ImageGeneration,
+  ModelType.VideoGeneration,
+  ModelType.TTS
 ]
 
 const CAPABILITY_ICONS: Record<ModelCapabilityKey, string> = {
@@ -391,6 +393,7 @@ const TYPE_ICONS: Record<ModelType, string> = {
   [ModelType.Embedding]: 'lucide:database',
   [ModelType.Rerank]: 'lucide:arrow-up-wide-narrow',
   [ModelType.ImageGeneration]: 'lucide:image',
+  [ModelType.VideoGeneration]: 'lucide:clapperboard',
   [ModelType.TTS]: 'lucide:volume-2'
 }
 
diff --git a/src/renderer/src/components/ChatConfig.vue b/src/renderer/src/components/ChatConfig.vue
index 6b3e151db..044aaf584 100644
--- a/src/renderer/src/components/ChatConfig.vue
+++ b/src/renderer/src/components/ChatConfig.vue
@@ -40,7 +40,7 @@ const props = defineProps<{
   providerId?: string
   reasoningEffort?: ReasoningEffort
   verbosity?: Verbosity
-  modelType?: 'chat' | 'imageGeneration' | 'embedding' | 'rerank'
+  modelType?: 'chat' | 'imageGeneration' | 'videoGeneration' | 'tts' | 'embedding' | 'rerank'
 }>()
 
 const systemPrompt = defineModel<string>('systemPrompt')
@@ -129,7 +129,7 @@ const { sliderFields, inputFields, selectFields } = useChatConfigFields({
 watch(
   () => props.modelType,
   (newType) => {
-    if (newType === 'imageGeneration' && systemPrompt.value) {
+    if ((newType === 'imageGeneration' || newType === 'videoGeneration') && systemPrompt.value) {
       systemPrompt.value = ''
     }
   }
@@ -140,6 +140,8 @@ const modelTypeIcon = computed(() => {
   const icons = {
     chat: 'lucide:message-circle',
     imageGeneration: 'lucide:image',
+    videoGeneration: 'lucide:clapperboard',
+    tts: 'lucide:volume-2',
     embedding: 'lucide:layers',
     rerank: 'lucide:arrow-up-down'
   }
@@ -157,7 +159,13 @@ const modelTypeIcon = computed(() => {
 
     <div class="space-y-6">
       <!-- System Prompt (hidden for image generation models) -->
-      <div v-if="!modelTypeDetection.isImageGenerationModel.value" class="space-y-2 px-2">
+      <div
+        v-if="
+          !modelTypeDetection.isImageGenerationModel.value &&
+          !modelTypeDetection.isVideoGenerationModel.value
+        "
+        class="space-y-2 px-2"
+      >
         <div class="flex items-center space-x-2 py-1.5">
           <Icon icon="lucide:terminal" class="w-4 h-4 text-muted-foreground" />
           <Label class="text-xs font-medium">{{ t('settings.model.systemPrompt.label') }}</Label>
diff --git a/src/renderer/src/components/chat/ChatStatusBar.vue b/src/renderer/src/components/chat/ChatStatusBar.vue
index 8d7612cc0..7d479c971 100644
--- a/src/renderer/src/components/chat/ChatStatusBar.vue
+++ b/src/renderer/src/components/chat/ChatStatusBar.vue
@@ -264,7 +264,7 @@
 
                   <div v-else-if="localSettings" class="space-y-4">
                     <div
-                      v-if="!showOpenAIImageGenerationSettings && showTemperatureControl"
+                      v-if="!showOpenAIMediaGenerationSettings && showTemperatureControl"
                       class="space-y-1.5"
                     >
                       <label class="text-xs font-medium">{{
@@ -338,7 +338,7 @@
                       </p>
                     </div>
 
-                    <div v-if="!showOpenAIImageGenerationSettings" class="space-y-1.5">
+                    <div v-if="!showOpenAIMediaGenerationSettings" class="space-y-1.5">
                       <label class="text-xs font-medium">{{
                         t('chat.advancedSettings.contextLength')
                       }}</label>
@@ -402,7 +402,7 @@
                       </p>
                     </div>
 
-                    <div v-if="!showOpenAIImageGenerationSettings" class="space-y-1.5">
+                    <div v-if="!showOpenAIMediaGenerationSettings" class="space-y-1.5">
                       <label class="text-xs font-medium">{{
                         t('chat.advancedSettings.maxTokens')
                       }}</label>
@@ -541,8 +541,15 @@
                       @update:model-value="onImageGenerationSettingsUpdate"
                     />
 
+                    <OpenAIVideoGenerationSettingsFields
+                      v-if="showOpenAIVideoGenerationSettings"
+                      density="compact"
+                      :model-value="localSettings.videoGeneration"
+                      @update:model-value="onVideoGenerationSettingsUpdate"
+                    />
+
                     <div
-                      v-if="!showOpenAIImageGenerationSettings && showReasoningEffort"
+                      v-if="!showOpenAIMediaGenerationSettings && showReasoningEffort"
                       class="space-y-1.5"
                     >
                       <label class="text-xs font-medium">{{
@@ -572,7 +579,7 @@
                     </div>
 
                     <div
-                      v-if="!showOpenAIImageGenerationSettings && showReasoningVisibility"
+                      v-if="!showOpenAIMediaGenerationSettings && showReasoningVisibility"
                       class="space-y-1.5"
                     >
                       <label class="text-xs font-medium">{{
@@ -604,7 +611,7 @@
                     </div>
 
                     <div
-                      v-if="!showOpenAIImageGenerationSettings && showVerbosity"
+                      v-if="!showOpenAIMediaGenerationSettings && showVerbosity"
                       class="space-y-1.5"
                     >
                       <label class="text-xs font-medium">{{
@@ -632,7 +639,7 @@
                     </div>
 
                     <div
-                      v-if="!showOpenAIImageGenerationSettings && showThinkingBudget"
+                      v-if="!showOpenAIMediaGenerationSettings && showThinkingBudget"
                       class="space-y-1.5"
                     >
                       <div class="flex items-center justify-between">
@@ -715,7 +722,7 @@
                       </p>
                     </div>
 
-                    <div v-if="!showOpenAIImageGenerationSettings" class="space-y-1.5">
+                    <div v-if="!showOpenAIMediaGenerationSettings" class="space-y-1.5">
                       <div class="flex items-start justify-between gap-3">
                         <div class="min-w-0">
                           <label class="text-xs font-medium">
@@ -945,10 +952,15 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
+import {
+  normalizeVideoGenerationOptions,
+  supportsOpenAICompatibleVideoGeneration
+} from '@shared/videoGenerationSettings'
 import { resolvePreferredChatModel, type ChatModelSelection } from '@/lib/chatModelSelection'
 import McpIndicator from '@/components/chat-input/McpIndicator.vue'
 import ModelIcon from '@/components/icons/ModelIcon.vue'
 import OpenAIImageGenerationSettingsFields from '@/components/settings/OpenAIImageGenerationSettingsFields.vue'
+import OpenAIVideoGenerationSettingsFields from '@/components/settings/OpenAIVideoGenerationSettingsFields.vue'
 import { createConfigClient } from '@api/ConfigClient'
 import { createModelClient } from '@api/ModelClient'
 import { createOnboardingClient } from '@api/OnboardingClient'
@@ -1305,6 +1317,29 @@ const showOpenAIImageGenerationSettings = computed(() => {
   })
 })
 
+// True when the current model-settings target supports OpenAI-compatible video generation.
+const showOpenAIVideoGenerationSettings = computed(() => {
+  const target = modelSettingsTarget.value
+  if (!target) return false
+
+  const meta = modelSettingsTargetMeta.value
+  const resolved = modelSettingsTargetResolvedConfig.value
+  const { providerId, modelId } = target
+  return supportsOpenAICompatibleVideoGeneration({
+    providerId,
+    providerApiType: resolveProviderApiType(providerId),
+    modelId,
+    apiEndpoint: resolved?.apiEndpoint,
+    endpointType: resolved?.endpointType ?? meta?.endpointType,
+    supportedEndpointTypes: meta?.supportedEndpointTypes,
+    type: resolved?.type ?? meta?.type
+  })
+})
+
+const showOpenAIMediaGenerationSettings = computed(
+  () => showOpenAIImageGenerationSettings.value || showOpenAIVideoGenerationSettings.value
+)
+
 watch(
   () => {
     const target = modelSettingsTarget.value
@@ -2038,6 +2073,23 @@ const resolveDefaultGenerationSettings = async (
     }
   }
 
+  if (
+    supportsOpenAICompatibleVideoGeneration({
+      providerId,
+      providerApiType: resolveProviderApiType(providerId),
+      modelId,
+      apiEndpoint: modelConfig.apiEndpoint,
+      endpointType: modelConfig.endpointType ?? modelMeta?.endpointType,
+      supportedEndpointTypes: modelMeta?.supportedEndpointTypes,
+      type: modelConfig.type ?? modelMeta?.type
+    })
+  ) {
+    const videoGeneration = normalizeVideoGenerationOptions(modelConfig.videoGeneration)
+    if (videoGeneration) {
+      defaults.videoGeneration = videoGeneration
+    }
+  }
+
   if (portrait?.supported === true && hasThinkingBudgetSupport(portrait)) {
     const defaultBudget = normalizeLegacyThinkingBudgetValue(
       modelConfig.thinkingBudget ?? portrait.budget?.default
@@ -2216,6 +2268,10 @@ const updateLocalGenerationSettings = (patch: Partial<SessionGenerationSettings>
     normalizedPatch.imageGeneration = normalizeImageGenerationOptions(next.imageGeneration)
     next.imageGeneration = normalizedPatch.imageGeneration
   }
+  if (Object.prototype.hasOwnProperty.call(nextPatch, 'videoGeneration')) {
+    normalizedPatch.videoGeneration = normalizeVideoGenerationOptions(next.videoGeneration)
+    next.videoGeneration = normalizedPatch.videoGeneration
+  }
 
   scheduleGenerationPersist(normalizedPatch)
 }
@@ -2538,7 +2594,8 @@ async function changeModelSelection(
     reasoningVisibility: draftStore.reasoningVisibility,
     verbosity: draftStore.verbosity,
     forceInterleavedThinkingCompat: draftStore.forceInterleavedThinkingCompat,
-    imageGeneration: draftStore.imageGeneration
+    imageGeneration: draftStore.imageGeneration,
+    videoGeneration: draftStore.videoGeneration
   } as Partial<SessionGenerationSettings>
   const clearedDraftModelOverrides = {
     temperature: undefined,
@@ -2550,7 +2607,8 @@ async function changeModelSelection(
     reasoningVisibility: undefined,
     verbosity: undefined,
     forceInterleavedThinkingCompat: undefined,
-    imageGeneration: undefined
+    imageGeneration: undefined,
+    videoGeneration: undefined
   } as Partial<SessionGenerationSettings>
 
   try {
@@ -2904,6 +2962,17 @@ function onImageGenerationSettingsUpdate(
   })
 }
 
+// Applies edits emitted by the video settings fields to the local generation settings;
+// a no-op while no local settings are loaded.
+function onVideoGenerationSettingsUpdate(
+  videoGeneration: SessionGenerationSettings['videoGeneration']
+) {
+  if (!localSettings.value) return
+
+  const normalized = normalizeVideoGenerationOptions(videoGeneration)
+  updateLocalGenerationSettings({ videoGeneration: normalized })
+}
+
 async function selectPermissionMode(mode: PermissionMode) {
   if (!canSelectPermissionMode.value) return
   if (permissionMode.value === mode) return
diff --git a/src/renderer/src/components/chat/messageListItems.ts b/src/renderer/src/components/chat/messageListItems.ts
index 1c451391d..ce798232f 100644
--- a/src/renderer/src/components/chat/messageListItems.ts
+++ b/src/renderer/src/components/chat/messageListItems.ts
@@ -85,6 +85,7 @@ export type DisplayAssistantMessageBlock = {
     | 'tool_call'
     | 'action'
     | 'image'
+    | 'video'
     | 'audio'
     | 'artifact-thinking'
   id?: string
diff --git a/src/renderer/src/components/message/MessageBlockVideo.vue b/src/renderer/src/components/message/MessageBlockVideo.vue
new file mode 100644
index 000000000..d54787bf1
--- /dev/null
+++ b/src/renderer/src/components/message/MessageBlockVideo.vue
@@ -0,0 +1,132 @@
+<template>
+  <div class="my-1">
+    <div class="rounded-lg border bg-card text-card-foreground p-4 w-fit max-w-full">
+      <div class="flex flex-col space-y-2 min-w-[320px] max-w-130">
+        <div class="flex items-center gap-2 text-xs text-muted-foreground">
+          <Icon icon="lucide:clapperboard" class="h-4 w-4" />
+          <span>{{ translate('common.video') }}</span>
+        </div>
+
+        <template v-if="resolvedVideoData">
+          <div class="rounded-xl border bg-muted/30 p-2">
+            <video
+              :src="videoSrc"
+              controls
+              playsinline
+              class="max-h-105 w-full rounded-lg bg-black"
+              @error="videoError = true"
+            />
+          </div>
+          <div class="text-[11px] text-muted-foreground break-all">
+            {{ resolvedVideoData.mimeType }}
+          </div>
+          <div v-if="videoError" class="text-xs text-red-500">
+            {{ translate('common.error.requestFailed') }}
+          </div>
+        </template>
+
+        <div v-else class="flex items-center justify-center h-40 w-full">
+          <Icon icon="lucide:loader-2" class="w-6 h-6 animate-spin text-muted-foreground" />
+        </div>
+      </div>
+    </div>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { computed, ref } from 'vue'
+import { Icon } from '@iconify/vue'
+import { useI18n } from 'vue-i18n'
+import type { DisplayAssistantMessageBlock } from '@/components/chat/messageListItems'
+
+const keyMap: Record<string, string> = {
+  'common.video': 'Video',
+  'common.error.requestFailed': 'Request failed'
+}
+
+const i18n = (() => {
+  try {
+    return useI18n().t
+  } catch {
+    return (key: string) => keyMap[key] || key
+  }
+})()
+
+const translate = (key: string) => {
+  const translated = i18n(key)
+  return translated === key ? keyMap[key] || key : translated
+}
+
+const props = defineProps<{
+  block: DisplayAssistantMessageBlock
+  messageId?: string
+  threadId?: string
+}>()
+
+type LegacyVideoBlockContent = {
+  data?: string
+  mimeType?: string
+}
+
+const videoError = ref(false)
+
+const parseVideoDataUri = (value: string): { data: string; mimeType: string } | null => {
+  // Accept media-type parameters (e.g. `;codecs=avc1`) and line-wrapped base64 payloads.
+  const match = value.match(/^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]*)$/)
+  if (!match?.[1]?.startsWith('video/') || !match[2]) return null
+  return { data: match[2], mimeType: match[1] }
+}
+
+/**
+ * Normalizes a raw video payload into `{ data, mimeType }`.
+ *
+ * - Blank input (after trimming) yields `null`.
+ * - A base64 `data:video/*` URI is split into its payload and MIME type.
+ * - Anything else (imgcache/http(s) URLs, bare strings) is kept as-is and paired with the
+ *   supplied MIME type, falling back to `video/mp4`.
+ */
+const normalizeVideoData = (rawData: string, mimeType?: string) => {
+  const value = rawData.trim()
+  if (value.length === 0) return null
+
+  const passthrough = {
+    data: value,
+    mimeType: mimeType?.trim() || 'video/mp4'
+  }
+
+  // URLs are never data URIs, so they skip the data-URI parse entirely.
+  const urlSchemes = ['imgcache://', 'http://', 'https://']
+  if (urlSchemes.some((scheme) => value.startsWith(scheme))) return passthrough
+
+  return parseVideoDataUri(value) ?? passthrough
+}
+
+const resolvedVideoData = computed(() => {
+  if (props.block.image_data?.data) {
+    return normalizeVideoData(props.block.image_data.data, props.block.image_data.mimeType)
+  }
+
+  const content = props.block.content
+  if (content && typeof content === 'object' && 'data' in (content as LegacyVideoBlockContent)) {
+    const legacyContent = content as LegacyVideoBlockContent
+    if (legacyContent.data) {
+      return normalizeVideoData(legacyContent.data, legacyContent.mimeType)
+    }
+  }
+
+  if (typeof content === 'string' && content.length > 0) {
+    return normalizeVideoData(content)
+  }
+
+  return null
+})
+
+// Resolves the <video> `src`: URLs pass through, anything else becomes a base64 data URI.
+const videoSrc = computed(() => {
+  const video = resolvedVideoData.value
+  if (!video) return ''
+  const { data, mimeType } = video
+  const isUrl = /^(?:imgcache|https?):\/\//.test(data)
+  return isUrl ? data : `data:${mimeType};base64,${data}`
+})
+</script>
diff --git a/src/renderer/src/components/message/MessageItemAssistant.vue b/src/renderer/src/components/message/MessageItemAssistant.vue
index 4ce8c6ab3..bf4cbd680 100644
--- a/src/renderer/src/components/message/MessageItemAssistant.vue
+++ b/src/renderer/src/components/message/MessageItemAssistant.vue
@@ -77,6 +77,12 @@
                 :message-id="currentMessage.id"
                 :thread-id="currentThreadId"
               />
+              <MessageBlockVideo
+                v-else-if="isVideoBlock(block)"
+                :block="block"
+                :message-id="currentMessage.id"
+                :thread-id="currentThreadId"
+              />
               <MessageBlockImage
                 v-else-if="block.type === 'image'"
                 :block="block"
@@ -185,6 +191,7 @@ import MessageBlockAction from './MessageBlockAction.vue'
 import { useI18n } from 'vue-i18n'
 import MessageBlockImage from './MessageBlockImage.vue'
 import MessageBlockAudio from './MessageBlockAudio.vue'
+import MessageBlockVideo from './MessageBlockVideo.vue'
 import MessageBlockPlan from './MessageBlockPlan.vue'
 
 import {
@@ -219,7 +226,8 @@ const deviceClient = createDeviceClient()
 const uiSettingsStore = useUiSettingsStore()
 const { t } = useI18n()
 
-const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.m4a', '.aac', '.flac', '.ogg', '.opus', '.webm']
+const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.m4a', '.aac', '.flac', '.ogg', '.opus']
+const VIDEO_EXTENSIONS = ['.mp4', '.mov', '.m4v', '.webm', '.avi', '.mkv']
 
 const isAudioBlock = (block: DisplayAssistantMessageBlock): boolean => {
   if (block.type === 'audio') return true
@@ -235,6 +243,20 @@ const isAudioBlock = (block: DisplayAssistantMessageBlock): boolean => {
   return false
 }
 
+const isVideoBlock = (block: DisplayAssistantMessageBlock): boolean => {
+  if (block.type === 'video') return true
+  if (block.type !== 'image') return false
+  if (block.image_data?.mimeType?.toLowerCase().startsWith('video/')) return true
+  const data = block.image_data?.data || ''
+  if (data.startsWith('data:video/')) return true
+  if (data.startsWith('imgcache://') || data.startsWith('http://') || data.startsWith('https://')) {
+    // Ignore the http(s) host so e.g. cdn.movies.com (contains ".mov") is not mistaken for video.
+    const tokens = data.replace(/^https?:\/\/[^/?#]*/, '').toLowerCase().split(/[/?#&=]/)
+    return VIDEO_EXTENSIONS.some((ext) => tokens.some((token) => token.endsWith(ext)))
+  }
+  return false
+}
+
 // 定义事件
 const emit = defineEmits<{
   copyImage: [
diff --git a/src/renderer/src/components/settings/ModelConfigDialog.vue b/src/renderer/src/components/settings/ModelConfigDialog.vue
index 71eaed14f..b5671b0b2 100644
--- a/src/renderer/src/components/settings/ModelConfigDialog.vue
+++ b/src/renderer/src/components/settings/ModelConfigDialog.vue
@@ -11,7 +11,7 @@
       <div class="overflow-y-auto flex-1 pr-2 -mr-2">
         <form @submit.prevent="handleSave" class="space-y-6">
           <!-- 模型名称 -->
-          <div v-if="!showOpenAIImageGenerationSettings || canEditModelIdentity" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings || canEditModelIdentity" class="space-y-2">
             <Label for="modelName">{{ t('settings.model.modelConfig.name.label') }}</Label>
             <Input
               id="modelName"
@@ -34,7 +34,7 @@
           </div>
 
           <!-- 模型 ID -->
-          <div v-if="!showOpenAIImageGenerationSettings || canEditModelIdentity" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings || canEditModelIdentity" class="space-y-2">
             <Label for="modelId">{{ t('settings.model.modelConfig.id.label') }}</Label>
             <Input
               id="modelId"
@@ -57,7 +57,7 @@
           </div>
 
           <!-- 最大输出长度 -->
-          <div v-if="!showOpenAIImageGenerationSettings" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings" class="space-y-2">
             <Label for="maxTokens">{{ t('settings.model.modelConfig.maxTokens.label') }}</Label>
             <Input
               id="maxTokens"
@@ -77,7 +77,7 @@
           </div>
 
           <!-- 上下文长度 -->
-          <div v-if="!showOpenAIImageGenerationSettings" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings" class="space-y-2">
             <Label for="contextLength">{{
               t('settings.model.modelConfig.contextLength.label')
             }}</Label>
@@ -123,11 +123,16 @@
             v-model="config.imageGeneration"
           />
 
+          <OpenAIVideoGenerationSettingsFields
+            v-if="showOpenAIVideoGenerationSettings"
+            v-model="config.videoGeneration"
+          />
+
           <TtsSettingsFields v-if="showTtsSettings" v-model="config.tts" />
 
           <!-- 温度 (支持推理努力程度的模型不显示) -->
           <div
-            v-if="!showOpenAIImageGenerationSettings && showTemperatureControl"
+            v-if="!showOpenAIMediaGenerationSettings && showTemperatureControl"
             class="space-y-2"
           >
             <Label for="temperature">{{ t('settings.model.modelConfig.temperature.label') }}</Label>
@@ -155,7 +160,7 @@
 
           <!-- 模型类型 -->
           <div
-            v-if="!showOpenAIImageGenerationSettings || showOpenAIImageGenerationRouteControls"
+            v-if="!showOpenAIMediaGenerationSettings || showOpenAIMediaGenerationRouteControls"
             class="space-y-2"
           >
             <Label for="type">{{ t('settings.model.modelConfig.type.label') }}</Label>
@@ -176,6 +181,9 @@
                 <SelectItem value="imageGeneration">
                   {{ t('settings.model.modelConfig.type.options.imageGeneration') }}
                 </SelectItem>
+                <SelectItem value="videoGeneration">
+                  {{ t('settings.model.modelConfig.type.options.videoGeneration') }}
+                </SelectItem>
                 <SelectItem value="tts">
                   {{ t('settings.provider.tts.title') }}
                 </SelectItem>
@@ -188,7 +196,7 @@
 
           <div
             v-if="
-              (!showOpenAIImageGenerationSettings || showOpenAIImageGenerationRouteControls) &&
+              (!showOpenAIMediaGenerationSettings || showOpenAIMediaGenerationRouteControls) &&
               showEndpointTypeSelector
             "
             class="space-y-2"
@@ -223,7 +231,7 @@
           <!-- API 端点（仅 OpenAI 兼容 provider 显示） -->
           <div
             v-if="
-              (!showOpenAIImageGenerationSettings || showOpenAIImageGenerationRouteControls) &&
+              (!showOpenAIMediaGenerationSettings || showOpenAIMediaGenerationRouteControls) &&
               showApiEndpointSelector
             "
             class="space-y-2"
@@ -240,6 +248,9 @@
                 <SelectItem value="image">
                   {{ t('settings.model.modelConfig.apiEndpoint.options.image') }}
                 </SelectItem>
+                <SelectItem value="video">
+                  {{ t('settings.model.modelConfig.apiEndpoint.options.video') }}
+                </SelectItem>
                 <SelectItem value="audio-speech">
                   {{ t('settings.provider.tts.title') }}
                 </SelectItem>
@@ -251,7 +262,7 @@
           </div>
 
           <!-- 视觉能力 -->
-          <div v-if="!showOpenAIImageGenerationSettings" class="flex items-center justify-between">
+          <div v-if="!showOpenAIMediaGenerationSettings" class="flex items-center justify-between">
             <div class="space-y-0.5">
               <Label>{{ t('settings.model.modelConfig.vision.label') }}</Label>
               <p class="text-xs text-muted-foreground">
@@ -264,7 +275,7 @@
             />
           </div>
 
-          <div v-if="!showOpenAIImageGenerationSettings" class="flex items-center justify-between">
+          <div v-if="!showOpenAIMediaGenerationSettings" class="flex items-center justify-between">
             <div class="space-y-0.5">
               <Label>{{ t('settings.model.modelConfig.speechRecognition.label') }}</Label>
               <p class="text-xs text-muted-foreground">
@@ -279,7 +290,7 @@
           </div>
 
           <!-- 函数调用 -->
-          <div v-if="!showOpenAIImageGenerationSettings" class="flex items-center justify-between">
+          <div v-if="!showOpenAIMediaGenerationSettings" class="flex items-center justify-between">
             <div class="space-y-0.5">
               <Label>{{ t('settings.model.modelConfig.functionCall.label') }}</Label>
               <p class="text-xs text-muted-foreground">
@@ -298,7 +309,7 @@
 
           <!-- 推理能力 -->
           <div
-            v-if="!showOpenAIImageGenerationSettings && showReasoningToggle"
+            v-if="!showOpenAIMediaGenerationSettings && showReasoningToggle"
             class="flex items-center justify-between"
           >
             <div class="space-y-0.5">
@@ -319,7 +330,7 @@
           </div>
 
           <div
-            v-if="!showOpenAIImageGenerationSettings && showInterleavedThinking"
+            v-if="!showOpenAIMediaGenerationSettings && showInterleavedThinking"
             class="flex items-center justify-between gap-4"
           >
             <div class="space-y-0.5">
@@ -338,7 +349,7 @@
           </div>
 
           <!-- 推理努力程度 (支持推理努力程度的模型显示) -->
-          <div v-if="!showOpenAIImageGenerationSettings && showReasoningEffort" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings && showReasoningEffort" class="space-y-2">
             <Label for="reasoningEffort">{{
               t('settings.model.modelConfig.reasoningEffort.label')
             }}</Label>
@@ -364,7 +375,7 @@
           </div>
 
           <div
-            v-if="!showOpenAIImageGenerationSettings && showReasoningVisibility"
+            v-if="!showOpenAIMediaGenerationSettings && showReasoningVisibility"
             class="space-y-2"
           >
             <Label for="reasoningVisibility">{{
@@ -392,7 +403,7 @@
           </div>
 
           <!-- 详细程度（存在该参数即显示） -->
-          <div v-if="!showOpenAIImageGenerationSettings && supportsVerbosity" class="space-y-2">
+          <div v-if="!showOpenAIMediaGenerationSettings && supportsVerbosity" class="space-y-2">
             <Label for="verbosity">{{ t('settings.model.modelConfig.verbosity.label') }}</Label>
             <Select v-model="config.verbosity">
               <SelectTrigger>
@@ -414,7 +425,7 @@
           </div>
 
           <!-- 思考预算（统一基于能力） -->
-          <div v-if="!showOpenAIImageGenerationSettings && showThinkingBudget" class="space-y-4">
+          <div v-if="!showOpenAIMediaGenerationSettings && showThinkingBudget" class="space-y-4">
             <div class="flex items-center justify-between">
               <div class="space-y-0.5">
                 <Label>{{ t('settings.model.modelConfig.thinkingBudget.label') }}</Label>
@@ -559,11 +570,16 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
+import {
+  normalizeVideoGenerationOptions,
+  supportsOpenAICompatibleVideoGeneration
+} from '@shared/videoGenerationSettings'
 import { normalizeTtsSettings } from '@shared/ttsSettings'
 import { useModelConfigStore } from '@/stores/modelConfigStore'
 import { useModelStore } from '@/stores/modelStore'
 import { useProviderStore } from '@/stores/providerStore'
 import OpenAIImageGenerationSettingsFields from './OpenAIImageGenerationSettingsFields.vue'
+import OpenAIVideoGenerationSettingsFields from './OpenAIVideoGenerationSettingsFields.vue'
 import TtsSettingsFields from './TtsSettingsFields.vue'
 import { createModelClient } from '@api/ModelClient'
 import {
@@ -708,9 +724,29 @@ const showOpenAIImageGenerationSettings = computed(() =>
     type: config.value.type ?? providerModelMeta.value?.type
   })
 )
+const showOpenAIVideoGenerationSettings = computed(() =>
+  supportsOpenAICompatibleVideoGeneration({
+    providerId: props.providerId,
+    providerApiType: currentProvider.value?.apiType,
+    modelId: modelIdField.value.trim(),
+    apiEndpoint: config.value.apiEndpoint,
+    endpointType: config.value.endpointType ?? providerModelMeta.value?.endpointType,
+    supportedEndpointTypes: providerModelMeta.value?.supportedEndpointTypes,
+    type: config.value.type ?? providerModelMeta.value?.type
+  })
+)
+const showOpenAIMediaGenerationSettings = computed(
+  () => showOpenAIImageGenerationSettings.value || showOpenAIVideoGenerationSettings.value
+)
 const showOpenAIImageGenerationRouteControls = computed(
   () => showOpenAIImageGenerationSettings.value && canEditModelIdentity.value
 )
+const showOpenAIVideoGenerationRouteControls = computed(
+  () => showOpenAIVideoGenerationSettings.value && canEditModelIdentity.value
+)
+const showOpenAIMediaGenerationRouteControls = computed(
+  () => showOpenAIImageGenerationRouteControls.value || showOpenAIVideoGenerationRouteControls.value
+)
 const showTtsSettings = computed(() => config.value.type === ModelType.TTS)
 
 // 重置确认对话框
@@ -1049,12 +1085,23 @@ const syncNewApiDerivedFields = () => {
     return
   }
 
+  if (config.value.endpointType === 'video-generation') {
+    config.value.apiEndpoint = ApiEndpointType.Video
+    config.value.type = ModelType.VideoGeneration
+    return
+  }
+
   config.value.apiEndpoint = ApiEndpointType.Chat
 
-  if (config.value.type === ModelType.ImageGeneration) {
+  if (
+    config.value.type === ModelType.ImageGeneration ||
+    config.value.type === ModelType.VideoGeneration
+  ) {
     const providerModelType = providerModelMeta.value?.type
     config.value.type =
-      providerModelType && providerModelType !== ModelType.ImageGeneration
+      providerModelType &&
+      providerModelType !== ModelType.ImageGeneration &&
+      providerModelType !== ModelType.VideoGeneration
         ? providerModelType
         : ModelType.Chat
   }
@@ -1099,13 +1146,17 @@ const loadConfig = async () => {
         availableEndpointTypes.value[0]
     }
 
-    if (showApiEndpointSelector.value && !config.value.apiEndpoint) {
-      config.value.apiEndpoint = ApiEndpointType.Chat
+    if (config.value.type === ModelType.VideoGeneration && !config.value.apiEndpoint) {
+      config.value.apiEndpoint = ApiEndpointType.Video
     }
 
     if (config.value.type === ModelType.TTS && !config.value.apiEndpoint) {
       config.value.apiEndpoint = ApiEndpointType.AudioSpeech
     }
+
+    if (showApiEndpointSelector.value && !config.value.apiEndpoint) {
+      config.value.apiEndpoint = ApiEndpointType.Chat
+    }
   } catch (error) {
     console.error('Failed to load model config:', error)
     config.value = createDefaultConfig()
@@ -1195,7 +1246,7 @@ const validateForm = () => {
     }
   }
 
-  if (!showOpenAIImageGenerationSettings.value) {
+  if (!showOpenAIMediaGenerationSettings.value) {
     // 验证最大输出长度
     if (!config.value.maxTokens || config.value.maxTokens <= 0) {
       errors.value.maxTokens = t('settings.model.modelConfig.validation.maxTokensMin')
@@ -1213,7 +1264,7 @@ const validateForm = () => {
 
   // 验证温度 (仅对显示 temperature 控件的模型)
   if (
-    !showOpenAIImageGenerationSettings.value &&
+    !showOpenAIMediaGenerationSettings.value &&
     showTemperatureControl.value &&
     config.value.temperature !== undefined
   ) {
@@ -1234,7 +1285,7 @@ const validateForm = () => {
   }
 
   if (
-    (!showOpenAIImageGenerationSettings.value || showOpenAIImageGenerationRouteControls.value) &&
+    (!showOpenAIMediaGenerationSettings.value || showOpenAIMediaGenerationRouteControls.value) &&
     showEndpointTypeSelector.value &&
     !isNewApiEndpointType(config.value.endpointType)
   ) {
@@ -1263,6 +1314,9 @@ const handleSave = async () => {
     imageGeneration: showOpenAIImageGenerationSettings.value
       ? normalizeImageGenerationOptions(config.value.imageGeneration)
       : undefined,
+    videoGeneration: showOpenAIVideoGenerationSettings.value
+      ? normalizeVideoGenerationOptions(config.value.videoGeneration)
+      : undefined,
     tts: showTtsSettings.value ? normalizeTtsSettings(config.value.tts) : undefined
   }
 
@@ -1376,12 +1430,20 @@ watch(
       return
     }
 
+    if (config.value.type === ModelType.VideoGeneration) {
+      config.value.apiEndpoint = ApiEndpointType.Video
+      return
+    }
+
     if (config.value.type === ModelType.TTS) {
       config.value.apiEndpoint = ApiEndpointType.AudioSpeech
       return
     }
 
-    if (config.value.apiEndpoint === ApiEndpointType.Image) {
+    if (
+      config.value.apiEndpoint === ApiEndpointType.Image ||
+      config.value.apiEndpoint === ApiEndpointType.Video
+    ) {
       config.value.apiEndpoint = ApiEndpointType.Chat
     }
   },
diff --git a/src/renderer/src/components/settings/OpenAIVideoGenerationSettingsFields.vue b/src/renderer/src/components/settings/OpenAIVideoGenerationSettingsFields.vue
new file mode 100644
index 000000000..bd09b8a5c
--- /dev/null
+++ b/src/renderer/src/components/settings/OpenAIVideoGenerationSettingsFields.vue
@@ -0,0 +1,175 @@
+<template>
+  <div :class="containerClass">
+    <div :class="fieldClass">
+      <Label :class="labelClass">
+        {{ t('settings.model.modelConfig.videoGeneration.size.label') }}
+      </Label>
+      <Input
+        :model-value="videoGeneration.size ?? ''"
+        :class="inputClass"
+        :placeholder="t('settings.model.modelConfig.videoGeneration.size.placeholder')"
+        @update:model-value="onTextFieldUpdate('size', $event)"
+      />
+    </div>
+
+    <div :class="fieldClass">
+      <Label :class="labelClass">
+        {{ t('settings.model.modelConfig.videoGeneration.seconds.label') }}
+      </Label>
+      <Input
+        :model-value="videoGeneration.seconds ?? ''"
+        :class="inputClass"
+        :placeholder="t('settings.model.modelConfig.videoGeneration.seconds.placeholder')"
+        @update:model-value="onTextFieldUpdate('seconds', $event)"
+      />
+    </div>
+
+    <div :class="fieldClass">
+      <Label :class="labelClass">
+        {{ t('settings.model.modelConfig.videoGeneration.duration.label') }}
+      </Label>
+      <Input
+        :model-value="durationDraft"
+        :class="inputClass"
+        inputmode="numeric"
+        :placeholder="t('settings.model.modelConfig.videoGeneration.duration.placeholder')"
+        @update:model-value="onDurationInput"
+      />
+      <p :class="hintClass">
+        {{ t('settings.model.modelConfig.videoGeneration.duration.description') }}
+      </p>
+    </div>
+
+    <div :class="fieldClass">
+      <Label :class="labelClass">
+        {{ t('settings.model.modelConfig.videoGeneration.ratio.label') }}
+      </Label>
+      <Input
+        :model-value="videoGeneration.ratio ?? ''"
+        :class="inputClass"
+        :placeholder="t('settings.model.modelConfig.videoGeneration.ratio.placeholder')"
+        @update:model-value="onTextFieldUpdate('ratio', $event)"
+      />
+    </div>
+
+    <div :class="fieldClass">
+      <Label :class="labelClass">
+        {{ t('settings.model.modelConfig.videoGeneration.resolution.label') }}
+      </Label>
+      <Input
+        :model-value="videoGeneration.resolution ?? ''"
+        :class="inputClass"
+        :placeholder="t('settings.model.modelConfig.videoGeneration.resolution.placeholder')"
+        @update:model-value="onTextFieldUpdate('resolution', $event)"
+      />
+    </div>
+
+    <div class="flex items-center justify-between gap-3 rounded-md border p-3">
+      <div class="space-y-0.5">
+        <Label :class="labelClass">
+          {{ t('settings.model.modelConfig.videoGeneration.watermark.label') }}
+        </Label>
+        <p :class="hintClass">
+          {{ t('settings.model.modelConfig.videoGeneration.watermark.description') }}
+        </p>
+      </div>
+      <Switch
+        :model-value="Boolean(videoGeneration.watermark)"
+        @update:model-value="onBooleanFieldUpdate('watermark', $event)"
+      />
+    </div>
+
+    <div class="flex items-center justify-between gap-3 rounded-md border p-3">
+      <div class="space-y-0.5">
+        <Label :class="labelClass">
+          {{ t('settings.model.modelConfig.videoGeneration.generateAudio.label') }}
+        </Label>
+        <p :class="hintClass">
+          {{ t('settings.model.modelConfig.videoGeneration.generateAudio.description') }}
+        </p>
+      </div>
+      <Switch
+        :model-value="Boolean(videoGeneration.generateAudio)"
+        @update:model-value="onBooleanFieldUpdate('generateAudio', $event)"
+      />
+    </div>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { computed } from 'vue'
+import { useI18n } from 'vue-i18n'
+import {
+  normalizeVideoGenerationOptions,
+  type VideoGenerationOptions
+} from '@shared/videoGenerationSettings'
+import { Input } from '@shadcn/components/ui/input'
+import { Label } from '@shadcn/components/ui/label'
+import { Switch } from '@shadcn/components/ui/switch'
+
+const props = withDefaults(
+  defineProps<{
+    modelValue?: VideoGenerationOptions // bound options object; optional
+    density?: 'default' | 'compact' // selects which set of spacing/text classes is used
+  }>(),
+  {
+    modelValue: undefined,
+    density: 'default'
+  }
+)
+
+const emit = defineEmits<{
+  'update:modelValue': [value: VideoGenerationOptions | undefined] // payload may be undefined
+}>()
+
+const { t } = useI18n()
+
+const videoGeneration = computed<VideoGenerationOptions>(() => {
+  return normalizeVideoGenerationOptions(props.modelValue) ?? {}
+})
+
+// Utility classes switch on density: 'compact' selects the tighter, smaller-text set.
+const isCompact = computed(() => props.density === 'compact')
+const containerClass = computed(() => (isCompact.value ? 'space-y-3' : 'space-y-4'))
+const fieldClass = computed(() => (isCompact.value ? 'space-y-1.5' : 'space-y-2'))
+const labelClass = computed(() => (isCompact.value ? 'text-xs font-medium' : ''))
+const inputClass = computed(() => (isCompact.value ? 'h-8 text-xs' : ''))
+const hintClass = computed(() =>
+  isCompact.value ? 'text-[11px] text-muted-foreground' : 'text-xs text-muted-foreground'
+)
+const durationDraft = computed(() =>
+  typeof videoGeneration.value.duration === 'number' ? `${videoGeneration.value.duration}` : ''
+)
+
+// Overlay `patch` on the current normalized options, run the merged object
+// through normalizeVideoGenerationOptions again, and emit the result as the
+// new v-model value.
+const emitOptions = (patch: VideoGenerationOptions) => {
+  const merged = { ...videoGeneration.value, ...patch }
+  emit('update:modelValue', normalizeVideoGenerationOptions(merged))
+}
+
+// Coerce any input to a trimmed string; a blank result becomes undefined.
+const normalizeTextInput = (value: unknown): string | undefined => {
+  return String(value ?? '').trim() || undefined
+}
+
+// Patch one free-text option with the trimmed input (undefined when blank).
+const onTextFieldUpdate = (field: 'size' | 'seconds' | 'ratio' | 'resolution', value: unknown) =>
+  emitOptions({ [field]: normalizeTextInput(value) })
+
+// Turn the raw duration input into an integer option; blank or unparsable
+// text clears it. Parsing is base-10 via Number.parseInt, so only the leading
+// integer portion of the text is used.
+const onDurationInput = (value: unknown) => {
+  const text = String(value ?? '').trim()
+  // An empty string parses to NaN, which the finiteness check maps to
+  // undefined, so blank input needs no separate branch.
+  const parsedDuration = Number.parseInt(text, 10)
+  emitOptions({ duration: Number.isFinite(parsedDuration) ? parsedDuration : undefined })
+}
+
+// Patch one toggle option, coercing the switch payload to a strict boolean.
+const onBooleanFieldUpdate = (field: 'watermark' | 'generateAudio', value: unknown) =>
+  emitOptions({ [field]: Boolean(value) })
+</script>
diff --git a/src/renderer/src/composables/useModelTypeDetection.ts b/src/renderer/src/composables/useModelTypeDetection.ts
index 7fb0320b5..af6203408 100644
--- a/src/renderer/src/composables/useModelTypeDetection.ts
+++ b/src/renderer/src/composables/useModelTypeDetection.ts
@@ -8,11 +8,14 @@ import { useModelConfigStore } from '@/stores/modelConfigStore'
 export interface UseModelTypeDetectionOptions {
   modelId: Ref<string | undefined>
   providerId: Ref<string | undefined>
-  modelType: Ref<'chat' | 'imageGeneration' | 'tts' | 'embedding' | 'rerank' | undefined>
+  modelType: Ref<
+    'chat' | 'imageGeneration' | 'videoGeneration' | 'tts' | 'embedding' | 'rerank' | undefined
+  >
 }
 
 export interface UseModelTypeDetectionReturn {
   isImageGenerationModel: ComputedRef<boolean>
+  isVideoGenerationModel: ComputedRef<boolean>
   isTtsModel: ComputedRef<boolean>
   isGPT5Model: ComputedRef<boolean>
   isGeminiProvider: ComputedRef<boolean>
@@ -42,6 +45,13 @@ export function useModelTypeDetection(
     return modelType.value === 'imageGeneration'
   })
 
+  /**
+   * True when the `modelType` ref currently holds 'videoGeneration'
+   */
+  const isVideoGenerationModel = computed(() => {
+    return modelType.value === 'videoGeneration'
+  })
+
   /**
    * Checks if current model is a TTS model
    */
@@ -94,6 +104,7 @@ export function useModelTypeDetection(
   // === Return Public API ===
   return {
     isImageGenerationModel,
+    isVideoGenerationModel,
     isTtsModel,
     isGPT5Model,
     isGeminiProvider,
diff --git a/src/renderer/src/i18n/da-DK/model.json b/src/renderer/src/i18n/da-DK/model.json
index 03097e39e..6c7f66e43 100644
--- a/src/renderer/src/i18n/da-DK/model.json
+++ b/src/renderer/src/i18n/da-DK/model.json
@@ -40,7 +40,8 @@
       "chat": "Chat",
       "embedding": "Indlejring",
       "rerank": "Genrangering",
-      "imageGeneration": "Billedgenerering"
+      "imageGeneration": "Billedgenerering",
+      "videoGeneration": "Videogenerering"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/da-DK/settings.json b/src/renderer/src/i18n/da-DK/settings.json
index 756c81cd1..06355aacc 100644
--- a/src/renderer/src/i18n/da-DK/settings.json
+++ b/src/renderer/src/i18n/da-DK/settings.json
@@ -436,7 +436,8 @@
           "chat": "Sprogmodel",
           "embedding": "Embedding-model",
           "imageGeneration": "Billedgenereringsmodel",
-          "rerank": "Rerank-model"
+          "rerank": "Rerank-model",
+          "videoGeneration": "Videogenereringsmodel"
         }
       },
       "validation": {
@@ -461,7 +462,8 @@
         "label": "API-endpunkt",
         "options": {
           "chat": "tekstgenerering",
-          "image": "Billedgenerering"
+          "image": "Billedgenerering",
+          "video": "Videogenerering"
         }
       },
       "endpointType": {
@@ -474,7 +476,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Billedgenerering"
+          "image-generation": "Billedgenerering",
+          "video-generation": "Videogenerering"
         }
       },
       "reasoningVisibility": {
@@ -541,6 +544,37 @@
             "low": "Lav"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/en-US/model.json b/src/renderer/src/i18n/en-US/model.json
index b3211c1ff..2336921c1 100644
--- a/src/renderer/src/i18n/en-US/model.json
+++ b/src/renderer/src/i18n/en-US/model.json
@@ -40,7 +40,8 @@
       "chat": "Chat",
       "embedding": "Embedding",
       "rerank": "Rerank",
-      "imageGeneration": "Image Generation"
+      "imageGeneration": "Image Generation",
+      "videoGeneration": "Video Generation"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/en-US/settings.json b/src/renderer/src/i18n/en-US/settings.json
index 257d32b63..5cd7fe861 100644
--- a/src/renderer/src/i18n/en-US/settings.json
+++ b/src/renderer/src/i18n/en-US/settings.json
@@ -566,7 +566,8 @@
           "chat": "Language Model",
           "embedding": "Embedding Model",
           "imageGeneration": "Image Generation Model",
-          "rerank": "Rerank Model"
+          "rerank": "Rerank Model",
+          "videoGeneration": "Video Generation Model"
         }
       },
       "validation": {
@@ -591,7 +592,8 @@
         "label": "API endpoint",
         "options": {
           "chat": "Text Generation",
-          "image": "Image Generation"
+          "image": "Image Generation",
+          "video": "Video Generation"
         }
       },
       "endpointType": {
@@ -604,7 +606,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Image Generation"
+          "image-generation": "Image Generation",
+          "video-generation": "Video Generation"
         }
       },
       "reasoningVisibility": {
@@ -671,6 +674,37 @@
             "low": "Low"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/fa-IR/model.json b/src/renderer/src/i18n/fa-IR/model.json
index d6e2e25cc..cf49ed770 100644
--- a/src/renderer/src/i18n/fa-IR/model.json
+++ b/src/renderer/src/i18n/fa-IR/model.json
@@ -40,7 +40,8 @@
       "chat": "گفتگو",
       "embedding": "جاسازی",
       "rerank": "رتبه‌بندی مجدد",
-      "imageGeneration": "تولید تصویر"
+      "imageGeneration": "تولید تصویر",
+      "videoGeneration": "تولید ویدیو"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/fa-IR/settings.json b/src/renderer/src/i18n/fa-IR/settings.json
index 8e23128c4..ded0c44c6 100644
--- a/src/renderer/src/i18n/fa-IR/settings.json
+++ b/src/renderer/src/i18n/fa-IR/settings.json
@@ -503,7 +503,8 @@
           "chat": "مدل زبان",
           "embedding": "مدل",
           "imageGeneration": "مدل تولید تصویر",
-          "rerank": "مدل را دوباره مرتب کنید"
+          "rerank": "مدل را دوباره مرتب کنید",
+          "videoGeneration": "مدل تولید ویدیو"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "نقطه پایانی API",
         "options": {
           "chat": "تولید متن",
-          "image": "تولید تصویر"
+          "image": "تولید تصویر",
+          "video": "تولید ویدیو"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "تولید تصویر"
+          "image-generation": "تولید تصویر",
+          "video-generation": "تولید ویدیو"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "کم"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/fr-FR/model.json b/src/renderer/src/i18n/fr-FR/model.json
index fb8b24c74..53e7219b3 100644
--- a/src/renderer/src/i18n/fr-FR/model.json
+++ b/src/renderer/src/i18n/fr-FR/model.json
@@ -40,7 +40,8 @@
       "chat": "Discussion",
       "embedding": "Incorporation",
       "rerank": "Reclassement",
-      "imageGeneration": "Génération d'image"
+      "imageGeneration": "Génération d'image",
+      "videoGeneration": "Génération vidéo"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/fr-FR/settings.json b/src/renderer/src/i18n/fr-FR/settings.json
index 62eb66d15..2f204624b 100644
--- a/src/renderer/src/i18n/fr-FR/settings.json
+++ b/src/renderer/src/i18n/fr-FR/settings.json
@@ -503,7 +503,8 @@
           "chat": "Modèle de langue",
           "embedding": "Modèle d'intégration",
           "imageGeneration": "Modèle de génération d'images",
-          "rerank": "Modèle de rerank"
+          "rerank": "Modèle de rerank",
+          "videoGeneration": "Modèle de génération vidéo"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "Point de terminaison API",
         "options": {
           "chat": "Génération de texte",
-          "image": "Génération d'images"
+          "image": "Génération d'images",
+          "video": "Génération vidéo"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Génération d'images"
+          "image-generation": "Génération d'images",
+          "video-generation": "Génération vidéo"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "Faible"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/he-IL/model.json b/src/renderer/src/i18n/he-IL/model.json
index 21224c708..bd489e6a3 100644
--- a/src/renderer/src/i18n/he-IL/model.json
+++ b/src/renderer/src/i18n/he-IL/model.json
@@ -40,7 +40,8 @@
       "chat": "צ'אט",
       "embedding": "הטמעה",
       "rerank": "דירוג מחדש",
-      "imageGeneration": "יצירת תמונות"
+      "imageGeneration": "יצירת תמונות",
+      "videoGeneration": "יצירת וידאו"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/he-IL/settings.json b/src/renderer/src/i18n/he-IL/settings.json
index 706a00361..5989181d9 100644
--- a/src/renderer/src/i18n/he-IL/settings.json
+++ b/src/renderer/src/i18n/he-IL/settings.json
@@ -503,7 +503,8 @@
           "chat": "מודל שפה (Chat)",
           "embedding": "מודל הטמעה (Embedding)",
           "imageGeneration": "מודל יצירת תמונות",
-          "rerank": "מודל דירוג מחדש (Rerank)"
+          "rerank": "מודל דירוג מחדש (Rerank)",
+          "videoGeneration": "מודל יצירת וידאו"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "נקודת קצה API",
         "options": {
           "chat": "יצירת טקסט",
-          "image": "יצירת תמונות"
+          "image": "יצירת תמונות",
+          "video": "יצירת וידאו"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "יצירת תמונות"
+          "image-generation": "יצירת תמונות",
+          "video-generation": "יצירת וידאו"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "נמוכה"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/ja-JP/model.json b/src/renderer/src/i18n/ja-JP/model.json
index d81fefa54..5687be0e3 100644
--- a/src/renderer/src/i18n/ja-JP/model.json
+++ b/src/renderer/src/i18n/ja-JP/model.json
@@ -40,7 +40,8 @@
       "chat": "チャット",
       "embedding": "埋め込み",
       "rerank": "再ランク付け",
-      "imageGeneration": "画像生成"
+      "imageGeneration": "画像生成",
+      "videoGeneration": "動画生成"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/ja-JP/settings.json b/src/renderer/src/i18n/ja-JP/settings.json
index 4a7e21972..7d42befab 100644
--- a/src/renderer/src/i18n/ja-JP/settings.json
+++ b/src/renderer/src/i18n/ja-JP/settings.json
@@ -503,7 +503,8 @@
           "chat": "言語モデル",
           "embedding": "埋め込みモデル",
           "imageGeneration": "画像生成モデル",
-          "rerank": "リランクモデル"
+          "rerank": "リランクモデル",
+          "videoGeneration": "動画生成モデル"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "API エンドポイント",
         "options": {
           "chat": "テキスト生成",
-          "image": "画像生成"
+          "image": "画像生成",
+          "video": "動画生成"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "画像生成"
+          "image-generation": "画像生成",
+          "video-generation": "動画生成"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "低"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/ko-KR/model.json b/src/renderer/src/i18n/ko-KR/model.json
index 923d42915..d4a74f769 100644
--- a/src/renderer/src/i18n/ko-KR/model.json
+++ b/src/renderer/src/i18n/ko-KR/model.json
@@ -40,7 +40,8 @@
       "chat": "채팅",
       "embedding": "임베딩",
       "rerank": "재순위",
-      "imageGeneration": "이미지 생성"
+      "imageGeneration": "이미지 생성",
+      "videoGeneration": "비디오 생성"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/ko-KR/settings.json b/src/renderer/src/i18n/ko-KR/settings.json
index 45d080ee2..82e3a39d5 100644
--- a/src/renderer/src/i18n/ko-KR/settings.json
+++ b/src/renderer/src/i18n/ko-KR/settings.json
@@ -503,7 +503,8 @@
           "chat": "언어 모델",
           "embedding": "임베드 모델",
           "imageGeneration": "이미지 생성 모델",
-          "rerank": "모델을 재정렬하십시오"
+          "rerank": "모델을 재정렬하십시오",
+          "videoGeneration": "비디오 생성 모델"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "API 엔드포인트",
         "options": {
           "chat": "텍스트 생성",
-          "image": "이미지 생성"
+          "image": "이미지 생성",
+          "video": "비디오 생성"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "이미지 생성"
+          "image-generation": "이미지 생성",
+          "video-generation": "비디오 생성"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "낮음"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/pt-BR/model.json b/src/renderer/src/i18n/pt-BR/model.json
index 0fa6594ec..0803c4c02 100644
--- a/src/renderer/src/i18n/pt-BR/model.json
+++ b/src/renderer/src/i18n/pt-BR/model.json
@@ -40,7 +40,8 @@
       "chat": "Chat",
       "embedding": "Incorporação",
       "rerank": "Reclassificação",
-      "imageGeneration": "Geração de Imagem"
+      "imageGeneration": "Geração de Imagem",
+      "videoGeneration": "Geração de Vídeo"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/pt-BR/settings.json b/src/renderer/src/i18n/pt-BR/settings.json
index 2861c13f9..a9d2fa8ed 100644
--- a/src/renderer/src/i18n/pt-BR/settings.json
+++ b/src/renderer/src/i18n/pt-BR/settings.json
@@ -503,7 +503,8 @@
           "chat": "Modelo de Linguagem",
           "embedding": "Modelo de Embedding (Incrustação)",
           "imageGeneration": "Modelo de Geração de Imagem",
-          "rerank": "Modelo de Rerank (Reclassificação)"
+          "rerank": "Modelo de Rerank (Reclassificação)",
+          "videoGeneration": "Modelo de Geração de Vídeo"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "Endpoint da API",
         "options": {
           "chat": "Geração de texto",
-          "image": "Geração de Imagens"
+          "image": "Geração de Imagens",
+          "video": "Geração de Vídeo"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Geração de Imagens"
+          "image-generation": "Geração de Imagens",
+          "video-generation": "Geração de Vídeo"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "Baixa"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/ru-RU/model.json b/src/renderer/src/i18n/ru-RU/model.json
index af11c965c..2cf342a41 100644
--- a/src/renderer/src/i18n/ru-RU/model.json
+++ b/src/renderer/src/i18n/ru-RU/model.json
@@ -40,7 +40,8 @@
       "chat": "Чат",
       "embedding": "Встраивание",
       "rerank": "Переранжирование",
-      "imageGeneration": "Генерация изображений"
+      "imageGeneration": "Генерация изображений",
+      "videoGeneration": "Генерация видео"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/ru-RU/settings.json b/src/renderer/src/i18n/ru-RU/settings.json
index 48bc75332..a38d53c0d 100644
--- a/src/renderer/src/i18n/ru-RU/settings.json
+++ b/src/renderer/src/i18n/ru-RU/settings.json
@@ -503,7 +503,8 @@
           "chat": "Языковая модель",
           "embedding": "Встроенная модель",
           "imageGeneration": "Модель генерации изображений",
-          "rerank": "Переупорядочить модель"
+          "rerank": "Переупорядочить модель",
+          "videoGeneration": "Модель генерации видео"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "Конечная точка API",
         "options": {
           "chat": "генерация текста",
-          "image": "Генерация изображений"
+          "image": "Генерация изображений",
+          "video": "Генерация видео"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Генерация изображений"
+          "image-generation": "Генерация изображений",
+          "video-generation": "Генерация видео"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "Низкая"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "Frame size",
+          "placeholder": "e.g. 1280x720"
+        },
+        "seconds": {
+          "label": "Seconds",
+          "placeholder": "e.g. 5"
+        },
+        "duration": {
+          "label": "Duration",
+          "placeholder": "e.g. 5",
+          "description": "Sent through extra_body.duration for compatible video endpoints."
+        },
+        "ratio": {
+          "label": "Aspect ratio",
+          "placeholder": "e.g. 16:9"
+        },
+        "resolution": {
+          "label": "Resolution",
+          "placeholder": "e.g. 720p"
+        },
+        "watermark": {
+          "label": "Watermark",
+          "description": "Request a provider-side watermark when the upstream endpoint supports it."
+        },
+        "generateAudio": {
+          "label": "Generate audio",
+          "description": "Request an audio track together with the generated video when supported."
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/zh-CN/model.json b/src/renderer/src/i18n/zh-CN/model.json
index 7bfdd8b45..26a565927 100644
--- a/src/renderer/src/i18n/zh-CN/model.json
+++ b/src/renderer/src/i18n/zh-CN/model.json
@@ -40,7 +40,8 @@
       "chat": "对话",
       "embedding": "向量",
       "rerank": "重排",
-      "imageGeneration": "图像生成"
+      "imageGeneration": "图像生成",
+      "videoGeneration": "视频生成"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/zh-CN/settings.json b/src/renderer/src/i18n/zh-CN/settings.json
index 5018eef0b..586bbe72c 100644
--- a/src/renderer/src/i18n/zh-CN/settings.json
+++ b/src/renderer/src/i18n/zh-CN/settings.json
@@ -536,7 +536,8 @@
           "chat": "语言模型",
           "embedding": "嵌入模型",
           "rerank": "重排序模型",
-          "imageGeneration": "图像生成模型"
+          "imageGeneration": "图像生成模型",
+          "videoGeneration": "视频生成模型"
         }
       },
       "apiEndpoint": {
@@ -544,7 +545,8 @@
         "description": "选择此模型使用的 OpenAI API 端点。",
         "options": {
           "chat": "文本生成",
-          "image": "图片生成"
+          "image": "图片生成",
+          "video": "视频生成"
         }
       },
       "resetToDefault": "重置为默认",
@@ -604,7 +606,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "Image Generation"
+          "image-generation": "Image Generation",
+          "video-generation": "Video Generation"
         }
       },
       "reasoningVisibility": {
@@ -671,6 +674,37 @@
             "low": "低"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "视频尺寸",
+          "placeholder": "例如 1280x720"
+        },
+        "seconds": {
+          "label": "秒数",
+          "placeholder": "例如 5"
+        },
+        "duration": {
+          "label": "时长",
+          "placeholder": "例如 5",
+          "description": "会通过兼容视频接口的 extra_body.duration 发给上游。"
+        },
+        "ratio": {
+          "label": "宽高比",
+          "placeholder": "例如 16:9"
+        },
+        "resolution": {
+          "label": "分辨率",
+          "placeholder": "例如 720p"
+        },
+        "watermark": {
+          "label": "水印",
+          "description": "当上游接口支持时，请求服务端为生成结果添加水印。"
+        },
+        "generateAudio": {
+          "label": "生成音频",
+          "description": "当上游接口支持时，同时为视频生成音轨。"
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/zh-HK/model.json b/src/renderer/src/i18n/zh-HK/model.json
index e7145daea..b838586dd 100644
--- a/src/renderer/src/i18n/zh-HK/model.json
+++ b/src/renderer/src/i18n/zh-HK/model.json
@@ -40,7 +40,8 @@
       "chat": "對話",
       "embedding": "向量",
       "rerank": "重排",
-      "imageGeneration": "圖像生成"
+      "imageGeneration": "圖像生成",
+      "videoGeneration": "視頻生成"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/zh-HK/settings.json b/src/renderer/src/i18n/zh-HK/settings.json
index cd6f0a35a..9e1f39a99 100644
--- a/src/renderer/src/i18n/zh-HK/settings.json
+++ b/src/renderer/src/i18n/zh-HK/settings.json
@@ -503,7 +503,8 @@
           "chat": "語言模型",
           "embedding": "嵌入模型",
           "imageGeneration": "圖像生成模型",
-          "rerank": "重排序模型"
+          "rerank": "重排序模型",
+          "videoGeneration": "視頻生成模型"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "API 端點",
         "options": {
           "chat": "文本生成",
-          "image": "圖片生成"
+          "image": "圖片生成",
+          "video": "視頻生成"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "圖片生成"
+          "image-generation": "圖片生成",
+          "video-generation": "視頻生成"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "低"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "視頻尺寸",
+          "placeholder": "例如 1280x720"
+        },
+        "seconds": {
+          "label": "秒數",
+          "placeholder": "例如 5"
+        },
+        "duration": {
+          "label": "時長",
+          "placeholder": "例如 5",
+          "description": "會透過兼容視頻接口的 extra_body.duration 傳給上游。"
+        },
+        "ratio": {
+          "label": "長寬比",
+          "placeholder": "例如 16:9"
+        },
+        "resolution": {
+          "label": "分辨率",
+          "placeholder": "例如 720p"
+        },
+        "watermark": {
+          "label": "水印",
+          "description": "當上游接口支持時，請求服務端為生成結果加入水印。"
+        },
+        "generateAudio": {
+          "label": "生成音頻",
+          "description": "當上游接口支持時，同時為視頻生成音軌。"
+        }
       }
     }
   },
diff --git a/src/renderer/src/i18n/zh-TW/model.json b/src/renderer/src/i18n/zh-TW/model.json
index 6d9b596d8..d5aeb837f 100644
--- a/src/renderer/src/i18n/zh-TW/model.json
+++ b/src/renderer/src/i18n/zh-TW/model.json
@@ -40,7 +40,8 @@
       "chat": "對話",
       "embedding": "向量",
       "rerank": "重排",
-      "imageGeneration": "圖像生成"
+      "imageGeneration": "圖像生成",
+      "videoGeneration": "影片生成"
     }
   },
   "tags": {
diff --git a/src/renderer/src/i18n/zh-TW/settings.json b/src/renderer/src/i18n/zh-TW/settings.json
index f9e6ce075..9dc2aa28a 100644
--- a/src/renderer/src/i18n/zh-TW/settings.json
+++ b/src/renderer/src/i18n/zh-TW/settings.json
@@ -503,7 +503,8 @@
           "chat": "語言模型",
           "embedding": "嵌入模型",
           "imageGeneration": "圖像生成模型",
-          "rerank": "重排序模型"
+          "rerank": "重排序模型",
+          "videoGeneration": "影片生成模型"
         }
       },
       "validation": {
@@ -528,7 +529,8 @@
         "label": "API 端點",
         "options": {
           "chat": "文字生成",
-          "image": "圖片生成"
+          "image": "圖片生成",
+          "video": "影片生成"
         }
       },
       "endpointType": {
@@ -541,7 +543,8 @@
           "openai-response": "OpenAI Responses",
           "anthropic": "Anthropic Messages",
           "gemini": "Gemini Native",
-          "image-generation": "圖片生成"
+          "image-generation": "圖片生成",
+          "video-generation": "影片生成"
         }
       },
       "reasoningVisibility": {
@@ -608,6 +611,37 @@
             "low": "低"
           }
         }
+      },
+      "videoGeneration": {
+        "size": {
+          "label": "影片尺寸",
+          "placeholder": "例如 1280x720"
+        },
+        "seconds": {
+          "label": "秒數",
+          "placeholder": "例如 5"
+        },
+        "duration": {
+          "label": "時長",
+          "placeholder": "例如 5",
+          "description": "會透過相容影片介面的 extra_body.duration 傳給上游。"
+        },
+        "ratio": {
+          "label": "長寬比",
+          "placeholder": "例如 16:9"
+        },
+        "resolution": {
+          "label": "解析度",
+          "placeholder": "例如 720p"
+        },
+        "watermark": {
+          "label": "浮水印",
+          "description": "當上游介面支援時，請求服務端為生成結果加入浮水印。"
+        },
+        "generateAudio": {
+          "label": "生成音訊",
+          "description": "當上游介面支援時，同時為影片生成音軌。"
+        }
       }
     }
   },
diff --git a/src/renderer/src/pages/NewThreadPage.vue b/src/renderer/src/pages/NewThreadPage.vue
index b0a7c3662..918b3ca1f 100644
--- a/src/renderer/src/pages/NewThreadPage.vue
+++ b/src/renderer/src/pages/NewThreadPage.vue
@@ -851,6 +851,7 @@ const applyDraftDefaultsForSelectedAgent = async (): Promise<void> => {
   draftStore.verbosity = undefined
   draftStore.forceInterleavedThinkingCompat = undefined
   draftStore.imageGeneration = undefined
+  draftStore.videoGeneration = undefined
 
   if (selectedAgent.value.type === 'acp') {
     const resolvedProjectPath = currentProjectPath ?? globalDefaultProjectPath
diff --git a/src/renderer/src/stores/modelStore.ts b/src/renderer/src/stores/modelStore.ts
index e22c77daa..b4c8ab42a 100644
--- a/src/renderer/src/stores/modelStore.ts
+++ b/src/renderer/src/stores/modelStore.ts
@@ -11,6 +11,7 @@ import {
   resolveModelMaxTokens,
   resolveModelVision
 } from '@shared/modelConfigDefaults'
+import { resolveVideoGenerationCompatType } from '@shared/videoGenerationSettings'
 import { useIpcMutation } from '@/composables/useIpcMutation'
 import { useAgentModelStore } from '@/stores/agentModelStore'
 import { useModelConfigStore } from '@/stores/modelConfigStore'
@@ -35,6 +36,19 @@ type ChatSelectableModelGroup = {
   models: RENDERER_MODEL_META[]
 }
 
+const resolveRendererModelType = (
+  model: Pick<MODEL_META, 'id' | 'type' | 'supportedEndpointTypes' | 'endpointType'>
+): ModelType => {
+  return (resolveVideoGenerationCompatType({
+    modelId: model.id,
+    type: model.type,
+    endpointType: model.endpointType,
+    supportedEndpointTypes: model.supportedEndpointTypes
+  }) ??
+    model.type ??
+    ModelType.Chat) as ModelType
+}
+
 export const useModelStore = defineStore('model', () => {
   const modelClient = createModelClient()
   const providerStore = useProviderStore()
@@ -251,7 +265,7 @@ export const useModelStore = defineStore('model', () => {
     ),
     reasoning: model.reasoning ?? false,
     enableSearch: (model as RENDERER_MODEL_META).enableSearch ?? false,
-    type: (model.type ?? ModelType.Chat) as ModelType,
+    type: resolveRendererModelType(model),
     supportedEndpointTypes: model.supportedEndpointTypes,
     endpointType: model.endpointType
   })
@@ -276,7 +290,7 @@ export const useModelStore = defineStore('model', () => {
     ),
     reasoning: model.reasoning ?? false,
     enableSearch: (model as RENDERER_MODEL_META).enableSearch ?? false,
-    type: (model.type ?? ModelType.Chat) as ModelType,
+    type: resolveRendererModelType(model),
     supportedEndpointTypes: model.supportedEndpointTypes,
     endpointType: model.endpointType
   })
@@ -667,7 +681,13 @@ export const useModelStore = defineStore('model', () => {
               (model as RENDERER_MODEL_META).enableSearch ??
               (fallback as RENDERER_MODEL_META | undefined)?.enableSearch ??
               false,
-            type: (model.type ?? fallback?.type ?? ModelType.Chat) as ModelType,
+            type: resolveRendererModelType({
+              id: model.id,
+              type: model.type ?? fallback?.type,
+              supportedEndpointTypes:
+                model.supportedEndpointTypes ?? fallback?.supportedEndpointTypes,
+              endpointType: model.endpointType ?? fallback?.endpointType
+            }),
             supportedEndpointTypes:
               model.supportedEndpointTypes ?? fallback?.supportedEndpointTypes,
             endpointType: model.endpointType ?? fallback?.endpointType
diff --git a/src/renderer/src/stores/ui/draft.ts b/src/renderer/src/stores/ui/draft.ts
index bf02563ae..e958dd863 100644
--- a/src/renderer/src/stores/ui/draft.ts
+++ b/src/renderer/src/stores/ui/draft.ts
@@ -1,6 +1,7 @@
 import { defineStore } from 'pinia'
 import { ref, shallowRef, toRaw } from 'vue'
 import { normalizeImageGenerationOptions } from '@shared/imageGenerationSettings'
+import { normalizeVideoGenerationOptions } from '@shared/videoGenerationSettings'
 import type {
   CreateSessionInput,
   PermissionMode,
@@ -39,6 +40,9 @@ export const useDraftStore = defineStore('draft', () => {
   const imageGeneration = shallowRef<SessionGenerationSettings['imageGeneration'] | undefined>(
     undefined
   )
+  const videoGeneration = shallowRef<SessionGenerationSettings['videoGeneration'] | undefined>(
+    undefined
+  )
   const permissionMode = ref<PermissionMode>('full_access')
   const disabledAgentTools = ref<string[]>([])
   const subagentEnabled = ref(false)
@@ -53,6 +57,12 @@ export const useDraftStore = defineStore('draft', () => {
     return normalizeImageGenerationOptions(toRaw(value))
   }
 
+  function normalizeDraftVideoGeneration(
+    value: SessionGenerationSettings['videoGeneration']
+  ): SessionGenerationSettings['videoGeneration'] {
+    return normalizeVideoGenerationOptions(toRaw(value))
+  }
+
   function toGenerationSettings(): Partial<SessionGenerationSettings> | undefined {
     const settings: Partial<SessionGenerationSettings> = {}
 
@@ -74,6 +84,10 @@ export const useDraftStore = defineStore('draft', () => {
     if (normalizedImageGeneration !== undefined) {
       settings.imageGeneration = normalizedImageGeneration
     }
+    const normalizedVideoGeneration = normalizeDraftVideoGeneration(videoGeneration.value)
+    if (normalizedVideoGeneration !== undefined) {
+      settings.videoGeneration = normalizedVideoGeneration
+    }
 
     return Object.keys(settings).length > 0 ? settings : undefined
   }
@@ -126,6 +140,9 @@ export const useDraftStore = defineStore('draft', () => {
     if (Object.prototype.hasOwnProperty.call(settings, 'imageGeneration')) {
       imageGeneration.value = normalizeDraftImageGeneration(settings.imageGeneration)
     }
+    if (Object.prototype.hasOwnProperty.call(settings, 'videoGeneration')) {
+      videoGeneration.value = normalizeDraftVideoGeneration(settings.videoGeneration)
+    }
   }
 
   function resetGenerationSettings(): void {
@@ -140,6 +157,7 @@ export const useDraftStore = defineStore('draft', () => {
     verbosity.value = undefined
     forceInterleavedThinkingCompat.value = undefined
     imageGeneration.value = undefined
+    videoGeneration.value = undefined
   }
 
   function reset(): void {
@@ -184,6 +202,7 @@ export const useDraftStore = defineStore('draft', () => {
     verbosity,
     forceInterleavedThinkingCompat,
     imageGeneration,
+    videoGeneration,
     permissionMode,
     disabledAgentTools,
     subagentEnabled,
diff --git a/src/shared/contracts/common.ts b/src/shared/contracts/common.ts
index d0d51e1c3..6a6a4e600 100644
--- a/src/shared/contracts/common.ts
+++ b/src/shared/contracts/common.ts
@@ -59,6 +59,39 @@ export const ImageGenerationOptionsSchema = z
   })
   .optional()
 
+export const VideoGenerationOptionsSchema = z
+  .object({
+    seconds: z.string().optional(),
+    size: z.string().optional(),
+    ratio: z.string().optional(),
+    duration: z.number().int().min(-1).optional(),
+    resolution: z.string().optional(),
+    watermark: z.boolean().optional(),
+    generateAudio: z.boolean().optional(),
+    inputReference: z
+      .union([
+        z.string(),
+        z.object({
+          data: z.string(),
+          mimeType: z.string().optional()
+        })
+      ])
+      .optional(),
+    references: z
+      .array(
+        z
+          .object({
+            type: z.enum(['image', 'video', 'audio']),
+            url: z.string().optional(),
+            data: z.string().optional(),
+            mimeType: z.string().optional()
+          })
+          .refine((value) => Boolean(value.url || value.data))
+      )
+      .optional()
+  })
+  .optional()
+
 export const TtsSettingsSchema = z
   .object({
     voice: z.string().optional(),
@@ -105,7 +138,8 @@ export const SessionGenerationSettingsSchema = z.object({
   reasoningVisibility: ReasoningVisibilitySchema.optional(),
   verbosity: VerbositySchema.optional(),
   forceInterleavedThinkingCompat: z.boolean().optional(),
-  imageGeneration: ImageGenerationOptionsSchema
+  imageGeneration: ImageGenerationOptionsSchema,
+  videoGeneration: VideoGenerationOptionsSchema
 })
 
 export const SessionGenerationSettingsPatchSchema = SessionGenerationSettingsSchema.partial()
diff --git a/src/shared/contracts/domainSchemas.ts b/src/shared/contracts/domainSchemas.ts
index 0c0348065..3bd230d8a 100644
--- a/src/shared/contracts/domainSchemas.ts
+++ b/src/shared/contracts/domainSchemas.ts
@@ -4,6 +4,7 @@ import { ApiEndpointType, ModelType, NEW_API_ENDPOINT_TYPES } from '../model'
 import {
   FileMetadataValueSchema,
   ImageGenerationOptionsSchema,
+  VideoGenerationOptionsSchema,
   TtsSettingsSchema,
   JsonValueSchema,
   ProviderModelSummarySchema
@@ -252,6 +253,7 @@ export const ModelConfigSchema = z
     forcedSearch: z.boolean().optional(),
     searchStrategy: z.enum(['turbo', 'balanced', 'precise']).optional(),
     imageGeneration: ImageGenerationOptionsSchema,
+    videoGeneration: VideoGenerationOptionsSchema,
     tts: TtsSettingsSchema
   })
   .passthrough()
diff --git a/src/shared/model.ts b/src/shared/model.ts
index 3bdb3ed98..621457b9b 100644
--- a/src/shared/model.ts
+++ b/src/shared/model.ts
@@ -6,6 +6,7 @@ export enum ModelType {
   Embedding = 'embedding',
   Rerank = 'rerank',
   ImageGeneration = 'imageGeneration',
+  VideoGeneration = 'videoGeneration',
   TTS = 'tts'
 }
 
@@ -21,7 +22,8 @@ export const NEW_API_ENDPOINT_TYPES = [
   'openai-response',
   'anthropic',
   'gemini',
-  'image-generation'
+  'image-generation',
+  'video-generation'
 ] as const
 
 export type NewApiEndpointType = (typeof NEW_API_ENDPOINT_TYPES)[number]
@@ -96,6 +98,7 @@ export const resolveNewApiCapabilityProviderId = (
     case 'openai':
     case 'openai-response':
     case 'image-generation':
+    case 'video-generation':
     default:
       return 'openai'
   }
@@ -139,6 +142,13 @@ export const resolveNewApiEndpointTypeFromRoute = (
     return 'image-generation'
   }
 
+  if (
+    route?.type === ModelType.VideoGeneration &&
+    supportedEndpointTypes.includes('video-generation')
+  ) {
+    return 'video-generation'
+  }
+
   if (shouldUseAnthropicClaudeRouteFromSupportedEndpoints(route, modelId)) {
     return 'anthropic'
   }
@@ -151,6 +161,10 @@ export const resolveNewApiEndpointTypeFromRoute = (
     return 'image-generation'
   }
 
+  if (route?.type === ModelType.VideoGeneration) {
+    return 'video-generation'
+  }
+
   return 'openai'
 }
 
@@ -174,4 +188,5 @@ export const isChatSelectableModelType = (type: ModelType | undefined): boolean
   type === undefined ||
   type === ModelType.Chat ||
   type === ModelType.ImageGeneration ||
+  type === ModelType.VideoGeneration ||
   type === ModelType.TTS
diff --git a/src/shared/types/agent-interface.d.ts b/src/shared/types/agent-interface.d.ts
index c42adf79c..4bce7827a 100644
--- a/src/shared/types/agent-interface.d.ts
+++ b/src/shared/types/agent-interface.d.ts
@@ -1,5 +1,6 @@
 import type { ReasoningEffort, ReasoningVisibility, Verbosity } from './model-db'
 import type { ImageGenerationOptions } from '../imageGenerationSettings'
+import type { VideoGenerationOptions } from '../videoGenerationSettings'
 import type { ToolCallImagePreview } from './core/mcp'
 
 /**
@@ -31,6 +32,7 @@ export interface SessionGenerationSettings {
   verbosity?: Verbosity
   forceInterleavedThinkingCompat?: boolean
   imageGeneration?: ImageGenerationOptions
+  videoGeneration?: VideoGenerationOptions
 }
 
 export interface DeepChatSessionState {
diff --git a/src/shared/types/model-db.ts b/src/shared/types/model-db.ts
index 3e3b225f4..fad9adfaf 100644
--- a/src/shared/types/model-db.ts
+++ b/src/shared/types/model-db.ts
@@ -129,7 +129,9 @@ export const ModelSchema = z.object({
   release_date: z.string().optional(),
   last_updated: z.string().optional(),
   cost: z.record(z.union([z.string(), z.number()])).optional(),
-  type: z.enum(['chat', 'embedding', 'rerank', 'imageGeneration', 'tts']).optional()
+  type: z
+    .enum(['chat', 'embedding', 'rerank', 'imageGeneration', 'videoGeneration', 'tts'])
+    .optional()
 })
 
 export type ProviderModel = z.infer<typeof ModelSchema>
@@ -382,7 +384,13 @@ function getStringNumberRecord(obj: unknown): Record<string, string | number> |
   return Object.keys(out).length ? out : undefined
 }
 
-type ModelTypeValue = 'chat' | 'embedding' | 'rerank' | 'imageGeneration' | 'tts'
+type ModelTypeValue =
+  | 'chat'
+  | 'embedding'
+  | 'rerank'
+  | 'imageGeneration'
+  | 'videoGeneration'
+  | 'tts'
 
 function getEffortValue(v: unknown): ReasoningEffort | undefined {
   return isReasoningEffort(v) ? v : undefined
@@ -464,6 +472,7 @@ function getModelTypeValue(v: unknown): ModelTypeValue | undefined {
     case 'embedding':
     case 'rerank':
     case 'imageGeneration':
+    case 'videoGeneration':
     case 'tts':
       return v
   }
@@ -480,6 +489,10 @@ function getModelTypeValue(v: unknown): ModelTypeValue | undefined {
     case 'imagegeneration':
     case 'imagegen':
       return 'imageGeneration'
+    case 'videogeneration':
+    case 'videogen':
+    case 'video':
+      return 'videoGeneration'
     case 'tts':
       return 'tts'
     default:
diff --git a/src/shared/types/presenters/index.d.ts b/src/shared/types/presenters/index.d.ts
index 647f58d33..c890bb3ba 100644
--- a/src/shared/types/presenters/index.d.ts
+++ b/src/shared/types/presenters/index.d.ts
@@ -15,6 +15,7 @@ export type {
   RateLimitQueueSnapshot,
   RENDERER_MODEL_META,
   StandaloneImageGenerationResult,
+  StandaloneVideoGenerationResult,
   LLM_EMBEDDING_ATTRS,
   KeyStatus,
   AwsBedrockCredential,
diff --git a/src/shared/types/presenters/legacy.presenters.d.ts b/src/shared/types/presenters/legacy.presenters.d.ts
index 175ac10b3..fd8701d80 100644
--- a/src/shared/types/presenters/legacy.presenters.d.ts
+++ b/src/shared/types/presenters/legacy.presenters.d.ts
@@ -6,6 +6,7 @@ import { ShortcutKeySetting } from '@/presenter/configPresenter/shortcutKeySetti
 import type { NewApiEndpointType } from '@shared/model'
 import { ApiEndpointType, ModelType } from '@shared/model'
 import type { ImageGenerationOptions } from '../../imageGenerationSettings'
+import type { VideoGenerationOptions } from '../../videoGenerationSettings'
 import type { TtsSettings } from '../../ttsSettings'
 import type { ReasoningEffort, ReasoningVisibility, Verbosity } from '../model-db'
 import type { HookTestResult, HooksNotificationsSettings } from '../../hooksNotifications'
@@ -181,6 +182,7 @@ export interface ModelConfig {
   forcedSearch?: boolean
   searchStrategy?: 'turbo' | 'balanced' | 'precise'
   imageGeneration?: ImageGenerationOptions
+  videoGeneration?: VideoGenerationOptions
   tts?: TtsSettings
 }
 
@@ -894,6 +896,13 @@ export type StandaloneImageGenerationResult = {
   images: Array<{ data: string; mimeType: string }>
 }
 
+export type StandaloneVideoGenerationResult = {
+  providerId: string
+  modelId: string
+  options?: VideoGenerationOptions
+  videos: Array<{ data: string; mimeType: string }>
+}
+
 export type AcpDebugActionType =
   | 'initialize'
   | 'newSession'
@@ -1273,6 +1282,13 @@ export interface ILlmProviderPresenter {
     imageOptions?: ImageGenerationOptions,
     options?: { signal?: AbortSignal }
   ): Promise<StandaloneImageGenerationResult>
+  generateVideoStandalone(
+    providerId: string,
+    prompt: string,
+    modelId: string,
+    videoOptions?: VideoGenerationOptions,
+    options?: { signal?: AbortSignal }
+  ): Promise<StandaloneVideoGenerationResult>
   getAcpWorkdir(conversationId: string, agentId: string): Promise<AcpWorkdirInfo>
   setAcpWorkdir(conversationId: string, agentId: string, workdir: string | null): Promise<void>
   warmupAcpProcess(agentId: string, workdir?: string): Promise<void>
diff --git a/src/shared/types/presenters/llmprovider.presenter.d.ts b/src/shared/types/presenters/llmprovider.presenter.d.ts
index f224c3838..5dd204caf 100644
--- a/src/shared/types/presenters/llmprovider.presenter.d.ts
+++ b/src/shared/types/presenters/llmprovider.presenter.d.ts
@@ -3,6 +3,7 @@ import type { ChatMessage } from '../core/chat-message'
 import { ModelType } from '../core/model'
 import type { NewApiEndpointType } from '@shared/model'
 import type { ImageGenerationOptions } from '../../imageGenerationSettings'
+import type { VideoGenerationOptions } from '../../videoGenerationSettings'
 import type { AcpDebugRequest, AcpDebugRunResult, AcpWorkdirInfo } from './legacy.presenters'
 
 /**
@@ -114,6 +115,13 @@ export type StandaloneImageGenerationResult = {
   images: Array<{ data: string; mimeType: string }>
 }
 
+export type StandaloneVideoGenerationResult = {
+  providerId: string
+  modelId: string
+  options?: VideoGenerationOptions
+  videos: Array<{ data: string; mimeType: string }>
+}
+
 export interface KeyStatus {
   remainNum?: number
   /** Remaining quota */
@@ -324,6 +332,14 @@ export interface ILlmProviderPresenter {
     options?: { signal?: AbortSignal }
   ): Promise<StandaloneImageGenerationResult>
 
+  generateVideoStandalone(
+    providerId: string,
+    prompt: string,
+    modelId: string,
+    videoOptions?: VideoGenerationOptions,
+    options?: { signal?: AbortSignal }
+  ): Promise<StandaloneVideoGenerationResult>
+
   getAcpWorkdir(conversationId: string, agentId: string): Promise<AcpWorkdirInfo>
   setAcpWorkdir(conversationId: string, agentId: string, workdir: string | null): Promise<void>
   warmupAcpProcess(agentId: string, workdir?: string): Promise<void>
diff --git a/src/shared/videoGenerationSettings.ts b/src/shared/videoGenerationSettings.ts
new file mode 100644
index 000000000..196d476fe
--- /dev/null
+++ b/src/shared/videoGenerationSettings.ts
@@ -0,0 +1,339 @@
+import { ApiEndpointType, ModelType } from './model'
+
+export const VIDEO_GENERATION_ENDPOINT_TYPE = 'video-generation' as const
+
+export type OpenAICompatibleVideoRequestBodyShape = 'extra-body' | 'flat-top-level'
+
+export type VideoGenerationReferenceType = 'image' | 'video' | 'audio'
+
+export interface VideoGenerationReference {
+  type: VideoGenerationReferenceType
+  url?: string
+  data?: string
+  mimeType?: string
+}
+
+export interface VideoGenerationInputReference {
+  data: string
+  mimeType?: string
+}
+
+export interface VideoGenerationOptions {
+  seconds?: string
+  size?: string
+  ratio?: string
+  duration?: number
+  resolution?: string
+  watermark?: boolean
+  generateAudio?: boolean
+  inputReference?: string | VideoGenerationInputReference
+  references?: VideoGenerationReference[]
+}
+
+export interface VideoGenerationDetectionTarget {
+  modelId?: unknown
+  providerId?: unknown
+  providerApiType?: unknown
+  providerKind?: unknown
+  providerOptionsKey?: unknown
+  baseUrl?: unknown
+  apiEndpoint?: unknown
+  endpointType?: unknown
+  supportedEndpointTypes?: readonly unknown[]
+  type?: unknown
+  modalities?: {
+    input?: readonly unknown[]
+    output?: readonly unknown[]
+  } | null
+}
+
+const NON_OPENAI_VIDEO_PROVIDER_HINTS = [
+  'anthropic',
+  'gemini',
+  'vertex',
+  'aws-bedrock',
+  'github-copilot',
+  'ollama',
+  'acp',
+  'voiceai'
+] as const
+
+const FLAT_TOP_LEVEL_VIDEO_PROVIDER_HINTS = ['aihubmix'] as const
+
+const VIDEO_GENERATION_MODEL_ID_PREFIXES = [
+  'doubao-seedance-',
+  'sora-',
+  'veo-',
+  'wan2.',
+  'jimeng-',
+  'happyhorse-'
+] as const
+
+const VIDEO_GENERATION_MODEL_ID_MARKERS = [
+  'seedance',
+  '-t2v',
+  '-i2v',
+  '-r2v',
+  'videoedit',
+  'video-edit'
+] as const
+
+const normalizeText = (value: unknown): string =>
+  typeof value === 'string' ? value.trim().toLowerCase() : ''
+
+const normalizeOptionalString = (value: unknown): string | undefined => {
+  if (typeof value !== 'string') {
+    return undefined
+  }
+
+  const normalized = value.trim()
+  return normalized ? normalized : undefined
+}
+
+function normalizeModelId(value: unknown): string {
+  const normalized = normalizeText(value)
+  if (!normalized) {
+    return ''
+  }
+
+  const slashIndex = normalized.lastIndexOf('/')
+  return slashIndex >= 0 ? normalized.slice(slashIndex + 1) : normalized
+}
+
+function normalizeStringArray(values: readonly unknown[] | undefined): string[] {
+  if (!Array.isArray(values)) {
+    return []
+  }
+
+  return values.map((value) => normalizeText(value)).filter(Boolean)
+}
+
+function normalizeVideoReference(value: unknown): VideoGenerationReference | undefined {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return undefined
+  }
+
+  const record = value as Record<string, unknown>
+  const type = normalizeText(record.type)
+  if (type !== 'image' && type !== 'video' && type !== 'audio') {
+    return undefined
+  }
+
+  const url = normalizeOptionalString(record.url)
+  const data = normalizeOptionalString(record.data)
+  const mimeType = normalizeOptionalString(record.mimeType)
+
+  if (!url && !data) {
+    return undefined
+  }
+
+  return {
+    type,
+    ...(url ? { url } : {}),
+    ...(data ? { data } : {}),
+    ...(mimeType ? { mimeType } : {})
+  }
+}
+
+function normalizeInputReference(
+  value: VideoGenerationOptions['inputReference']
+): VideoGenerationOptions['inputReference'] | undefined {
+  if (typeof value === 'string') {
+    const normalized = value.trim()
+    return normalized ? normalized : undefined
+  }
+
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return undefined
+  }
+
+  const data = normalizeOptionalString(value.data)
+  if (!data) {
+    return undefined
+  }
+
+  const mimeType = normalizeOptionalString(value.mimeType)
+  return {
+    data,
+    ...(mimeType ? { mimeType } : {})
+  }
+}
+
+function hasVideoEndpointHint(target: VideoGenerationDetectionTarget): boolean {
+  const apiEndpoint = normalizeText(target.apiEndpoint)
+  const endpointType = normalizeText(target.endpointType)
+  const supportedEndpointTypes = normalizeStringArray(target.supportedEndpointTypes)
+  const modelType = normalizeText(target.type)
+
+  return (
+    apiEndpoint === ApiEndpointType.Video ||
+    endpointType === VIDEO_GENERATION_ENDPOINT_TYPE ||
+    supportedEndpointTypes.includes(VIDEO_GENERATION_ENDPOINT_TYPE) ||
+    modelType === ModelType.VideoGeneration.toLowerCase()
+  )
+}
+
+function hasVideoOutputModality(target: VideoGenerationDetectionTarget): boolean {
+  const outputModalities = normalizeStringArray(target.modalities?.output)
+  return outputModalities.includes('video')
+}
+
+export function isVideoGenerationModelId(modelId: string): boolean {
+  const normalized = normalizeModelId(modelId)
+  if (!normalized) {
+    return false
+  }
+
+  return (
+    VIDEO_GENERATION_MODEL_ID_PREFIXES.some((prefix) => normalized.startsWith(prefix)) ||
+    VIDEO_GENERATION_MODEL_ID_MARKERS.some((marker) => normalized.includes(marker))
+  )
+}
+
+export function resolveVideoGenerationCompatType(
+  target: VideoGenerationDetectionTarget
+): ModelType | undefined {
+  if (hasVideoEndpointHint(target) || hasVideoOutputModality(target)) {
+    return ModelType.VideoGeneration
+  }
+
+  const modelId = typeof target.modelId === 'string' ? target.modelId : ''
+  return isVideoGenerationModelId(modelId) ? ModelType.VideoGeneration : undefined
+}
+
+export function isVideoGenerationModelConfig(
+  modelConfig: {
+    type?: ModelType
+    apiEndpoint?: ApiEndpointType
+    endpointType?: unknown
+    supportedEndpointTypes?: readonly unknown[]
+  },
+  modelId?: string
+): boolean {
+  return (
+    resolveVideoGenerationCompatType({
+      modelId,
+      type: modelConfig.type,
+      apiEndpoint: modelConfig.apiEndpoint,
+      endpointType: modelConfig.endpointType,
+      supportedEndpointTypes: modelConfig.supportedEndpointTypes
+    }) === ModelType.VideoGeneration
+  )
+}
+
+export function supportsOpenAICompatibleVideoGeneration(
+  target: VideoGenerationDetectionTarget
+): boolean {
+  const providerId = normalizeText(target.providerId)
+  const providerApiType = normalizeText(target.providerApiType)
+  const providerKind = normalizeText(target.providerKind)
+  const providerOptionsKey = normalizeText(target.providerOptionsKey)
+
+  if (
+    NON_OPENAI_VIDEO_PROVIDER_HINTS.some(
+      (hint) =>
+        providerId.includes(hint) || providerApiType.includes(hint) || providerKind.includes(hint)
+    )
+  ) {
+    return false
+  }
+
+  const isOpenAICompatibleProvider =
+    providerKind === 'openai-compatible' ||
+    providerKind === 'openai-responses' ||
+    providerOptionsKey === 'openai' ||
+    providerOptionsKey === 'new-api' ||
+    providerId === 'openai' ||
+    providerId === 'openai-responses' ||
+    providerId === 'new-api' ||
+    providerApiType === 'openai' ||
+    providerApiType === 'openai-compatible' ||
+    providerApiType === 'openai-responses' ||
+    providerApiType === 'openai_chat' ||
+    providerApiType === 'new-api'
+
+  return (
+    isOpenAICompatibleProvider &&
+    resolveVideoGenerationCompatType(target) === ModelType.VideoGeneration
+  )
+}
+
+export function resolveOpenAICompatibleVideoRequestBodyShape(
+  target: VideoGenerationDetectionTarget
+): OpenAICompatibleVideoRequestBodyShape {
+  const providerId = normalizeText(target.providerId)
+  const providerApiType = normalizeText(target.providerApiType)
+  const providerKind = normalizeText(target.providerKind)
+  const providerOptionsKey = normalizeText(target.providerOptionsKey)
+  const baseUrl = normalizeText(target.baseUrl)
+  const modelId = normalizeModelId(target.modelId)
+
+  if (
+    FLAT_TOP_LEVEL_VIDEO_PROVIDER_HINTS.some(
+      (hint) =>
+        providerId.includes(hint) ||
+        providerApiType.includes(hint) ||
+        providerKind.includes(hint) ||
+        providerOptionsKey.includes(hint) ||
+        baseUrl.includes(hint)
+    )
+  ) {
+    return 'flat-top-level'
+  }
+
+  if (modelId.startsWith('doubao-seedance-')) {
+    return 'flat-top-level'
+  }
+
+  return 'extra-body'
+}
+
+export function normalizeVideoGenerationOptions(
+  value: VideoGenerationOptions | null | undefined
+): VideoGenerationOptions | undefined {
+  if (!value || typeof value !== 'object') {
+    return undefined
+  }
+
+  const normalized: VideoGenerationOptions = {}
+  const seconds = normalizeOptionalString(value.seconds)
+  const size = normalizeOptionalString(value.size)
+  const ratio = normalizeOptionalString(value.ratio)
+  const resolution = normalizeOptionalString(value.resolution)
+  const inputReference = normalizeInputReference(value.inputReference)
+  const references = Array.isArray(value.references)
+    ? value.references
+        .map((item) => normalizeVideoReference(item))
+        .filter((item): item is VideoGenerationReference => item !== undefined)
+    : []
+
+  if (seconds) {
+    normalized.seconds = seconds
+  }
+  if (size) {
+    normalized.size = size
+  }
+  if (ratio) {
+    normalized.ratio = ratio
+  }
+  if (resolution) {
+    normalized.resolution = resolution
+  }
+  if (typeof value.duration === 'number' && Number.isFinite(value.duration)) {
+    normalized.duration = Math.max(-1, Math.round(value.duration))
+  }
+  if (typeof value.watermark === 'boolean') {
+    normalized.watermark = value.watermark
+  }
+  if (typeof value.generateAudio === 'boolean') {
+    normalized.generateAudio = value.generateAudio
+  }
+  if (inputReference) {
+    normalized.inputReference = inputReference
+  }
+  if (references.length > 0) {
+    normalized.references = references
+  }
+
+  return Object.keys(normalized).length > 0 ? normalized : undefined
+}
diff --git a/test/main/presenter/llmProviderPresenter/aiSdkRuntime.test.ts b/test/main/presenter/llmProviderPresenter/aiSdkRuntime.test.ts
index 683749d26..b76887d89 100644
--- a/test/main/presenter/llmProviderPresenter/aiSdkRuntime.test.ts
+++ b/test/main/presenter/llmProviderPresenter/aiSdkRuntime.test.ts
@@ -558,6 +558,214 @@ describe('AI SDK runtime', () => {
     ])
   })
 
+  it('does not inject unsupported Seedance duration from prompt text', async () => {
+    const videoBytes = Uint8Array.from([0, 1, 2, 3])
+    const expectedBase64 = Buffer.from(videoBytes).toString('base64')
+    const tracePayloads: Array<{ body?: Record<string, unknown> }> = []
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(
+        new Response(
+          JSON.stringify({
+            id: 'task-video-1',
+            status: 'submitted'
+          }),
+          {
+            status: 200,
+            headers: {
+              'Content-Type': 'application/json'
+            }
+          }
+        )
+      )
+      .mockResolvedValueOnce(
+        new Response(
+          JSON.stringify({
+            id: 'task-video-1',
+            status: 'completed',
+            url: 'https://cdn.example.com/video.mp4'
+          }),
+          {
+            status: 200,
+            headers: {
+              'Content-Type': 'application/json'
+            }
+          }
+        )
+      )
+      .mockResolvedValueOnce(
+        new Response(videoBytes, {
+          status: 200,
+          headers: {
+            'Content-Type': 'video/mp4'
+          }
+        })
+      )
+    vi.stubGlobal('fetch', fetchMock)
+
+    const context = {
+      providerKind: 'openai-compatible',
+      provider: {
+        id: 'aihubmix',
+        apiType: 'openai-compatible',
+        baseUrl: 'https://aihubmix.com/v1',
+        apiKey: 'test-key'
+      },
+      configPresenter: {},
+      defaultHeaders: {
+        'APP-Code': 'SMUE7630'
+      },
+      shouldUseVideoGeneration: () => true,
+      emitRequestTrace: vi.fn(async (_modelConfig, payload) => {
+        tracePayloads.push(payload)
+      })
+    } as any
+
+    const events = []
+    for await (const event of runAiSdkCoreStream(
+      context,
+      [{ role: 'user', content: '生成 马斯克 喝酒的视频 2s' }],
+      'doubao-seedance-2-0-fast-260128',
+      {
+        apiEndpoint: 'video'
+      } as any,
+      0.7,
+      1024,
+      []
+    )) {
+      events.push(event)
+    }
+
+    expect(fetchMock.mock.calls[0]?.[0]).toBe('https://aihubmix.com/v1/videos')
+
+    const requestInit = fetchMock.mock.calls[0]?.[1] as RequestInit
+    const payload = JSON.parse(String(requestInit.body)) as Record<string, unknown>
+    expect(payload).toMatchObject({
+      model: 'doubao-seedance-2-0-fast-260128',
+      prompt: '生成 马斯克 喝酒的视频 2s'
+    })
+    expect(payload).not.toHaveProperty('duration')
+    expect(tracePayloads[0]?.body).not.toHaveProperty('duration')
+
+    expect(events).toEqual([
+      {
+        type: 'image_data',
+        image_data: {
+          data: `data:video/mp4;base64,${expectedBase64}`,
+          mimeType: 'video/mp4'
+        }
+      },
+      {
+        type: 'stop',
+        stop_reason: 'complete'
+      }
+    ])
+  })
+
+  it('derives supported Seedance duration from prompt text', async () => {
+    const videoBytes = Uint8Array.from([0, 1, 2, 3])
+    const expectedBase64 = Buffer.from(videoBytes).toString('base64')
+    const tracePayloads: Array<{ body?: Record<string, unknown> }> = []
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(
+        new Response(
+          JSON.stringify({
+            id: 'task-video-2',
+            status: 'submitted'
+          }),
+          {
+            status: 200,
+            headers: {
+              'Content-Type': 'application/json'
+            }
+          }
+        )
+      )
+      .mockResolvedValueOnce(
+        new Response(
+          JSON.stringify({
+            id: 'task-video-2',
+            status: 'completed',
+            url: 'https://cdn.example.com/video-supported.mp4'
+          }),
+          {
+            status: 200,
+            headers: {
+              'Content-Type': 'application/json'
+            }
+          }
+        )
+      )
+      .mockResolvedValueOnce(
+        new Response(videoBytes, {
+          status: 200,
+          headers: {
+            'Content-Type': 'video/mp4'
+          }
+        })
+      )
+    vi.stubGlobal('fetch', fetchMock)
+
+    const context = {
+      providerKind: 'openai-compatible',
+      provider: {
+        id: 'aihubmix',
+        apiType: 'openai-compatible',
+        baseUrl: 'https://aihubmix.com/v1',
+        apiKey: 'test-key'
+      },
+      configPresenter: {},
+      defaultHeaders: {
+        'APP-Code': 'SMUE7630'
+      },
+      shouldUseVideoGeneration: () => true,
+      emitRequestTrace: vi.fn(async (_modelConfig, payload) => {
+        tracePayloads.push(payload)
+      })
+    } as any
+
+    const events = []
+    for await (const event of runAiSdkCoreStream(
+      context,
+      [{ role: 'user', content: '生成 马斯克 喝酒的视频 5s' }],
+      'doubao-seedance-2-0-fast-260128',
+      {
+        apiEndpoint: 'video'
+      } as any,
+      0.7,
+      1024,
+      []
+    )) {
+      events.push(event)
+    }
+
+    const requestInit = fetchMock.mock.calls[0]?.[1] as RequestInit
+    const payload = JSON.parse(String(requestInit.body)) as Record<string, unknown>
+    expect(payload).toMatchObject({
+      model: 'doubao-seedance-2-0-fast-260128',
+      prompt: '生成 马斯克 喝酒的视频 5s',
+      duration: 5
+    })
+    expect(tracePayloads[0]?.body).toMatchObject({
+      duration: 5
+    })
+
+    expect(events).toEqual([
+      {
+        type: 'image_data',
+        image_data: {
+          data: `data:video/mp4;base64,${expectedBase64}`,
+          mimeType: 'video/mp4'
+        }
+      },
+      {
+        type: 'stop',
+        stop_reason: 'complete'
+      }
+    ])
+  })
+
   it('omits temperature for anthropic models that disable temperature control', async () => {
     const tracePayloads: Array<{ body?: Record<string, unknown> }> = []
     const context = {
diff --git a/test/main/presenter/llmProviderPresenter/aihubmixProvider.test.ts b/test/main/presenter/llmProviderPresenter/aihubmixProvider.test.ts
index 71e29fb63..356c0382f 100644
--- a/test/main/presenter/llmProviderPresenter/aihubmixProvider.test.ts
+++ b/test/main/presenter/llmProviderPresenter/aihubmixProvider.test.ts
@@ -118,4 +118,37 @@ describe('AihubmixProvider AI SDK runtime headers', () => {
       'X-Title': 'DeepChat'
     })
   })
+
+  it('treats Seedance models as video generation even when metadata is still chat', async () => {
+    const provider = new AiSdkProvider(createProvider(), createConfigPresenter())
+    ;(provider as any).isInitialized = true
+
+    const modelConfig = {
+      maxTokens: 1024,
+      contextLength: 8192,
+      vision: false,
+      functionCall: false,
+      reasoning: false,
+      type: 'chat'
+    } as ModelConfig
+
+    for await (const _event of provider.coreStream(
+      [{ role: 'user', content: '生成 马斯克 喝酒的视频 2s' }],
+      'doubao-seedance-2-0-fast-260128',
+      modelConfig,
+      0.7,
+      256,
+      []
+    )) {
+      break
+    }
+
+    const context = mockRunAiSdkCoreStream.mock.calls.at(-1)?.[0]
+
+    expect(context.providerKind).toBe('openai-compatible')
+    expect(context.shouldUseVideoGeneration('doubao-seedance-2-0-fast-260128', modelConfig)).toBe(
+      true
+    )
+    expect(context.shouldUseVideoGeneration('gpt-4o', { type: 'chat' } as ModelConfig)).toBe(false)
+  })
 })

From 4ac4b1fb1e2d28649d47f6d69e5001b0d8a9774a Mon Sep 17 00:00:00 2001
From: xiaomo <wegi866@gmail.com>
Date: Mon, 18 May 2026 17:56:47 +0800
Subject: [PATCH 2/3] Copilot/fix review feedback pr 1637 (#1639)

* fix(agent): bypass chat budget for image routes (#1636)

* Initial plan

* merge(gen-video): resolve agent runtime conflict

Agent-Logs-Url: https://github.com/ThinkInAIXYZ/deepchat/sessions/7c9b17d6-272d-44a2-ab2d-57a554c773ab

Co-authored-by: zhangmo8 <43628500+zhangmo8@users.noreply.github.com>

---------

Co-authored-by: duskzhen <zerob13@gmail.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
---
 .../plan.md                                   |  24 ++++
 .../spec.md                                   |  41 +++++++
 .../tasks.md                                  |   7 ++
 .../presenter/agentRuntimePresenter/index.ts  |   4 +-
 .../agentRuntimePresenter.test.ts             | 103 ++++++++++++++++++
 5 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 docs/issues/image-generation-context-budget-bypass/plan.md
 create mode 100644 docs/issues/image-generation-context-budget-bypass/spec.md
 create mode 100644 docs/issues/image-generation-context-budget-bypass/tasks.md

diff --git a/docs/issues/image-generation-context-budget-bypass/plan.md b/docs/issues/image-generation-context-budget-bypass/plan.md
new file mode 100644
index 000000000..8557e630b
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/plan.md
@@ -0,0 +1,24 @@
+# Image Generation Context Budget Bypass Plan
+
+## Approach
+
+- Add a model-aware Agent runtime helper that returns true only when DeepChat should use its chat
+  context budget.
+- Keep ACP bypass behavior, and also bypass when the model config explicitly identifies
+  `ImageGeneration`, `TTS`, a non-chat API endpoint, or `endpointType === 'image-generation'`.
+- Treat missing legacy model metadata as chat-compatible.
+
+## Runtime Changes
+
+- Use the helper in new user turns and resume/retry context construction before deciding whether to
+  compact, trim, or use a finite chat context length.
+- Use the helper inside the provider-call wrapper before running preflight/recovery or shrinking the
+  per-call `maxTokens`.
+- Leave `contextBudget.ts`, public contracts, IPC, and renderer code unchanged.
+
+## Test Strategy
+
+- Add an Agent runtime regression for an image endpoint request that would fail chat-budget
+  preflight, asserting the provider is still called and max tokens are preserved.
+- Keep chat-model pressure tests verifying the existing budget preflight path still runs.
+- Run the targeted Agent runtime/context budget tests plus repository format, i18n, and lint checks.
diff --git a/docs/issues/image-generation-context-budget-bypass/spec.md b/docs/issues/image-generation-context-budget-bypass/spec.md
new file mode 100644
index 000000000..cc61002f3
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/spec.md
@@ -0,0 +1,41 @@
+# Image Generation Context Budget Bypass Spec
+
+> Status: Draft
+> Date: 2026-05-18
+
+## Background
+
+DeepChat Agent applies a chat-oriented provider-call context preflight before sending model
+requests. The check estimates message tokens, tool schemas, and output tokens, then blocks requests
+that cannot fit inside the configured model context window.
+
+That check is valid for chat models, but image generation and other non-chat routes do not use the
+same request shape. Image requests can therefore fail before reaching the provider with:
+
+`Request was not sent because it cannot fit within the model context window after applying the safety margin.`
+
+## Goals
+
+- Only apply DeepChat's chat context budget to chat model requests.
+- Skip the chat budget preflight, compaction recovery, and temporary max-token shrink for explicit
+  image generation and other non-chat model routes.
+- Preserve current behavior for chat models and ACP provider bypasses.
+
+## Acceptance Criteria
+
+- Image generation models or image endpoints reach the provider even when chat-budget estimation
+  would fail.
+- Non-chat requests do not trigger the DeepChat context-pressure compaction path solely because of
+  chat message/tool-schema estimates.
+- Non-chat request max tokens are not reduced by the chat preflight safety margin.
+- Chat models keep the existing preflight, recovery, and overflow failure behavior.
+- Existing legacy model configs without explicit type or endpoint metadata continue to be treated as
+  chat requests.
+- No public API, IPC, schema, or renderer UI changes are introduced.
+
+## Non-Goals
+
+- Redesign image generation request construction.
+- Change the agent image generation tool behavior for chat models.
+- Change `contextBudget.ts` budgeting math.
+- Add renderer UI for non-chat routing diagnostics.
diff --git a/docs/issues/image-generation-context-budget-bypass/tasks.md b/docs/issues/image-generation-context-budget-bypass/tasks.md
new file mode 100644
index 000000000..727a38d9b
--- /dev/null
+++ b/docs/issues/image-generation-context-budget-bypass/tasks.md
@@ -0,0 +1,7 @@
+# Image Generation Context Budget Bypass Tasks
+
+- [x] Document the issue and intended behavior.
+- [x] Add model-aware chat budget gating in the Agent runtime.
+- [x] Add regression coverage for image endpoint bypass.
+- [x] Verify chat context budget behavior remains unchanged.
+- [x] Run targeted tests and repository quality checks.
diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts
index d5cd9a559..13b6e2d32 100644
--- a/src/main/presenter/agentRuntimePresenter/index.ts
+++ b/src/main/presenter/agentRuntimePresenter/index.ts
@@ -57,7 +57,7 @@ import {
   normalizeImageGenerationOptions,
   supportsOpenAIImageGenerationSettings
 } from '@shared/imageGenerationSettings'
-import { ModelType, isDeepSeekSeriesModelId } from '@shared/model'
+import { ApiEndpointType, ModelType, isDeepSeekSeriesModelId } from '@shared/model'
 import { isTtsModelConfig, isTtsModelId } from '@shared/ttsSettings'
 import {
   isVideoGenerationModelConfig,
@@ -1456,6 +1456,8 @@ export class AgentRuntimePresenter implements IAgentImplementation {
     return (
       modelConfig.type === ModelType.ImageGeneration ||
       modelConfig.type === ModelType.TTS ||
+      (modelConfig.apiEndpoint != null && modelConfig.apiEndpoint !== ApiEndpointType.Chat) ||
+      modelConfig.endpointType === 'image-generation' ||
       isVideoGenerationModelConfig(modelConfig, normalizedModelId)
     )
   }
diff --git a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
index 8aebcc8c3..466e3425b 100644
--- a/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
+++ b/test/main/presenter/agentRuntimePresenter/agentRuntimePresenter.test.ts
@@ -4,6 +4,7 @@ import os from 'os'
 import path from 'path'
 import { app } from 'electron'
 import type { DeepChatSessionState } from '@shared/types/agent-interface'
+import { ApiEndpointType, ModelType } from '@shared/model'
 import { AgentRuntimePresenter } from '@/presenter/agentRuntimePresenter/index'
 import { NewSessionHooksBridge } from '@/presenter/hooksNotifications/newSessionBridge'
 import { estimateMessagesTokens } from '@/presenter/agentRuntimePresenter/contextBuilder'
@@ -3471,6 +3472,108 @@ describe('AgentRuntimePresenter', () => {
       )
     })
 
+    it('bypasses chat context preflight for image generation endpoints', async () => {
+      const imageModelConfig = {
+        temperature: 0.7,
+        maxTokens: 4096,
+        contextLength: 8192,
+        thinkingBudget: 512,
+        reasoningEffort: 'medium',
+        verbosity: 'medium',
+        vision: false,
+        functionCall: false,
+        reasoning: false,
+        type: ModelType.ImageGeneration,
+        apiEndpoint: ApiEndpointType.Image,
+        endpointType: 'image-generation' as const
+      }
+      configPresenter.getModelConfig.mockImplementation((modelId: string) =>
+        modelId === 'gpt-image-2'
+          ? imageModelConfig
+          : {
+              temperature: 0.7,
+              maxTokens: 4096,
+              contextLength: 128000,
+              thinkingBudget: 512,
+              reasoningEffort: 'medium',
+              verbosity: 'medium',
+              vision: false
+            }
+      )
+      const prepareSpy = vi.spyOn(
+        (agent as unknown as { compactionService: { prepareForNextUserTurn: () => unknown } })
+          .compactionService,
+        'prepareForNextUserTurn'
+      )
+
+      await agent.initSession('s1', {
+        providerId: 'openai',
+        modelId: 'gpt-image-2',
+        generationSettings: {
+          contextLength: 8192,
+          maxTokens: 4096
+        }
+      })
+      await agent.processMessage('s1', 'draw a mountain')
+
+      const callArgs = (processStream as ReturnType<typeof vi.fn>).mock.calls[0][0]
+      expect(callArgs.maxTokens).toBe(4096)
+      expect(prepareSpy).not.toHaveBeenCalled()
+
+      const providerCoreStream = llmProvider.getProviderInstance.mock.results[0].value.coreStream
+      providerCoreStream.mockClear()
+      llmProvider.generateText.mockClear()
+      const oversizedTools = [
+        {
+          type: 'function',
+          function: {
+            name: 'large_schema',
+            description: makeTextWithEstimatedTokens(10000),
+            parameters: {
+              type: 'object',
+              properties: {
+                prompt: {
+                  type: 'string',
+                  description: makeTextWithEstimatedTokens(10000)
+                }
+              },
+              required: ['prompt']
+            }
+          },
+          server: {
+            name: 'test',
+            icons: '',
+            description: 'large schema'
+          }
+        }
+      ]
+      const requestMessages = [
+        { role: 'user' as const, content: makeTextWithEstimatedTokens(9000) }
+      ]
+
+      for await (const _event of callArgs.coreStream(
+        requestMessages,
+        callArgs.modelId,
+        callArgs.modelConfig,
+        callArgs.temperature,
+        4096,
+        oversizedTools
+      )) {
+      }
+
+      expect(providerCoreStream).toHaveBeenCalledTimes(1)
+      expect(providerCoreStream.mock.calls[0][0]).toEqual(requestMessages)
+      expect(providerCoreStream.mock.calls[0][4]).toBe(4096)
+      expect(providerCoreStream.mock.calls[0][5]).toEqual(oversizedTools)
+      expect(llmProvider.generateText).not.toHaveBeenCalled()
+      expect(
+        JSON.stringify((eventBus.sendToRenderer as ReturnType<typeof vi.fn>).mock.calls)
+      ).not.toContain('Request was not sent')
+      expect(
+        JSON.stringify(sqlitePresenter.deepchatMessagesTable.updateContentAndStatus.mock.calls)
+      ).not.toContain('Request was not sent')
+    })
+
     it('preflights provider calls with a safety margin and compacts before low-output pressure calls', async () => {
       await agent.initSession('s1', {
         providerId: 'openai',

From 4e163c158a6181152058993a54e89c1698e22db7 Mon Sep 17 00:00:00 2001
From: zerob13 <zerob13@gmail.com>
Date: Mon, 18 May 2026 18:58:55 +0800
Subject: [PATCH 3/3] fix: review issue

---
 .../message/MessageItemAssistant.vue          | 31 +++++++-
 .../message/MessageItemAssistant.test.ts      | 71 ++++++++++++++-----
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/renderer/src/components/message/MessageItemAssistant.vue b/src/renderer/src/components/message/MessageItemAssistant.vue
index bf4cbd680..9d94c8e8f 100644
--- a/src/renderer/src/components/message/MessageItemAssistant.vue
+++ b/src/renderer/src/components/message/MessageItemAssistant.vue
@@ -243,16 +243,41 @@ const isAudioBlock = (block: DisplayAssistantMessageBlock): boolean => {
   return false
 }
 
+// Decides by the URL path alone, so an extension inside a query or fragment is ignored.
+const isVideoUrl = (value: string): boolean => {
+  if (!value) return false
+  // `imgcache://` URLs are parsed under a placeholder https origin.
+  const parseable = value.startsWith('imgcache://')
+    ? value.replace('imgcache://', 'https://imgcache.local/')
+    : value
+  let path: string
+  try {
+    path = new URL(parseable).pathname
+  } catch {
+    path = parseable.replace(/[?#][\s\S]*$/, '') // unparseable: cut query/fragment by hand
+  }
+  const lowerPath = path.toLowerCase()
+  return VIDEO_EXTENSIONS.some((ext) => lowerPath.endsWith(ext))
+}
+
+// Extracts a string payload from `block.content`: a string is returned as is, an object with
+// a `data` key yields `String(data)` ('' when nullish), and anything else yields ''.
+const getLegacyBlockData = (block: DisplayAssistantMessageBlock): string => {
+  const legacy: unknown = block.content
+  if (typeof legacy === 'string') return legacy
+  if (!legacy || typeof legacy !== 'object' || !('data' in legacy)) return ''
+  return String((legacy as { data?: unknown }).data ?? '')
+}
+
 const isVideoBlock = (block: DisplayAssistantMessageBlock): boolean => {
   if (block.type === 'video') return true
   if (block.type !== 'image') return false
   const mimeType = block.image_data?.mimeType?.toLowerCase() || ''
   if (mimeType.startsWith('video/')) return true
-  const data = block.image_data?.data || ''
+  const data = block.image_data?.data || getLegacyBlockData(block)
   if (data.startsWith('data:video/')) return true
   if (data.startsWith('imgcache://') || data.startsWith('http://') || data.startsWith('https://')) {
-    const lower = data.toLowerCase()
-    return VIDEO_EXTENSIONS.some((ext) => lower.includes(ext))
+    return isVideoUrl(data)
   }
   return false
 }
diff --git a/test/renderer/components/message/MessageItemAssistant.test.ts b/test/renderer/components/message/MessageItemAssistant.test.ts
index 02f9147d6..205230d23 100644
--- a/test/renderer/components/message/MessageItemAssistant.test.ts
+++ b/test/renderer/components/message/MessageItemAssistant.test.ts
@@ -2,7 +2,10 @@ import { mount } from '@vue/test-utils'
 import { defineComponent } from 'vue'
 import { describe, expect, it, vi } from 'vitest'
 import MessageItemAssistant from '@/components/message/MessageItemAssistant.vue'
-import type { DisplayAssistantMessage } from '@/components/chat/messageListItems'
+import type {
+  DisplayAssistantMessage,
+  DisplayAssistantMessageBlock
+} from '@/components/chat/messageListItems'
 
 vi.mock('vue-i18n', () => ({
   useI18n: () => ({
@@ -121,6 +124,19 @@ const createMessage = (
   content
 })
 
+const createVideoLikeImageBlock = (
+  overrides: Partial<DisplayAssistantMessageBlock> = {}
+): DisplayAssistantMessageBlock => ({
+  type: 'image',
+  status: 'success',
+  timestamp: 1,
+  image_data: {
+    data: 'https://example.com/sample.png',
+    mimeType: 'image/png'
+  },
+  ...overrides
+})
+
 describe('MessageItemAssistant', () => {
   const global = {
     stubs: {
@@ -134,6 +150,16 @@ describe('MessageItemAssistant', () => {
       MessageToolbar: componentStub('MessageToolbar'),
       MessageBlockAction: componentStub('MessageBlockAction'),
       MessageBlockImage: componentStub('MessageBlockImage'),
+      MessageBlockVideo: defineComponent({
+        name: 'MessageBlockVideo',
+        props: {
+          block: {
+            type: Object,
+            required: false
+          }
+        },
+        template: '<div data-testid="video-block" />'
+      }),
       MessageBlockAudio: componentStub('MessageBlockAudio'),
       MessageBlockPlan: componentStub('MessageBlockPlan')
     }
@@ -163,28 +189,39 @@ describe('MessageItemAssistant', () => {
     expect(wrapper.find('[data-testid="spinner"]').exists()).toBe(true)
   })
 
-  it('renders a spinner for the currently displayed pending variant', async () => {
-    const variant = {
-      ...createMessage('pending', []),
-      id: 'm1-variant',
-      is_variant: 1
-    }
-
+  it('renders video blocks from legacy content urls', () => {
     const wrapper = mount(MessageItemAssistant, {
       props: {
-        message: {
-          ...createMessage('sent', []),
-          variants: [variant]
-        },
-        isCapturingImage: false,
-        useLegacyActions: true
+        message: createMessage('sent', [
+          createVideoLikeImageBlock({
+            content: 'https://example.com/media/generated-video.mp4?download=1',
+            image_data: undefined
+          })
+        ]),
+        isCapturingImage: false
       },
       global
     })
 
-    wrapper.vm.handleAction('next')
-    await wrapper.vm.$nextTick()
+    expect(wrapper.find('[data-testid="video-block"]').exists()).toBe(true)
+  })
 
-    expect(wrapper.find('[data-testid="spinner"]').exists()).toBe(true)
+  it('does not classify non-video urls as video blocks when extensions only appear in query text', () => {
+    const wrapper = mount(MessageItemAssistant, {
+      props: {
+        message: createMessage('sent', [
+          createVideoLikeImageBlock({
+            image_data: {
+              data: 'https://example.com/assets/preview.png?redirect=.mp4',
+              mimeType: 'image/png'
+            }
+          })
+        ]),
+        isCapturingImage: false
+      },
+      global
+    })
+
+    expect(wrapper.find('[data-testid="video-block"]').exists()).toBe(false)
   })
 })