From 8b56897926929f76929df370c8746285d13491d4 Mon Sep 17 00:00:00 2001 From: Simon He <674949287@qq.com> Date: Fri, 15 May 2026 13:57:48 +0800 Subject: [PATCH 01/11] chore(deps): bump markstream-vue to 1.0.0-rc.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ff87a3f68..7438b719e 100644 --- a/package.json +++ b/package.json @@ -179,7 +179,7 @@ "katex": "^0.16.27", "lint-staged": "^16.4.0", "lucide-vue-next": "^0.544.0", - "markstream-vue": "0.0.14-beta.8", + "markstream-vue": "1.0.0-rc.0", "mermaid": "^11.13.0", "minimatch": "^10.2.4", "monaco-editor": "^0.55.1", From ea12958c31777c65d67f80ee608b2b06e66f4df4 Mon Sep 17 00:00:00 2001 From: xiaomo Date: Fri, 15 May 2026 14:37:04 +0800 Subject: [PATCH 02/11] =?UTF-8?q?feat(plugin):=20enhance=20Feishu=20plugin?= =?UTF-8?q?=20packaging=20and=20build=20process=20for=20a=E2=80=A6=20(#162?= =?UTF-8?q?8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(plugin): enhance Feishu plugin packaging and build process for all platforms * chore: update --- .github/workflows/build.yml | 25 +++-- .github/workflows/release.yml | 25 +++-- docs/guides/plugin-packaging.md | 139 +++++++++++++++++++--------- electron-builder.yml | 7 +- package.json | 34 +++---- plugins/feishu/plugin.json | 2 +- scripts/plugin.mjs | 156 ++++++++++++++++++++++++++++++++ 7 files changed, 308 insertions(+), 80 deletions(-) create mode 100644 scripts/plugin.mjs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 47af9bc9f..980f2713a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -70,6 +70,7 @@ jobs: - name: Build Windows run: | pnpm run build + pnpm run plugin:bundle -- --name feishu --platform win32 --arch ${{ matrix.arch }} pnpm exec electron-builder --win --${{ matrix.arch }} --publish=never env: VITE_GITHUB_CLIENT_ID: ${{ secrets.DC_GITHUB_CLIENT_ID }} @@ -77,6 +78,11 @@ jobs: 
VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify bundled plugins + shell: bash + run: | + pnpm run plugin:verify -- --name feishu --platform win32 --arch ${{ matrix.arch }} --plugin-root dist/win-unpacked/resources/app.asar.unpacked/plugins + - name: Upload artifacts uses: actions/upload-artifact@v6 with: @@ -125,6 +131,7 @@ jobs: - name: Build Linux run: | pnpm run build + pnpm run plugin:bundle -- --name feishu --platform linux --arch ${{ matrix.arch }} pnpm exec electron-builder --linux --${{ matrix.arch }} --publish=never env: VITE_GITHUB_CLIENT_ID: ${{ secrets.DC_GITHUB_CLIENT_ID }} @@ -132,6 +139,11 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify bundled plugins + shell: bash + run: | + pnpm run plugin:verify -- --name feishu --platform linux --arch ${{ matrix.arch }} --plugin-root dist/linux-unpacked/resources/app.asar.unpacked/plugins + - name: Upload artifacts uses: actions/upload-artifact@v6 with: @@ -188,7 +200,9 @@ jobs: - name: Build Mac run: | pnpm run build - pnpm run plugin:cua:bundle:mac:${{ matrix.arch }} + pnpm run plugin:cua:build:mac:${{ matrix.arch }} + pnpm run plugin:bundle -- --name cua --platform darwin --arch ${{ matrix.arch }} + pnpm run plugin:bundle -- --name feishu --platform darwin --arch ${{ matrix.arch }} pnpm exec electron-builder --mac --${{ matrix.arch }} --publish=never env: CSC_LINK: ${{ secrets.DEEPCHAT_CSC_LINK }} @@ -203,19 +217,18 @@ jobs: NODE_OPTIONS: '--max-old-space-size=4096' VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} - - name: Verify bundled CUA plugin + - name: Verify bundled plugins shell: bash env: TARGET_ARCH: ${{ matrix.arch }} run: | - VERSION="$(node -p "require('./package.json').version")" APP_DIR="dist/mac/DeepChat.app" if [ "$TARGET_ARCH" = "arm64" ]; then APP_DIR="dist/mac-arm64/DeepChat.app" fi - 
PLUGIN="${APP_DIR}/Contents/Resources/app.asar.unpacked/plugins/deepchat-plugin-cua-${VERSION}-darwin-${TARGET_ARCH}.dcplugin" - test -f "$PLUGIN" - ls -lh "$PLUGIN" + PLUGIN_ROOT="${APP_DIR}/Contents/Resources/app.asar.unpacked/plugins" + pnpm run plugin:verify -- --name cua --platform darwin --arch "$TARGET_ARCH" --plugin-root "$PLUGIN_ROOT" + pnpm run plugin:verify -- --name feishu --platform darwin --arch "$TARGET_ARCH" --plugin-root "$PLUGIN_ROOT" - name: Upload artifacts uses: actions/upload-artifact@v6 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 153c9266a..6d0495900 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -165,6 +165,7 @@ jobs: - name: Build Windows run: | pnpm run build + pnpm run plugin:bundle -- --name feishu --platform win32 --arch ${{ matrix.arch }} pnpm exec electron-builder --win --${{ matrix.arch }} --publish=never env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -173,6 +174,11 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify bundled plugins + shell: bash + run: | + pnpm run plugin:verify -- --name feishu --platform win32 --arch ${{ matrix.arch }} --plugin-root dist/win-unpacked/resources/app.asar.unpacked/plugins + - name: Upload artifacts uses: actions/upload-artifact@v6 with: @@ -221,6 +227,7 @@ jobs: - name: Build Linux run: | pnpm run build + pnpm run plugin:bundle -- --name feishu --platform linux --arch ${{ matrix.arch }} pnpm exec electron-builder --linux --${{ matrix.arch }} --publish=never env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -229,6 +236,11 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify bundled plugins + shell: bash + run: | + pnpm run plugin:verify -- --name feishu --platform linux --arch ${{ matrix.arch }} --plugin-root 
dist/linux-unpacked/resources/app.asar.unpacked/plugins + - name: Upload artifacts uses: actions/upload-artifact@v6 with: @@ -288,7 +300,9 @@ jobs: - name: Build Mac run: | pnpm run build - pnpm run plugin:cua:bundle:mac:${{ matrix.arch }} + pnpm run plugin:cua:build:mac:${{ matrix.arch }} + pnpm run plugin:bundle -- --name cua --platform darwin --arch ${{ matrix.arch }} + pnpm run plugin:bundle -- --name feishu --platform darwin --arch ${{ matrix.arch }} pnpm exec electron-builder --mac --${{ matrix.arch }} --publish=never env: CSC_LINK: ${{ secrets.DEEPCHAT_CSC_LINK }} @@ -304,19 +318,18 @@ jobs: NODE_OPTIONS: '--max-old-space-size=4096' VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} - - name: Verify bundled CUA plugin + - name: Verify bundled plugins shell: bash env: TARGET_ARCH: ${{ matrix.arch }} run: | - VERSION="$(node -p "require('./package.json').version")" APP_DIR="dist/mac/DeepChat.app" if [ "$TARGET_ARCH" = "arm64" ]; then APP_DIR="dist/mac-arm64/DeepChat.app" fi - PLUGIN="${APP_DIR}/Contents/Resources/app.asar.unpacked/plugins/deepchat-plugin-cua-${VERSION}-darwin-${TARGET_ARCH}.dcplugin" - test -f "$PLUGIN" - ls -lh "$PLUGIN" + PLUGIN_ROOT="${APP_DIR}/Contents/Resources/app.asar.unpacked/plugins" + pnpm run plugin:verify -- --name cua --platform darwin --arch "$TARGET_ARCH" --plugin-root "$PLUGIN_ROOT" + pnpm run plugin:verify -- --name feishu --platform darwin --arch "$TARGET_ARCH" --plugin-root "$PLUGIN_ROOT" - name: Upload artifacts uses: actions/upload-artifact@v6 diff --git a/docs/guides/plugin-packaging.md b/docs/guides/plugin-packaging.md index ea3864127..8c5a40502 100644 --- a/docs/guides/plugin-packaging.md +++ b/docs/guides/plugin-packaging.md @@ -23,97 +23,150 @@ Official packages keep DeepChat release asset URLs in their manifest metadata: https://github.com/ThinkInAIXYZ/deepchat/releases/download/v/.dcplugin ``` -## CUA Plugin Artifacts +Output naming pattern: `deepchat-plugin--[--].dcplugin` -The CUA plugin ships one macOS 
helper app per CPU architecture. The bundled package filename -includes both platform and architecture: +## Generic Commands -```text -deepchat-plugin-cua--darwin-arm64.dcplugin -deepchat-plugin-cua--darwin-x64.dcplugin -``` +All plugins share a common set of commands powered by `scripts/plugin.mjs`, which delegates to +`scripts/package-plugin.mjs` for the actual packaging logic. -The manifest inside each package keeps the official DeepChat release-download namespace for trust -metadata. Runtime detection inside the package uses the same architecture-specific plugin path: +### Validate -```text -plugin:runtime/darwin//DeepChat Computer Use.app/Contents/MacOS/cua-driver +Dry-run: validates the manifest and file references without producing a `.dcplugin`. + +```bash +pnpm run plugin:validate -- --name --platform --arch ``` -Architecture mapping: +### Package -| DeepChat arch | Swift arch | Runtime directory | -| --- | --- | --- | -| `arm64` | `arm64` | `runtime/darwin/arm64/` | -| `x64` | `x86_64` | `runtime/darwin/x64/` | +Build (if the plugin has a native build step) and package into a `.dcplugin` under `dist/plugins/`. -Each `.dcplugin` contains only the runtime directory for its target architecture. +```bash +pnpm run plugin:package -- --name --platform --arch +``` -## Local Commands +### Bundle -Validate the package metadata for the current host architecture: +Package into `build/bundled-plugins/` for embedding into the Electron app. ```bash -pnpm run plugin:cua:validate +pnpm run plugin:bundle -- --name --platform --arch ``` -Build and package the current host architecture: +### Verify + +Verify expected bundled official plugin artifacts from plugin metadata. ```bash -pnpm run plugin:cua:package +pnpm run plugin:verify -- --name --platform --arch --plugin-root ``` -Build and package explicit macOS architectures: +When `--name` is omitted, the script verifies all official plugins supported by the target platform. 
+ +### Clean + +Remove all bundled plugin artifacts: ```bash -pnpm run plugin:cua:package:mac:arm64 -pnpm run plugin:cua:package:mac:x64 +pnpm run plugin:bundle:clean ``` -Build the package that will be embedded into the macOS app: +## Plugins with Native Build Steps + +Some plugins (like CUA) include pre-compiled native binaries. These require an additional build +step before packaging. The dispatcher script automatically detects and runs +`scripts/build--plugin-runtime.mjs` when it exists. + +CUA native build commands (macOS-only, requires Swift toolchain): ```bash -pnpm run plugin:cua:bundle:mac:arm64 -pnpm run plugin:cua:bundle:mac:x64 +pnpm run plugin:cua:build # host architecture +pnpm run plugin:cua:build:mac:arm64 # explicit ARM64 +pnpm run plugin:cua:build:mac:x64 # explicit x64 ``` -Validate explicit macOS architectures after their helper runtimes have been staged: +## CUA Plugin Artifacts -```bash -pnpm run plugin:cua:validate:mac:arm64 -pnpm run plugin:cua:validate:mac:x64 +The CUA plugin ships one macOS helper app per CPU architecture. The bundled package filename +includes both platform and architecture: + +```text +deepchat-plugin-cua--darwin-arm64.dcplugin +deepchat-plugin-cua--darwin-x64.dcplugin +``` + +Runtime detection inside the package uses architecture-specific paths: + +```text +plugin:runtime/darwin//DeepChat Computer Use.app/Contents/MacOS/cua-driver +``` + +Each `.dcplugin` contains only the runtime directory for its target architecture. + +## Feishu Plugin Artifacts + +The feishu plugin targets all platforms (darwin, linux, win32). Its MCP server uses +`node serve.mjs` which calls `npx` at runtime to download the `@larksuiteoapi/lark-mcp` +package on first use. 
+ +```text +deepchat-plugin-feishu--darwin-arm64.dcplugin +deepchat-plugin-feishu--darwin-x64.dcplugin +deepchat-plugin-feishu--linux-x64.dcplugin +deepchat-plugin-feishu--win32-x64.dcplugin ``` -Standalone packages are written to: +## Output Locations + +Standalone packages: ```text dist/plugins/ ``` -Bundled packages are written to: +Bundled packages (embedded into the Electron app): ```text build/bundled-plugins/ ``` -## CI And Release +## CI and Release + +The build matrix in `.github/workflows/build.yml` bundles plugins before running `electron-builder` +on every platform: + +- **macOS**: bundles both CUA (with native build) and feishu plugins. +- **Linux**: bundles feishu plugin only (CUA is macOS-only). +- **Windows**: bundles feishu plugin only. -The macOS build matrix in `.github/workflows/build.yml` builds the matching CUA plugin bundle before -running `electron-builder`. Electron Builder embeds it into: +Electron Builder embeds `.dcplugin` files from `build/bundled-plugins/` into: ```text -DeepChat.app/Contents/Resources/app.asar.unpacked/plugins/ +/Contents/Resources/app.asar.unpacked/plugins/ (macOS) +/resources/app.asar.unpacked/plugins/ (Windows/Linux) ``` -Each matrix job verifies the expected bundled `.dcplugin` exists inside the app before uploading -artifacts. +Each matrix job verifies the expected bundled `.dcplugin` files exist inside the app before +uploading artifacts. -The release workflow repeats the same bundled package step. The final release uploads app artifacts -only; `.dcplugin` files are not published as separate GitHub Release assets. +The release workflow (`.github/workflows/release.yml`) repeats the same steps. Final release +uploads app artifacts only; `.dcplugin` files are not published as separate GitHub Release assets. 
-Expected embedded files: +Expected embedded files (macOS example): ```text app.asar.unpacked/plugins/deepchat-plugin-cua--darwin-x64.dcplugin app.asar.unpacked/plugins/deepchat-plugin-cua--darwin-arm64.dcplugin +app.asar.unpacked/plugins/deepchat-plugin-feishu--darwin-x64.dcplugin +app.asar.unpacked/plugins/deepchat-plugin-feishu--darwin-arm64.dcplugin ``` + +## Adding a New Plugin + +1. Create `plugins//plugin.json` with required fields (`id`, `name`, `version`, `publisher`, + `source`, `engines.platforms`, skills, settings contributions). +2. If the plugin needs a native build step, create `scripts/build--plugin-runtime.mjs`. +3. Test locally: `pnpm run plugin:validate -- --name --platform --arch ` +4. Add bundling commands to the CI workflows for the relevant platforms. +5. Add verification steps to CI to confirm the `.dcplugin` is embedded in the built app. diff --git a/electron-builder.yml b/electron-builder.yml index 3ceb532f1..1f1a84ef1 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -40,6 +40,9 @@ extraResources: - from: ./resources/skills/ to: app.asar.unpacked/resources/skills filter: ['**/*'] + - from: ./build/bundled-plugins/ + to: app.asar.unpacked/plugins + filter: ['**/*.dcplugin'] electronLanguages: - zh-CN - zh-TW @@ -78,10 +81,6 @@ nsis: afterSign: scripts/notarize.js afterPack: scripts/afterPack.js mac: - extraResources: - - from: ./build/bundled-plugins/ - to: app.asar.unpacked/plugins - filter: ['**/*.dcplugin'] entitlementsInherit: build/entitlements.mac.plist extendInfo: - NSCameraUsageDescription: Application requests access to the device's camera. 
diff --git a/package.json b/package.json index ff87a3f68..01ca2947e 100644 --- a/package.json +++ b/package.json @@ -38,30 +38,24 @@ "release:ff": "node scripts/release-fast-forward.mjs", "postinstall": "electron-builder install-app-deps && simple-git-hooks", "build:unpack": "pnpm run build && electron-builder --dir", - "build:win": "pnpm run build && electron-builder --win", - "build:win:x64": "pnpm run build && electron-builder --win --x64", - "build:win:arm64": "pnpm run build && electron-builder --win --arm64", - "plugin:cua:bundle:clean": "node -e \"require('fs').rmSync('build/bundled-plugins',{recursive:true,force:true})\"", - "plugin:cua:bundle": "pnpm run plugin:cua:bundle:clean && pnpm run plugin:cua:build && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin --out build/bundled-plugins plugins/cua", - "plugin:cua:bundle:mac:arm64": "pnpm run plugin:cua:bundle:clean && pnpm run plugin:cua:build:mac:arm64 && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin --target-arch arm64 --out build/bundled-plugins plugins/cua", - "plugin:cua:bundle:mac:x64": "pnpm run plugin:cua:bundle:clean && pnpm run plugin:cua:build:mac:x64 && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin --target-arch x64 --out build/bundled-plugins plugins/cua", + "plugin:validate": "node scripts/plugin.mjs validate", + "plugin:package": "node scripts/plugin.mjs package", + "plugin:bundle": "node scripts/plugin.mjs bundle", + "plugin:verify": "node scripts/plugin.mjs verify", + "plugin:bundle:clean": "node -e \"require('fs').rmSync('build/bundled-plugins',{recursive:true,force:true})\"", "plugin:cua:build": "node scripts/build-cua-plugin-runtime.mjs", "plugin:cua:build:mac:arm64": "node scripts/build-cua-plugin-runtime.mjs --arch arm64", "plugin:cua:build:mac:x64": "node scripts/build-cua-plugin-runtime.mjs --arch x64", - "plugin:cua:validate": "node scripts/package-plugin.mjs --validate 
--release-version-from-root --target-platform darwin plugins/cua", - "plugin:cua:validate:mac:arm64": "node scripts/package-plugin.mjs --validate --release-version-from-root --target-platform darwin --target-arch arm64 plugins/cua", - "plugin:cua:validate:mac:x64": "node scripts/package-plugin.mjs --validate --release-version-from-root --target-platform darwin --target-arch x64 plugins/cua", - "plugin:cua:package": "pnpm run plugin:cua:pack", - "plugin:cua:pack": "pnpm run plugin:cua:build && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin plugins/cua", - "plugin:cua:package:mac:arm64": "pnpm run plugin:cua:build:mac:arm64 && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin --target-arch arm64 plugins/cua", - "plugin:cua:package:mac:x64": "pnpm run plugin:cua:build:mac:x64 && node scripts/package-plugin.mjs --release-version-from-root --target-platform darwin --target-arch x64 plugins/cua", "install:sharp": "node scripts/install-sharp-for-platform.js", - "build:mac": "pnpm run build && pnpm run plugin:cua:bundle && electron-builder --mac", - "build:mac:arm64": "pnpm run build && pnpm run plugin:cua:bundle:mac:arm64 && electron-builder --mac --arm64", - "build:mac:x64": "pnpm run build && pnpm run plugin:cua:bundle:mac:x64 && electron-builder --mac --x64", - "build:linux": "pnpm run build && electron-builder --linux", - "build:linux:x64": "pnpm run build && electron-builder --linux --x64", - "build:linux:arm64": "pnpm run build && electron-builder --linux --arm64", + "build:mac": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:cua:build && pnpm run plugin:bundle -- --name cua --platform darwin && pnpm run plugin:bundle -- --name feishu --platform darwin && electron-builder --mac", + "build:mac:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:cua:build:mac:arm64 && pnpm run plugin:bundle -- --name cua --platform darwin --arch arm64 && pnpm run 
plugin:bundle -- --name feishu --platform darwin --arch arm64 && electron-builder --mac --arm64", + "build:mac:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:cua:build:mac:x64 && pnpm run plugin:bundle -- --name cua --platform darwin --arch x64 && pnpm run plugin:bundle -- --name feishu --platform darwin --arch x64 && electron-builder --mac --x64", + "build:win": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform win32 && electron-builder --win", + "build:win:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform win32 --arch x64 && electron-builder --win --x64", + "build:win:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform win32 --arch arm64 && electron-builder --win --arm64", + "build:linux": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform linux && electron-builder --linux", + "build:linux:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform linux --arch x64 && electron-builder --linux --x64", + "build:linux:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform linux --arch arm64 && electron-builder --linux --arm64", "afterSign": "scripts/notarize.js", "installRuntime": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 && npx -y tiny-runtime-injector --type node --dir ./runtime/node && npx -y tiny-runtime-injector --type ripgrep --dir ./runtime/ripgrep && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk", "installRuntime:win:x64": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 -a x64 -p win32 && npx -y tiny-runtime-injector --type node --dir ./runtime/node -a x64 -p win32 && npx -y tiny-runtime-injector --type ripgrep --dir 
./runtime/ripgrep -a x64 -p win32 && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk -a x64 -p win32", diff --git a/plugins/feishu/plugin.json b/plugins/feishu/plugin.json index 8da49a279..2d0081ed9 100644 --- a/plugins/feishu/plugin.json +++ b/plugins/feishu/plugin.json @@ -11,7 +11,7 @@ "capabilities": ["runtime.manage", "mcp.register", "skills.register", "settings.contribute"], "source": { "type": "deepchat-official", - "url": "${github.release.download}/deepchat-plugin-feishu-${app.version}-${arch}.dcplugin", + "url": "${github.release.download}/deepchat-plugin-feishu-${app.version}-${target.platform}-${arch}.dcplugin", "publisher": "DeepChat" }, "mcpServers": [ diff --git a/scripts/plugin.mjs b/scripts/plugin.mjs new file mode 100644 index 000000000..cdc3f52b8 --- /dev/null +++ b/scripts/plugin.mjs @@ -0,0 +1,156 @@ +import { execFileSync } from 'node:child_process' +import { existsSync, readFileSync, readdirSync } from 'node:fs' +import path from 'node:path' + +const OFFICIAL_PLUGIN_SOURCE = 'deepchat-official' + +function parseArgs(argv) { + const args = { + action: null, + name: null, + platform: process.env.TARGET_PLATFORM || process.platform, + arch: process.env.TARGET_ARCH || process.arch, + pluginRoot: null + } + args.action = argv[0] + for (let i = 1; i < argv.length; i += 1) { + if (argv[i] === '--name') { + args.name = argv[++i] + } else if (argv[i] === '--platform') { + args.platform = argv[++i] + } else if (argv[i] === '--arch') { + args.arch = argv[++i] + } else if (argv[i] === '--plugin-root') { + args.pluginRoot = path.resolve(argv[++i]) + } + } + if (!args.action || !['validate', 'package', 'bundle', 'verify'].includes(args.action)) { + console.error( + 'Usage: node scripts/plugin.mjs [--name ] [--platform

] [--arch ] [--plugin-root ]' + ) + process.exit(1) + } + if (args.action !== 'verify' && !args.name) { + console.error('Missing required --name argument') + process.exit(1) + } + if (args.action === 'verify' && !args.pluginRoot) { + console.error('Missing required --plugin-root argument for verify') + process.exit(1) + } + return args +} + +const args = parseArgs(process.argv.slice(2)) +const packageVersion = JSON.parse(readFileSync(path.resolve('package.json'), 'utf8')).version + +function readPluginManifest(pluginName) { + const pluginDir = path.resolve('plugins', pluginName) + const manifestPath = path.join(pluginDir, 'plugin.json') + if (!existsSync(manifestPath)) { + throw new Error(`Plugin not found: ${manifestPath}`) + } + const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')) + return { pluginDir, manifest } +} + +function discoverOfficialPlugins() { + const pluginsRoot = path.resolve('plugins') + if (!existsSync(pluginsRoot)) { + return [] + } + + return readdirSync(pluginsRoot, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => { + try { + const { manifest } = readPluginManifest(entry.name) + if (manifest.source?.type !== OFFICIAL_PLUGIN_SOURCE) { + return null + } + return { + name: entry.name, + manifest, + platforms: manifest.engines?.platforms ?? [] + } + } catch { + return null + } + }) + .filter(Boolean) + .sort((a, b) => a.name.localeCompare(b.name)) +} + +function isPluginSupported(plugin, targetPlatform) { + const platforms = new Set(plugin.platforms.map((platform) => String(platform).toLowerCase())) + const aliases = targetPlatform === 'darwin' ? ['darwin', 'macos', 'mac'] : [targetPlatform] + return aliases.some((platform) => platforms.has(platform)) +} + +function artifactBaseName(pluginId) { + return pluginId.startsWith('com.deepchat.plugins.') + ? 
`deepchat-plugin-${pluginId.slice('com.deepchat.plugins.'.length)}` + : pluginId +} + +function artifactFileName(plugin, targetPlatform, targetArch) { + const safeId = artifactBaseName(plugin.manifest.id).replace(/[^a-zA-Z0-9._-]/g, '-') + return `${safeId}-${packageVersion}-${targetPlatform}-${targetArch}.dcplugin` +} + +function verifyArtifacts(options) { + const pluginRoot = path.resolve(options.pluginRoot) + const officialPlugins = discoverOfficialPlugins() + const selected = options.name + ? officialPlugins.filter((plugin) => plugin.name === options.name) + : officialPlugins + + if (options.name && selected.length === 0) { + throw new Error(`Official plugin not found: ${options.name}`) + } + + const expected = selected.filter((plugin) => isPluginSupported(plugin, options.platform)) + if (expected.length === 0) { + throw new Error(`No official plugins are expected for ${options.platform}/${options.arch}`) + } + + for (const plugin of expected) { + const fileName = artifactFileName(plugin, options.platform, options.arch) + const artifactPath = path.join(pluginRoot, fileName) + if (!existsSync(artifactPath)) { + throw new Error(`Missing bundled official plugin: ${artifactPath}`) + } + console.log(`Verified ${path.relative(process.cwd(), artifactPath)}`) + } +} + +try { + if (args.action === 'verify') { + verifyArtifacts(args) + process.exit(0) + } + + const { pluginDir } = readPluginManifest(args.name) + + // Run native build step if the plugin has one (e.g. 
scripts/build-cua-plugin-runtime.mjs) + const nativeBuildScript = path.resolve(`scripts/build-${args.name}-plugin-runtime.mjs`) + if (args.action === 'bundle' && existsSync(nativeBuildScript)) { + const buildArgs = [nativeBuildScript] + if (args.arch) buildArgs.push('--arch', args.arch) + execFileSync('node', buildArgs, { stdio: 'inherit' }) + } + + // Delegate to package-plugin.mjs + const pkgArgs = [path.resolve('scripts/package-plugin.mjs')] + if (args.action === 'validate') pkgArgs.push('--validate') + pkgArgs.push('--release-version-from-root') + if (args.platform) pkgArgs.push('--target-platform', args.platform) + if (args.arch) pkgArgs.push('--target-arch', args.arch) + if (args.action === 'bundle') pkgArgs.push('--out', path.resolve('build/bundled-plugins')) + pkgArgs.push(pluginDir) + + execFileSync('node', pkgArgs, { stdio: 'inherit' }) +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)) + process.exit(1) +} From f76d07f69e7c614363ee8edea813178917596f24 Mon Sep 17 00:00:00 2001 From: xiaomo Date: Sat, 16 May 2026 21:13:21 +0800 Subject: [PATCH 03/11] fix(tts): stabilize routing and model settings (#1632) Co-authored-by: zhangmo8 --- docs/features/unified-tts-provider/plan.md | 37 ++++ docs/features/unified-tts-provider/spec.md | 34 +++ docs/features/unified-tts-provider/tasks.md | 22 ++ resources/model-db/providers.json | 154 +++++++++++++ src/main/presenter/configPresenter/index.ts | 2 + .../presenter/configPresenter/modelConfig.ts | 8 +- .../llmProviderPresenter/aiSdk/runtime.ts | 203 ++++++++++++++++++ .../providers/aiSdkProvider.ts | 44 +++- .../settings/components/ProviderModelList.vue | 10 +- .../components/settings/ModelConfigDialog.vue | 42 +++- .../components/settings/TtsSettingsFields.vue | 159 ++++++++++++++ .../src/composables/useModelTypeDetection.ts | 11 +- src/shared/contracts/common.ts | 10 + src/shared/contracts/domainSchemas.ts | 4 +- src/shared/model.ts | 11 +- src/shared/ttsSettings.ts | 116 
++++++++++ src/shared/types/model-db.ts | 7 +- .../types/presenters/legacy.presenters.d.ts | 2 + 18 files changed, 856 insertions(+), 20 deletions(-) create mode 100644 docs/features/unified-tts-provider/plan.md create mode 100644 docs/features/unified-tts-provider/spec.md create mode 100644 docs/features/unified-tts-provider/tasks.md create mode 100644 src/renderer/src/components/settings/TtsSettingsFields.vue create mode 100644 src/shared/ttsSettings.ts diff --git a/docs/features/unified-tts-provider/plan.md b/docs/features/unified-tts-provider/plan.md new file mode 100644 index 000000000..2d6f02b05 --- /dev/null +++ b/docs/features/unified-tts-provider/plan.md @@ -0,0 +1,37 @@ +# Plan + +## Approach +Treat TTS as a first-class model capability and follow the `ImageGeneration` routing strategy: +- Extend shared model/type schema to include `tts`. +- Add runtime TTS routing ahead of default chat generation. +- Dispatch by model pattern: + - Pattern A: `/v1/audio/speech` + - Pattern B: `/v1/chat/completions` with `audio` output +- Normalize returned audio into data URL and cache through existing device cache, then emit `image_data` with audio MIME type. + +## Affected Areas +- Shared types/contracts: + - `src/shared/model.ts` + - `src/shared/types/model-db.ts` + - `src/shared/types/presenters/legacy.presenters.d.ts` + - `src/shared/contracts/common.ts` + - `src/shared/contracts/domainSchemas.ts` + - `src/shared/ttsSettings.ts` (new) +- Main runtime/provider: + - `src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts` + - `src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts` +- Model DB: + - `resources/model-db/providers.json` +- Renderer model type detection: + - `src/renderer/src/composables/useModelTypeDetection.ts` + +## Compatibility +- Existing chat and image generation paths remain unchanged. +- Existing renderer audio playback remains unchanged because it already handles `image_data` with `audio/*` MIME. 
+ +## Verification Strategy +Run: +- `pnpm run typecheck` +- `pnpm run format` +- `pnpm run i18n` +- `pnpm run lint` diff --git a/docs/features/unified-tts-provider/spec.md b/docs/features/unified-tts-provider/spec.md new file mode 100644 index 000000000..beba05f9f --- /dev/null +++ b/docs/features/unified-tts-provider/spec.md @@ -0,0 +1,34 @@ +# Unified TTS Provider (Model-Level) + +## User Need +Users want TTS integrated as a model capability (`ModelType.TTS`) instead of per-provider custom integration, so any OpenAI-compatible provider can work if its model metadata marks TTS support. + +## Goal +Enable model-level TTS routing in DeepChat similar to image generation routing, including: +- Standard OpenAI `/v1/audio/speech` TTS models +- Chat-completions-audio TTS models that return base64 audio + +## Acceptance Criteria +1. `ModelType.TTS` is available in shared model contracts and model-db schema. +2. Runtime can route TTS models by model capability metadata and endpoint hints. +3. Runtime supports both TTS patterns and emits `image_data` events with `audio/*` MIME type for existing renderer playback. +4. Model DB can represent TTS model type for built-in provider entries. +5. Frontend model type detection exposes TTS model state for UI behavior alignment. +6. Validation commands pass: +- `pnpm run typecheck` +- `pnpm run format` +- `pnpm run i18n` +- `pnpm run lint` + +## Constraints +- Reuse existing audio rendering path via `image_data`; avoid introducing new stream event types. +- Keep provider integration generic for OpenAI-compatible providers. +- Do not introduce dedicated UI for TTS settings in this scope. + +## Non-Goals +- New TTS player UI. +- Voice catalog fetching UX. +- VoiceAI provider refactor. + +## Open Questions +- None for current scope. 
diff --git a/docs/features/unified-tts-provider/tasks.md b/docs/features/unified-tts-provider/tasks.md new file mode 100644 index 000000000..10e2e1cb3 --- /dev/null +++ b/docs/features/unified-tts-provider/tasks.md @@ -0,0 +1,22 @@ +# Tasks + +## Shared Types + Runtime +- [x] Add `ModelType.TTS` and `ApiEndpointType.AudioSpeech` in shared model enums. +- [x] Extend model-db schema and parser for `tts` type. +- [x] Add `src/shared/ttsSettings.ts` helpers for pattern detection and format normalization. +- [x] Extend presenter model config contracts with optional `tts` settings. +- [x] Add TTS route in runtime supporting pattern A and pattern B. +- [x] Inject `shouldUseTts` capability check from AI SDK provider. + +## Model DB +- [x] Mark relevant `aihubmix` models as `type: "tts"` in provider model list. +- [x] Evaluate whether built-in `xiaomimimo` provider entry exists; it does not, so built-in DB coverage is skipped. + +## Renderer +- [x] Extend `useModelTypeDetection` to include `tts` and expose `isTtsModel`. + +## Validation +- [x] Run `pnpm run typecheck`. +- [x] Run `pnpm run format`. +- [x] Run `pnpm run i18n`. +- [x] Run `pnpm run lint`. 
diff --git a/resources/model-db/providers.json b/resources/model-db/providers.json index fb0dc972c..66f45867c 100644 --- a/resources/model-db/providers.json +++ b/resources/model-db/providers.json @@ -181526,6 +181526,160 @@ }, "type": "chat" }, + { + "id": "tts-1", + "name": "tts-1", + "display_name": "tts-1", + "modalities": { + "input": [ + "text" + ], + "output": [ + "audio" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": false, + "cost": { + "input": 15, + "output": 15 + }, + "type": "tts" + }, + { + "id": "tts-1-hd", + "name": "tts-1-hd", + "display_name": "tts-1-hd", + "modalities": { + "input": [ + "text" + ], + "output": [ + "audio" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": false, + "cost": { + "input": 30, + "output": 30 + }, + "type": "tts" + }, + { + "id": "gpt-4o-mini-tts", + "name": "gpt-4o-mini-tts", + "display_name": "gpt-4o-mini-tts", + "modalities": { + "input": [ + "text" + ], + "output": [ + "audio" + ] + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "cost": { + "input": 0.48, + "output": 0.96 + }, + "type": "tts" + }, + { + "id": "gemini-2.5-flash-preview-tts", + "name": "gemini-2.5-flash-preview-tts", + "display_name": "gemini-2.5-flash-preview-tts", + "modalities": { + "input": [ + "text" + ], + "output": [ + "audio" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": false, + "knowledge": "2025-01", + "release_date": "2025-05-01", + "last_updated": "2025-05-01", + "cost": { + "input": 0.5, + "output": 0.5, + "cache_read": 0 + }, + "type": 
"tts" + }, + { + "id": "gemini-2.5-pro-preview-tts", + "name": "gemini-2.5-pro-preview-tts", + "display_name": "gemini-2.5-pro-preview-tts", + "modalities": { + "input": [ + "text" + ], + "output": [ + "audio" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": false, + "knowledge": "2025-01", + "release_date": "2025-05-01", + "last_updated": "2025-05-01", + "cost": { + "input": 0.5, + "output": 0.5, + "cache_read": 0 + }, + "type": "tts" + }, { "id": "doubao-seed-2-0-pro", "name": "doubao-seed-2-0-pro", diff --git a/src/main/presenter/configPresenter/index.ts b/src/main/presenter/configPresenter/index.ts index 50975d173..b06bde6ec 100644 --- a/src/main/presenter/configPresenter/index.ts +++ b/src/main/presenter/configPresenter/index.ts @@ -984,6 +984,8 @@ export class ConfigPresenter implements IConfigPresenter { return ModelType.Rerank case 'imageGeneration': return ModelType.ImageGeneration + case 'tts': + return ModelType.TTS case 'chat': default: return ModelType.Chat diff --git a/src/main/presenter/configPresenter/modelConfig.ts b/src/main/presenter/configPresenter/modelConfig.ts index ec524ce8c..c6fe4fe04 100644 --- a/src/main/presenter/configPresenter/modelConfig.ts +++ b/src/main/presenter/configPresenter/modelConfig.ts @@ -121,6 +121,8 @@ export class ModelConfigHelper { return ModelType.Rerank case 'imageGeneration': return ModelType.ImageGeneration + case 'tts': + return ModelType.TTS default: // Invalid type, fall through to default break @@ -176,7 +178,11 @@ export class ModelConfigHelper { reasoning: Boolean(reasoningEnabled), type: modelType, apiEndpoint: - modelType === ModelType.ImageGeneration ? ApiEndpointType.Image : ApiEndpointType.Chat, + modelType === ModelType.ImageGeneration + ? 
ApiEndpointType.Image + : modelType === ModelType.TTS + ? ApiEndpointType.AudioSpeech + : ApiEndpointType.Chat, thinkingBudget, forceInterleavedThinkingCompat, reasoningEffort, diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts index ea7207aa4..84d7ce9e3 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts @@ -19,6 +19,13 @@ import { supportsOpenAIImageGenerationSettings, type ImageGenerationOptions } from '@shared/imageGenerationSettings' +import { + isChatAudioTtsModel, + isStandardTtsModel, + isTtsModelConfig, + normalizeTtsSettings, + ttsFormatToMimeType +} from '@shared/ttsSettings' import { presenter } from '@/presenter' import { EMBEDDING_TEST_KEY, isNormalized } from '@/utils/vector' import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events' @@ -53,6 +60,7 @@ export interface AiSdkRuntimeContext { cleanHeaders?: boolean supportsNativeTools?: (modelId: string, modelConfig: ModelConfig) => boolean shouldUseImageGeneration?: (modelId: string, modelConfig: ModelConfig) => boolean + shouldUseTts?: (modelId: string, modelConfig: ModelConfig) => boolean } function resolveCapabilityProviderId(context: AiSdkRuntimeContext, modelId: string): string { @@ -158,6 +166,163 @@ function shouldUseImageGenerationRuntime( return modelConfig.apiEndpoint === ApiEndpointType.Image } +function shouldUseTtsRuntime( + context: AiSdkRuntimeContext, + modelId: string, + modelConfig: ModelConfig +): boolean { + if (context.shouldUseTts) { + return context.shouldUseTts(modelId, modelConfig) + } + + return ( + modelConfig.apiEndpoint === ApiEndpointType.AudioSpeech || + isTtsModelConfig(modelConfig) || + isStandardTtsModel(modelId) || + isChatAudioTtsModel(modelId) + ) +} + +/** + * Extracts the text to be synthesized from the last user message in the conversation. 
+ */ +function extractTtsText(messages: ChatMessage[]): string { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + if (msg.role === 'user') { + const text = normalizePromptValue(msg.content) + if (text.trim()) return text.trim() + } + } + return '' +} + +/** + * Pattern A: calls the standard OpenAI-compatible /audio/speech endpoint. + */ +async function executeTtsPatternA( + provider: LLM_PROVIDER, + defaultHeaders: Record, + text: string, + modelId: string, + modelConfig: ModelConfig, + timeout: number | undefined +): Promise<{ base64: string; mimeType: string }> { + const tts = normalizeTtsSettings(modelConfig.tts) + const format = tts?.responseFormat ?? 'mp3' + const baseUrl = (provider.baseUrl || '').replace(/\/+$/, '').replace(/\/v1$/i, '') + const url = `${baseUrl}/v1/audio/speech` + + const body: Record = { + model: modelId, + input: text, + voice: tts?.voice ?? 'alloy', + response_format: format + } + if (tts?.speed !== undefined) { + body.speed = tts.speed + } + if (tts?.instructions) { + body.instructions = tts.instructions + } + + const controller = new AbortController() + const timeoutId = timeout ? setTimeout(() => controller.abort(), timeout) : undefined + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + ...defaultHeaders, + 'Content-Type': 'application/json', + Authorization: `Bearer ${provider.oauthToken || provider.apiKey || ''}` + }, + body: JSON.stringify(body), + signal: controller.signal + }) + + if (!response.ok) { + const errText = await response.text().catch(() => '') + throw new Error(`TTS request failed (${response.status}): ${errText}`) + } + + const buffer = await response.arrayBuffer() + const base64 = Buffer.from(buffer).toString('base64') + return { base64, mimeType: ttsFormatToMimeType(format) } + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId) + } +} + +/** + * Pattern B: calls the chat completions endpoint with audio output + * (e.g. 
xiaomimimo mimo-v2.5-tts series). + */ +async function executeTtsPatternB( + provider: LLM_PROVIDER, + defaultHeaders: Record, + text: string, + modelId: string, + modelConfig: ModelConfig, + timeout: number | undefined +): Promise<{ base64: string; mimeType: string }> { + const tts = normalizeTtsSettings(modelConfig.tts) + const format = tts?.responseFormat ?? 'wav' + const baseUrl = (provider.baseUrl || '').replace(/\/+$/, '').replace(/\/v1$/i, '') + const url = `${baseUrl}/v1/chat/completions` + + const body: Record = { + model: modelId, + messages: [{ role: 'user', content: text }], + modalities: ['text', 'audio'], + audio: { + format, + ...(tts?.voice ? { voice: tts.voice } : {}) + } + } + + const controller = new AbortController() + const timeoutId = timeout ? setTimeout(() => controller.abort(), timeout) : undefined + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + ...defaultHeaders, + 'Content-Type': 'application/json', + Authorization: `Bearer ${provider.oauthToken || provider.apiKey || ''}` + }, + body: JSON.stringify(body), + signal: controller.signal + }) + + if (!response.ok) { + const errText = await response.text().catch(() => '') + throw new Error(`TTS (chat audio) request failed (${response.status}): ${errText}`) + } + + const json = (await response.json()) as { + choices?: Array<{ + message?: { + audio?: { data?: string } + content?: Array<{ type?: string; audio?: { data?: string } }> + } + }> + } + const firstMessage = json.choices?.[0]?.message + const audioData = + firstMessage?.audio?.data ?? 
+ firstMessage?.content?.find((item) => item?.type === 'audio')?.audio?.data + if (!audioData) { + throw new Error('TTS response missing audio data in choices[0].message.audio.data') + } + + return { base64: audioData, mimeType: ttsFormatToMimeType(format) } + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId) + } +} + function resolveRequestTimeout(modelConfig: ModelConfig): number | undefined { const timeout = modelConfig.timeout if (typeof timeout !== 'number' || !Number.isFinite(timeout) || timeout <= 0) { @@ -398,6 +563,44 @@ export async function* runAiSdkCoreStream( const normalizedModelConfig = normalizeRuntimeModelConfig(context, modelId, modelConfig) const timeout = resolveRequestTimeout(normalizedModelConfig) + if (shouldUseTtsRuntime(context, modelId, normalizedModelConfig)) { + const text = extractTtsText(messages) + const usePatternB = isChatAudioTtsModel(modelId) + + const { base64, mimeType } = usePatternB + ? await executeTtsPatternB( + context.provider, + context.defaultHeaders, + text, + modelId, + normalizedModelConfig, + timeout + ) + : await executeTtsPatternA( + context.provider, + context.defaultHeaders, + text, + modelId, + normalizedModelConfig, + timeout + ) + + const dataUrl = `data:${mimeType};base64,${base64}` + const cachedAudio = await presenter.devicePresenter.cacheImage(dataUrl) + yield { + type: 'image_data', + image_data: { + data: cachedAudio, + mimeType + } + } + yield { + type: 'stop', + stop_reason: 'complete' + } + return + } + if (shouldUseImageGenerationRuntime(context, modelId, normalizedModelConfig)) { const prompt = extractImagePrompt(messages) diff --git a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts index 8c87ba909..67380a16c 100644 --- a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts +++ b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts @@ -7,6 +7,7 @@ import { 
resolveProviderCapabilityProviderId, type NewApiEndpointType } from '@shared/model' +import { isChatAudioTtsModel, isStandardTtsModel, isTtsModelConfig } from '@shared/ttsSettings' import { DEFAULT_MODEL_CONTEXT_LENGTH, DEFAULT_MODEL_MAX_TOKENS, @@ -95,6 +96,12 @@ const shouldUseOpenAIImageGenerationRoute = (modelId: string, modelConfig: Model modelConfig.apiEndpoint === ApiEndpointType.Image || modelConfig.type === ModelType.ImageGeneration +const shouldUseOpenAITtsRoute = (modelId: string, modelConfig: ModelConfig): boolean => + isTtsModelConfig(modelConfig) || + modelConfig.apiEndpoint === ApiEndpointType.AudioSpeech || + isStandardTtsModel(modelId) || + isChatAudioTtsModel(modelId) + export function normalizeExtractedImageText(content: string): string { const normalized = content .replace(/\r\n/g, '\n') @@ -569,6 +576,16 @@ export class AiSdkProvider extends BaseLLMProvider { isOpenAIImageGenerationModel(runtimeModelId) || runtimeModelConfig.apiEndpoint === ApiEndpointType.Image + // TTS: OpenAI-compatible only. Others return false (undefined re-enables runtime fallback). + const shouldUseTts = + this.isAzureOpenAI(decision, runtimeProvider) || + decision.providerKind === 'gemini' || + decision.providerKind === 'vertex' || + decision.providerKind === 'anthropic' + ? () => false + : (runtimeModelId: string, runtimeModelConfig: ModelConfig) => + shouldUseOpenAITtsRoute(runtimeModelId, runtimeModelConfig) + return { decision, resolvedModelConfig, @@ -585,7 +602,8 @@ export class AiSdkProvider extends BaseLLMProvider { cleanHeaders, supportsNativeTools: (_runtimeModelId, runtimeModelConfig) => runtimeModelConfig.functionCall === true, - shouldUseImageGeneration + shouldUseImageGeneration, + shouldUseTts } } } @@ -1656,13 +1674,17 @@ export class AiSdkProvider extends BaseLLMProvider { normalizedRawType === 'image' || supportedEndpointTypes.includes('image-generation') ? 
ModelType.ImageGeneration - : normalizedRawType === 'embedding' || - normalizedRawType === 'embeddings' || - normalizedModelId.includes('embedding') - ? ModelType.Embedding - : normalizedRawType === 'rerank' || normalizedModelId.includes('rerank') - ? ModelType.Rerank - : undefined + : normalizedRawType === 'tts' || + normalizedRawType === 'audio-speech' || + normalizedRawType === 'audiospeech' + ? ModelType.TTS + : normalizedRawType === 'embedding' || + normalizedRawType === 'embeddings' || + normalizedModelId.includes('embedding') + ? ModelType.Embedding + : normalizedRawType === 'rerank' || normalizedModelId.includes('rerank') + ? ModelType.Rerank + : undefined const contextLengthCandidate = [ rawModel.context_length, @@ -1725,7 +1747,11 @@ export class AiSdkProvider extends BaseLLMProvider { ...existingConfig, type: model.type ?? existingConfig.type, apiEndpoint: - model.endpointType === 'image-generation' ? ApiEndpointType.Image : ApiEndpointType.Chat, + model.endpointType === 'image-generation' + ? ApiEndpointType.Image + : model.type === ModelType.TTS + ? ApiEndpointType.AudioSpeech + : ApiEndpointType.Chat, endpointType: model.endpointType ?? 
existingConfig.endpointType }) } diff --git a/src/renderer/settings/components/ProviderModelList.vue b/src/renderer/settings/components/ProviderModelList.vue index 4e0c41a86..c970888d8 100644 --- a/src/renderer/settings/components/ProviderModelList.vue +++ b/src/renderer/settings/components/ProviderModelList.vue @@ -390,7 +390,8 @@ const TYPE_ICONS: Record = { [ModelType.Chat]: 'lucide:messages-square', [ModelType.Embedding]: 'lucide:database', [ModelType.Rerank]: 'lucide:arrow-up-wide-narrow', - [ModelType.ImageGeneration]: 'lucide:image' + [ModelType.ImageGeneration]: 'lucide:image', + [ModelType.TTS]: 'lucide:volume-2' } const props = defineProps<{ @@ -450,7 +451,12 @@ const hasModelCapability = (model: RENDERER_MODEL_META, capability: ModelCapabil } } -const getModelTypeLabel = (type: ModelType) => t(`model.filter.typeOptions.${type}`) +const getModelTypeLabel = (type: ModelType) => { + if (type === ModelType.TTS) { + return t('settings.provider.voiceai.title') + } + return t(`model.filter.typeOptions.${type}`) +} const getCapabilityLabel = (capability: ModelCapabilityKey) => t(`model.filter.capabilityOptions.${capability}`) diff --git a/src/renderer/src/components/settings/ModelConfigDialog.vue b/src/renderer/src/components/settings/ModelConfigDialog.vue index 93f706f4e..d87fdc3b0 100644 --- a/src/renderer/src/components/settings/ModelConfigDialog.vue +++ b/src/renderer/src/components/settings/ModelConfigDialog.vue @@ -123,6 +123,8 @@ v-model="config.imageGeneration" /> + +

{{ t('settings.model.modelConfig.type.options.imageGeneration') }} + + {{ t('settings.provider.voiceai.title') }} +

@@ -235,6 +240,9 @@ {{ t('settings.model.modelConfig.apiEndpoint.options.image') }} + + {{ t('settings.provider.voiceai.title') }} +

@@ -551,10 +559,12 @@ import { normalizeImageGenerationOptions, supportsOpenAIImageGenerationSettings } from '@shared/imageGenerationSettings' +import { normalizeTtsSettings } from '@shared/ttsSettings' import { useModelConfigStore } from '@/stores/modelConfigStore' import { useModelStore } from '@/stores/modelStore' import { useProviderStore } from '@/stores/providerStore' import OpenAIImageGenerationSettingsFields from './OpenAIImageGenerationSettingsFields.vue' +import TtsSettingsFields from './TtsSettingsFields.vue' import { createModelClient } from '@api/ModelClient' import { Dialog, @@ -701,6 +711,7 @@ const showOpenAIImageGenerationSettings = computed(() => const showOpenAIImageGenerationRouteControls = computed( () => showOpenAIImageGenerationSettings.value && canEditModelIdentity.value ) +const showTtsSettings = computed(() => config.value.type === ModelType.TTS) // 重置确认对话框 const showResetConfirm = ref(false) @@ -1091,6 +1102,10 @@ const loadConfig = async () => { if (showApiEndpointSelector.value && !config.value.apiEndpoint) { config.value.apiEndpoint = ApiEndpointType.Chat } + + if (config.value.type === ModelType.TTS && !config.value.apiEndpoint) { + config.value.apiEndpoint = ApiEndpointType.AudioSpeech + } } catch (error) { console.error('Failed to load model config:', error) config.value = createDefaultConfig() @@ -1247,7 +1262,8 @@ const handleSave = async () => { ...(normalizedTimeout !== undefined ? { timeout: normalizedTimeout } : {}), imageGeneration: showOpenAIImageGenerationSettings.value ? normalizeImageGenerationOptions(config.value.imageGeneration) - : undefined + : undefined, + tts: showTtsSettings.value ? 
normalizeTtsSettings(config.value.tts) : undefined } try { @@ -1348,6 +1364,33 @@ watch( } ) +watch( + () => [config.value.type, showApiEndpointSelector.value, showEndpointTypeSelector.value], + () => { + if (!showApiEndpointSelector.value || showEndpointTypeSelector.value) { + return + } + + if (config.value.type === ModelType.ImageGeneration) { + config.value.apiEndpoint = ApiEndpointType.Image + return + } + + if (config.value.type === ModelType.TTS) { + config.value.apiEndpoint = ApiEndpointType.AudioSpeech + return + } + + if ( + config.value.apiEndpoint === ApiEndpointType.Image || + config.value.apiEndpoint === ApiEndpointType.AudioSpeech + ) { + config.value.apiEndpoint = ApiEndpointType.Chat + } + }, + { immediate: true } +) + const supportsVerbosity = computed(() => capabilitySupportsVerbosity.value === true) const supportsReasoningVisibility = computed( () => diff --git a/src/renderer/src/components/settings/TtsSettingsFields.vue b/src/renderer/src/components/settings/TtsSettingsFields.vue new file mode 100644 index 000000000..8a7a40204 --- /dev/null +++ b/src/renderer/src/components/settings/TtsSettingsFields.vue @@ -0,0 +1,159 @@ + + + diff --git a/src/renderer/src/composables/useModelTypeDetection.ts b/src/renderer/src/composables/useModelTypeDetection.ts index 5456a878e..7fb0320b5 100644 --- a/src/renderer/src/composables/useModelTypeDetection.ts +++ b/src/renderer/src/composables/useModelTypeDetection.ts @@ -8,11 +8,12 @@ import { useModelConfigStore } from '@/stores/modelConfigStore' export interface UseModelTypeDetectionOptions { modelId: Ref providerId: Ref - modelType: Ref<'chat' | 'imageGeneration' | 'embedding' | 'rerank' | undefined> + modelType: Ref<'chat' | 'imageGeneration' | 'tts' | 'embedding' | 'rerank' | undefined> } export interface UseModelTypeDetectionReturn { isImageGenerationModel: ComputedRef + isTtsModel: ComputedRef isGPT5Model: ComputedRef isGeminiProvider: ComputedRef modelReasoning: Ref @@ -41,6 +42,13 @@ export function 
useModelTypeDetection( return modelType.value === 'imageGeneration' }) 
+ /** + * Checks if current model is a TTS model + */ + const isTtsModel = computed(() => { + return modelType.value === 'tts' + }) + /** * Checks if current model is GPT-5 series * GPT-5 models have special UI requirements (no temperature slider) @@ -86,6 +94,7 @@ export function useModelTypeDetection( // === Return Public API === return { isImageGenerationModel, + isTtsModel, isGPT5Model, isGeminiProvider, modelReasoning diff --git a/src/shared/contracts/common.ts b/src/shared/contracts/common.ts index b340e59c1..d0d51e1c3 100644 --- a/src/shared/contracts/common.ts +++ b/src/shared/contracts/common.ts @@ -12,6 +12,7 @@ import { IMAGE_GENERATION_OUTPUT_FORMAT_VALUES, IMAGE_GENERATION_QUALITY_VALUES } from '../imageGenerationSettings' +import { TTS_RESPONSE_FORMAT_VALUES } from '../ttsSettings' export type JsonValue = | string @@ -58,6 +59,15 @@ export const ImageGenerationOptionsSchema = z }) .optional() +export const TtsSettingsSchema = z + .object({ + voice: z.string().optional(), + responseFormat: z.enum(TTS_RESPONSE_FORMAT_VALUES).optional(), + speed: z.number().min(0.25).max(4.0).optional(), + instructions: z.string().optional() + }) + .optional() + export const AppErrorSchema = z.object({ code: z.string(), message: z.string(), diff --git a/src/shared/contracts/domainSchemas.ts b/src/shared/contracts/domainSchemas.ts index 7b6ae99c3..0c0348065 100644 --- a/src/shared/contracts/domainSchemas.ts +++ b/src/shared/contracts/domainSchemas.ts @@ -4,6 +4,7 @@ import { ApiEndpointType, ModelType, NEW_API_ENDPOINT_TYPES } from '../model' import { FileMetadataValueSchema, ImageGenerationOptionsSchema, + TtsSettingsSchema, JsonValueSchema, ProviderModelSummarySchema } from './common' @@ -250,7 +251,8 @@ export const ModelConfigSchema = z enableSearch: z.boolean().optional(), forcedSearch: z.boolean().optional(), searchStrategy: z.enum(['turbo', 'balanced', 'precise']).optional(), - imageGeneration: ImageGenerationOptionsSchema + imageGeneration: 
ImageGenerationOptionsSchema, + tts: TtsSettingsSchema }) .passthrough() diff --git a/src/shared/model.ts b/src/shared/model.ts index bf80d9085..3bdb3ed98 100644 --- a/src/shared/model.ts +++ b/src/shared/model.ts @@ -5,13 +5,15 @@ export enum ModelType { Chat = 'chat', Embedding = 'embedding', Rerank = 'rerank', - ImageGeneration = 'imageGeneration' + ImageGeneration = 'imageGeneration', + TTS = 'tts' } export enum ApiEndpointType { Chat = 'chat', Image = 'image', - Video = 'video' + Video = 'video', + AudioSpeech = 'audio-speech' } export const NEW_API_ENDPOINT_TYPES = [ @@ -169,4 +171,7 @@ export const resolveProviderCapabilityProviderId = ( } export const isChatSelectableModelType = (type: ModelType | undefined): boolean => - type === undefined || type === ModelType.Chat || type === ModelType.ImageGeneration + type === undefined || + type === ModelType.Chat || + type === ModelType.ImageGeneration || + type === ModelType.TTS diff --git a/src/shared/ttsSettings.ts b/src/shared/ttsSettings.ts new file mode 100644 index 000000000..7f50b1c86 --- /dev/null +++ b/src/shared/ttsSettings.ts @@ -0,0 +1,116 @@ +import { ModelType } from './model' + +export const TTS_RESPONSE_FORMAT_VALUES = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] as const +export type TtsResponseFormat = (typeof TTS_RESPONSE_FORMAT_VALUES)[number] + +export interface TtsSettings { + voice?: string + responseFormat?: TtsResponseFormat + speed?: number + instructions?: string +} + +/** + * Standard OpenAI-style TTS models that use the /audio/speech endpoint (Pattern A). + */ +export const OPENAI_STANDARD_TTS_MODELS = [ + 'tts-1', + 'tts-1-hd', + 'gpt-4o-mini-tts', + 'gemini-2.5-flash-preview-tts', + 'gemini-2.5-pro-preview-tts' +] as const + +/** + * Model ID prefixes for TTS models that use the chat completions endpoint + * with audio output (Pattern B), e.g. xiaomimimo mimo-v2.5-tts series. 
+ */ +export const CHAT_AUDIO_TTS_MODEL_PREFIXES = ['mimo-v'] as const + +function normalizeTtsModelId(modelId: string): string { + const trimmed = modelId.trim().toLowerCase() + if (!trimmed) return '' + const slashIndex = trimmed.lastIndexOf('/') + return slashIndex >= 0 ? trimmed.slice(slashIndex + 1) : trimmed +} + +/** + * Returns true if the model uses the standard /audio/speech TTS endpoint (Pattern A). + */ +export function isStandardTtsModel(modelId: string): boolean { + const id = normalizeTtsModelId(modelId) + return (OPENAI_STANDARD_TTS_MODELS as readonly string[]).includes(id) +} + +/** + * Returns true for `tts` ids of a chat-completions audio family (Pattern B), e.g. mimo-v2.5-tts. + */ +export function isChatAudioTtsModel(modelId: string): boolean { + const id = normalizeTtsModelId(modelId) + return ( + id.includes('tts') && + [...CHAT_AUDIO_TTS_MODEL_PREFIXES, 'xiaomi-mimo-v'].some((prefix) => id.startsWith(prefix)) + ) +} + +/** + * Returns true if the model is any kind of TTS model (either pattern). + */ +export function isTtsModelId(modelId: string): boolean { + return isStandardTtsModel(modelId) || isChatAudioTtsModel(modelId) +} + +/** + * Returns true if modelConfig indicates this is a TTS model. + */ +export function isTtsModelConfig(modelConfig: { type?: ModelType }): boolean { + return modelConfig.type === ModelType.TTS +} + +/** + * Maps a TtsResponseFormat value to an audio MIME type string. + */ +export function ttsFormatToMimeType(format: TtsResponseFormat | string | undefined): string { + switch (format) { + case 'mp3': + return 'audio/mpeg' + case 'opus': + return 'audio/ogg; codecs=opus' + case 'aac': + return 'audio/aac' + case 'flac': + return 'audio/flac' + case 'wav': + return 'audio/wav' + case 'pcm': + return 'audio/pcm' + default: + return 'audio/mpeg' + } +} + +/** + * Normalizes TtsSettings, returning undefined when no valid options are present. 
+ */ +export function normalizeTtsSettings(options?: TtsSettings | null): TtsSettings | undefined { + if (!options) return undefined + const result: TtsSettings = {} + + if (typeof options.voice === 'string' && options.voice.trim()) { + result.voice = options.voice.trim() + } + if ( + typeof options.responseFormat === 'string' && + (TTS_RESPONSE_FORMAT_VALUES as readonly string[]).includes(options.responseFormat) + ) { + result.responseFormat = options.responseFormat as TtsResponseFormat + } + if (typeof options.speed === 'number' && Number.isFinite(options.speed)) { + result.speed = Math.max(0.25, Math.min(4.0, options.speed)) + } + if (typeof options.instructions === 'string' && options.instructions.trim()) { + result.instructions = options.instructions.trim() + } + + return Object.keys(result).length > 0 ? result : undefined +} diff --git a/src/shared/types/model-db.ts b/src/shared/types/model-db.ts index a599f63c3..3e3b225f4 100644 --- a/src/shared/types/model-db.ts +++ b/src/shared/types/model-db.ts @@ -129,7 +129,7 @@ export const ModelSchema = z.object({ release_date: z.string().optional(), last_updated: z.string().optional(), cost: z.record(z.union([z.string(), z.number()])).optional(), - type: z.enum(['chat', 'embedding', 'rerank', 'imageGeneration']).optional() + type: z.enum(['chat', 'embedding', 'rerank', 'imageGeneration', 'tts']).optional() }) export type ProviderModel = z.infer @@ -382,7 +382,7 @@ function getStringNumberRecord(obj: unknown): Record | return Object.keys(out).length ? out : undefined } -type ModelTypeValue = 'chat' | 'embedding' | 'rerank' | 'imageGeneration' +type ModelTypeValue = 'chat' | 'embedding' | 'rerank' | 'imageGeneration' | 'tts' function getEffortValue(v: unknown): ReasoningEffort | undefined { return isReasoningEffort(v) ? 
v : undefined @@ -464,6 +464,7 @@ function getModelTypeValue(v: unknown): ModelTypeValue | undefined { case 'embedding': case 'rerank': case 'imageGeneration': + case 'tts': return v } @@ -479,6 +480,8 @@ function getModelTypeValue(v: unknown): ModelTypeValue | undefined { case 'imagegeneration': case 'imagegen': return 'imageGeneration' + case 'tts': + return 'tts' default: return undefined } diff --git a/src/shared/types/presenters/legacy.presenters.d.ts b/src/shared/types/presenters/legacy.presenters.d.ts index 94de19ba6..175ac10b3 100644 --- a/src/shared/types/presenters/legacy.presenters.d.ts +++ b/src/shared/types/presenters/legacy.presenters.d.ts @@ -6,6 +6,7 @@ import { ShortcutKeySetting } from '@/presenter/configPresenter/shortcutKeySetti import type { NewApiEndpointType } from '@shared/model' import { ApiEndpointType, ModelType } from '@shared/model' import type { ImageGenerationOptions } from '../../imageGenerationSettings' +import type { TtsSettings } from '../../ttsSettings' import type { ReasoningEffort, ReasoningVisibility, Verbosity } from '../model-db' import type { HookTestResult, HooksNotificationsSettings } from '../../hooksNotifications' import type { NowledgeMemThread, NowledgeMemExportSummary } from '../nowledgeMem' @@ -180,6 +181,7 @@ export interface ModelConfig { forcedSearch?: boolean searchStrategy?: 'turbo' | 'balanced' | 'precise' imageGeneration?: ImageGenerationOptions + tts?: TtsSettings } export interface IModelConfig { From 9a415a0f993826841d22effb0695006ba33cf435 Mon Sep 17 00:00:00 2001 From: lil-goat <167018448+lil-goat@users.noreply.github.com> Date: Sat, 16 May 2026 21:21:35 +0800 Subject: [PATCH 04/11] test(questionTool): add parser coverage and reject unsupported fields (#1631) --- src/main/lib/agentRuntime/questionTool.ts | 1 + .../lib/agentRuntime/questionTool.test.ts | 89 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 test/main/lib/agentRuntime/questionTool.test.ts diff --git 
a/src/main/lib/agentRuntime/questionTool.ts b/src/main/lib/agentRuntime/questionTool.ts index 5160f259c..979dcecb6 100644 --- a/src/main/lib/agentRuntime/questionTool.ts +++ b/src/main/lib/agentRuntime/questionTool.ts @@ -59,6 +59,7 @@ export const questionToolSchema = z 'Whether free-form input is allowed for this question. The field name is `custom`, not `allowOther`.' ) }) + .strict() .describe( 'Ask exactly one blocking clarification question. For multiple clarifications, use multiple deepchat_question tool calls instead of sending a `questions` array.' ) diff --git a/test/main/lib/agentRuntime/questionTool.test.ts b/test/main/lib/agentRuntime/questionTool.test.ts new file mode 100644 index 000000000..1e6b3a30b --- /dev/null +++ b/test/main/lib/agentRuntime/questionTool.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'vitest' +import { + parseQuestionToolArgs, + QUESTION_TOOL_CONTRACT_HINT, + QUESTION_TOOL_NAME +} from '../../../../src/main/lib/agentRuntime/questionTool' + +describe('parseQuestionToolArgs', () => { + it('normalizes valid question tool arguments and applies defaults', () => { + const result = parseQuestionToolArgs( + JSON.stringify({ + header: ' Clarify ', + question: ' Which option should we use? 
', + options: [ + { label: ' Option A ', description: ' First choice ' }, + { label: 'Option B', description: ' ' } + ] + }) + ) + + expect(result).toEqual({ + success: true, + data: { + header: 'Clarify', + question: 'Which option should we use?', + options: [{ label: 'Option A', description: 'First choice' }, { label: 'Option B' }], + multiple: false, + custom: true + } + }) + }) + + it('repairs recoverable JSON before validation', () => { + const result = parseQuestionToolArgs( + '{"question":"Pick one","options":[{"label":"A","description":" Alpha ",},],}' + ) + + expect(result).toEqual({ + success: true, + data: { + question: 'Pick one', + options: [{ label: 'A', description: 'Alpha' }], + multiple: false, + custom: true + } + }) + }) + + it('returns a contract hint when the JSON is not parseable', () => { + expect(parseQuestionToolArgs('{"question":"\\uZZZZ"}')).toEqual({ + success: false, + error: `Invalid JSON for question tool arguments. ${QUESTION_TOOL_CONTRACT_HINT}` + }) + }) + + it('rejects unsupported top-level fields even when required fields are present', () => { + const cases = [ + { + fieldName: 'allowOther', + payload: { + question: 'Pick one', + options: [{ label: 'A' }], + allowOther: true + } + }, + { + fieldName: 'questions', + payload: { + question: 'Pick one', + options: [{ label: 'A' }], + questions: [{ question: 'Nested', options: [{ label: 'B' }] }] + } + } + ] as const + + for (const testCase of cases) { + const result = parseQuestionToolArgs(JSON.stringify(testCase.payload)) + + expect(result.success).toBe(false) + if (result.success) { + throw new Error(`Expected ${testCase.fieldName} payload to be rejected`) + } + + expect(result.error).toContain(`Invalid arguments for ${QUESTION_TOOL_NAME}.`) + expect(result.error).toContain(QUESTION_TOOL_CONTRACT_HINT) + expect(result.error).toContain(testCase.fieldName) + } + }) +}) From 3ebe5a5995e341070931403c2f406a5a1e4a73d4 Mon Sep 17 00:00:00 2001 From: xiaomo Date: Mon, 18 May 2026 10:45:22 
+0800 Subject: [PATCH 05/11] fix: update TTS references and localization across components (#1633) * fix: update TTS references and localization across components * chore: update --------- Co-authored-by: zhangmo8 --- .../presenter/agentRuntimePresenter/index.ts | 12 ++++--- .../llmProviderPresenter/aiSdk/runtime.ts | 18 +++++++--- .../settings/components/ProviderModelList.vue | 2 +- .../components/VoiceAIProviderConfig.vue | 36 +++++++++---------- .../src/components/chat/ChatStatusBar.vue | 11 ------ .../components/settings/ModelConfigDialog.vue | 4 +-- .../components/settings/TtsSettingsFields.vue | 16 ++++----- src/renderer/src/i18n/da-DK/settings.json | 4 +-- src/renderer/src/i18n/en-US/settings.json | 4 +-- src/renderer/src/i18n/fa-IR/settings.json | 4 +-- src/renderer/src/i18n/fr-FR/settings.json | 4 +-- src/renderer/src/i18n/he-IL/settings.json | 4 +-- src/renderer/src/i18n/ja-JP/settings.json | 4 +-- src/renderer/src/i18n/ko-KR/settings.json | 4 +-- src/renderer/src/i18n/pt-BR/settings.json | 4 +-- src/renderer/src/i18n/ru-RU/settings.json | 4 +-- src/renderer/src/i18n/zh-CN/settings.json | 4 +-- src/renderer/src/i18n/zh-HK/settings.json | 4 +-- src/renderer/src/i18n/zh-TW/settings.json | 4 +-- 19 files changed, 75 insertions(+), 72 deletions(-) diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts index ac7198b10..73dc22488 100644 --- a/src/main/presenter/agentRuntimePresenter/index.ts +++ b/src/main/presenter/agentRuntimePresenter/index.ts @@ -58,6 +58,7 @@ import { supportsOpenAIImageGenerationSettings } from '@shared/imageGenerationSettings' import { isDeepSeekSeriesModelId } from '@shared/model' +import { isTtsModelConfig, isTtsModelId } from '@shared/ttsSettings' import { nanoid } from 'nanoid' import type { SQLitePresenter } from '../sqlitePresenter' import { eventBus, SendTarget } from '@/eventbus' @@ -1963,6 +1964,9 @@ export class AgentRuntimePresenter implements IAgentImplementation { 
try { let providerMessages = injectedMessages let providerMaxTokens = requestMaxTokens + const isTtsRequest = + isTtsModelConfig(requestModelConfig) || isTtsModelId(requestModelId) + const effectiveRequestTools: MCPToolDefinition[] = isTtsRequest ? [] : requestTools if (!bypassContextBudget) { const protectedSteerTailCount = @@ -1971,7 +1975,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { : 0 let requestPreflight = preflightRequestContext({ messages: injectedMessages, - tools: requestTools, + tools: effectiveRequestTools, contextLength: requestModelConfig.contextLength, requestedMaxTokens: requestMaxTokens, minimumProtectedTailCount: protectedSteerTailCount @@ -1988,7 +1992,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { baseSystemPrompt, contextLength: requestModelConfig.contextLength, requestedMaxTokens: requestPreflight.requestedMaxTokens, - tools: requestTools, + tools: effectiveRequestTools, supportsVision, supportsAudioInput, interleavedReasoning, @@ -2001,7 +2005,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { } requestPreflight = preflightRequestContext({ messages: requestMessages, - tools: requestTools, + tools: effectiveRequestTools, contextLength: requestModelConfig.contextLength, requestedMaxTokens: requestMaxTokens, minimumProtectedTailCount: protectedSteerTailCount @@ -2041,7 +2045,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { requestModelConfig, requestTemperature, providerMaxTokens, - requestTools + effectiveRequestTools )) { if (!didConsumeSteerBatch && claimedSteerBatch.length > 0) { pendingInputCoordinator.consumeClaimedSteerBatch(sessionId) diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts index 84d7ce9e3..8ae3b368c 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts @@ -32,6 +32,8 @@ 
import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events' import { mcpToolsToAISDKTools } from './toolMapper' import { mapMessagesToModelMessages } from './messageMapper' import { buildProviderOptions } from './providerOptionsMapper' +import { ProxyAgent } from 'undici' +import { proxyConfig } from '../../proxyConfig' import { type AiSdkProviderKind, createAiSdkProviderContext } from './providerFactory' import { adaptAiSdkStream } from './streamAdapter' @@ -228,9 +230,11 @@ async function executeTtsPatternA( const controller = new AbortController() const timeoutId = timeout ? setTimeout(() => controller.abort(), timeout) : undefined + const proxyUrl = proxyConfig.getProxyUrl() + const dispatcher = proxyUrl ? new ProxyAgent(proxyUrl) : undefined try { - const response = await fetch(url, { + const fetchInit: RequestInit & { dispatcher?: ProxyAgent } = { method: 'POST', headers: { ...defaultHeaders, @@ -239,7 +243,9 @@ async function executeTtsPatternA( }, body: JSON.stringify(body), signal: controller.signal - }) + } + if (dispatcher) fetchInit.dispatcher = dispatcher + const response = await fetch(url, fetchInit) if (!response.ok) { const errText = await response.text().catch(() => '') @@ -283,9 +289,11 @@ async function executeTtsPatternB( const controller = new AbortController() const timeoutId = timeout ? setTimeout(() => controller.abort(), timeout) : undefined + const proxyUrl = proxyConfig.getProxyUrl() + const dispatcher = proxyUrl ? 
new ProxyAgent(proxyUrl) : undefined try { - const response = await fetch(url, { + const fetchInit: RequestInit & { dispatcher?: ProxyAgent } = { method: 'POST', headers: { ...defaultHeaders, @@ -294,7 +302,9 @@ async function executeTtsPatternB( }, body: JSON.stringify(body), signal: controller.signal - }) + } + if (dispatcher) fetchInit.dispatcher = dispatcher + const response = await fetch(url, fetchInit) if (!response.ok) { const errText = await response.text().catch(() => '') diff --git a/src/renderer/settings/components/ProviderModelList.vue b/src/renderer/settings/components/ProviderModelList.vue index c970888d8..d4b45ff76 100644 --- a/src/renderer/settings/components/ProviderModelList.vue +++ b/src/renderer/settings/components/ProviderModelList.vue @@ -453,7 +453,7 @@ const hasModelCapability = (model: RENDERER_MODEL_META, capability: ModelCapabil const getModelTypeLabel = (type: ModelType) => { if (type === ModelType.TTS) { - return t('settings.provider.voiceai.title') + return t('settings.provider.tts.title') } return t(`model.filter.typeOptions.${type}`) } diff --git a/src/renderer/settings/components/VoiceAIProviderConfig.vue b/src/renderer/settings/components/VoiceAIProviderConfig.vue index 8cdb2a3a8..6483142c3 100644 --- a/src/renderer/settings/components/VoiceAIProviderConfig.vue +++ b/src/renderer/settings/components/VoiceAIProviderConfig.vue @@ -6,9 +6,9 @@

-

{{ t('settings.provider.voiceai.title') }}

+

{{ t('settings.provider.tts.title') }}

- {{ t('settings.provider.voiceai.description') }} + {{ t('settings.provider.tts.description') }}

@@ -18,11 +18,11 @@

- {{ t('settings.provider.voiceai.audioFormat.helper') }} + {{ t('settings.provider.tts.audioFormat.helper') }}

- {{ t('settings.provider.voiceai.language.helper') }} + {{ t('settings.provider.tts.language.helper') }}

- {{ t('settings.provider.voiceai.model.helper') }} + {{ t('settings.provider.tts.model.helper') }}

- {{ t('settings.provider.voiceai.agentId.helper') }} + {{ t('settings.provider.tts.agentId.helper') }}

@@ -95,7 +95,7 @@
{{ temperature.toFixed(2) }}
@@ -108,14 +108,14 @@ @update:model-value="onTemperatureChange" />

- {{ t('settings.provider.voiceai.temperature.helper') }} + {{ t('settings.provider.tts.temperature.helper') }}

{{ topP.toFixed(2) }}
@@ -128,7 +128,7 @@ @update:model-value="onTopPChange" />

- {{ t('settings.provider.voiceai.topP.helper') }} + {{ t('settings.provider.tts.topP.helper') }}

diff --git a/src/renderer/src/components/chat/ChatStatusBar.vue b/src/renderer/src/components/chat/ChatStatusBar.vue index dc79db218..8d7612cc0 100644 --- a/src/renderer/src/components/chat/ChatStatusBar.vue +++ b/src/renderer/src/components/chat/ChatStatusBar.vue @@ -103,12 +103,6 @@ :is-dark="themeStore.isDark" /> {{ displayModelText }} - (null) -const capabilitySupportsAudioInput = ref(null) const capabilityReasoningPortrait = ref(null) const capabilitySupportsTemperature = ref(null) const capabilityProviderId = ref('') @@ -2103,7 +2096,6 @@ const fetchCapabilities = async (providerId: string, modelId: string): Promise { localSettings.value = null loadedSettingsSelection.value = null capabilityProviderId.value = '' - capabilitySupportsAudioInput.value = null capabilitySupportsReasoning.value = null capabilityReasoningPortrait.value = null return @@ -2251,7 +2241,6 @@ const syncGenerationSettings = async () => { localSettings.value = null loadedSettingsSelection.value = null capabilityProviderId.value = '' - capabilitySupportsAudioInput.value = null capabilityReasoningPortrait.value = null capabilitySupportsReasoning.value = null return diff --git a/src/renderer/src/components/settings/ModelConfigDialog.vue b/src/renderer/src/components/settings/ModelConfigDialog.vue index d87fdc3b0..71eaed14f 100644 --- a/src/renderer/src/components/settings/ModelConfigDialog.vue +++ b/src/renderer/src/components/settings/ModelConfigDialog.vue @@ -177,7 +177,7 @@ {{ t('settings.model.modelConfig.type.options.imageGeneration') }} - {{ t('settings.provider.voiceai.title') }} + {{ t('settings.provider.tts.title') }} @@ -241,7 +241,7 @@ {{ t('settings.model.modelConfig.apiEndpoint.options.image') }} - {{ t('settings.provider.voiceai.title') }} + {{ t('settings.provider.tts.title') }} diff --git a/src/renderer/src/components/settings/TtsSettingsFields.vue b/src/renderer/src/components/settings/TtsSettingsFields.vue index 8a7a40204..93144525d 100644 --- 
a/src/renderer/src/components/settings/TtsSettingsFields.vue +++ b/src/renderer/src/components/settings/TtsSettingsFields.vue @@ -1,29 +1,29 @@ @@ -23,26 +55,94 @@ import { computed } from 'vue' import { Icon } from '@iconify/vue' import { useI18n } from 'vue-i18n' +import type { AgentPlanStepStatus } from '@shared/types/agent-plan' import type { DisplayAssistantMessageBlock } from '@/components/chat/messageListItems' +type NormalizedPlanEntry = { + label: string + status: AgentPlanStepStatus +} + const props = defineProps<{ block: DisplayAssistantMessageBlock }>() const { t } = useI18n() -const planEntries = computed(() => { - return (props.block.extra?.plan_entries as Array<{ status?: string | null }>) || [] +const isRecord = (value: unknown): value is Record => + Boolean(value) && typeof value === 'object' && !Array.isArray(value) + +const normalizeStatus = (value: unknown): AgentPlanStepStatus => { + if (value === 'completed' || value === 'done') { + return 'completed' + } + if (value === 'in_progress') { + return 'in_progress' + } + return 'pending' +} + +const entries = computed(() => { + const rawEntries = props.block.extra?.plan_entries + if (!Array.isArray(rawEntries)) { + return [] + } + + return rawEntries + .map((entry) => { + if (!isRecord(entry)) { + return null + } + + const rawLabel = typeof entry.step === 'string' ? entry.step : entry.content + const label = typeof rawLabel === 'string' ? 
rawLabel.trim() : '' + if (!label) { + return null + } + + return { + label, + status: normalizeStatus(entry.status) + } + }) + .filter((entry): entry is NormalizedPlanEntry => entry !== null) }) -const totalCount = computed(() => planEntries.value.length) +const explanation = computed(() => { + const value = props.block.extra?.plan_explanation + if (typeof value === 'string' && value.trim()) { + return value.trim() + } -const completedCount = computed(() => { - return planEntries.value.filter((e) => e.status === 'completed' || e.status === 'done').length + return props.block.content?.trim() ?? '' }) +const totalCount = computed(() => entries.value.length) + +const completedCount = computed( + () => entries.value.filter((entry) => entry.status === 'completed').length +) + const progressPercent = computed(() => { if (totalCount.value === 0) return 0 return Math.round((completedCount.value / totalCount.value) * 100) }) + +const getStatusIcon = (status: AgentPlanStepStatus): string => { + if (status === 'completed') return 'lucide:circle-check' + if (status === 'in_progress') return 'lucide:loader-circle' + return 'lucide:circle' +} + +const getStatusIconClass = (status: AgentPlanStepStatus): string => { + if (status === 'completed') return 'text-muted-foreground' + if (status === 'in_progress') return 'animate-spin text-primary' + return 'text-muted-foreground/80' +} + +const getEntryAriaLabel = (entry: NormalizedPlanEntry): string => + t('chat.workspace.plan.itemAriaLabel', { + status: t(`chat.workspace.plan.status.${entry.status}`), + step: entry.label + }) diff --git a/src/renderer/src/components/message/MessageItemAssistant.vue b/src/renderer/src/components/message/MessageItemAssistant.vue index 9d94c8e8f..142687322 100644 --- a/src/renderer/src/components/message/MessageItemAssistant.vue +++ b/src/renderer/src/components/message/MessageItemAssistant.vue @@ -53,7 +53,7 @@ /> { return false } +const isInternalToolCall = (block: DisplayAssistantMessageBlock): 
boolean => { + return block.tool_call?.name === 'update_plan' && block.extra?.internalTool === true +} + const isVideoUrl = (value: string): boolean => { if (!value) return false diff --git a/src/renderer/src/i18n/da-DK/chat.json b/src/renderer/src/i18n/da-DK/chat.json index 01c8a02bd..9afd6c544 100644 --- a/src/renderer/src/i18n/da-DK/chat.json +++ b/src/renderer/src/i18n/da-DK/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "Ingen opgaver endnu", "section": "plan", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "Afsluttet", "failed": "Mislykket", diff --git a/src/renderer/src/i18n/en-US/chat.json b/src/renderer/src/i18n/en-US/chat.json index 6ad39036a..a1724717f 100644 --- a/src/renderer/src/i18n/en-US/chat.json +++ b/src/renderer/src/i18n/en-US/chat.json @@ -234,6 +234,7 @@ "plan": { "section": "Plan", "empty": "No tasks yet", + "itemAriaLabel": "{status}: {step}", "status": { "pending": "Pending", "in_progress": "In Progress", diff --git a/src/renderer/src/i18n/fa-IR/chat.json b/src/renderer/src/i18n/fa-IR/chat.json index 2d545a6c4..0be5a7791 100644 --- a/src/renderer/src/i18n/fa-IR/chat.json +++ b/src/renderer/src/i18n/fa-IR/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "هنوز هیچ تکلیفی وجود ندارد", "section": "برنامه ریزی کنید", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "تکمیل شد", "failed": "شکست بخورد", diff --git a/src/renderer/src/i18n/fr-FR/chat.json b/src/renderer/src/i18n/fr-FR/chat.json index f04751aca..6e17a6e87 100644 --- a/src/renderer/src/i18n/fr-FR/chat.json +++ b/src/renderer/src/i18n/fr-FR/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "Aucune tâche pour l'instant", "section": "plan", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "Complété", "failed": "échouer", diff --git a/src/renderer/src/i18n/he-IL/chat.json b/src/renderer/src/i18n/he-IL/chat.json index 414140a52..3ff221677 100644 --- a/src/renderer/src/i18n/he-IL/chat.json +++ b/src/renderer/src/i18n/he-IL/chat.json @@ -221,6 
+221,7 @@ "plan": { "empty": "עדיין אין משימות", "section": "לְתַכְנֵן", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "הושלם", "failed": "לְהִכָּשֵׁל", diff --git a/src/renderer/src/i18n/ja-JP/chat.json b/src/renderer/src/i18n/ja-JP/chat.json index b266e99ef..2dd76bdcd 100644 --- a/src/renderer/src/i18n/ja-JP/chat.json +++ b/src/renderer/src/i18n/ja-JP/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "まだタスクはありません", "section": "プラン", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "完了しました", "failed": "失敗", diff --git a/src/renderer/src/i18n/ko-KR/chat.json b/src/renderer/src/i18n/ko-KR/chat.json index 346bbd7fe..65249f0f2 100644 --- a/src/renderer/src/i18n/ko-KR/chat.json +++ b/src/renderer/src/i18n/ko-KR/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "아직 할 일이 없습니다.", "section": "계획", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "완전한", "failed": "실패하다", diff --git a/src/renderer/src/i18n/pt-BR/chat.json b/src/renderer/src/i18n/pt-BR/chat.json index 5191e20d8..97b3eef12 100644 --- a/src/renderer/src/i18n/pt-BR/chat.json +++ b/src/renderer/src/i18n/pt-BR/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "Nenhuma tarefa ainda", "section": "plano", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "Concluído", "failed": "falhar", diff --git a/src/renderer/src/i18n/ru-RU/chat.json b/src/renderer/src/i18n/ru-RU/chat.json index 0432a58fd..167c4c329 100644 --- a/src/renderer/src/i18n/ru-RU/chat.json +++ b/src/renderer/src/i18n/ru-RU/chat.json @@ -221,6 +221,7 @@ "plan": { "empty": "Заданий пока нет", "section": "план", + "itemAriaLabel": "{status}: {step}", "status": { "completed": "Завершенный", "failed": "неудача", diff --git a/src/renderer/src/i18n/zh-CN/chat.json b/src/renderer/src/i18n/zh-CN/chat.json index 57365b300..9d624d5f8 100644 --- a/src/renderer/src/i18n/zh-CN/chat.json +++ b/src/renderer/src/i18n/zh-CN/chat.json @@ -234,6 +234,7 @@ "plan": { "section": "计划", "empty": "暂无任务", + 
"itemAriaLabel": "{status}:{step}", "status": { "pending": "待处理", "in_progress": "进行中", diff --git a/src/renderer/src/i18n/zh-HK/chat.json b/src/renderer/src/i18n/zh-HK/chat.json index ab5189d06..a7e95e462 100644 --- a/src/renderer/src/i18n/zh-HK/chat.json +++ b/src/renderer/src/i18n/zh-HK/chat.json @@ -229,6 +229,7 @@ "plan": { "empty": "暫無任務", "section": "計劃", + "itemAriaLabel": "{status}:{step}", "status": { "completed": "已完成", "failed": "失敗", diff --git a/src/renderer/src/i18n/zh-TW/chat.json b/src/renderer/src/i18n/zh-TW/chat.json index 97d1bbdb5..b27fa206d 100644 --- a/src/renderer/src/i18n/zh-TW/chat.json +++ b/src/renderer/src/i18n/zh-TW/chat.json @@ -229,6 +229,7 @@ "plan": { "empty": "暫無任務", "section": "計劃", + "itemAriaLabel": "{status}:{step}", "status": { "completed": "已完成", "failed": "失敗", diff --git a/src/renderer/src/pages/ChatPage.vue b/src/renderer/src/pages/ChatPage.vue index e587ee40e..b2b64c958 100644 --- a/src/renderer/src/pages/ChatPage.vue +++ b/src/renderer/src/pages/ChatPage.vue @@ -28,7 +28,7 @@ /> -
+
- -