AstrBotDevs · makuralymi · Jun 27, 2026 · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -1742,6 +1742,25 @@
                         "dashscope_tts_voice": "loongstella",
                         "timeout": "20",
                     },
+                    "阿里云百炼 音色复刻 TTS(API)": {
+                        "hint": "使用阿里云百炼「音色复刻」生成的专属音色（Qwen3-TTS-VC 系列）。"
+                        "请先在百炼控制台或通过声音复刻 API 创建复刻音色，获取 voice_id 后填入下方配置。"
+                        "API Key 获取: https://bailian.console.aliyun.com/?tab=model#/api-key；"
+                        "声音复刻文档: https://help.aliyun.com/zh/model-studio/voice-cloning-user-guide",
+                        "id": "dashscope_voice_clone_tts",
+                        "provider": "dashscope",
+                        "type": "dashscope_voice_clone_tts",
+                        "provider_type": "text_to_speech",
+                        "enable": False,
+                        "api_key": "",
+                        "model": "qwen3-tts-vc-2026-01-22",
+                        "voice_id": "",
+                        "language_type": "",
+                        "workspace_id": "",
+                        "region": "cn-beijing",
+                        "base_url": "",
+                        "timeout": "20",
+                    },
                     "Azure TTS": {
                         "id": "azure_tts",
                         "type": "azure_tts",
@@ -2323,6 +2342,54 @@
                         "hint": "Azure_TTS 服务的订阅密钥（注意不是令牌）",
                     },
                     "dashscope_tts_voice": {"description": "音色", "type": "string"},
+                    "voice_id": {
+                        "description": "复刻音色 ID",
+                        "type": "string",
+                        "hint": "由阿里云百炼「音色复刻」接口返回（output.voice，例如 yourVoice）。"
+                        "请确保 voice_id 与所选模型属于同一次声音复刻创建结果。",
+                    },
+                    "language_type": {
+                        "description": "合成语种 (可选)",
+                        "type": "string",
+                        "hint": "合成音频的语种。留空表示由模型自动判断 (Auto)。"
+                        "指定单一语种通常能显著提升合成质量。",
+                        "options": [
+                            "",
+                            "Auto",
+                            "Chinese",
+                            "English",
+                            "German",
+                            "Italian",
+                            "Portuguese",
+                            "Spanish",
+                            "Japanese",
+                            "Korean",
+                            "French",
+                            "Russian",
+                        ],
+                    },
+                    "workspace_id": {
+                        "description": "workspace ID (可选)",
+                        "type": "string",
+                        "hint": "填写后会自动切换到百炼 workspace 专属域名 "
+                        "(https://{WorkspaceId}.{region}.maas.aliyuncs.com)，"
+                        "通常能获得更好的性能和稳定性。可在百炼控制台「workspace 详情」页面查看。",
+                    },
+                    "region": {
+                        "description": "URL区域 (可选)",
+                        "type": "string",
+                        "hint": "仅在填写了 workspace ID 时生效。北京地域选择 cn-beijing，新加坡地域选择 ap-southeast-1。",
+                        "options": [
+                            "cn-beijing",
+                            "ap-southeast-1",
+                        ],
+                    },
+                    "base_url": {
+                        "description": "自定义 DashScope Base URL (可选)",
+                        "type": "string",
+                        "hint": "完整的 HTTP API Base URL，例如 https://dashscope.aliyuncs.com/api/v1。"
+                        "若填写则优先级高于 workspace ID + 地域 的组合。一般无需填写。",
+                    },
                     "gm_resp_image_modal": {
                         "description": "启用图片模态",
                         "type": "bool",

diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
@@ -451,6 +451,10 @@ def dynamic_import_provider(self, type: str) -> None:
                 from .sources.dashscope_tts import (
                     ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
                 )
+            case "dashscope_voice_clone_tts":
+                from .sources.dashscope_voice_clone_tts import (
+                    ProviderDashscopeVoiceCloneTTSAPI as ProviderDashscopeVoiceCloneTTSAPI,
+                )
             case "azure_tts":
                 from .sources.azure_tts_source import (
                     AzureTTSProvider as AzureTTSProvider,

diff --git a/astrbot/core/provider/sources/dashscope_voice_clone_tts.py b/astrbot/core/provider/sources/dashscope_voice_clone_tts.py
@@ -0,0 +1,206 @@
+"""阿里云百炼 - 音色复刻（Qwen-TTS Voice Clone）TTS 提供商。
+
+通过指定声音复刻产生的 voice_id（如 ``yourVoice``）与对应的 Qwen3 TTS-VC
+合成模型（如 ``qwen3-tts-vc-2026-01-22``）调用阿里云 DashScope 的多模态生成
+接口完成语音合成。该提供商仅负责"使用"已经在百炼控制台中创建好的复刻音色，
+音色的创建/管理流程请直接通过百炼控制台或 API 完成。
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import logging
+import os
+import uuid
+
+import aiohttp
+import dashscope
+
+try:
+    from dashscope.aigc.multimodal_conversation import MultiModalConversation
+except ImportError:  # pragma: no cover - 老版本 dashscope 没有 Qwen TTS 能力
+    MultiModalConversation = None
+
+from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
+
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+
+@register_provider_adapter(
+    "dashscope_voice_clone_tts",
+    "阿里云百炼 音色复刻 TTS API (Qwen3-TTS-VC)",
+    provider_type=ProviderType.TEXT_TO_SPEECH,
+)
+class ProviderDashscopeVoiceCloneTTSAPI(TTSProvider):
+    """使用阿里云百炼 Qwen3-TTS-VC 系列模型合成"复刻音色"的 TTS 提供商。"""
+
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        super().__init__(provider_config, provider_settings)
+        self.chosen_api_key: str = provider_config.get("api_key", "")
+        # 复刻音色 ID，由百炼音色复刻接口返回（output.voice）
+        self.voice_id: str = provider_config.get(
+            "voice_id",
+            "",
+        )
+        # 合成语种，可选；默认让模型自动判断
+        self.language_type: str = provider_config.get(
+            "language_type",
+            "",
+        )
+        # workspace ID（可选），填写后会切换到百炼 workspace 专属域名以获得更佳性能
+        self.workspace_id: str = provider_config.get(
+            "workspace_id",
+            "",
+        )
+        # 地域，默认 cn-beijing；可选 ap-southeast-1（新加坡）
+        self.region: str = (
+            provider_config.get(
+                "region",
+                "cn-beijing",
+            )
+            or "cn-beijing"
+        )
+        # 自定义 base url（优先级最高），不填时根据 workspace_id / region 推断
+        self.base_http_api_url: str = provider_config.get(
+            "base_url",
+            "",
+        )
+
+        self.set_model(
+            provider_config.get("model") or "qwen3-tts-vc-2026-01-22",
+        )
+        self.timeout_ms = float(provider_config.get("timeout", 20)) * 1000
+
+        # API Key 和 Base URL 将在每次调用时通过 kwargs 动态传入，避免修改全局配置
+
+    # public API#
+    async def get_audio(self, text: str) -> str:
+        model = self.get_model()
+        if not model:
+            raise RuntimeError("Dashscope Voice Clone TTS model is not configured.")
+        if not self.voice_id:
+            raise RuntimeError(
+                "未配置复刻音色 ID（voice_id），"
+                "请先在阿里云百炼控制台或 API 创建复刻音色后再填写。",
+            )
+
+        temp_dir = get_astrbot_temp_path()
+        os.makedirs(temp_dir, exist_ok=True)
+
+        # 每次调用前确保 dashscope 全局配置使用本提供商指定的值。
+        # 避免多 TTS 共存时被其它提供商覆盖。
+        # 每次调用时通过 kwargs 动态传入 API Key 和 Base URL，无需修改全局配置
+
+        audio_bytes = await self._synthesize(model, text)
+        if not audio_bytes:
+            raise RuntimeError(
+                "音色复刻语音合成失败，返回内容为空。请检查模型名、voice_id "
+                "以及对应的 API Key/地域是否匹配。",
+            )
+
+        path = os.path.join(
+            temp_dir,
+            f"dashscope_voice_clone_tts_{uuid.uuid4()}.wav",
+        )
+        with open(path, "wb") as f:
+            f.write(audio_bytes)
+        return path
+
+    # internal helpers#
+    def _resolve_base_url(self) -> str:
+        """根据配置推断 DashScope HTTP base url。"""
+        if self.base_http_api_url:
+            return self.base_http_api_url.rstrip("/")
+        if self.workspace_id:
+            region = self.region or "cn-beijing"
+            return f"https://{self.workspace_id}.{region}.maas.aliyuncs.com/api/v1"
+        # 不指定专属域名时返回空字符串，使用 dashscope SDK 内置默认域名
+        return ""
+
+    def _call_qwen_tts(self, model: str, text: str):
+        if MultiModalConversation is None:
+            raise RuntimeError(
+                "dashscope SDK 缺少 MultiModalConversation。请升级 dashscope "
+                "至最新版本以使用 Qwen TTS 系列模型。",
+            )
+
+        kwargs = {
+            "model": model,
+            "messages": None,
+            "api_key": self.chosen_api_key,
+            "voice": self.voice_id,
+            "text": text,
+        }
+        resolved_base_url = self._resolve_base_url()
+        if resolved_base_url:
+            kwargs["base_http_api_url"] = resolved_base_url
+        if self.language_type:
+            kwargs["language_type"] = self.language_type
+        return MultiModalConversation.call(**kwargs)
+
+    async def _synthesize(self, model: str, text: str) -> bytes | None:
+        loop = asyncio.get_running_loop()
+        response = await loop.run_in_executor(
+            None,
+            self._call_qwen_tts,
+            model,
+            text,
+        )
+        if hasattr(response, "status_code") and response.status_code != 200:
+            raise RuntimeError(
+                f"DashScope API 调用失败，状态码: {response.status_code}，" 
+                f"错误码: {getattr(response, 'code', 'Unknown')}，" 
+                f"错误信息: {getattr(response, 'message', 'Unknown')}"
+            )
+        audio_bytes = await self._extract_audio_from_response(response)
+        if not audio_bytes:
+            raise RuntimeError(
+                f"模型 '{model}' 音色复刻语音合成失败。返回内容为空。",
+            )
+        return audio_bytes
+
+    async def _extract_audio_from_response(self, response) -> bytes | None:
+        output = getattr(response, "output", None)
+        audio_obj = getattr(output, "audio", None) if output is not None else None
+        if not audio_obj:
+            return None
+
+        data_b64 = getattr(audio_obj, "data", None)
+        if data_b64:
+            try:
+                return base64.b64decode(data_b64)
+            except (ValueError, TypeError):
+                logging.exception("Failed to decode base64 audio data.")
+                return None
+
+        url = getattr(audio_obj, "url", None)
+        if url:
+            return await self._download_audio_from_url(url)
+        return None
+
+    async def _download_audio_from_url(self, url: str) -> bytes | None:
+        if not url:
+            return None
+        timeout = max(self.timeout_ms / 1000, 1) if self.timeout_ms else 20
+        try:
+            async with (
+                aiohttp.ClientSession() as session,
+                session.get(
+                    url,
+                    timeout=aiohttp.ClientTimeout(total=timeout),
+                ) as response,
+            ):
+                if response.status != 200:
+                    logging.error(f"Failed to download audio from URL {url}, HTTP status: {response.status}")
+                    return None
+                return await response.read()
+        except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as e:
+            logging.exception(f"Failed to download audio from URL {url}: {e}")
+            return None
diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -1440,6 +1440,26 @@
       "dashscope_tts_voice": {
         "description": "Voice"
       },
+      "voice_id": {
+        "description": "Voice Clone ID",
+        "hint": "Returned by the Alibaba Cloud Bailian Voice Cloning API (output.voice, e.g. yourVoice). Make sure the voice_id and the selected model belong to the same voice clone creation result."
+      },
+      "language_type": {
+        "description": "Synthesis Language (optional)",
+        "hint": "Language for the synthesized audio. Leave empty for auto-detection (Auto). Specifying a single language usually improves synthesis quality significantly."
+      },
+      "workspace_id": {
+        "description": "Workspace ID (optional)",
+        "hint": "When filled, the provider switches to the workspace-specific domain (https://{WorkspaceId}.{region}.maas.aliyuncs.com) for better performance and stability. Find it in the Bailian console under Workspace Details."
+      },
+      "region": {
+        "description": "Region (optional)",
+        "hint": "Only effective when Workspace ID is set. Use cn-beijing for Beijing, ap-southeast-1 for Singapore."
+      },
+      "base_url": {
+        "description": "Custom DashScope Base URL (optional)",
+        "hint": "Full HTTP API Base URL, e.g. https://dashscope.aliyuncs.com/api/v1. Takes precedence over Workspace ID + Region when set. Usually not needed."
+      },
       "gm_resp_image_modal": {
         "description": "Enable image modality",
         "hint": "When enabled, responses can include images. Requires model support or it will error. See the Google Gemini website for supported models. Tip: if you need image generation, disable the `Enable member recognition` setting for better results."

diff --git a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json
@@ -1437,6 +1437,26 @@
             "dashscope_tts_voice": {
                 "description": "Голос"
             },
+            "voice_id": {
+                "description": "ID клонированного голоса",
+                "hint": "Возвращается API клонирования голоса Alibaba Cloud Bailian (output.voice, напр. yourVoice). Убедитесь, что voice_id и выбранная модель относятся к одному результату клонирования голоса."
+            },
+            "language_type": {
+                "description": "Язык синтеза (опционально)",
+                "hint": "Язык синтезируемого аудио. Оставьте пустым для автоопределения (Auto). Указание одного языка обычно значительно улучшает качество синтеза."
+            },
+            "workspace_id": {
+                "description": "ID рабочего пространства (опционально)",
+                "hint": "При заполнении провайдер переключается на домен рабочего пространства (https://{WorkspaceId}.{region}.maas.aliyuncs.com) для лучшей производительности. Можно найти в консоли Bailian в разделе сведений о рабочем пространстве."
+            },
+            "region": {
+                "description": "Регион (опционально)",
+                "hint": "Действует только при указании ID рабочего пространства. Используйте cn-beijing для Пекина, ap-southeast-1 для Сингапура."
+            },
+            "base_url": {
+                "description": "Пользовательский DashScope Base URL (опционально)",
+                "hint": "Полный HTTP API Base URL, напр. https://dashscope.aliyuncs.com/api/v1. Приоритет выше, чем ID рабочего пространства + регион. Обычно не требуется."
+            },
             "gm_resp_image_modal": {
                 "description": "Включить визуальную модальность",
                 "hint": "Если включено, ответы могут содержать изображения. Требует поддержки моделью. Совет: для генерации изображений отключите 'Распознавание участников'."

diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -1442,6 +1442,26 @@
       "dashscope_tts_voice": {
         "description": "音色"
       },
+      "voice_id": {
+        "description": "复刻音色 ID",
+        "hint": "由阿里云百炼「音色复刻」接口返回（output.voice，例如 yourVoice）。请确保 voice_id 与所选模型属于同一次声音复刻创建结果。"
+      },
+      "language_type": {
+        "description": "合成语种 (可选)",
+        "hint": "合成音频的语种。留空表示由模型自动判断 (Auto)。指定单一语种通常能显著提升合成质量。"
+      },
+      "workspace_id": {
+        "description": "workspace ID (可选)",
+        "hint": "填写后会自动切换到百炼 workspace 专属域名 (https://{WorkspaceId}.{region}.maas.aliyuncs.com)，通常能获得更好的性能和稳定性。可在百炼控制台「workspace 详情」页面查看。"
+      },
+      "region": {
+        "description": "URL区域 (可选)",
+        "hint": "仅在填写了 workspace ID 时生效。北京地域选择 cn-beijing，新加坡地域选择 ap-southeast-1。"
+      },
+      "base_url": {
+        "description": "自定义 DashScope Base URL (可选)",
+        "hint": "完整的 HTTP API Base URL，例如 https://dashscope.aliyuncs.com/api/v1。若填写则优先级高于 workspace ID + 地域 的组合。一般无需填写。"
+      },
       "gm_resp_image_modal": {
         "description": "启用图片模态",
         "hint": "启用后，将支持返回图片内容。需要模型支持，否则会报错。具体支持模型请查看 Google Gemini 官方网站。温馨提示，如果您需要生成图片，请关闭 `启用群员识别` 配置获得更好的效果。"