Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1742,6 +1742,25 @@
"dashscope_tts_voice": "loongstella",
"timeout": "20",
},
"阿里云百炼 音色复刻 TTS(API)": {
"hint": "使用阿里云百炼「音色复刻」生成的专属音色(Qwen3-TTS-VC 系列)。"
"请先在百炼控制台或通过声音复刻 API 创建复刻音色,获取 voice_id 后填入下方配置。"
"API Key 获取: https://bailian.console.aliyun.com/?tab=model#/api-key;"
"声音复刻文档: https://help.aliyun.com/zh/model-studio/voice-cloning-user-guide",
"id": "dashscope_voice_clone_tts",
"provider": "dashscope",
"type": "dashscope_voice_clone_tts",
"provider_type": "text_to_speech",
"enable": False,
"api_key": "",
"model": "qwen3-tts-vc-2026-01-22",
"voice_id": "",
"language_type": "",
"workspace_id": "",
"region": "cn-beijing",
"base_url": "",
"timeout": "20",
},
"Azure TTS": {
"id": "azure_tts",
"type": "azure_tts",
Expand Down Expand Up @@ -2323,6 +2342,54 @@
"hint": "Azure_TTS 服务的订阅密钥(注意不是令牌)",
},
"dashscope_tts_voice": {"description": "音色", "type": "string"},
"voice_id": {
"description": "复刻音色 ID",
"type": "string",
"hint": "由阿里云百炼「音色复刻」接口返回(output.voice,例如 yourVoice)。"
"请确保 voice_id 与所选模型属于同一次声音复刻创建结果。",
},
"language_type": {
"description": "合成语种 (可选)",
"type": "string",
"hint": "合成音频的语种。留空表示由模型自动判断 (Auto)。"
"指定单一语种通常能显著提升合成质量。",
"options": [
"",
"Auto",
"Chinese",
"English",
"German",
"Italian",
"Portuguese",
"Spanish",
"Japanese",
"Korean",
"French",
"Russian",
],
},
"workspace_id": {
"description": "workspace ID (可选)",
"type": "string",
"hint": "填写后会自动切换到百炼 workspace 专属域名 "
"(https://{WorkspaceId}.{region}.maas.aliyuncs.com),"
"通常能获得更好的性能和稳定性。可在百炼控制台「workspace 详情」页面查看。",
},
"region": {
"description": "URL区域 (可选)",
"type": "string",
"hint": "仅在填写了 workspace ID 时生效。北京地域选择 cn-beijing,新加坡地域选择 ap-southeast-1。",
"options": [
"cn-beijing",
"ap-southeast-1",
],
},
"base_url": {
"description": "自定义 DashScope Base URL (可选)",
"type": "string",
"hint": "完整的 HTTP API Base URL,例如 https://dashscope.aliyuncs.com/api/v1。"
"若填写则优先级高于 workspace ID + 地域 的组合。一般无需填写。",
},
"gm_resp_image_modal": {
"description": "启用图片模态",
"type": "bool",
Expand Down
4 changes: 4 additions & 0 deletions astrbot/core/provider/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ def dynamic_import_provider(self, type: str) -> None:
from .sources.dashscope_tts import (
ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
)
case "dashscope_voice_clone_tts":
from .sources.dashscope_voice_clone_tts import (
ProviderDashscopeVoiceCloneTTSAPI as ProviderDashscopeVoiceCloneTTSAPI,
)
case "azure_tts":
from .sources.azure_tts_source import (
AzureTTSProvider as AzureTTSProvider,
Expand Down
206 changes: 206 additions & 0 deletions astrbot/core/provider/sources/dashscope_voice_clone_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""阿里云百炼 - 音色复刻(Qwen-TTS Voice Clone)TTS 提供商。

通过指定声音复刻产生的 voice_id(如 ``yourVoice``)与对应的 Qwen3 TTS-VC
合成模型(如 ``qwen3-tts-vc-2026-01-22``)调用阿里云 DashScope 的多模态生成
接口完成语音合成。该提供商仅负责"使用"已经在百炼控制台中创建好的复刻音色,
音色的创建/管理流程请直接通过百炼控制台或 API 完成。
"""

from __future__ import annotations

import asyncio
import base64
import logging
import os
import uuid

import aiohttp
import dashscope

try:
from dashscope.aigc.multimodal_conversation import MultiModalConversation
except ImportError: # pragma: no cover - 老版本 dashscope 没有 Qwen TTS 能力
MultiModalConversation = None

from astrbot.core.utils.astrbot_path import get_astrbot_temp_path

from ..entities import ProviderType
from ..provider import TTSProvider
from ..register import register_provider_adapter


@register_provider_adapter(
    "dashscope_voice_clone_tts",
    "阿里云百炼 音色复刻 TTS API (Qwen3-TTS-VC)",
    provider_type=ProviderType.TEXT_TO_SPEECH,
)
class ProviderDashscopeVoiceCloneTTSAPI(TTSProvider):
    """TTS provider that synthesizes speech with an existing cloned voice.

    The provider sends ``text`` together with a configured ``voice_id`` and
    model name to ``MultiModalConversation.call`` and stores the returned
    audio as a ``.wav`` file in the AstrBot temp directory. It only *uses* a
    cloned voice; creating or managing voices is out of scope.

    Config keys read from ``provider_config``: ``api_key``, ``voice_id``,
    ``language_type``, ``workspace_id``, ``region``, ``base_url``, ``model``
    and ``timeout`` (seconds).
    """

    # Fallbacks applied when a config entry is missing, empty or unusable.
    _DEFAULT_MODEL = "qwen3-tts-vc-2026-01-22"
    _DEFAULT_REGION = "cn-beijing"
    _DEFAULT_TIMEOUT_SECONDS = 20.0

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """Read and normalize the provider configuration.

        String settings are stripped of surrounding whitespace because they
        are typed or pasted by users; a stray space or newline would otherwise
        end up inside the request URL or the credentials.
        """
        super().__init__(provider_config, provider_settings)
        self.chosen_api_key: str = self._clean(provider_config.get("api_key"))
        # Cloned voice ID (the hint text describes it as ``output.voice``).
        self.voice_id: str = self._clean(provider_config.get("voice_id"))
        # Optional synthesis language; left empty it is not sent at all.
        self.language_type: str = self._clean(provider_config.get("language_type"))
        # Optional workspace ID; when set, a workspace-specific host is used.
        self.workspace_id: str = self._clean(provider_config.get("workspace_id"))
        # Region part of the workspace host; only used with a workspace ID.
        self.region: str = (
            self._clean(provider_config.get("region")) or self._DEFAULT_REGION
        )
        # Explicit base URL; takes precedence over workspace ID + region.
        self.base_http_api_url: str = self._clean(provider_config.get("base_url"))

        self.set_model(
            self._clean(provider_config.get("model")) or self._DEFAULT_MODEL,
        )
        self.timeout_ms = self._parse_timeout_ms(provider_config.get("timeout", 20))

        # The API key and base URL are handed to the SDK as per-call keyword
        # arguments, so the module-level dashscope settings are never modified.

    # ---- public API ----
    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and return the path of the written audio file.

        Raises:
            RuntimeError: if no model or ``voice_id`` is configured, if the
                API call reports a non-200 status, or if no audio is returned.
        """
        model = self.get_model()
        if not model:
            raise RuntimeError("Dashscope Voice Clone TTS model is not configured.")
        if not self.voice_id:
            raise RuntimeError(
                "未配置复刻音色 ID(voice_id),"
                "请先在阿里云百炼控制台或 API 创建复刻音色后再填写。",
            )

        temp_dir = get_astrbot_temp_path()
        os.makedirs(temp_dir, exist_ok=True)

        # _synthesize raises when the response carries no audio, so the
        # result does not need to be re-checked here.
        audio_bytes = await self._synthesize(model, text)

        path = os.path.join(
            temp_dir,
            f"dashscope_voice_clone_tts_{uuid.uuid4()}.wav",
        )
        with open(path, "wb") as f:
            f.write(audio_bytes)
        return path

    # ---- internal helpers ----
    @staticmethod
    def _clean(value) -> str:
        """Return ``value`` as a whitespace-stripped string (``None`` -> ``""``)."""
        if value is None:
            return ""
        return str(value).strip()

    @classmethod
    def _parse_timeout_ms(cls, raw) -> float:
        """Convert a timeout in seconds to milliseconds.

        Values that cannot be parsed as a number, or that are not positive
        and finite, fall back to the default instead of raising.
        """
        try:
            seconds = float(raw)
        except (TypeError, ValueError):
            seconds = cls._DEFAULT_TIMEOUT_SECONDS
        # The chained comparison is also False for NaN.
        if not 0 < seconds < float("inf"):
            seconds = cls._DEFAULT_TIMEOUT_SECONDS
        return seconds * 1000

    @staticmethod
    def _field(obj, name: str):
        """Read ``name`` from ``obj``, accepting both mappings and plain objects."""
        if obj is None:
            return None
        if isinstance(obj, dict) and name in obj:
            return obj[name]
        try:
            return getattr(obj, name, None)
        except KeyError:
            # Mapping-backed objects may implement attribute access as an item
            # lookup and raise KeyError rather than AttributeError.
            return None

    def _resolve_base_url(self) -> str:
        """Return the base URL to pass to the SDK, or ``""`` for its default.

        Precedence: explicit ``base_url`` first, then the workspace-specific
        host built from ``workspace_id`` and ``region``, otherwise ``""``.
        """
        custom_url = (self.base_http_api_url or "").strip().rstrip("/")
        if custom_url:
            return custom_url
        workspace = (self.workspace_id or "").strip()
        if workspace:
            region = (self.region or "").strip() or self._DEFAULT_REGION
            return f"https://{workspace}.{region}.maas.aliyuncs.com/api/v1"
        # No dedicated host configured: let the SDK use its built-in default.
        return ""

    def _call_qwen_tts(self, model: str, text: str):
        """Perform the blocking SDK call and return its raw response.

        Raises:
            RuntimeError: if the installed dashscope SDK does not provide
                ``MultiModalConversation``.
        """
        if MultiModalConversation is None:
            raise RuntimeError(
                "dashscope SDK 缺少 MultiModalConversation。请升级 dashscope "
                "至最新版本以使用 Qwen TTS 系列模型。",
            )

        kwargs = {
            "model": model,
            "messages": None,
            "api_key": self.chosen_api_key,
            "voice": self.voice_id,
            "text": text,
        }
        resolved_base_url = self._resolve_base_url()
        if resolved_base_url:
            # NOTE(review): confirm that the SDK honours ``base_http_api_url``
            # as a per-call keyword; if it only reads a differently named
            # keyword, this override is silently ignored.
            kwargs["base_http_api_url"] = resolved_base_url
        if self.language_type:
            kwargs["language_type"] = self.language_type
        # NOTE(review): the configured timeout is not forwarded to this call;
        # it currently only bounds the audio download.
        return MultiModalConversation.call(**kwargs)

    async def _synthesize(self, model: str, text: str) -> bytes:
        """Run the SDK call in the default executor and return the audio bytes.

        Raises:
            RuntimeError: on a non-200 ``status_code`` or an empty audio payload.
        """
        loop = asyncio.get_running_loop()
        # The SDK call is synchronous; keep it off the event loop thread.
        response = await loop.run_in_executor(
            None,
            self._call_qwen_tts,
            model,
            text,
        )
        if hasattr(response, "status_code") and response.status_code != 200:
            raise RuntimeError(
                f"DashScope API 调用失败,状态码: {response.status_code},"
                f"错误码: {getattr(response, 'code', 'Unknown')},"
                f"错误信息: {getattr(response, 'message', 'Unknown')}"
            )
        audio_bytes = await self._extract_audio_from_response(response)
        if not audio_bytes:
            raise RuntimeError(
                f"模型 '{model}' 音色复刻语音合成失败。返回内容为空。",
            )
        return audio_bytes

    async def _extract_audio_from_response(self, response) -> bytes | None:
        """Return the audio carried by ``response``, or ``None`` if there is none.

        Inline base64 ``data`` is preferred. If it is absent or cannot be
        decoded, the audio is downloaded from ``url`` when one is provided.
        """
        audio_obj = self._field(self._field(response, "output"), "audio")
        if not audio_obj:
            return None

        data_b64 = self._field(audio_obj, "data")
        if data_b64:
            try:
                return base64.b64decode(data_b64)
            except (ValueError, TypeError):
                # Do not give up yet: the response may still offer a URL.
                logging.exception("Failed to decode base64 audio data.")

        url = self._field(audio_obj, "url")
        if url:
            return await self._download_audio_from_url(url)
        return None

    async def _download_audio_from_url(self, url: str) -> bytes | None:
        """Download ``url`` and return the body, or ``None`` on any failure.

        Failures (non-200 status, client error, timeout, OS error) are logged
        and reported as ``None`` rather than raised.
        """
        if not url:
            return None
        # At least one second; 20 seconds when no timeout is configured.
        timeout = max(self.timeout_ms / 1000, 1) if self.timeout_ms else 20
        try:
            async with (
                aiohttp.ClientSession() as session,
                session.get(
                    url,
                    timeout=aiohttp.ClientTimeout(total=timeout),
                ) as response,
            ):
                if response.status != 200:
                    logging.error(
                        "Failed to download audio from URL %s, HTTP status: %s",
                        url,
                        response.status,
                    )
                    return None
                return await response.read()
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as e:
            logging.exception("Failed to download audio from URL %s: %s", url, e)
            return None
20 changes: 20 additions & 0 deletions dashboard/src/i18n/locales/en-US/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,26 @@
"dashscope_tts_voice": {
"description": "Voice"
},
"voice_id": {
"description": "Voice Clone ID",
"hint": "Returned by the Alibaba Cloud Bailian Voice Cloning API (output.voice, e.g. yourVoice). Make sure the voice_id and the selected model belong to the same voice clone creation result."
},
"language_type": {
"description": "Synthesis Language (optional)",
"hint": "Language for the synthesized audio. Leave empty for auto-detection (Auto). Specifying a single language usually improves synthesis quality significantly."
},
"workspace_id": {
"description": "Workspace ID (optional)",
"hint": "When filled, the provider switches to the workspace-specific domain (https://{WorkspaceId}.{region}.maas.aliyuncs.com) for better performance and stability. Find it in the Bailian console under Workspace Details."
},
"region": {
"description": "Region (optional)",
"hint": "Only effective when Workspace ID is set. Use cn-beijing for Beijing, ap-southeast-1 for Singapore."
},
"base_url": {
"description": "Custom DashScope Base URL (optional)",
"hint": "Full HTTP API Base URL, e.g. https://dashscope.aliyuncs.com/api/v1. Takes precedence over Workspace ID + Region when set. Usually not needed."
},
"gm_resp_image_modal": {
"description": "Enable image modality",
"hint": "When enabled, responses can include images. Requires model support or it will error. See the Google Gemini website for supported models. Tip: if you need image generation, disable the `Enable member recognition` setting for better results."
Expand Down
20 changes: 20 additions & 0 deletions dashboard/src/i18n/locales/ru-RU/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,26 @@
"dashscope_tts_voice": {
"description": "Голос"
},
"voice_id": {
"description": "ID клонированного голоса",
"hint": "Возвращается API клонирования голоса Alibaba Cloud Bailian (output.voice, напр. yourVoice). Убедитесь, что voice_id и выбранная модель относятся к одному результату клонирования голоса."
},
"language_type": {
"description": "Язык синтеза (опционально)",
"hint": "Язык синтезируемого аудио. Оставьте пустым для автоопределения (Auto). Указание одного языка обычно значительно улучшает качество синтеза."
},
"workspace_id": {
"description": "ID рабочего пространства (опционально)",
"hint": "При заполнении провайдер переключается на домен рабочего пространства (https://{WorkspaceId}.{region}.maas.aliyuncs.com) для лучшей производительности. Можно найти в консоли Bailian в разделе сведений о рабочем пространстве."
},
"region": {
"description": "Регион (опционально)",
"hint": "Действует только при указании ID рабочего пространства. Используйте cn-beijing для Пекина, ap-southeast-1 для Сингапура."
},
"base_url": {
"description": "Пользовательский DashScope Base URL (опционально)",
"hint": "Полный HTTP API Base URL, напр. https://dashscope.aliyuncs.com/api/v1. Приоритет выше, чем ID рабочего пространства + регион. Обычно не требуется."
},
"gm_resp_image_modal": {
"description": "Включить визуальную модальность",
"hint": "Если включено, ответы могут содержать изображения. Требует поддержки моделью. Совет: для генерации изображений отключите 'Распознавание участников'."
Expand Down
20 changes: 20 additions & 0 deletions dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1442,6 +1442,26 @@
"dashscope_tts_voice": {
"description": "音色"
},
"voice_id": {
"description": "复刻音色 ID",
"hint": "由阿里云百炼「音色复刻」接口返回(output.voice,例如 yourVoice)。请确保 voice_id 与所选模型属于同一次声音复刻创建结果。"
},
"language_type": {
"description": "合成语种 (可选)",
"hint": "合成音频的语种。留空表示由模型自动判断 (Auto)。指定单一语种通常能显著提升合成质量。"
},
"workspace_id": {
"description": "workspace ID (可选)",
"hint": "填写后会自动切换到百炼 workspace 专属域名 (https://{WorkspaceId}.{region}.maas.aliyuncs.com),通常能获得更好的性能和稳定性。可在百炼控制台「workspace 详情」页面查看。"
},
"region": {
"description": "URL区域 (可选)",
"hint": "仅在填写了 workspace ID 时生效。北京地域选择 cn-beijing,新加坡地域选择 ap-southeast-1。"
},
"base_url": {
"description": "自定义 DashScope Base URL (可选)",
"hint": "完整的 HTTP API Base URL,例如 https://dashscope.aliyuncs.com/api/v1。若填写则优先级高于 workspace ID + 地域 的组合。一般无需填写。"
},
"gm_resp_image_modal": {
"description": "启用图片模态",
"hint": "启用后,将支持返回图片内容。需要模型支持,否则会报错。具体支持模型请查看 Google Gemini 官方网站。温馨提示,如果您需要生成图片,请关闭 `启用群员识别` 配置获得更好的效果。"
Expand Down