Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,6 +1838,7 @@
"embedding_api_base": "",
"embedding_model": "",
"embedding_dimensions": 1024,
"max_batch_size": 100,
"timeout": 20,
"proxy": "",
},
Expand All @@ -1852,6 +1853,7 @@
"embedding_api_base": "",
"embedding_model": "gemini-embedding-exp-03-07",
"embedding_dimensions": 768,
"max_batch_size": 100,
"timeout": 20,
"proxy": "",
},
Expand All @@ -1867,6 +1869,7 @@
"embedding_model": "nvidia/llama-nemotron-embed-1b-v2",
"input_type": "passage",
"embedding_dimensions": 1024,
"max_batch_size": 100,
"timeout": 20,
"proxy": "",
},
Expand All @@ -1880,9 +1883,25 @@
"embedding_api_base": "http://localhost:11434",
"embedding_model": "nomic-embed-text",
"embedding_dimensions": 768,
"max_batch_size": 100,
"timeout": 60,
"proxy": "",
},
"阿里云百炼 Embedding": {
"id": "bailian_embedding",
"type": "openai_embedding",
"provider": "bailian",
"provider_type": "embedding",
"hint": "provider_group.provider.bailian_embedding.hint",
"enable": True,
"embedding_api_key": "",
"embedding_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"embedding_model": "text-embedding-v4",
"embedding_dimensions": 1024,
"max_batch_size": 10,
"timeout": 30,
"proxy": "",
},
"vLLM Rerank": {
"id": "vllm_rerank",
"type": "vllm_rerank",
Expand Down Expand Up @@ -2210,6 +2229,11 @@
"hint": "嵌入向量的维度。根据模型不同,可能需要调整,请参考具体模型的文档。此配置项请务必填写正确,否则将导致向量数据库无法正常工作。",
"_special": "get_embedding_dim",
},
"max_batch_size": {
"description": "最大批次大小",
"type": "int",
"hint": "每批向嵌入 API 发送的文本数量上限。各服务商限制不同:DashScope/阿里云百炼限制为 10;OpenAI 支持 2048;Ollama/Gemini/NVIDIA 通常可设为 100-2048。超过限制会触发 API 400 错误。",
},
"embedding_model": {
"description": "嵌入模型",
"type": "string",
Expand Down
5 changes: 4 additions & 1 deletion astrbot/core/knowledge_base/kb_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,10 @@ async def upload_document(
contents = []
metadatas = []
for idx, chunk_text in enumerate(chunks_text):
contents.append(chunk_text)
# Replace lone surrogates that break UTF-8 encoding (e.g., broken
# emoji codepoints from PDF text extraction).
sanitized = chunk_text.encode("utf-8", errors="replace").decode("utf-8")
contents.append(sanitized)
metadatas.append(
{
"kb_id": self.kb.kb_id,
Expand Down
26 changes: 26 additions & 0 deletions astrbot/core/provider/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections.abc import AsyncGenerator
from typing import Literal, TypeAlias, Union

from astrbot import logger
from astrbot.core.agent.message import ContentPart, Message, is_checkpoint_message
from astrbot.core.agent.tool import ToolSet
from astrbot.core.provider.entities import (
Expand Down Expand Up @@ -338,6 +339,21 @@ def get_dim(self) -> int:
"""获取向量的维度"""
...

@property
def max_batch_size(self) -> int:
    """Return the maximum number of texts sent in one embedding API call.

    The value is read from the ``max_batch_size`` key of
    ``self.provider_config``. Providers with a strict per-request limit set
    it in their config (for example, DashScope / Alibaba Cloud Bailian
    requires 10).

    The config value may be edited by hand, so it is validated here: a
    missing, blank, non-numeric, or non-positive value falls back to the
    default of 100 instead of raising or yielding an unusable limit.

    Returns:
        A positive integer: the configured limit, or 100 when the
        configured value is absent or unusable.
    """
    default_limit = 100
    configured = self.provider_config.get("max_batch_size", default_limit)
    try:
        limit = int(configured)
    except (TypeError, ValueError, OverflowError):
        # None, "" / non-numeric text, or a non-finite float: use the default.
        return default_limit
    # A limit of zero or less cannot describe a batch; callers use this value
    # as an upper bound for their batch size, so never hand back one.
    return limit if limit > 0 else default_limit

async def test(self) -> None:
await self.get_embedding("astrbot")

Expand All @@ -362,6 +378,16 @@ async def get_embeddings_batch(
向量列表

"""
# Respect the provider's maximum batch size limit.
if batch_size > self.max_batch_size:
logger.debug(
"Batch size %d exceeds provider limit %d, capping to %d.",
batch_size,
self.max_batch_size,
self.max_batch_size,
)
batch_size = self.max_batch_size

semaphore = asyncio.Semaphore(tasks_limit)
all_embeddings: list[list[float]] = []
failed_batches: list[tuple[int, list[str]]] = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,9 @@
"openai_embedding": {
"hint": "If testing fails, try adding /v1 at the end for some OpenAI API versions."
},
"bailian_embedding": {
"hint": "Alibaba Cloud Bailian Embedding uses OpenAI-compatible API format. Get your API Key from https://bailian.console.aliyun.com/?tab=model#/api-key. The recommended embedding model is text-embedding-v4 (1024 dimensions), with a batch limit of 10."
},
"gemini_embedding": {
"hint": "Gemini Embedding does not require manually adding /v1beta."
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,9 @@
"openai_embedding": {
"hint": "如果测试不通过,可以尝试添加 /v1 在末尾以兼容部分 OpenAI API 版本。"
},
"bailian_embedding": {
"hint": "阿里云百炼 Embedding 使用兼容 OpenAI 的 API 格式。API Key 从 https://bailian.console.aliyun.com/?tab=model#/api-key 获取。嵌入模型推荐 text-embedding-v4(1024 维),批次上限 10。"
},
"gemini_embedding": {
"hint": "Gemini Embedding 无需手动添加 /v1beta。"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
<h3 class="text-h6 mb-4">{{ t('upload.batchSettings') }}</h3>
<v-row>
<v-col cols="12" sm="4">
<v-text-field v-model.number="uploadSettings.batch_size" :label="t('upload.batchSize')" hint="每批处理的文本数量"
<v-text-field v-model.number="uploadSettings.batch_size" :label="t('upload.batchSize')" hint="每批处理的文本数量,实际值不超过嵌入提供方的 max_batch_size 限制"
persistent-hint type="number" variant="outlined" density="compact" />
</v-col>
<v-col cols="12" sm="4">
Expand Down