From 3923b87f0847b9f875171b826a4cdb76381f8c7a Mon Sep 17 00:00:00 2001 From: Li Haoyuan <1513624626@qq.com> Date: Wed, 14 May 2025 11:01:28 +0800 Subject: [PATCH 01/10] feat: Add MiniMax TTS API provider --- astrbot/core/config/default.py | 66 ++++++++++ astrbot/core/provider/manager.py | 4 + .../sources/minimax_tts_api_source.py | 120 ++++++++++++++++++ dashboard/src/views/ProviderPage.vue | 59 ++++----- 4 files changed, 220 insertions(+), 29 deletions(-) create mode 100644 astrbot/core/provider/sources/minimax_tts_api_source.py diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 7a898524..65163a16 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -788,6 +788,25 @@ CONFIG_METADATA_2 = { "azure_tts_subscription_key": "", "azure_tts_region": "eastus" }, + "MiniMax TTS(API)": { + "id": "minimax_tts", + "type": "minimax_tts_api", + "provider_type": "text_to_speech", + "enable": False, + "api_key": "", + "api_base": "https://api.minimax.chat/v1/t2a_v2", + "minimax-group-id": "", + "model": "speech-02-turbo", + "minimax-langboost": "auto", + "minimax-voice-speed": 1.0, + "minimax-voice-vol": 1.0, + "minimax-voice-pitch": 0, + "minimax-voice-id": "female-shaonv", + "minimax-voice-emotion": "neutral", + "minimax-voice-latex": False, + "minimax-voice-english-normalization": False, + "timeout": "20", + }, }, "items": { "azure_tts_voice": { @@ -911,6 +930,53 @@ CONFIG_METADATA_2 = { }, }, }, + "minimax-group-id": { + "type": "string", + "description": "用户所属的组", + "hint": "于账户管理->基本信息中可见", + }, + "minimax-langboost": { + "type": "string", + "description": "指定语言/方言识别能力", + "hint": "增强对指定的小语种和方言的识别能力,设置后可以提升在指定小语种/方言场景下的语音表现", + "options": [ "Chinese","Chinese,Yue","English","Arabic","Russian","Spanish","French","Portuguese","German","Turkish","Dutch","Ukrainian","Vietnamese","Indonesian","Japanese","Italian","Korean","Thai","Polish","Romanian","Greek","Czech","Finnish","Hindi","auto",], + }, + "minimax-voice-speed": { + "type": "float", + "description": "语速取值越大,语速越快", + "hint": "生成声音的语速, 取值[0.5, 2], 默认为1.0, 取值越大,语速越快", + }, + "minimax-voice-vol": { + "type": "float", + "description": "音量", + "hint": "生成声音的音量, 取值(0, 10], 默认为1.0, 取值越大,音量越高", + }, + "minimax-voice-pitch": { + "type": "int", + "description": "语调", + "hint": "生成声音的语调, 取值[-12, 12], 默认为0", + }, + "minimax-voice-id": { + "type": "string", + "description": "音色编号", + "hint": "请求的音色编号, 请见官网文档", + }, + "minimax-voice-emotion": { + "type": "string", + "description": "语音情绪", + "hint": "控制合成语音的情绪", + "options": ["happy","sad","angry","fearful","disgusted","surprised","neutral",], + }, + "minimax-voice-latex": { + "type": "bool", + "description": "是否支持朗读latex公式", + "hint": "", + }, + "minimax-voice-english-normalization": { + "type": "bool", + "description": "是否支持英语文本规范化", + "hint": "可提升数字阅读场景的性能,但会略微增加延迟", + }, "rag_options": { "description": "RAG 选项", "type": "object", diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py index e61fbf92..596293ac 100644 --- a/astrbot/core/provider/manager.py +++ b/astrbot/core/provider/manager.py @@ -206,6 +206,10 @@ class ProviderManager: from .sources.azure_tts_source import ( AzureTTSProvider as AzureTTSProvider, ) + case "minimax_tts_api": + from .sources.minimax_tts_api_source import ( + ProviderMiniMaxTTSAPI as ProviderMiniMaxTTSAPI, + ) except (ImportError, ModuleNotFoundError) as e: logger.critical( f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。" diff --git a/astrbot/core/provider/sources/minimax_tts_api_source.py b/astrbot/core/provider/sources/minimax_tts_api_source.py new file mode 100644 index 00000000..52e8ccc4 --- /dev/null +++ b/astrbot/core/provider/sources/minimax_tts_api_source.py @@ -0,0 +1,120 @@ +import json +import os +import uuid +from typing import Iterator + +import requests + +from astrbot.core.utils.astrbot_path import get_astrbot_data_path + +from ..entities import ProviderType +from ..provider import TTSProvider +from ..register import register_provider_adapter + + +@register_provider_adapter( + "minimax_tts_api", "MiniMax TTS API", provider_type=ProviderType.TEXT_TO_SPEECH +) +class ProviderMiniMaxTTSAPI(TTSProvider): + def __init__( + self, + provider_config: dict, + provider_settings: dict, + ) -> None: + super().__init__(provider_config, provider_settings) + self.chosen_api_key: str = provider_config.get("api_key", "") + self.api_base: str = provider_config.get( + "api_base", "https://api.minimax.chat/v1/t2a_v2" + ) + self.group_id: str = provider_config.get("minimax-group-id", "") + self.set_model(provider_config.get("model", "")) + self.lang_boost: str = provider_config.get("minimax-langboost", "auto") + + self.voice_setting: dict = { + "speed": provider_config.get("minimax-voice-speed", 1.0), + "vol": provider_config.get("minimax-voice-vol", 1.0), + "pitch": provider_config.get("minimax-voice-pitch", 0), + "voice_id": provider_config.get("minimax-voice-id", ""), + "emotion": provider_config.get("minimax-voice-emotion", "neutral"), + "latex_read": provider_config.get("minimax-voice-latex", False), + "english_normalization": provider_config.get( + "minimax-voice-english-normalization", False + ), + } + + self.audio_setting: dict = { + "sample_rate": 32000, + "bitrate": 128000, + "format": "mp3", + } + + self.concat_base_url: str = self.api_base + "?GroupId=" + self.group_id + self.headers = { + "Authorization": f"Bearer {self.chosen_api_key}", + "accept": "application/json, text/plain, */*", + "content-type": "application/json", + } + + def _build_tts_stream_body(self, text: str): + """构建流式请求体""" + body = json.dumps( + { + "model": self.model_name, + "text": text, + "stream": True, + "language_boost": self.lang_boost, + "voice_setting": self.voice_setting, + "audio_setting": self.audio_setting, + } + ) + return body + + def _call_tts_stream(self, text: str) -> Iterator[bytes]: + """进行流式请求""" + tts_body = self._build_tts_stream_body(text) + try: + response = requests.request( + "POST", + self.concat_base_url, + stream=True, + headers=self.headers, + data=tts_body, + ) + response.raise_for_status() + for chunk in response.raw: + if chunk: + if chunk[:5] == b"data:": + data = json.loads(chunk[5:]) + if "data" in data and "extra_info" not in data: + if "audio" in data["data"]: + audio = data["data"]["audio"] + yield audio + except requests.exceptions.RequestException as e: + raise Exception(f"MiniMax TTS API请求失败: {str(e)}") + + def _audio_play(self, audio_stream: Iterator[bytes]) -> bytes: + """解码数据流到audio比特流""" + audio = b"" + for chunk in audio_stream: + if chunk is not None and chunk != "\n": + decoded_hex = bytes.fromhex(chunk) + audio += decoded_hex + + return audio + + async def get_audio(self, text: str) -> str: + temp_dir = os.path.join(get_astrbot_data_path(), "temp") + path = os.path.join(temp_dir, f"minimax_tts_api_{uuid.uuid4()}.mp3") + + try: + audio_chunk_iterator = self._call_tts_stream(text) + audio = self._audio_play(audio_chunk_iterator) + + # 结果保存至文件 + with open(path, "wb") as file: + file.write(audio) + + return path + + except requests.exceptions.RequestException as e: + raise e diff --git a/dashboard/src/views/ProviderPage.vue b/dashboard/src/views/ProviderPage.vue index 9f27854d..151aff56 100644 --- a/dashboard/src/views/ProviderPage.vue +++ b/dashboard/src/views/ProviderPage.vue @@ -30,7 +30,7 @@ mdi-tag - 提供商类型: + 提供商类型: {{ item.type }} @@ -94,7 +94,7 @@ mdi-close - + @@ -110,14 +110,14 @@ 文字转语音 - + - - @@ -155,17 +155,17 @@ {{ updatingMode ? 'mdi-pencil' : 'mdi-plus' }} {{ updatingMode ? '编辑' : '新增' }} {{ newSelectedProviderName }} 服务提供商 - + - - + - + @@ -183,7 +183,7 @@ location="top"> {{ save_message }} - + @@ -221,7 +221,7 @@ export default { save_message_success: "success", showConsole: false, - + // 新增提供商对话框相关 showAddProviderDialog: false, activeProviderTab: 'chat_completion', @@ -247,16 +247,16 @@ export default { getTemplatesByType(type) { const templates = this.metadata['provider_group']?.metadata?.provider?.config_template || {}; const filtered = {}; - + for (const [name, template] of Object.entries(templates)) { if (template.provider_type === type) { filtered[name] = template; } } - + return filtered; }, - + // 获取提供商类型对应的图标 getProviderIcon(type) { const icons = { @@ -278,6 +278,7 @@ export default { 'LM Studio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/lmstudio.svg', 'FishAudio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/fishaudio.svg', 'Azure': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/azure.svg', + 'MiniMax': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/minimax.svg', }; for (const key in icons) { if (type.startsWith(key)) { @@ -296,7 +297,7 @@ export default { }; return names[tabType] || tabType; }, - + // 获取提供商简介 getProviderDescription(template, name) { if (name == 'OpenAI') { @@ -304,7 +305,7 @@ export default { } return `${template.type} 服务提供商`; }, - + // 选择提供商模板 selectProviderTemplate(name) { this.newSelectedProviderName = name; @@ -334,7 +335,7 @@ export default { break; } } - + const mergeConfigWithOrder = (target, source, reference) => { // 首先复制所有source中的属性到target if (source && typeof source === 'object' && !Array.isArray(source)) { @@ -348,7 +349,7 @@ export default { } } } - + // 然后根据reference的结构添加或覆盖属性 for (let key in reference) { if (typeof reference[key] === 'object' && reference[key] !== null) { @@ -356,8 +357,8 @@ export default { target[key] = Array.isArray(reference[key]) ? [] : {}; } mergeConfigWithOrder( - target[key], - source && source[key] ? source[key] : {}, + target[key], + source && source[key] ? source[key] : {}, reference[key] ); } else if (!(key in target)) { @@ -366,7 +367,7 @@ export default { } } }; - + if (defaultConfig) { mergeConfigWithOrder(this.newSelectedProviderConfig, provider, defaultConfig); } @@ -417,7 +418,7 @@ export default { providerStatusChange(provider) { provider.enable = !provider.enable; // 切换状态 - + axios.post('/api/config/provider/update', { id: provider.id, config: provider @@ -429,13 +430,13 @@ export default { this.showError(err.response?.data?.message || err.message); }); }, - + showSuccess(message) { this.save_message = message; this.save_message_success = "success"; this.save_message_snack = true; }, - + showError(message) { this.save_message = message; this.save_message_success = "error"; @@ -475,4 +476,4 @@ export default { .v-window { border-radius: 4px; } - \ No newline at end of file + From a7823b352f2e45d713e615396fb70ca6f37f51ba Mon Sep 17 00:00:00 2001 From: Li Haoyuan <1513624626@qq.com> Date: Wed, 14 May 2025 13:09:09 +0800 Subject: [PATCH 02/10] docs: Adjust MiniMax TTS configuration info --- astrbot/core/config/default.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 8470a0c2..0bbbdf94 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -944,18 +944,18 @@ CONFIG_METADATA_2 = { }, "minimax-group-id": { "type": "string", - "description": "用户所属的组", + "description": "用户组", "hint": "于账户管理->基本信息中可见", }, "minimax-langboost": { "type": "string", - "description": "指定语言/方言识别能力", + "description": "指定语言/方言", "hint": "增强对指定的小语种和方言的识别能力,设置后可以提升在指定小语种/方言场景下的语音表现", "options": [ "Chinese","Chinese,Yue","English","Arabic","Russian","Spanish","French","Portuguese","German","Turkish","Dutch","Ukrainian","Vietnamese","Indonesian","Japanese","Italian","Korean","Thai","Polish","Romanian","Greek","Czech","Finnish","Hindi","auto",], }, "minimax-voice-speed": { "type": "float", - "description": "语速取值越大,语速越快", + "description": "语速", "hint": "生成声音的语速, 取值[0.5, 2], 默认为1.0, 取值越大,语速越快", }, "minimax-voice-vol": { @@ -970,23 +970,23 @@ CONFIG_METADATA_2 = { }, "minimax-voice-id": { "type": "string", - "description": "音色编号", - "hint": "请求的音色编号, 请见官网文档", + "description": "音色", + "hint": "音色编号, 详见官网文档", }, "minimax-voice-emotion": { "type": "string", - "description": "语音情绪", + "description": "情绪", "hint": "控制合成语音的情绪", "options": ["happy","sad","angry","fearful","disgusted","surprised","neutral",], }, "minimax-voice-latex": { "type": "bool", - "description": "是否支持朗读latex公式", - "hint": "", + "description": "支持朗读latex公式", + "hint": "朗读latex公式, 但是需要确保输入文本按官网要求格式化", }, "minimax-voice-english-normalization": { "type": "bool", - "description": "是否支持英语文本规范化", + "description": "支持英语文本规范化", "hint": "可提升数字阅读场景的性能,但会略微增加延迟", }, "rag_options": { From 2117b65487a27929e7554a58429f66f754d77b12 Mon Sep 17 00:00:00 2001 From: Li Haoyuan <1513624626@qq.com> Date: Wed, 14 May 2025 14:21:23 +0800 Subject: [PATCH 03/10] feat: Support timber_weights for MiniMax TTS --- astrbot/core/config/default.py | 16 +++++++- .../sources/minimax_tts_api_source.py | 38 +++++++++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 0bbbdf94..6cc6dab6 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -813,7 +813,9 @@ CONFIG_METADATA_2 = { "minimax-voice-speed": 1.0, "minimax-voice-vol": 1.0, "minimax-voice-pitch": 0, + "minimax-is-timber-weight": False, "minimax-voice-id": "female-shaonv", + "minimax-timber-weight": '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 1}]', "minimax-voice-emotion": "neutral", "minimax-voice-latex": False, "minimax-voice-english-normalization": False, @@ -968,10 +970,20 @@ CONFIG_METADATA_2 = { "description": "语调", "hint": "生成声音的语调, 取值[-12, 12], 默认为0", }, + "minimax-is-timber-weight": { + "type": "bool", + "description": "启用混合音色", + "hint": "启用混合音色, 支持以自定义权重混合最多四种音色, 启用后自动忽略单一音色设置", + }, + "minimax-timber-weight": { + "type": "string", + "description": "混合音色", + "hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网体验页面查看代码获得预设以及编写模板", + }, "minimax-voice-id": { "type": "string", - "description": "音色", - "hint": "音色编号, 详见官网文档", + "description": "单一音色", + "hint": "单一音色编号, 详见官网文档", }, "minimax-voice-emotion": { "type": "string", diff --git a/astrbot/core/provider/sources/minimax_tts_api_source.py b/astrbot/core/provider/sources/minimax_tts_api_source.py index 52e8ccc4..8bc61bfe 100644 --- a/astrbot/core/provider/sources/minimax_tts_api_source.py +++ b/astrbot/core/provider/sources/minimax_tts_api_source.py @@ -1,7 +1,7 @@ import json import os import uuid -from typing import Iterator +from typing import Dict, Iterator, List, Union import requests @@ -29,12 +29,23 @@ class ProviderMiniMaxTTSAPI(TTSProvider): self.group_id: str = provider_config.get("minimax-group-id", "") self.set_model(provider_config.get("model", "")) self.lang_boost: str = provider_config.get("minimax-langboost", "auto") + self.is_timber_weight: bool = provider_config.get( + "minimax-is-timber-weight", False + ) + self.timber_weight: List[Dict[str, Union[str, int]]] = json.loads( + provider_config.get( + "minimax-timber-weight", + '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 1}]', + ) + ) self.voice_setting: dict = { "speed": provider_config.get("minimax-voice-speed", 1.0), "vol": provider_config.get("minimax-voice-vol", 1.0), "pitch": provider_config.get("minimax-voice-pitch", 0), - "voice_id": provider_config.get("minimax-voice-id", ""), + "voice_id": provider_config.get("minimax-voice-id", "") + if not self.is_timber_weight + else "", "emotion": provider_config.get("minimax-voice-emotion", "neutral"), "latex_read": provider_config.get("minimax-voice-latex", False), "english_normalization": provider_config.get( @@ -57,16 +68,19 @@ class ProviderMiniMaxTTSAPI(TTSProvider): def _build_tts_stream_body(self, text: str): """构建流式请求体""" - body = json.dumps( - { - "model": self.model_name, - "text": text, - "stream": True, - "language_boost": self.lang_boost, - "voice_setting": self.voice_setting, - "audio_setting": self.audio_setting, - } - ) + dict_body: Dict[str, object] = { + "model": self.model_name, + "text": text, + "stream": True, + "language_boost": self.lang_boost, + "voice_setting": self.voice_setting, + "audio_setting": self.audio_setting, + } + if self.is_timber_weight: + dict_body["timber_weights"] = self.timber_weight + + body = json.dumps(dict_body) + return body def _call_tts_stream(self, text: str) -> Iterator[bytes]: From e01d4264e3fa3d97d73f064ef3c4835dbd8d3f94 Mon Sep 17 00:00:00 2001 From: Li Haoyuan <1513624626@qq.com> Date: Wed, 14 May 2025 14:40:25 +0800 Subject: [PATCH 04/10] docs: Adjust MiniMax TTS timber_weights description --- astrbot/core/config/default.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 6cc6dab6..ef2a6960 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -815,7 +815,7 @@ CONFIG_METADATA_2 = { "minimax-voice-pitch": 0, "minimax-is-timber-weight": False, "minimax-voice-id": "female-shaonv", - "minimax-timber-weight": '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 1}]', + "minimax-timber-weight": '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 25}, {"voice_id": "Chinese (Mandarin)_BashfulGirl", "weight": 50}]', "minimax-voice-emotion": "neutral", "minimax-voice-latex": False, "minimax-voice-english-normalization": False, @@ -978,7 +978,7 @@ CONFIG_METADATA_2 = { "minimax-timber-weight": { "type": "string", "description": "混合音色", - "hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网体验页面查看代码获得预设以及编写模板", + "hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网API语音调试台预览代码获得预设以及编写模板, 需要严格按照json字符串格式编写, 可以查看控制台判断是否解析成功. 具体结构可参照默认值以及官网代码预览.", }, "minimax-voice-id": { "type": "string", From 25ef0039e42ae87fa9e15a20a03c99e2fd89e5cc Mon Sep 17 00:00:00 2001 From: Li Haoyuan <1513624626@qq.com> Date: Wed, 14 May 2025 20:59:45 +0800 Subject: [PATCH 05/10] refactor: Optimize MiniMax TTS API Provider --- astrbot/core/config/default.py | 2 +- .../sources/minimax_tts_api_source.py | 45 +++++++++---------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index ef2a6960..2b802b59 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -819,7 +819,7 @@ CONFIG_METADATA_2 = { "minimax-voice-emotion": "neutral", "minimax-voice-latex": False, "minimax-voice-english-normalization": False, - "timeout": "20", + "timeout": 20, }, }, "items": { diff --git a/astrbot/core/provider/sources/minimax_tts_api_source.py b/astrbot/core/provider/sources/minimax_tts_api_source.py index 8bc61bfe..26d08ac7 100644 --- a/astrbot/core/provider/sources/minimax_tts_api_source.py +++ b/astrbot/core/provider/sources/minimax_tts_api_source.py @@ -43,9 +43,9 @@ class ProviderMiniMaxTTSAPI(TTSProvider): "speed": provider_config.get("minimax-voice-speed", 1.0), "vol": provider_config.get("minimax-voice-vol", 1.0), "pitch": provider_config.get("minimax-voice-pitch", 0), - "voice_id": provider_config.get("minimax-voice-id", "") - if not self.is_timber_weight - else "", + "voice_id": "" + if self.is_timber_weight + else provider_config.get("minimax-voice-id", ""), "emotion": provider_config.get("minimax-voice-emotion", "neutral"), "latex_read": provider_config.get("minimax-voice-latex", False), "english_normalization": provider_config.get( @@ -59,7 +59,7 @@ class ProviderMiniMaxTTSAPI(TTSProvider): "format": "mp3", } - self.concat_base_url: str = self.api_base + "?GroupId=" + self.group_id + self.concat_base_url: str = f"{self.api_base}?GroupId={self.group_id}" self.headers = { "Authorization": f"Bearer {self.chosen_api_key}", "accept": "application/json, text/plain, */*", @@ -79,42 +79,37 @@ class ProviderMiniMaxTTSAPI(TTSProvider): if self.is_timber_weight: dict_body["timber_weights"] = self.timber_weight - body = json.dumps(dict_body) - - return body + return json.dumps(dict_body) def _call_tts_stream(self, text: str) -> Iterator[bytes]: """进行流式请求""" - tts_body = self._build_tts_stream_body(text) try: - response = requests.request( - "POST", + response = requests.post( self.concat_base_url, stream=True, headers=self.headers, - data=tts_body, + data=self._build_tts_stream_body(text), ) response.raise_for_status() + for chunk in response.raw: - if chunk: - if chunk[:5] == b"data:": - data = json.loads(chunk[5:]) - if "data" in data and "extra_info" not in data: - if "audio" in data["data"]: - audio = data["data"]["audio"] - yield audio + if not chunk or not chunk.startswith(b"data:"): + continue + data = json.loads(chunk[5:]) + if "extra_info" in data: + continue + audio = data.get("data", {}).get("audio") + if audio is not None: + yield audio + except requests.exceptions.RequestException as e: raise Exception(f"MiniMax TTS API请求失败: {str(e)}") def _audio_play(self, audio_stream: Iterator[bytes]) -> bytes: """解码数据流到audio比特流""" - audio = b"" - for chunk in audio_stream: - if chunk is not None and chunk != "\n": - decoded_hex = bytes.fromhex(chunk) - audio += decoded_hex - - return audio + return b"".join( + bytes.fromhex(chunk) for chunk in audio_stream if chunk and chunk != b"\n" + ) async def get_audio(self, text: str) -> str: temp_dir = os.path.join(get_astrbot_data_path(), "temp") From 7afc4752906857450c4d2f9fb0f7cae40b98b6e8 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 16 May 2025 10:29:22 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=F0=9F=90=9B=20fix:=20value=20cannot=20di?= =?UTF-8?q?splayed=20when=20fullscreen=20editior=20mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/src/components/shared/AstrBotConfig.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/src/components/shared/AstrBotConfig.vue b/dashboard/src/components/shared/AstrBotConfig.vue index e98ef16c..c4785816 100644 --- a/dashboard/src/components/shared/AstrBotConfig.vue +++ b/dashboard/src/components/shared/AstrBotConfig.vue @@ -300,7 +300,7 @@ function saveEditedContent() { theme="vs-dark" :language="currentEditingLanguage" style="height: calc(100vh - 64px);" - v-model="currentEditingValue" + v-model:value="currentEditingValue" > From 35a8527c163918f822f13714305462a9cf077899 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 16 May 2025 10:29:46 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=8E=88=20perf:=20update=20defaule?= =?UTF-8?q?=20value=20of=20minimax-timber-weight?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/config/default.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 2b802b59..c1ced0f1 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -815,7 +815,7 @@ CONFIG_METADATA_2 = { "minimax-voice-pitch": 0, "minimax-is-timber-weight": False, "minimax-voice-id": "female-shaonv", - "minimax-timber-weight": '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 25}, {"voice_id": "Chinese (Mandarin)_BashfulGirl", "weight": 50}]', + "minimax-timber-weight": '[\n {\n "voice_id": "Chinese (Mandarin)_Warm_Girl",\n "weight": 25\n },\n {\n "voice_id": "Chinese (Mandarin)_BashfulGirl",\n "weight": 50\n }\n]', "minimax-voice-emotion": "neutral", "minimax-voice-latex": False, "minimax-voice-english-normalization": False, @@ -978,6 +978,7 @@ CONFIG_METADATA_2 = { "minimax-timber-weight": { "type": "string", "description": "混合音色", + "editor_mode": True, "hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网API语音调试台预览代码获得预设以及编写模板, 需要严格按照json字符串格式编写, 可以查看控制台判断是否解析成功. 具体结构可参照默认值以及官网代码预览.", }, "minimax-voice-id": { From 6723fe8271712113f0f969d4651e5f98390ccad0 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 16 May 2025 10:37:30 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=F0=9F=90=9B=20fix:=20cannot=20save=20val?= =?UTF-8?q?ue=20when=20fullscreen=20editor=20mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/components/shared/AstrBotConfig.vue | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/dashboard/src/components/shared/AstrBotConfig.vue b/dashboard/src/components/shared/AstrBotConfig.vue index c4785816..5b339c70 100644 --- a/dashboard/src/components/shared/AstrBotConfig.vue +++ b/dashboard/src/components/shared/AstrBotConfig.vue @@ -4,20 +4,19 @@ import { ref } from 'vue' const dialog = ref(false) const currentEditingKey = ref('') -const currentEditingValue = ref('') const currentEditingLanguage = ref('json') +const currentEditingTheme = ref('vs-light') +let currentEditingKeyIterable = null -function openEditorDialog(key, value, language) { +function openEditorDialog(key, value, theme, language) { currentEditingKey.value = key - currentEditingValue.value = value currentEditingLanguage.value = language || 'json' + currentEditingTheme.value = theme || 'vs-light' + currentEditingKeyIterable = value dialog.value = true } function saveEditedContent() { - if (currentEditingKey.value && iterable[currentEditingKey.value] !== undefined) { - iterable[currentEditingKey.value] = currentEditingValue.value - } dialog.value = false } @@ -107,7 +106,7 @@ function saveEditedContent() { variant="text" color="primary" class="editor-fullscreen-btn" - @click="openEditorDialog(key, iterable[key], metadata[metadataKey].items[key]?.editor_language)" + @click="openEditorDialog(key, iterable, metadata[metadataKey].items[key]?.editor_theme, metadata[metadataKey].items[key]?.editor_language)" title="全屏编辑" > mdi-fullscreen @@ -297,10 +296,10 @@ function saveEditedContent() { From c6eaf3d01079c5f12e8fbd4af2d1012f756e2882 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 16 May 2025 11:04:01 +0800 Subject: [PATCH 09/10] refactor: use aiohttp --- .../sources/minimax_tts_api_source.py | 71 +++++++++++-------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/astrbot/core/provider/sources/minimax_tts_api_source.py b/astrbot/core/provider/sources/minimax_tts_api_source.py index 26d08ac7..04170b93 100644 --- a/astrbot/core/provider/sources/minimax_tts_api_source.py +++ b/astrbot/core/provider/sources/minimax_tts_api_source.py @@ -1,12 +1,10 @@ import json import os import uuid -from typing import Dict, Iterator, List, Union - -import requests - +import aiohttp +from typing import Dict, List, Union, AsyncIterator from astrbot.core.utils.astrbot_path import get_astrbot_data_path - +from astrbot.api import logger from ..entities import ProviderType from ..provider import TTSProvider from ..register import register_provider_adapter @@ -81,43 +79,54 @@ class ProviderMiniMaxTTSAPI(TTSProvider): return json.dumps(dict_body) - def _call_tts_stream(self, text: str) -> Iterator[bytes]: + async def _call_tts_stream(self, text: str) -> AsyncIterator[bytes]: """进行流式请求""" try: - response = requests.post( - self.concat_base_url, - stream=True, - headers=self.headers, - data=self._build_tts_stream_body(text), - ) - response.raise_for_status() + async with aiohttp.ClientSession() as session: + async with session.post( + self.concat_base_url, + headers=self.headers, + data=self._build_tts_stream_body(text), + timeout=aiohttp.ClientTimeout(total=60), + ) as response: + response.raise_for_status() - for chunk in response.raw: - if not chunk or not chunk.startswith(b"data:"): - continue - data = json.loads(chunk[5:]) - if "extra_info" in data: - continue - audio = data.get("data", {}).get("audio") - if audio is not None: - yield audio + async for chunk in response.content.iter_any(): + if not chunk or not chunk.startswith(b"data:"): + logger.warning(f"Minimax TTS resp: {chunk}") + if "invalid api key" in chunk.decode("utf-8"): + raise Exception("MiniMax TTS: 无效的 API 密钥") + continue + try: + data = json.loads(chunk[5:]) + if "extra_info" in data: + continue + audio = data.get("data", {}).get("audio") + if audio is not None: + yield audio + except json.JSONDecodeError: + continue - except requests.exceptions.RequestException as e: + except aiohttp.ClientError as e: raise Exception(f"MiniMax TTS API请求失败: {str(e)}") - def _audio_play(self, audio_stream: Iterator[bytes]) -> bytes: - """解码数据流到audio比特流""" - return b"".join( - bytes.fromhex(chunk) for chunk in audio_stream if chunk and chunk != b"\n" - ) + async def _audio_play(self, audio_stream: AsyncIterator[bytes]) -> bytes: + """解码数据流到 audio 比特流""" + chunks = [] + async for chunk in audio_stream: + if chunk and chunk != b"\n": + chunks.append(bytes.fromhex(chunk.decode("utf-8"))) + return b"".join(chunks) async def get_audio(self, text: str) -> str: temp_dir = os.path.join(get_astrbot_data_path(), "temp") + os.makedirs(temp_dir, exist_ok=True) path = os.path.join(temp_dir, f"minimax_tts_api_{uuid.uuid4()}.mp3") try: - audio_chunk_iterator = self._call_tts_stream(text) - audio = self._audio_play(audio_chunk_iterator) + # 直接将异步生成器传递给 _audio_play 方法 + audio_stream = self._call_tts_stream(text) + audio = await self._audio_play(audio_stream) # 结果保存至文件 with open(path, "wb") as file: @@ -125,5 +134,5 @@ class ProviderMiniMaxTTSAPI(TTSProvider): return path - except requests.exceptions.RequestException as e: + except aiohttp.ClientError as e: raise e From c15f9666694bd581ac5365d8889e7b33adaaee53 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 16 May 2025 18:32:08 +0800 Subject: [PATCH 10/10] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20minimax=20?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sources/minimax_tts_api_source.py | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/astrbot/core/provider/sources/minimax_tts_api_source.py b/astrbot/core/provider/sources/minimax_tts_api_source.py index 04170b93..5b210835 100644 --- a/astrbot/core/provider/sources/minimax_tts_api_source.py +++ b/astrbot/core/provider/sources/minimax_tts_api_source.py @@ -91,31 +91,42 @@ class ProviderMiniMaxTTSAPI(TTSProvider): ) as response: response.raise_for_status() - async for chunk in response.content.iter_any(): - if not chunk or not chunk.startswith(b"data:"): - logger.warning(f"Minimax TTS resp: {chunk}") - if "invalid api key" in chunk.decode("utf-8"): - raise Exception("MiniMax TTS: 无效的 API 密钥") - continue - try: - data = json.loads(chunk[5:]) - if "extra_info" in data: - continue - audio = data.get("data", {}).get("audio") - if audio is not None: - yield audio - except json.JSONDecodeError: - continue + buffer = b"" + while True: + chunk = await response.content.read(8192) + if not chunk: + break + + buffer += chunk + + while b"\n\n" in buffer: + try: + message, buffer = buffer.split(b"\n\n", 1) + if message.startswith(b"data: "): + try: + data = json.loads(message[6:]) + if "extra_info" in data: + continue + audio = data.get("data", {}).get("audio") + if audio is not None: + yield audio + except json.JSONDecodeError: + logger.warning( + "Failed to parse JSON data from SSE message" + ) + continue + except ValueError: + buffer = buffer[-1024:] except aiohttp.ClientError as e: raise Exception(f"MiniMax TTS API请求失败: {str(e)}") - async def _audio_play(self, audio_stream: AsyncIterator[bytes]) -> bytes: + async def _audio_play(self, audio_stream: AsyncIterator[str]) -> bytes: """解码数据流到 audio 比特流""" chunks = [] async for chunk in audio_stream: - if chunk and chunk != b"\n": - chunks.append(bytes.fromhex(chunk.decode("utf-8"))) + if chunk.strip(): + chunks.append(bytes.fromhex(chunk.strip())) return b"".join(chunks) async def get_audio(self, text: str) -> str: