feat: introduce file extract capability (#3870)

* feat: introduce file extract capability powered by MoonshotAI
* fix: correct indentation in default configuration file
* fix: add error handling for file extract application in InternalAgentSubStage
* fix: update file name handling in InternalAgentSubStage to correctly associate file names with extracted content
* feat: add condition settings for local agent runner in default configuration
* fix: enhance file naming logic in File component and update prompt handling in InternalAgentSubStage
2025-12-01 18:12:39 +08:00
parent 0e034f0fbd
commit 2ba0460f19
7 changed files with 188 additions and 10 deletions
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -76,6 +76,11 @@ DEFAULT_CONFIG = {
        "reachability_check": False,
        "max_agent_step": 30,
        "tool_call_timeout": 60,
+        "file_extract": {
+            "enable": False,
+            "provider": "moonshotai",
+            "moonshotai_api_key": "",
+        },
    },
    "provider_stt_settings": {
        "enable": False,
@@ -2069,6 +2074,20 @@ CONFIG_METADATA_2 = {
                    "tool_call_timeout": {
                        "type": "int",
                    },
+                    "file_extract": {
+                        "type": "object",
+                        "items": {
+                            "enable": {
+                                "type": "bool",
+                            },
+                            "provider": {
+                                "type": "string",
+                            },
+                            "moonshotai_api_key": {
+                                "type": "string",
+                            },
+                        },
+                    },
                },
            },
            "provider_stt_settings": {
@@ -2403,6 +2422,36 @@ CONFIG_METADATA_3 = {
                    "provider_settings.enable": True,
                },
            },
+            "file_extract": {
+                "description": "文档解析能力",
+                "type": "object",
+                "items": {
+                    "provider_settings.file_extract.enable": {
+                        "description": "启用文档解析能力",
+                        "type": "bool",
+                    },
+                    "provider_settings.file_extract.provider": {
+                        "description": "文档解析提供商",
+                        "type": "string",
+                        "options": ["moonshotai"],
+                        "condition": {
+                            "provider_settings.file_extract.enable": True,
+                        },
+                    },
+                    "provider_settings.file_extract.moonshotai_api_key": {
+                        "description": "Moonshot AI API Key",
+                        "type": "string",
+                        "condition": {
+                            "provider_settings.file_extract.provider": "moonshotai",
+                            "provider_settings.file_extract.enable": True,
+                        },
+                    },
+                },
+                "condition": {
+                    "provider_settings.agent_runner_type": "local",
+                    "provider_settings.enable": True,
+                },
+            },
            "others": {
                "description": "其他配置",
                "type": "object",
--- a/astrbot/core/message/components.py
+++ b/astrbot/core/message/components.py
@@ -722,7 +722,12 @@ class File(BaseMessageComponent):
        """下载文件"""
        download_dir = os.path.join(get_astrbot_data_path(), "temp")
        os.makedirs(download_dir, exist_ok=True)
-        file_path = os.path.join(download_dir, f"{uuid.uuid4().hex}")
+        if self.name:
+            name, ext = os.path.splitext(self.name)
+            filename = f"{name}_{uuid.uuid4().hex[:8]}{ext}"
+        else:
+            filename = f"{uuid.uuid4().hex}"
+        file_path = os.path.join(download_dir, filename)
        await download_file(self.url, file_path)
        self.file_ = os.path.abspath(file_path)

--- a/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py
+++ b/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py
@@ -9,7 +9,7 @@ from astrbot.core import logger
 from astrbot.core.agent.tool import ToolSet
 from astrbot.core.astr_agent_context import AstrAgentContext
 from astrbot.core.conversation_mgr import Conversation
-from astrbot.core.message.components import Image
+from astrbot.core.message.components import File, Image, Reply
 from astrbot.core.message.message_event_result import (
    MessageChain,
    MessageEventResult,
@@ -22,6 +22,7 @@ from astrbot.core.provider.entities import (
    ProviderRequest,
 )
 from astrbot.core.star.star_handler import EventType, star_map
+from astrbot.core.utils.file_extract import extract_file_moonshotai
 from astrbot.core.utils.metrics import Metric
 from astrbot.core.utils.session_lock import session_lock_manager

@@ -56,6 +57,13 @@ class InternalAgentSubStage(Stage):
        self.show_reasoning = settings.get("display_reasoning_text", False)
        self.kb_agentic_mode: bool = conf.get("kb_agentic_mode", False)

+        file_extract_conf: dict = settings.get("file_extract", {})
+        self.file_extract_enabled: bool = file_extract_conf.get("enable", False)
+        self.file_extract_prov: str = file_extract_conf.get("provider", "moonshotai")
+        self.file_extract_msh_api_key: str = file_extract_conf.get(
+            "moonshotai_api_key", ""
+        )
+
        self.conv_manager = ctx.plugin_manager.context.conversation_manager

    def _select_provider(self, event: AstrMessageEvent):
@@ -114,6 +122,50 @@ class InternalAgentSubStage(Stage):
                req.func_tool = ToolSet()
            req.func_tool.add_tool(KNOWLEDGE_BASE_QUERY_TOOL)

+    async def _apply_file_extract(
+        self,
+        event: AstrMessageEvent,
+        req: ProviderRequest,
+    ):
+        """Extract text from files in the message and add it to the request.
+
+        File components are collected from the message itself and from the
+        chain of any Reply component. Each successfully extracted text is
+        appended to ``req.contexts`` as a system message. When files are
+        present and the request has no prompt, a default "summarize the file"
+        prompt is set so a message containing only a file still has a prompt.
+
+        Args:
+            event: The message event whose components are scanned for files.
+            req: The provider request, modified in place.
+        """
+        # Collect File components in message order, including those nested
+        # inside Reply chains.
+        file_comps: list[File] = []
+        for comp in event.message_obj.message:
+            if isinstance(comp, File):
+                file_comps.append(comp)
+            elif isinstance(comp, Reply) and comp.chain:
+                file_comps.extend(c for c in comp.chain if isinstance(c, File))
+        if not file_comps:
+            return
+
+        file_paths = []
+        file_names = []
+        for file_comp in file_comps:
+            file_paths.append(await file_comp.get_file())
+            file_names.append(file_comp.name)
+
+        if not req.prompt:
+            req.prompt = "总结一下文件里面讲了什么？"
+
+        if self.file_extract_prov != "moonshotai":
+            logger.error(f"Unsupported file extract provider: {self.file_extract_prov}")
+            return
+        if not self.file_extract_msh_api_key:
+            logger.error("Moonshot AI API key for file extract is not set")
+            return
+
+        # return_exceptions=True keeps the results of the files that succeeded
+        # instead of letting one failed extraction discard all of them.
+        file_contents = await asyncio.gather(
+            *[
+                extract_file_moonshotai(file_path, self.file_extract_msh_api_key)
+                for file_path in file_paths
+            ],
+            return_exceptions=True,
+        )
+
+        # add file extract results to contexts
+        for file_content, file_name in zip(file_contents, file_names):
+            if isinstance(file_content, BaseException):
+                logger.error(
+                    f"Failed to extract file {file_name or 'Unknown'}: {file_content}"
+                )
+                continue
+            req.contexts.append(
+                {
+                    "role": "system",
+                    "content": f"File Extract Results of user uploaded files:\n{file_content}\nFile Name: {file_name or 'Unknown'}",
+                },
+            )
+
    def _truncate_contexts(
        self,
        contexts: list[dict],
@@ -346,6 +398,17 @@ class InternalAgentSubStage(Stage):

                event.set_extra("provider_request", req)

+            # fix contexts json str
+            if isinstance(req.contexts, str):
+                req.contexts = json.loads(req.contexts)
+
+            # apply file extract
+            if self.file_extract_enabled:
+                try:
+                    await self._apply_file_extract(event, req)
+                except Exception as e:
+                    logger.error(f"Error occurred while applying file extract: {e}")
+
            if not req.prompt and not req.image_urls:
                return

@@ -356,10 +419,6 @@ class InternalAgentSubStage(Stage):
            # apply knowledge base feature
            await self._apply_kb(event, req)

-            # fix contexts json str
-            if isinstance(req.contexts, str):
-                req.contexts = json.loads(req.contexts)
-
            # truncate contexts to fit max length
            if req.contexts:
                req.contexts = self._truncate_contexts(req.contexts)
--- a/astrbot/core/platform/sources/telegram/tg_adapter.py
+++ b/astrbot/core/platform/sources/telegram/tg_adapter.py
@@ -381,7 +381,9 @@ class TelegramPlatformAdapter(Platform):
                    f"Telegram document file_path is None, cannot save the file {file_name}.",
                )
            else:
-                message.message.append(Comp.File(file=file_path, name=file_name))
+                message.message.append(
+                    Comp.File(file=file_path, name=file_name, url=file_path)
+                )

        elif update.message.video:
            file = await update.message.video.get_file()
--- a/astrbot/core/utils/file_extract.py
+++ b/astrbot/core/utils/file_extract.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+from openai import AsyncOpenAI
+
+
+async def extract_file_moonshotai(file_path: str, api_key: str) -> str:
+    """Extract text from a file using the Moonshot AI file-extract API.
+
+    Args:
+        file_path: The path to the file to extract text from
+        api_key: The API key to use to extract text from the file
+    Returns:
+        The text extracted from the file
+    """
+    async with AsyncOpenAI(
+        api_key=api_key,
+        base_url="https://api.moonshot.cn/v1",
+    ) as client:  # context manager closes the HTTP client once extraction is done
+        file_object = await client.files.create(
+            file=Path(file_path),
+            purpose="file-extract",  # type: ignore
+        )
+        return (await client.files.content(file_id=file_object.id)).text
--- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json
+++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -109,6 +109,22 @@
        }
      }
    },
+    "file_extract": {
+      "description": "File Extract",
+      "provider_settings": {
+        "file_extract": {
+          "enable": {
+            "description": "Enable File Extract"
+          },
+          "provider": {
+            "description": "File Extract Provider"
+          },
+          "moonshotai_api_key": {
+            "description": "Moonshot AI API Key"
+          }
+        }
+      }
+    },
    "others": {
      "description": "Other Settings",
      "provider_settings": {
--- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
+++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -11,7 +11,12 @@
        },
        "agent_runner_type": {
          "description": "执行器",
-          "labels": ["内置 Agent", "Dify", "Coze", "阿里云百炼应用"]
+          "labels": [
+            "内置 Agent",
+            "Dify",
+            "Coze",
+            "阿里云百炼应用"
+          ]
        },
        "coze_agent_runner_provider_id": {
          "description": "Coze Agent 执行器提供商 ID"
@@ -109,6 +114,22 @@
        }
      }
    },
+    "file_extract": {
+      "description": "文档解析能力",
+      "provider_settings": {
+        "file_extract": {
+          "enable": {
+            "description": "启用文档解析能力"
+          },
+          "provider": {
+            "description": "文档解析提供商"
+          },
+          "moonshotai_api_key": {
+            "description": "Moonshot AI API Key"
+          }
+        }
+      }
+    },
    "others": {
      "description": "其他配置",
      "provider_settings": {
@@ -142,7 +163,10 @@
        "unsupported_streaming_strategy": {
          "description": "不支持流式回复的平台",
          "hint": "选择在不支持流式回复的平台上的处理方式。实时分段回复会在系统接收流式响应检测到诸如标点符号等分段点时,立即发送当前已接收的内容",
-          "labels": ["实时分段回复", "关闭流式回复"]
+          "labels": [
+            "实时分段回复",
+            "关闭流式回复"
+          ]
        },
        "max_context_length": {
          "description": "最多携带对话轮数",
@@ -457,4 +481,4 @@
      }
    }
  }
-}
+}