feat: Add URL document parser for knowledge base (#3622)

* feat: add document upload from URL, with progress callbacks and error handling

* feat: add the frontend for uploading documents from a URL

* chore: add a warning for the URL upload feature to ensure correct user configuration

* feat: add content cleaning, with cleaning settings and service-provider selection for URL document uploads

* feat: update the content-cleaning system prompt to strengthen extraction rules; add a Beta badge to the URL upload feature

* style: format code

* perf: improve upload settings, strengthening the disable logic and cleaning-provider validation for URL uploads

* refactor: use the built-in chunking module

* refactor: extract the prompt into a separate file

* feat: add a Tavily API Key configuration dialog to improve the web search configuration experience

* fix: update URL hint and warning messages for clarity in knowledge base upload settings

* fix: fix the hot-reload issue when setting tavily_key

---------

Co-authored-by: Soulter <905617992@qq.com>
Author: RC-CHN
Date: 2025-11-17 19:05:14 +08:00
Committed by: GitHub
Parent: c7a58252fe
Commit: 270c89c12f
9 changed files with 1086 additions and 76 deletions

View File

@@ -1,4 +1,7 @@
import asyncio
import json
import re
import time
import uuid
from pathlib import Path
@@ -8,12 +11,98 @@ from astrbot.core import logger
from astrbot.core.db.vec_db.base import BaseVecDB
from astrbot.core.db.vec_db.faiss_impl.vec_db import FaissVecDB
from astrbot.core.provider.manager import ProviderManager
from astrbot.core.provider.provider import EmbeddingProvider, RerankProvider
from astrbot.core.provider.provider import (
EmbeddingProvider,
RerankProvider,
)
from astrbot.core.provider.provider import (
Provider as LLMProvider,
)
from .chunking.base import BaseChunker
from .chunking.recursive import RecursiveCharacterChunker
from .kb_db_sqlite import KBSQLiteDatabase
from .models import KBDocument, KBMedia, KnowledgeBase
from .parsers.url_parser import extract_text_from_url
from .parsers.util import select_parser
from .prompts import TEXT_REPAIR_SYSTEM_PROMPT
class RateLimiter:
"""A simple rate limiter."""
def __init__(self, max_rpm: int):
self.max_per_minute = max_rpm
self.interval = 60.0 / max_rpm if max_rpm > 0 else 0
self.last_call_time = 0
async def __aenter__(self):
if self.interval == 0:
return
now = time.monotonic()
elapsed = now - self.last_call_time
if elapsed < self.interval:
await asyncio.sleep(self.interval - elapsed)
self.last_call_time = time.monotonic()
async def __aexit__(self, exc_type, exc_val, exc_tb):
pass
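
For orientation, a minimal usage sketch of RateLimiter, not part of the diff (the max_rpm value below is arbitrary): each `async with` entry sleeps just long enough to keep successive calls under the configured RPM.

import asyncio

async def demo() -> None:
    limiter = RateLimiter(max_rpm=120)  # arbitrary cap: at most ~2 calls per second
    for i in range(5):
        # Entering the context blocks until 60 / max_rpm seconds have
        # elapsed since the previous entry, spacing calls evenly.
        async with limiter:
            print(f"call {i} dispatched")

asyncio.run(demo())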
async def _repair_and_translate_chunk_with_retry(
chunk: str,
repair_llm_service: LLMProvider,
rate_limiter: RateLimiter,
max_retries: int = 2,
) -> list[str]:
"""
Repairs, translates, and optionally re-chunks a single text chunk using the small LLM, with rate limiting.
"""
# To guard against LLM context pollution, restate an explicit instruction in the user prompt as well
user_prompt = f"""IGNORE ALL PREVIOUS INSTRUCTIONS. Your ONLY task is to process the following text chunk according to the system prompt provided.
Text chunk to process:
---
{chunk}
---
"""
for attempt in range(max_retries + 1):
try:
async with rate_limiter:
response = await repair_llm_service.text_chat(
prompt=user_prompt, system_prompt=TEXT_REPAIR_SYSTEM_PROMPT
)
llm_output = response.completion_text
if "<discard_chunk />" in llm_output:
return [] # Signal to discard this chunk
# More robust regex to handle potential LLM formatting errors (spaces, newlines in tags)
matches = re.findall(
r"<\s*repaired_text\s*>\s*(.*?)\s*<\s*/\s*repaired_text\s*>",
llm_output,
re.DOTALL,
)
if matches:
# Further cleaning to ensure no empty strings are returned
return [m.strip() for m in matches if m.strip()]
else:
# If no valid tags and not explicitly discarded, discard it to be safe.
return []
except Exception as e:
logger.warning(
f" - LLM call failed on attempt {attempt + 1}/{max_retries + 1}. Error: {str(e)}"
)
logger.error(
f" - Failed to process chunk after {max_retries + 1} attempts. Using original text."
)
return [chunk]
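
As a self-contained illustration of the tag-extraction step above (the sample reply text is invented), the regex tolerates stray whitespace inside the tags and captures multi-line bodies:

import re

TAG_RE = r"<\s*repaired_text\s*>\s*(.*?)\s*<\s*/\s*repaired_text\s*>"

sample_reply = """
<repaired_text>
The Llama is a domesticated South American camelid.
</repaired_text>
< repaired_text >Llamas live in herds.</ repaired_text >
"""

# re.DOTALL lets '.' span newlines, so multi-line bodies are captured;
# empty matches are filtered out, mirroring the function above.
chunks = [m.strip() for m in re.findall(TAG_RE, sample_reply, re.DOTALL) if m.strip()]
print(chunks)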
class KBHelper:
@@ -100,7 +189,7 @@ class KBHelper:
async def upload_document(
self,
file_name: str,
file_content: bytes,
file_content: bytes | None,
file_type: str,
chunk_size: int = 512,
chunk_overlap: int = 50,
@@ -108,6 +197,7 @@ class KBHelper:
tasks_limit: int = 3,
max_retries: int = 3,
progress_callback=None,
pre_chunked_text: list[str] | None = None,
) -> KBDocument:
"""Upload and process a document (with atomicity guarantees and failure cleanup)
@@ -130,46 +220,63 @@ class KBHelper:
await self._ensure_vec_db()
doc_id = str(uuid.uuid4())
media_paths: list[Path] = []
file_size = 0
# file_path = self.kb_files_dir / f"{doc_id}.{file_type}"
# async with aiofiles.open(file_path, "wb") as f:
# await f.write(file_content)
try:
# Stage 1: parse the document
if progress_callback:
await progress_callback("parsing", 0, 100)
parser = await select_parser(f".{file_type}")
parse_result = await parser.parse(file_content, file_name)
text_content = parse_result.text
media_items = parse_result.media
if progress_callback:
await progress_callback("parsing", 100, 100)
# Save media files
chunks_text = []
saved_media = []
for media_item in media_items:
media = await self._save_media(
doc_id=doc_id,
media_type=media_item.media_type,
file_name=media_item.file_name,
content=media_item.content,
mime_type=media_item.mime_type,
if pre_chunked_text is not None:
# If pre-chunked text was provided, use it directly
chunks_text = pre_chunked_text
file_size = sum(len(chunk) for chunk in chunks_text)
logger.info(f"Uploading with pre-chunked text: {len(chunks_text)} chunks.")
else:
# Otherwise, run the standard parse-and-chunk pipeline
if file_content is None:
raise ValueError(
"file_content must not be None when pre_chunked_text is not provided."
)
file_size = len(file_content)
# Stage 1: parse the document
if progress_callback:
await progress_callback("parsing", 0, 100)
parser = await select_parser(f".{file_type}")
parse_result = await parser.parse(file_content, file_name)
text_content = parse_result.text
media_items = parse_result.media
if progress_callback:
await progress_callback("parsing", 100, 100)
# Save media files
for media_item in media_items:
media = await self._save_media(
doc_id=doc_id,
media_type=media_item.media_type,
file_name=media_item.file_name,
content=media_item.content,
mime_type=media_item.mime_type,
)
saved_media.append(media)
media_paths.append(Path(media.file_path))
# Stage 2: chunking
if progress_callback:
await progress_callback("chunking", 0, 100)
chunks_text = await self.chunker.chunk(
text_content,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
saved_media.append(media)
media_paths.append(Path(media.file_path))
# Stage 2: chunking
if progress_callback:
await progress_callback("chunking", 0, 100)
chunks_text = await self.chunker.chunk(
text_content,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
contents = []
metadatas = []
for idx, chunk_text in enumerate(chunks_text):
@@ -205,7 +312,7 @@ class KBHelper:
kb_id=self.kb.kb_id,
doc_name=file_name,
file_type=file_type,
file_size=len(file_content),
file_size=file_size,
# file_path=str(file_path),
file_path="",
chunk_count=len(chunks_text),
@@ -359,3 +466,177 @@ class KBHelper:
)
return media
async def upload_from_url(
self,
url: str,
chunk_size: int = 512,
chunk_overlap: int = 50,
batch_size: int = 32,
tasks_limit: int = 3,
max_retries: int = 3,
progress_callback=None,
enable_cleaning: bool = False,
cleaning_provider_id: str | None = None,
) -> KBDocument:
"""Upload and process a document from a URL (with atomicity guarantees and failure cleanup)
Args:
url: URL of the web page to extract content from
chunk_size: text chunk size
chunk_overlap: overlap between adjacent chunks
batch_size: batch size
tasks_limit: concurrent task limit
max_retries: maximum number of retries
progress_callback: progress callback taking (stage, current, total)
- stage: current stage ('extracting', 'cleaning', 'parsing', 'chunking', 'embedding')
- current: current progress
- total: total amount
Returns:
KBDocument: the uploaded document object
Raises:
ValueError: if the URL is empty or no content could be extracted
IOError: if the network request fails
"""
# Fetch the Tavily API keys
config = self.prov_mgr.acm.default_conf
tavily_keys = config.get("provider_settings", {}).get(
"websearch_tavily_key", []
)
if not tavily_keys:
raise ValueError(
"Error: Tavily API key is not configured in provider_settings."
)
# Stage 1: extract content from the URL
if progress_callback:
await progress_callback("extracting", 0, 100)
try:
text_content = await extract_text_from_url(url, tavily_keys)
except Exception as e:
logger.error(f"Failed to extract content from URL {url}: {e}")
raise OSError(f"Failed to extract content from URL {url}: {e}") from e
if not text_content:
raise ValueError(f"No content extracted from URL: {url}")
if progress_callback:
await progress_callback("extracting", 100, 100)
# Stage 2: optionally clean the content, then chunk it
final_chunks = await self._clean_and_rechunk_content(
content=text_content,
url=url,
progress_callback=progress_callback,
enable_cleaning=enable_cleaning,
cleaning_provider_id=cleaning_provider_id,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
if enable_cleaning and not final_chunks:
raise ValueError(
"No valid text remained after content cleaning. Try disabling content cleaning or switching to a more capable LLM model, then retry."
)
# Build a synthetic file name
file_name = url.split("/")[-1] or f"document_from_{url}"
if not Path(file_name).suffix:
file_name += ".url"
# Reuse the existing upload_document method, passing the pre-chunked text
return await self.upload_document(
file_name=file_name,
file_content=None,
file_type="url",  # use 'url' as a special file type
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
batch_size=batch_size,
tasks_limit=tasks_limit,
max_retries=max_retries,
progress_callback=progress_callback,
pre_chunked_text=final_chunks,
)
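
To make the callback contract concrete, a hedged sketch of calling upload_from_url from inside a coroutine; kb_helper stands for an already-initialized KBHelper, and the URL and provider ID are placeholders:

async def upload_example(kb_helper: "KBHelper") -> None:
    async def log_progress(stage: str, current: int, total: int) -> None:
        # Stages arrive as 'extracting', then optionally 'cleaning',
        # then 'parsing', 'chunking', and 'embedding' (per the docstring).
        print(f"[{stage}] {current}/{total}")

    doc = await kb_helper.upload_from_url(
        url="https://example.com/article",  # placeholder URL
        chunk_size=512,
        chunk_overlap=50,
        progress_callback=log_progress,
        enable_cleaning=True,
        cleaning_provider_id="my-cleaning-llm",  # hypothetical provider ID
    )
    print(f"Uploaded {doc.doc_name} with {doc.chunk_count} chunks")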
async def _clean_and_rechunk_content(
self,
content: str,
url: str,
progress_callback=None,
enable_cleaning: bool = False,
cleaning_provider_id: str | None = None,
repair_max_rpm: int = 60,
chunk_size: int = 512,
chunk_overlap: int = 50,
) -> list[str]:
"""
Clean, repair, translate, and re-chunk content fetched from a URL.
"""
if not enable_cleaning:
# Cleaning disabled: chunk using the parameters passed from the frontend
logger.info(
f"Content cleaning disabled; chunking with chunk_size={chunk_size}, chunk_overlap={chunk_overlap}"
)
return await self.chunker.chunk(
content, chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
if not cleaning_provider_id:
logger.warning(
"Content cleaning is enabled but no cleaning_provider_id was provided; skipping cleaning and using default chunking."
)
return await self.chunker.chunk(content)
if progress_callback:
await progress_callback("cleaning", 0, 100)
try:
# Look up the specified LLM provider
llm_provider = await self.prov_mgr.get_provider_by_id(cleaning_provider_id)
if not llm_provider or not isinstance(llm_provider, LLMProvider):
raise ValueError(
f"No LLM provider with ID {cleaning_provider_id} was found, or it is not the right type"
)
# Initial chunking
# Prefer paragraph-level separators to produce higher-quality chunks
text_splitter = RecursiveCharacterChunker(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
separators=["\n\n", "\n", " "],  # paragraph separators first
)
initial_chunks = await text_splitter.chunk(content)
logger.info(f"Initial chunking produced {len(initial_chunks)} chunks for repair.")
# Process all chunks concurrently
rate_limiter = RateLimiter(repair_max_rpm)
tasks = [
_repair_and_translate_chunk_with_retry(
chunk, llm_provider, rate_limiter
)
for chunk in initial_chunks
]
repaired_results = await asyncio.gather(*tasks, return_exceptions=True)
final_chunks = []
for i, result in enumerate(repaired_results):
if isinstance(result, Exception):
logger.warning(f"Chunk {i} raised an exception: {str(result)}. Falling back to the original chunk.")
final_chunks.append(initial_chunks[i])
elif isinstance(result, list):
final_chunks.extend(result)
logger.info(
f"Text repair complete: {len(initial_chunks)} original chunks -> {len(final_chunks)} final chunks."
)
if progress_callback:
await progress_callback("cleaning", 100, 100)
return final_chunks
except Exception as e:
logger.error(f"Content cleaning with provider '{cleaning_provider_id}' failed: {e}")
# Cleaning failed; return the default chunking result so the pipeline is not interrupted
return await self.chunker.chunk(content)

View File

@@ -8,7 +8,7 @@ from astrbot.core.provider.manager import ProviderManager
from .chunking.recursive import RecursiveCharacterChunker
from .kb_db_sqlite import KBSQLiteDatabase
from .kb_helper import KBHelper
from .models import KnowledgeBase
from .models import KBDocument, KnowledgeBase
from .retrieval.manager import RetrievalManager, RetrievalResult
from .retrieval.rank_fusion import RankFusion
from .retrieval.sparse_retriever import SparseRetriever
@@ -284,3 +284,47 @@ class KnowledgeBaseManager:
await self.kb_db.close()
except Exception as e:
logger.error(f"Failed to close the knowledge base metadata database: {e}")
async def upload_from_url(
self,
kb_id: str,
url: str,
chunk_size: int = 512,
chunk_overlap: int = 50,
batch_size: int = 32,
tasks_limit: int = 3,
max_retries: int = 3,
progress_callback=None,
) -> KBDocument:
"""Upload a document from a URL into the specified knowledge base
Args:
kb_id: knowledge base ID
url: URL of the web page to extract content from
chunk_size: text chunk size
chunk_overlap: overlap between adjacent chunks
batch_size: batch size
tasks_limit: concurrent task limit
max_retries: maximum number of retries
progress_callback: progress callback function
Returns:
KBDocument: the uploaded document object
Raises:
ValueError: if the knowledge base does not exist or the URL is empty
IOError: if the network request fails
"""
kb_helper = await self.get_kb(kb_id)
if not kb_helper:
raise ValueError(f"Knowledge base with id {kb_id} not found.")
return await kb_helper.upload_from_url(
url=url,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
batch_size=batch_size,
tasks_limit=tasks_limit,
max_retries=max_retries,
progress_callback=progress_callback,
)

View File

@@ -0,0 +1,103 @@
import asyncio
import aiohttp
class URLExtractor:
"""URL content extractor wrapping Tavily API calls and key management"""
def __init__(self, tavily_keys: list[str]):
"""
Initialize the URL extractor
Args:
tavily_keys: list of Tavily API keys
"""
if not tavily_keys:
raise ValueError("Error: Tavily API keys are not configured.")
self.tavily_keys = tavily_keys
self.tavily_key_index = 0
self.tavily_key_lock = asyncio.Lock()
async def _get_tavily_key(self) -> str:
"""Fetch and rotate Tavily API keys from the list in a concurrency-safe way."""
async with self.tavily_key_lock:
key = self.tavily_keys[self.tavily_key_index]
self.tavily_key_index = (self.tavily_key_index + 1) % len(self.tavily_keys)
return key
async def extract_text_from_url(self, url: str) -> str:
"""
Extract the main text content from a URL using the Tavily API.
This is a simplified version of the tavily_extract_web_page method from the web_searcher plugin, built for the knowledge base module with no dependency on AstrMessageEvent.
Args:
url: URL of the web page to extract content from
Returns:
The extracted text content
Raises:
ValueError: if the URL is empty or the API key is not configured
IOError: if the request fails or returns an error
"""
if not url:
raise ValueError("Error: url must be a non-empty string.")
tavily_key = await self._get_tavily_key()
api_url = "https://api.tavily.com/extract"
headers = {
"Authorization": f"Bearer {tavily_key}",
"Content-Type": "application/json",
}
payload = {
"urls": [url],
"extract_depth": "basic",  # use basic extraction depth
}
try:
async with aiohttp.ClientSession(trust_env=True) as session:
async with session.post(
api_url,
json=payload,
headers=headers,
timeout=30.0,  # generous timeout, since content extraction can take longer
) as response:
if response.status != 200:
reason = await response.text()
raise OSError(
f"Tavily web extraction failed: {reason}, status: {response.status}"
)
data = await response.json()
results = data.get("results", [])
if not results:
raise ValueError(f"No content extracted from URL: {url}")
# Return the content of the first result
return results[0].get("raw_content", "")
except aiohttp.ClientError as e:
raise OSError(f"Failed to fetch URL {url}: {e}") from e
except Exception as e:
raise OSError(f"Failed to extract content from URL {url}: {e}") from e
# A plain function interface, provided for backward compatibility
async def extract_text_from_url(url: str, tavily_keys: list[str]) -> str:
"""
Simple function interface for extracting text content from a URL
Args:
url: URL of the web page to extract content from
tavily_keys: list of Tavily API keys
Returns:
The extracted text content
"""
extractor = URLExtractor(tavily_keys)
return await extractor.extract_text_from_url(url)
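
A quick usage sketch of this function interface; the key below is a placeholder, and real keys come from provider_settings.websearch_tavily_key:

import asyncio

async def demo() -> None:
    # Placeholder key list; URLExtractor round-robins across entries.
    keys = ["tvly-xxxxxxxxxxxx"]
    text = await extract_text_from_url("https://example.com", keys)
    print(text[:200])

asyncio.run(demo())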

View File

@@ -0,0 +1,65 @@
TEXT_REPAIR_SYSTEM_PROMPT = """You are a meticulous digital archivist. Your mission is to reconstruct a clean, readable article from raw, noisy text chunks.
**Core Task:**
1. **Analyze:** Examine the text chunk to separate "signal" (substantive information) from "noise" (UI elements, ads, navigation, footers).
2. **Process:** Clean and repair the signal. **Do not translate it.** Keep the original language.
**Crucial Rules:**
- **NEVER discard a chunk if it contains ANY valuable information.** Your primary duty is to salvage content.
- **If a chunk contains multiple distinct topics, split them.** Enclose each topic in its own `<repaired_text>` tag.
- Your output MUST be ONLY `<repaired_text>...</repaired_text>` tags or a single `<discard_chunk />` tag.
---
**Example 1: Chunk with Noise and Signal**
*Input Chunk:*
"Home | About | Products | **The Llama is a domesticated South American camelid.** | © 2025 ACME Corp."
*Your Thought Process:*
1. "Home | About | Products..." and "© 2025 ACME Corp." are noise.
2. "The Llama is a domesticated..." is the signal.
3. I must extract the signal and wrap it.
*Your Output:*
<repaired_text>
The Llama is a domesticated South American camelid.
</repaired_text>
---
**Example 2: Chunk with ONLY Noise**
*Input Chunk:*
"Next Page > | Subscribe to our newsletter | Follow us on X"
*Your Thought Process:*
1. This entire chunk is noise. There is no signal.
2. I must discard this.
*Your Output:*
<discard_chunk />
---
**Example 3: Chunk with Multiple Topics (Requires Splitting)**
*Input Chunk:*
"## Chapter 1: The Sun
The Sun is the star at the center of the Solar System.
## Chapter 2: The Moon
The Moon is Earth's only natural satellite."
*Your Thought Process:*
1. This chunk contains two distinct topics.
2. I must process them separately to maintain semantic integrity.
3. I will create two `<repaired_text>` blocks.
*Your Output:*
<repaired_text>
## Chapter 1: The Sun
The Sun is the star at the center of the Solar System.
</repaired_text>
<repaired_text>
## Chapter 2: The Moon
The Moon is Earth's only natural satellite.
</repaired_text>
"""

View File

@@ -48,6 +48,7 @@ class KnowledgeBaseRoute(Route):
# Document management
"/kb/document/list": ("GET", self.list_documents),
"/kb/document/upload": ("POST", self.upload_document),
"/kb/document/upload/url": ("POST", self.upload_document_from_url),
"/kb/document/upload/progress": ("GET", self.get_upload_progress),
"/kb/document/get": ("GET", self.get_document),
"/kb/document/delete": ("POST", self.delete_document),
@@ -1070,3 +1071,174 @@ class KnowledgeBaseRoute(Route):
logger.error(f"Failed to delete session knowledge base configuration: {e}")
logger.error(traceback.format_exc())
return Response().error(f"Failed to delete session knowledge base configuration: {e!s}").__dict__
async def upload_document_from_url(self):
"""Upload a document from a URL
Body:
- kb_id: knowledge base ID (required)
- url: URL of the web page to extract content from (required)
- chunk_size: chunk size (optional, default 512)
- chunk_overlap: chunk overlap (optional, default 50)
- batch_size: batch size (optional, default 32)
- tasks_limit: concurrent task limit (optional, default 3)
- max_retries: maximum number of retries (optional, default 3)
Returns:
- task_id: task ID used to poll upload progress and results
"""
try:
kb_manager = self._get_kb_manager()
data = await request.json
kb_id = data.get("kb_id")
if not kb_id:
return Response().error("Missing parameter kb_id").__dict__
url = data.get("url")
if not url:
return Response().error("Missing parameter url").__dict__
chunk_size = data.get("chunk_size", 512)
chunk_overlap = data.get("chunk_overlap", 50)
batch_size = data.get("batch_size", 32)
tasks_limit = data.get("tasks_limit", 3)
max_retries = data.get("max_retries", 3)
enable_cleaning = data.get("enable_cleaning", False)
cleaning_provider_id = data.get("cleaning_provider_id")
# Look up the knowledge base
kb_helper = await kb_manager.get_kb(kb_id)
if not kb_helper:
return Response().error("Knowledge base not found").__dict__
# Generate a task ID
task_id = str(uuid.uuid4())
# Initialize the task state
self.upload_tasks[task_id] = {
"status": "pending",
"result": None,
"error": None,
}
# Start the background task
asyncio.create_task(
self._background_upload_from_url_task(
task_id=task_id,
kb_helper=kb_helper,
url=url,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
batch_size=batch_size,
tasks_limit=tasks_limit,
max_retries=max_retries,
enable_cleaning=enable_cleaning,
cleaning_provider_id=cleaning_provider_id,
),
)
return (
Response()
.ok(
{
"task_id": task_id,
"url": url,
"message": "URL upload task created, processing in background",
},
)
.__dict__
)
except ValueError as e:
return Response().error(str(e)).__dict__
except Exception as e:
logger.error(f"Failed to upload document from URL: {e}")
logger.error(traceback.format_exc())
return Response().error(f"Failed to upload document from URL: {e!s}").__dict__
async def _background_upload_from_url_task(
self,
task_id: str,
kb_helper,
url: str,
chunk_size: int,
chunk_overlap: int,
batch_size: int,
tasks_limit: int,
max_retries: int,
enable_cleaning: bool,
cleaning_provider_id: str | None,
):
"""Background task for uploading from a URL"""
try:
# Initialize the task state
self.upload_tasks[task_id] = {
"status": "processing",
"result": None,
"error": None,
}
self.upload_progress[task_id] = {
"status": "processing",
"file_index": 0,
"file_total": 1,
"file_name": f"URL: {url}",
"stage": "extracting",
"current": 0,
"total": 100,
}
# Create the progress callback
async def progress_callback(stage, current, total):
if task_id in self.upload_progress:
self.upload_progress[task_id].update(
{
"status": "processing",
"file_index": 0,
"file_name": f"URL: {url}",
"stage": stage,
"current": current,
"total": total,
},
)
# Upload the document
doc = await kb_helper.upload_from_url(
url=url,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
batch_size=batch_size,
tasks_limit=tasks_limit,
max_retries=max_retries,
progress_callback=progress_callback,
enable_cleaning=enable_cleaning,
cleaning_provider_id=cleaning_provider_id,
)
# Mark the task as completed
result = {
"task_id": task_id,
"uploaded": [doc.model_dump()],
"failed": [],
"total": 1,
"success_count": 1,
"failed_count": 0,
}
self.upload_tasks[task_id] = {
"status": "completed",
"result": result,
"error": None,
}
self.upload_progress[task_id]["status"] = "completed"
except Exception as e:
logger.error(f"Background URL upload task {task_id} failed: {e}")
logger.error(traceback.format_exc())
self.upload_tasks[task_id] = {
"status": "failed",
"result": None,
"error": str(e),
}
if task_id in self.upload_progress:
self.upload_progress[task_id]["status"] = "failed"
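
End to end, a client might drive this route as sketched below. A minimal sketch: the base URL and kb_id are placeholders, and passing task_id as a query parameter to the progress endpoint is an assumption; check get_upload_progress for the exact contract.

import asyncio
import aiohttp

async def upload_url_and_wait(base: str, kb_id: str, url: str) -> dict:
    async with aiohttp.ClientSession() as session:
        # Kick off the background task; the route returns a task_id immediately.
        async with session.post(
            f"{base}/api/kb/document/upload/url",
            json={"kb_id": kb_id, "url": url},
        ) as resp:
            task_id = (await resp.json())["data"]["task_id"]
        # Poll until the background task reports a terminal status.
        while True:
            async with session.get(
                f"{base}/api/kb/document/upload/progress",
                params={"task_id": task_id},  # assumed parameter name
            ) as resp:
                progress = (await resp.json()).get("data", {})
            if progress.get("status") in ("completed", "failed"):
                return progress
            await asyncio.sleep(1)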

View File

@@ -4,6 +4,7 @@
"tabs": {
"overview": "Overview",
"documents": "Documents",
"retrieval": "Retrieval",
"sessions": "Sessions",
"settings": "Settings"
},
@@ -49,6 +50,10 @@
"maxSize": "Max file size: 128MB",
"chunkSettings": "Chunk Settings",
"batchSettings": "Batch Settings",
"cleaningSettings": "Cleaning Settings",
"enableCleaning": "Enable Content Cleaning",
"cleaningProvider": "Cleaning Service Provider",
"cleaningProviderHint": "Select an LLM provider to clean and summarize the extracted web page content",
"chunkSize": "Chunk Size",
"chunkSizeHint": "Number of characters per chunk (default: 512)",
"chunkOverlap": "Chunk Overlap",
@@ -61,7 +66,13 @@
"maxRetriesHint": "Number of times to retry a failed upload task (default: 3)",
"cancel": "Cancel",
"submit": "Upload",
"fileRequired": "Please select a file to upload"
"fileRequired": "Please select a file to upload",
"fileUpload": "File Upload",
"fromUrl": "From URL",
"urlPlaceholder": "Enter the URL of the web page to extract content from",
"urlRequired": "Please enter a URL",
"urlHint": "The main content will be automatically extracted from the target URL as a document. Currently supports {supported} pages. Before use, please ensure that the target web page allows crawler access.",
"beta": "Beta"
},
"settings": {
"title": "Knowledge Base Settings",

View File

@@ -50,6 +50,10 @@
"maxSize": "最大文件大小: 128MB",
"chunkSettings": "分块设置",
"batchSettings": "批处理设置",
"cleaningSettings": "清洗设置",
"enableCleaning": "启用内容清洗",
"cleaningProvider": "清洗服务提供商",
"cleaningProviderHint": "选择一个 LLM 服务商来对提取的网页内容进行清洗和总结",
"chunkSize": "分块大小",
"chunkSizeHint": "每个文本块的字符数 (默认: 512)",
"chunkOverlap": "分块重叠",
@@ -62,7 +66,13 @@
"maxRetriesHint": "上传失败任务的重试次数 (默认: 3)",
"cancel": "取消",
"submit": "上传",
"fileRequired": "请选择要上传的文件"
"fileRequired": "请选择要上传的文件",
"fileUpload": "文件上传",
"fromUrl": "从 URL",
"urlPlaceholder": "请输入要提取内容的网页 URL",
"urlRequired": "请输入 URL",
"urlHint": "将自动从目标 URL 提取主要内容作为文档。目前支持 {supported} 页面,请确保目标网页允许爬虫访问。",
"beta": "测试版"
},
"retrieval": {
"title": "知识库检索",

View File

@@ -57,7 +57,7 @@
</v-card>
<!-- Upload dialog -->
<v-dialog v-model="showUploadDialog" max-width="600px" persistent @after-enter="initUploadSettings">
<v-dialog v-model="showUploadDialog" max-width="650px" persistent @after-enter="initUploadSettings">
<v-card>
<v-card-title class="pa-4 d-flex align-center">
<span class="text-h5">{{ t('upload.title') }}</span>
@@ -67,40 +67,91 @@
<v-divider />
<v-card-text class="pa-6">
<!-- File selection -->
<div class="upload-dropzone" :class="{ 'dragover': isDragging }" @drop.prevent="handleDrop"
@dragover.prevent="isDragging = true" @dragleave="isDragging = false" @click="fileInput?.click()">
<v-icon size="64" color="primary">mdi-cloud-upload</v-icon>
<p class="mt-4 text-h6">{{ t('upload.dropzone') }}</p>
<p class="text-caption text-medium-emphasis mt-2">{{ t('upload.supportedFormats') }}.txt, .md, .pdf, .docx,
.xls, .xlsx</p>
<p class="text-caption text-medium-emphasis">{{ t('upload.maxSize') }}</p>
<p class="text-caption text-medium-emphasis">Up to 10 files can be uploaded</p>
<input ref="fileInput" type="file" multiple hidden accept=".txt,.md,.pdf,.docx,.xls,.xlsx"
@change="handleFileSelect" />
</div>
<v-tabs v-model="uploadMode" grow class="mb-4">
<v-tab value="file">{{ t('upload.fileUpload') }}</v-tab>
<v-tab value="url">
{{ t('upload.fromUrl') }}
<v-badge color="warning" :content="t('upload.beta')" inline class="ml-2" />
</v-tab>
</v-tabs>
<div v-if="selectedFiles.length > 0" class="mt-4">
<div class="d-flex align-center justify-space-between mb-2">
<span class="text-subtitle-2">{{ selectedFiles.length }} file(s) selected</span>
<v-btn variant="text" size="small" @click="selectedFiles = []">Clear</v-btn>
</div>
<div class="files-list">
<div v-for="(file, index) in selectedFiles" :key="index"
class="file-item pa-3 mb-2 rounded bg-surface-variant">
<div class="d-flex align-center justify-space-between">
<div class="d-flex align-center gap-2">
<v-icon>{{ getFileIcon(file.name) }}</v-icon>
<div>
<div class="font-weight-medium">{{ file.name }}</div>
<div class="text-caption">{{ formatFileSize(file.size) }}</div>
<v-card-text class="pa-6 pt-2">
<v-window v-model="uploadMode">
<!-- File upload -->
<v-window-item value="file">
<!-- File selection -->
<div class="upload-dropzone" :class="{ 'dragover': isDragging }" @drop.prevent="handleDrop"
@dragover.prevent="isDragging = true" @dragleave="isDragging = false" @click="fileInput?.click()">
<v-icon size="64" color="primary">mdi-cloud-upload</v-icon>
<p class="mt-4 text-h6">{{ t('upload.dropzone') }}</p>
<p class="text-caption text-medium-emphasis mt-2">{{ t('upload.supportedFormats') }}.txt, .md, .pdf,
.docx,
.xls, .xlsx</p>
<p class="text-caption text-medium-emphasis">{{ t('upload.maxSize') }}</p>
<p class="text-caption text-medium-emphasis">Up to 10 files can be uploaded</p>
<input ref="fileInput" type="file" multiple hidden accept=".txt,.md,.pdf,.docx,.xls,.xlsx"
@change="handleFileSelect" />
</div>
<div v-if="selectedFiles.length > 0" class="mt-4">
<div class="d-flex align-center justify-space-between mb-2">
<span class="text-subtitle-2">{{ selectedFiles.length }} file(s) selected</span>
<v-btn variant="text" size="small" @click="selectedFiles = []">Clear</v-btn>
</div>
<div class="files-list">
<div v-for="(file, index) in selectedFiles" :key="index"
class="file-item pa-3 mb-2 rounded bg-surface-variant">
<div class="d-flex align-center justify-space-between">
<div class="d-flex align-center gap-2">
<v-icon>{{ getFileIcon(file.name) }}</v-icon>
<div>
<div class="font-weight-medium">{{ file.name }}</div>
<div class="text-caption">{{ formatFileSize(file.size) }}</div>
</div>
</div>
<v-btn icon="mdi-close" variant="text" size="small" @click="removeFile(index)" />
</div>
</div>
<v-btn icon="mdi-close" variant="text" size="small" @click="removeFile(index)" />
</div>
</div>
</v-window-item>
<!-- URL upload -->
<v-window-item value="url" class="pt-2">
<!-- Quick Tavily Key setup -->
<div v-if="tavilyConfigStatus === 'not_configured' || tavilyConfigStatus === 'error'" class="mb-4">
<v-alert :type="tavilyConfigStatus === 'error' ? 'error' : 'info'" variant="tonal" density="compact">
<div class="d-flex align-center justify-space-between">
<span>
{{ tavilyConfigStatus === 'error' ? 'Failed to check the web search configuration' : 'A Tavily Key must be configured to use this feature' }}
</span>
<v-btn size="small" variant="flat" @click="showTavilyDialog = true">
Configure
</v-btn>
</div>
</v-alert>
</div>
<v-text-field v-model="uploadUrl" :label="t('upload.urlPlaceholder')" variant="outlined" clearable :disabled="tavilyConfigStatus === 'not_configured'"
autofocus :hint="t('upload.urlHint', { supported: 'HTML' })" persistent-hint />
</v-window-item>
</v-window>
<!-- Cleaning settings (shown only in URL mode) -->
<div v-if="uploadMode === 'url'" class="mt-6">
<div class="d-flex align-center mb-4">
<h3 class="text-h6">{{ t('upload.cleaningSettings') }}</h3>
</div>
<v-row>
<v-col cols="12" sm="4">
<v-switch v-model="uploadSettings.enable_cleaning" :label="t('upload.enableCleaning')" color="primary" />
</v-col>
<v-col cols="12" sm="8">
<v-select v-model="uploadSettings.cleaning_provider_id" :items="llmProviders" item-title="id"
item-value="id" :label="t('upload.cleaningProvider')" :hint="t('upload.cleaningProviderHint')"
persistent-hint variant="outlined" density="compact" :disabled="!uploadSettings.enable_cleaning" />
</v-col>
</v-row>
</div>
<!-- Chunk settings -->
@@ -151,8 +202,8 @@
<v-btn variant="text" @click="closeUploadDialog" :disabled="uploading">
{{ t('upload.cancel') }}
</v-btn>
<v-btn color="primary" variant="elevated" @click="uploadDocument" :loading="uploading"
:disabled="selectedFiles.length === 0">
<v-btn color="primary" variant="elevated" @click="startUpload" :loading="uploading"
:disabled="isUploadDisabled">
{{ t('upload.submit') }}
</v-btn>
</v-card-actions>
@@ -185,11 +236,15 @@
<v-snackbar v-model="snackbar.show" :color="snackbar.color">
{{ snackbar.text }}
</v-snackbar>
<!-- Tavily Key configuration dialog -->
<TavilyKeyDialog v-model="showTavilyDialog" @success="onTavilyKeySet" />
</div>
</template>
<script setup lang="ts">
import { ref, onMounted, onUnmounted } from 'vue'
import TavilyKeyDialog from './TavilyKeyDialog.vue'
import { ref, onMounted, onUnmounted, computed } from 'vue'
import { useRouter } from 'vue-router'
import axios from 'axios'
import { useModuleI18n } from '@/i18n/composables'
@@ -216,10 +271,13 @@ const selectedFiles = ref<File[]>([])
const deleteTarget = ref<any>(null)
const isDragging = ref(false)
const fileInput = ref<HTMLInputElement | null>(null)
// Upload progress - used to poll multiple tasks
const uploadMode = ref('file') // 'file' or 'url'
const uploadUrl = ref('')
const llmProviders = ref<any[]>([])
const uploadingTasks = ref<Map<string, any>>(new Map())
const progressPollingInterval = ref<number | null>(null)
const tavilyConfigStatus = ref('loading') // 'loading', 'configured', 'not_configured', 'error'
const showTavilyDialog = ref(false)
const snackbar = ref({
show: false,
@@ -239,7 +297,9 @@ const uploadSettings = ref({
chunk_overlap: null as number | null,
batch_size: 32,
tasks_limit: 3,
max_retries: 3
max_retries: 3,
enable_cleaning: false,
cleaning_provider_id: null as string | null
})
// Initialize upload settings
@@ -249,10 +309,31 @@ const initUploadSettings = () => {
chunk_overlap: props.kb?.chunk_overlap || null,
batch_size: 32,
tasks_limit: 3,
max_retries: 3
max_retries: 3,
enable_cleaning: false,
cleaning_provider_id: null
}
}
const isUploadDisabled = computed(() => {
if (uploading.value) {
return true
}
if (uploadMode.value === 'file') {
return selectedFiles.value.length === 0
}
if (uploadMode.value === 'url') {
if (!uploadUrl.value) {
return true
}
if (uploadSettings.value.enable_cleaning && !uploadSettings.value.cleaning_provider_id) {
return true
}
return false
}
return true
})
// Table columns
const headers = [
{ title: t('documents.name'), key: 'doc_name', sortable: true },
@@ -314,8 +395,17 @@ const handleDrop = (event: DragEvent) => {
}
}
// Upload documents
const uploadDocument = async () => {
// Upload dispatcher
const startUpload = async () => {
if (uploadMode.value === 'file') {
await uploadFiles()
} else if (uploadMode.value === 'url') {
await uploadFromUrl()
}
}
// Upload files
const uploadFiles = async () => {
if (selectedFiles.value.length === 0) {
showSnackbar(t('upload.fileRequired'), 'warning')
return
@@ -390,6 +480,80 @@ const uploadDocument = async () => {
}
}
// Upload from a URL
const uploadFromUrl = async () => {
if (!uploadUrl.value) {
showSnackbar(t('upload.urlRequired'), 'warning')
return
}
uploading.value = true
try {
const payload: any = {
kb_id: props.kbId,
url: uploadUrl.value,
batch_size: uploadSettings.value.batch_size,
tasks_limit: uploadSettings.value.tasks_limit,
max_retries: uploadSettings.value.max_retries
}
if (uploadSettings.value.chunk_size) {
payload.chunk_size = uploadSettings.value.chunk_size
}
if (uploadSettings.value.chunk_overlap) {
payload.chunk_overlap = uploadSettings.value.chunk_overlap
}
if (uploadSettings.value.enable_cleaning) {
payload.enable_cleaning = true
if (uploadSettings.value.cleaning_provider_id) {
payload.cleaning_provider_id = uploadSettings.value.cleaning_provider_id
}
}
const response = await axios.post('/api/kb/document/upload/url', payload)
if (response.data.status === 'ok') {
const result = response.data.data
const taskId = result.task_id
showSnackbar('Extracting content from the URL in the background...', 'info')
// Add a placeholder entry
const uploadingDoc = {
doc_id: `uploading_${taskId}_0`,
doc_name: result.url,
file_type: 'url',
file_size: 0, // URL has no size
chunk_count: 0,
created_at: new Date().toISOString(),
uploading: true,
taskId: taskId,
uploadProgress: {
stage: 'waiting',
current: 0,
total: 100
}
}
documents.value = [uploadingDoc, ...documents.value]
closeUploadDialog()
if (taskId) {
startProgressPolling(taskId)
}
} else {
showSnackbar(response.data.message || t('documents.uploadFailed'), 'error')
}
} catch (error: any) {
console.error('Failed to upload from URL:', error)
const message = error.response?.data?.message || t('documents.uploadFailed')
showSnackbar(message, 'error')
} finally {
uploading.value = false
}
}
// Start polling progress
const startProgressPolling = (taskId: string) => {
// If polling is already running, stop it first
@@ -490,6 +654,8 @@ const getUploadPercentage = (item: any) => {
const getStageText = (stage: string) => {
const stageMap: Record<string, string> = {
'waiting': 'Waiting...',
'extracting': 'Extracting content...',
'cleaning': 'Cleaning content...',
'parsing': 'Parsing document...',
'chunking': 'Chunking text...',
'embedding': 'Generating embeddings...'
@@ -501,6 +667,8 @@ const getStageText = (stage: string) => {
const closeUploadDialog = () => {
showUploadDialog.value = false
selectedFiles.value = []
uploadUrl.value = ''
uploadMode.value = 'file'
initUploadSettings()
}
@@ -551,6 +719,7 @@ const getFileIcon = (fileType: string) => {
if (type.includes('pdf')) return 'mdi-file-pdf-box'
if (type.includes('md') || type.includes('markdown')) return 'mdi-language-markdown'
if (type.includes('txt')) return 'mdi-file-document-outline'
if (type.includes('url')) return 'mdi-link-variant'
return 'mdi-file'
}
@@ -559,6 +728,7 @@ const getFileColor = (fileType: string) => {
if (type.includes('pdf')) return 'error'
if (type.includes('md')) return 'info'
if (type.includes('txt')) return 'success'
if (type.includes('url')) return 'primary'
return 'grey'
}
@@ -585,8 +755,53 @@ const formatDate = (dateStr: string) => {
})
}
// Load LLM providers
const loadLlmProviders = async () => {
try {
const response = await axios.get('/api/config/provider/list', {
params: { provider_type: 'chat_completion' }
})
if (response.data.status === 'ok') {
llmProviders.value = response.data.data
}
} catch (error) {
console.error('Failed to load LLM providers:', error)
}
}
// Check the Tavily Key configuration
const checkTavilyConfig = async () => {
tavilyConfigStatus.value = 'loading'
try {
const response = await axios.get('/api/config/abconf', {
params: { id: 'default' }
})
if (response.data.status === 'ok') {
const config = response.data.data.config
const tavilyKeys = config?.provider_settings?.websearch_tavily_key
if (Array.isArray(tavilyKeys) && tavilyKeys.length > 0 && tavilyKeys.some(key => key.trim() !== '')) {
tavilyConfigStatus.value = 'configured'
} else {
tavilyConfigStatus.value = 'not_configured'
}
} else {
tavilyConfigStatus.value = 'error'
}
} catch (error) {
console.warn('Failed to check Tavily key config:', error)
tavilyConfigStatus.value = 'error'
}
}
const onTavilyKeySet = () => {
showSnackbar('Tavily API Key configured successfully', 'success')
checkTavilyConfig()
}
onMounted(() => {
loadDocuments()
loadLlmProviders()
checkTavilyConfig()
})
onUnmounted(() => {

View File

@@ -0,0 +1,109 @@
<template>
<v-dialog v-model="dialog" max-width="500px" persistent>
<v-card>
<v-card-title class="text-h5">
Configure Tavily API Key
</v-card-title>
<v-card-text>
<p class="mb-4 text-body-2 text-medium-emphasis">
A Tavily API Key is required to use the web-based knowledge base features. You can obtain one from the <a href="https://tavily.com/" target="_blank">Tavily website</a>.
</p>
<v-text-field
v-model="apiKey"
label="Tavily API Key"
variant="outlined"
:loading="saving"
:error-messages="errorMessage"
autofocus
clearable
placeholder="tvly-..."
/>
</v-card-text>
<v-card-actions>
<v-spacer />
<v-btn variant="text" @click="closeDialog" :disabled="saving">
Cancel
</v-btn>
<v-btn color="primary" variant="elevated" @click="saveKey" :loading="saving">
Save
</v-btn>
</v-card-actions>
</v-card>
</v-dialog>
</template>
<script setup lang="ts">
import { ref, watch } from 'vue'
import axios from 'axios'
const props = defineProps<{
modelValue: boolean
}>()
const emit = defineEmits(['update:modelValue', 'success'])
const dialog = ref(props.modelValue)
const apiKey = ref('')
const saving = ref(false)
const errorMessage = ref('')
watch(() => props.modelValue, (val) => {
dialog.value = val
if (val) {
// Reset state when dialog opens
apiKey.value = ''
errorMessage.value = ''
saving.value = false
}
})
const closeDialog = () => {
emit('update:modelValue', false)
}
const saveKey = async () => {
if (!apiKey.value.trim()) {
errorMessage.value = 'API Key must not be empty'
return
}
errorMessage.value = ''
saving.value = true
try {
// 1. Fetch the current configuration
const configResponse = await axios.get('/api/config/abconf', {
params: { id: 'default' }
})
if (configResponse.data.status !== 'ok') {
throw new Error('Failed to fetch the current configuration')
}
const currentConfig = configResponse.data.data.config
// 2. Update the configuration
if (!currentConfig.provider_settings) {
currentConfig.provider_settings = {}
}
currentConfig.provider_settings.websearch_tavily_key = [apiKey.value.trim()]
// Also set the search provider to tavily
currentConfig.provider_settings.websearch_provider = 'tavily'
// 3. Save the whole configuration
const saveResponse = await axios.post('/api/config/astrbot/update', {
conf_id: 'default',
config: currentConfig
})
if (saveResponse.data.status === 'ok') {
emit('success')
closeDialog()
} else {
errorMessage.value = saveResponse.data.message || 'Save failed; please check that the key is correct'
}
} catch (error: any) {
errorMessage.value = error.response?.data?.message || 'Save failed due to an unknown error'
} finally {
saving.value = false
}
}
</script>
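
For completeness, the same read-modify-write flow the dialog performs, sketched in Python against the two endpoints it calls (a minimal sketch; the base URL and key are placeholders and the component's error handling is omitted):

import asyncio
import aiohttp

async def set_tavily_key(base: str, api_key: str) -> None:
    async with aiohttp.ClientSession() as session:
        # 1. Fetch the current default configuration.
        async with session.get(
            f"{base}/api/config/abconf", params={"id": "default"}
        ) as resp:
            config = (await resp.json())["data"]["config"]
        # 2. Update provider_settings in place, as the dialog does.
        settings = config.setdefault("provider_settings", {})
        settings["websearch_tavily_key"] = [api_key.strip()]
        settings["websearch_provider"] = "tavily"
        # 3. Write the whole configuration back.
        await session.post(
            f"{base}/api/config/astrbot/update",
            json={"conf_id": "default", "config": config},
        )

asyncio.run(set_tavily_key("http://127.0.0.1:6185", "tvly-..."))  # placeholder values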