From 4e9cce76dae6dfb6cccdad2a8a617a564cecb73e Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 24 Oct 2025 17:51:30 +0800 Subject: [PATCH] feat: add timing logs for dense and sparse retrieval processes and adjust top K results in sparse retriever --- .../core/knowledge_base/retrieval/manager.py | 17 +++++++++++++++++ .../retrieval/sparse_retriever.py | 4 +++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/astrbot/core/knowledge_base/retrieval/manager.py b/astrbot/core/knowledge_base/retrieval/manager.py index 00d1649b..278e4da2 100644 --- a/astrbot/core/knowledge_base/retrieval/manager.py +++ b/astrbot/core/knowledge_base/retrieval/manager.py @@ -3,6 +3,8 @@ 协调稠密检索、稀疏检索和 Rerank,提供统一的检索接口 """ +import time + from dataclasses import dataclass from typing import List @@ -104,25 +106,40 @@ class RetrievalManager: kb_ids = new_kb_ids # 1. 稠密检索 + time_start = time.time() dense_results = await self._dense_retrieve( query=query, kb_ids=kb_ids, kb_options=kb_options, ) + time_end = time.time() + logger.debug( + f"Dense retrieval across {len(kb_ids)} bases took {time_end - time_start:.2f}s and returned {len(dense_results)} results." + ) # 2. 稀疏检索 + time_start = time.time() sparse_results = await self.sparse_retriever.retrieve( query=query, kb_ids=kb_ids, kb_options=kb_options, ) + time_end = time.time() + logger.debug( + f"Sparse retrieval across {len(kb_ids)} bases took {time_end - time_start:.2f}s and returned {len(sparse_results)} results." + ) # 3. 结果融合 + time_start = time.time() fused_results = await self.rank_fusion.fuse( dense_results=dense_results, sparse_results=sparse_results, top_k=top_k_fusion, ) + time_end = time.time() + logger.debug( + f"Rank fusion took {time_end - time_start:.2f}s and returned {len(fused_results)} results." + ) # 4. 转换为 RetrievalResult (获取元数据) retrieval_results = [] diff --git a/astrbot/core/knowledge_base/retrieval/sparse_retriever.py b/astrbot/core/knowledge_base/retrieval/sparse_retriever.py index 75ed8738..d9ff915d 100644 --- a/astrbot/core/knowledge_base/retrieval/sparse_retriever.py +++ b/astrbot/core/knowledge_base/retrieval/sparse_retriever.py @@ -68,6 +68,7 @@ class SparseRetriever: List[SparseResult]: 检索结果列表 """ # 1. 获取所有相关块 + top_k_sparse = 0 chunks = [] for kb_id in kb_ids: vec_db: FaissVecDB = kb_options.get(kb_id, {}).get("vec_db") @@ -88,6 +89,7 @@ class SparseRetriever: for doc, chunk_md in zip(result, chunk_mds) ] chunks.extend(result) + top_k_sparse += kb_options.get(kb_id, {}).get("top_k_sparse", 50) if not chunks: return [] @@ -127,4 +129,4 @@ class SparseRetriever: results.sort(key=lambda x: x.score, reverse=True) # return results[: len(results) // len(kb_ids)] - return results + return results[:top_k_sparse]