refactor: knowledge base database engine

This commit is contained in:
kangfenmao
2024-12-25 17:31:11 +08:00
parent b85765915e
commit 34ebab0af8
31 changed files with 614 additions and 383 deletions
-2
View File
@@ -56,8 +56,6 @@ class FileStorage {
const storedFilePath = path.join(this.storageDir, file)
const storedStats = fs.statSync(storedFilePath)
console.debug('storedFilePath', storedFilePath)
if (storedStats.size === fileSize) {
const [originalHash, storedHash] = await Promise.all([
this.getFileHash(filePath),
+13 -11
View File
@@ -3,7 +3,7 @@ import path from 'node:path'
import { LocalPathLoader, RAGApplication, RAGApplicationBuilder, TextLoader } from '@llm-tools/embedjs'
import { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { LibSqlDb } from '@llm-tools/embedjs-libsql'
import { LanceDb } from '@llm-tools/embedjs-lancedb'
import { MarkdownLoader } from '@llm-tools/embedjs-loader-markdown'
import { DocxLoader, ExcelLoader, PptLoader } from '@llm-tools/embedjs-loader-msoffice'
import { PdfLoader } from '@llm-tools/embedjs-loader-pdf'
@@ -26,8 +26,13 @@ class KnowledgeService {
}
}
private getRagApplication = async ({ id, model, apiKey, baseURL }: KnowledgeBaseParams): Promise<RAGApplication> => {
console.debug('getRagApplication', path.join(this.storageDir, id))
private getRagApplication = async ({
id,
model,
apiKey,
baseURL,
dimensions
}: KnowledgeBaseParams): Promise<RAGApplication> => {
return new RAGApplicationBuilder()
.setModel('NO_MODEL')
.setEmbeddingModel(
@@ -35,19 +40,16 @@ class KnowledgeService {
model,
apiKey,
configuration: { baseURL },
dimensions: 1024,
batchSize: 10
dimensions,
batchSize: 20
})
)
.setVectorDatabase(new LibSqlDb({ path: path.join(this.storageDir, id) }))
.setVectorDatabase(new LanceDb({ path: path.join(this.storageDir, id) }))
.build()
}
public create = async (
_: Electron.IpcMainInvokeEvent,
{ id, model, apiKey, baseURL }: KnowledgeBaseParams
): Promise<void> => {
this.getRagApplication({ id, model, apiKey, baseURL })
public create = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
this.getRagApplication(base)
}
public reset = async (_: Electron.IpcMainInvokeEvent, { base }: { base: KnowledgeBaseParams }): Promise<void> => {
+39 -7
View File
@@ -5,15 +5,20 @@ interface ListItemProps {
active?: boolean
icon?: ReactNode
title: string
subtitle?: string
onClick?: () => void
}
const ListItem = ({ active, icon, title, onClick }: ListItemProps) => {
const ListItem = ({ active, icon, title, subtitle, onClick }: ListItemProps) => {
const borderRadius = subtitle ? '10px' : '16px'
return (
<ListItemContainer className={active ? 'active' : ''} onClick={onClick}>
<ListItemContainer className={active ? 'active' : ''} onClick={onClick} style={{ borderRadius }}>
<ListItemContent>
{icon && <span style={{ marginRight: '8px' }}>{icon}</span>}
{title}
{icon && <IconWrapper>{icon}</IconWrapper>}
<TextContainer>
<TitleText>{title}</TitleText>
{subtitle && <SubtitleText>{subtitle}</SubtitleText>}
</TextContainer>
</ListItemContent>
</ListItemContainer>
)
@@ -42,11 +47,38 @@ const ListItemContainer = styled.div`
`
const ListItemContent = styled.div`
display: -webkit-box;
-webkit-line-clamp: 1;
-webkit-box-orient: vertical;
display: flex;
align-items: center;
gap: 8px;
overflow: hidden;
font-size: 13px;
`
/**
 * Inline wrapper around the optional leading icon of a list item.
 * Adds 8px of right margin after the icon.
 * NOTE(review): ListItemContent also declares `gap: 8px`, so the icon/text spacing may add up
 * to 16px — confirm this is intended.
 */
const IconWrapper = styled.span`
margin-right: 8px;
`
/**
 * Vertical flex column that stacks the title above the optional subtitle.
 * Overflow is hidden so that long children are clipped instead of widening the list item.
 */
const TextContainer = styled.div`
display: flex;
flex-direction: column;
overflow: hidden;
`
/** Title line of a list item: kept on a single line and truncated with an ellipsis when too long. */
const TitleText = styled.div`
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
`
/**
 * Secondary line rendered under the title: smaller font, clamped to a single line.
 *
 * The original declared `color` twice (`--color-text-soft`, then `--color-text-3`). In CSS the
 * later declaration wins, so the first one was dead; only the effective value is kept here.
 */
const SubtitleText = styled.div`
  font-size: 10px;
  margin-top: 2px;
  display: -webkit-box;
  -webkit-line-clamp: 1;
  -webkit-box-orient: vertical;
  overflow: hidden;
  color: var(--color-text-3);
`
export default ListItem
@@ -14,7 +14,7 @@ interface PromptPopupShowParams {
}
interface Props extends PromptPopupShowParams {
resolve: (value: string) => void
resolve: (value: any) => void
}
const PromptPopupContainer: React.FC<Props> = ({
@@ -30,18 +30,21 @@ const PromptPopupContainer: React.FC<Props> = ({
// Confirm handler for the modal: hides the dialog, then resolves the pending
// promise with the current `value`.
const onOk = () => {
setOpen(false)
resolve(value)
}
const handleCancel = () => {
const onCancel = () => {
setOpen(false)
}
const onClose = () => {
resolve(value)
resolve(null)
}
PromptPopup.hide = onCancel
return (
<Modal title={title} open={open} onOk={onOk} onCancel={handleCancel} afterClose={onClose} centered>
<Modal title={title} open={open} onOk={onOk} onCancel={onCancel} afterClose={onClose} centered>
<Box mb={8}>{message}</Box>
<Input.TextArea
placeholder={inputPlaceholder}
@@ -57,10 +60,12 @@ const PromptPopupContainer: React.FC<Props> = ({
)
}
const TopViewKey = 'PromptPopup'
export default class PromptPopup {
static topviewId = 0
static hide() {
TopView.hide('PromptPopup')
TopView.hide(TopViewKey)
}
static show(props: PromptPopupShowParams) {
return new Promise<string>((resolve) => {
@@ -69,7 +74,7 @@ export default class PromptPopup {
{...props}
resolve={(v) => {
resolve(v)
this.hide()
TopView.hide(TopViewKey)
}}
/>,
'PromptPopup'
+2 -13
View File
@@ -50,23 +50,12 @@ export const SUMMARIZE_PROMPT =
export const TRANSLATE_PROMPT =
'You are a translation expert. Translate from input language to {{target_language}}, provide the translation result directly without any explanation and keep original format. Do not translate if the target language is the same as the source language.'
export const REFERENCE_PROMPT = `请根据参考资料回答问题,并使用脚注格式引用数据来源。参考资料可能和问题无关,请忽略无关的参考资料。
export const REFERENCE_PROMPT = `请根据参考资料回答问题,并使用脚注格式引用数据来源。请忽略无关的参考资料。
## 脚注格式:
1. **脚注标记**:在正文中使用 [^数字] 的形式标记脚注,例如 [^1]。
2. **脚注内容**:在文档末尾使用 [^数字]: 脚注内容 的形式定义脚注的具体内容
## 脚注示例和要求:
1. type 为 file 时:[^1]: [__name__](http://file/__url__)
2. type 为 directory 时:[^1]: [__name__](http://file/__url__)
3. type 为 url,sitemap 时:[^1]: [__name__](__url__)
4. type 为 note 时:[^1]: __note__
__url__ 替换成参考资料的 url
__name__ 请根据参考资料的 url 进行解析和替换
__note__ 请根据参考资料的 content 进行总结和替换
2. **脚注内容**:在文档末尾使用 [^数字]: 脚注内容 的形式定义脚注的具体内容
## 我的问题是:
+7 -3
View File
@@ -248,7 +248,8 @@
"upgrade.success.content": "Please restart the application to complete the upgrade",
"upgrade.success.title": "Upgrade successfully",
"regenerate.confirm": "Regenerating will replace current message",
"copy.success": "Copied!"
"copy.success": "Copied!",
"error.get_embedding_dimensions": "Failed to get embedding dimensions"
},
"minapp": {
"title": "MinApp"
@@ -559,7 +560,9 @@
"sitemap_placeholder": "Enter Website Map URL",
"directories": "Directories",
"add_directory": "Add Directory",
"directory_placeholder": "Enter Directory Path"
"directory_placeholder": "Enter Directory Path",
"model_info": "Model Info",
"not_support": "The knowledge base database engine has been updated and this knowledge base is no longer supported. Please create a new knowledge base."
},
"models": {
"pinned": "Pinned",
@@ -577,7 +580,8 @@
"free": "Free",
"embedding": "Embedding",
"embedding_model": "Embedding Model",
"embedding_model_tooltip": "Add in Settings->Model Provider->Manage"
"embedding_model_tooltip": "Add in Settings->Model Provider->Manage",
"dimensions": "Dimensions {{dimensions}}"
}
}
}
+7 -3
View File
@@ -248,7 +248,8 @@
"upgrade.success.content": "Пожалуйста, перезапустите приложение для завершения обновления",
"upgrade.success.title": "Обновление успешно",
"regenerate.confirm": "Перегенерация заменит текущее сообщение",
"copy.success": "Скопировано!"
"copy.success": "Скопировано!",
"error.get_embedding_dimensions": "Не удалось получить размерность встраивания"
},
"minapp": {
"title": "Встроенные приложения"
@@ -559,7 +560,9 @@
"sitemap_placeholder": "Введите URL карты сайта",
"directories": "Директории",
"add_directory": "Добавить директорию",
"directory_placeholder": "Введите путь к директории"
"directory_placeholder": "Введите путь к директории",
"model_info": "Информация о модели",
"not_support": "Движок базы данных базы знаний обновлён, эта база знаний больше не поддерживается. Пожалуйста, создайте новую базу знаний"
},
"models": {
"pinned": "Закреплено",
@@ -577,7 +580,8 @@
"free": "Бесплатные модели",
"embedding": "Встраиваемые модели",
"embedding_model": "Встраиваемые модели",
"embedding_model_tooltip": "Добавьте в настройки->модель сервиса->управление"
"embedding_model_tooltip": "Добавьте в настройки->модель сервиса->управление",
"dimensions": "Размерность: {{dimensions}}"
}
}
}
+7 -3
View File
@@ -249,7 +249,8 @@
"upgrade.success.content": "重启用以完成升级",
"upgrade.success.title": "升级成功",
"regenerate.confirm": "重新生成会覆盖当前消息",
"copy.success": "复制成功"
"copy.success": "复制成功",
"error.get_embedding_dimensions": "获取嵌入维度失败"
},
"minapp": {
"title": "小程序"
@@ -548,7 +549,9 @@
"sitemap_placeholder": "请输入站点地图 URL",
"directories": "目录",
"add_directory": "添加目录",
"directory_placeholder": "请输入目录路径"
"directory_placeholder": "请输入目录路径",
"model_info": "模型信息",
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库"
},
"models": {
"pinned": "已固定",
@@ -566,7 +569,8 @@
"free": "免费模型",
"embedding": "嵌入模型",
"embedding_model": "嵌入模型",
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加"
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加",
"dimensions": "{{dimensions}} 维"
}
}
}
+7 -3
View File
@@ -248,7 +248,8 @@
"upgrade.success.content": "請重新啟動應用以完成升級",
"upgrade.success.title": "升級成功",
"regenerate.confirm": "重新生成會覆蓋當前訊息",
"copy.success": "複製成功"
"copy.success": "複製成功",
"error.get_embedding_dimensions": "獲取嵌入維度失敗"
},
"minapp": {
"title": "小程序"
@@ -547,7 +548,9 @@
"sitemap_placeholder": "請輸入網站地圖 URL",
"directories": "目錄",
"add_directory": "添加目錄",
"directory_placeholder": "請輸入目錄路徑"
"directory_placeholder": "請輸入目錄路徑",
"model_info": "模型信息",
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫"
},
"models": {
"pinned": "已固定",
@@ -565,7 +568,8 @@
"free": "免費模型",
"embedding": "嵌入模型",
"embedding_model": "嵌入模型",
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加"
"embedding_model_tooltip": "在設定->模型服務中點擊管理按鈕添加",
"dimensions": "{{dimensions}} 維"
}
}
}
@@ -14,7 +14,7 @@ import Scrollbar from '@renderer/components/Scrollbar'
import { useKnowledge } from '@renderer/hooks/useKnowledge'
import FileManager from '@renderer/services/FileManager'
import { FileType, FileTypes, KnowledgeBase } from '@renderer/types'
import { Button, Card, message, Typography, Upload } from 'antd'
import { Alert, Button, Card, Divider, message, Tag, Typography, Upload } from 'antd'
import { FC } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
@@ -177,6 +177,9 @@ const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
return (
<MainContent>
{!base.dimensions && (
<Alert message={t('knowledge_base.not_support')} type="error" style={{ marginBottom: 20 }} showIcon />
)}
<FileSection>
<TitleWrapper>
<Title level={5}>{t('files.title')}</Title>
@@ -323,6 +326,15 @@ const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
</FlexColumn>
</ContentSection>
<Divider style={{ margin: '10px 0' }} />
<ModelInfo>
<label htmlFor="model-info">{t('knowledge_base.model_info')}</label>
<Tag color="blue">{base.model.name}</Tag>
<Tag color="cyan">{t('models.dimensions', { dimensions: base.dimensions || 0 })}</Tag>
<Tag color="purple">{base.model.provider}</Tag>
</ModelInfo>
<IndexSection>
<Button type="primary" onClick={() => KnowledgeSearchPopup.show({ base })} icon={<SearchOutlined />}>
{t('knowledge_base.search')}
@@ -340,6 +352,7 @@ const MainContent = styled(Scrollbar)`
flex-direction: column;
padding-bottom: 50px;
padding: 15px;
position: relative;
`
const FileSection = styled.div`
@@ -416,4 +429,15 @@ const IndexSection = styled.div`
justify-content: center;
`
/**
 * Horizontal row that shows the knowledge base's model details: a label followed by tags.
 * The nested `label` rule gives the label a slightly stronger text colour than the row default
 * and separates it from the tags with an 8px right margin.
 */
const ModelInfo = styled.div`
display: flex;
align-items: center;
padding: 5px;
color: var(--color-text-3);
label {
margin-right: 8px;
color: var(--color-text-2);
}
`
export default KnowledgeContent
@@ -7,7 +7,7 @@ import Scrollbar from '@renderer/components/Scrollbar'
import { useKnowledgeBases } from '@renderer/hooks/useKnowledge'
import { KnowledgeBase } from '@renderer/types'
import { Dropdown, Empty, MenuProps } from 'antd'
import { FC, useCallback, useEffect, useState } from 'react'
import { FC, useCallback, useEffect, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
@@ -19,6 +19,7 @@ const KnowledgePage: FC = () => {
const { bases, renameKnowledgeBase, deleteKnowledgeBase, updateKnowledgeBases } = useKnowledgeBases()
const [selectedBase, setSelectedBase] = useState<KnowledgeBase>()
const [isDragging, setIsDragging] = useState(false)
const prevLength = useRef(0)
const handleAddKnowledge = async () => {
await AddKnowledgePopup.show({ title: t('knowledge_base.add.title') })
@@ -35,6 +36,14 @@ const KnowledgePage: FC = () => {
}
}, [bases, selectedBase])
// Whenever the list of knowledge bases grows, select the last base in the list.
// `prevLength` holds the list length seen by the previous run of this effect, so removals and
// same-length updates leave the current selection untouched.
// NOTE(review): prevLength starts at 0, so the first run with a non-empty list also selects the
// last base — confirm this is the intended initial selection.
useEffect(() => {
const currentLength = bases.length
if (currentLength > 0 && currentLength > prevLength.current) {
setSelectedBase(bases[currentLength - 1])
}
// Remember the length for the next comparison, whether or not the selection changed.
prevLength.current = currentLength
}, [bases])
const getMenuItems = useCallback(
(base: KnowledgeBase) => {
const menus: MenuProps['items'] = [
@@ -90,7 +99,7 @@ const KnowledgePage: FC = () => {
style={{ marginBottom: 0, paddingBottom: isDragging ? 50 : 0 }}
onDragStart={() => setIsDragging(true)}
onDragEnd={() => setIsDragging(false)}>
{(base) => (
{(base: KnowledgeBase) => (
<Dropdown menu={{ items: getMenuItems(base) }} trigger={['contextMenu']} key={base.id}>
<div>
<ListItem
@@ -2,6 +2,7 @@ import { TopView } from '@renderer/components/TopView'
import { isEmbeddingModel } from '@renderer/config/models'
import { useKnowledgeBases } from '@renderer/hooks/useKnowledge'
import { useProviders } from '@renderer/hooks/useProvider'
import AiProvider from '@renderer/providers/AiProvider'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { getModelUniqId } from '@renderer/services/ModelService'
import { Model } from '@renderer/types'
@@ -30,6 +31,7 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
const { t } = useTranslation()
const { providers } = useProviders()
const { addKnowledgeBase } = useKnowledgeBases()
const [loading, setLoading] = useState(false)
const allModels = providers
.map((p) => p.models)
.flat()
@@ -55,10 +57,29 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
const selectedModel = find(allModels, JSON.parse(values.model)) as Model
if (selectedModel) {
// Put the OK button into its loading state while the provider is resolved and probed.
setLoading(true)
const provider = providers.find((p) => p.id === selectedModel.provider)
if (!provider) {
  // Clear the spinner before bailing out; otherwise the OK button stays stuck in "loading".
  setLoading(false)
  return
}
const aiProvider = new AiProvider(provider)
let dimensions = 0
try {
  // Ask the provider how long its embedding vectors are for the selected model.
  dimensions = await aiProvider.getEmbeddingDimensions(selectedModel)
} catch (error) {
  console.error('Error getting embedding dimensions:', error)
  window.message.error(t('message.error.get_embedding_dimensions'))
  // Re-enable the OK button so the user can pick another model and retry.
  setLoading(false)
  return
}
const newBase = {
id: nanoid(),
name: values.name,
model: selectedModel,
dimensions,
items: [],
created_at: Date.now(),
updated_at: Date.now()
@@ -84,7 +105,15 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
}
return (
<Modal title={title} open={open} onOk={onOk} onCancel={onCancel} afterClose={onClose} destroyOnClose centered>
<Modal
title={title}
open={open}
onOk={onOk}
onCancel={onCancel}
afterClose={onClose}
destroyOnClose
centered
okButtonProps={{ loading }}>
<Form form={form} layout="vertical">
<Form.Item
name="name"
+9 -1
View File
@@ -1,6 +1,6 @@
import BaseProvider from '@renderer/providers/BaseProvider'
import ProviderFactory from '@renderer/providers/ProviderFactory'
import { Assistant, Message, Provider, Suggestion } from '@renderer/types'
import { Assistant, Message, Model, Provider, Suggestion } from '@renderer/types'
import OpenAI from 'openai'
import { CompletionsParams } from '.'
@@ -60,4 +60,12 @@ export default class AiProvider {
}): Promise<string[]> {
return this.sdk.generateImage(params)
}
/**
 * Resolves the embedding vector length for `model` by forwarding the call to `this.sdk`.
 * @param model Embedding model to query.
 * @returns Whatever dimension count `this.sdk.getEmbeddingDimensions` resolves to.
 */
public async getEmbeddingDimensions(model: Model): Promise<number> {
return this.sdk.getEmbeddingDimensions(model)
}
/** Returns the base URL reported by `this.sdk` (forwards to `this.sdk.getBaseURL()`). */
public getBaseURL(): string {
return this.sdk.getBaseURL()
}
}
@@ -265,4 +265,8 @@ export default class AnthropicProvider extends BaseProvider {
public async models(): Promise<OpenAI.Models.Model[]> {
return []
}
/**
 * Always resolves to 0; this provider does not query any embedding endpoint.
 * NOTE(review): 0 is falsy, and the knowledge page shows its "not supported" alert for bases
 * whose `dimensions` is falsy — presumably this provider is never offered as an embedding
 * provider; confirm.
 */
public async getEmbeddingDimensions(): Promise<number> {
return 0
}
}
+4 -3
View File
@@ -2,7 +2,7 @@ import { REFERENCE_PROMPT } from '@renderer/config/prompts'
import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import store from '@renderer/store'
import { Assistant, Message, Provider, Suggestion } from '@renderer/types'
import { Assistant, Message, Model, Provider, Suggestion } from '@renderer/types'
import { delay } from '@renderer/utils'
import { take } from 'lodash'
import OpenAI from 'openai'
@@ -37,6 +37,7 @@ export default abstract class BaseProvider {
guidanceScale: number
signal?: AbortSignal
}): Promise<string[]>
abstract getEmbeddingDimensions(model: Model): Promise<number>
public getBaseURL(): string {
const host = this.provider.apiHost
@@ -106,12 +107,12 @@ export default abstract class BaseProvider {
return {
id: index,
content: item.pageContent,
url: encodeURIComponent(sourceUrl),
sourceUrl: sourceUrl.startsWith('http') ? sourceUrl : encodeURIComponent(sourceUrl),
type: baseItem?.type
}
})
const referencesContent = JSON.stringify(references, null, 2)
const referencesContent = `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', referencesContent)
}
+11 -2
View File
@@ -13,7 +13,7 @@ import { SUMMARIZE_PROMPT } from '@renderer/config/prompts'
import { getAssistantSettings, getDefaultModel, getTopNamingModel } from '@renderer/services/AssistantService'
import { EVENT_NAMES } from '@renderer/services/EventService'
import { filterContextMessages } from '@renderer/services/MessagesService'
import { Assistant, FileTypes, Message, Provider, Suggestion } from '@renderer/types'
import { Assistant, FileTypes, Message, Model, Provider, Suggestion } from '@renderer/types'
import axios from 'axios'
import { first, isEmpty, last, takeRight } from 'lodash'
import OpenAI from 'openai'
@@ -29,10 +29,14 @@ export default class GeminiProvider extends BaseProvider {
super(provider)
this.sdk = new GoogleGenerativeAI(this.apiKey)
this.requestOptions = {
baseUrl: this.provider.apiHost
baseUrl: this.getBaseURL()
}
}
/** Returns the provider's configured `apiHost` unchanged; the constructor uses it as the request `baseUrl`. */
public getBaseURL(): string {
return this.provider.apiHost
}
private async getMessageContents(message: Message): Promise<Content> {
const role = message.role === 'user' ? 'user' : 'model'
@@ -288,4 +292,9 @@ export default class GeminiProvider extends BaseProvider {
return []
}
}
/**
 * Measures the embedding vector length of `model` by embedding the short probe text 'hi'
 * and counting the values in the returned vector.
 * @param model Embedding model to probe; its `id` is passed to the SDK.
 * @returns Number of values in the embedding returned for the probe text.
 */
public async getEmbeddingDimensions(model: Model): Promise<number> {
  const embeddingModel = this.sdk.getGenerativeModel({ model: model.id }, this.requestOptions)
  const { embedding } = await embeddingModel.embedContent('hi')
  return embedding.values.length
}
}
@@ -378,4 +378,12 @@ export default class OpenAIProvider extends BaseProvider {
return response.data.map((item) => item.url)
}
/**
 * Measures the embedding vector length of `model` by embedding the short probe text 'hi'
 * and returning the length of the first vector in the response.
 * @param model Embedding model to probe; its `id` is sent as the request model.
 * @returns Length of the first embedding vector in the response.
 * @throws Error when the response carries no embedding (e.g. an OpenAI-compatible host that
 *         answers with an empty or malformed body), instead of an opaque TypeError.
 */
public async getEmbeddingDimensions(model: Model): Promise<number> {
  const response = await this.sdk.embeddings.create({
    model: model.id,
    input: 'hi'
  })
  // Guard the indexing: compatible hosts do not always return a populated `data` array.
  const embedding = response.data?.[0]?.embedding
  if (!embedding) {
    throw new Error(`Embedding response for model "${model.id}" did not contain a vector`)
  }
  return embedding.length
}
}
+1 -1
View File
@@ -9,7 +9,7 @@ class KnowledgeQueue {
private processing: Map<string, boolean> = new Map()
private pollingInterval: NodeJS.Timeout | null = null
// private readonly POLLING_INTERVAL = 5000
private readonly MAX_RETRIES = 3
private readonly MAX_RETRIES = 2
constructor() {
this.checkAllBases().catch(console.error)
@@ -67,7 +67,7 @@ export function getAssistantProvider(assistant: Assistant): Provider {
return provider || getDefaultProvider()
}
export function getProviderByModel(model?: Model) {
export function getProviderByModel(model?: Model): Provider {
const providers = store.getState().llm.providers
const providerId = model ? model.provider : getDefaultProvider().id
return providers.find((p) => p.id === providerId) as Provider
@@ -12,10 +12,17 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
provider.apiKey = 'empty'
}
let host = aiProvider.getBaseURL()
if (host.includes('generativelanguage.googleapis.com')) {
host = host + '/v1beta/openai/'
}
return {
id: base.id,
model: base.model.name,
model: base.model.id,
dimensions: base.dimensions,
apiKey: aiProvider.getApiKey(),
baseURL: provider.apiHost + '/v1'
baseURL: host
}
}
+2
View File
@@ -203,6 +203,7 @@ export interface KnowledgeBase {
id: string
name: string
model: Model
dimensions: number
description?: string
items: KnowledgeItem[]
created_at: number
@@ -212,6 +213,7 @@ export interface KnowledgeBase {
/**
 * Flat parameter set describing a knowledge base and the embedding endpoint it uses.
 * Built by `getKnowledgeBaseParams` from a `KnowledgeBase`.
 */
export type KnowledgeBaseParams = {
/** Knowledge base id. */
id: string
/** Embedding model identifier (filled from `base.model.id`). */
model: string
/** Embedding vector length (filled from `base.dimensions`). */
dimensions: number
/** API key for the embedding provider. */
apiKey: string
/** Base URL of the embedding provider's API. */
baseURL: string
}