feat: support json and draftsExport file in knowledge base (#1717)
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
import * as fs from 'node:fs'
|
||||
|
||||
import { JsonLoader } from '@llm-tools/embedjs'
|
||||
|
||||
/**
|
||||
* Drafts 应用导出的笔记文件加载器
|
||||
* 原始文件是一个 JSON 数组。每条笔记只保留 content、tags、modified_at 三个字段
|
||||
*/
|
||||
export class DraftsExportLoader extends JsonLoader {
|
||||
constructor(filePath: string) {
|
||||
const fileContent = fs.readFileSync(filePath, 'utf-8')
|
||||
const rawJson = JSON.parse(fileContent) as any[]
|
||||
const json = rawJson.map((item) => {
|
||||
return {
|
||||
content: item.content?.replace(/\n/g, '<br>'),
|
||||
tags: item.tags,
|
||||
modified_at: item.created_at
|
||||
}
|
||||
})
|
||||
super({ object: json })
|
||||
}
|
||||
}
|
||||
@@ -1,17 +1,18 @@
|
||||
import * as fs from 'node:fs'
|
||||
|
||||
import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
|
||||
import { JsonLoader, LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
|
||||
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
|
||||
import { WebLoader } from '@llm-tools/embedjs-loader-web'
|
||||
import { LoaderReturn } from '@shared/config/types'
|
||||
import { FileType, KnowledgeBaseParams } from '@types'
|
||||
import Logger from 'electron-log'
|
||||
|
||||
import { DraftsExportLoader } from './draftsExportLoader'
|
||||
import { EpubLoader } from './epubLoader'
|
||||
import { OdLoader, OdType } from './odLoader'
|
||||
|
||||
// embedjs内置loader类型
|
||||
const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md']
|
||||
const commonExts = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md']
|
||||
|
||||
export async function addOdLoader(
|
||||
ragApplication: RAGApplication,
|
||||
@@ -89,7 +90,19 @@ export async function addFileLoader(
|
||||
} as LoaderReturn
|
||||
}
|
||||
|
||||
// DraftsExport类型 (file.ext会自动转换成小写)
|
||||
if (['.draftsexport'].includes(file.ext)) {
|
||||
const loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
|
||||
return {
|
||||
entriesAdded: loaderReturn.entriesAdded,
|
||||
uniqueId: loaderReturn.uniqueId,
|
||||
uniqueIds: [loaderReturn.uniqueId],
|
||||
loaderType: loaderReturn.loaderType
|
||||
}
|
||||
}
|
||||
|
||||
const fileContent = fs.readFileSync(file.path, 'utf-8')
|
||||
|
||||
// HTML类型
|
||||
if (['.html', '.htm'].includes(file.ext)) {
|
||||
const loaderReturn = await ragApplication.addLoader(
|
||||
@@ -108,6 +121,18 @@ export async function addFileLoader(
|
||||
}
|
||||
}
|
||||
|
||||
// JSON类型
|
||||
if (['.json'].includes(file.ext)) {
|
||||
const jsonObject = JSON.parse(fileContent)
|
||||
const loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }))
|
||||
return {
|
||||
entriesAdded: loaderReturn.entriesAdded,
|
||||
uniqueId: loaderReturn.uniqueId,
|
||||
uniqueIds: [loaderReturn.uniqueId],
|
||||
loaderType: loaderReturn.loaderType
|
||||
}
|
||||
}
|
||||
|
||||
// 文本类型
|
||||
const loaderReturn = await ragApplication.addLoader(
|
||||
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||
|
||||
Reference in New Issue
Block a user