From 5be9b318eaeba317c8273ccbb6288a4d926eda04 Mon Sep 17 00:00:00 2001 From: eeee0717 Date: Wed, 26 Mar 2025 16:37:06 +0800 Subject: [PATCH] =?UTF-8?q?feat(KnowledgeService):=20=E6=8F=90=E5=8F=96OCR?= =?UTF-8?q?=E9=A2=84=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=E4=B8=BA=E5=8D=95?= =?UTF-8?q?=E7=8B=AC=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/services/KnowledgeService.ts | 38 +++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/main/services/KnowledgeService.ts b/src/main/services/KnowledgeService.ts index 2d954bcf0..58a63d382 100644 --- a/src/main/services/KnowledgeService.ts +++ b/src/main/services/KnowledgeService.ts @@ -167,22 +167,7 @@ class KnowledgeService { state: LoaderTaskItemState.PENDING, task: async () => { // 添加OCR预处理逻辑 - let fileToProcess: FileType = file - if (base.preprocessing && base.ocrProvider && file.ext.toLowerCase() === '.pdf') { - try { - const ocrProvider = new OcrProvider(base.ocrProvider) - Logger.info(`Starting OCR processing for file: ${file.path}`) - - const { processedFile } = await ocrProvider.parseFile(item.id, file) - Logger.info(`OCR processing completed: ${processedFile.path}`) - fileToProcess = processedFile - Logger.info(`OCR processing completed: ${fileToProcess.path}`) - } catch (err) { - Logger.error(`OCR processing failed: ${err}`) - // 如果OCR失败,使用原始文件 - fileToProcess = file - } - } + const fileToProcess: FileType = await this.preprocessing(file, base, item) // 使用处理后的文件进行加载 return addFileLoader(ragApplication, fileToProcess, base, forceReload) @@ -203,7 +188,6 @@ class KnowledgeService { return loaderTask } - private directoryTask( ragApplication: RAGApplication, options: KnowledgeBaseAddItemOptionsNonNullableAttribute @@ -503,6 +487,26 @@ class KnowledgeService { public getStorageDir = (): string => { return this.storageDir } + + private preprocessing = async (file: FileType, base: KnowledgeBaseParams, item: 
KnowledgeItem): Promise<FileType> => { + let fileToProcess: FileType = file + if (base.preprocessing && base.ocrProvider && file.ext.toLowerCase() === '.pdf') { + try { + const ocrProvider = new OcrProvider(base.ocrProvider) + Logger.info(`Starting OCR processing for file: ${file.path}`) + + const { processedFile } = await ocrProvider.parseFile(item.id, file) + Logger.info(`OCR processing completed: ${processedFile.path}`) + fileToProcess = processedFile + Logger.info(`OCR processing completed: ${fileToProcess.path}`) + } catch (err) { + Logger.error(`OCR processing failed: ${err}`) + // 如果OCR失败,使用原始文件 + fileToProcess = file + } + } + return fileToProcess + } } export default new KnowledgeService()