rename ocr to preprocess
This commit is contained in:
@@ -1,12 +0,0 @@
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
|
||||
/**
 * Placeholder OCR provider with no parsing capability.
 *
 * `OcrProviderFactory.create` returns it for provider ids that have no
 * dedicated implementation.
 */
export default class DefaultOcrProvider extends BaseOcrProvider {
  /**
   * @param provider Provider configuration, forwarded unchanged to the base class.
   */
  constructor(provider: OcrProvider) {
    super(provider)
  }

  /**
   * Always fails, because this provider cannot process files.
   *
   * Declared `async` so the failure is delivered as a rejected promise, which
   * is what the declared return type promises, instead of a synchronous throw
   * that a caller chaining `.catch()` would never observe.
   *
   * @returns A promise that always rejects.
   * @throws Error with message 'Method not implemented.' (as a rejection).
   */
  public async parseFile(): Promise<{ processedFile: FileMetadata }> {
    throw new Error('Method not implemented.')
  }
}
|
||||
@@ -1,29 +0,0 @@
|
||||
import { isMac } from '@main/constant'
|
||||
import { OcrProvider } from '@types'
|
||||
import Logger from 'electron-log'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import DefaultOcrProvider from './DefaultOcrProvider'
|
||||
import Doc2xOcrProvider from './Doc2xOcrProvider'
|
||||
import MacSysOcrProvider from './MacSysOcrProvider'
|
||||
import MineruOcrProvider from './MineruOcrProvider'
|
||||
import MistralOcrProvider from './MistralOcrProvider'
|
||||
/**
 * Maps an OCR provider configuration to the class that implements it.
 */
export default class OcrProviderFactory {
  /**
   * Instantiates the implementation selected by `provider.id`.
   *
   * @param provider Provider configuration; its `id` selects the implementation.
   * @returns The matching provider instance, or a `DefaultOcrProvider` when the
   *   id is not one of the known values.
   */
  static create(provider: OcrProvider): BaseOcrProvider {
    const { id } = provider

    if (id === 'doc2x') {
      return new Doc2xOcrProvider(provider)
    }

    if (id === 'mistral') {
      return new MistralOcrProvider(provider)
    }

    if (id === 'system') {
      // Only a warning is emitted off macOS; the macOS provider is still returned.
      if (!isMac) {
        Logger.warn('[OCR] System OCR provider is only available on macOS')
      }
      return new MacSysOcrProvider(provider)
    }

    if (id === 'mineru') {
      return new MineruOcrProvider(provider)
    }

    return new DefaultOcrProvider(provider)
  }
}
|
||||
@@ -3,47 +3,47 @@ import path from 'node:path'
|
||||
|
||||
import { windowService } from '@main/services/WindowService'
|
||||
import { getFileExt } from '@main/utils/file'
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
import { createCanvas, loadImage } from 'canvas'
|
||||
import { app } from 'electron'
|
||||
import { TypedArray } from 'pdfjs-dist/types/src/display/api'
|
||||
|
||||
export default abstract class BaseOcrProvider {
|
||||
protected provider: OcrProvider
|
||||
export default abstract class BasePreprocessProvider {
|
||||
protected provider: PreprocessProvider
|
||||
private storageDir = path.join(app.getPath('userData'), 'Data', 'Files')
|
||||
|
||||
constructor(provider: OcrProvider) {
|
||||
constructor(provider: PreprocessProvider) {
|
||||
if (!provider) {
|
||||
throw new Error('Ocr provider is not set')
|
||||
throw new Error('Preprocess provider is not set')
|
||||
}
|
||||
this.provider = provider
|
||||
}
|
||||
abstract parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }>
|
||||
|
||||
/**
|
||||
* 检查文件是否已经被OCR处理过
|
||||
* 统一检测方法:如果 Data/Files/{file.id} 是目录,说明已被OCR处理
|
||||
* 检查文件是否已经被预处理过
|
||||
* 统一检测方法:如果 Data/Files/{file.id} 是目录,说明已被预处理
|
||||
* @param file 文件信息
|
||||
* @returns 如果已处理返回处理后的文件信息,否则返回null
|
||||
*/
|
||||
public async checkIfAlreadyProcessed(file: FileMetadata): Promise<FileMetadata | null> {
|
||||
try {
|
||||
// 检查 Data/Files/{file.id} 是否是目录
|
||||
const ocrDirPath = path.join(this.storageDir, file.id)
|
||||
const preprocessDirPath = path.join(this.storageDir, file.id)
|
||||
|
||||
if (fs.existsSync(ocrDirPath)) {
|
||||
const stats = await fs.promises.stat(ocrDirPath)
|
||||
if (fs.existsSync(preprocessDirPath)) {
|
||||
const stats = await fs.promises.stat(preprocessDirPath)
|
||||
|
||||
// 如果是目录,说明已经被OCR处理过
|
||||
// 如果是目录,说明已经被预处理过
|
||||
if (stats.isDirectory()) {
|
||||
// 查找目录中的处理结果文件
|
||||
const files = await fs.promises.readdir(ocrDirPath)
|
||||
const files = await fs.promises.readdir(preprocessDirPath)
|
||||
|
||||
// 查找主要的处理结果文件(.md 或 .txt)
|
||||
const processedFile = files.find((fileName) => fileName.endsWith('.md') || fileName.endsWith('.txt'))
|
||||
|
||||
if (processedFile) {
|
||||
const processedFilePath = path.join(ocrDirPath, processedFile)
|
||||
const processedFilePath = path.join(preprocessDirPath, processedFile)
|
||||
const processedStats = await fs.promises.stat(processedFilePath)
|
||||
const ext = getFileExt(processedFile)
|
||||
|
||||
@@ -87,9 +87,9 @@ export default abstract class BaseOcrProvider {
|
||||
return document
|
||||
}
|
||||
|
||||
public async sendOcrProgress(sourceId: string, progress: number): Promise<void> {
|
||||
public async sendPreprocessProgress(sourceId: string, progress: number): Promise<void> {
|
||||
const mainWindow = windowService.getMainWindow()
|
||||
mainWindow?.webContents.send('file-ocr-progress', {
|
||||
mainWindow?.webContents.send('file-preprocess-progress', {
|
||||
itemId: sourceId,
|
||||
progress: progress
|
||||
})
|
||||
@@ -0,0 +1,12 @@
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
|
||||
/**
 * Placeholder preprocess provider with no parsing capability.
 *
 * `PreprocessProviderFactory.create` returns it for provider ids that have no
 * dedicated implementation.
 */
export default class DefaultPreprocessProvider extends BasePreprocessProvider {
  /**
   * @param provider Provider configuration, forwarded unchanged to the base class.
   */
  constructor(provider: PreprocessProvider) {
    super(provider)
  }

  /**
   * Always fails, because this provider cannot process files.
   *
   * Declared `async` so the failure is delivered as a rejected promise, which
   * is what the declared return type promises, instead of a synchronous throw
   * that a caller chaining `.catch()` would never observe.
   *
   * @returns A promise that always rejects.
   * @throws Error with message 'Method not implemented.' (as a rejection).
   */
  public async parseFile(): Promise<{ processedFile: FileMetadata }> {
    throw new Error('Method not implemented.')
  }
}
|
||||
@@ -2,12 +2,12 @@ import fs from 'node:fs'
|
||||
import path from 'node:path'
|
||||
|
||||
import { getFileDir } from '@main/utils/file'
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
import AdmZip from 'adm-zip'
|
||||
import axios, { AxiosRequestConfig } from 'axios'
|
||||
import Logger from 'electron-log'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
|
||||
type ApiResponse<T> = {
|
||||
code: string
|
||||
@@ -30,8 +30,8 @@ type ParsedFileResponse = {
|
||||
url: string
|
||||
}
|
||||
|
||||
export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
constructor(provider: OcrProvider) {
|
||||
export default class Doc2xPreprocessProvider extends BasePreprocessProvider {
|
||||
constructor(provider: PreprocessProvider) {
|
||||
super(provider)
|
||||
}
|
||||
|
||||
@@ -53,11 +53,11 @@ export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
|
||||
public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> {
|
||||
try {
|
||||
Logger.info(`OCR processing started: ${file.path}`)
|
||||
Logger.info(`Preprocess processing started: ${file.path}`)
|
||||
|
||||
// 步骤1: 准备上传
|
||||
const { uid, url } = await this.preupload()
|
||||
Logger.info(`OCR preupload completed: uid=${uid}`)
|
||||
Logger.info(`Preprocess preupload completed: uid=${uid}`)
|
||||
|
||||
await this.validateFile(file.path)
|
||||
|
||||
@@ -66,7 +66,7 @@ export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
|
||||
// 步骤3: 等待处理完成
|
||||
await this.waitForProcessing(sourceId, uid)
|
||||
Logger.info(`OCR parsing completed successfully for: ${file.path}`)
|
||||
Logger.info(`Preprocess parsing completed successfully for: ${file.path}`)
|
||||
|
||||
// 步骤4: 导出文件
|
||||
const { path: outputPath } = await this.exportFile(file, uid)
|
||||
@@ -76,7 +76,9 @@ export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
processedFile: this.createProcessedFileInfo(file, outputPath)
|
||||
}
|
||||
} catch (error) {
|
||||
Logger.error(`OCR processing failed for ${file.path}: ${error instanceof Error ? error.message : String(error)}`)
|
||||
Logger.error(
|
||||
`Preprocess processing failed for ${file.path}: ${error instanceof Error ? error.message : String(error)}`
|
||||
)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
@@ -121,13 +123,13 @@ export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
while (true) {
|
||||
await this.delay(1000)
|
||||
const { status, progress } = await this.getStatus(uid)
|
||||
await this.sendOcrProgress(sourceId, progress)
|
||||
Logger.info(`OCR processing status: ${status}, progress: ${progress}%`)
|
||||
await this.sendPreprocessProgress(sourceId, progress)
|
||||
Logger.info(`Preprocess processing status: ${status}, progress: ${progress}%`)
|
||||
|
||||
if (status === 'success') {
|
||||
return
|
||||
} else if (status === 'failed') {
|
||||
throw new Error('OCR processing failed')
|
||||
throw new Error('Preprocess processing failed')
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -211,7 +213,7 @@ export default class Doc2xOcrProvider extends BaseOcrProvider {
|
||||
}
|
||||
|
||||
/**
|
||||
* OCR文件
|
||||
* Preprocess文件
|
||||
* @param uid 预上传响应的uid
|
||||
* @param filePath 文件路径
|
||||
*/
|
||||
@@ -1,13 +1,13 @@
|
||||
import { isMac } from '@main/constant'
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
import Logger from 'electron-log'
|
||||
import * as fs from 'fs'
|
||||
import * as path from 'path'
|
||||
import { TextItem } from 'pdfjs-dist/types/src/display/api'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
|
||||
export default class MacSysOcrProvider extends BaseOcrProvider {
|
||||
export default class MacSysOcrProvider extends BasePreprocessProvider {
|
||||
private readonly MIN_TEXT_LENGTH = 1000
|
||||
private MacOCR: any
|
||||
|
||||
@@ -32,7 +32,7 @@ export default class MacSysOcrProvider extends BaseOcrProvider {
|
||||
return level === 0 ? this.MacOCR.RECOGNITION_LEVEL_FAST : this.MacOCR.RECOGNITION_LEVEL_ACCURATE
|
||||
}
|
||||
|
||||
constructor(provider: OcrProvider) {
|
||||
constructor(provider: PreprocessProvider) {
|
||||
super(provider)
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ export default class MacSysOcrProvider extends BaseOcrProvider {
|
||||
writeStream.write(ocrResult.text + '\n')
|
||||
|
||||
// Update progress
|
||||
await this.sendOcrProgress(sourceId, (pageNum / totalPages) * 100)
|
||||
await this.sendPreprocessProgress(sourceId, (pageNum / totalPages) * 100)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import fs from 'node:fs'
|
||||
import path from 'node:path'
|
||||
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
import AdmZip from 'adm-zip'
|
||||
import axios from 'axios'
|
||||
import Logger from 'electron-log'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
|
||||
type ApiResponse<T> = {
|
||||
code: number
|
||||
@@ -39,14 +39,14 @@ type ExtractResultResponse = {
|
||||
extract_result: ExtractFileResult[]
|
||||
}
|
||||
|
||||
export default class MineruOcrProvider extends BaseOcrProvider {
|
||||
constructor(provider: OcrProvider) {
|
||||
export default class MineruPreprocessProvider extends BasePreprocessProvider {
|
||||
constructor(provider: PreprocessProvider) {
|
||||
super(provider)
|
||||
}
|
||||
|
||||
public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> {
|
||||
try {
|
||||
Logger.info(`MinerU OCR processing started: ${file.path}`)
|
||||
Logger.info(`MinerU preprocess processing started: ${file.path}`)
|
||||
await this.validateFile(file.path)
|
||||
|
||||
// 1. 获取上传URL并上传文件
|
||||
@@ -65,8 +65,8 @@ export default class MineruOcrProvider extends BaseOcrProvider {
|
||||
processedFile: this.createProcessedFileInfo(file, outputPath)
|
||||
}
|
||||
} catch (error: any) {
|
||||
Logger.error(`MinerU OCR processing failed for ${file.path}: ${error.message}`)
|
||||
throw new Error(`OCR processing failed: ${error.message}`)
|
||||
Logger.error(`MinerU preprocess processing failed for ${file.path}: ${error.message}`)
|
||||
throw new Error(`preprocess processing failed: ${error.message}`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -309,11 +309,11 @@ export default class MineruOcrProvider extends BaseOcrProvider {
|
||||
const progress = Math.round(
|
||||
(fileResult.extract_progress.extracted_pages / fileResult.extract_progress.total_pages) * 100
|
||||
)
|
||||
await this.sendOcrProgress(sourceId, progress)
|
||||
await this.sendPreprocessProgress(sourceId, progress)
|
||||
Logger.info(`File ${fileName} processing progress: ${progress}%`)
|
||||
} else {
|
||||
// 如果没有具体进度信息,发送一个通用进度
|
||||
await this.sendOcrProgress(sourceId, 50)
|
||||
await this.sendPreprocessProgress(sourceId, 50)
|
||||
Logger.info(`File ${fileName} is still processing...`)
|
||||
}
|
||||
}
|
||||
+10
-10
@@ -6,19 +6,19 @@ import { Mistral } from '@mistralai/mistralai'
|
||||
import { DocumentURLChunk } from '@mistralai/mistralai/models/components/documenturlchunk'
|
||||
import { ImageURLChunk } from '@mistralai/mistralai/models/components/imageurlchunk'
|
||||
import { OCRResponse } from '@mistralai/mistralai/models/components/ocrresponse'
|
||||
import { FileMetadata, FileTypes, OcrProvider, Provider } from '@types'
|
||||
import { FileMetadata, FileTypes, PreprocessProvider, Provider } from '@types'
|
||||
import Logger from 'electron-log'
|
||||
import path from 'path'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
|
||||
type PreuploadResponse = DocumentURLChunk | ImageURLChunk
|
||||
|
||||
export default class MistralOcrProvider extends BaseOcrProvider {
|
||||
export default class MistralPreprocessProvider extends BasePreprocessProvider {
|
||||
private sdk: Mistral
|
||||
private fileService: MistralService
|
||||
|
||||
constructor(provider: OcrProvider) {
|
||||
constructor(provider: PreprocessProvider) {
|
||||
super(provider)
|
||||
const clientManager = MistralClientManager.getInstance()
|
||||
const aiProvider: Provider = {
|
||||
@@ -36,7 +36,7 @@ export default class MistralOcrProvider extends BaseOcrProvider {
|
||||
|
||||
private async preupload(file: FileMetadata): Promise<PreuploadResponse> {
|
||||
let document: PreuploadResponse
|
||||
Logger.info(`OCR preupload started for local file: ${file.path}`)
|
||||
Logger.info(`preprocess preupload started for local file: ${file.path}`)
|
||||
|
||||
if (file.ext.toLowerCase() === '.pdf') {
|
||||
const uploadResponse = await this.fileService.uploadFile(file)
|
||||
@@ -45,12 +45,12 @@ export default class MistralOcrProvider extends BaseOcrProvider {
|
||||
Logger.error('File upload failed:', uploadResponse)
|
||||
throw new Error('Failed to upload file: ' + uploadResponse.displayName)
|
||||
}
|
||||
await this.sendOcrProgress(file.id, 15)
|
||||
await this.sendPreprocessProgress(file.id, 15)
|
||||
const fileUrl = await this.sdk.files.getSignedUrl({
|
||||
fileId: uploadResponse.fileId
|
||||
})
|
||||
Logger.info('Got signed URL:', fileUrl)
|
||||
await this.sendOcrProgress(file.id, 20)
|
||||
await this.sendPreprocessProgress(file.id, 20)
|
||||
document = {
|
||||
type: 'document_url',
|
||||
documentUrl: fileUrl.url
|
||||
@@ -78,16 +78,16 @@ export default class MistralOcrProvider extends BaseOcrProvider {
|
||||
includeImageBase64: true
|
||||
})
|
||||
if (result) {
|
||||
await this.sendOcrProgress(sourceId, 100)
|
||||
await this.sendPreprocessProgress(sourceId, 100)
|
||||
const processedFile = this.convertFile(result, file)
|
||||
return {
|
||||
processedFile
|
||||
}
|
||||
} else {
|
||||
throw new Error('OCR processing failed: OCR response is empty')
|
||||
throw new Error('preprocess processing failed: OCR response is empty')
|
||||
}
|
||||
} catch (error) {
|
||||
throw new Error('OCR processing failed: ' + error)
|
||||
throw new Error('preprocess processing failed: ' + error)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import { FileMetadata, OcrProvider as Provider } from '@types'
|
||||
import { FileMetadata, PreprocessProvider as Provider } from '@types'
|
||||
|
||||
import BaseOcrProvider from './BaseOcrProvider'
|
||||
import OcrProviderFactory from './OcrProviderFactory'
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
import PreprocessProviderFactory from './PreprocessProviderFactory'
|
||||
|
||||
export default class OcrProvider {
|
||||
private sdk: BaseOcrProvider
|
||||
export default class PreprocessProvider {
|
||||
private sdk: BasePreprocessProvider
|
||||
constructor(provider: Provider) {
|
||||
this.sdk = OcrProviderFactory.create(provider)
|
||||
this.sdk = PreprocessProviderFactory.create(provider)
|
||||
}
|
||||
public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> {
|
||||
return this.sdk.parseFile(sourceId, file)
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查文件是否已经被OCR处理过
|
||||
* 检查文件是否已经被预处理过
|
||||
* @param file 文件信息
|
||||
* @returns 如果已处理返回处理后的文件信息,否则返回null
|
||||
*/
|
||||
@@ -0,0 +1,29 @@
|
||||
import { isMac } from '@main/constant'
|
||||
import { PreprocessProvider } from '@types'
|
||||
import Logger from 'electron-log'
|
||||
|
||||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||||
import DefaultPreprocessProvider from './DefaultPreprocessProvider'
|
||||
import Doc2xPreprocessProvider from './Doc2xPreprocessProvider'
|
||||
import MacSysOcrProvider from './MacSysOcrProvider'
|
||||
import MineruPreprocessProvider from './MineruPreprocessProvider'
|
||||
import MistralPreprocessProvider from './MistralPreprocessProvider'
|
||||
/**
 * Maps a preprocess provider configuration to the class that implements it.
 */
export default class PreprocessProviderFactory {
  /**
   * Instantiates the implementation selected by `provider.id`.
   *
   * @param provider Provider configuration; its `id` selects the implementation.
   * @returns The matching provider instance, or a `DefaultPreprocessProvider`
   *   when the id is not one of the known values.
   */
  static create(provider: PreprocessProvider): BasePreprocessProvider {
    const { id } = provider

    if (id === 'doc2x') {
      return new Doc2xPreprocessProvider(provider)
    }

    if (id === 'mistral') {
      return new MistralPreprocessProvider(provider)
    }

    if (id === 'system') {
      // Only a warning is emitted off macOS; the macOS provider is still returned.
      if (!isMac) {
        Logger.warn('[OCR] System OCR provider is only available on macOS')
      }
      return new MacSysOcrProvider(provider)
    }

    if (id === 'mineru') {
      return new MineruPreprocessProvider(provider)
    }

    return new DefaultPreprocessProvider(provider)
  }
}
|
||||
@@ -23,7 +23,7 @@ import { SitemapLoader } from '@cherrystudio/embedjs-loader-sitemap'
|
||||
import { WebLoader } from '@cherrystudio/embedjs-loader-web'
|
||||
import Embeddings from '@main/embeddings/Embeddings'
|
||||
import { addFileLoader } from '@main/loader'
|
||||
import OcrProvider from '@main/ocr/OcrProvider'
|
||||
import PreprocessProvider from '@main/preprocess/PreprocessProvider'
|
||||
import Reranker from '@main/reranker/Reranker'
|
||||
import { windowService } from '@main/services/WindowService'
|
||||
import { getAllFiles } from '@main/utils/file'
|
||||
@@ -167,7 +167,7 @@ class KnowledgeService {
|
||||
{
|
||||
state: LoaderTaskItemState.PENDING,
|
||||
task: async () => {
|
||||
// 添加OCR预处理逻辑
|
||||
// 添加预处理逻辑
|
||||
const fileToProcess: FileMetadata = await this.preprocessing(file, base, item)
|
||||
|
||||
// 使用处理后的文件进行加载
|
||||
@@ -498,25 +498,25 @@ class KnowledgeService {
|
||||
item: KnowledgeItem
|
||||
): Promise<FileMetadata> => {
|
||||
let fileToProcess: FileMetadata = file
|
||||
|
||||
if (base.preprocessing && base.ocrProvider && file.ext.toLowerCase() === '.pdf') {
|
||||
console.warn(`Preprocessing file`, JSON.stringify(base, null, 2))
|
||||
if (base.preprocessProvider && file.ext.toLowerCase() === '.pdf') {
|
||||
try {
|
||||
const ocrProvider = new OcrProvider(base.ocrProvider)
|
||||
const preprocessProvider = new PreprocessProvider(base.preprocessProvider)
|
||||
|
||||
// 首先检查文件是否已经被OCR处理过
|
||||
const alreadyProcessed = await ocrProvider.checkIfAlreadyProcessed(file)
|
||||
// 首先检查文件是否已经被预处理过
|
||||
const alreadyProcessed = await preprocessProvider.checkIfAlreadyProcessed(file)
|
||||
if (alreadyProcessed) {
|
||||
Logger.info(`File already OCR processed, using cached result: ${file.path}`)
|
||||
Logger.info(`File already preprocess processed, using cached result: ${file.path}`)
|
||||
return alreadyProcessed
|
||||
}
|
||||
|
||||
// 执行OCR处理
|
||||
Logger.info(`Starting OCR processing for scanned PDF: ${file.path}`)
|
||||
const { processedFile } = await ocrProvider.parseFile(item.id, file)
|
||||
// 执行预处理
|
||||
Logger.info(`Starting preprocess processing for scanned PDF: ${file.path}`)
|
||||
const { processedFile } = await preprocessProvider.parseFile(item.id, file)
|
||||
fileToProcess = processedFile
|
||||
} catch (err) {
|
||||
Logger.error(`OCR processing failed: ${err}`)
|
||||
// 如果OCR失败,使用原始文件
|
||||
Logger.error(`Preprocess processing failed: ${err}`)
|
||||
// 如果预处理失败,使用原始文件
|
||||
fileToProcess = file
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -3,7 +3,7 @@ import MinerULogo from '@renderer/assets/images/ocr/mineru.jpg'
|
||||
import MacOSLogo from '@renderer/assets/images/providers/macos.svg'
|
||||
import MistralLogo from '@renderer/assets/images/providers/mistral.png'
|
||||
|
||||
export function getOcrProviderLogo(providerId: string) {
|
||||
export function getPreprocessProviderLogo(providerId: string) {
|
||||
switch (providerId) {
|
||||
case 'doc2x':
|
||||
return Doc2xLogo
|
||||
@@ -18,7 +18,7 @@ export function getOcrProviderLogo(providerId: string) {
|
||||
}
|
||||
}
|
||||
|
||||
export const OCR_PROVIDER_CONFIG = {
|
||||
export const PREPROCESS_PROVIDER_CONFIG = {
|
||||
doc2x: {
|
||||
websites: {
|
||||
official: 'https://doc2x.noedgeai.com',
|
||||
@@ -1,45 +0,0 @@
|
||||
import { RootState } from '@renderer/store'
|
||||
import {
|
||||
setDefaultOcrProvider as _setDefaultOcrProvider,
|
||||
updateOcrProvider as _updateOcrProvider,
|
||||
updateOcrProviders as _updateOcrProviders
|
||||
} from '@renderer/store/ocr'
|
||||
import { OcrProvider } from '@renderer/types'
|
||||
import { useDispatch, useSelector } from 'react-redux'
|
||||
|
||||
/**
 * Looks up one OCR provider in the redux store by id.
 *
 * @param id Identifier of the provider to look up.
 * @returns The matching provider together with a callback that dispatches an
 *   update for a given provider.
 * @throws Error when the store holds no provider with that id.
 */
export const useOcrProvider = (id: string) => {
  const dispatch = useDispatch()
  const storedProviders = useSelector((state: RootState) => state.ocr.providers)

  const provider = storedProviders.find((candidate) => candidate.id === id)
  if (!provider) {
    throw new Error(`ocr provider with id ${id} not found`)
  }

  function updateOcrProvider(ocrProvider: OcrProvider) {
    dispatch(_updateOcrProvider(ocrProvider))
  }

  return { provider, updateOcrProvider }
}
|
||||
|
||||
/**
 * Exposes the list of OCR providers held in the redux store.
 *
 * @returns The provider list and a callback that dispatches a replacement list
 *   (the callback returns whatever `dispatch` returns).
 */
export const useOcrProviders = () => {
  const dispatch = useDispatch()
  const ocrProviders = useSelector((state: RootState) => state.ocr.providers)

  const updateOcrProviders = (nextProviders: OcrProvider[]) => dispatch(_updateOcrProviders(nextProviders))

  return { ocrProviders, updateOcrProviders }
}
|
||||
|
||||
/**
 * Resolves the default OCR provider from the redux store.
 *
 * @returns The provider whose id equals the stored default id (`undefined` when
 *   no default id is stored or nothing matches), a setter that stores a
 *   provider's id as the default, and an updater that dispatches a provider update.
 */
export const useDefaultOcrProvider = () => {
  const defaultProviderId = useSelector((state: RootState) => state.ocr.defaultProvider)
  const { ocrProviders } = useOcrProviders()
  const dispatch = useDispatch()

  const isDefault = (candidate: OcrProvider) => candidate.id === defaultProviderId
  // A falsy default id skips the lookup entirely.
  const provider = defaultProviderId ? ocrProviders.find(isDefault) : undefined

  function setDefaultOcrProvider(ocrProvider: OcrProvider) {
    dispatch(_setDefaultOcrProvider(ocrProvider.id))
  }

  function updateDefaultOcrProvider(ocrProvider: OcrProvider) {
    dispatch(_updateOcrProvider(ocrProvider))
  }

  return { provider, setDefaultOcrProvider, updateDefaultOcrProvider }
}
|
||||
@@ -0,0 +1,48 @@
|
||||
import { RootState } from '@renderer/store'
|
||||
import {
|
||||
setDefaultPreprocessProvider as _setDefaultPreprocessProvider,
|
||||
updatePreprocessProvider as _updatePreprocessProvider,
|
||||
updatePreprocessProviders as _updatePreprocessProviders
|
||||
} from '@renderer/store/preprocess'
|
||||
import { PreprocessProvider } from '@renderer/types'
|
||||
import { useDispatch, useSelector } from 'react-redux'
|
||||
|
||||
/**
 * Looks up one preprocess provider in the redux store by id.
 *
 * @param id Identifier of the provider to look up.
 * @returns The matching provider together with a callback that dispatches an
 *   update for a given provider.
 * @throws Error when the store holds no provider with that id.
 */
export const usePreprocessProvider = (id: string) => {
  const dispatch = useDispatch()
  const storedProviders = useSelector((state: RootState) => state.preprocess.providers)

  const provider = storedProviders.find((candidate) => candidate.id === id)
  if (!provider) {
    throw new Error(`preprocess provider with id ${id} not found`)
  }

  function updatePreprocessProvider(preprocessProvider: PreprocessProvider) {
    dispatch(_updatePreprocessProvider(preprocessProvider))
  }

  return { provider, updatePreprocessProvider }
}
|
||||
|
||||
/**
 * Exposes the list of preprocess providers held in the redux store.
 *
 * @returns The provider list and a callback that dispatches a replacement list
 *   (the callback returns whatever `dispatch` returns).
 */
export const usePreprocessProviders = () => {
  const dispatch = useDispatch()
  const preprocessProviders = useSelector((state: RootState) => state.preprocess.providers)

  const updatePreprocessProviders = (nextProviders: PreprocessProvider[]) =>
    dispatch(_updatePreprocessProviders(nextProviders))

  return { preprocessProviders, updatePreprocessProviders }
}
|
||||
|
||||
/**
 * Resolves the default preprocess provider from the redux store.
 *
 * @returns The provider whose id equals the stored default id (`undefined` when
 *   no default id is stored or nothing matches), a setter that stores a
 *   provider's id as the default, and an updater that dispatches a provider update.
 */
export const useDefaultPreprocessProvider = () => {
  const defaultProviderId = useSelector((state: RootState) => state.preprocess.defaultProvider)
  const { preprocessProviders } = usePreprocessProviders()
  const dispatch = useDispatch()

  const isDefault = (candidate: PreprocessProvider) => candidate.id === defaultProviderId
  // A falsy default id skips the lookup entirely.
  const provider = defaultProviderId ? preprocessProviders.find(isDefault) : undefined

  function setDefaultPreprocessProvider(preprocessProvider: PreprocessProvider) {
    dispatch(_setDefaultPreprocessProvider(preprocessProvider.id))
  }

  function updateDefaultPreprocessProvider(preprocessProvider: PreprocessProvider) {
    dispatch(_updatePreprocessProvider(preprocessProvider))
  }

  return { provider, setDefaultPreprocessProvider, updateDefaultPreprocessProvider }
}
|
||||
@@ -1728,6 +1728,19 @@
|
||||
"min_confidence": "Minimum Confidence"
|
||||
}
|
||||
},
|
||||
"preprocess": {
|
||||
"title": "Pre Process",
|
||||
"provider": "Pre Process Provider",
|
||||
"provider_placeholder": "Choose a Pre Process provider",
|
||||
"mac_system_ocr_options": {
|
||||
"mode": {
|
||||
"title": "Recognition Mode",
|
||||
"accurate": "Accurate",
|
||||
"fast": "Fast"
|
||||
},
|
||||
"min_confidence": "Minimum Confidence"
|
||||
}
|
||||
},
|
||||
"websearch": {
|
||||
"blacklist": "Blacklist",
|
||||
"blacklist_description": "Results from the following websites will not appear in search results",
|
||||
|
||||
@@ -42,7 +42,7 @@ const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
|
||||
|
||||
useEffect(() => {
|
||||
const handlers = [
|
||||
window.electron.ipcRenderer.on('file-ocr-progress', (_, { itemId, progress }) => {
|
||||
window.electron.ipcRenderer.on('file-preprocess-progress', (_, { itemId, progress }) => {
|
||||
setProgressMap((prev) => new Map(prev).set(itemId, progress))
|
||||
}),
|
||||
|
||||
|
||||
@@ -4,11 +4,11 @@ import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT } from '@renderer/config/constant'
|
||||
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
|
||||
import { isEmbeddingModel, isRerankModel } from '@renderer/config/models'
|
||||
import { useKnowledge } from '@renderer/hooks/useKnowledge'
|
||||
import { useOcrProviders } from '@renderer/hooks/useOcr'
|
||||
import { usePreprocessProviders } from '@renderer/hooks/usePreprocess'
|
||||
import { useProviders } from '@renderer/hooks/useProvider'
|
||||
import { getModelUniqId } from '@renderer/services/ModelService'
|
||||
import { KnowledgeBase, OcrProvider } from '@renderer/types'
|
||||
import { Alert, Input, InputNumber, Modal, Select, Slider, Switch, Tabs, TabsProps, Tooltip } from 'antd'
|
||||
import { KnowledgeBase, PreprocessProvider } from '@renderer/types'
|
||||
import { Alert, Input, InputNumber, Modal, Select, Slider, Tabs, TabsProps, Tooltip } from 'antd'
|
||||
import { sortBy } from 'lodash'
|
||||
import { useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
@@ -23,8 +23,8 @@ interface Props extends ShowParams {
|
||||
}
|
||||
|
||||
const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
|
||||
const { ocrProviders } = useOcrProviders()
|
||||
const [selectedProvider, setSelectedProvider] = useState<OcrProvider | undefined>(_base.ocrProvider)
|
||||
const { preprocessProviders } = usePreprocessProviders()
|
||||
const [selectedProvider, setSelectedProvider] = useState<PreprocessProvider | undefined>(_base.preprocessProvider)
|
||||
|
||||
const [open, setOpen] = useState(true)
|
||||
const { t } = useTranslation()
|
||||
@@ -98,18 +98,6 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
|
||||
onChange={(e) => setNewBase({ ...newBase, name: e.target.value })}
|
||||
/>
|
||||
</SettingsItem>
|
||||
<SettingsItem>
|
||||
<div className="settings-label">
|
||||
{t('knowledge.settings.preprocessing')}
|
||||
<Tooltip title={t('knowledge.settings.preprocessing_tooltip')} placement="right">
|
||||
<InfoCircleOutlined style={{ marginLeft: 8 }} />
|
||||
</Tooltip>
|
||||
</div>
|
||||
<Switch
|
||||
defaultValue={base.preprocessing}
|
||||
onChange={(checked: boolean) => setNewBase({ ...newBase, preprocessing: checked })}
|
||||
/>
|
||||
</SettingsItem>
|
||||
|
||||
<SettingsItem>
|
||||
<div className="settings-label">
|
||||
@@ -150,18 +138,18 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
|
||||
</SettingsItem>
|
||||
|
||||
<SettingsItem>
|
||||
<div className="settings-label">{t('settings.tool.ocr.provider')}</div>
|
||||
<div className="settings-label">{t('settings.tool.preprocess.title')}</div>
|
||||
<Select
|
||||
value={selectedProvider?.id}
|
||||
style={{ width: '100%' }}
|
||||
onChange={(value: string) => {
|
||||
const provider = ocrProviders.find((p) => p.id === value)
|
||||
const provider = preprocessProviders.find((p) => p.id === value)
|
||||
if (!provider) return
|
||||
setSelectedProvider(provider)
|
||||
setNewBase({ ...newBase, ocrProvider: provider })
|
||||
setNewBase({ ...newBase, preprocessProvider: provider })
|
||||
}}
|
||||
placeholder={t('settings.tool.ocr.provider_placeholder')}
|
||||
options={ocrProviders.filter((p) => p.apiKey !== '').map((p) => ({ value: p.id, label: p.name }))}
|
||||
placeholder={t('settings.tool.preprocess.provider_placeholder')}
|
||||
options={preprocessProviders.filter((p) => p.apiKey !== '').map((p) => ({ value: p.id, label: p.name }))}
|
||||
allowClear
|
||||
/>
|
||||
</SettingsItem>
|
||||
|
||||
@@ -9,7 +9,7 @@ import {
|
||||
LayoutGrid,
|
||||
MonitorCog,
|
||||
Package,
|
||||
PenTool,
|
||||
PencilRuler,
|
||||
Rocket,
|
||||
Settings2,
|
||||
SquareTerminal,
|
||||
@@ -66,7 +66,7 @@ const SettingsPage: FC = () => {
|
||||
</MenuItemLink>
|
||||
<MenuItemLink to="/settings/tool">
|
||||
<MenuItem className={isRoute('/settings/tool')}>
|
||||
<PenTool size={18} />
|
||||
<PencilRuler size={18} />
|
||||
{t('settings.tool.title')}
|
||||
</MenuItem>
|
||||
</MenuItemLink>
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
import { isMac } from '@renderer/config/constant'
|
||||
import { useTheme } from '@renderer/context/ThemeProvider'
|
||||
import { useDefaultOcrProvider, useOcrProviders } from '@renderer/hooks/useOcr'
|
||||
import { OcrProvider } from '@renderer/types'
|
||||
import { Select } from 'antd'
|
||||
import { FC, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import { SettingContainer, SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '../..'
|
||||
import OcrProviderSettings from './OcrProviderSettings'
|
||||
|
||||
/**
 * Settings panel for choosing the default OCR provider.
 *
 * Renders a provider dropdown and, once a provider is selected, that
 * provider's own settings group underneath it.
 */
const KnowledgeSettings: FC = () => {
  const { ocrProviders } = useOcrProviders()
  const { provider: defaultProvider, setDefaultOcrProvider } = useDefaultOcrProvider()
  const { t } = useTranslation()
  // Seeded once from the store's default provider; later changes come from the dropdown.
  const [selectedProvider, setSelectedProvider] = useState<OcrProvider | undefined>(defaultProvider)
  const { theme: themeMode } = useTheme()

  // Stores the chosen provider as the default and selects it locally; unknown ids are ignored.
  const updateSelectedOcrProvider = (providerId: string) => {
    const match = ocrProviders.find((candidate) => candidate.id === providerId)
    if (match) {
      setDefaultOcrProvider(match)
      setSelectedProvider(match)
    }
  }

  const providerOptions = ocrProviders.map((candidate) => ({
    value: candidate.id,
    label: candidate.name,
    // The "system" option is disabled on non-Mac platforms.
    disabled: !isMac && candidate.id === 'system'
  }))

  return (
    <SettingContainer theme={themeMode}>
      <SettingGroup theme={themeMode}>
        <SettingTitle>{t('settings.tool.ocr.title')}</SettingTitle>
        <SettingDivider />
        <SettingRow>
          <SettingRowTitle>{t('settings.tool.ocr.provider')}</SettingRowTitle>
          <div style={{ display: 'flex', gap: '8px' }}>
            <Select
              value={selectedProvider?.id}
              style={{ width: '200px' }}
              onChange={(value: string) => updateSelectedOcrProvider(value)}
              placeholder={t('settings.tool.ocr.provider_placeholder')}
              options={providerOptions}
            />
          </div>
        </SettingRow>
      </SettingGroup>
      {selectedProvider && (
        <SettingGroup theme={themeMode}>
          <OcrProviderSettings provider={selectedProvider} />
        </SettingGroup>
      )}
    </SettingContainer>
  )
}
|
||||
export default KnowledgeSettings
|
||||
+30
-30
@@ -1,8 +1,8 @@
|
||||
import { ExportOutlined } from '@ant-design/icons'
|
||||
import { getOcrProviderLogo, OCR_PROVIDER_CONFIG } from '@renderer/config/ocrProviders'
|
||||
import { useOcrProvider } from '@renderer/hooks/useOcr'
|
||||
import { getPreprocessProviderLogo, PREPROCESS_PROVIDER_CONFIG } from '@renderer/config/preprocessProviders'
|
||||
import { usePreprocessProvider } from '@renderer/hooks/usePreprocess'
|
||||
import { formatApiKeys } from '@renderer/services/ApiService'
|
||||
import { OcrProvider } from '@renderer/types'
|
||||
import { PreprocessProvider } from '@renderer/types'
|
||||
import { hasObjectKey } from '@renderer/utils'
|
||||
import { Avatar, Divider, Flex, Input, InputNumber, Segmented } from 'antd'
|
||||
import Link from 'antd/es/typography/Link'
|
||||
@@ -22,29 +22,29 @@ import {
|
||||
} from '../..'
|
||||
|
||||
interface Props {
|
||||
provider: OcrProvider
|
||||
provider: PreprocessProvider
|
||||
}
|
||||
|
||||
const OcrProviderSetting: FC<Props> = ({ provider: _provider }) => {
|
||||
const { provider: ocrProvider, updateOcrProvider } = useOcrProvider(_provider.id)
|
||||
const PreprocessProviderSettings: FC<Props> = ({ provider: _provider }) => {
|
||||
const { provider: preprocessProvider, updatePreprocessProvider } = usePreprocessProvider(_provider.id)
|
||||
const { t } = useTranslation()
|
||||
const [apiKey, setApiKey] = useState(ocrProvider.apiKey || '')
|
||||
const [apiHost, setApiHost] = useState(ocrProvider.apiHost || '')
|
||||
const [options, setOptions] = useState(ocrProvider.options || {})
|
||||
const [apiKey, setApiKey] = useState(preprocessProvider.apiKey || '')
|
||||
const [apiHost, setApiHost] = useState(preprocessProvider.apiHost || '')
|
||||
const [options, setOptions] = useState(preprocessProvider.options || {})
|
||||
|
||||
const ocrProviderConfig = OCR_PROVIDER_CONFIG[ocrProvider.id]
|
||||
const apiKeyWebsite = ocrProviderConfig?.websites?.apiKey
|
||||
const officialWebsite = ocrProviderConfig?.websites?.official
|
||||
const preprocessProviderConfig = PREPROCESS_PROVIDER_CONFIG[preprocessProvider.id]
|
||||
const apiKeyWebsite = preprocessProviderConfig?.websites?.apiKey
|
||||
const officialWebsite = preprocessProviderConfig?.websites?.official
|
||||
|
||||
useEffect(() => {
|
||||
setApiKey(ocrProvider.apiKey ?? '')
|
||||
setApiHost(ocrProvider.apiHost ?? '')
|
||||
setOptions(ocrProvider.options ?? {})
|
||||
}, [ocrProvider.apiKey, ocrProvider.apiHost, ocrProvider.options])
|
||||
setApiKey(preprocessProvider.apiKey ?? '')
|
||||
setApiHost(preprocessProvider.apiHost ?? '')
|
||||
setOptions(preprocessProvider.options ?? {})
|
||||
}, [preprocessProvider.apiKey, preprocessProvider.apiHost, preprocessProvider.options])
|
||||
|
||||
const onUpdateApiKey = () => {
|
||||
if (apiKey !== ocrProvider.apiKey) {
|
||||
updateOcrProvider({ ...ocrProvider, apiKey })
|
||||
if (apiKey !== preprocessProvider.apiKey) {
|
||||
updatePreprocessProvider({ ...preprocessProvider, apiKey })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,35 +53,35 @@ const OcrProviderSetting: FC<Props> = ({ provider: _provider }) => {
|
||||
if (trimmedHost.endsWith('/')) {
|
||||
trimmedHost = trimmedHost.slice(0, -1)
|
||||
}
|
||||
if (trimmedHost !== ocrProvider.apiHost) {
|
||||
updateOcrProvider({ ...ocrProvider, apiHost: trimmedHost })
|
||||
if (trimmedHost !== preprocessProvider.apiHost) {
|
||||
updatePreprocessProvider({ ...preprocessProvider, apiHost: trimmedHost })
|
||||
} else {
|
||||
setApiHost(ocrProvider.apiHost || '')
|
||||
setApiHost(preprocessProvider.apiHost || '')
|
||||
}
|
||||
}
|
||||
|
||||
const onUpdateOptions = (key: string, value: any) => {
|
||||
const newOptions = { ...options, [key]: value }
|
||||
setOptions(newOptions)
|
||||
updateOcrProvider({ ...ocrProvider, options: newOptions })
|
||||
updatePreprocessProvider({ ...preprocessProvider, options: newOptions })
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<SettingTitle>
|
||||
<Flex align="center" gap={8}>
|
||||
<ProviderLogo shape="square" src={getOcrProviderLogo(ocrProvider.id)} size={16} />
|
||||
<ProviderLogo shape="square" src={getPreprocessProviderLogo(preprocessProvider.id)} size={16} />
|
||||
|
||||
<ProviderName> {ocrProvider.name}</ProviderName>
|
||||
{officialWebsite && ocrProviderConfig?.websites && (
|
||||
<Link target="_blank" href={ocrProviderConfig.websites.official}>
|
||||
<ProviderName> {preprocessProvider.name}</ProviderName>
|
||||
{officialWebsite && preprocessProviderConfig?.websites && (
|
||||
<Link target="_blank" href={preprocessProviderConfig.websites.official}>
|
||||
<ExportOutlined style={{ color: 'var(--color-text)', fontSize: '12px' }} />
|
||||
</Link>
|
||||
)}
|
||||
</Flex>
|
||||
</SettingTitle>
|
||||
<Divider style={{ width: '100%', margin: '10px 0' }} />
|
||||
{hasObjectKey(ocrProvider, 'apiKey') && (
|
||||
{hasObjectKey(preprocessProvider, 'apiKey') && (
|
||||
<>
|
||||
<SettingSubtitle style={{ marginTop: 5, marginBottom: 10 }}>{t('settings.provider.api_key')}</SettingSubtitle>
|
||||
<Flex gap={8}>
|
||||
@@ -104,7 +104,7 @@ const OcrProviderSetting: FC<Props> = ({ provider: _provider }) => {
|
||||
</>
|
||||
)}
|
||||
|
||||
{hasObjectKey(ocrProvider, 'apiHost') && (
|
||||
{hasObjectKey(preprocessProvider, 'apiHost') && (
|
||||
<>
|
||||
<SettingSubtitle style={{ marginTop: 5, marginBottom: 10 }}>
|
||||
{t('settings.provider.api_host')}
|
||||
@@ -120,7 +120,7 @@ const OcrProviderSetting: FC<Props> = ({ provider: _provider }) => {
|
||||
</>
|
||||
)}
|
||||
|
||||
{hasObjectKey(ocrProvider, 'options') && ocrProvider.id === 'system' && (
|
||||
{hasObjectKey(preprocessProvider, 'options') && preprocessProvider.id === 'system' && (
|
||||
<>
|
||||
<SettingDivider style={{ marginTop: 15, marginBottom: 12 }} />
|
||||
<SettingRow>
|
||||
@@ -165,4 +165,4 @@ const ProviderLogo = styled(Avatar)`
|
||||
border: 0.5px solid var(--color-border);
|
||||
`
|
||||
|
||||
export default OcrProviderSetting
|
||||
export default PreprocessProviderSettings
|
||||
@@ -0,0 +1,58 @@
|
||||
import { isMac } from '@renderer/config/constant'
|
||||
import { useTheme } from '@renderer/context/ThemeProvider'
|
||||
import { useDefaultPreprocessProvider, usePreprocessProviders } from '@renderer/hooks/usePreprocess'
|
||||
import { PreprocessProvider } from '@renderer/types'
|
||||
import { Select } from 'antd'
|
||||
import { FC, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import { SettingContainer, SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '../..'
|
||||
import PreprocessProviderSettings from './PreprocessSettings'
|
||||
|
||||
/**
 * Preprocess settings panel.
 *
 * Renders a provider dropdown and, once a provider is selected, that
 * provider's settings form underneath. Choosing a provider passes it to
 * `setDefaultPreprocessProvider` and mirrors it in local state.
 */
const PreprocessSettings: FC = () => {
  const { preprocessProviders } = usePreprocessProviders()
  const { provider: defaultProvider, setDefaultPreprocessProvider } = useDefaultPreprocessProvider()
  const { t } = useTranslation()
  const [selectedProvider, setSelectedProvider] = useState<PreprocessProvider | undefined>(defaultProvider)
  const { theme: themeMode } = useTheme()

  // Resolve the chosen id to a known provider; unknown ids are ignored.
  const updateSelectedPreprocessProvider = (providerId: string) => {
    const match = preprocessProviders.find((candidate) => candidate.id === providerId)
    if (match) {
      setDefaultPreprocessProvider(match)
      setSelectedProvider(match)
    }
  }

  const providerOptions = preprocessProviders.map((candidate) => ({
    value: candidate.id,
    label: candidate.name,
    // Disable the "system" option on non-Mac platforms.
    disabled: !isMac && candidate.id === 'system'
  }))

  return (
    <SettingContainer theme={themeMode}>
      <SettingGroup theme={themeMode}>
        <SettingTitle>{t('settings.tool.preprocess.title')}</SettingTitle>
        <SettingDivider />
        <SettingRow>
          <SettingRowTitle>{t('settings.tool.preprocess.provider')}</SettingRowTitle>
          <div style={{ display: 'flex', gap: '8px' }}>
            <Select
              value={selectedProvider?.id}
              style={{ width: '200px' }}
              onChange={(value: string) => updateSelectedPreprocessProvider(value)}
              placeholder={t('settings.tool.preprocess.provider_placeholder')}
              options={providerOptions}
            />
          </div>
        </SettingRow>
      </SettingGroup>
      {selectedProvider ? (
        <SettingGroup theme={themeMode}>
          <PreprocessProviderSettings provider={selectedProvider} />
        </SettingGroup>
      ) : null}
    </SettingContainer>
  )
}
|
||||
export default PreprocessSettings
|
||||
@@ -1,14 +1,14 @@
|
||||
import { GlobalOutlined } from '@ant-design/icons'
|
||||
import OcrIcon from '@renderer/components/Icons/OcrIcon'
|
||||
import { HStack } from '@renderer/components/Layout'
|
||||
import ListItem from '@renderer/components/ListItem'
|
||||
import { theme } from 'antd'
|
||||
import { FileCode } from 'lucide-react'
|
||||
import { FC, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import styled from 'styled-components'
|
||||
|
||||
import { SettingContainer } from '..'
|
||||
import OcrSettings from './OcrSettings'
|
||||
import PreprocessSettings from './PreprocessSettings'
|
||||
import WebSearchSettings from './WebSearchSettings'
|
||||
|
||||
const ToolSettings: FC = () => {
|
||||
@@ -16,7 +16,8 @@ const ToolSettings: FC = () => {
|
||||
const [menu, setMenu] = useState<string>('web-search')
|
||||
const menuItems = [
|
||||
{ key: 'web-search', title: 'settings.tool.websearch.title', icon: <GlobalOutlined style={{ fontSize: 16 }} /> },
|
||||
{ key: 'ocr', title: 'settings.tool.ocr.title', icon: <OcrIcon /> }
|
||||
{ key: 'preprocess', title: 'settings.tool.preprocess.title', icon: <FileCode size={16} /> }
|
||||
// { key: 'ocr', title: 'settings.tool.ocr.title', icon: <OcrIcon /> }
|
||||
]
|
||||
return (
|
||||
<Container>
|
||||
@@ -34,7 +35,8 @@ const ToolSettings: FC = () => {
|
||||
</MenuList>
|
||||
<SettingContainer theme={theme} style={{ display: 'flex', flex: 1 }}>
|
||||
{menu == 'web-search' && <WebSearchSettings />}
|
||||
{menu == 'ocr' && <OcrSettings />}
|
||||
{menu == 'preprocess' && <PreprocessSettings />}
|
||||
{/* {menu == 'ocr' && <OcrSettings />} */}
|
||||
</SettingContainer>
|
||||
</Container>
|
||||
)
|
||||
|
||||
@@ -50,8 +50,8 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
|
||||
rerankModel: base.rerankModel?.id,
|
||||
rerankModelProvider: base.rerankModel?.provider,
|
||||
// topN: base.topN,
|
||||
preprocessing: base.preprocessing,
|
||||
ocrProvider: base.ocrProvider
|
||||
// preprocessing: base.preprocessing,
|
||||
preprocessProvider: base.preprocessProvider
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ import migrate from './migrate'
|
||||
import minapps from './minapps'
|
||||
import newMessagesReducer from './newMessage'
|
||||
import nutstore from './nutstore'
|
||||
import ocr from './ocr'
|
||||
import paintings from './paintings'
|
||||
import preprocess from './preprocess'
|
||||
import runtime from './runtime'
|
||||
import selectionStore from './selectionStore'
|
||||
import settings from './settings'
|
||||
@@ -42,7 +42,7 @@ const rootReducer = combineReducers({
|
||||
copilot,
|
||||
selectionStore,
|
||||
// messages: messagesReducer,
|
||||
ocr,
|
||||
preprocess,
|
||||
messages: newMessagesReducer,
|
||||
messageBlocks: messageBlocksReducer,
|
||||
inputTools: inputToolsReducer
|
||||
|
||||
@@ -1475,15 +1475,15 @@ const migrateConfig = {
|
||||
},
|
||||
'110': (state: RootState) => {
|
||||
try {
|
||||
if (!state.ocr) {
|
||||
state.ocr = {
|
||||
if (!state.preprocess) {
|
||||
state.preprocess = {
|
||||
defaultProvider: '',
|
||||
providers: []
|
||||
}
|
||||
}
|
||||
|
||||
if (state.ocr.providers.length === 0) {
|
||||
state.ocr.providers = [
|
||||
if (state.preprocess.providers.length === 0) {
|
||||
state.preprocess.providers = [
|
||||
{
|
||||
id: 'doc2x',
|
||||
name: 'Doc2x',
|
||||
@@ -1505,8 +1505,8 @@ const migrateConfig = {
|
||||
}
|
||||
]
|
||||
}
|
||||
if (!state.ocr.providers.find((provider) => provider.id === 'system')) {
|
||||
state.ocr.providers.push({
|
||||
if (!state.preprocess.providers.find((provider) => provider.id === 'system')) {
|
||||
state.preprocess.providers.push({
|
||||
id: 'system',
|
||||
name: 'System(Mac Only)',
|
||||
options: {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
|
||||
import { OcrProvider } from '@renderer/types'
|
||||
import { PreprocessProvider } from '@renderer/types'
|
||||
|
||||
export interface OcrState {
|
||||
providers: OcrProvider[]
|
||||
export interface PreprocessState {
|
||||
providers: PreprocessProvider[]
|
||||
defaultProvider: string
|
||||
}
|
||||
|
||||
const initialState: OcrState = {
|
||||
const initialState: PreprocessState = {
|
||||
providers: [
|
||||
{
|
||||
id: 'doc2x',
|
||||
@@ -38,20 +38,20 @@ const initialState: OcrState = {
|
||||
],
|
||||
defaultProvider: ''
|
||||
}
|
||||
const ocrSlice = createSlice({
|
||||
name: 'ocr',
|
||||
const preprocessSlice = createSlice({
|
||||
name: 'preprocess',
|
||||
initialState,
|
||||
reducers: {
|
||||
setDefaultOcrProvider(state, action: PayloadAction<string>) {
|
||||
setDefaultPreprocessProvider(state, action: PayloadAction<string>) {
|
||||
state.defaultProvider = action.payload
|
||||
},
|
||||
setOcrProviders(state, action: PayloadAction<OcrProvider[]>) {
|
||||
setPreprocessProviders(state, action: PayloadAction<PreprocessProvider[]>) {
|
||||
state.providers = action.payload
|
||||
},
|
||||
updateOcrProviders(state, action: PayloadAction<OcrProvider[]>) {
|
||||
updatePreprocessProviders(state, action: PayloadAction<PreprocessProvider[]>) {
|
||||
state.providers = action.payload
|
||||
},
|
||||
updateOcrProvider(state, action: PayloadAction<OcrProvider>) {
|
||||
updatePreprocessProvider(state, action: PayloadAction<PreprocessProvider>) {
|
||||
const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
|
||||
if (index !== -1) {
|
||||
state.providers[index] = action.payload
|
||||
@@ -60,6 +60,11 @@ const ocrSlice = createSlice({
|
||||
}
|
||||
})
|
||||
|
||||
export const { updateOcrProviders, updateOcrProvider, setDefaultOcrProvider, setOcrProviders } = ocrSlice.actions
|
||||
export const {
|
||||
updatePreprocessProviders,
|
||||
updatePreprocessProvider,
|
||||
setDefaultPreprocessProvider,
|
||||
setPreprocessProviders
|
||||
} = preprocessSlice.actions
|
||||
|
||||
export default ocrSlice.reducer
|
||||
export default preprocessSlice.reducer
|
||||
@@ -381,8 +381,8 @@ export interface KnowledgeBase {
|
||||
threshold?: number
|
||||
rerankModel?: Model
|
||||
// topN?: number
|
||||
preprocessing?: boolean
|
||||
ocrProvider?: OcrProvider
|
||||
// preprocessing?: boolean
|
||||
preprocessProvider?: PreprocessProvider
|
||||
}
|
||||
|
||||
export type KnowledgeBaseParams = {
|
||||
@@ -399,11 +399,11 @@ export type KnowledgeBaseParams = {
|
||||
rerankModel?: string
|
||||
rerankModelProvider?: string
|
||||
documentCount?: number
|
||||
preprocessing?: boolean
|
||||
ocrProvider?: OcrProvider
|
||||
// preprocessing?: boolean
|
||||
preprocessProvider?: PreprocessProvider
|
||||
}
|
||||
|
||||
export interface OcrProvider {
|
||||
export interface PreprocessProvider {
|
||||
id: string
|
||||
name: string
|
||||
apiKey?: string
|
||||
|
||||
Reference in New Issue
Block a user