Compare commits

...

103 Commits

Author SHA1 Message Date
icarus
2808a8aab1 Merge branch 'refactor/ocr' of github.com:CherryHQ/cherry-studio into refactor/ocr 2025-10-28 19:53:42 +08:00
icarus
1733a383e1 refactor(S3BackupManager): remove unused Space import from antd 2025-10-28 19:53:28 +08:00
GitHub Action
794c5311ef fix(i18n): Auto update translations for PR #10829 2025-10-28 11:51:19 +00:00
icarus
35ff0c63f4 Merge branch 'v2' of github.com:CherryHQ/cherry-studio into refactor/ocr 2025-10-28 19:49:07 +08:00
GitHub Action
835bce9079 fix(i18n): Auto update translations for PR #10829 2025-10-24 10:08:11 +00:00
icarus
ab9e1bf5a3 Merge branch 'v2' of github.com:CherryHQ/cherry-studio into refactor/ocr 2025-10-24 18:06:39 +08:00
icarus
472f2b1a6f Merge branch 'v2' of github.com:CherryHQ/cherry-studio into refactor/ocr 2025-10-21 14:02:33 +08:00
icarus
2420716983 feat(ocr): add new OcrProviderService with CRUD operations
Implement new service layer for OCR provider management following the IBaseService interface. Includes basic CRUD operations, pagination, and special methods for built-in providers. This is an early version pending final data architecture design.
2025-10-21 14:00:53 +08:00
icarus
332ff8b8cf refactor(db): remove unused schema index file 2025-10-21 13:35:52 +08:00
icarus
aae10322b8 refactor(db): move ocr provider schema to root schemas directory
The ocr provider schema was moved from `schemas/ocr/provider.ts` to `schemas/ocrProvider.ts` to simplify the directory structure and make imports more straightforward. All related imports were updated accordingly.
2025-10-21 13:34:57 +08:00
icarus
aee134110b feat(i18n): add translations for multiple languages
- Translate provider-related error messages in zh-tw, ja-jp, pt-pt, ru-ru, el-gr, es-es, fr-fr
- Add search section translations in ja-jp, pt-pt, ru-ru, el-gr, es-es, fr-fr
- Complete OVMS runtime error messages in all languages
2025-10-21 02:59:54 +08:00
icarus
4f2eaf4aed fix(ocr): include imageProviderId in error message and dependencies
Add imageProviderId to error message for better debugging and include it in useCallback dependencies to ensure consistency
2025-10-21 00:55:19 +08:00
icarus
d19e0de486 docs: update OCR architecture documentation with IPC details
Update both English and Chinese versions of the OCR architecture documentation to reflect current implementation where IPC serves as API layer. Clarify direct communication between renderer and business layer, and enhance data flow diagrams with new components and security aspects.
2025-10-21 00:12:43 +08:00
icarus
2f141e4761 docs: add OCR architecture documentation in English and Chinese
Add comprehensive technical documentation for the OCR system architecture, covering:
- Layered architecture design
- Provider system implementation
- Data flow and type system
- Configuration management
- Error handling and security
- Development guidelines

The documentation was automatically generated based on code analysis and reflects the current implementation state.
2025-10-20 23:24:25 +08:00
icarus
64c7601cc9 chore: update sharp and related dependencies to 0.34.4
Update sharp package and its platform-specific variants to version 0.34.4, including corresponding libvips dependencies. This ensures compatibility and includes latest fixes/improvements from the sharp library.
2025-10-20 23:04:19 +08:00
icarus
0c5a20a2e4 fix(translate): correct regex pattern for language code validation
fix(ocr): improve debug log by showing full provider details
2025-10-20 22:49:07 +08:00
icarus
917864be1c feat(utils): add safe json parsing utility
Add safeParseJson function to handle JSON parsing with error catching
2025-10-20 22:45:51 +08:00
icarus
e7e36d7df6 style(migrations): format json files with consistent indentation 2025-10-20 22:41:12 +08:00
icarus
0176cf7679 feat(ocr): add config validation and pass provider config to ocr handlers
Add type guards for OCR provider configs and ensure config is passed to OCR handlers
Update all built-in OCR services to validate config before processing
2025-10-20 22:40:12 +08:00
icarus
96f71f12ec fix(translate): show detailed error message when file processing fails 2025-10-20 22:21:00 +08:00
icarus
7942147ce0 feat(migrations): add initial sqlite migration for ocr_provider table
Add initial database migration files including schema definition for ocr_provider table and related metadata files. This sets up the foundation for OCR provider management in the system.
2025-10-20 22:15:55 +08:00
icarus
b7a6ed6b24 style: reorganize imports in ocr-related type files 2025-10-20 22:07:42 +08:00
icarus
790df761f0 refactor(types): move translate types to dedicated module
Centralize translate-related types and schemas in a dedicated module for better organization and maintainability. This change involves moving types from the shared index file to a new translate-specific file and updating import paths accordingly.
2025-10-20 21:58:34 +08:00
icarus
9215256d68 refactor(ocr): remove deprecated ocr slice actions and selectors
All functionality has been migrated elsewhere as indicated by the deprecation notice.
2025-10-20 21:52:49 +08:00
icarus
12b9b64ca8 refactor(ocr): move TimestampExtendShape to data.ts and clean up imports
Move TimestampExtendShape definition from api.ts to data.ts where it's primarily used
Clean up type imports and remove unnecessary comments
2025-10-20 21:02:26 +08:00
icarus
74e7979764 refactor(ocr): simplify response handling by removing wrapper objects
Remove unnecessary response wrapper objects ({ data: ... }) from OCR service methods and update types accordingly
Update API handlers to maintain consistent response structure
2025-10-20 20:58:08 +08:00
icarus
e0781e1bb0 refactor(ocr): restructure ocr types into modular files for better maintainability
- Split monolithic ocr.ts into separate files for base types, providers, models, and layers (api, data, business)
- Update related imports and references across the codebase
- Rename API request/response types to be more consistent (Patch->Update, Put->Replace)
- Adjust repository and service implementations to match new type structure
2025-10-20 20:39:24 +08:00
icarus
327d0dab7f refactor(ocr): remove ocr types to a single folder 2025-10-20 20:19:39 +08:00
icarus
75f513edb0 feat(i18n): add provider unavailable message for multiple locales
Add translation key for provider unavailable status in zh-cn locale and placeholders for other locales
2025-10-20 19:47:36 +08:00
icarus
52e2aff005 fix(ocr): add missing error message for unavailable provider
Add "not_availabel" translation key and use it when provider is unavailable. Also update type name from ImageOcrProvider to OcrProvider to better reflect its usage.
2025-10-20 19:46:57 +08:00
icarus
933d26e0f4 refactor(ocr): improve readability of updateProvider method signature
Split long method signature into multiple lines for better readability
2025-10-20 19:46:41 +08:00
icarus
4fd3300ed0 refactor(ocr): restructure ocr service and repository layers
- Extract database operations to new OcrProviderRepository
- Improve service initialization and provider management
- Add better error handling and logging
- Update API handlers to use new service methods
2025-10-20 19:35:39 +08:00
icarus
ad67d2558a refactor(ocr): update ocr settings components to use props instead of hooks
- Remove useOcrProvider hook usage in favor of direct props passing
- Add proper type casting for updateConfig functions
- Maintain consistent state management across all OCR provider settings
2025-10-20 09:15:41 +08:00
icarus
d47c3b1d63 refactor(ocr): restructure ocr provider settings and hooks
- Simplify useOcrImageProvider by directly using useOcrProvider
- Make useOcrProvider handle null provider IDs
- Update provider settings components to use passed props
- Remove styled-components in favor of tailwind classes
2025-10-20 09:10:04 +08:00
icarus
741bb94c8b refactor(hooks): rename provider to data for consistency with api response 2025-10-20 08:42:37 +08:00
icarus
46772b4f2a fix(ocr): include id in provider config update request
The id parameter was missing in the update request body, causing potential issues with identifying which provider to update. Add id to the request body to ensure correct provider is updated.
2025-10-20 08:39:46 +08:00
icarus
8aaf26e420 refactor(data): simplify ocr preferences mapping structure
Remove redundant ocr provider config mappings and consolidate to a single image provider id mapping
2025-10-20 08:33:59 +08:00
icarus
281632f859 feat(ocr): add validation for OCR provider operations
- Add params validation in API handlers to ensure path ID matches body ID
- Introduce isDbOcrProvider type guard for runtime validation
- Validate provider data before database operations
2025-10-20 08:28:15 +08:00
icarus
e4b5e70c34 refactor(ocr): update timestamp handling to use milliseconds
Use dayjs().valueOf() instead of dayjs().unix() to get timestamps in milliseconds for consistency with the updated schema comment
2025-10-20 08:21:18 +08:00
icarus
6f635472f3 refactor(ocr): improve provider schema and update handling
- Export DbOcrProviderSchema and add DbOcrProvider type
- Simplify provider update logic by merging entire object
- Add timestamps to create/update operations
- Maintain createdAt when updating existing providers
2025-10-20 08:18:54 +08:00
icarus
eb4927260a refactor(OcrImageSettings): remove logger and optimize setImageProvider
Replace direct logger usage with commented code and wrap setImageProvider in useCallback
2025-10-20 08:10:14 +08:00
icarus
a2e628d7e9 refactor(ocr): improve ocr provider handling and error states
- Add ListOcrProvidersQuery type for better type safety
- Update useOcrProviders hook to accept query params and handle undefined data
- Improve error handling and loading states in OcrImageSettings component
- Memoize filtered image providers for better performance
2025-10-20 08:07:32 +08:00
icarus
389dfc08f6 feat(ocr): add filtering by registration status to provider list
Add optional query parameter to filter OCR providers by registration status
Prevent modification and deletion of built-in OCR providers
2025-10-20 07:54:50 +08:00
icarus
7ea7e7134d refactor(ocr): add BuiltinOcrProviderIds constant for provider ids
Use objectValues utility to create a frozen array of provider ids for better maintainability and type safety
2025-10-20 07:47:11 +08:00
icarus
1423163b3a refactor(ocr): rename BuiltinOcrProviderIds to BuiltinOcrProviderIdMap for consistency 2025-10-20 07:45:53 +08:00
icarus
f9ed8343fe feat(ocr): implement delete provider API endpoint
Add DELETE endpoint for OCR providers with proper type definitions and handler implementation. The endpoint removes the provider from both the registry and database after validation checks.
2025-10-20 07:40:31 +08:00
icarus
a042892250 feat(ocr): implement create and update provider endpoints
add POST handler for creating new OCR providers
add PUT handler for updating existing OCR providers
add required request/response types and schemas
2025-10-20 07:35:03 +08:00
icarus
b67b4c8178 feat(ocr): update provider config by merging with existing values
Use lodash merge to combine existing provider config with updates instead of overwriting
2025-10-20 07:27:09 +08:00
icarus
4ab6961fcc feat(ocr): add type for OcrProviderId and getProvider method
Add OcrProviderId type definition and implement getProvider method in OcrService to fetch a single OCR provider by ID
2025-10-20 07:23:41 +08:00
icarus
4e7a67df59 feat(ocr): implement PATCH endpoint for OCR provider updates
Add PATCH handler for OCR provider updates with request/response schemas
Implement patchProvider method in OcrService to update provider data
2025-10-20 07:19:08 +08:00
icarus
1e9014b080 feat(ocr): implement ocr providers list endpoint
Add DbOcrProviderSchema and update response schemas for list and get endpoints
Implement the GET /ocr/providers endpoint using ocrService
2025-10-20 07:00:23 +08:00
icarus
8ac9344fef feat(i18n): add provider error messages and search translations
Add error messages for provider operations (create, delete, get, list, update) in multiple languages
Include search-related translations for various languages
Add new OVMS runtime error codes for installation process
2025-10-20 06:54:48 +08:00
icarus
3250d982fc docs(ocr): add todo comment for builtin providers registration 2025-10-20 06:48:49 +08:00
icarus
4dcfe276ac refactor(ocr): change provider listing to include db data
Replace simple registry key listing with combined db query to filter available providers
2025-10-20 06:47:44 +08:00
icarus
78126c3d0b refactor(ocr): simplify useOcrProvider hook by using data api
Replace complex provider and config management with useQuery and useMutation hooks
Add loading states and error handling
Remove unused imports and simplify return type
2025-10-20 06:47:22 +08:00
icarus
37ad896f6a refactor(ocr): restructure OCR provider configuration and types
- Remove separate configs from store and move them into provider definitions
- Add Zod schemas for OCR provider types and configurations
- Update migration to use new provider structure
- Make OCR provider config non-nullable in database schema
- Clean up unused OCR preference settings
2025-10-20 06:47:02 +08:00
icarus
84a513a6ae refactor(ocr): move provider registration to constructor
Initialize built-in OCR providers during service instantiation instead of after creation for better encapsulation and initialization control
2025-10-20 05:18:51 +08:00
icarus
f538e89976 Revert "refactor(ocr): simplify ocr providers api by returning string array"
This reverts commit 695afb6f75.
2025-10-20 05:16:35 +08:00
icarus
f10f0b21f9 Revert "refactor(db): remove unused ocr provider schema table"
This reverts commit 9c740f82ad.
2025-10-20 05:08:24 +08:00
icarus
49c80620ae refactor(ocr): simplify ocr service interface and params handling
- Replace OcrProvider with OcrParams to simplify interface
- Remove unused OcrApiClientFactory and related code
- Consolidate ocr service calls to use consistent params structure
2025-10-20 05:07:53 +08:00
Phantom
68aaf9df4a fix: use consistent sharp dependencies (#10832)
build: update sharp dependencies to version 0.34.3

Update sharp image processing library dependencies to latest version 0.34.3 across all platforms (darwin, linux, win32) to ensure consistent behavior and security fixes
2025-10-20 04:33:17 +08:00
icarus
b31b48fcaf refactor(ocr): remove unused OCR list providers functionality 2025-10-20 04:31:02 +08:00
icarus
82b244471b refactor(OcrImageSettings): simplify provider selection logic and improve UI
Remove unused imports and simplify the provider filtering logic by removing platform-specific checks
Update UI styling to use Tailwind classes instead of inline styles
2025-10-20 03:36:04 +08:00
icarus
062cbcc259 feat(ui): add skeleton component to shadcn-io exports
Export new Skeleton component from shadcn-io directory and add comment about potential future organization
2025-10-20 03:35:21 +08:00
icarus
b50d8b2a23 refactor(ocr): remove unused error message and simplify provider check
Move provider availability check outside of useCallback and remove unused error message from translations
2025-10-20 03:19:38 +08:00
icarus
b262410518 refactor(ocr): use config from useOcrProvider hook directly
Update OCR settings components to use config object returned by useOcrProvider hook instead of accessing it through provider.config. This provides more direct access to the configuration data and improves consistency across components.
2025-10-20 03:13:25 +08:00
icarus
a34426d431 refactor(ocr): improve type safety and config handling in useOcrProvider
- Replace dynamic provider lookup with type-safe registry pattern
- Add separate config management for each provider type
- Remove unused imports and simplify provider fallback logic
2025-10-20 03:06:11 +08:00
icarus
94ed39ab27 refactor(ocr): simplify provider fallback logic and remove unused methods
Remove unused provider management methods (add/remove) and simplify the fallback logic in useOcrProvider to always use Tesseract when provider is not found
2025-10-20 02:30:57 +08:00
icarus
ed8501961a refactor(ocr): extract image provider logic to separate hook
Move image provider related state and logic from useOcrProviders to new useOcrImageProvider hook
Update all components to use the new hook for better separation of concerns
2025-10-20 02:27:10 +08:00
icarus
78000816e5 refactor(useOcrProvider): rename useOcrProvider from tsx to ts 2025-10-20 02:21:29 +08:00
icarus
5900ff0c6e feat(ocr): add provider availability check and error message
Add validation to ensure OCR provider can process images before attempting OCR
2025-10-20 02:17:28 +08:00
icarus
b310ea1407 feat(ocr): add type guard for OcrProvider
Add isOcrProvider type guard function to validate unknown inputs against OcrProviderSchema
2025-10-20 02:11:29 +08:00
icarus
beb44eea61 refactor(ocr): move provider logo logic to component and consolidate hooks
Move OcrProviderLogo implementation from useOcrProviders hook to the component file
Extract common OCR provider logic into a separate useOcrProviders hook
Clean up and reorganize related imports and exports
2025-10-20 02:07:12 +08:00
icarus
7658b1e79f refactor(ocr): reorganize ocr hooks into dedicated directory
Move useOcr and useOcrProvider hooks to new ocr directory under hooks
Update all imports in settings components to reflect new paths
2025-10-20 02:01:56 +08:00
icarus
ea1aa6e5a8 refactor(ocr): remove unused langs config from ovocr provider
The langs configuration for ovocr provider is not currently configurable, so it's removed from both type definition and default config.
2025-10-20 01:58:03 +08:00
icarus
e823d97e31 feat(ocr): add provider config mappings and default preferences
Add OCR provider configuration mappings to PreferencesMappings.ts and define default preferences for OCR providers in preferenceSchemas.ts. This enables support for multiple OCR providers with their respective configurations.
2025-10-20 01:54:22 +08:00
icarus
515d3cd596 refactor(data): update PreferencesMappings type with PreferenceSchemas
Add type import for PreferenceSchemas and update REDUX_STORE_MAPPINGS type to use keyof PreferenceSchemas
Mark several mappings with TODO comments for future fixes
2025-10-20 01:53:49 +08:00
icarus
47366064ca refactor(ocr): move ocr config to shared and add utility function
Migrate ocr configuration from renderer to shared config and introduce getDefaultOcrProvider utility function to centralize default provider logic
2025-10-20 01:44:23 +08:00
icarus
61a71a0486 refactor(utils): reorganize utils files into module structure
Move defaultAppHeaders function from utils.ts to new net.ts module and create index.ts for exports
2025-10-20 01:40:01 +08:00
icarus
e640beb874 refactor(ocr): move ocr config to shared package for reuse
Centralize OCR configuration in shared package to avoid duplication and improve maintainability. This change affects multiple components that previously imported from renderer config.
2025-10-20 01:37:00 +08:00
icarus
9386a4d482 refactor(ocr): restructure ocr provider config handling
move provider configs from individual providers to a centralized config map
add migration for new ocr config structure
2025-10-20 01:26:37 +08:00
icarus
90e02e64b7 refactor(types): mark OcrProvider.config as deprecated
The config property is being phased out in favor of a more streamlined type structure. This change marks it as deprecated while maintaining backward compatibility.
2025-10-20 01:09:30 +08:00
icarus
08d8f70752 refactor(data): add type constraint to REDUX_STORE_MAPPINGS 2025-10-20 01:06:11 +08:00
icarus
695afb6f75 refactor(ocr): simplify ocr providers api by returning string array
Remove unused OcrProvider type and related endpoints. The GET endpoint now returns a simple array of provider IDs instead of full provider objects, as the detailed provider data will be handled separately.
2025-10-20 01:02:17 +08:00
icarus
471b1fae2d docs(IBaseService): add type parameter documentation to interface 2025-10-20 00:59:44 +08:00
icarus
9c740f82ad refactor(db): remove unused ocr provider schema table 2025-10-20 00:59:28 +08:00
icarus
ab7fed8907 docs(ocr): update provider schema comments with more details
Add more context about ID format for custom providers and clarify name usage for built-in providers
Explain JSON config validation requirements and mark timestamps as potentially unused
2025-10-20 00:48:40 +08:00
icarus
ec68886e4a refactor(ocr): convert OcrProvider type to zod schema
Use zod schema for better type safety and validation capabilities
2025-10-20 00:37:13 +08:00
icarus
a3bc279c74 feat(types): add OcrOvConfig to OcrProviderConfig union type 2025-10-20 00:36:04 +08:00
icarus
2e400d3f1c refactor(ocr): convert OcrProviderBaseConfig to zod schema
Use zod schema for type validation and inference to improve type safety
2025-10-20 00:35:26 +08:00
icarus
ed791a3bb3 refactor(ocr): replace manual type check with zod schema validation
Simplify type checking logic by using zod schema validation instead of manual type checks for OcrProviderApiConfig
2025-10-20 00:34:34 +08:00
icarus
2a8f819bee refactor(types): convert OcrModel interface to zod schema
Use zod schema for better type safety and validation capabilities
2025-10-20 00:34:08 +08:00
icarus
35280b4b8c refactor(ocr): replace manual record type with zod schema inference
Use zod's partialRecord and inference to define OcrProviderCapabilityRecord for better type safety and maintainability
2025-10-20 00:33:33 +08:00
icarus
b93ff89e9e refactor(types): add satisfies constraint to type assertions
Add satisfies constraint to BuiltinOcrProviderIds and OcrProviderCapabilities to ensure type safety and better intellisense
2025-10-20 00:31:31 +08:00
icarus
dedc591e1c refactor(ocr): replace manual capability check with zod schema
Use zod schema validation for OCR provider capabilities instead of manual object property check for better type safety and maintainability
2025-10-20 00:31:24 +08:00
icarus
5c049911ee refactor(ocr): replace manual type check with zod schema for provider ids
Use zod schema validation for BuiltinOcrProviderId type to improve type safety and maintainability
2025-10-20 00:30:08 +08:00
icarus
399f8cbd41 feat(db): add ocr provider schema with capabilities and config
Add new schema for OCR providers including fields for id, name, capabilities, and config. Capabilities and config are stored as JSON to accommodate various provider types and configurations.
2025-10-20 00:27:52 +08:00
icarus
c780552197 feat(ocr): add api schemas and handlers for ocr providers
Implement API schemas and handlers for OCR providers endpoints
Add TODO comments for future migration tasks
Fix endpoint path in OcrImageSettings component
2025-10-19 23:21:54 +08:00
icarus
d366ec5932 refactor(ocr-settings): simplify ocr settings by removing unused tab logic
Since only image OCR is currently supported, remove the tab component and related unused code while keeping the core functionality
2025-10-19 22:36:40 +08:00
icarus
d35d7029f7 refactor(ocr): simplify image provider state management
- Remove unnecessary state propagation between components
- Store image provider ID in preferences instead of redux
- Add null checks for provider existence
- Update tab navigation to use new ui components
2025-10-19 22:32:00 +08:00
icarus
2c78f5f906 feat(ui): add shadcn tabs component
Add new tabs component using @radix-ui/react-tabs as base implementation. Includes Tabs, TabsList, TabsTrigger and TabsContent subcomponents with styling utilities.
2025-10-19 22:28:37 +08:00
icarus
92638d138d refactor(ocr): rename OCR_ocr to OCR_Ocr for consistent naming 2025-10-19 19:11:37 +08:00
icarus
2dbf7c1c51 refactor(ocr): improve service initialization and registration
Move availability checks to service instantiation
Update registry to store service instances directly
Simplify registration logic by removing redundant bind calls
2025-10-19 19:00:13 +08:00
80 changed files with 7131 additions and 8678 deletions

View File

@@ -0,0 +1,260 @@
> [!NOTE]
> This technical documentation was automatically generated by Claude Code based on analysis of the current OCR implementation in the codebase. The content reflects the architecture as of the current branch state.
# OCR Architecture
## Overview
Cherry Studio's OCR (Optical Character Recognition) system is a modular, extensible architecture designed to support multiple OCR providers and file types. The architecture follows a layered approach with clear separation of concerns between data access, business logic, and provider implementations.
## Architecture Layers
The OCR architecture follows a layered approach where data interactions occur through RESTful APIs, while IPC serves as part of the API layer, allowing the renderer to interact directly with the business layer:
### 1. API Layer
**Location**: `src/main/data/api/handlers/`, `src/main/ipc.ts`, `src/preload/index.ts`
- **IPC Bridge**: Serves as API layer connecting renderer to main process
- **Request Routing**: Routes IPC calls to appropriate service methods
- **Type Safety**: Zod schemas for request/response validation
- **Error Handling**: Centralized error propagation across process boundaries
- **Security**: Secure communication sandbox between renderer and main processes
### 2. OCR Service Layer (Business Layer)
**Location**: `src/main/services/ocr/`
- **OcrService**: Main business logic orchestrator and central coordinator
- **Provider Registry**: Manages registered OCR providers
- **Data Integration**: Direct interaction with data layer for provider management
- **Lifecycle Management**: Handles provider initialization and disposal
- **Validation**: Ensures provider availability and data integrity
- **Orchestration**: Coordinates between providers and data services
- **Direct IPC Access**: Renderer can directly invoke business layer methods via IPC
### 3. Provider Services Layer
**Location**: `src/main/services/ocr/builtin/`
- **Base Service**: Abstract `OcrBaseService` defines common interface
- **Data Independence**: No direct database interactions, relies on injected data
- **Built-in Providers**:
  - `TesseractService`: Local Tesseract.js implementation
  - `SystemOcrService`: Platform-specific system OCR
  - `PpocrService`: PaddleOCR integration
  - `OvOcrService`: Intel OpenVINO (NPU) OCR
- **Pure OCR Logic**: Focus solely on OCR processing capabilities
### 4. Data Layer
**Location**: `src/main/data/db/schemas/ocrProvider.ts`, `src/main/data/repositories/`
- **Database Schema**: Uses Drizzle ORM with SQLite database
- **Repository Pattern**: `OcrProviderRepository` handles all database operations
- **Provider Storage**: Stores provider configurations in `ocr_provider` table
- **JSON Configuration**: Polymorphic `config` field stores provider-specific settings
- **Data Access**: Exclusively accessed by OCR Service layer
### 5. Frontend Layer
**Location**: `src/renderer/src/services/ocr/`, `src/renderer/src/hooks/ocr/`
- **Direct IPC Communication**: Direct interaction with business layer via IPC
- **React Hooks**: Custom hooks for OCR operations and state management
- **Configuration UI**: Settings pages for provider configuration
- **State Management**: Frontend state synchronization with backend data
## Data Flow
```mermaid
graph TD
A[Frontend UI] --> B[Frontend OCR Service]
B --> C[API Layer - IPC Bridge]
C --> D[OCR Service Layer - Business Logic]
D --> E[Data Layer - Provider Repository]
D --> F[Provider Services Layer]
F --> G[OCR Processing]
G --> H[Result]
H --> F
F --> D
D --> C
C --> B
B --> A
style D fill:#e1f5fe
style F fill:#f3e5f5
style E fill:#e8f5e8
style C fill:#fff3e0
```
**Key Flow Characteristics:**
- **Direct Business Access**: Frontend communicates directly with OCR Service layer via IPC
- **IPC as API Gateway**: IPC bridge functions as the API layer, handling routing and validation
- **Data Isolation**: Only business layer interacts with data persistence
- **Provider Independence**: OCR providers remain isolated from data concerns
## Provider System
### Provider Registration
- **Built-in Providers**: Automatically registered on service initialization
- **Custom Providers**: Support for extensible provider system
- **Configuration**: Each provider has its own configuration schema
### Provider Capabilities
```typescript
interface OcrProviderCapabilityRecord {
image?: boolean // Image file OCR support
pdf?: boolean // PDF file OCR support (future)
}
```
### Configuration Architecture
- **Polymorphic Config**: JSON-based configuration adapts to provider needs
- **Type Safety**: Zod schemas validate provider-specific configurations
- **Runtime Validation**: Configuration validation before OCR operations
## Type System
### Core Types
- **`OcrProvider`**: Base provider interface
- **`OcrParams`**: OCR operation parameters
- **`OcrResult`**: Standardized OCR result format
- **`SupportedOcrFile`**: File types supported for OCR
### Business Types
- **`OcrProviderBusiness`**: Domain-level provider representation
- **Operations**: Create, Update, Replace, Delete operations
- **Queries**: List providers with filtering options
### Provider-Specific Types
- **TesseractConfig**: Language selection, model paths
- **SystemOcrConfig**: Language preferences
- **PaddleOCRConfig**: API endpoints, authentication
- **OpenVINOConfig**: Device selection, model paths
## Built-in Providers
### Tesseract OCR
- **Engine**: Tesseract.js
- **Languages**: Multi-language support with automatic download
- **Configuration**: Language selection, cache management
- **Performance**: Worker pooling for concurrent processing
### System OCR
- **Windows**: Windows built-in OCR (`Windows.Media.Ocr` WinRT API)
- **macOS**: Vision framework OCR
- **Linux**: Platform-specific implementations
- **Features**: Native performance, system integration
### PaddleOCR
- **Deployment**: Remote API integration
- **Languages**: Chinese, English, and mixed language support
- **Configuration**: API endpoints and authentication
### Intel OpenVINO OCR
- **Hardware**: NPU acceleration support
- **Performance**: Optimized for Intel hardware
- **Use Case**: High-performance OCR scenarios
## Configuration Management
### Database Schema
```sql
CREATE TABLE ocr_provider (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
capabilities TEXT NOT NULL, -- JSON
config TEXT NOT NULL, -- JSON
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
```
### Provider Defaults
- **Initial Configuration**: Defined in `packages/shared/config/ocr.ts`
- **Migration System**: Automatic provider initialization on startup
- **User Customization**: Runtime configuration updates
## Error Handling
### Error Categories
- **Provider Errors**: OCR engine failures, missing dependencies
- **Configuration Errors**: Invalid settings, missing parameters
- **File Errors**: Unsupported formats, corrupted files
- **System Errors**: Resource exhaustion, permissions
### Error Propagation
- **Logging**: Centralized logging with context
- **User Feedback**: Translated error messages
- **Recovery**: Graceful fallback options
## Performance Considerations
### Resource Management
- **Worker Disposal**: Proper cleanup of OCR workers
- **Memory Management**: Limits on file sizes and concurrent operations
- **Caching**: Model and result caching where applicable
### Optimization
- **Lazy Loading**: Providers initialized on demand
- **Concurrent Processing**: Multiple workers for parallel operations
- **Hardware Acceleration**: NPU and GPU support where available
## Security
### Input Validation
- **File Type Checking**: Strict validation of supported formats
- **Size Limits**: Protection against resource exhaustion
- **Path Validation**: Prevention of path traversal attacks
### Configuration Security
- **API Key Storage**: Secure storage of sensitive configuration
- **Validation**: Runtime validation of configuration parameters
- **Sandboxing**: Isolated execution of OCR operations
## Extension Points
### Custom Providers
- **Interface**: Implement `OcrBaseService` for new providers
- **Registration**: Dynamic provider registration system
- **Configuration**: Extensible configuration schemas
### File Type Support
- **Handlers**: Modular file type processors
- **Capabilities**: Declarative provider capabilities
- **Future Support**: PDF, document formats planned
## Migration Strategy
### Legacy System
- **Data Migration**: Automatic migration from old configuration formats
- **Compatibility**: Backward compatibility during transition
- **Testing**: Comprehensive test coverage for migration paths
### Future Enhancements
- **PDF Support**: Planned extension to document OCR
- **Cloud Providers**: API-based OCR services integration
- **AI Enhancement**: Post-processing and accuracy improvements
## Development Guidelines
### Adding New Providers
1. Create provider service extending `OcrBaseService`
2. Define provider-specific configuration schema
3. Register provider in `OcrService`
4. Add configuration UI components
5. Include comprehensive tests
> [!WARNING]
> Provider services should never directly access the data layer. All data operations must go through the OCR Service layer to maintain proper separation of concerns.
### Configuration Changes
1. Update provider configuration schema
2. Add migration logic for existing configurations
3. Update UI validation and error handling
4. Test with various configuration scenarios
> [!WARNING]
> Always validate configuration changes before saving to the database. Use Zod schemas for runtime validation to prevent corrupted provider configurations.
### Testing
- **Unit Tests**: Provider implementation testing
- **Integration Tests**: End-to-end OCR workflows
- **Performance Tests**: Resource usage and timing
- **Error Scenarios**: Comprehensive error handling testing

View File

@@ -0,0 +1,260 @@
> [!NOTE]
> 本技术文档由 Claude Code 基于对当前代码库中 OCR 实现的分析自动生成。内容反映了当前分支状态的架构设计。
# OCR 架构文档
## 概述
Cherry Studio 的 OCR(光学字符识别)系统是一个模块化、可扩展的架构,旨在支持多个 OCR 提供商和文件类型。该架构采用分层设计,在数据访问、业务逻辑和提供商实现之间有明确的关注点分离。
## 架构分层
OCR 架构采用分层方法,其中数据交互通过 RESTful API 进行,而 IPC 作为 API 层的一部分,允许 Renderer 直接与业务层交互:
### 1. API 层
**位置**: `src/main/data/api/handlers/`, `src/main/ipc.ts`, `src/preload/index.ts`
- **IPC 桥接**: 作为 API 层连接 Renderer 到主进程
- **请求路由**: 将 IPC 调用路由到相应的服务方法
- **类型安全**: 使用 Zod 模式进行请求/响应验证
- **错误处理**: 跨进程边界的集中式错误传播
- **安全**: Renderer 和主进程之间的安全通信沙盒
### 2. OCR 服务层(业务层)
**位置**: `src/main/services/ocr/`
- **OcrService**: 主要业务逻辑协调器和中央协调器
- **提供商注册表**: 管理已注册的 OCR 提供商
- **数据集成**: 与数据层直接交互进行提供商管理
- **生命周期管理**: 处理提供商初始化和销毁
- **验证**: 确保提供商可用性和数据完整性
- **协调**: 协调提供商和数据服务之间的交互
- **直接 IPC 访问**: Renderer 可通过 IPC 直接调用业务层方法
### 3. 提供商服务层
**位置**: `src/main/services/ocr/builtin/`
- **基础服务**: 抽象的 `OcrBaseService` 定义通用接口
- **数据独立性**: 无直接数据库交互,依赖外部传入的数据
- **内置提供商**:
- `TesseractService`: 本地 Tesseract.js 实现
- `SystemOcrService`: 平台特定的系统 OCR
- `PpocrService`: PaddleOCR 集成
- `OvOcrService`: Intel OpenVINO (NPU) OCR
- **纯 OCR 逻辑**: 专注于 OCR 处理能力
### 4. 数据层
**位置**: `src/main/data/db/schemas/ocrProvider.ts`, `src/main/data/repositories/`
- **数据库架构**: 使用 Drizzle ORM 和 SQLite 数据库
- **仓储模式**: `OcrProviderRepository` 处理所有数据库操作
- **提供商存储**: 在 `ocr_provider` 表中存储提供商配置
- **JSON 配置**: 多态的 `config` 字段存储提供商特定的设置
- **数据访问**: 仅由 OCR 服务层访问
### 5. Renderer 层
**位置**: `src/renderer/src/services/ocr/`, `src/renderer/src/hooks/ocr/`
- **直接 IPC 通信**: 通过 IPC 与业务层直接交互
- **React Hooks**: 用于 OCR 操作和状态管理的自定义钩子
- **配置 UI**: 提供商配置的设置页面
- **状态管理**: Renderer 状态与后端数据同步
## 数据流
```mermaid
graph TD
A[Renderer UI] --> B[Renderer OCR 服务]
B --> C[API 层 - IPC 桥接]
C --> D[OCR 服务层 - 业务逻辑]
D --> E[数据层 - 提供商仓储]
D --> F[提供商服务层]
F --> G[OCR 处理]
G --> H[结果]
H --> F
F --> D
D --> C
C --> B
B --> A
style D fill:#e1f5fe
style F fill:#f3e5f5
style E fill:#e8f5e8
style C fill:#fff3e0
```
**关键流程特征**:
- **直接业务访问**: Renderer 通过 IPC 与 OCR 服务层直接通信
- **IPC 作为 API 网关**: IPC 桥接作为 API 层,处理路由和验证
- **数据隔离**: 只有业务层与数据持久化交互
- **提供商独立性**: OCR 提供商保持与数据关注点的隔离
## 提供商系统
### 提供商注册
- **内置提供商**: 在服务初始化时自动注册
- **自定义提供商**: 支持可扩展的提供商系统
- **配置**: 每个提供商都有自己的配置模式
### 提供商能力
```typescript
interface OcrProviderCapabilityRecord {
image?: boolean // 图像文件 OCR 支持
pdf?: boolean // PDF 文件 OCR 支持(未来)
}
```
### 配置架构
- **多态配置**: 基于 JSON 的配置适应提供商需求
- **类型安全**: Zod 模式验证提供商特定的配置
- **运行时验证**: OCR 操作前的配置验证
## 类型系统
### 核心类型
- **`OcrProvider`**: 基础提供商接口
- **`OcrParams`**: OCR 操作参数
- **`OcrResult`**: 标准化的 OCR 结果格式
- **`SupportedOcrFile`**: 支持 OCR 的文件类型
### 业务类型
- **`OcrProviderBusiness`**: 域级别的提供商表示
- **操作**: 创建、更新、替换、删除操作
- **查询**: 带过滤选项的提供商列表
### 提供商特定类型
- **TesseractConfig**: 语言选择、模型路径
- **SystemOcrConfig**: 语言偏好
- **PaddleOCRConfig**: API 端点、认证
- **OpenVINOConfig**: 设备选择、模型路径
## 内置提供商
### Tesseract OCR
- **引擎**: Tesseract.js
- **语言**: 支持多语言,自动下载
- **配置**: 语言选择、缓存管理
- **性能**: 工作池用于并发处理
### 系统 OCR
- **Windows**: Windows Media Foundation OCR
- **macOS**: Vision 框架 OCR
- **Linux**: 平台特定实现
- **特性**: 原生性能、系统集成
### PaddleOCR
- **部署**: 远程 API 集成
- **语言**: 中文、英文和混合语言支持
- **配置**: API 端点和认证
### Intel OpenVINO OCR
- **硬件**: NPU 加速支持
- **性能**: 为 Intel 硬件优化
- **用例**: 高性能 OCR 场景
## 配置管理
### 数据库架构
```sql
CREATE TABLE ocr_provider (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
capabilities TEXT NOT NULL, -- JSON
config TEXT NOT NULL, -- JSON
created_at INTEGER, -- 可为空,毫秒级 Unix 时间戳
updated_at INTEGER -- 可为空,毫秒级 Unix 时间戳
);
```
### 提供商默认值
- **初始配置**: 在 `packages/shared/config/ocr.ts` 中定义
- **迁移系统**: 启动时自动提供商初始化
- **用户自定义**: 运行时配置更新
## 错误处理
### 错误类别
- **提供商错误**: OCR 引擎故障、缺少依赖
- **配置错误**: 无效设置、缺少参数
- **文件错误**: 不支持的格式、损坏的文件
- **系统错误**: 资源耗尽、权限问题
### 错误传播
- **日志**: 带上下文的集中日志记录
- **用户反馈**: 翻译的错误消息
- **恢复**: 优雅的回退选项
## 性能考虑
### 资源管理
- **工作器销毁**: OCR 工作器的适当清理
- **内存管理**: 文件大小和并发操作限制
- **缓存**: 模型和结果缓存(如适用)
### 优化
- **延迟加载**: 按需初始化提供商
- **并发处理**: 多工作器用于并行操作
- **硬件加速**: NPU 和 GPU 支持(如可用)
## 安全
### 输入验证
- **文件类型检查**: 严格验证支持的格式
- **大小限制**: 防止资源耗尽
- **路径验证**: 防止路径遍历攻击
### 配置安全
- **API 密钥存储**: 敏感配置的安全存储
- **验证**: 配置参数的运行时验证
- **沙盒**: OCR 操作的隔离执行
## 扩展点
### 自定义提供商
- **接口**: 为新提供商实现 `OcrBaseService`
- **注册**: 动态提供商注册系统
- **配置**: 可扩展的配置模式
### 文件类型支持
- **处理器**: 模块化文件类型处理器
- **能力**: 声明式提供商能力
- **未来支持**: PDF、文档格式计划中
## 迁移策略
### 遗留系统
- **数据迁移**: 从旧配置格式自动迁移
- **兼容性**: 过渡期间的向后兼容性
- **测试**: 迁移路径的全面测试覆盖
### 未来增强
- **PDF 支持**: 计划扩展到文档 OCR
- **云提供商**: 基于 API 的 OCR 服务集成
- **AI 增强**: 后处理和准确性改进
## 开发指南
### 添加新提供商
1. 创建扩展 `OcrBaseService` 的提供商服务
2. 定义提供商特定的配置模式
3. 在 `OcrService` 中注册提供商
4. 添加配置 UI 组件
5. 包含全面的测试
> [!WARNING]
> 提供商服务绝不应直接访问数据层。所有数据操作必须通过 OCR 服务层进行,以保持适当的关注点分离。
### 配置更改
1. 更新提供商配置模式
2. 为现有配置添加迁移逻辑
3. 更新 UI 验证和错误处理
4. 测试各种配置场景
> [!WARNING]
> 在保存到数据库之前,务必验证配置更改。使用 Zod 模式进行运行时验证,防止提供商配置损坏。
### 测试
- **单元测试**: 提供商实现测试
- **集成测试**: 端到端 OCR 工作流
- **性能测试**: 资源使用和时间
- **错误场景**: 全面的错误处理测试

View File

@@ -0,0 +1,10 @@
-- Creates the `ocr_provider` table: one row per OCR provider, keyed by `id`.
-- `capabilities` and `config` are text columns that hold JSON documents.
-- `created_at` / `updated_at` are nullable integers (no NOT NULL constraint).
CREATE TABLE `ocr_provider` (
`id` text PRIMARY KEY NOT NULL,
`name` text NOT NULL,
`capabilities` text NOT NULL,
`config` text NOT NULL,
`created_at` integer,
`updated_at` integer
);
--> statement-breakpoint
-- Non-unique lookup index on the provider display name.
-- NOTE(review): the index itself is called `name`; SQLite index names live in a
-- schema-wide namespace, so a more specific name may be safer - confirm.
CREATE INDEX `name` ON `ocr_provider` (`name`);

View File

@@ -0,0 +1,172 @@
{
"version": "6",
"dialect": "sqlite",
"id": "64f7ad88-7111-4574-988c-d7ef429e375d",
"prevId": "de8009d7-95b9-4f99-99fa-4b8795708f21",
"tables": {
"app_state": {
"name": "app_state",
"columns": {
"key": {
"name": "key",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"ocr_provider": {
"name": "ocr_provider",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"capabilities": {
"name": "capabilities",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"config": {
"name": "config",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"name": {
"name": "name",
"columns": ["name"],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"preference": {
"name": "preference",
"columns": {
"scope": {
"name": "scope",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"key": {
"name": "key",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"scope_name_idx": {
"name": "scope_name_idx",
"columns": ["scope", "key"],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -7,6 +7,13 @@
"tag": "0000_solid_lord_hawal",
"version": "6",
"when": 1754745234572
},
{
"idx": 1,
"version": "6",
"when": 1760969721294,
"tag": "0001_previous_sir_ram",
"breakpoints": true
}
],
"version": "7"

View File

@@ -85,6 +85,7 @@
"@libsql/client": "0.14.0",
"@libsql/win32-x64-msvc": "^0.4.7",
"@napi-rs/system-ocr": "patch:@napi-rs/system-ocr@npm%3A1.0.2#~/.yarn/patches/@napi-rs-system-ocr-npm-1.0.2-59e7a78e8b.patch",
"@radix-ui/react-tabs": "^1.1.13",
"@strongtz/win32-arm64-msvc": "^0.4.7",
"express": "^5.1.0",
"font-list": "^2.0.0",
@@ -94,7 +95,7 @@
"officeparser": "^4.2.0",
"os-proxy-config": "^1.1.2",
"selection-hook": "^1.0.12",
"sharp": "^0.34.3",
"sharp": "0.34.4",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"tesseract.js": "patch:tesseract.js@npm%3A6.0.1#~/.yarn/patches/tesseract.js-npm-6.0.1-2562a7e46d.patch",
@@ -390,12 +391,12 @@
"vite": "npm:rolldown-vite@7.1.5",
"tesseract.js@npm:*": "patch:tesseract.js@npm%3A6.0.1#~/.yarn/patches/tesseract.js-npm-6.0.1-2562a7e46d.patch",
"@ai-sdk/google@npm:2.0.20": "patch:@ai-sdk/google@npm%3A2.0.20#~/.yarn/patches/@ai-sdk-google-npm-2.0.20-b9102f9d54.patch",
"@img/sharp-darwin-arm64": "0.34.3",
"@img/sharp-darwin-x64": "0.34.3",
"@img/sharp-linux-arm": "0.34.3",
"@img/sharp-linux-arm64": "0.34.3",
"@img/sharp-linux-x64": "0.34.3",
"@img/sharp-win32-x64": "0.34.3",
"@img/sharp-darwin-arm64": "0.34.4",
"@img/sharp-darwin-x64": "0.34.4",
"@img/sharp-linux-arm": "0.34.4",
"@img/sharp-linux-arm64": "0.34.4",
"@img/sharp-linux-x64": "0.34.4",
"@img/sharp-win32-x64": "0.34.4",
"openai@npm:5.12.2": "npm:@cherrystudio/openai@6.5.0"
},
"packageManager": "yarn@4.9.1",

View File

@@ -369,8 +369,7 @@ export enum IpcChannel {
CodeTools_RemoveCustomTerminalPath = 'code-tools:remove-custom-terminal-path',
// OCR
OCR_ocr = 'ocr:ocr',
OCR_ListProviders = 'ocr:list-providers',
OCR_Ocr = 'ocr:ocr',
// OVMS
Ovms_AddModel = 'ovms:add-model',

View File

@@ -0,0 +1,176 @@
import type {
BuiltinOcrProvider,
BuiltinOcrProviderId,
OcrOvProvider,
OcrPpocrProvider,
OcrSystemProvider,
OcrTesseractProvider,
TesseractLangCode
} from '@types'
import type { TranslateLanguageCode } from '../../../src/renderer/src/types/translate'
/**
 * Initial definition of the built-in Tesseract provider.
 *
 * Declares image OCR support, pre-selects the `chi_sim`, `chi_tra` and `eng`
 * languages, and starts disabled.
 */
export const tesseract: OcrTesseractProvider = {
id: 'tesseract',
name: 'Tesseract',
capabilities: {
image: true
},
config: {
// Language selection is a map of Tesseract language code -> selected flag.
langs: {
chi_sim: true,
chi_tra: true,
eng: true
},
enabled: false
}
} as const
/**
 * Initial definition of the built-in system OCR provider.
 *
 * Declares image OCR support, defaults to the `en-us` language, and starts disabled.
 */
export const systemOcr: OcrSystemProvider = {
id: 'system',
name: 'System',
capabilities: {
image: true
// pdf: true
},
config: {
// Unlike Tesseract, languages are listed as an array of locale codes.
langs: ['en-us'],
enabled: false
}
} as const satisfies OcrSystemProvider
/**
 * Initial definition of the built-in PaddleOCR provider.
 *
 * Declares image OCR support and starts disabled with an empty `apiUrl`,
 * which has to be filled in before the provider can reach a server.
 */
export const ppocrOcr: OcrPpocrProvider = {
id: 'paddleocr',
name: 'PaddleOCR',
capabilities: {
image: true
// pdf: true
},
config: { apiUrl: '', enabled: false }
} as const
/**
 * Initial definition of the built-in Intel OV (NPU) OCR provider.
 *
 * Declares image OCR support; its only configuration is the `enabled` flag,
 * which starts as `false`.
 */
export const ovOcr: OcrOvProvider = {
id: 'ovocr',
name: 'Intel OV(NPU) OCR',
capabilities: {
image: true
// pdf: true
},
config: {
enabled: false
}
} as const satisfies OcrOvProvider
/**
 * Built-in provider defaults keyed by provider id.
 *
 * The `satisfies` clause makes the compiler check that every
 * `BuiltinOcrProviderId` has an entry and that each entry is a `BuiltinOcrProvider`.
 */
export const INITIAL_BUILTIN_OCR_PROVIDER_MAP = {
tesseract,
system: systemOcr,
paddleocr: ppocrOcr,
ovocr: ovOcr
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
/** The same built-in defaults as a list, in the key order of the map above. */
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(INITIAL_BUILTIN_OCR_PROVIDER_MAP)
/**
 * Maps every translate language code (lower-case locale tag, e.g. `en-us`)
 * to the Tesseract language code used for OCR (e.g. `eng`).
 *
 * Typed as a full `Record`, so adding a member to `TranslateLanguageCode`
 * without adding an entry here is a compile error.
 *
 * NOTE(review): `'la-la'` maps to `'lao'`, yet `la` is the ISO 639-1 code for Latin
 * (Lao is `lo`), and Latin is separately keyed as `'la-va'`. The key is dictated by
 * `TranslateLanguageCode`, so verify it there before changing anything.
 */
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
'af-za': 'afr',
'am-et': 'amh',
'ar-sa': 'ara',
'as-in': 'asm',
'az-az': 'aze',
'az-cyrl-az': 'aze_cyrl',
'be-by': 'bel',
'bn-bd': 'ben',
'bo-cn': 'bod',
'bs-ba': 'bos',
'bg-bg': 'bul',
'ca-es': 'cat',
'ceb-ph': 'ceb',
'cs-cz': 'ces',
'zh-cn': 'chi_sim',
'zh-tw': 'chi_tra',
'chr-us': 'chr',
'cy-gb': 'cym',
'da-dk': 'dan',
'de-de': 'deu',
'dz-bt': 'dzo',
'el-gr': 'ell',
'en-us': 'eng',
'enm-gb': 'enm',
'eo-world': 'epo',
'et-ee': 'est',
'eu-es': 'eus',
'fa-ir': 'fas',
'fi-fi': 'fin',
'fr-fr': 'fra',
'frk-de': 'frk',
'frm-fr': 'frm',
'ga-ie': 'gle',
'gl-es': 'glg',
'grc-gr': 'grc',
'gu-in': 'guj',
'ht-ht': 'hat',
'he-il': 'heb',
'hi-in': 'hin',
'hr-hr': 'hrv',
'hu-hu': 'hun',
'iu-ca': 'iku',
'id-id': 'ind',
'is-is': 'isl',
'it-it': 'ita',
'ita-it': 'ita_old',
'jv-id': 'jav',
'ja-jp': 'jpn',
'kn-in': 'kan',
'ka-ge': 'kat',
'kat-ge': 'kat_old',
'kk-kz': 'kaz',
'km-kh': 'khm',
'ky-kg': 'kir',
'ko-kr': 'kor',
'ku-tr': 'kur',
'la-la': 'lao',
'la-va': 'lat',
'lv-lv': 'lav',
'lt-lt': 'lit',
'ml-in': 'mal',
'mr-in': 'mar',
'mk-mk': 'mkd',
'mt-mt': 'mlt',
'ms-my': 'msa',
'my-mm': 'mya',
'ne-np': 'nep',
'nl-nl': 'nld',
'no-no': 'nor',
'or-in': 'ori',
'pa-in': 'pan',
'pl-pl': 'pol',
'pt-pt': 'por',
'ps-af': 'pus',
'ro-ro': 'ron',
'ru-ru': 'rus',
'sa-in': 'san',
'si-lk': 'sin',
'sk-sk': 'slk',
'sl-si': 'slv',
'es-es': 'spa',
'spa-es': 'spa_old',
'sq-al': 'sqi',
'sr-rs': 'srp',
'sr-latn-rs': 'srp_latn',
'sw-tz': 'swa',
'sv-se': 'swe',
'syr-sy': 'syr',
'ta-in': 'tam',
'te-in': 'tel',
'tg-tj': 'tgk',
'tl-ph': 'tgl',
'th-th': 'tha',
'ti-er': 'tir',
'tr-tr': 'tur',
'ug-cn': 'uig',
'uk-ua': 'ukr',
'ur-pk': 'urd',
'uz-uz': 'uzb',
'uz-cyrl-uz': 'uzb_cyrl',
'vi-vn': 'vie',
'yi-us': 'yid'
}

View File

@@ -1,5 +1,18 @@
// NOTE: Types are defined inline in the schema for simplicity
// If needed, specific types can be imported from './apiModels'
import type {
CreateOcrProviderRequest,
CreateOcrProviderResponse,
GetOcrProviderResponse,
ListOcrProvidersQuery,
ListOcrProvidersResponse,
OcrProviderId,
ReplaceOcrProviderRequest,
ReplaceOcrProviderResponse,
UpdateOcrProviderRequest,
UpdateOcrProviderResponse
} from '@types'
import type { BodyForPath, ConcreteApiPaths, QueryParamsForPath, ResponseForPath } from './apiPaths'
import type { HttpMethod, PaginatedResponse, PaginationParams } from './apiTypes'
@@ -345,6 +358,38 @@ export interface ApiSchemas {
}>
}
}
'/ocr/providers': {
GET: {
query: ListOcrProvidersQuery
response: ListOcrProvidersResponse
}
POST: {
body: CreateOcrProviderRequest
response: CreateOcrProviderResponse
}
}
'/ocr/providers/:id': {
GET: {
params: { id: OcrProviderId }
response: GetOcrProviderResponse
}
PATCH: {
params: { id: OcrProviderId }
body: UpdateOcrProviderRequest
response: UpdateOcrProviderResponse
}
PUT: {
params: { id: OcrProviderId }
body: ReplaceOcrProviderRequest
response: ReplaceOcrProviderResponse
}
DELETE: {
params: { id: OcrProviderId }
response: void
}
}
}
/**

View File

@@ -351,6 +351,8 @@ export interface PreferenceSchemas {
'feature.translate.model_prompt': string
// redux/settings/targetLanguage
'feature.translate.target_language': string
// redux/ocr/imageProviderId
'ocr.settings.image_provider_id': string | null
// redux/shortcuts/shortcuts.exit_fullscreen
'shortcut.app.exit_fullscreen': Record<string, unknown>
// redux/shortcuts/shortcuts.search_message
@@ -612,6 +614,7 @@ export const DefaultPreferences: PreferenceSchemas = {
'feature.selection.trigger_mode': PreferenceTypes.SelectionTriggerMode.Selected,
'feature.translate.model_prompt': TRANSLATE_PROMPT,
'feature.translate.target_language': 'en-us',
'ocr.settings.image_provider_id': null,
'shortcut.app.exit_fullscreen': { editable: false, enabled: true, key: ['Escape'], system: true },
'shortcut.app.search_message': {
editable: true,

View File

@@ -0,0 +1,2 @@
export * from './json'
export * from './net'

View File

@@ -0,0 +1,7 @@
/**
 * Parses a JSON string without ever throwing.
 *
 * The result is typed `unknown` (the former `unknown | null` was the same type,
 * because `unknown` already includes `null`), so callers must narrow or validate it.
 *
 * @param text - Raw JSON text.
 * @returns The parsed value, or `null` when `text` is not valid JSON. The JSON
 * literal `null` also parses to `null`, so the two cases are indistinguishable
 * to the caller.
 */
export function safeParseJson(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    // Malformed input is an expected case here, not an error worth propagating.
    return null
  }
}

View File

@@ -87,3 +87,5 @@ export * from './primitives/dialog'
export * from './primitives/popover'
export * from './primitives/radioGroup'
export * from './primitives/shadcn-io/dropzone'
export * from './primitives/shadcn-io/skeleton'
export * from './primitives/shadcn-io/tabs'

View File

@@ -0,0 +1,7 @@
import { cn } from '@cherrystudio/ui/utils'
/**
 * Placeholder `div` with a pulsing background.
 * Caller-supplied `className` is merged after the base classes, and every other
 * `div` prop is forwarded unchanged.
 */
function Skeleton(props: React.ComponentProps<'div'>) {
  const { className, ...rest } = props
  const mergedClassName = cn('bg-accent animate-pulse rounded-md', className)

  return <div data-slot="skeleton" className={mergedClassName} {...rest} />
}
export { Skeleton }

View File

@@ -0,0 +1,39 @@
import { cn } from '@cherrystudio/ui/utils'
import * as TabsPrimitive from '@radix-ui/react-tabs'
import * as React from 'react'
/**
 * Root of the tabs widget: wraps the Radix `Tabs.Root` in a vertical flex
 * layout and forwards all remaining props to it.
 */
function Tabs(props: React.ComponentProps<typeof TabsPrimitive.Root>) {
  const { className, ...rest } = props
  const rootClassName = cn('flex flex-col gap-2', className)

  return <TabsPrimitive.Root data-slot="tabs" className={rootClassName} {...rest} />
}
/**
 * Container for the tab triggers: a Radix `Tabs.List` with the base list
 * classes, followed by any caller-supplied `className`.
 */
function TabsList(props: React.ComponentProps<typeof TabsPrimitive.List>) {
  const { className, ...rest } = props
  const listClassName = cn(
    'bg-muted text-muted-foreground inline-flex h-9 w-fit items-center justify-center rounded-lg p-[3px]',
    className
  )

  return <TabsPrimitive.List data-slot="tabs-list" className={listClassName} {...rest} />
}
/**
 * Single tab button: a Radix `Tabs.Trigger` carrying the base trigger classes
 * (active, focus-visible and disabled variants), followed by any
 * caller-supplied `className`.
 */
function TabsTrigger(props: React.ComponentProps<typeof TabsPrimitive.Trigger>) {
  const { className, ...rest } = props
  const triggerClassName = cn(
    "data-[state=active]:bg-background dark:data-[state=active]:text-foreground focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring dark:data-[state=active]:border-input dark:data-[state=active]:bg-input/30 text-foreground dark:text-muted-foreground inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center gap-1.5 rounded-md border border-transparent px-2 py-1 text-sm font-medium whitespace-nowrap transition-[color,box-shadow] focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:shadow-sm [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className
  )

  return <TabsPrimitive.Trigger data-slot="tabs-trigger" className={triggerClassName} {...rest} />
}
/**
 * Panel shown for the active tab: a Radix `Tabs.Content` that grows to fill the
 * remaining space and suppresses the focus outline.
 */
function TabsContent(props: React.ComponentProps<typeof TabsPrimitive.Content>) {
  const { className, ...rest } = props
  const contentClassName = cn('flex-1 outline-none', className)

  return <TabsPrimitive.Content data-slot="tabs-content" className={contentClassName} {...rest} />
}
export { Tabs, TabsContent, TabsList, TabsTrigger }

View File

@@ -4,9 +4,9 @@ const { downloadNpmPackage } = require('./utils')
// if you want to add new prebuild binaries packages with different architectures, you can add them here
// please add to allX64 and allArm64 from yarn.lock
const allArm64 = {
'@img/sharp-darwin-arm64': '0.34.3',
'@img/sharp-win32-arm64': '0.34.3',
'@img/sharp-linux-arm64': '0.34.3',
'@img/sharp-darwin-arm64': '0.34.4',
'@img/sharp-win32-arm64': '0.34.4',
'@img/sharp-linux-arm64': '0.34.4',
'@img/sharp-libvips-darwin-arm64': '1.2.0',
'@img/sharp-libvips-linux-arm64': '1.2.0',
@@ -20,9 +20,9 @@ const allArm64 = {
}
const allX64 = {
'@img/sharp-darwin-x64': '0.34.3',
'@img/sharp-linux-x64': '0.34.3',
'@img/sharp-win32-x64': '0.34.3',
'@img/sharp-darwin-x64': '0.34.4',
'@img/sharp-linux-x64': '0.34.4',
'@img/sharp-win32-x64': '0.34.4',
'@img/sharp-libvips-darwin-x64': '1.2.0',
'@img/sharp-libvips-linux-x64': '1.2.0',

View File

@@ -5,6 +5,7 @@
* TypeScript will error if any endpoint is missing.
*/
import { ocrService } from '@main/services/ocr/OcrService'
import type { ApiImplementation } from '@shared/data/api/apiSchemas'
import { TestService } from '../services/TestService'
@@ -12,6 +13,7 @@ import { TestService } from '../services/TestService'
// Service instances
const testService = TestService.getInstance()
// Defining all handlers here feels a bit bloated; perhaps we should modularize things?
/**
* Complete API handlers implementation
* Must implement every path+method combination from ApiSchemas
@@ -207,5 +209,40 @@ export const apiHandlers: ApiImplementation = {
data: { executed: true, timestamp: new Date().toISOString() }
}))
}
},
'/ocr/providers': {
GET: async ({ query }) => {
const result = await ocrService.listProviders(query)
return { data: result }
},
POST: async ({ body }) => {
const result = await ocrService.createProvider(body)
return { data: result }
}
},
'/ocr/providers/:id': {
GET: async ({ params }) => {
const result = await ocrService.getProvider(params.id)
return { data: result }
},
PATCH: async ({ params, body }) => {
if (params.id !== body.id) {
throw new Error('Provider ID in path does not match ID in body')
}
const result = await ocrService.updateProvider(params.id, body)
return { data: result }
},
PUT: async ({ params, body }) => {
if (params.id !== body.id) {
throw new Error('Provider ID in path does not match ID in body')
}
const result = await ocrService.replaceProvider(body)
return { data: result }
},
DELETE: async ({ params }) => {
return ocrService.deleteProvider(params.id)
}
}
}

View File

@@ -3,6 +3,9 @@ import type { PaginationParams, ServiceOptions } from '@shared/data/api/apiTypes
/**
* Standard service interface for data operations
* Defines the contract that all services should implement
* @template T - Type of the entity returned by service methods
* @template TCreate - Type of the data required to create a new entity
* @template TUpdate - Type of the data required to update an existing entity
*/
export interface IBaseService<T = any, TCreate = any, TUpdate = any> {
/**

View File

@@ -0,0 +1,299 @@
import { loggerService } from '@logger'
import { dbService } from '@main/data/db/DbService'
import { ocrProviderTable } from '@main/data/db/schemas/ocrProvider'
import type { PaginationParams, ServiceOptions } from '@shared/data/api/apiTypes'
import type { DbOcrProvider, DbOcrProviderCreate, DbOcrProviderReplace, DbOcrProviderUpdate } from '@types'
import { BuiltinOcrProviderIds, isDbOcrProvider } from '@types'
import dayjs from 'dayjs'
import { eq } from 'drizzle-orm'
import { merge } from 'lodash'
import type { IBaseService } from './IBaseService'
const logger = loggerService.withContext('OcrProviderService')
/**
 * Service layer for OCR providers.
 *
 * Implements the standard `IBaseService` contract on top of the `ocr_provider`
 * table and adds two provider-specific operations (`replace`,
 * `initializeBuiltInProviders`). Every method logs a failure and rethrows it.
 *
 * NOTE: Not completely finished since the data architecture is not completely
 * designed and implemented. It's an early version.
 */
export class OcrProviderService implements IBaseService<DbOcrProvider, DbOcrProviderCreate, DbOcrProviderUpdate> {
  /**
   * Find an OCR provider by ID.
   *
   * @param id - Provider id (primary key).
   * @returns The stored provider, or `null` (after logging a warning) when no row matches.
   * @throws Rethrows any database error after logging it.
   */
  async findById(id: string, _options?: ServiceOptions): Promise<DbOcrProvider | null> {
    try {
      const providers = await dbService
        .getDb()
        .select()
        .from(ocrProviderTable)
        .where(eq(ocrProviderTable.id, id))
        .limit(1)

      if (providers.length === 0) {
        logger.warn(`OCR provider ${id} not found`)
        return null
      }

      logger.debug(`Retrieved OCR provider: ${id}`)
      return providers[0]
    } catch (error) {
      logger.error(`Failed to find OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Find multiple OCR providers with pagination.
   *
   * All rows are loaded and paginated in memory. Two modes are supported:
   * - cursor mode (`params.cursor` set): the page starts right after the row whose
   *   id equals the cursor; an unknown cursor falls back to the first page;
   * - offset mode: the page is selected by `params.page` (1-based).
   *
   * In both modes at most `limit` items are returned and `hasNext` is `true` only
   * when rows remain after the returned page.
   *
   * NOTE(review): the query has no ORDER BY, so page boundaries rely on the
   * database returning rows in a stable order - confirm this is acceptable.
   *
   * @param params - Pagination parameters (`page` defaults to 1, `limit` to 20).
   * @returns The page items, the total row count, `hasNext`, and - when another
   * page exists - `nextCursor` (id of the last returned item).
   * @throws Rethrows any database error after logging it.
   */
  async findMany(
    params: PaginationParams & Record<string, any>,
    _options?: ServiceOptions
  ): Promise<{
    items: DbOcrProvider[]
    total: number
    hasNext?: boolean
    nextCursor?: string
  }> {
    try {
      const { page = 1, limit = 20, cursor } = params

      const allProviders = await dbService.getDb().select().from(ocrProviderTable)

      // Apply filters if provided
      if (params.registered) {
        // This filter would need access to the OCR service registry
        // For now, we'll return all providers and let the service layer filter
        logger.debug('Registered filter requested - returning all providers for service layer filtering')
      }

      const total = allProviders.length

      let startIndex: number
      if (cursor) {
        // Cursor-based pagination: resume right after the cursor row.
        const cursorIndex = allProviders.findIndex((p) => p.id === cursor)
        if (cursorIndex === -1) {
          // Previously an unknown cursor returned the whole table and ignored `limit`.
          logger.warn(`Pagination cursor ${cursor} not found - falling back to the first page`)
        }
        startIndex = cursorIndex + 1 // -1 + 1 === 0, i.e. the first page
      } else {
        // Offset-based pagination
        startIndex = (page - 1) * limit
      }

      const endIndex = startIndex + limit
      const providers = allProviders.slice(startIndex, endIndex)

      // A next page exists only when this page is full AND rows remain after it.
      // (The former cursor-mode check was true for every full page, even the last one.)
      const hasNext = providers.length === limit && endIndex < total

      logger.debug(`Retrieved ${providers.length} OCR providers`, { total, page, limit })

      return {
        items: providers,
        total,
        hasNext,
        nextCursor: hasNext && providers.length > 0 ? providers[providers.length - 1].id : undefined
      }
    } catch (error) {
      logger.error('Failed to find OCR providers', error as Error)
      throw error
    }
  }

  /**
   * Create a new OCR provider.
   *
   * Stamps `createdAt` and `updatedAt` with the current time and validates the
   * record with `isDbOcrProvider` before inserting it.
   *
   * @param data - Provider to insert; `data.id` must not exist yet.
   * @returns The inserted row.
   * @throws If the id already exists, the record fails validation, or the database fails.
   */
  async create(data: DbOcrProviderCreate, _options?: ServiceOptions): Promise<DbOcrProvider> {
    try {
      // Check if provider already exists
      const existing = await this.findById(data.id)
      if (existing) {
        throw new Error(`OCR provider ${data.id} already exists`)
      }

      const timestamp = dayjs().valueOf()
      const newProvider = {
        ...data,
        createdAt: timestamp,
        updatedAt: timestamp
      } satisfies DbOcrProvider

      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      const [created] = await dbService.getDb().insert(ocrProviderTable).values(newProvider).returning()

      logger.info(`Created OCR provider: ${data.id}`)
      return created
    } catch (error) {
      logger.error(`Failed to create OCR provider ${data.id}`, error as Error)
      throw error
    }
  }

  /**
   * Update an existing OCR provider (partial update).
   *
   * `data` is deep-merged into the stored record with lodash `merge`, then
   * `updatedAt` is refreshed and the result is validated before being written.
   *
   * NOTE(review): lodash `merge` recurses into nested objects and arrays and skips
   * `undefined` source values, so a patch cannot remove a nested key or shorten an
   * array - confirm this is the intended PATCH semantics for `config`.
   *
   * @param id - Id of the provider to update.
   * @param data - Partial provider data to merge in.
   * @returns The updated row.
   * @throws If the provider does not exist, the merged record fails validation, or the database fails.
   */
  async update(id: string, data: DbOcrProviderUpdate, _options?: ServiceOptions): Promise<DbOcrProvider> {
    try {
      const existing = await this.findById(id)
      if (!existing) {
        throw new Error(`OCR provider ${id} not found`)
      }

      const newProvider = {
        ...merge({}, existing, data),
        updatedAt: dayjs().valueOf()
      } satisfies DbOcrProvider

      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      const [updated] = await dbService
        .getDb()
        .update(ocrProviderTable)
        .set(newProvider)
        .where(eq(ocrProviderTable.id, id))
        .returning()

      logger.info(`Updated OCR provider: ${id}`)
      return updated
    } catch (error) {
      logger.error(`Failed to update OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Delete an OCR provider.
   *
   * @param id - Id of the provider to delete.
   * @throws If `id` is a built-in provider, the provider does not exist, or the database fails.
   */
  async delete(id: string, _options?: ServiceOptions): Promise<void> {
    try {
      // Check if it's a built-in provider
      if (BuiltinOcrProviderIds.some((pid) => pid === id)) {
        throw new Error('Built-in OCR providers cannot be deleted.')
      }

      // Check if provider exists
      const existing = await this.findById(id)
      if (!existing) {
        throw new Error(`OCR provider ${id} not found`)
      }

      await dbService.getDb().delete(ocrProviderTable).where(eq(ocrProviderTable.id, id))

      logger.info(`Deleted OCR provider: ${id}`)
    } catch (error) {
      logger.error(`Failed to delete OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Check whether an OCR provider exists.
   *
   * @param id - Provider id to look up.
   * @returns `true` when `findById` returns a row, `false` otherwise.
   * @throws Rethrows any database error after logging it.
   */
  async exists(id: string, _options?: ServiceOptions): Promise<boolean> {
    try {
      const provider = await this.findById(id)
      return provider !== null
    } catch (error) {
      logger.error(`Failed to check if OCR provider ${id} exists`, error as Error)
      throw error
    }
  }

  /**
   * Replace an OCR provider (full update / upsert).
   * This method is specific to OCR providers and not part of IBaseService.
   *
   * When the provider already exists its `createdAt` is kept; otherwise the
   * provider is created with `createdAt` set to now. `updatedAt` is always now.
   *
   * @param data - Complete provider record to store.
   * @returns The stored row.
   * @throws If `data.id` is a built-in provider, the record fails validation, or the database fails.
   */
  async replace(data: DbOcrProviderReplace): Promise<DbOcrProvider> {
    try {
      // Check if it's a built-in provider
      if (BuiltinOcrProviderIds.some((pid) => pid === data.id)) {
        throw new Error('Built-in OCR providers cannot be modified with PUT method.')
      }

      const timestamp = dayjs().valueOf()

      // One lookup answers both "does it exist?" and "what was its createdAt?".
      const current = await this.findById(data.id)

      const newProvider: DbOcrProvider = {
        ...data,
        createdAt: current ? current.createdAt : timestamp,
        updatedAt: timestamp
      }

      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      // Upsert keyed on the primary key, so concurrent creation of the same id still succeeds.
      const [saved] = await dbService
        .getDb()
        .insert(ocrProviderTable)
        .values(newProvider)
        .onConflictDoUpdate({
          target: ocrProviderTable.id,
          set: newProvider
        })
        .returning()

      logger.info(`Replaced OCR provider: ${data.id}`)
      return saved
    } catch (error) {
      logger.error(`Failed to replace OCR provider ${data.id}`, error as Error)
      throw error
    }
  }

  /**
   * Initialize built-in providers in the database.
   * This method is specific to OCR providers and not part of IBaseService.
   *
   * Inserts every entry of `BUILTIN_OCR_PROVIDERS` that is not stored yet;
   * existing rows are left untouched.
   *
   * @throws Rethrows any import, lookup or insert failure after logging it.
   */
  async initializeBuiltInProviders(): Promise<void> {
    try {
      // Import built-in provider configurations
      const { BUILTIN_OCR_PROVIDERS } = await import('@shared/config/ocr')

      logger.info('Initializing built-in OCR providers')

      // Check and create each built-in provider if it doesn't exist
      for (const provider of BUILTIN_OCR_PROVIDERS) {
        const exists = await this.exists(provider.id)
        if (!exists) {
          logger.info(`Creating built-in OCR provider: ${provider.id}`)
          await this.create(provider)
        } else {
          logger.debug(`Built-in OCR provider already exists: ${provider.id}`)
        }
      }

      logger.info(`Initialized ${BUILTIN_OCR_PROVIDERS.length} built-in OCR providers`)
    } catch (error) {
      logger.error('Failed to initialize built-in OCR providers', error as Error)
      throw error
    }
  }
}
// Export singleton instance
export const ocrProviderService = new OcrProviderService()

View File

@@ -0,0 +1,49 @@
import type { OcrProviderCapabilityRecord, OcrProviderConfig } from '@types'
import { index, sqliteTable, text } from 'drizzle-orm/sqlite-core'
import { createUpdateTimestamps } from './columnHelpers'
/**
 * Drizzle schema for the `ocr_provider` SQLite table.
 *
 * One row per OCR provider. `capabilities` and `config` are JSON documents kept in
 * text columns; the timestamp columns come from the shared `createUpdateTimestamps` helper.
 */
export const ocrProviderTable = sqliteTable(
'ocr_provider',
{
/**
* Unique identifier for the provider.
* For built-in providers, it's 'tesseract', 'system', etc.
* For custom providers, it can be any unique string (we typically use UUID v4).
* As the primary key, it ensures the uniqueness of each provider.
*/
id: text('id').primaryKey(),
/**
* Display name of the provider, e.g., "Tesseract OCR".
* For built-in providers, this value is used internally and is not exposed to users; the display name shown in the UI is locale-based by i18n.
* Cannot be null.
*/
name: text('name').notNull(),
/**
* Object describing the provider's capabilities, e.g., { image: true }.
* Stored as JSON in a text column. Drizzle's `mode: 'json'` handles
* serialization and deserialization automatically. `$type` provides strong typing.
* Cannot be null; should store an empty object `{}` even if no specific capabilities.
*/
capabilities: text('capabilities', { mode: 'json' }).$type<OcrProviderCapabilityRecord>().notNull(),
/**
* Provider-specific configuration. This is a polymorphic field, its structure varies by provider type.
* For example, Tesseract's configuration is entirely different from PaddleOCR's.
* Storing it as JSON is the most flexible approach to accommodate any configuration structure.
* Since this is a polymorphic field, both frontend and backend must validate
* that the structure matches the expected schema for the corresponding provider type
* before saving.
*/
config: text('config', { mode: 'json' }).$type<OcrProviderConfig>().notNull(),
/** Unix timestamp (milliseconds since epoch) for creation and last update. */
...createUpdateTimestamps
},
// Non-unique index on the display name.
// NOTE(review): the index is literally named 'name'; SQLite index names are
// schema-wide, so a table-specific name may avoid future clashes - confirm.
(t) => [index('name').on(t.name)]
)
/** Row shape accepted when inserting into `ocr_provider` (inferred from the table). */
export type OcrProviderInsert = typeof ocrProviderTable.$inferInsert
/** Row shape returned when selecting from `ocr_provider` (inferred from the table). */
export type OcrProviderSelect = typeof ocrProviderTable.$inferSelect

View File

@@ -8,6 +8,8 @@
* === AUTO-GENERATED CONTENT START ===
*/
import type { PreferenceSchemas } from '@shared/data/preference/preferenceSchemas'
/**
* ElectronStore映射关系 - 简单一层结构
*
@@ -252,6 +254,8 @@ export const REDUX_STORE_MAPPINGS = {
},
{
originalKey: 'mathEngine',
// TODO
// @ts-expect-error check how to fix it later
targetKey: 'chat.message.math_engine'
},
{
@@ -336,6 +340,8 @@ export const REDUX_STORE_MAPPINGS = {
},
{
originalKey: 'topicNamingPrompt',
// TODO
// @ts-expect-error check how to fix it later
targetKey: 'topic.naming.prompt'
},
{
@@ -664,6 +670,8 @@ export const REDUX_STORE_MAPPINGS = {
},
{
originalKey: 'nutstoreSyncState',
// TODO
// @ts-expect-error check how to fix it later
targetKey: 'data.backup.nutstore.sync_state'
},
{
@@ -736,8 +744,17 @@ export const REDUX_STORE_MAPPINGS = {
originalKey: 'shortcuts.exit_fullscreen',
targetKey: 'shortcut.app.exit_fullscreen'
}
],
ocr: [
{
originalKey: 'ocr.imageProviderId',
targetKey: 'ocr.settings.image_provider_id'
}
]
} as const
} as const satisfies Record<
string,
Array<{ originalKey: string; targetKey: keyof PreferenceSchemas[keyof PreferenceSchemas] }>
>
// === AUTO-GENERATED CONTENT END ===

View File

@@ -0,0 +1,256 @@
import { dbService } from '@data/db/DbService'
import { ocrProviderTable } from '@data/db/schemas/ocrProvider'
import { loggerService } from '@logger'
import type {
DbOcrProvider,
DbOcrProviderCreate,
DbOcrProviderReplace,
DbOcrProviderUpdate,
OcrProviderId
} from '@types'
import { BuiltinOcrProviderIds, isDbOcrProvider } from '@types'
import dayjs from 'dayjs'
import { eq } from 'drizzle-orm'
import { merge } from 'lodash'
const logger = loggerService.withContext('OcrProviderRepository')
/**
 * Overwrite array-valued fields of `target` with the arrays found at the same
 * path in `patch`, recursing through nested plain objects.
 *
 * lodash `merge` combines arrays index by index, so patching
 * `['en-us', 'zh-cn']` with `['ja-jp']` yields `['ja-jp', 'zh-cn']`. Calling
 * this after `merge` makes a patched array replace the stored one instead.
 *
 * @param target - Object produced by the deep merge; mutated in place.
 * @param patch - The partial update that was merged into `target`.
 */
function overwritePatchedArrays(target: object, patch: object): void {
  const targetRecord = target as Record<string, unknown>
  for (const [key, patchValue] of Object.entries(patch)) {
    if (Array.isArray(patchValue)) {
      // Copy so later mutation of the patch cannot leak into the merged object.
      targetRecord[key] = [...patchValue]
      continue
    }
    const targetValue = targetRecord[key]
    if (
      typeof patchValue === 'object' &&
      patchValue !== null &&
      typeof targetValue === 'object' &&
      targetValue !== null &&
      !Array.isArray(targetValue)
    ) {
      overwritePatchedArrays(targetValue, patchValue)
    }
  }
}

/**
 * Data access layer for OCR providers.
 * Handles all database operations and data validation for `ocrProviderTable`.
 *
 * Every method logs a failure through the module logger and rethrows it, so
 * callers always observe the original error.
 *
 * TODO: This class is already functional, but the data interaction service should be
 * migrated to src/main/data/api/services.
 *
 * The reason why the migration hasn't been completed yet is that the data
 * architecture is still under development, and we need to wait until the
 * architectural design is finalized before proceeding with the migration.
 */
export class OcrProviderRepository {
  /**
   * Get all OCR providers.
   *
   * @returns Every row of the OCR provider table.
   */
  public async findAll(): Promise<DbOcrProvider[]> {
    try {
      const providers = await dbService.getDb().select().from(ocrProviderTable)
      return providers
    } catch (error) {
      logger.error('Failed to find all OCR providers', error as Error)
      throw error
    }
  }

  /**
   * Get OCR provider by ID.
   *
   * @param id - Provider ID to look up.
   * @returns The matching provider row.
   * @throws Error when no provider with this ID exists.
   */
  public async findById(id: OcrProviderId): Promise<DbOcrProvider> {
    try {
      const providers = await dbService
        .getDb()
        .select()
        .from(ocrProviderTable)
        .where(eq(ocrProviderTable.id, id))
        .limit(1)
      if (providers.length === 0) {
        throw new Error(`OCR provider ${id} not found`)
      }
      return providers[0]
    } catch (error) {
      logger.error(`Failed to find OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Check if provider exists.
   *
   * Selects only the `id` column so the check does not load the stored config.
   *
   * @param id - Provider ID to check.
   * @returns `true` when a row with this ID is present.
   */
  public async exists(id: OcrProviderId): Promise<boolean> {
    try {
      const providers = await dbService
        .getDb()
        .select({ id: ocrProviderTable.id })
        .from(ocrProviderTable)
        .where(eq(ocrProviderTable.id, id))
        .limit(1)
      return providers.length > 0
    } catch (error) {
      logger.error(`Failed to check if OCR provider ${id} exists`, error as Error)
      throw error
    }
  }

  /**
   * Create new OCR provider.
   *
   * @param param - Provider data; `createdAt` / `updatedAt` are stamped here.
   * @returns The inserted row.
   * @throws Error when the ID is already taken or the data fails validation.
   */
  public async create(param: DbOcrProviderCreate): Promise<DbOcrProvider> {
    try {
      // Check if provider already exists
      if (await this.exists(param.id)) {
        throw new Error(`OCR provider ${param.id} already exists`)
      }
      const timestamp = dayjs().valueOf()
      const newProvider = {
        ...param,
        createdAt: timestamp,
        updatedAt: timestamp
      } satisfies DbOcrProvider
      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }
      const [created] = await dbService.getDb().insert(ocrProviderTable).values(newProvider).returning()
      logger.info(`Created OCR provider: ${param.id}`)
      return created
    } catch (error) {
      logger.error(`Failed to create OCR provider ${param.id}`, error as Error)
      throw error
    }
  }

  /**
   * Update OCR provider (partial update).
   *
   * Object fields are deep-merged into the stored record; array fields present
   * in `update` replace the stored arrays entirely.
   *
   * @param id - ID of the provider to update.
   * @param update - Partial provider data to apply.
   * @returns The updated row.
   * @throws Error when the provider does not exist or the merged data fails validation.
   */
  public async update(id: OcrProviderId, update: DbOcrProviderUpdate): Promise<DbOcrProvider> {
    try {
      const existing = await this.findById(id)
      const merged = merge({}, existing, update)
      // lodash merges arrays element-wise; make patched arrays win as a whole.
      overwritePatchedArrays(merged, update)
      const newProvider = {
        ...merged,
        updatedAt: dayjs().valueOf()
      } satisfies DbOcrProvider
      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }
      const [updated] = await dbService
        .getDb()
        .update(ocrProviderTable)
        .set(newProvider)
        .where(eq(ocrProviderTable.id, id))
        .returning()
      // The row may have been removed between the read above and this write.
      if (!updated) {
        throw new Error(`OCR provider ${id} not found`)
      }
      logger.info(`Updated OCR provider: ${id}`)
      return updated
    } catch (error) {
      logger.error(`Failed to update OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Replace OCR provider (full update).
   *
   * Inserts the provider, or overwrites it when the ID already exists while
   * keeping the original `createdAt`.
   *
   * @param data - Complete provider data.
   * @returns The saved row.
   * @throws Error when `data.id` is a built-in provider ID or the data fails validation.
   */
  public async replace(data: DbOcrProviderReplace): Promise<DbOcrProvider> {
    try {
      // Check if it's a built-in provider
      if (BuiltinOcrProviderIds.some((pid) => pid === data.id)) {
        throw new Error('Built-in OCR providers cannot be modified with PUT method.')
      }
      // One lookup is enough: only the creation time of an existing row is needed.
      const [current] = await dbService
        .getDb()
        .select({ createdAt: ocrProviderTable.createdAt })
        .from(ocrProviderTable)
        .where(eq(ocrProviderTable.id, data.id))
        .limit(1)
      const timestamp = dayjs().valueOf()
      const newProvider: DbOcrProvider = {
        ...data,
        createdAt: current?.createdAt ?? timestamp,
        updatedAt: timestamp
      }
      // Validate data structure
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }
      const [saved] = await dbService
        .getDb()
        .insert(ocrProviderTable)
        .values(newProvider)
        .onConflictDoUpdate({
          target: ocrProviderTable.id,
          set: newProvider
        })
        .returning()
      logger.info(`Replaced OCR provider: ${data.id}`)
      return saved
    } catch (error) {
      logger.error(`Failed to replace OCR provider ${data.id}`, error as Error)
      throw error
    }
  }

  /**
   * Delete OCR provider.
   *
   * @param id - ID of the provider to delete.
   * @throws Error when `id` is a built-in provider ID or no such provider exists.
   */
  public async delete(id: OcrProviderId): Promise<void> {
    try {
      // Check if it's a built-in provider
      if (BuiltinOcrProviderIds.some((pid) => pid === id)) {
        throw new Error('Built-in OCR providers cannot be deleted.')
      }
      // Check if provider exists
      if (!(await this.exists(id))) {
        throw new Error(`OCR provider ${id} not found`)
      }
      await dbService.getDb().delete(ocrProviderTable).where(eq(ocrProviderTable.id, id))
      logger.info(`Deleted OCR provider: ${id}`)
    } catch (error) {
      logger.error(`Failed to delete OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Initialize built-in providers in database.
   *
   * Creates a row for each entry of `BUILTIN_OCR_PROVIDERS` that is not stored
   * yet; existing rows are left untouched.
   */
  public async initializeBuiltInProviders(): Promise<void> {
    try {
      // Import built-in provider configurations
      const { BUILTIN_OCR_PROVIDERS } = await import('@shared/config/ocr')
      logger.info('Initializing built-in OCR providers')
      // Check and create each built-in provider if it doesn't exist
      for (const provider of BUILTIN_OCR_PROVIDERS) {
        const exists = await this.exists(provider.id)
        if (!exists) {
          logger.info(`Creating built-in OCR provider: ${provider.id}`)
          await this.create(provider)
        } else {
          logger.debug(`Built-in OCR provider already exists: ${provider.id}`)
        }
      }
      logger.info(`Initialized ${BUILTIN_OCR_PROVIDERS.length} built-in OCR providers`)
    } catch (error) {
      logger.error('Failed to initialize built-in OCR providers', error as Error)
      throw error
    }
  }
}
/** Module-level instance of {@link OcrProviderRepository} exported for use by other modules. */
export const ocrProviderRepository = new OcrProviderRepository()

View File

@@ -18,7 +18,7 @@ import type {
AgentPersistedMessage,
FileMetadata,
Notification,
OcrProvider,
OcrParams,
Provider,
Shortcut,
SupportedOcrFile
@@ -875,10 +875,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
)
// OCR
ipcMain.handle(IpcChannel.OCR_ocr, (_, file: SupportedOcrFile, provider: OcrProvider) =>
ocrService.ocr(file, provider)
)
ipcMain.handle(IpcChannel.OCR_ListProviders, () => ocrService.listProviderIds())
ipcMain.handle(IpcChannel.OCR_Ocr, (_, file: SupportedOcrFile, params: OcrParams) => ocrService.ocr(file, params))
// OVMS
ipcMain.handle(IpcChannel.Ovms_AddModel, (_, modelName: string, modelId: string, modelSource: string, task: string) =>

View File

@@ -1,8 +1,21 @@
import { loggerService } from '@logger'
import { isLinux } from '@main/constant'
import type { OcrHandler, OcrProvider, OcrResult, SupportedOcrFile } from '@types'
import { BuiltinOcrProviderIds } from '@types'
import { ocrProviderRepository } from '@main/data/repositories/OcrProviderRepository'
import type {
DbOcrProvider,
ListOcrProvidersQuery,
OcrParams,
OcrProvider,
OcrProviderBusiness,
OcrProviderCreateBusiness,
OcrProviderKeyBusiness,
OcrProviderReplaceBusiness,
OcrProviderUpdateBusiness,
OcrResult,
SupportedOcrFile
} from '@types'
import { BuiltinOcrProviderIdMap } from '@types'
import type { OcrBaseService } from './builtin/OcrBaseService'
import { ovOcrService } from './builtin/OvOcrService'
import { ppocrService } from './builtin/PpocrService'
import { systemOcrService } from './builtin/SystemOcrService'
@@ -10,40 +23,285 @@ import { tesseractService } from './builtin/TesseractService'
const logger = loggerService.withContext('OcrService')
export class OcrService {
private registry: Map<string, OcrHandler> = new Map()
/**
* Business logic layer for OCR operations
* Handles OCR provider registration, orchestration, and core OCR functionality
*/
class OcrService {
private registry: Map<OcrProviderKeyBusiness, OcrBaseService> = new Map()
private initialized: boolean = false
register(providerId: string, handler: OcrHandler): void {
if (this.registry.has(providerId)) {
logger.warn(`Provider ${providerId} has existing handler. Overwrited.`)
constructor() {
this.registerBuiltinProviders()
}
/**
* Ensure the service is initialized
*/
private async ensureInitialized(): Promise<void> {
if (!this.initialized) {
await this.initializeBuiltinProviders()
this.initialized = true
}
this.registry.set(providerId, handler)
}
unregister(providerId: string): void {
this.registry.delete(providerId)
/**
 * Make sure the built-in OCR providers are present in the database.
 * Delegates to the repository; a failure is logged and rethrown.
 */
private async initializeBuiltinProviders(): Promise<void> {
  try {
    // The repository creates any built-in provider row that is still missing.
    await ocrProviderRepository.initializeBuiltInProviders()
    logger.info('OCR service initialized with built-in providers')
  } catch (cause) {
    logger.error('Failed to initialize OCR service', cause as Error)
    throw cause
  }
}
public listProviderIds(): string[] {
/**
 * Synchronously register the bundled OCR backends.
 * Backends whose service export is undefined are skipped.
 */
private registerBuiltinProviders(): void {
  // Order matters only for the log output; it mirrors the original sequence.
  const builtins: Array<[OcrProviderKeyBusiness, OcrBaseService | undefined]> = [
    [BuiltinOcrProviderIdMap.tesseract, tesseractService],
    [BuiltinOcrProviderIdMap.system, systemOcrService],
    [BuiltinOcrProviderIdMap.paddleocr, ppocrService],
    [BuiltinOcrProviderIdMap.ovocr, ovOcrService]
  ]
  for (const [providerId, service] of builtins) {
    if (service) {
      this.register(providerId, service)
    }
  }
}
/**
 * Store an OCR service in the registry under the given provider ID.
 * An existing entry for the same ID is overwritten after a warning.
 */
private register(providerId: OcrProviderKeyBusiness, service: OcrBaseService): void {
  const alreadyRegistered = this.registry.has(providerId)
  if (alreadyRegistered) {
    logger.warn(`Provider ${providerId} already registered. Overwriting.`)
  }

  this.registry.set(providerId, service)
  logger.info(`Registered OCR provider: ${providerId}`)
}
// Not sure when it will be needed.
/**
* Unregister an OCR provider service
*/
// private unregister(providerId: OcrProviderId): void {
// if (this.registry.delete(providerId)) {
// logger.info(`Unregistered OCR provider: ${providerId}`)
// }
// }
/**
 * List the IDs of every provider that currently has a registered service.
 */
public getRegisteredProviderIds(): OcrProviderKeyBusiness[] {
  return [...this.registry.keys()]
}
public async ocr(file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> {
const handler = this.registry.get(provider.id)
if (!handler) {
throw new Error(`Provider ${provider.id} is not registered`)
/**
 * Tell whether a service is registered for the given provider ID.
 */
public isProviderRegistered(providerId: OcrProviderKeyBusiness): boolean {
  const registered = this.registry.has(providerId)
  return registered
}
/**
* Get list of OCR providers
*/
public async listProviders(query?: ListOcrProvidersQuery): Promise<OcrProviderBusiness[]> {
try {
await this.ensureInitialized()
const providers = await ocrProviderRepository.findAll()
let result = providers
if (query?.registered) {
// Filter by registered providers
const registeredIds = this.getRegisteredProviderIds()
result = providers.filter((provider) => registeredIds.includes(provider.id))
}
logger.debug(`Listed ${result.length} OCR providers`)
return result
} catch (error) {
logger.error('Failed to list OCR providers', error as Error)
throw error
}
return handler(file, provider.config)
}
/**
 * Fetch one OCR provider record by ID.
 * Rethrows (after logging) whatever the repository throws, including "not found".
 */
public async getProvider(providerId: OcrProviderKeyBusiness): Promise<OcrProviderBusiness> {
  try {
    await this.ensureInitialized()
    const found = await ocrProviderRepository.findById(providerId)
    logger.debug(`Retrieved OCR provider: ${providerId}`)
    return found
  } catch (cause) {
    logger.error(`Failed to get OCR provider ${providerId}`, cause as Error)
    throw cause
  }
}
/**
 * Persist a new OCR provider through the repository.
 * Failures are logged and rethrown.
 */
public async createProvider(data: OcrProviderCreateBusiness): Promise<OcrProviderBusiness> {
  try {
    await this.ensureInitialized()
    const created = await ocrProviderRepository.create(data)
    logger.info(`Created OCR provider: ${data.id}`)
    return created
  } catch (cause) {
    logger.error(`Failed to create OCR provider ${data.id}`, cause as Error)
    throw cause
  }
}
/**
 * Apply a partial update to an OCR provider through the repository.
 * Failures are logged and rethrown.
 */
public async updateProvider(
  id: OcrProviderKeyBusiness,
  data: OcrProviderUpdateBusiness
): Promise<OcrProviderBusiness> {
  try {
    await this.ensureInitialized()
    const updated = await ocrProviderRepository.update(id, data)
    logger.info(`Updated OCR provider: ${id}`)
    return updated
  } catch (cause) {
    logger.error(`Failed to update OCR provider ${id}`, cause as Error)
    throw cause
  }
}
/**
 * Replace an OCR provider wholesale (full update) through the repository.
 * Failures are logged and rethrown.
 */
public async replaceProvider(data: OcrProviderReplaceBusiness): Promise<OcrProviderBusiness> {
  try {
    await this.ensureInitialized()
    const saved = await ocrProviderRepository.replace(data)
    logger.info(`Replaced OCR provider: ${data.id}`)
    return saved
  } catch (cause) {
    logger.error(`Failed to replace OCR provider ${data.id}`, cause as Error)
    throw cause
  }
}
/**
 * Remove an OCR provider through the repository.
 * Failures are logged and rethrown.
 */
public async deleteProvider(id: OcrProviderKeyBusiness): Promise<void> {
  try {
    await this.ensureInitialized()
    await ocrProviderRepository.delete(id)
    logger.info(`Deleted OCR provider: ${id}`)
  } catch (cause) {
    logger.error(`Failed to delete OCR provider ${id}`, cause as Error)
    throw cause
  }
}
/**
 * Perform OCR on a file using the specified provider.
 *
 * The provider must have a registered service and a stored record; the stored
 * `config` of that record is what gets passed to the service.
 *
 * @param file - File to recognise.
 * @param params - OCR parameters; `providerId` selects the provider.
 * @returns The OCR result produced by the provider's service.
 * @throws Error when the provider is not registered, is missing from the
 * database, or the underlying service fails.
 */
public async ocr(file: SupportedOcrFile, params: OcrParams): Promise<OcrResult> {
  try {
    await this.ensureInitialized()
    const service = this.registry.get(params.providerId)
    if (!service) {
      throw new Error(`Provider ${params.providerId} is not registered`)
    }
    // Validate that the provider exists in database
    const provider = await this.getProvider(params.providerId)
    // Log only the ID: the record carries the provider's `config`, which should
    // not be dumped into log files (it may hold endpoints or credentials).
    logger.debug(`Performing OCR with provider: ${provider.id}`)
    const result = await service.ocr(file, provider.config)
    logger.info(`OCR completed successfully with provider: ${params.providerId}`)
    return result
  } catch (error) {
    logger.error(`OCR failed with provider ${params.providerId}`, error as Error)
    throw error
  }
}
/**
 * Report whether a provider can be used: it needs a registered service and a
 * record in the database. Any lookup failure is treated as "not available".
 */
public async isProviderAvailable(providerId: OcrProviderKeyBusiness): Promise<boolean> {
  try {
    if (!this.registry.get(providerId)) {
      return false
    }
    // getProvider throws when the record is missing, which lands in the catch below.
    await this.getProvider(providerId)
    // Additional availability checks can be added here
    return true
  } catch (cause) {
    logger.debug(`Provider ${providerId} is not available`, cause as Error)
    return false
  }
}
/**
 * Registry-only availability check for an already loaded provider record.
 * Returns false instead of throwing if the lookup fails.
 */
private async _isProviderAvailable(provider: OcrProvider): Promise<boolean> {
  try {
    const service = this.registry.get(provider.id)
    return service !== undefined
  } catch (cause) {
    logger.debug(`Provider ${provider.id} is not available`, cause as Error)
    return false
  }
}
/**
 * Get the providers that can currently be used.
 * Only the image capability is considered for now; this may be redesigned to
 * take a specific file type in the future.
 */
public async getAvailableProvidersForFile(): Promise<DbOcrProvider[]> {
  try {
    const allProviders = await this.listProviders()
    // Keep providers that declare the image capability; this is the place to
    // extend the logic for other file types and capabilities.
    const supportsImage = (provider: OcrProvider) => provider.capabilities.image
    const imageProviders = allProviders.filter(supportsImage)

    const usable: DbOcrProvider[] = []
    for (const candidate of imageProviders) {
      const available = await this._isProviderAvailable(candidate)
      if (available) {
        usable.push(candidate)
      }
    }
    logger.debug(`Found ${usable.length} available providers for file`)
    return usable
  } catch (cause) {
    logger.error('Failed to get available providers for file', cause as Error)
    throw cause
  }
}
/**
 * Release resources held by the service by emptying the provider registry.
 */
public dispose(): void {
  const { registry } = this
  registry.clear()
  logger.info('OCR service disposed')
}
}
export const ocrService = new OcrService()
// Register built-in providers
ocrService.register(BuiltinOcrProviderIds.tesseract, tesseractService.ocr.bind(tesseractService))
!isLinux && ocrService.register(BuiltinOcrProviderIds.system, systemOcrService.ocr.bind(systemOcrService))
ocrService.register(BuiltinOcrProviderIds.paddleocr, ppocrService.ocr.bind(ppocrService))
ovOcrService.isAvailable() && ocrService.register(BuiltinOcrProviderIds.ovocr, ovOcrService.ocr.bind(ovOcrService))

View File

@@ -1,7 +1,7 @@
import { loggerService } from '@logger'
import { isWin } from '@main/constant'
import type { OcrOvConfig, OcrResult, SupportedOcrFile } from '@types'
import { isImageFileMetadata } from '@types'
import type { OcrOvConfig, OcrProviderConfig, OcrResult, SupportedOcrFile } from '@types'
import { isImageFileMetadata, isOcrOvConfig } from '@types'
import { exec } from 'child_process'
import * as fs from 'fs'
import * as os from 'os'
@@ -15,20 +15,17 @@ const execAsync = promisify(exec)
const PATH_BAT_FILE = path.join(os.homedir(), '.cherrystudio', 'ovms', 'ovocr', 'run.npu.bat')
const isOvAvailable =
isWin &&
os.cpus()[0].model.toLowerCase().includes('intel') &&
os.cpus()[0].model.toLowerCase().includes('ultra') &&
fs.existsSync(PATH_BAT_FILE)
export class OvOcrService extends OcrBaseService {
constructor() {
super()
}
public isAvailable(): boolean {
return (
isWin &&
os.cpus()[0].model.toLowerCase().includes('intel') &&
os.cpus()[0].model.toLowerCase().includes('ultra') &&
fs.existsSync(PATH_BAT_FILE)
)
}
private getOvOcrPath(): string {
return path.join(os.homedir(), '.cherrystudio', 'ovms', 'ovocr')
}
@@ -81,8 +78,8 @@ export class OvOcrService extends OcrBaseService {
}
}
private async ocrImage(filePath: string, options?: OcrOvConfig): Promise<OcrResult> {
logger.info(`OV OCR called on ${filePath} with options ${JSON.stringify(options)}`)
private async ocrImage(filePath: string, config?: OcrOvConfig): Promise<OcrResult> {
logger.info(`OV OCR called on ${filePath} with options ${JSON.stringify(config)}`)
try {
// 1. Clear img directory and output directory
@@ -117,13 +114,16 @@ export class OvOcrService extends OcrBaseService {
}
}
public ocr = async (file: SupportedOcrFile, options?: OcrOvConfig): Promise<OcrResult> => {
public ocr = async (file: SupportedOcrFile, config?: OcrProviderConfig): Promise<OcrResult> => {
if (!isOcrOvConfig(config)) {
throw new Error('Invalid OCR OV config')
}
if (isImageFileMetadata(file)) {
return this.ocrImage(file.path, options)
return this.ocrImage(file.path, config)
} else {
throw new Error('Unsupported file type, currently only image files are supported')
}
}
}
export const ovOcrService = new OvOcrService()
export const ovOcrService = isOvAvailable ? new OvOcrService() : undefined

View File

@@ -1,6 +1,6 @@
import { loadOcrImage } from '@main/utils/ocr'
import type { ImageFileMetadata, OcrPpocrConfig, OcrResult, SupportedOcrFile } from '@types'
import { isImageFileMetadata } from '@types'
import { isImageFileMetadata, isOcrPpocrConfig } from '@types'
import { net } from 'electron'
import * as z from 'zod'
@@ -40,14 +40,17 @@ const OcrResponseSchema = z.object({
})
export class PpocrService extends OcrBaseService {
public ocr = async (file: SupportedOcrFile, options?: OcrPpocrConfig): Promise<OcrResult> => {
public ocr = async (file: SupportedOcrFile, config?: OcrPpocrConfig): Promise<OcrResult> => {
if (!isOcrPpocrConfig(config)) {
throw new Error('Invalid OCR config')
}
if (!isImageFileMetadata(file)) {
throw new Error('Only image files are supported currently')
}
if (!options) {
if (!config) {
throw new Error('config is required')
}
return this.imageOcr(file, options)
return this.imageOcr(file, config)
}
private async imageOcr(file: ImageFileMetadata, options: OcrPpocrConfig): Promise<OcrResult> {

View File

@@ -1,8 +1,8 @@
import { isLinux, isWin } from '@main/constant'
import { loadOcrImage } from '@main/utils/ocr'
import { OcrAccuracy, recognize } from '@napi-rs/system-ocr'
import type { ImageFileMetadata, OcrResult, OcrSystemConfig, SupportedOcrFile } from '@types'
import { isImageFileMetadata } from '@types'
import type { ImageFileMetadata, OcrProviderConfig, OcrResult, OcrSystemConfig, SupportedOcrFile } from '@types'
import { isImageFileMetadata, isOcrSystemConfig } from '@types'
import { OcrBaseService } from './OcrBaseService'
@@ -12,23 +12,26 @@ export class SystemOcrService extends OcrBaseService {
super()
}
private async ocrImage(file: ImageFileMetadata, options?: OcrSystemConfig): Promise<OcrResult> {
private async ocrImage(file: ImageFileMetadata, config?: OcrSystemConfig): Promise<OcrResult> {
if (isLinux) {
return { text: '' }
}
const buffer = await loadOcrImage(file)
const langs = isWin ? options?.langs : undefined
const langs = isWin ? config?.langs : undefined
const result = await recognize(buffer, OcrAccuracy.Accurate, langs)
return { text: result.text }
}
public ocr = async (file: SupportedOcrFile, options?: OcrSystemConfig): Promise<OcrResult> => {
public ocr = async (file: SupportedOcrFile, config?: OcrProviderConfig): Promise<OcrResult> => {
if (!isOcrSystemConfig(config)) {
throw new Error('Invalid OCR configuration')
}
if (isImageFileMetadata(file)) {
return this.ocrImage(file, options)
return this.ocrImage(file, config)
} else {
throw new Error('Unsupported file type, currently only image files are supported')
}
}
}
export const systemOcrService = new SystemOcrService()
export const systemOcrService = !isLinux ? new SystemOcrService() : undefined

View File

@@ -2,8 +2,8 @@ import { loggerService } from '@logger'
import { getIpCountry } from '@main/utils/ipService'
import { loadOcrImage } from '@main/utils/ocr'
import { MB } from '@shared/config/constant'
import type { ImageFileMetadata, OcrResult, OcrTesseractConfig, SupportedOcrFile } from '@types'
import { isImageFileMetadata } from '@types'
import type { ImageFileMetadata, OcrProviderConfig, OcrResult, OcrTesseractConfig, SupportedOcrFile } from '@types'
import { isImageFileMetadata, isOcrTesseractConfig } from '@types'
import { app } from 'electron'
import fs from 'fs'
import { isEqual } from 'lodash'
@@ -70,8 +70,8 @@ export class TesseractService extends OcrBaseService {
return this.worker
}
private async imageOcr(file: ImageFileMetadata, options?: OcrTesseractConfig): Promise<OcrResult> {
const worker = await this.getWorker(options)
private async imageOcr(file: ImageFileMetadata, config?: OcrTesseractConfig): Promise<OcrResult> {
const worker = await this.getWorker(config)
const stat = await fs.promises.stat(file.path)
if (stat.size > MB_SIZE_THRESHOLD * MB) {
throw new Error(`This image is too large (max ${MB_SIZE_THRESHOLD}MB)`)
@@ -81,11 +81,14 @@ export class TesseractService extends OcrBaseService {
return { text: result.data.text }
}
public ocr = async (file: SupportedOcrFile, options?: OcrTesseractConfig): Promise<OcrResult> => {
public ocr = async (file: SupportedOcrFile, config?: OcrProviderConfig): Promise<OcrResult> => {
if (!isOcrTesseractConfig(config)) {
throw new Error('Invalid Tesseract config')
}
if (!isImageFileMetadata(file)) {
throw new Error('Only image files are supported currently')
}
return this.imageOcr(file, options)
return this.imageOcr(file, config)
}
private async _getLangPath(): Promise<string> {

View File

@@ -12,7 +12,7 @@ import type {
} from '@shared/data/preference/preferenceTypes'
import type { UpgradeChannel } from '@shared/data/preference/preferenceTypes'
import { IpcChannel } from '@shared/IpcChannel'
import type { Notification } from '@types'
import type { Notification, OcrParams } from '@types'
import type {
AddMemoryOptions,
AssistantMessage,
@@ -27,7 +27,6 @@ import type {
MemoryConfig,
MemoryListOptions,
MemorySearchOptions,
OcrProvider,
OcrResult,
Provider,
RestartApiServerStatusResult,
@@ -476,9 +475,8 @@ const api = {
ipcRenderer.invoke(IpcChannel.CodeTools_RemoveCustomTerminalPath, terminalId)
},
ocr: {
ocr: (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> =>
ipcRenderer.invoke(IpcChannel.OCR_ocr, file, provider),
listProviders: (): Promise<string[]> => ipcRenderer.invoke(IpcChannel.OCR_ListProviders)
ocr: (file: SupportedOcrFile, params: OcrParams): Promise<OcrResult> =>
ipcRenderer.invoke(IpcChannel.OCR_Ocr, file, params)
},
cherryai: {
generateSignature: (params: { method: string; path: string; query: string; body: Record<string, any> }) =>

View File

@@ -3,7 +3,7 @@ import { Button, Tooltip } from '@cherrystudio/ui'
import { restoreFromS3 } from '@renderer/services/BackupService'
import type { S3Config } from '@renderer/types'
import { formatFileSize } from '@renderer/utils'
import { Modal, Space, Table } from 'antd'
import { Modal, Table } from 'antd'
import dayjs from 'dayjs'
import { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'

View File

@@ -1,182 +1 @@
import type {
BuiltinOcrProvider,
BuiltinOcrProviderId,
OcrOvProvider,
OcrPpocrProvider,
OcrProviderCapability,
OcrSystemProvider,
OcrTesseractProvider,
TesseractLangCode,
TranslateLanguageCode
} from '@renderer/types'
import { isMac, isWin } from './constant'
const tesseract: OcrTesseractProvider = {
id: 'tesseract',
name: 'Tesseract',
capabilities: {
image: true
},
config: {
langs: {
chi_sim: true,
chi_tra: true,
eng: true
}
}
} as const
const systemOcr: OcrSystemProvider = {
id: 'system',
name: 'System',
config: {
langs: isWin ? ['en-us'] : undefined
},
capabilities: {
image: true
// pdf: true
}
} as const satisfies OcrSystemProvider
const ppocrOcr: OcrPpocrProvider = {
id: 'paddleocr',
name: 'PaddleOCR',
config: {
apiUrl: ''
},
capabilities: {
image: true
// pdf: true
}
} as const
const ovOcr: OcrOvProvider = {
id: 'ovocr',
name: 'Intel OV(NPU) OCR',
config: {
langs: isWin ? ['en-us', 'zh-cn'] : undefined
},
capabilities: {
image: true
// pdf: true
}
} as const satisfies OcrOvProvider
export const BUILTIN_OCR_PROVIDERS_MAP = {
tesseract,
system: systemOcr,
paddleocr: ppocrOcr,
ovocr: ovOcr
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP)
export const DEFAULT_OCR_PROVIDER = {
image: isWin || isMac ? systemOcr : tesseract
} as const satisfies Record<OcrProviderCapability, BuiltinOcrProvider>
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
'af-za': 'afr',
'am-et': 'amh',
'ar-sa': 'ara',
'as-in': 'asm',
'az-az': 'aze',
'az-cyrl-az': 'aze_cyrl',
'be-by': 'bel',
'bn-bd': 'ben',
'bo-cn': 'bod',
'bs-ba': 'bos',
'bg-bg': 'bul',
'ca-es': 'cat',
'ceb-ph': 'ceb',
'cs-cz': 'ces',
'zh-cn': 'chi_sim',
'zh-tw': 'chi_tra',
'chr-us': 'chr',
'cy-gb': 'cym',
'da-dk': 'dan',
'de-de': 'deu',
'dz-bt': 'dzo',
'el-gr': 'ell',
'en-us': 'eng',
'enm-gb': 'enm',
'eo-world': 'epo',
'et-ee': 'est',
'eu-es': 'eus',
'fa-ir': 'fas',
'fi-fi': 'fin',
'fr-fr': 'fra',
'frk-de': 'frk',
'frm-fr': 'frm',
'ga-ie': 'gle',
'gl-es': 'glg',
'grc-gr': 'grc',
'gu-in': 'guj',
'ht-ht': 'hat',
'he-il': 'heb',
'hi-in': 'hin',
'hr-hr': 'hrv',
'hu-hu': 'hun',
'iu-ca': 'iku',
'id-id': 'ind',
'is-is': 'isl',
'it-it': 'ita',
'ita-it': 'ita_old',
'jv-id': 'jav',
'ja-jp': 'jpn',
'kn-in': 'kan',
'ka-ge': 'kat',
'kat-ge': 'kat_old',
'kk-kz': 'kaz',
'km-kh': 'khm',
'ky-kg': 'kir',
'ko-kr': 'kor',
'ku-tr': 'kur',
'la-la': 'lao',
'la-va': 'lat',
'lv-lv': 'lav',
'lt-lt': 'lit',
'ml-in': 'mal',
'mr-in': 'mar',
'mk-mk': 'mkd',
'mt-mt': 'mlt',
'ms-my': 'msa',
'my-mm': 'mya',
'ne-np': 'nep',
'nl-nl': 'nld',
'no-no': 'nor',
'or-in': 'ori',
'pa-in': 'pan',
'pl-pl': 'pol',
'pt-pt': 'por',
'ps-af': 'pus',
'ro-ro': 'ron',
'ru-ru': 'rus',
'sa-in': 'san',
'si-lk': 'sin',
'sk-sk': 'slk',
'sl-si': 'slv',
'es-es': 'spa',
'spa-es': 'spa_old',
'sq-al': 'sqi',
'sr-rs': 'srp',
'sr-latn-rs': 'srp_latn',
'sw-tz': 'swa',
'sv-se': 'swe',
'syr-sy': 'syr',
'ta-in': 'tam',
'te-in': 'tel',
'tg-tj': 'tgk',
'tl-ph': 'tgl',
'th-th': 'tha',
'ti-er': 'tir',
'tr-tr': 'tur',
'ug-cn': 'uig',
'uk-ua': 'ukr',
'ur-pk': 'urd',
'uz-uz': 'uzb',
'uz-cyrl-uz': 'uzb_cyrl',
'vi-vn': 'vie',
'yi-us': 'yid'
}
// All configuration has been migrated to @shared/config/ocr

View File

@@ -1,18 +1,21 @@
import { loggerService } from '@logger'
import * as OcrService from '@renderer/services/ocr/OcrService'
import type { ImageFileMetadata, SupportedOcrFile } from '@renderer/types'
import type { ImageFileMetadata, OcrProvider, SupportedOcrFile } from '@renderer/types'
import { isImageFileMetadata } from '@renderer/types'
import { formatErrorMessage } from '@renderer/utils/error'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { useOcrProviders } from './useOcrProvider'
import { useOcrImageProvider } from './useOcrImageProvider'
const logger = loggerService.withContext('useOcr')
/**
 * Type guard telling whether an OCR provider has actually been resolved.
 *
 * Both `undefined` and `null` are rejected, so a caller that passes the guard
 * can safely read fields such as `provider.id`.
 *
 * @param provider - Provider value that may still be `undefined` or `null`.
 * @returns `true` only when `provider` is a real provider object.
 */
const isProviderAvailable = (provider: OcrProvider | undefined | null): provider is OcrProvider =>
  provider !== undefined && provider !== null
export const useOcr = () => {
const { t } = useTranslation()
const { imageProvider } = useOcrProviders()
const { imageProvider, imageProviderId } = useOcrImageProvider()
/**
* OCR识别
@@ -22,10 +25,16 @@ export const useOcr = () => {
*/
const ocrImage = useCallback(
async (image: ImageFileMetadata) => {
logger.debug('ocrImage', { config: imageProvider.config })
return OcrService.ocr(image, imageProvider)
if (isProviderAvailable(imageProvider)) {
logger.debug('ocrImage', { provider: imageProvider })
return OcrService.ocr(image, {
providerId: imageProvider.id
})
} else {
throw new Error(t('ocr.error.provider.not_availabel', { provider: imageProviderId }))
}
},
[imageProvider]
[imageProvider, imageProviderId, t]
)
/**

View File

@@ -0,0 +1,9 @@
import { usePreference } from '@data/hooks/usePreference'
import { useOcrProvider } from './useOcrProvider'
/**
 * Hook exposing the OCR provider selected for images.
 *
 * Reads the `ocr.settings.image_provider_id` preference and resolves the
 * matching provider through `useOcrProvider`.
 */
export const useOcrImageProvider = () => {
  const [imageProviderId, setImageProviderId] = usePreference('ocr.settings.image_provider_id')
  const providerState = useOcrProvider(imageProviderId)

  return {
    imageProvider: providerState.provider,
    loading: providerState.loading,
    mutating: providerState.mutating,
    error: providerState.error,
    updateConfig: providerState.updateConfig,
    imageProviderId,
    setImageProviderId
  }
}

View File

@@ -0,0 +1,37 @@
import { useMutation, useQuery } from '@data/hooks/useDataApi'
import type { OcrProviderConfig } from '@renderer/types'
import { getErrorMessage } from '@renderer/utils'
import type { ConcreteApiPaths } from '@shared/data/api'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
// const logger = loggerService.withContext('useOcrProvider')
/**
 * Hook for reading a single OCR provider and patching its config.
 *
 * @param id - Provider ID, or `null` when no provider is selected.
 * @returns The provider (`undefined` while loading, `null` when `id` is null),
 * the query and mutation loading flags, the query error, and `updateConfig`.
 */
export const useOcrProvider = (id: string | null) => {
  const { t } = useTranslation()
  // NOTE(review): the path is built even when `id` is null (".../null") and is
  // still handed to useQuery — confirm whether the query should be skipped then.
  const path: ConcreteApiPaths = `/ocr/providers/${id}`
  const { data, loading, error } = useQuery(path)
  const { mutate, loading: mutating } = useMutation('PATCH', path)

  // Sends a partial config as a PATCH; a failure is shown as a toast, not rethrown.
  const updateConfig = useCallback(
    async (update: Partial<OcrProviderConfig>) => {
      if (!id) {
        return
      }
      try {
        const body = { id, config: update }
        await mutate({ body })
      } catch (e) {
        window.toast.error({ title: t('ocr.provider.config.patch.error.failed'), description: getErrorMessage(e) })
      }
    },
    [id, mutate, t]
  )

  /** undefined: loading; null: invalid, id is null */
  const provider = id ? data?.data : null

  return { provider, loading, mutating, error, updateConfig }
}

View File

@@ -0,0 +1,19 @@
import { useQuery } from '@data/hooks/useDataApi'
import { getBuiltinOcrProviderLabel } from '@renderer/i18n/label'
import type { ListOcrProvidersQuery, OcrProvider } from '@renderer/types'
import { isBuiltinOcrProvider } from '@renderer/types'
/**
 * Hook listing OCR providers from the `/ocr/providers` endpoint.
 *
 * @param query - Optional list filter forwarded to the endpoint.
 * @returns The provider list (undefined until data arrives), loading and error
 * state, and a helper that resolves a provider's display name.
 */
export const useOcrProviders = (query?: ListOcrProvidersQuery) => {
  const { data, loading, error } = useQuery('/ocr/providers', { query })

  // Built-in providers use their i18n label; other providers use their own name.
  const getOcrProviderName = (p: OcrProvider) => {
    if (isBuiltinOcrProvider(p)) {
      return getBuiltinOcrProviderLabel(p.id)
    }
    return p.name
  }

  const providers = data?.data
  return { providers, loading, error, getOcrProviderName }
}

View File

@@ -1,148 +0,0 @@
import { Avatar } from '@cherrystudio/ui'
import { loggerService } from '@logger'
import IntelLogo from '@renderer/assets/images/providers/intel.png'
import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png'
import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png'
import { BUILTIN_OCR_PROVIDERS_MAP, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
import { getBuiltinOcrProviderLabel } from '@renderer/i18n/label'
import { useAppSelector } from '@renderer/store'
import { addOcrProvider, removeOcrProvider, setImageOcrProviderId, updateOcrProviderConfig } from '@renderer/store/ocr'
import type { ImageOcrProvider, OcrProvider, OcrProviderConfig } from '@renderer/types'
import { isBuiltinOcrProvider, isBuiltinOcrProviderId, isImageOcrProvider } from '@renderer/types'
import { FileQuestionMarkIcon, MonitorIcon } from 'lucide-react'
import { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useDispatch } from 'react-redux'
const logger = loggerService.withContext('useOcrProvider')
/**
 * Redux-backed hook exposing the OCR provider list and the currently selected image provider.
 *
 * Returns the raw provider list, the resolved image provider, mutation helpers
 * (`addProvider`, `removeProvider`, `setImageProviderId`), a display-name helper and
 * a logo component.
 */
export const useOcrProviders = () => {
const providers = useAppSelector((state) => state.ocr.providers)
// NOTE(review): `.filter()` builds a new array on every render, so `imageProviders`
// changes identity each render and re-triggers the effect below that depends on it.
const imageProviders = providers.filter(isImageOcrProvider)
const imageProviderId = useAppSelector((state) => state.ocr.imageProviderId)
// Starts from the default image provider; the effect below replaces it once the store is read.
const [imageProvider, setImageProvider] = useState<ImageOcrProvider>(DEFAULT_OCR_PROVIDER.image)
const dispatch = useDispatch()
const { t } = useTranslation()
/**
* Adds a new OCR provider.
* @param provider - OCR provider object containing its id and other configuration
* @throws {Error} When a provider with the same id already exists
*/
const addProvider = useCallback(
(provider: OcrProvider) => {
if (providers.some((p) => p.id === provider.id)) {
const msg = `Provider with id ${provider.id} already exists`
logger.error(msg)
window.toast.error(t('ocr.error.provider.existing'))
throw new Error(msg)
}
dispatch(addOcrProvider(provider))
},
[dispatch, providers, t]
)
/**
* Removes an OCR provider.
* @param id - Id of the OCR provider to remove
* @throws {Error} When attempting to remove a built-in provider
*/
const removeProvider = (id: string) => {
if (isBuiltinOcrProviderId(id)) {
const msg = `Cannot remove builtin provider ${id}`
logger.error(msg)
window.toast.error(t('ocr.error.provider.cannot_remove_builtin'))
throw new Error(msg)
}
dispatch(removeOcrProvider(id))
}
// Stores the id of the selected image OCR provider in the Redux store.
const setImageProviderId = useCallback(
(id: string) => {
dispatch(setImageOcrProviderId(id))
},
[dispatch]
)
// Built-in providers use their i18n label; other providers use their own `name`.
const getOcrProviderName = (p: OcrProvider) => {
return isBuiltinOcrProvider(p) ? getBuiltinOcrProviderLabel(p.id) : p.name
}
// Logo for a provider: a dedicated image/icon per built-in id, a question-mark icon otherwise.
// NOTE(review): declared inside the hook, so a new component function is created on every render.
const OcrProviderLogo = ({ provider: p, size = 14 }: { provider: OcrProvider; size?: number }) => {
if (isBuiltinOcrProvider(p)) {
switch (p.id) {
case 'tesseract':
return <Avatar src={TesseractLogo} style={{ width: size, height: size }} />
case 'system':
return <MonitorIcon size={size} />
case 'paddleocr':
return <Avatar src={PaddleocrLogo} style={{ width: size, height: size }} />
case 'ovocr':
return <Avatar src={IntelLogo} style={{ width: size, height: size }} />
}
}
return <FileQuestionMarkIcon size={size} />
}
// Keeps the local `imageProvider` state in sync with the `imageProviderId` stored in Redux.
useEffect(() => {
const actualImageProvider = imageProviders.find((p) => p.id === imageProviderId)
if (!actualImageProvider) {
// The stored id matches no image provider. If it names a built-in provider, add that
// provider to the list; in every case the selection is reset to the default image provider.
if (isBuiltinOcrProviderId(imageProviderId)) {
logger.warn(`Builtin ocr provider ${imageProviderId} not exist. Will add it to providers.`)
addProvider(BUILTIN_OCR_PROVIDERS_MAP[imageProviderId])
}
setImageProviderId(DEFAULT_OCR_PROVIDER.image.id)
setImageProvider(DEFAULT_OCR_PROVIDER.image)
} else {
setImageProviderId(actualImageProvider.id)
setImageProvider(actualImageProvider)
}
}, [addProvider, imageProviderId, imageProviders, setImageProviderId])
return {
providers,
imageProvider,
addProvider,
removeProvider,
setImageProviderId,
getOcrProviderName,
OcrProviderLogo
}
}
/**
 * Resolves a single OCR provider by id and exposes a helper to patch its config.
 *
 * When no provider with `id` exists in the store, a fallback is returned instead:
 * the built-in definition for `id` if `id` is a built-in provider id, otherwise the
 * built-in Tesseract provider.
 *
 * Logging, toasts and the attempt to re-add a missing built-in provider run inside an
 * effect rather than in the render body, so rendering stays free of side effects and
 * the notifications are not repeated on every re-render.
 *
 * @param id - Id of the OCR provider to look up.
 * @returns The resolved (or fallback) provider and `updateConfig`, which dispatches a
 * partial config update for that provider.
 */
export const useOcrProvider = (id: string) => {
  const { t } = useTranslation()
  const dispatch = useDispatch()
  const { providers, addProvider } = useOcrProviders()

  const existing = providers.find((p) => p.id === id)
  const isMissing = existing === undefined

  // Pure fallback resolution: safe to evaluate on every render.
  const provider =
    existing ?? (isBuiltinOcrProviderId(id) ? BUILTIN_OCR_PROVIDERS_MAP[id] : BUILTIN_OCR_PROVIDERS_MAP.tesseract)

  useEffect(() => {
    if (!isMissing) return

    logger.error(`Ocr Provider ${id} not found`)
    window.toast.error(t('ocr.error.provider.not_found'))

    if (isBuiltinOcrProviderId(id)) {
      try {
        // Restore the missing built-in provider in the store.
        addProvider(BUILTIN_OCR_PROVIDERS_MAP[id])
      } catch (e) {
        // Adding failed; the caller keeps using the temporary provider taken from config.
        logger.warn(`Add ${BUILTIN_OCR_PROVIDERS_MAP[id].name} failed. Just use temp provider from config.`)
        window.toast.warning(t('ocr.warning.provider.fallback', { name: BUILTIN_OCR_PROVIDERS_MAP[id].name }))
      }
    } else {
      logger.warn(`Fallback to tesseract`)
      window.toast.warning(t('ocr.warning.provider.fallback', { name: 'Tesseract' }))
    }
  }, [addProvider, id, isMissing, t])

  const updateConfig = (update: Partial<OcrProviderConfig>) => {
    dispatch(updateOcrProviderConfig({ id: provider.id, update }))
  }

  return {
    provider,
    updateConfig
  }
}

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Cannot delete built-in provider",
"existing": "The provider already exists",
"get_providers": "Failed to get available providers",
"not_availabel": "Provider {{provider}} is not available",
"not_found": "OCR provider does not exist",
"update_failed": "Failed to update configuration"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Unsupported file type {{type}}"
},
"processing": "OCR processing...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Failed to update config"
}
}
},
"create": {
"error": {
"failed": "Failed to create provider"
}
},
"delete": {
"error": {
"failed": "Failed to delete provider {{provider}}"
}
},
"get": {
"error": {
"failed": "Failed to get provider {{provider}}"
}
},
"list": {
"error": {
"failed": "Failed to list providers"
}
},
"update": {
"error": {
"failed": "Failed to update the provider"
}
}
},
"warning": {
"provider": {
"fallback": "Reverted to {{name}}, which may cause issues"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "不能删除内置提供商",
"existing": "提供商已存在",
"get_providers": "获取可用提供商失败",
"not_availabel": "{{provider}} 暂不可用",
"not_found": "OCR 提供商不存在",
"update_failed": "更新配置失败"
},
@@ -2064,6 +2065,40 @@
"not_supported": "不支持的文件类型 {{type}}"
},
"processing": "OCR 处理中...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "更新配置失败"
}
}
},
"create": {
"error": {
"failed": "创建提供商失败"
}
},
"delete": {
"error": {
"failed": "删除提供商 {{provider}} 失败"
}
},
"get": {
"error": {
"failed": "获取提供商 {{provider}} 失败"
}
},
"list": {
"error": {
"failed": "获取提供商列表失败"
}
},
"update": {
"error": {
"failed": "更新提供商失败"
}
}
},
"warning": {
"provider": {
"fallback": "已回退到 {{name}},这可能导致问题"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "不能刪除內建提供者",
"existing": "提供者已存在",
"get_providers": "取得可用提供者失敗",
"not_availabel": "提供者 {{provider}} 不可用",
"not_found": "OCR 提供者不存在",
"update_failed": "更新配置失敗"
},
@@ -2064,6 +2065,40 @@
"not_supported": "不支持的文件類型 {{type}}"
},
"processing": "OCR 處理中...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "更新設定失敗"
}
}
},
"create": {
"error": {
"failed": "無法建立提供者"
}
},
"delete": {
"error": {
"failed": "刪除提供者 {{provider}} 失敗"
}
},
"get": {
"error": {
"failed": "無法取得提供者 {{provider}}"
}
},
"list": {
"error": {
"failed": "無法列出提供者"
}
},
"update": {
"error": {
"failed": "無法更新提供者"
}
}
},
"warning": {
"provider": {
"fallback": "已回退到 {{name}},這可能導致問題"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Eingebauter Anbieter kann nicht entfernt werden",
"existing": "Anbieter existiert bereits",
"get_providers": "Verfügbare Anbieter konnten nicht abgerufen werden",
"not_availabel": "{{provider}} ist nicht verfügbar",
"not_found": "OCR-Anbieter nicht gefunden",
"update_failed": "Konfiguration aktualisieren fehlgeschlagen"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Nicht unterstützter Dateityp {{type}}"
},
"processing": "OCR wird verarbeitet...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Fehler beim Aktualisieren der Konfiguration"
}
}
},
"create": {
"error": {
"failed": "Fehler beim Erstellen des Anbieters"
}
},
"delete": {
"error": {
"failed": "Fehler beim Löschen des Anbieters {{provider}}"
}
},
"get": {
"error": {
"failed": "Fehler beim Abrufen des Anbieters {{provider}}"
}
},
"list": {
"error": {
"failed": "Anbieter konnten nicht aufgelistet werden"
}
},
"update": {
"error": {
"failed": "Fehler beim Aktualisieren des Anbieters"
}
}
},
"warning": {
"provider": {
"fallback": "Auf {{name}} zurückgefallen, dies kann zu Problemen führen"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Δεν είναι δυνατή η διαγραφή του ενσωματωμένου παρόχου",
"existing": "Ο πάροχος υπηρεσιών υπάρχει ήδη",
"get_providers": "Αποτυχία λήψης διαθέσιμων παρόχων",
"not_availabel": "Ο πάροχος {{provider}} δεν είναι διαθέσιμος",
"not_found": "Ο πάροχος OCR δεν υπάρχει",
"update_failed": "Αποτυχία ενημέρωσης της διαμόρφωσης"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Μη υποστηριζόμενος τύπος αρχείου {{type}}"
},
"processing": "Η επεξεργασία OCR βρίσκεται σε εξέλιξη...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Αποτυχία ενημέρωσης ρυθμίσεων"
}
}
},
"create": {
"error": {
"failed": "Αποτυχία δημιουργίας παρόχου"
}
},
"delete": {
"error": {
"failed": "Αποτυχία διαγραφής του παρόχου {{provider}}"
}
},
"get": {
"error": {
"failed": "Αποτυχία λήψης του παρόχου {{provider}}"
}
},
"list": {
"error": {
"failed": "Αποτυχία καταγραφής παρόχων"
}
},
"update": {
"error": {
"failed": "Αποτυχία ενημέρωσης του παρόχου"
}
}
},
"warning": {
"provider": {
"fallback": "Επαναφέρθηκε στο {{name}}, το οποίο μπορεί να προκαλέσει προβλήματα"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "No se puede eliminar el proveedor integrado",
"existing": "El proveedor ya existe",
"get_providers": "Error al obtener proveedores disponibles",
"not_availabel": "El proveedor {{provider}} no está disponible",
"not_found": "El proveedor de OCR no existe",
"update_failed": "Actualización de la configuración fallida"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Tipo de archivo no compatible {{type}}"
},
"processing": "Procesando OCR...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Error al actualizar la configuración"
}
}
},
"create": {
"error": {
"failed": "Error al crear el proveedor"
}
},
"delete": {
"error": {
"failed": "Error al eliminar el proveedor {{provider}}"
}
},
"get": {
"error": {
"failed": "Error al obtener el proveedor {{provider}}"
}
},
"list": {
"error": {
"failed": "Error al listar proveedores"
}
},
"update": {
"error": {
"failed": "Error al actualizar el proveedor"
}
}
},
"warning": {
"provider": {
"fallback": "Se ha revertido a {{name}}, lo que podría causar problemas"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Impossible de supprimer le fournisseur intégré",
"existing": "Le fournisseur existe déjà",
"get_providers": "Échec de l'obtention des fournisseurs disponibles",
"not_availabel": "Le fournisseur {{provider}} n'est pas disponible",
"not_found": "Le fournisseur OCR n'existe pas",
"update_failed": "Échec de la mise à jour de la configuration"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Type de fichier non pris en charge {{type}}"
},
"processing": "Traitement OCR en cours...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Échec de la mise à jour de la configuration"
}
}
},
"create": {
"error": {
"failed": "Échec de la création du fournisseur"
}
},
"delete": {
"error": {
"failed": "Échec de la suppression du fournisseur {{provider}}"
}
},
"get": {
"error": {
"failed": "Échec de l'obtention du fournisseur {{provider}}"
}
},
"list": {
"error": {
"failed": "Échec de la liste des fournisseurs"
}
},
"update": {
"error": {
"failed": "Échec de la mise à jour du fournisseur"
}
}
},
"warning": {
"provider": {
"fallback": "Revenu à {{name}}, ce qui pourrait entraîner des problèmes"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "組み込みプロバイダーは削除できません",
"existing": "プロバイダーはすでに存在します",
"get_providers": "利用可能なプロバイダーの取得に失敗しました",
"not_availabel": "{{provider}}が利用できません",
"not_found": "OCRプロバイダーが存在しません",
"update_failed": "更新構成に失敗しました"
},
@@ -2064,6 +2065,40 @@
"not_supported": "サポートされていないファイルタイプ {{type}}"
},
"processing": "OCR処理中...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "設定の更新に失敗しました"
}
}
},
"create": {
"error": {
"failed": "プロバイダーの作成に失敗しました"
}
},
"delete": {
"error": {
"failed": "プロバイダー {{provider}} の削除に失敗しました"
}
},
"get": {
"error": {
"failed": "プロバイダー {{provider}} の取得に失敗しました"
}
},
"list": {
"error": {
"failed": "プロバイダーの一覧取得に失敗しました"
}
},
"update": {
"error": {
"failed": "プロバイダーの更新に失敗しました"
}
}
},
"warning": {
"provider": {
"fallback": "{{name}} に戻されました。これにより問題が発生する可能性があります。"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Não é possível excluir o provedor integrado",
"existing": "O provedor já existe",
"get_providers": "Falha ao obter provedores disponíveis",
"not_availabel": "Fornecedor {{provider}} não está disponível",
"not_found": "O provedor OCR não existe",
"update_failed": "Falha ao atualizar a configuração"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Tipo de arquivo não suportado {{type}}"
},
"processing": "Processamento OCR em andamento...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Falha ao atualizar a configuração"
}
}
},
"create": {
"error": {
"failed": "Falha ao criar o provedor"
}
},
"delete": {
"error": {
"failed": "Falha ao excluir o provedor {{provider}}"
}
},
"get": {
"error": {
"failed": "Falha ao obter o provedor {{provider}}"
}
},
"list": {
"error": {
"failed": "Falha ao listar provedores"
}
},
"update": {
"error": {
"failed": "Falha ao atualizar o provedor"
}
}
},
"warning": {
"provider": {
"fallback": "Revertido para {{name}}, o que pode causar problemas"

View File

@@ -2055,6 +2055,7 @@
"cannot_remove_builtin": "Не удается удалить встроенного поставщика",
"existing": "Поставщик уже существует",
"get_providers": "Не удалось получить доступных поставщиков",
"not_availabel": "Поставщик {{provider}} недоступен",
"not_found": "Поставщик OCR отсутствует",
"update_failed": "Обновление конфигурации не удалось"
},
@@ -2064,6 +2065,40 @@
"not_supported": "Неподдерживаемый тип файла {{type}}"
},
"processing": "Обработка OCR...",
"provider": {
"config": {
"patch": {
"error": {
"failed": "Не удалось обновить конфигурацию"
}
}
},
"create": {
"error": {
"failed": "Не удалось создать поставщика"
}
},
"delete": {
"error": {
"failed": "Не удалось удалить поставщика {{provider}}"
}
},
"get": {
"error": {
"failed": "Не удалось получить поставщика {{provider}}"
}
},
"list": {
"error": {
"failed": "Не удалось получить список поставщиков"
}
},
"update": {
"error": {
"failed": "Не удалось обновить поставщика"
}
}
},
"warning": {
"provider": {
"fallback": "Возвращено к {{name}}, это может вызвать проблемы"

View File

@@ -1,93 +1,85 @@
import { Alert, Skeleton } from '@heroui/react'
import { loggerService } from '@logger'
import { Skeleton } from '@cherrystudio/ui'
import { Alert } from '@heroui/react'
import { ErrorTag } from '@renderer/components/Tags/ErrorTag'
import { isMac, isWin } from '@renderer/config/constant'
import { useOcrProviders } from '@renderer/hooks/useOcrProvider'
import type { ImageOcrProvider, OcrProvider } from '@renderer/types'
import { BuiltinOcrProviderIds, isImageOcrProvider } from '@renderer/types'
import { useOcrImageProvider } from '@renderer/hooks/ocr/useOcrImageProvider'
import { useOcrProviders } from '@renderer/hooks/ocr/useOcrProviders'
import { BuiltinOcrProviderIdMap, isImageOcrProvider } from '@renderer/types'
import { getErrorMessage } from '@renderer/utils'
import { Select } from 'antd'
import { useCallback, useEffect, useMemo } from 'react'
import { useCallback, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import useSWRImmutable from 'swr/immutable'
import { SettingRow, SettingRowTitle } from '..'
const logger = loggerService.withContext('OcrImageSettings')
// const logger = loggerService.withContext('OcrImageSettings')
type Props = {
setProvider: (provider: OcrProvider) => void
}
const OcrImageSettings = ({ setProvider }: Props) => {
const OcrImageSettings = () => {
const { t } = useTranslation()
const { providers, imageProvider, getOcrProviderName, setImageProviderId } = useOcrProviders()
const fetcher = useCallback(() => {
return window.api.ocr.listProviders()
}, [])
const { providers, loading, error, getOcrProviderName } = useOcrProviders({ registered: true })
const { imageProvider, setImageProviderId, imageProviderId } = useOcrImageProvider()
const { data: validProviders, isLoading, error } = useSWRImmutable('ocr/providers', fetcher)
const imageProviders = useMemo(() => providers?.filter((p) => isImageOcrProvider(p)) ?? [], [providers])
const imageProviders = providers.filter((p) => isImageOcrProvider(p))
// 挂载时更新外部状态
// FIXME: Just keep the imageProvider always valid, so we don't need update it in this component.
useEffect(() => {
setProvider(imageProvider)
}, [imageProvider, setProvider])
const setImageProvider = (id: string) => {
const provider = imageProviders.find((p) => p.id === id)
if (!provider) {
logger.error(`Failed to find image provider by id: ${id}`)
window.toast.error(t('settings.tool.ocr.image.error.provider_not_found'))
return
}
setProvider(provider)
setImageProviderId(id)
}
const setImageProvider = useCallback(
(id: string) => {
setImageProviderId(id)
},
[setImageProviderId]
)
const platformSupport = isMac || isWin
const options = useMemo(() => {
if (!validProviders) return []
const platformFilter = platformSupport ? () => true : (p: ImageOcrProvider) => p.id !== BuiltinOcrProviderIds.system
const validFilter = (p: ImageOcrProvider) => validProviders.includes(p.id)
return imageProviders
.filter(platformFilter)
.filter(validFilter)
.map((p) => ({
value: p.id,
label: getOcrProviderName(p)
}))
}, [getOcrProviderName, imageProviders, platformSupport, validProviders])
return imageProviders.map((p) => ({
value: p.id,
label: getOcrProviderName(p)
}))
}, [getOcrProviderName, imageProviders])
const isSystem = imageProvider.id === BuiltinOcrProviderIds.system
const isSystem = imageProvider?.id === BuiltinOcrProviderIdMap.system
const content = useMemo(() => {
if (loading) {
return <Skeleton className="h-full w-50" />
}
if (error) {
return (
<Alert
color="danger"
title={t('ocr.provider.get.error.failed', { provider: imageProviderId })}
description={getErrorMessage(error)}
/>
)
}
if (!imageProvider) {
return <Alert color="danger" title={t('ocr.error.provider.not_found')} />
}
return (
<>
{!platformSupport && isSystem && <ErrorTag message={t('settings.tool.ocr.error.not_system')} />}
{!loading && !error && (
<Select
value={imageProvider.id}
className="w-50"
onChange={(id: string) => setImageProvider(id)}
options={options}
/>
)}
{!loading && error && (
<Alert color="danger" title={t('ocr.error.provider.get_providers')} description={getErrorMessage(error)} />
)}
</>
)
}, [error, imageProvider, imageProviderId, isSystem, loading, options, platformSupport, setImageProvider, t])
return (
<>
<SettingRow>
<SettingRowTitle>{t('settings.tool.ocr.image_provider')}</SettingRowTitle>
<div style={{ display: 'flex', gap: '8px', alignItems: 'center' }}>
{!platformSupport && isSystem && <ErrorTag message={t('settings.tool.ocr.error.not_system')} />}
<Skeleton isLoaded={!isLoading}>
{!error && (
<Select
value={imageProvider.id}
style={{ width: '200px' }}
onChange={(id: string) => setImageProvider(id)}
options={options}
/>
)}
{error && (
<Alert
color="danger"
title={t('ocr.error.provider.get_providers')}
description={getErrorMessage(error)}
/>
)}
</Skeleton>
</div>
<div className="flex items-center gap-2 self-stretch">{content}</div>
</SettingRow>
</>
)

View File

@@ -1,6 +1,4 @@
import { Flex } from '@cherrystudio/ui'
import { useOcrProvider } from '@renderer/hooks/useOcrProvider'
import { BuiltinOcrProviderIds, isOcrOVProvider } from '@renderer/types'
import { Tag } from 'antd'
import { useTranslation } from 'react-i18next'
@@ -8,11 +6,6 @@ import { SettingRow, SettingRowTitle } from '..'
export const OcrOVSettings = () => {
const { t } = useTranslation()
const { provider } = useOcrProvider(BuiltinOcrProviderIds.ovocr)
if (!isOcrOVProvider(provider)) {
throw new Error('Not OV OCR provider.')
}
return (
<>

View File

@@ -1,19 +1,26 @@
import { ErrorBoundary } from '@renderer/components/ErrorBoundary'
import { useOcrProvider } from '@renderer/hooks/useOcrProvider'
import { BuiltinOcrProviderIds, isOcrPpocrProvider } from '@renderer/types'
import type { OcrPpocrConfig, OcrPpocrProvider, OcrProviderConfig } from '@renderer/types'
import { isOcrPpocrProvider } from '@renderer/types'
import { Input } from 'antd'
import { startTransition, useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { SettingHelpLink, SettingHelpText, SettingHelpTextRow, SettingRow, SettingRowTitle } from '..'
export const OcrPpocrSettings = () => {
export const OcrPpocrSettings = ({
provider,
updateConfig: _updateConfig
}: {
provider: OcrPpocrProvider
updateConfig: (config: Partial<OcrProviderConfig>) => Promise<void>
}) => {
const updateConfig = _updateConfig as (config: Partial<OcrPpocrConfig>) => Promise<void>
// Hack: Hard-coded for now
const SERVING_DOC_URL = 'https://www.paddleocr.ai/latest/version3.x/deployment/serving.html'
const AISTUDIO_URL = 'https://aistudio.baidu.com/pipeline/mine'
const { t } = useTranslation()
const { provider, updateConfig } = useOcrProvider(BuiltinOcrProviderIds.paddleocr)
if (!isOcrPpocrProvider(provider)) {
throw new Error('Not PaddleOCR provider.')

View File

@@ -1,13 +1,23 @@
// import { loggerService } from '@logger'
import { Flex } from '@cherrystudio/ui'
import { Avatar, Flex } from '@cherrystudio/ui'
import IntelLogo from '@renderer/assets/images/providers/intel.png'
import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png'
import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png'
import { ErrorBoundary } from '@renderer/components/ErrorBoundary'
import { isMac, isWin } from '@renderer/config/constant'
import { useTheme } from '@renderer/context/ThemeProvider'
import { useOcrProviders } from '@renderer/hooks/useOcrProvider'
import type { OcrProvider } from '@renderer/types'
import { isBuiltinOcrProvider, isOcrSystemProvider } from '@renderer/types'
import { useOcrProviders } from '@renderer/hooks/ocr/useOcrProviders'
import type { OcrProvider, OcrProviderConfig } from '@renderer/types'
import {
isBuiltinOcrProvider,
isOcrOVProvider,
isOcrPpocrProvider,
isOcrSystemProvider,
isOcrTesseractProvider
} from '@renderer/types'
import { Divider } from 'antd'
import styled from 'styled-components'
import { FileQuestionMarkIcon, MonitorIcon } from 'lucide-react'
import { useMemo } from 'react'
import { SettingGroup, SettingTitle } from '..'
import { OcrOVSettings } from './OcrOVSettings'
@@ -18,34 +28,37 @@ import { OcrTesseractSettings } from './OcrTesseractSettings'
// const logger = loggerService.withContext('OcrTesseractSettings')
type Props = {
provider: OcrProvider
provider: OcrProvider | undefined | null
updateConfig: (config: Partial<OcrProviderConfig>) => Promise<void>
}
const OcrProviderSettings = ({ provider }: Props) => {
const OcrProviderSettings = ({ provider, updateConfig }: Props) => {
const { theme: themeMode } = useTheme()
const { OcrProviderLogo, getOcrProviderName } = useOcrProviders()
const { getOcrProviderName } = useOcrProviders()
if (!isWin && !isMac && isOcrSystemProvider(provider)) {
return null
}
const ProviderSettings = () => {
const settings = useMemo(() => {
if (!provider) return null
if (isBuiltinOcrProvider(provider)) {
switch (provider.id) {
case 'tesseract':
return <OcrTesseractSettings />
case 'system':
return <OcrSystemSettings />
case 'paddleocr':
return <OcrPpocrSettings />
case 'ovocr':
return <OcrOVSettings />
default:
return null
if (isOcrTesseractProvider(provider)) {
return <OcrTesseractSettings provider={provider} updateConfig={updateConfig} />
}
if (isOcrSystemProvider(provider)) {
return <OcrSystemSettings provider={provider} updateConfig={updateConfig} />
}
if (isOcrPpocrProvider(provider)) {
return <OcrPpocrSettings provider={provider} updateConfig={updateConfig} />
}
if (isOcrOVProvider(provider)) {
return <OcrOVSettings />
}
return null
} else {
throw new Error('Not supported OCR provider')
}
}, [provider, updateConfig])
if (!provider || (!isWin && !isMac && isOcrSystemProvider(provider))) {
return null
}
return (
@@ -53,20 +66,29 @@ const OcrProviderSettings = ({ provider }: Props) => {
<SettingTitle>
<Flex className="items-center gap-2">
<OcrProviderLogo provider={provider} />
<ProviderName> {getOcrProviderName(provider)}</ProviderName>
<span className="font-semibold text-sm"> {getOcrProviderName(provider)}</span>
</Flex>
</SettingTitle>
<Divider style={{ width: '100%', margin: '10px 0' }} />
<ErrorBoundary>
<ProviderSettings />
</ErrorBoundary>
<ErrorBoundary>{settings}</ErrorBoundary>
</SettingGroup>
)
}
const ProviderName = styled.span`
font-size: 14px;
font-weight: 500;
`
/**
 * Logo shown next to an OCR provider's name.
 *
 * Built-in providers map to a dedicated image or icon based on their id; anything
 * else falls back to a question-mark file icon.
 *
 * @param provider - Provider whose logo should be rendered.
 * @param size - Width and height of the logo in pixels (defaults to 14).
 */
const OcrProviderLogo = ({ provider: p, size = 14 }: { provider: OcrProvider; size?: number }) => {
  if (!isBuiltinOcrProvider(p)) {
    return <FileQuestionMarkIcon size={size} />
  }

  const providerId = p.id
  if (providerId === 'tesseract') {
    return <Avatar src={TesseractLogo} style={{ width: size, height: size }} />
  }
  if (providerId === 'system') {
    return <MonitorIcon size={size} />
  }
  if (providerId === 'paddleocr') {
    return <Avatar src={PaddleocrLogo} style={{ width: size, height: size }} />
  }
  if (providerId === 'ovocr') {
    return <Avatar src={IntelLogo} style={{ width: size, height: size }} />
  }

  // Built-in id without a dedicated logo.
  return <FileQuestionMarkIcon size={size} />
}
export default OcrProviderSettings

View File

@@ -1,42 +1,88 @@
import { PictureOutlined } from '@ant-design/icons'
import { ErrorBoundary } from '@renderer/components/ErrorBoundary'
import { useTheme } from '@renderer/context/ThemeProvider'
import { useOcrProviders } from '@renderer/hooks/useOcrProvider'
import type { OcrProvider } from '@renderer/types'
import type { TabsProps } from 'antd'
import { Tabs } from 'antd'
import { useOcrImageProvider } from '@renderer/hooks/ocr/useOcrImageProvider'
import type { FC } from 'react'
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { SettingDivider, SettingGroup, SettingTitle } from '..'
import OcrImageSettings from './OcrImageSettings'
import OcrProviderSettings from './OcrProviderSettings'
// const TabSchema = z.enum(['image'])
// type Tab = z.infer<typeof TabSchema>
// const isValidTab = (value: string): value is Tab => TabSchema.safeParse(value).success
// type TabItem = {
// name: string
// value: Tab
// icon: ReactNode
// content: ReactNode
// }
const OcrSettings: FC = () => {
const { t } = useTranslation()
const { theme: themeMode } = useTheme()
const { imageProvider } = useOcrProviders()
const [provider, setProvider] = useState<OcrProvider>(imageProvider) // since default to image provider
const { imageProvider: provider, updateConfig } = useOcrImageProvider()
// const [activeTab, setActiveTab] = useState<Tab>('image')
// const provider = useMemo(() => {
// switch (activeTab) {
// case 'image':
// return imageProvider
// default:
// return undefined
// }
// }, [imageProvider, activeTab])
const tabs: TabsProps['items'] = [
{
key: 'image',
label: t('settings.tool.ocr.image.title'),
icon: <PictureOutlined />,
children: <OcrImageSettings setProvider={setProvider} />
}
]
// const tabs = [
// {
// name: t('settings.tool.ocr.image.title'),
// value: 'image',
// icon: <PictureOutlined />,
// content: <OcrImageSettings />
// }
// ] satisfies TabItem[]
// const handleTabChange = useCallback((value: string) => {
// if (isValidTab(value)) {
// setActiveTab(value)
// } else {
// window.toast.error('Unexpected behavior: Not a valid tab.')
// }
// }, [])
return (
<ErrorBoundary>
<SettingGroup theme={themeMode}>
<SettingTitle>{t('settings.tool.ocr.title')}</SettingTitle>
<SettingDivider />
<Tabs defaultActiveKey="image" items={tabs} />
{/* <Tabs value={activeTab} onValueChange={handleTabChange}>
<TabsList>
{tabs.map((tab) => {
return (
<TabsTrigger key={tab.value} value={tab.value} className="cursor-pointer">
<div className={cn('flex items-center gap-1', tab.value === activeTab && 'text-primary')}>
{tab.icon}
{tab.name}
</div>
</TabsTrigger>
)
})}
</TabsList>
{tabs.map((tab) => {
return (
<TabsContent key={tab.value} value={tab.value} className="pl-1">
{tab.content}
</TabsContent>
)
})}
</Tabs> */}
{/* Since only image is supported for now, we just don't use tabs component,
but keep code of tabs. */}
<OcrImageSettings />
</SettingGroup>
<ErrorBoundary>
<OcrProviderSettings provider={provider} />
<OcrProviderSettings provider={provider} updateConfig={updateConfig} />
</ErrorBoundary>
</ErrorBoundary>
)

View File

@@ -3,10 +3,8 @@ import { Flex } from '@cherrystudio/ui'
import { InfoTooltip } from '@cherrystudio/ui'
import { SuccessTag } from '@renderer/components/Tags/SuccessTag'
import { isMac, isWin } from '@renderer/config/constant'
import { useOcrProvider } from '@renderer/hooks/useOcrProvider'
import useTranslate from '@renderer/hooks/useTranslate'
import type { TranslateLanguageCode } from '@renderer/types'
import { BuiltinOcrProviderIds, isOcrSystemProvider } from '@renderer/types'
import type { OcrProviderConfig, OcrSystemConfig, OcrSystemProvider, TranslateLanguageCode } from '@renderer/types'
import { Select } from 'antd'
import { startTransition, useCallback, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
@@ -15,21 +13,24 @@ import { SettingRow, SettingRowTitle } from '..'
// const logger = loggerService.withContext('OcrSystemSettings')
export const OcrSystemSettings = () => {
export const OcrSystemSettings = ({
provider,
updateConfig: _updateConfig
}: {
provider: OcrSystemProvider
updateConfig: (config: Partial<OcrProviderConfig>) => Promise<void>
}) => {
const updateConfig = _updateConfig as (config: Partial<OcrSystemConfig>) => Promise<void>
const { t } = useTranslation()
// 和翻译自定义语言耦合了应该还ok
const { translateLanguages } = useTranslate()
const { provider, updateConfig } = useOcrProvider(BuiltinOcrProviderIds.system)
if (!isOcrSystemProvider(provider)) {
throw new Error('Not system provider.')
}
if (!isWin && !isMac) {
throw new Error('Only Windows and MacOS is supported.')
}
const [langs, setLangs] = useState<TranslateLanguageCode[]>(provider.config?.langs ?? [])
const [langs, setLangs] = useState<TranslateLanguageCode[]>(provider.config.langs ?? [])
// currently static
const options = useMemo(

View File

@@ -2,11 +2,10 @@
import { Flex } from '@cherrystudio/ui'
import { InfoTooltip } from '@cherrystudio/ui'
import CustomTag from '@renderer/components/Tags/CustomTag'
import { TESSERACT_LANG_MAP } from '@renderer/config/ocr'
import { useOcrProvider } from '@renderer/hooks/useOcrProvider'
import useTranslate from '@renderer/hooks/useTranslate'
import type { TesseractLangCode } from '@renderer/types'
import { BuiltinOcrProviderIds, isOcrTesseractProvider } from '@renderer/types'
import type { OcrProviderConfig, OcrTesseractConfig, OcrTesseractProvider, TesseractLangCode } from '@renderer/types'
import { objectEntries } from '@renderer/types'
import { TESSERACT_LANG_MAP } from '@shared/config/ocr'
import { Select } from 'antd'
import { useCallback, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
@@ -15,15 +14,18 @@ import { SettingRow, SettingRowTitle } from '..'
// const logger = loggerService.withContext('OcrTesseractSettings')
export const OcrTesseractSettings = () => {
export const OcrTesseractSettings = ({
provider,
updateConfig: _updateConfig
}: {
provider: OcrTesseractProvider
updateConfig: (config: Partial<OcrProviderConfig>) => Promise<void>
}) => {
const updateConfig = _updateConfig as (config: Partial<OcrTesseractConfig>) => Promise<void>
const { t } = useTranslation()
const { provider, updateConfig } = useOcrProvider(BuiltinOcrProviderIds.tesseract)
if (!isOcrTesseractProvider(provider)) {
throw new Error('Not tesseract provider.')
}
const [langs, setLangs] = useState<Partial<Record<TesseractLangCode, boolean>>>(provider.config?.langs ?? {})
const [langs, setLangs] = useState<OcrTesseractConfig['langs'] | undefined>(provider?.config.langs)
const { translateLanguages } = useTranslate()
const options = useMemo(
@@ -37,14 +39,12 @@ export const OcrTesseractSettings = () => {
[translateLanguages]
)
// TODO: type safe objectKeys
const value = useMemo(
() =>
Object.entries(langs)
.filter(([, enabled]) => enabled)
.map(([lang]) => lang) as TesseractLangCode[],
[langs]
)
const selectedLangs = useMemo(() => {
if (!langs) return
return objectEntries(langs)
.filter(([, enabled]) => enabled)
.map(([lang]) => lang) as TesseractLangCode[]
}, [langs])
const onChange = useCallback((values: TesseractLangCode[]) => {
setLangs(() => {
@@ -69,11 +69,11 @@ export const OcrTesseractSettings = () => {
<InfoTooltip content={t('settings.tool.ocr.tesseract.langs_tooltip')} />
</Flex>
</SettingRowTitle>
<div style={{ display: 'flex', gap: '8px' }}>
<div className="flex gap-2">
<Select
mode="multiple"
style={{ minWidth: 200 }}
value={value}
value={selectedLangs}
options={options}
maxTagCount={1}
onChange={onChange}

View File

@@ -144,6 +144,9 @@ const CustomLanguageModal = ({ isOpen, editingCustomLanguage, onAdd, onEdit, onC
rules={[
{ required: true, message: t('settings.translate.custom.error.langCode.empty') },
{
// TODO: use TranslateLanguageCodeSchema here. Modify it when migrating to new UI.
// NOTE: Since any uppercase letters are converted to lowercase before the code is saved to IndexedDB,
// it's safe to replace this pattern with the case-sensitive TranslateLanguageCodeSchema.
pattern: /^[a-zA-Z]{2,3}(-[a-zA-Z]{2,3})?$/,
message: t('settings.translate.custom.error.langCode.invalid')
},

View File

@@ -9,10 +9,10 @@ import { isEmbeddingModel, isRerankModel, isTextToImageModel } from '@renderer/c
import { LanguagesEnum, UNKNOWN } from '@renderer/config/translate'
import { useCodeStyle } from '@renderer/context/CodeStyleProvider'
import db from '@renderer/databases'
import { useOcr } from '@renderer/hooks/ocr/useOcr'
import { useDefaultModel } from '@renderer/hooks/useAssistant'
import { useDrag } from '@renderer/hooks/useDrag'
import { useFiles } from '@renderer/hooks/useFiles'
import { useOcr } from '@renderer/hooks/useOcr'
import { useTemporaryValue } from '@renderer/hooks/useTemporaryValue'
import { useTimer } from '@renderer/hooks/useTimer'
import useTranslate from '@renderer/hooks/useTranslate'
@@ -29,7 +29,7 @@ import {
type TranslateHistory,
type TranslateLanguage
} from '@renderer/types'
import { getFileExtension, isTextFile, runAsyncFunction, uuid } from '@renderer/utils'
import { getErrorMessage, getFileExtension, isTextFile, runAsyncFunction, uuid } from '@renderer/utils'
import { abortCompletion } from '@renderer/utils/abortController'
import { isAbortError } from '@renderer/utils/error'
import { formatErrorMessage } from '@renderer/utils/error'
@@ -671,7 +671,7 @@ const TranslatePage: FC = () => {
await processFile(selectedFile)
} catch (error) {
logger.error('onPaste:', error as Error)
window.toast.error(t('chat.input.file_error'))
window.toast.error({ title: t('chat.input.file_error'), description: getErrorMessage(error) })
}
}
setIsProcessing(false)

View File

@@ -1,8 +1,5 @@
import { loggerService } from '@logger'
import type { OcrProvider, OcrResult, SupportedOcrFile } from '@renderer/types'
import { isOcrApiProvider } from '@renderer/types'
import { OcrApiClientFactory } from './clients/OcrApiClientFactory'
import type { OcrParams, OcrResult, SupportedOcrFile } from '@renderer/types'
const logger = loggerService.withContext('renderer:OcrService')
@@ -13,12 +10,7 @@ const logger = loggerService.withContext('renderer:OcrService')
* @returns ocr result
* @throws {Error}
*/
export const ocr = async (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> => {
export const ocr = async (file: SupportedOcrFile, params: OcrParams): Promise<OcrResult> => {
logger.info(`ocr file ${file.path}`)
if (isOcrApiProvider(provider)) {
const client = OcrApiClientFactory.create(provider)
return client.ocr(file, provider.config)
} else {
return window.api.ocr.ocr(file, provider)
}
return window.api.ocr.ocr(file, params)
}

View File

@@ -1,27 +1,29 @@
import { loggerService } from '@logger'
import type { OcrApiProvider } from '@renderer/types'
import type { OcrApiProvider, OcrApiProviderConfig } from '@renderer/types'
import type { OcrBaseApiClient } from './OcrBaseApiClient'
import { OcrExampleApiClient } from './OcrExampleApiClient'
const logger = loggerService.withContext('OcrApiClientFactory')
// Not being used for now.
// TODO: Migrate to main in the future.
export class OcrApiClientFactory {
/**
* Create an ApiClient instance for the given provider
* 为给定的提供者创建ApiClient实例
*/
static create(provider: OcrApiProvider): OcrBaseApiClient {
static create(provider: OcrApiProvider, config: OcrApiProviderConfig): OcrBaseApiClient {
logger.debug(`Creating ApiClient for provider:`, {
id: provider.id,
config: provider.config
config
})
let instance: OcrBaseApiClient
// Extend other clients here
// eslint-disable-next-line prefer-const
instance = new OcrExampleApiClient(provider)
instance = new OcrExampleApiClient(provider, config)
return instance
}

View File

@@ -1,26 +1,31 @@
import { cacheService } from '@data/CacheService'
import type { OcrApiProvider, OcrHandler } from '@renderer/types'
import type { OcrApiProvider, OcrApiProviderConfig, OcrHandler } from '@renderer/types'
// Not being used for now.
// TODO: Migrate to main in the future.
export abstract class OcrBaseApiClient {
public provider: OcrApiProvider
public config: OcrApiProviderConfig
protected host: string
protected apiKey: string
constructor(provider: OcrApiProvider) {
constructor(provider: OcrApiProvider, config: OcrApiProviderConfig) {
this.provider = provider
this.host = this.getHost()
this.apiKey = this.getApiKey()
this.config = config
}
abstract ocr: OcrHandler
// copy from BaseApiClient
public getHost(): string {
return this.provider.config.api.apiHost
return this.config.api.apiHost
}
// copy from BaseApiClient
public getApiKey() {
const keys = this.provider.config.api.apiKey.split(',').map((key) => key.trim())
const keys = this.config.api.apiKey.split(',').map((key) => key.trim())
const keyName = `ocr_provider:${this.provider.id}:last_used_key`
if (keys.length === 1) {

View File

@@ -1,12 +1,14 @@
import type { OcrApiProvider, SupportedOcrFile } from '@renderer/types'
import type { OcrApiProvider, OcrApiProviderConfig, SupportedOcrFile } from '@renderer/types'
import { OcrBaseApiClient } from './OcrBaseApiClient'
export type OcrExampleProvider = OcrApiProvider
// Not being used for now.
// TODO: Migrate to main in the future.
export class OcrExampleApiClient extends OcrBaseApiClient {
constructor(provider: OcrApiProvider) {
super(provider)
constructor(provider: OcrApiProvider, config: OcrApiProviderConfig) {
super(provider, config)
}
public ocr = async (file: SupportedOcrFile) => {

View File

@@ -11,7 +11,6 @@ import {
isNotSupportedTextDelta,
SYSTEM_MODELS
} from '@renderer/config/models'
import { BUILTIN_OCR_PROVIDERS, BUILTIN_OCR_PROVIDERS_MAP, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
import {
isSupportArrayContentProvider,
isSupportDeveloperRoleProvider,
@@ -33,7 +32,10 @@ import type {
} from '@renderer/types'
import { isSystemProvider, SystemProviderIds } from '@renderer/types'
import { getDefaultGroupName, getLeadingEmoji, runAsyncFunction, uuid } from '@renderer/utils'
import { getDefaultOcrProvider } from '@renderer/utils/ocr'
import { defaultByPassRules } from '@shared/config/constant'
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
import { INITIAL_BUILTIN_OCR_PROVIDER_MAP } from '@shared/config/ocr'
import { TRANSLATE_PROMPT } from '@shared/config/prompts'
import { DefaultPreferences } from '@shared/data/preference/preferenceSchemas'
import { UpgradeChannel } from '@shared/data/preference/preferenceTypes'
@@ -2235,7 +2237,7 @@ const migrateConfig = {
try {
state.ocr = {
providers: BUILTIN_OCR_PROVIDERS,
imageProviderId: DEFAULT_OCR_PROVIDER.image.id
imageProviderId: getDefaultOcrProvider('image').id
}
state.translate.translateInput = ''
return state
@@ -2246,7 +2248,7 @@ const migrateConfig = {
},
'138': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.system)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.system)
return state
} catch (error) {
logger.error('migrate 138 error', error as Error)
@@ -2426,7 +2428,7 @@ const migrateConfig = {
},
'148': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.paddleocr)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.paddleocr)
return state
} catch (error) {
logger.error('migrate 148 error', error as Error)
@@ -2675,7 +2677,7 @@ const migrateConfig = {
},
'163': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.ovocr)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.ovocr)
state.llm.providers.forEach((provider) => {
if (provider.id === 'cherryin') {
provider.anthropicApiHost = 'https://open.cherryin.net'

View File

@@ -1,7 +1,8 @@
import type { PayloadAction } from '@reduxjs/toolkit'
/** @deprecated All migrated. */
import { createSlice } from '@reduxjs/toolkit'
import { BUILTIN_OCR_PROVIDERS, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
import type { OcrProvider, OcrProviderConfig } from '@renderer/types'
import type { OcrProvider } from '@renderer/types'
import { getDefaultOcrProvider } from '@renderer/utils/ocr'
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
export interface OcrState {
providers: OcrProvider[]
@@ -10,7 +11,7 @@ export interface OcrState {
const initialState: OcrState = {
providers: BUILTIN_OCR_PROVIDERS,
imageProviderId: DEFAULT_OCR_PROVIDER.image.id
imageProviderId: getDefaultOcrProvider('image').id
}
const ocrSlice = createSlice({
@@ -22,48 +23,48 @@ const ocrSlice = createSlice({
}
},
reducers: {
setOcrProviders(state, action: PayloadAction<OcrProvider[]>) {
state.providers = action.payload
},
addOcrProvider(state, action: PayloadAction<OcrProvider>) {
state.providers.push(action.payload)
},
removeOcrProvider(state, action: PayloadAction<string>) {
state.providers = state.providers.filter((provider) => provider.id !== action.payload)
},
updateOcrProvider(state, action: PayloadAction<Partial<OcrProvider>>) {
const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
if (index !== -1) {
Object.assign(state.providers[index], action.payload)
}
},
updateOcrProviderConfig(
state,
action: PayloadAction<{ id: string; update: Omit<Partial<OcrProviderConfig>, 'id'> }>
) {
const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
if (index !== -1) {
if (!state.providers[index].config) {
state.providers[index].config = {}
}
Object.assign(state.providers[index].config, action.payload.update)
}
},
setImageOcrProviderId(state, action: PayloadAction<string>) {
state.imageProviderId = action.payload
}
// setOcrProviders(state, action: PayloadAction<OcrProvider[]>) {
// state.providers = action.payload
// },
// addOcrProvider(state, action: PayloadAction<OcrProvider>) {
// state.providers.push(action.payload)
// },
// removeOcrProvider(state, action: PayloadAction<string>) {
// state.providers = state.providers.filter((provider) => provider.id !== action.payload)
// },
// updateOcrProvider(state, action: PayloadAction<Partial<OcrProvider>>) {
// const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
// if (index !== -1) {
// Object.assign(state.providers[index], action.payload)
// }
// },
// updateOcrProviderConfig(
// state,
// action: PayloadAction<{ id: string; update: Omit<Partial<OcrProviderConfig>, 'id'> }>
// ) {
// const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
// if (index !== -1) {
// if (!state.providers[index].config) {
// state.providers[index].config = {}
// }
// Object.assign(state.providers[index].config, action.payload.update)
// }
// },
// setImageOcrProviderId(state, action: PayloadAction<string>) {
// state.imageProviderId = action.payload
// }
}
})
export const {
setOcrProviders,
addOcrProvider,
removeOcrProvider,
updateOcrProvider,
updateOcrProviderConfig,
setImageOcrProviderId
} = ocrSlice.actions
// export const {
// setOcrProviders,
// addOcrProvider,
// removeOcrProvider,
// updateOcrProvider,
// updateOcrProviderConfig,
// setImageOcrProviderId
// } = ocrSlice.actions
export const { getImageProvider } = ocrSlice.selectors
// export const { getImageProvider } = ocrSlice.selectors
export default ocrSlice.reducer

View File

@@ -14,6 +14,7 @@ import type { KnowledgeBase, KnowledgeReference } from './knowledge'
import type { MCPConfigSample, McpServerType } from './mcp'
import type { Message } from './newMessage'
import type { BaseTool, MCPTool } from './tool'
import { type TranslateLanguage } from './translate'
export * from './agent'
export * from './apiModels'
@@ -23,6 +24,7 @@ export * from './mcp'
export * from './notification'
export * from './ocr'
export * from './provider'
export * from './translate'
export type Assistant = {
id: string
@@ -494,48 +496,6 @@ export type GenerateImageResponse = {
images: string[]
}
// 为了支持自定义语言设置为string别名
/** zh-cn, en-us, etc. */
export type TranslateLanguageCode = string
// langCode应当能够唯一确认一种语言
export type TranslateLanguage = {
value: string
langCode: TranslateLanguageCode
label: () => string
emoji: string
}
export interface TranslateHistory {
id: string
sourceText: string
targetText: string
sourceLanguage: TranslateLanguageCode
targetLanguage: TranslateLanguageCode
createdAt: string
/** 收藏状态 */
star?: boolean
}
export type CustomTranslateLanguage = {
id: string
langCode: TranslateLanguageCode
value: string
emoji: string
}
export const AutoDetectionMethods = {
franc: 'franc',
llm: 'llm',
auto: 'auto'
} as const
export type AutoDetectionMethod = keyof typeof AutoDetectionMethods
export const isAutoDetectionMethod = (method: string): method is AutoDetectionMethod => {
return Object.hasOwn(AutoDetectionMethods, method)
}
// by fullex @ data refactor
// export type SidebarIcon =
// | 'assistants'

View File

@@ -1,208 +0,0 @@
import type Tesseract from 'tesseract.js'
import type { FileMetadata, ImageFileMetadata, TranslateLanguageCode } from '.'
import { isImageFileMetadata } from '.'
export const BuiltinOcrProviderIds = {
tesseract: 'tesseract',
system: 'system',
paddleocr: 'paddleocr',
ovocr: 'ovocr'
} as const
export type BuiltinOcrProviderId = keyof typeof BuiltinOcrProviderIds
export const isBuiltinOcrProviderId = (id: string): id is BuiltinOcrProviderId => {
return Object.hasOwn(BuiltinOcrProviderIds, id)
}
// extensible
export const OcrProviderCapabilities = {
image: 'image'
// pdf: 'pdf'
} as const
export type OcrProviderCapability = keyof typeof OcrProviderCapabilities
export const isOcrProviderCapability = (cap: string): cap is OcrProviderCapability => {
return Object.hasOwn(OcrProviderCapabilities, cap)
}
export type OcrProviderCapabilityRecord = Partial<Record<OcrProviderCapability, boolean>>
// OCR models and providers share the same type definition.
// A provider can offer capabilities to process multiple file types,
// while a model belonging to that provider may be limited to processing only one specific file type.
export type OcrModelCapabilityRecord = OcrProviderCapabilityRecord
export interface OcrModel {
id: string
name: string
providerId: string
capabilities: OcrModelCapabilityRecord
}
/**
* Extend this type to define provider-specefic config types.
*/
export type OcrProviderApiConfig = {
apiKey: string
apiHost: string
apiVersion?: string
}
export const isOcrProviderApiConfig = (config: unknown): config is OcrProviderApiConfig => {
return (
typeof config === 'object' &&
config !== null &&
'apiKey' in config &&
typeof config.apiKey === 'string' &&
'apiHost' in config &&
typeof config.apiHost === 'string' &&
(!('apiVersion' in config) || typeof config.apiVersion === 'string')
)
}
/**
* For future. Model based ocr, api based ocr. May different api client.
*
* Extend this type to define provider-specific config types.
*/
export type OcrProviderBaseConfig = {
/** Not used for now. Could safely remove. */
api?: OcrProviderApiConfig
/** Not used for now. Could safely remove. */
models?: OcrModel[]
/** Not used for now. Could safely remove. */
enabled?: boolean
}
export type OcrProviderConfig = OcrApiProviderConfig | OcrTesseractConfig | OcrSystemConfig | OcrPpocrConfig
export type OcrProvider = {
id: string
name: string
capabilities: OcrProviderCapabilityRecord
config?: OcrProviderBaseConfig
}
export type OcrApiProviderConfig = OcrProviderBaseConfig & {
api: OcrProviderApiConfig
}
export type OcrApiProvider = OcrProvider & {
config: OcrApiProviderConfig
}
export const isOcrApiProvider = (p: OcrProvider): p is OcrApiProvider => {
return !!(p.config && p.config.api && isOcrProviderApiConfig(p.config.api))
}
export type BuiltinOcrProvider = OcrProvider & {
id: BuiltinOcrProviderId
}
export const isBuiltinOcrProvider = (p: OcrProvider): p is BuiltinOcrProvider => {
return isBuiltinOcrProviderId(p.id)
}
// Not sure compatible api endpoint exists. May not support custom ocr provider
export type CustomOcrProvider = OcrProvider & {
id: Exclude<string, BuiltinOcrProviderId>
}
export type ImageOcrProvider = OcrProvider & {
capabilities: OcrProviderCapabilityRecord & {
[OcrProviderCapabilities.image]: true
}
}
// export type PdfOcrProvider = OcrProvider & {
// capabilities: OcrProviderCapabilityRecord & {
// [OcrProviderCapabilities.pdf]: true
// }
// }
export const isImageOcrProvider = (p: OcrProvider): p is ImageOcrProvider => {
return p.capabilities.image === true
}
export type SupportedOcrFile = ImageFileMetadata
export const isSupportedOcrFile = (file: FileMetadata): file is SupportedOcrFile => {
return isImageFileMetadata(file)
}
export type OcrResult = {
text: string
}
export type OcrHandler = (file: SupportedOcrFile, options?: OcrProviderBaseConfig) => Promise<OcrResult>
export type OcrImageHandler = (file: ImageFileMetadata, options?: OcrProviderBaseConfig) => Promise<OcrResult>
// Tesseract Types
export type OcrTesseractConfig = OcrProviderBaseConfig & {
langs?: Partial<Record<TesseractLangCode, boolean>>
}
export type OcrTesseractProvider = {
id: 'tesseract'
config: OcrTesseractConfig
} & ImageOcrProvider &
BuiltinOcrProvider
export const isOcrTesseractProvider = (p: OcrProvider): p is OcrTesseractProvider => {
return p.id === BuiltinOcrProviderIds.tesseract
}
export type TesseractLangCode = Tesseract.LanguageCode
// System Types
export type OcrSystemConfig = OcrProviderBaseConfig & {
langs?: TranslateLanguageCode[]
}
export type OcrSystemProvider = {
id: 'system'
config: OcrSystemConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
export const isOcrSystemProvider = (p: OcrProvider): p is OcrSystemProvider => {
return p.id === BuiltinOcrProviderIds.system
}
// PaddleOCR Types
export type OcrPpocrConfig = OcrProviderBaseConfig & {
apiUrl?: string
accessToken?: string
}
export type OcrPpocrProvider = {
id: 'paddleocr'
config: OcrPpocrConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => {
return p.id === BuiltinOcrProviderIds.paddleocr
}
// OV OCR Types
export type OcrOvConfig = OcrProviderBaseConfig & {
langs?: TranslateLanguageCode[]
}
export type OcrOvProvider = {
id: 'ovocr'
config: OcrOvConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
export const isOcrOVProvider = (p: OcrProvider): p is OcrOvProvider => {
return p.id === BuiltinOcrProviderIds.ovocr
}

View File

@@ -0,0 +1,61 @@
import * as z from 'zod'
import { DbOcrProviderSchema } from './data'
import { OcrProviderSchema } from './provider/base'
import { OcrProviderNameSchema } from './provider/base'
import { OcrProviderIdSchema } from './provider/base'
import { OcrProviderConfigSchema } from './provider/base'
// ==========================================================
// API layer Types
// ==========================================================
/**
 * Query parameters accepted when listing OCR providers.
 * NOTE(review): the exact meaning of `registered` is not visible in this file — confirm against the handler.
 */
export type ListOcrProvidersQuery = { registered?: boolean }
/** Response envelope for the list endpoint: `data` is an array of stored providers (with timestamps). */
export const ListOcrProvidersResponseSchema = z.object({
data: z.array(DbOcrProviderSchema)
})
export type ListOcrProvidersResponse = z.infer<typeof ListOcrProvidersResponseSchema>
/** Response envelope for fetching a single provider: `data` is one stored provider. */
export const GetOcrProviderResponseSchema = z.object({
data: DbOcrProviderSchema
})
export type GetOcrProviderResponse = z.infer<typeof GetOcrProviderResponseSchema>
/**
 * Request payload for updating an OCR provider.
 * `id` identifies the provider; only the following fields are modifiable:
 * - `name`: provider display name
 * - `config`: provider-specific configuration object (all properties optional)
 *
 * NOTE(review): `config` is derived with `.partial()`. In Zod 4, a `.default(...)` on a field is
 * still applied when that field is wrapped in an optional, so parsing an update that omits such a
 * field may yield the default value rather than leaving it absent. Confirm that merging the parsed
 * update cannot overwrite a stored setting with its default.
 */
export const UpdateOcrProviderRequestSchema = z.object({
id: OcrProviderIdSchema,
name: OcrProviderNameSchema.optional(),
config: OcrProviderConfigSchema.partial().optional()
})
export type UpdateOcrProviderRequest = z.infer<typeof UpdateOcrProviderRequestSchema>
/** Response envelope for an update: `data` is the stored provider. */
export const UpdateOcrProviderResponseSchema = z.object({
data: DbOcrProviderSchema
})
export type UpdateOcrProviderResponse = z.infer<typeof UpdateOcrProviderResponseSchema>
/** Request payload for creating a provider: a full provider object (no timestamp fields). */
export const CreateOcrProviderRequestSchema = OcrProviderSchema
export type CreateOcrProviderRequest = z.infer<typeof CreateOcrProviderRequestSchema>
/** Response envelope for a create: `data` is the stored provider. */
export const CreateOcrProviderResponseSchema = z.object({
data: DbOcrProviderSchema
})
export type CreateOcrProviderResponse = z.infer<typeof CreateOcrProviderResponseSchema>
/** Request payload for replacing a provider: a full provider object (no timestamp fields). */
export const ReplaceOcrProviderRequestSchema = OcrProviderSchema
export type ReplaceOcrProviderRequest = z.infer<typeof ReplaceOcrProviderRequestSchema>
/** Response envelope for a replace: `data` is the stored provider. */
export const ReplaceOcrProviderResponseSchema = z.object({
data: DbOcrProviderSchema
})
export type ReplaceOcrProviderResponse = z.infer<typeof ReplaceOcrProviderResponseSchema>

View File

@@ -0,0 +1,20 @@
import type { FileMetadata, ImageFileMetadata, OcrProviderConfig } from '..'
import { isImageFileMetadata } from '..'
/** File kinds that OCR can process. Currently only image files. */
export type SupportedOcrFile = ImageFileMetadata
/** Type guard: a file is supported for OCR when it is an image file. */
export const isSupportedOcrFile = (file: FileMetadata): file is SupportedOcrFile => {
return isImageFileMetadata(file)
}
/** Parameters of an OCR request; the provider is referenced by its id. */
export type OcrParams = {
providerId: string
}
/** Result of an OCR run: the recognized text. */
export type OcrResult = {
text: string
}
/** Signature of a function that runs OCR on a supported file, optionally given a provider config. */
export type OcrHandler = (file: SupportedOcrFile, config?: OcrProviderConfig) => Promise<OcrResult>
/** Same as {@link OcrHandler}, restricted to image files. */
export type OcrImageHandler = (file: ImageFileMetadata, config?: OcrProviderConfig) => Promise<OcrResult>

View File

@@ -0,0 +1,31 @@
import type { DbOcrProviderCreate } from './data'
import type { DbOcrProviderUpdate } from './data'
import type { DbOcrProviderReplace } from './data'
import type { DbOcrProviderKey } from './data'
import type { DbOcrProvider } from './data'
// ==========================================================
// Business layer Types
// ==========================================================
/**
 * Business-level representation of an OCR provider.
 * Mirrors the data layer but is intended for use in domain/business logic.
 */
export type OcrProviderBusiness = DbOcrProvider
/**
 * Business-level representation of an OCR provider creation payload.
 */
export type OcrProviderCreateBusiness = DbOcrProviderCreate
/**
 * Business-level representation of an OCR provider update payload.
 */
export type OcrProviderUpdateBusiness = DbOcrProviderUpdate
/**
 * Business-level representation of an OCR provider replacement payload.
 */
export type OcrProviderReplaceBusiness = DbOcrProviderReplace
/**
 * Business-level key type for identifying an OCR provider.
 */
export type OcrProviderKeyBusiness = DbOcrProviderKey

View File

@@ -0,0 +1,25 @@
import * as z from 'zod'
import { type UpdateOcrProviderRequest } from './api'
import { type OcrProvider } from './provider/base'
import { OcrProviderSchema } from './provider/base'
// ==========================================================
// Data layer Types
//
// NOTE: Timestamp operations are not exposed to outside.
// ==========================================================
/**
 * Extra fields added to stored records. Both are nullable numbers.
 * NOTE(review): presumably epoch timestamps; the unit is not visible here — confirm.
 */
export const TimestampExtendShape = {
createdAt: z.number().nullable(),
updatedAt: z.number().nullable()
}
/** Stored form of an OCR provider: the provider schema extended with the timestamp fields. */
export const DbOcrProviderSchema = OcrProviderSchema.extend(TimestampExtendShape)
export type DbOcrProvider = z.infer<typeof DbOcrProviderSchema>
/** Runtime type guard: true when `p` parses successfully against {@link DbOcrProviderSchema}. */
export function isDbOcrProvider(p: unknown): p is DbOcrProvider {
return DbOcrProviderSchema.safeParse(p).success
}
/** Payload for creating a record: a provider without timestamp fields. */
export type DbOcrProviderCreate = OcrProvider
/** Payload for updating a record: same shape as the API update request. */
export type DbOcrProviderUpdate = UpdateOcrProviderRequest
/** Payload for replacing a record: a provider without timestamp fields. */
export type DbOcrProviderReplace = OcrProvider
/** Key used to identify a record: the provider `id`. */
export type DbOcrProviderKey = DbOcrProvider['id']

View File

@@ -0,0 +1,6 @@
export * from './api'
export * from './base'
export * from './business'
export * from './data'
export * from './model'
export * from './provider'

View File

@@ -0,0 +1,17 @@
import * as z from 'zod'
import type { OcrProviderCapabilityRecord } from './provider/base'
import { OcrProviderCapabilityRecordSchema } from './provider/base'
// OCR models and providers share the same type definition.
// A provider can offer capabilities to process multiple file types,
// while a model belonging to that provider may be limited to processing only one specific file type.
export type OcrModelCapabilityRecord = OcrProviderCapabilityRecord
export const OcrModelSchema = z.object({
id: z.string(),
name: z.string(),
providerId: z.string(),
capabilities: OcrProviderCapabilityRecordSchema
})
export type OcrModel = z.infer<typeof OcrModelSchema>

View File

@@ -0,0 +1,118 @@
import { objectValues } from '@types'
import * as z from 'zod'
/**
 * Lookup object for the built-in provider ids (key and value are the same string).
 * `satisfies` checks that every value is a valid BuiltinOcrProviderId; it does not check that
 * every id listed in BuiltinOcrProviderIdSchema has an entry here, so keep both lists in sync.
 */
export const BuiltinOcrProviderIdMap = {
tesseract: 'tesseract',
system: 'system',
paddleocr: 'paddleocr',
ovocr: 'ovocr'
} as const satisfies Record<string, BuiltinOcrProviderId>
/** Frozen array of the built-in provider ids, derived from the map above. */
export const BuiltinOcrProviderIds = Object.freeze(objectValues(BuiltinOcrProviderIdMap))
/** Zod enum of the built-in provider ids; the source of the BuiltinOcrProviderId type. */
export const BuiltinOcrProviderIdSchema = z.enum(['tesseract', 'system', 'paddleocr', 'ovocr'])
export type BuiltinOcrProviderId = z.infer<typeof BuiltinOcrProviderIdSchema>
/** Type guard: true when `id` is one of the built-in provider ids. */
export const isBuiltinOcrProviderId = (id: string): id is BuiltinOcrProviderId => {
return BuiltinOcrProviderIdSchema.safeParse(id).success
}
// Extensible: add further capabilities (e.g. pdf) here and in OcrProviderCapabilitySchema.
export const OcrProviderCapabilities = {
image: 'image'
// pdf: 'pdf'
} as const satisfies Record<string, OcrProviderCapability>
/** Zod enum of the supported capabilities; the source of the OcrProviderCapability type. */
export const OcrProviderCapabilitySchema = z.enum(['image'])
export type OcrProviderCapability = z.infer<typeof OcrProviderCapabilitySchema>
/** Type guard: true when `cap` is a supported capability name. */
export const isOcrProviderCapability = (cap: string): cap is OcrProviderCapability => {
return OcrProviderCapabilitySchema.safeParse(cap).success
}
/** Partial mapping from capability name to a boolean flag; capabilities may be omitted. */
export const OcrProviderCapabilityRecordSchema = z.partialRecord(OcrProviderCapabilitySchema, z.boolean())
export type OcrProviderCapabilityRecord = z.infer<typeof OcrProviderCapabilityRecordSchema>
/**
 * API connection settings of a provider: key, host and an optional version.
 *
 * Extend this type to define provider-specific config types.
 */
export const OcrProviderApiConfigSchema = z.object({
apiKey: z.string(),
apiHost: z.string(),
apiVersion: z.string().optional()
})
export type OcrProviderApiConfig = z.infer<typeof OcrProviderApiConfigSchema>
/** Type guard: true when `config` parses successfully against {@link OcrProviderApiConfigSchema}. */
export const isOcrProviderApiConfig = (config: unknown): config is OcrProviderApiConfig => {
return OcrProviderApiConfigSchema.safeParse(config).success
}
/**
 * Base configuration shared by all providers. `enabled` defaults to `false` when omitted.
 *
 * Reserved for future use: model-based OCR and API-based OCR may need different API clients.
 *
 * Extend this type to define provider-specific config types.
 */
export const OcrProviderBaseConfigSchema = z.object({
enabled: z.boolean().default(false)
})
export type OcrProviderBaseConfig = z.infer<typeof OcrProviderBaseConfigSchema>
/** Generic provider config: the base config in loose mode, so additional provider-specific keys are accepted. */
export const OcrProviderConfigSchema = OcrProviderBaseConfigSchema.loose()
export type OcrProviderConfig = z.infer<typeof OcrProviderConfigSchema>
/** Provider id: any string. */
export const OcrProviderIdSchema = z.string()
export type OcrProviderId = z.infer<typeof OcrProviderIdSchema>
/** Provider display name: any string. */
export const OcrProviderNameSchema = z.string()
/** Schema of an OCR provider: id, name, capability flags and a (required) config object. */
export const OcrProviderSchema = z.object({
id: OcrProviderIdSchema,
name: OcrProviderNameSchema,
capabilities: OcrProviderCapabilityRecordSchema,
config: OcrProviderConfigSchema
})
export type OcrProvider = z.infer<typeof OcrProviderSchema>
/** Type guard: true when `p` parses successfully against {@link OcrProviderSchema}. */
export const isOcrProvider = (p: unknown): p is OcrProvider => {
return OcrProviderSchema.safeParse(p).success
}
/** Config of an API-based provider: the base config plus a required `api` section. */
export const OcrApiProviderConfigSchema = OcrProviderBaseConfigSchema.extend({
api: OcrProviderApiConfigSchema
})
export type OcrApiProviderConfig = z.infer<typeof OcrApiProviderConfigSchema>
/** Type guard: true when `config` parses successfully against {@link OcrApiProviderConfigSchema}. */
export const isOcrApiProviderConfig = (config: unknown): config is OcrApiProviderConfig => {
return OcrApiProviderConfigSchema.safeParse(config).success
}
// Alias of OcrProviderSchema: it does not require `config.api`, so every valid OcrProvider
// also passes as an OcrApiProvider.
export const OcrApiProviderSchema = OcrProviderSchema
/** Currently, there is no API provider yet, but we've left room for expansion. */
export type OcrApiProvider = z.infer<typeof OcrApiProviderSchema>
/** Type guard: true when `p` parses successfully against OcrApiProviderSchema (i.e. any valid provider). */
export const isOcrApiProvider = (p: unknown): p is OcrApiProvider => {
return OcrApiProviderSchema.safeParse(p).success
}
/** A provider whose id is one of the built-in provider ids. */
export type BuiltinOcrProvider = OcrProvider & {
id: BuiltinOcrProviderId
}
/** Type guard: true when the provider's id is a built-in provider id. */
export const isBuiltinOcrProvider = (p: OcrProvider): p is BuiltinOcrProvider => {
return isBuiltinOcrProviderId(p.id)
}
// It is unclear whether a compatible API endpoint exists, so custom OCR providers may not be supported.
// Note: `Exclude<string, BuiltinOcrProviderId>` resolves to plain `string`, so this type does not
// exclude the built-in ids at the type level.
export type CustomOcrProvider = OcrProvider & {
id: Exclude<string, BuiltinOcrProviderId>
}
/** A provider whose `image` capability is exactly `true`. */
export type ImageOcrProvider = OcrProvider & {
capabilities: OcrProviderCapabilityRecord & {
[OcrProviderCapabilities.image]: true
}
}
/** Type guard: true only when `capabilities.image` is strictly `true`. */
export const isImageOcrProvider = (p: OcrProvider): p is ImageOcrProvider => {
return p.capabilities.image === true
}

View File

@@ -0,0 +1,5 @@
export * from './base'
export * from './ov'
export * from './paddle'
export * from './system'
export * from './tesseract'

View File

@@ -0,0 +1,29 @@
import type * as z from 'zod'
import type { ImageOcrProvider } from './base'
import type { BuiltinOcrProvider } from './base'
import type { OcrProvider } from './base'
import { OcrProviderBaseConfigSchema } from './base'
import { BuiltinOcrProviderIdMap } from './base'
// ==========================================================
// OV OCR Types
// ==========================================================
/** Config schema of the OV OCR provider. It currently adds nothing to the base config. */
export const OcrOvConfigSchema = OcrProviderBaseConfigSchema.extend({
// It's not configurable for now.
// langs: z.array(TranslateLanguageCodeSchema).optional()
})
export type OcrOvConfig = z.infer<typeof OcrOvConfigSchema>
/** Type guard: true when `config` parses successfully against {@link OcrOvConfigSchema}. */
export const isOcrOvConfig = (config: unknown): config is OcrOvConfig => {
return OcrOvConfigSchema.safeParse(config).success
}
/** The built-in OV OCR provider: fixed id `'ovocr'`, image-capable, with an OV config. */
export type OcrOvProvider = {
id: 'ovocr'
config: OcrOvConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
/** Type guard based on the provider id only; the config and capabilities are not inspected. */
export const isOcrOVProvider = (p: OcrProvider): p is OcrOvProvider => {
return p.id === BuiltinOcrProviderIdMap.ovocr
}

View File

@@ -0,0 +1,29 @@
import * as z from 'zod'
import type { ImageOcrProvider } from './base'
import type { BuiltinOcrProvider } from './base'
import type { OcrProvider } from './base'
import { OcrProviderBaseConfigSchema } from './base'
import { BuiltinOcrProviderIdMap } from './base'
// ==========================================================
// PaddleOCR Types
// ==========================================================
/** Config schema of the PaddleOCR provider: base config plus an optional API URL and access token. */
export const OcrPpocrConfigSchema = OcrProviderBaseConfigSchema.extend({
apiUrl: z.string().optional(),
accessToken: z.string().optional()
})
export type OcrPpocrConfig = z.infer<typeof OcrPpocrConfigSchema>
/** Type guard: true when `config` parses successfully against {@link OcrPpocrConfigSchema}. */
export const isOcrPpocrConfig = (config: unknown): config is OcrPpocrConfig => {
return OcrPpocrConfigSchema.safeParse(config).success
}
/** The built-in PaddleOCR provider: fixed id `'paddleocr'`, image-capable, with a PaddleOCR config. */
export type OcrPpocrProvider = {
id: 'paddleocr'
config: OcrPpocrConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
/** Type guard based on the provider id only; the config and capabilities are not inspected. */
export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => {
return p.id === BuiltinOcrProviderIdMap.paddleocr
}

View File

@@ -0,0 +1,31 @@
import * as z from 'zod'
import { TranslateLanguageCodeSchema } from '../../translate'
import type { OcrProvider } from './base'
import { type ImageOcrProvider, OcrProviderBaseConfigSchema } from './base'
import { type BuiltinOcrProvider } from './base'
import { BuiltinOcrProviderIdMap } from './base'
// ==========================================================
// System OCR Types
// ==========================================================
/** Config schema of the system OCR provider: base config plus an optional list of translate language codes. */
export const OcrSystemConfigSchema = OcrProviderBaseConfigSchema.extend({
langs: z.array(TranslateLanguageCodeSchema).optional()
})
export type OcrSystemConfig = z.infer<typeof OcrSystemConfigSchema>
/** Type guard: true when `c` parses successfully against {@link OcrSystemConfigSchema}. */
export const isOcrSystemConfig = (c: unknown): c is OcrSystemConfig => {
return OcrSystemConfigSchema.safeParse(c).success
}
/** The built-in system OCR provider: fixed id `'system'`, image-capable, with a system config. */
export type OcrSystemProvider = {
id: 'system'
config: OcrSystemConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
/** Type guard based on the provider id only; the config and capabilities are not inspected. */
export const isOcrSystemProvider = (p: OcrProvider): p is OcrSystemProvider => {
return p.id === BuiltinOcrProviderIdMap.system
}

View File

@@ -0,0 +1,35 @@
import type Tesseract from 'tesseract.js'
import * as z from 'zod'
import { TranslateLanguageCodeSchema } from '../../translate'
import type { ImageOcrProvider } from './base'
import type { BuiltinOcrProvider } from './base'
import type { OcrProvider } from './base'
import { OcrProviderBaseConfigSchema } from './base'
import { BuiltinOcrProviderIdMap } from './base'
// ==========================================================
// Tesseract OCR Types
// ==========================================================
/**
 * Zod schema for the Tesseract OCR provider's config: the base provider config
 * extended with an optional `langs` record keyed by translate language code
 * with boolean values.
 * NOTE(review): the boolean presumably marks whether a language is enabled —
 * confirm against the consumers of this config.
 */
export const OcrTesseractConfigSchema = OcrProviderBaseConfigSchema.extend({
langs: z.record(TranslateLanguageCodeSchema, z.boolean()).optional()
})
/** Config type inferred from `OcrTesseractConfigSchema`. */
export type OcrTesseractConfig = z.infer<typeof OcrTesseractConfigSchema>
/**
 * Type guard for {@link OcrTesseractConfig}.
 *
 * @param value - Arbitrary value to check.
 * @returns `true` when `value` validates against `OcrTesseractConfigSchema`.
 */
export const isOcrTesseractConfig = (value: unknown): value is OcrTesseractConfig => {
  const { success } = OcrTesseractConfigSchema.safeParse(value)
  return success
}
/**
 * Built-in provider whose `id` is fixed to `'tesseract'` and whose `config` is
 * an {@link OcrTesseractConfig}, intersected with `ImageOcrProvider` and
 * `BuiltinOcrProvider`.
 */
export type OcrTesseractProvider = {
id: 'tesseract'
config: OcrTesseractConfig
} & ImageOcrProvider &
BuiltinOcrProvider
/**
 * Type guard that narrows an {@link OcrProvider} to
 * {@link OcrTesseractProvider}.
 *
 * The check is by identifier only; the provider's `config` is not validated.
 *
 * @param p - Provider to inspect.
 * @returns `true` when `p.id` equals `BuiltinOcrProviderIdMap.tesseract`.
 */
export const isOcrTesseractProvider = (p: OcrProvider): p is OcrTesseractProvider =>
  p.id === BuiltinOcrProviderIdMap.tesseract
/** Alias for the `LanguageCode` type exported by `tesseract.js`. */
export type TesseractLangCode = Tesseract.LanguageCode

View File

@@ -0,0 +1,48 @@
import * as z from 'zod'
/**
 * Language code pattern used for translation features.
 * Examples: "zh-cn", "en-us", "fr-fr", etc.
 * Must be lowercase and follow the format: 2-3 letter language code
 * followed by a hyphen and a 2-3 letter region code.
 * The hyphenated suffix is mandatory, so a bare code such as "en" is rejected.
 */
export const TranslateLanguageCodeSchema = z.string().regex(/^[a-z]{2,3}(-[a-z]{2,3})$/)
export type TranslateLanguageCode = z.infer<typeof TranslateLanguageCodeSchema> // A langCode should uniquely identify a single language
/**
 * Describes a language selectable for translation.
 *
 * - `value`: string value for the language (NOTE(review): exact meaning is not
 *   visible here — confirm against usages).
 * - `langCode`: code matching `TranslateLanguageCodeSchema`.
 * - `label`: function returning the label text; it is evaluated on each call
 *   rather than stored as a fixed string.
 * - `emoji`: emoji string associated with the language.
 */
export type TranslateLanguage = {
value: string
langCode: TranslateLanguageCode
label: () => string
emoji: string
}
/**
 * A single translation history record: the source and translated text together
 * with the language codes of both sides.
 * NOTE(review): `createdAt` is typed as a plain string; its format is not
 * visible here — confirm with the code that writes it.
 */
export interface TranslateHistory {
id: string
sourceText: string
targetText: string
sourceLanguage: TranslateLanguageCode
targetLanguage: TranslateLanguageCode
createdAt: string
/** Favorite (starred) status */
star?: boolean
}
/**
 * A custom translate language entry. Unlike `TranslateLanguage`, it carries
 * its own `id` and has no `label` function.
 * NOTE(review): presumably these entries are user-defined — confirm against
 * the code that creates them.
 */
export type CustomTranslateLanguage = {
id: string
langCode: TranslateLanguageCode
value: string
emoji: string
}
/**
 * Known auto-detection method identifiers. Each key maps to an identical
 * string value, so the object doubles as a runtime lookup table.
 * NOTE(review): presumably these are language-detection strategies — confirm
 * against the code that consumes them.
 */
export const AutoDetectionMethods = {
  franc: 'franc',
  llm: 'llm',
  auto: 'auto'
} as const

/** Union of the keys of `AutoDetectionMethods`. */
export type AutoDetectionMethod = keyof typeof AutoDetectionMethods

/**
 * Type guard for {@link AutoDetectionMethod}.
 *
 * Only own keys of `AutoDetectionMethods` count, so inherited property names
 * such as `"toString"` are rejected.
 *
 * @param method - Candidate string.
 * @returns `true` when `method` is an own key of `AutoDetectionMethods`.
 */
export const isAutoDetectionMethod = (method: string): method is AutoDetectionMethod =>
  Object.hasOwn(AutoDetectionMethods, method)

View File

@@ -0,0 +1,10 @@
import { isMac, isWin } from '@renderer/config/constant'
import type { OcrProviderCapability } from '@renderer/types'
import { systemOcr, tesseract } from '@shared/config/ocr'
/**
 * Picks the default OCR provider for a capability.
 *
 * For `'image'`, `systemOcr` is returned when either `isWin` or `isMac` is
 * truthy; otherwise `tesseract` is returned. `'image'` is the only capability
 * handled by the switch.
 *
 * @param cap - Capability to find a default provider for.
 * @returns The default provider for `cap`.
 */
export const getDefaultOcrProvider = (cap: OcrProviderCapability) => {
  switch (cap) {
    case 'image': {
      if (isWin || isMac) {
        return systemOcr
      }
      return tesseract
    }
  }
}

View File

@@ -5,13 +5,13 @@
"src/main/**/*",
"src/preload/**/*",
"src/main/env.d.ts",
"src/renderer/src/types/*",
"src/renderer/src/types/**/*",
"packages/shared/**/*",
"scripts",
"packages/mcp-trace/**/*",
"src/renderer/src/services/traceApi.ts",
"tests/__mocks__/**/*"
],
, "src/renderer/src/types/ocr/base.ts" ],
"compilerOptions": {
"composite": true,
"incremental": true,

11700
yarn.lock

File diff suppressed because it is too large Load Diff