Compare commits

...

2 Commits

Author SHA1 Message Date
icarus
8d598ee3a2 feat(translate): support document files and increase size limit
Add support for document file types in translation file selection. Increase maximum file size limit to 20MB for documents while keeping text files at 5MB. Implement separate handling for document and text file reading.
2025-12-01 20:36:50 +08:00
icarus
dd71c7cee3 refactor(FileStorage): extract file reading logic into reusable method
Move common file reading functionality from readFile and readExternalFile into a new private readFileCore method.
Improve error logging by distinguishing between document and text file failures.
Add comprehensive JSDoc documentation for all file reading methods.
2025-12-01 20:29:13 +08:00
2 changed files with 118 additions and 63 deletions

View File

@@ -478,13 +478,16 @@ class FileStorage {
}
}
public readFile = async (
_: Electron.IpcMainInvokeEvent,
id: string,
detectEncoding: boolean = false
): Promise<string> => {
const filePath = path.join(this.storageDir, id)
/**
* Core file reading logic that handles both documents and text files.
*
* @private
* @param filePath - Full path to the file
* @param detectEncoding - Whether to auto-detect text file encoding
* @returns Promise resolving to the extracted text content
* @throws Error if file reading fails
*/
private async readFileCore(filePath: string, detectEncoding: boolean = false): Promise<string> {
const fileExtension = path.extname(filePath)
if (documentExts.includes(fileExtension)) {
@@ -504,7 +507,7 @@ class FileStorage {
return data
} catch (error) {
chdir(originalCwd)
logger.error('Failed to read file:', error as Error)
logger.error('Failed to read document file:', error as Error)
throw error
}
}
@@ -516,11 +519,72 @@ class FileStorage {
return fs.readFileSync(filePath, 'utf-8')
}
} catch (error) {
logger.error('Failed to read file:', error as Error)
logger.error('Failed to read text file:', error as Error)
throw new Error(`Failed to read file: ${filePath}.`)
}
}
/**
 * Loads a file from the application's storage directory and returns its text.
 *
 * The `id` is joined onto `storageDir` to build the full path; the actual
 * reading is delegated to `readFileCore`, which chooses between document
 * extraction and plain-text reading based on the file extension.
 *
 * @param _ - Electron IPC invoke event (not used)
 * @param id - Name of the stored file, including its extension (e.g., "uuid.docx")
 * @param detectEncoding - Forwarded to `readFileCore`; when true, the encoding of
 *   text files is auto-detected (default: false)
 * @returns Promise resolving to the text content of the file
 * @throws Error propagated from `readFileCore` when the file cannot be read
 *
 * @example
 * // Stored document
 * const content = await readFile(event, "document.docx");
 *
 * @example
 * // Stored text file, with encoding detection enabled
 * const content = await readFile(event, "text.txt", true);
 */
public readFile = async (
  _: Electron.IpcMainInvokeEvent,
  id: string,
  detectEncoding: boolean = false
): Promise<string> => this.readFileCore(path.join(this.storageDir, id), detectEncoding)
/**
* Reads and extracts content from an external file path.
*
* Similar to readFile, but operates on external file paths instead of stored files.
* Supports the same file formats including complex documents and text files.
*
* @param _ - Electron IPC invoke event (unused)
* @param filePath - Absolute path to the external file
* @param detectEncoding - Whether to auto-detect text file encoding (default: false)
* @returns Promise resolving to the extracted text content of the file
* @throws Error if file does not exist or reading fails
*
* @example
* // Read an external DOCX file
* const content = await readExternalFile(event, "/path/to/document.docx");
*
* @example
* // Read an external text file with encoding detection
* const content = await readExternalFile(event, "/path/to/text.txt", true);
*/
public readExternalFile = async (
_: Electron.IpcMainInvokeEvent,
filePath: string,
@@ -530,40 +594,7 @@ class FileStorage {
throw new Error(`File does not exist: ${filePath}`)
}
const fileExtension = path.extname(filePath)
if (documentExts.includes(fileExtension)) {
const originalCwd = process.cwd()
try {
chdir(this.tempDir)
if (fileExtension === '.doc') {
const extractor = new WordExtractor()
const extracted = await extractor.extract(filePath)
chdir(originalCwd)
return extracted.getBody()
}
const data = await officeParser.parseOfficeAsync(filePath)
chdir(originalCwd)
return data
} catch (error) {
chdir(originalCwd)
logger.error('Failed to read file:', error as Error)
throw error
}
}
try {
if (detectEncoding) {
return readTextFileWithAutoEncoding(filePath)
} else {
return fs.readFileSync(filePath, 'utf-8')
}
} catch (error) {
logger.error('Failed to read file:', error as Error)
throw new Error(`Failed to read file: ${filePath}.`)
}
return this.readFileCore(filePath, detectEncoding)
}
public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise<string> => {

View File

@@ -39,6 +39,7 @@ import {
detectLanguage,
determineTargetLanguage
} from '@renderer/utils/translate'
import { documentExts } from '@shared/config/constant'
import { imageExts, MB, textExts } from '@shared/config/constant'
import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd'
import type { TextAreaRef } from 'antd/es/input/TextArea'
@@ -66,7 +67,7 @@ const TranslatePage: FC = () => {
const { prompt, getLanguageByLangcode, settings } = useTranslate()
const { autoCopy } = settings
const { shikiMarkdownIt } = useCodeStyle()
const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts] })
const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts, ...documentExts] })
const { ocr } = useOcr()
const { setTimeoutTimer } = useTimer()
@@ -484,33 +485,56 @@ const TranslatePage: FC = () => {
const readFile = useCallback(
async (file: FileMetadata) => {
const _readFile = async () => {
let isText: boolean
try {
// 检查文件是否为文本文件
isText = await isTextFile(file.path)
} catch (e) {
logger.error('Failed to check if file is text.', e as Error)
window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e))
return
}
const fileExtension = getFileExtension(file.path)
if (!isText) {
window.toast.error(t('common.file.not_supported', { type: getFileExtension(file.path) }))
logger.error('Unsupported file type.')
return
}
// Check if file is supported format (text file or document file)
let isText: boolean
const isDocument: boolean = documentExts.includes(fileExtension)
// the threshold may be too large
if (file.size > 5 * MB) {
window.toast.error(t('translate.files.error.too_large') + ' (0 ~ 5 MB)')
} else {
if (!isDocument) {
try {
// For non-document files, check if it's a text file
isText = await isTextFile(file.path)
} catch (e) {
logger.error('Failed to check file type.', e as Error)
window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e))
return
}
} else {
isText = false
}
if (!isText && !isDocument) {
window.toast.error(t('common.file.not_supported', { type: fileExtension }))
logger.error('Unsupported file type.')
return
}
// File size check - document files allowed to be larger
const maxSize = isDocument ? 20 * MB : 5 * MB
if (file.size > maxSize) {
window.toast.error(t('translate.files.error.too_large') + ` (0 ~ ${maxSize / MB} MB)`)
return
}
let result: string
try {
const result = await window.api.fs.readText(file.path)
if (isDocument) {
// Use the new document reading API
result = await window.api.file.readExternal(file.path, true)
} else {
// Read text file
result = await window.api.fs.readText(file.path)
}
setText(text + result)
} catch (e) {
logger.error('Failed to read text file.', e as Error)
logger.error('Failed to read file.', e as Error)
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
}
} catch (e) {
logger.error('Failed to read file.', e as Error)
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
}
}
const promise = _readFile()