fix(Markdown, LaTeX): do not touch escaped brackets (#7582)

- Keep `\\[` as is
- Use a custom match algorithm rather than balanced match
This commit is contained in:
one
2025-06-27 13:46:09 +08:00
committed by GitHub
parent a6b53457b0
commit e8e9a2d86f
4 changed files with 70 additions and 22 deletions

View File

@@ -112,7 +112,6 @@
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@tryfabric/martian": "^1.2.4",
"@types/balanced-match": "^3",
"@types/diff": "^7",
"@types/fs-extra": "^11",
"@types/lodash": "^4.17.5",
@@ -139,7 +138,6 @@
"archiver": "^7.0.1",
"async-mutex": "^0.5.0",
"axios": "^1.7.3",
"balanced-match": "^3.0.1",
"browser-image-compression": "^2.0.2",
"color": "^5.0.0",
"dayjs": "^1.11.11",

View File

@@ -490,6 +490,22 @@ $$
it('should convert inline math \\(...\\) to $...$', () => {
expect(processLatexBrackets('The formula is \\(a+b=c\\)')).toBe('The formula is $a+b=c$')
})
it('should handle complex mathematical text with escaped brackets', () => {
  // Only the \( \) and \[ \] delimiters are expected to change. Prose between
  // formulas (including the text between the first two inline formulas and the
  // trailing full stop) and the escaped `\\[2pt]` row spacing must be preserved.
  const input = `\\(A\\) 为 \\(n\\times n\\) 的实可逆矩阵,
\\[
B=\\begin{pmatrix} O & A \\\\[2pt] A' & O \\end{pmatrix}\\;(2n\\times 2n,\\;B=B'),
\\]
\\(B\\) 的正惯性指数 \\(p(B)\\) 和负惯性指数 \\(q(B)\\)。`
  const expected = `$A$ 为 $n\\times n$ 的实可逆矩阵,
$$
B=\\begin{pmatrix} O & A \\\\[2pt] A' & O \\end{pmatrix}\\;(2n\\times 2n,\\;B=B'),
$$
$B$ 的正惯性指数 $p(B)$ 和负惯性指数 $q(B)$。`
  expect(processLatexBrackets(input)).toBe(expected)
})
})
describe('code block protection', () => {

View File

@@ -1,5 +1,4 @@
import { languages } from '@shared/config/languages'
import { default as balanced } from 'balanced-match'
import remarkParse from 'remark-parse'
import remarkStringify from 'remark-stringify'
import removeMarkdown from 'remove-markdown'
@@ -31,7 +30,7 @@ export const findCitationInChildren = (children: any): string => {
}
// 检查是否包含潜在的 LaTeX 模式
const containsLatexRegex = /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/s
const containsLatexRegex = /\\\(.*?\\\)|\\\[.*?\\\]/s
/**
* 转换 LaTeX 公式括号 `\[\]` 和 `\(\)` 为 Markdown 格式 `$$...$$` 和 `$...$`
@@ -41,7 +40,7 @@ const containsLatexRegex = /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.
* 目前的实现:
* - 保护代码块和链接,避免被 remark-math 处理
* - 支持嵌套括号的平衡匹配
* - 转义 `\\(x\\)` 会被处理为 `\$x\$``\\[x\\]` 会被处理为 `\$$x\$$`
* - 转义括号 `\\(\\)` `\\[\\]` 不会被处理
*
* @see https://github.com/remarkjs/remark-math/issues/39
* @param text 输入的 Markdown 文本
@@ -77,7 +76,7 @@ export const processLatexBrackets = (text: string) => {
let remaining = content
while (remaining.length > 0) {
const match = balanced(openDelim, closeDelim, remaining)
const match = findLatexMatch(remaining, openDelim, closeDelim)
if (!match) {
result += remaining
break
@@ -109,6 +108,57 @@ export const processLatexBrackets = (text: string) => {
return result
}
/**
 * Find the first balanced pair of LaTeX math delimiters in `text`.
 *
 * Delimiters may nest. A delimiter preceded by an odd number of consecutive
 * backslashes is treated as escaped and ignored (e.g. the `\\[2pt]` row spacing
 * inside a matrix is not an opening `\[`).
 *
 * When the earliest opener has no balanced closer, the earliest opener that
 * does have one is returned instead, so an unbalanced prefix never hides a
 * later, well-formed formula.
 *
 * The text is scanned once from left to right with a stack of pending openers,
 * so unbalanced input does not trigger a rescan from every opener.
 *
 * Intended for the LaTeX pairs `\[ \]` and `\( \)`, whose occurrences cannot
 * overlap each other. An empty delimiter can never form a meaningful pair and
 * yields `null`.
 *
 * @param text The text to search
 * @param openDelim Opening delimiter (e.g. '\[' or '\(')
 * @param closeDelim Closing delimiter (e.g. '\]' or '\)')
 * @returns `{ start, end, pre, body, post }` for the matched pair, where `start`
 *   is the index of the opener, `end` is the index just past the closer, and
 *   `pre` / `body` / `post` are the text before, between and after the
 *   delimiters; `null` when no balanced pair exists
 */
const findLatexMatch = (text: string, openDelim: string, closeDelim: string) => {
  if (openDelim.length === 0 || closeDelim.length === 0) return null

  // Count the consecutive backslashes right before `index`: odd means escaped
  const isEscaped = (index: number): boolean => {
    let backslashes = 0
    for (let k = index - 1; k >= 0 && text[k] === '\\'; k--) backslashes++
    return backslashes % 2 === 1
  }

  const buildMatch = (start: number, closeStart: number) => ({
    start,
    end: closeStart + closeDelim.length,
    pre: text.slice(0, start),
    body: text.slice(start + openDelim.length, closeStart),
    post: text.slice(closeStart + closeDelim.length)
  })

  // Indices of openers that have not been closed yet (innermost last)
  const pendingOpens: number[] = []
  // Earliest balanced pair found while an outer opener is still waiting for its closer
  let fallback: { start: number; closeStart: number } | null = null

  for (let pos = 0; pos < text.length; ) {
    if (text.startsWith(openDelim, pos) && !isEscaped(pos)) {
      pendingOpens.push(pos)
      // Skip the delimiter characters so they are not examined again
      pos += openDelim.length
      continue
    }

    if (text.startsWith(closeDelim, pos) && !isEscaped(pos)) {
      // A closer with no pending opener is a stray one and is ignored
      const start = pendingOpens.pop()
      if (start !== undefined) {
        // The stack can only empty when the very first opener is closed,
        // and that pair is always the earliest possible match
        if (pendingOpens.length === 0) return buildMatch(start, pos)
        if (fallback === null || start < fallback.start) fallback = { start, closeStart: pos }
      }
      pos += closeDelim.length
      continue
    }

    pos++
  }

  // The first opener was never closed: fall back to the earliest balanced pair, if any
  return fallback ? buildMatch(fallback.start, fallback.closeStart) : null
}
/**
* 转换数学公式格式:
* - 将 LaTeX 格式的 '\\[' 和 '\\]' 转换为 '$$$$'。

View File

@@ -4115,13 +4115,6 @@ __metadata:
languageName: node
linkType: hard
"@types/balanced-match@npm:^3":
version: 3.0.2
resolution: "@types/balanced-match@npm:3.0.2"
checksum: 10c0/833f6499609363537026c4ec2770af5c5a36e71b80f7b5b23884b15296301bfcf974cd40bc75fda940dea4994acd96c9222b284c248383a1ade59bf8835940b0
languageName: node
linkType: hard
"@types/cacheable-request@npm:^6.0.1":
version: 6.0.3
resolution: "@types/cacheable-request@npm:6.0.3"
@@ -5646,7 +5639,6 @@ __metadata:
"@testing-library/jest-dom": "npm:^6.6.3"
"@testing-library/react": "npm:^16.3.0"
"@tryfabric/martian": "npm:^1.2.4"
"@types/balanced-match": "npm:^3"
"@types/diff": "npm:^7"
"@types/fs-extra": "npm:^11"
"@types/lodash": "npm:^4.17.5"
@@ -5673,7 +5665,6 @@ __metadata:
archiver: "npm:^7.0.1"
async-mutex: "npm:^0.5.0"
axios: "npm:^1.7.3"
balanced-match: "npm:^3.0.1"
browser-image-compression: "npm:^2.0.2"
color: "npm:^5.0.0"
dayjs: "npm:^1.11.11"
@@ -6306,13 +6297,6 @@ __metadata:
languageName: node
linkType: hard
"balanced-match@npm:^3.0.1":
version: 3.0.1
resolution: "balanced-match@npm:3.0.1"
checksum: 10c0/ac8dd63a5b260610c2cbda982f436e964c1b9ae8764d368a523769da40a31710abd6e19f0fdf1773c4ad7b2ea7ba7b285d547375dc723f6e754369835afc8e9f
languageName: node
linkType: hard
"bare-events@npm:^2.2.0":
version: 2.5.4
resolution: "bare-events@npm:2.5.4"