fix: websearch block and citation formatting (#7776)
* feat: enhance citation handling for Perplexity web search results - Implemented formatting for Perplexity citations in MainTextBlock, including data-citation attributes. - Updated citation processing in message store and thunk to support new citation structure. - Added utility functions for link completion based on web search results. - Enhanced tests to verify correct handling of Perplexity citations and links. * refactor: streamline chunk processing in OpenAIApiClient - Replaced single choice handling with a loop to process all choices in the chunk. - Improved handling of content sources, ensuring fallback mechanisms are in place for delta and message fields. - Enhanced tool call processing to accommodate missing function names and arguments. - Maintained existing functionality for web search data and reasoning content processing. * fix: improve citation handling and web search integration - Enhanced citation formatting to support legacy data compatibility in messageBlock.ts. - Updated messageThunk.ts to manage main text block references and citation updates more effectively. - Removed unnecessary web search flag and streamlined block processing logic. * fix: improve citation transforms to skip code blocks - Add withCitationTags for better code structure - Add tests - Remove outdated code - The Citation type in @renderer/types/index.ts is not referenced anywhere, so removed - Move the actual Citation type from @renderer/pages/home/Messages/CitationsList.tsx to @renderer/types/index.ts - Allow text selecting in tooltip * test: update tests * refactor(messageThunk): streamline citation handling in response processing - Removed redundant citation block source retrieval during text chunk processing. - Updated citation references handling to ensure proper inclusion only when available. - Simplified the logic for managing citation references in both streaming and final text updates. 
* refactor: simplify determineCitationSource for backward compatibility --------- Co-authored-by: one <wangan.cs@gmail.com>
This commit is contained in:
@@ -49,7 +49,9 @@ import {
|
||||
LLMWebSearchCompleteChunk,
|
||||
LLMWebSearchInProgressChunk,
|
||||
MCPToolCreatedChunk,
|
||||
TextCompleteChunk,
|
||||
TextDeltaChunk,
|
||||
ThinkingCompleteChunk,
|
||||
ThinkingDeltaChunk
|
||||
} from '@renderer/types/chunk'
|
||||
import { type Message } from '@renderer/types/newMessage'
|
||||
@@ -517,7 +519,7 @@ export class AnthropicAPIClient extends BaseApiClient<
|
||||
return () => {
|
||||
let accumulatedJson = ''
|
||||
const toolCalls: Record<number, ToolUseBlock> = {}
|
||||
|
||||
const ChunkIdTypeMap: Record<number, ChunkType> = {}
|
||||
return {
|
||||
async transform(rawChunk: AnthropicSdkRawChunk, controller: TransformStreamDefaultController<GenericChunk>) {
|
||||
switch (rawChunk.type) {
|
||||
@@ -612,6 +614,19 @@ export class AnthropicAPIClient extends BaseApiClient<
|
||||
toolCalls[rawChunk.index] = contentBlock
|
||||
break
|
||||
}
|
||||
case 'text': {
|
||||
if (!ChunkIdTypeMap[rawChunk.index]) {
|
||||
ChunkIdTypeMap[rawChunk.index] = ChunkType.TEXT_DELTA // 用textdelta代表文本块
|
||||
}
|
||||
break
|
||||
}
|
||||
case 'thinking':
|
||||
case 'redacted_thinking': {
|
||||
if (!ChunkIdTypeMap[rawChunk.index]) {
|
||||
ChunkIdTypeMap[rawChunk.index] = ChunkType.THINKING_DELTA // 用thinkingdelta代表思考块
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
@@ -646,6 +661,15 @@ export class AnthropicAPIClient extends BaseApiClient<
|
||||
break
|
||||
}
|
||||
case 'content_block_stop': {
|
||||
if (ChunkIdTypeMap[rawChunk.index] === ChunkType.TEXT_DELTA) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_COMPLETE
|
||||
} as TextCompleteChunk)
|
||||
} else if (ChunkIdTypeMap[rawChunk.index] === ChunkType.THINKING_DELTA) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.THINKING_COMPLETE
|
||||
} as ThinkingCompleteChunk)
|
||||
}
|
||||
const toolCall = toolCalls[rawChunk.index]
|
||||
if (toolCall) {
|
||||
try {
|
||||
|
||||
@@ -564,11 +564,11 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
|
||||
|
||||
// Perplexity citations
|
||||
// @ts-ignore - citations may not be in standard type definitions
|
||||
if (context.provider?.id === 'perplexity' && chunk.citations && chunk.citations.length > 0) {
|
||||
if (context.provider?.id === 'perplexity' && chunk.search_results && chunk.search_results.length > 0) {
|
||||
hasBeenCollectedWebSearch = true
|
||||
return {
|
||||
// @ts-ignore - citations may not be in standard type definitions
|
||||
results: chunk.citations,
|
||||
results: chunk.search_results,
|
||||
source: WebSearchSource.PERPLEXITY
|
||||
}
|
||||
}
|
||||
@@ -672,74 +672,21 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
|
||||
|
||||
// 处理chunk
|
||||
if ('choices' in chunk && chunk.choices && chunk.choices.length > 0) {
|
||||
const choice = chunk.choices[0]
|
||||
for (const choice of chunk.choices) {
|
||||
if (!choice) continue
|
||||
|
||||
if (!choice) return
|
||||
|
||||
// 对于流式响应,使用 delta;对于非流式响应,使用 message。
|
||||
// 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。
|
||||
// 如果 delta 为空对象,应当忽略它并回退到 message,避免造成内容缺失。
|
||||
let contentSource: OpenAISdkRawContentSource | null = null
|
||||
if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) {
|
||||
contentSource = choice.delta
|
||||
} else if ('message' in choice) {
|
||||
contentSource = choice.message
|
||||
}
|
||||
|
||||
if (!contentSource) return
|
||||
|
||||
const webSearchData = collectWebSearchData(chunk, contentSource, context)
|
||||
if (webSearchData) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
|
||||
llm_web_search: webSearchData
|
||||
})
|
||||
}
|
||||
|
||||
// 处理推理内容 (e.g. from OpenRouter DeepSeek-R1)
|
||||
// @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
|
||||
const reasoningText = contentSource.reasoning_content || contentSource.reasoning
|
||||
if (reasoningText) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.THINKING_DELTA,
|
||||
text: reasoningText
|
||||
})
|
||||
}
|
||||
|
||||
// 处理文本内容
|
||||
if (contentSource.content) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_DELTA,
|
||||
text: contentSource.content
|
||||
})
|
||||
}
|
||||
|
||||
// 处理工具调用
|
||||
if (contentSource.tool_calls) {
|
||||
for (const toolCall of contentSource.tool_calls) {
|
||||
if ('index' in toolCall) {
|
||||
const { id, index, function: fun } = toolCall
|
||||
if (fun?.name) {
|
||||
toolCalls[index] = {
|
||||
id: id || '',
|
||||
function: {
|
||||
name: fun.name,
|
||||
arguments: fun.arguments || ''
|
||||
},
|
||||
type: 'function'
|
||||
}
|
||||
} else if (fun?.arguments) {
|
||||
toolCalls[index].function.arguments += fun.arguments
|
||||
}
|
||||
} else {
|
||||
toolCalls.push(toolCall)
|
||||
}
|
||||
// 对于流式响应,使用 delta;对于非流式响应,使用 message。
|
||||
// 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。
|
||||
// 如果 delta 为空对象,应当忽略它并回退到 message,避免造成内容缺失。
|
||||
let contentSource: OpenAISdkRawContentSource | null = null
|
||||
if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) {
|
||||
contentSource = choice.delta
|
||||
} else if ('message' in choice) {
|
||||
contentSource = choice.message
|
||||
}
|
||||
}
|
||||
|
||||
// 处理finish_reason,发送流结束信号
|
||||
if ('finish_reason' in choice && choice.finish_reason) {
|
||||
Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`)
|
||||
if (!contentSource) continue
|
||||
|
||||
const webSearchData = collectWebSearchData(chunk, contentSource, context)
|
||||
if (webSearchData) {
|
||||
controller.enqueue({
|
||||
@@ -747,7 +694,60 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
|
||||
llm_web_search: webSearchData
|
||||
})
|
||||
}
|
||||
emitCompletionSignals(controller)
|
||||
|
||||
// 处理推理内容 (e.g. from OpenRouter DeepSeek-R1)
|
||||
// @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
|
||||
const reasoningText = contentSource.reasoning_content || contentSource.reasoning
|
||||
if (reasoningText) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.THINKING_DELTA,
|
||||
text: reasoningText
|
||||
})
|
||||
}
|
||||
|
||||
// 处理文本内容
|
||||
if (contentSource.content) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_DELTA,
|
||||
text: contentSource.content
|
||||
})
|
||||
}
|
||||
|
||||
// 处理工具调用
|
||||
if (contentSource.tool_calls) {
|
||||
for (const toolCall of contentSource.tool_calls) {
|
||||
if ('index' in toolCall) {
|
||||
const { id, index, function: fun } = toolCall
|
||||
if (fun?.name) {
|
||||
toolCalls[index] = {
|
||||
id: id || '',
|
||||
function: {
|
||||
name: fun.name,
|
||||
arguments: fun.arguments || ''
|
||||
},
|
||||
type: 'function'
|
||||
}
|
||||
} else if (fun?.arguments) {
|
||||
toolCalls[index].function.arguments += fun.arguments
|
||||
}
|
||||
} else {
|
||||
toolCalls.push(toolCall)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 处理finish_reason,发送流结束信号
|
||||
if ('finish_reason' in choice && choice.finish_reason) {
|
||||
Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`)
|
||||
const webSearchData = collectWebSearchData(chunk, contentSource, context)
|
||||
if (webSearchData) {
|
||||
controller.enqueue({
|
||||
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
|
||||
llm_web_search: webSearchData
|
||||
})
|
||||
}
|
||||
emitCompletionSignals(controller)
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -492,6 +492,10 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
|
||||
case 'response.output_item.added':
|
||||
if (chunk.item.type === 'function_call') {
|
||||
outputItems.push(chunk.item)
|
||||
} else if (chunk.item.type === 'web_search_call') {
|
||||
controller.enqueue({
|
||||
type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS
|
||||
})
|
||||
}
|
||||
break
|
||||
case 'response.reasoning_summary_part.added':
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import Logger from '@renderer/config/logger'
|
||||
import { ChunkType, TextDeltaChunk } from '@renderer/types/chunk'
|
||||
import { ChunkType, TextCompleteChunk, TextDeltaChunk } from '@renderer/types/chunk'
|
||||
|
||||
import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas'
|
||||
import { CompletionsContext, CompletionsMiddleware } from '../types'
|
||||
@@ -38,7 +38,7 @@ export const TextChunkMiddleware: CompletionsMiddleware =
|
||||
|
||||
// 用于跨chunk的状态管理
|
||||
let accumulatedTextContent = ''
|
||||
let hasEnqueue = false
|
||||
let hasTextCompleteEventEnqueue = false
|
||||
const enhancedTextStream = resultFromUpstream.pipeThrough(
|
||||
new TransformStream<GenericChunk, GenericChunk>({
|
||||
transform(chunk: GenericChunk, controller) {
|
||||
@@ -53,30 +53,44 @@ export const TextChunkMiddleware: CompletionsMiddleware =
|
||||
|
||||
// 创建新的chunk,包含处理后的文本
|
||||
controller.enqueue(chunk)
|
||||
} else if (accumulatedTextContent) {
|
||||
if (chunk.type !== ChunkType.LLM_RESPONSE_COMPLETE) {
|
||||
controller.enqueue(chunk)
|
||||
hasEnqueue = true
|
||||
}
|
||||
const finalText = accumulatedTextContent
|
||||
ctx._internal.customState!.accumulatedText = finalText
|
||||
if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) {
|
||||
ctx._internal.toolProcessingState.output = finalText
|
||||
}
|
||||
|
||||
// 处理 onResponse 回调 - 发送最终完整文本
|
||||
if (params.onResponse) {
|
||||
params.onResponse(finalText, true)
|
||||
}
|
||||
|
||||
} else if (chunk.type === ChunkType.TEXT_COMPLETE) {
|
||||
const textChunk = chunk as TextCompleteChunk
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_COMPLETE,
|
||||
text: finalText
|
||||
...textChunk,
|
||||
text: accumulatedTextContent
|
||||
})
|
||||
if (params.onResponse) {
|
||||
params.onResponse(accumulatedTextContent, true)
|
||||
}
|
||||
hasTextCompleteEventEnqueue = true
|
||||
accumulatedTextContent = ''
|
||||
if (!hasEnqueue) {
|
||||
} else if (accumulatedTextContent && !hasTextCompleteEventEnqueue) {
|
||||
if (chunk.type === ChunkType.LLM_RESPONSE_COMPLETE) {
|
||||
const finalText = accumulatedTextContent
|
||||
ctx._internal.customState!.accumulatedText = finalText
|
||||
if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) {
|
||||
ctx._internal.toolProcessingState.output = finalText
|
||||
}
|
||||
|
||||
// 处理 onResponse 回调 - 发送最终完整文本
|
||||
if (params.onResponse) {
|
||||
params.onResponse(finalText, true)
|
||||
}
|
||||
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_COMPLETE,
|
||||
text: finalText
|
||||
})
|
||||
controller.enqueue(chunk)
|
||||
} else {
|
||||
controller.enqueue({
|
||||
type: ChunkType.TEXT_COMPLETE,
|
||||
text: accumulatedTextContent
|
||||
})
|
||||
controller.enqueue(chunk)
|
||||
}
|
||||
hasTextCompleteEventEnqueue = true
|
||||
accumulatedTextContent = ''
|
||||
} else {
|
||||
// 其他类型的chunk直接传递
|
||||
controller.enqueue(chunk)
|
||||
|
||||
@@ -65,6 +65,16 @@ export const ThinkChunkMiddleware: CompletionsMiddleware =
|
||||
thinking_millsec: thinkingStartTime > 0 ? Date.now() - thinkingStartTime : 0
|
||||
}
|
||||
controller.enqueue(enhancedChunk)
|
||||
} else if (chunk.type === ChunkType.THINKING_COMPLETE) {
|
||||
const thinkingCompleteChunk = chunk as ThinkingCompleteChunk
|
||||
controller.enqueue({
|
||||
...thinkingCompleteChunk,
|
||||
text: accumulatedThinkingContent,
|
||||
thinking_millsec: thinkingStartTime > 0 ? Date.now() - thinkingStartTime : 0
|
||||
})
|
||||
hasThinkingContent = false
|
||||
accumulatedThinkingContent = ''
|
||||
thinkingStartTime = 0
|
||||
} else if (hasThinkingContent && thinkingStartTime > 0) {
|
||||
// 收到任何非THINKING_DELTA的chunk时,如果有累积的思考内容,生成THINKING_COMPLETE
|
||||
const thinkingCompleteChunk: ThinkingCompleteChunk = {
|
||||
|
||||
@@ -42,7 +42,12 @@ export const WebSearchMiddleware: CompletionsMiddleware =
|
||||
const providerType = model.provider || 'openai'
|
||||
// 使用当前可用的Web搜索结果进行链接转换
|
||||
const text = chunk.text
|
||||
const result = smartLinkConverter(text, providerType, isFirstChunk)
|
||||
const result = smartLinkConverter(
|
||||
text,
|
||||
providerType,
|
||||
isFirstChunk,
|
||||
ctx._internal.webSearchState!.results
|
||||
)
|
||||
if (isFirstChunk) {
|
||||
isFirstChunk = false
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ const CitationTooltip: React.FC<CitationTooltipProps> = ({ children, citation })
|
||||
// 自定义悬浮卡片内容
|
||||
const tooltipContent = useMemo(
|
||||
() => (
|
||||
<div>
|
||||
<div style={{ userSelect: 'text' }}>
|
||||
<TooltipHeader role="button" aria-label={`Open ${sourceTitle} in new tab`} onClick={handleClick}>
|
||||
<Favicon hostname={hostname} alt={sourceTitle} />
|
||||
<TooltipTitle role="heading" aria-level={3} title={sourceTitle}>
|
||||
|
||||
@@ -58,7 +58,9 @@ exports[`CitationTooltip > basic rendering > should match snapshot 1`] = `
|
||||
<div
|
||||
data-testid="tooltip-content"
|
||||
>
|
||||
<div>
|
||||
<div
|
||||
style="user-select: text;"
|
||||
>
|
||||
<div
|
||||
aria-label="Open Example Article in new tab"
|
||||
class="c0"
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import { GroundingSupport } from '@google/genai'
|
||||
import { useSettings } from '@renderer/hooks/useSettings'
|
||||
import { getModelUniqId } from '@renderer/services/ModelService'
|
||||
import type { RootState } from '@renderer/store'
|
||||
import { selectFormattedCitationsByBlockId } from '@renderer/store/messageBlock'
|
||||
import { type Model, WebSearchSource } from '@renderer/types'
|
||||
import { type Model } from '@renderer/types'
|
||||
import type { MainTextMessageBlock, Message } from '@renderer/types/newMessage'
|
||||
import { cleanMarkdownContent, encodeHTML } from '@renderer/utils/formats'
|
||||
import { determineCitationSource, withCitationTags } from '@renderer/utils/citation'
|
||||
import { Flex } from 'antd'
|
||||
import React, { useMemo } from 'react'
|
||||
import { useSelector } from 'react-redux'
|
||||
@@ -28,113 +27,16 @@ const MainTextBlock: React.FC<Props> = ({ block, citationBlockId, role, mentions
|
||||
|
||||
const rawCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, citationBlockId))
|
||||
|
||||
const formattedCitations = useMemo(() => {
|
||||
return rawCitations.map((citation) => ({
|
||||
...citation,
|
||||
content: citation.content ? cleanMarkdownContent(citation.content) : citation.content
|
||||
}))
|
||||
}, [rawCitations])
|
||||
|
||||
const processedContent = useMemo(() => {
|
||||
let content = block.content
|
||||
// Update condition to use citationBlockId
|
||||
if (!block.citationReferences?.length || !citationBlockId || formattedCitations.length === 0) {
|
||||
return content
|
||||
if (!block.citationReferences?.length || !citationBlockId || rawCitations.length === 0) {
|
||||
return block.content
|
||||
}
|
||||
|
||||
switch (block.citationReferences[0].citationBlockSource) {
|
||||
case WebSearchSource.OPENAI:
|
||||
case WebSearchSource.OPENAI_RESPONSE: {
|
||||
formattedCitations.forEach((citation) => {
|
||||
const citationNum = citation.number
|
||||
const supData = {
|
||||
id: citationNum,
|
||||
url: citation.url,
|
||||
title: citation.title || citation.hostname || '',
|
||||
content: citation.content?.substring(0, 200)
|
||||
}
|
||||
const citationJson = encodeHTML(JSON.stringify(supData))
|
||||
// 确定最适合的 source
|
||||
const sourceType = determineCitationSource(block.citationReferences)
|
||||
|
||||
// Handle[<sup>N</sup>](url)
|
||||
const preFormattedRegex = new RegExp(`\\[<sup>${citationNum}</sup>\\]\\(.*?\\)`, 'g')
|
||||
|
||||
const citationTag = `[<sup data-citation='${citationJson}'>${citationNum}</sup>](${citation.url})`
|
||||
|
||||
content = content.replace(preFormattedRegex, citationTag)
|
||||
})
|
||||
break
|
||||
}
|
||||
case WebSearchSource.GEMINI: {
|
||||
// First pass: Add basic citation marks using metadata
|
||||
let processedContent = content
|
||||
const firstCitation = formattedCitations[0]
|
||||
if (firstCitation?.metadata) {
|
||||
firstCitation.metadata.forEach((support: GroundingSupport) => {
|
||||
const citationNums = support.groundingChunkIndices!
|
||||
|
||||
if (support.segment) {
|
||||
const text = support.segment.text!
|
||||
// 生成引用标记
|
||||
const basicTag = citationNums
|
||||
.map((citationNum) => {
|
||||
const citation = formattedCitations.find((c) => c.number === citationNum + 1)
|
||||
return citation ? `[<sup>${citationNum + 1}</sup>](${citation.url})` : ''
|
||||
})
|
||||
.join('')
|
||||
|
||||
// 在文本后面添加引用标记,而不是替换
|
||||
if (text && basicTag) {
|
||||
processedContent = processedContent.replace(text, `${text}${basicTag}`)
|
||||
}
|
||||
}
|
||||
})
|
||||
content = processedContent
|
||||
}
|
||||
// Second pass: Replace basic citations with full citation data
|
||||
formattedCitations.forEach((citation) => {
|
||||
const citationNum = citation.number
|
||||
const supData = {
|
||||
id: citationNum,
|
||||
url: citation.url,
|
||||
title: citation.title || citation.hostname || '',
|
||||
content: citation.content?.substring(0, 200)
|
||||
}
|
||||
const citationJson = encodeHTML(JSON.stringify(supData))
|
||||
|
||||
// Replace basic citation with full citation including data
|
||||
const basicCitationRegex = new RegExp(`\\[<sup>${citationNum}</sup>\\]\\(${citation.url}\\)`, 'g')
|
||||
const fullCitationTag = `[<sup data-citation='${citationJson}'>${citationNum}</sup>](${citation.url})`
|
||||
content = content.replace(basicCitationRegex, fullCitationTag)
|
||||
})
|
||||
break
|
||||
}
|
||||
default: {
|
||||
// FIXME:性能问题,需要优化
|
||||
// Replace all citation numbers and pre-formatted links with formatted citations
|
||||
formattedCitations.forEach((citation) => {
|
||||
const citationNum = citation.number
|
||||
const supData = {
|
||||
id: citationNum,
|
||||
url: citation.url,
|
||||
title: citation.title || citation.hostname || '',
|
||||
content: citation.content?.substring(0, 200)
|
||||
}
|
||||
const isLink = citation.url.startsWith('http')
|
||||
const citationJson = encodeHTML(JSON.stringify(supData))
|
||||
|
||||
// Handle both plain references [N] and pre-formatted links [<sup>N</sup>](url)
|
||||
const plainRefRegex = new RegExp(`\\[${citationNum}\\]`, 'g')
|
||||
|
||||
const supTag = `<sup data-citation='${citationJson}'>${citationNum}</sup>`
|
||||
const citationTag = isLink ? `[${supTag}](${citation.url})` : supTag
|
||||
|
||||
content = content.replace(plainRefRegex, citationTag)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}, [block.content, block.citationReferences, citationBlockId, formattedCitations])
|
||||
return withCitationTags(block.content, rawCitations, sourceType)
|
||||
}, [block.content, block.citationReferences, citationBlockId, rawCitations])
|
||||
|
||||
const ignoreToolUse = useMemo(() => {
|
||||
return processedContent.replace(toolUseRegex, '')
|
||||
|
||||
@@ -48,6 +48,28 @@ vi.mock('@renderer/utils/formats', () => ({
|
||||
encodeHTML: vi.fn((content: string) => content.replace(/"/g, '"'))
|
||||
}))
|
||||
|
||||
// Mock citation utilities
|
||||
vi.mock('@renderer/utils/citation', () => ({
|
||||
withCitationTags: vi.fn((content: string, citations: any[]) => {
|
||||
// Simple mock implementation that simulates citation processing
|
||||
if (citations.length > 0) {
|
||||
return `${content} [processed-citations]`
|
||||
}
|
||||
return content
|
||||
}),
|
||||
determineCitationSource: vi.fn((citationReferences: any[], citationBlock?: any) => {
|
||||
// Mock implementation that returns the first valid source from citationReferences
|
||||
if (citationBlock?.response?.source) {
|
||||
return citationBlock.response.source
|
||||
}
|
||||
if (citationReferences?.length) {
|
||||
const validReference = citationReferences.find((ref) => ref.citationBlockSource)
|
||||
return validReference?.citationBlockSource
|
||||
}
|
||||
return undefined
|
||||
})
|
||||
}))
|
||||
|
||||
// Mock services
|
||||
vi.mock('@renderer/services/ModelService', () => ({
|
||||
getModelUniqId: vi.fn()
|
||||
@@ -66,7 +88,8 @@ vi.mock('@renderer/pages/home/Markdown/Markdown', () => ({
|
||||
describe('MainTextBlock', () => {
|
||||
// Get references to mocked modules
|
||||
let mockGetModelUniqId: any
|
||||
let mockCleanMarkdownContent: any
|
||||
let mockWithCitationTags: any
|
||||
let mockDetermineCitationSource: any
|
||||
|
||||
// Create a mock store for Provider
|
||||
const mockStore = configureStore({
|
||||
@@ -80,9 +103,10 @@ describe('MainTextBlock', () => {
|
||||
|
||||
// Get the mocked functions
|
||||
const { getModelUniqId } = await import('@renderer/services/ModelService')
|
||||
const { cleanMarkdownContent } = await import('@renderer/utils/formats')
|
||||
const { withCitationTags, determineCitationSource } = await import('@renderer/utils/citation')
|
||||
mockGetModelUniqId = getModelUniqId as any
|
||||
mockCleanMarkdownContent = cleanMarkdownContent as any
|
||||
mockWithCitationTags = withCitationTags as any
|
||||
mockDetermineCitationSource = determineCitationSource as any
|
||||
|
||||
// Default mock implementations
|
||||
mockUseSettings.mockReturnValue({ renderInputMessageAsMarkdown: false })
|
||||
@@ -283,8 +307,16 @@ text after`,
|
||||
})
|
||||
|
||||
it('should process content through format utilities', () => {
|
||||
const block = createMainTextBlock({ content: 'Content to process' })
|
||||
mockUseSelector.mockReturnValue([{ id: '1', content: 'Citation content', number: 1 }])
|
||||
const block = createMainTextBlock({
|
||||
content: 'Content to process',
|
||||
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
|
||||
})
|
||||
const mockCitations = [{ id: '1', content: 'Citation content', number: 1 }]
|
||||
|
||||
// Mock the useSelector calls - first call for citations, second call for citationBlock
|
||||
mockUseSelector
|
||||
.mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId
|
||||
.mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById
|
||||
|
||||
renderMainTextBlock({
|
||||
block,
|
||||
@@ -292,8 +324,14 @@ text after`,
|
||||
citationBlockId: 'test-citations'
|
||||
})
|
||||
|
||||
// Verify utility functions are called
|
||||
expect(mockCleanMarkdownContent).toHaveBeenCalled()
|
||||
// Verify determineCitationSource was called with correct parameters
|
||||
expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences)
|
||||
|
||||
// Verify citation processing was called with correct parameters
|
||||
expect(mockWithCitationTags).toHaveBeenCalledWith('Content to process', mockCitations, 'DEFAULT')
|
||||
|
||||
// Verify the processed content is rendered
|
||||
expect(screen.getByText('Markdown: Content to process [processed-citations]')).toBeInTheDocument()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -308,7 +346,7 @@ text after`,
|
||||
expect(mockUseSelector).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should integrate with citation system when citations exist', () => {
|
||||
it('should integrate with citation processing when all conditions are met', () => {
|
||||
const block = createMainTextBlock({
|
||||
content: 'Content with citation [1]',
|
||||
citationReferences: [{ citationBlockSource: WebSearchSource.OPENAI }]
|
||||
@@ -324,7 +362,11 @@ text after`,
|
||||
}
|
||||
]
|
||||
|
||||
mockUseSelector.mockReturnValue(mockCitations)
|
||||
// Mock the useSelector calls - first call for citations, second call for citationBlock
|
||||
mockUseSelector
|
||||
.mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId
|
||||
.mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById
|
||||
|
||||
renderMainTextBlock({
|
||||
block,
|
||||
role: 'assistant',
|
||||
@@ -335,28 +377,58 @@ text after`,
|
||||
expect(mockUseSelector).toHaveBeenCalled()
|
||||
expect(getRenderedMarkdown()).toBeInTheDocument()
|
||||
|
||||
// Verify content processing occurred
|
||||
expect(mockCleanMarkdownContent).toHaveBeenCalledWith('Citation content')
|
||||
// Verify determineCitationSource was called
|
||||
expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences)
|
||||
|
||||
// Verify withCitationTags was called with correct parameters
|
||||
expect(mockWithCitationTags).toHaveBeenCalledWith(
|
||||
'Content with citation [1]',
|
||||
mockCitations,
|
||||
WebSearchSource.OPENAI
|
||||
)
|
||||
|
||||
// Verify the processed content is rendered
|
||||
expect(screen.getByText('Markdown: Content with citation [1] [processed-citations]')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should handle different citation sources correctly', () => {
|
||||
const testSources = [WebSearchSource.OPENAI, 'DEFAULT' as any, 'CUSTOM' as any]
|
||||
it('should skip citation processing when conditions are not met', () => {
|
||||
const testCases = [
|
||||
{
|
||||
name: 'no citationReferences',
|
||||
block: createMainTextBlock({ content: 'Content [1]' }),
|
||||
citationBlockId: 'test'
|
||||
},
|
||||
{
|
||||
name: 'no citationBlockId',
|
||||
block: createMainTextBlock({
|
||||
content: 'Content [1]',
|
||||
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
|
||||
}),
|
||||
citationBlockId: undefined
|
||||
},
|
||||
{
|
||||
name: 'no citations data',
|
||||
block: createMainTextBlock({
|
||||
content: 'Content [1]',
|
||||
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
|
||||
}),
|
||||
citationBlockId: 'test'
|
||||
}
|
||||
]
|
||||
|
||||
testSources.forEach((source) => {
|
||||
const block = createMainTextBlock({
|
||||
content: `Citation test for ${source}`,
|
||||
citationReferences: [{ citationBlockSource: source }]
|
||||
})
|
||||
|
||||
mockUseSelector.mockReturnValue([{ id: '1', number: 1, url: 'https://test.com', title: 'Test' }])
|
||||
testCases.forEach(({ block, citationBlockId }) => {
|
||||
mockUseSelector.mockReturnValue([]) // No citations
|
||||
|
||||
const { unmount } = renderMainTextBlock({
|
||||
block,
|
||||
role: 'assistant',
|
||||
citationBlockId: `test-${source}`
|
||||
citationBlockId
|
||||
})
|
||||
|
||||
expect(getRenderedMarkdown()).toBeInTheDocument()
|
||||
// Should render original content without citation processing
|
||||
expect(screen.getByText(`Markdown: ${block.content}`)).toBeInTheDocument()
|
||||
|
||||
unmount()
|
||||
})
|
||||
})
|
||||
@@ -400,51 +472,7 @@ text after`,
|
||||
})
|
||||
})
|
||||
|
||||
describe('edge cases and robustness', () => {
|
||||
it('should handle large content without performance issues', () => {
|
||||
const largeContent = 'A'.repeat(1000) + ' with citations [1]'
|
||||
const block = createMainTextBlock({ content: largeContent })
|
||||
|
||||
const largeCitations = [
|
||||
{
|
||||
id: '1',
|
||||
number: 1,
|
||||
url: 'https://large.com',
|
||||
title: 'Large',
|
||||
content: 'B'.repeat(500)
|
||||
}
|
||||
]
|
||||
|
||||
mockUseSelector.mockReturnValue(largeCitations)
|
||||
|
||||
expect(() => {
|
||||
renderMainTextBlock({
|
||||
block,
|
||||
role: 'assistant',
|
||||
citationBlockId: 'large-test'
|
||||
})
|
||||
}).not.toThrow()
|
||||
|
||||
expect(getRenderedMarkdown()).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should handle special characters and Unicode gracefully', () => {
|
||||
const specialContent = '测试内容 🚀 📝 ✨ <>&"\'` [1]'
|
||||
const block = createMainTextBlock({ content: specialContent })
|
||||
|
||||
mockUseSelector.mockReturnValue([{ id: '1', number: 1, title: '特殊字符测试', content: '内容 with 🎉' }])
|
||||
|
||||
expect(() => {
|
||||
renderMainTextBlock({
|
||||
block,
|
||||
role: 'assistant',
|
||||
citationBlockId: 'unicode-test'
|
||||
})
|
||||
}).not.toThrow()
|
||||
|
||||
expect(getRenderedMarkdown()).toBeInTheDocument()
|
||||
})
|
||||
|
||||
describe('integration and robustness', () => {
|
||||
it('should handle null and undefined values gracefully', () => {
|
||||
const block = createMainTextBlock({ content: 'Null safety test' })
|
||||
|
||||
@@ -460,7 +488,7 @@ text after`,
|
||||
expect(getRenderedMarkdown()).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should integrate properly with Redux store', () => {
|
||||
it('should integrate properly with Redux store for citations', () => {
|
||||
const block = createMainTextBlock({
|
||||
content: 'Redux integration test',
|
||||
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import ContextMenu from '@renderer/components/ContextMenu'
|
||||
import Favicon from '@renderer/components/Icons/FallbackFavicon'
|
||||
import { Citation } from '@renderer/types'
|
||||
import { fetchWebContent } from '@renderer/utils/fetch'
|
||||
import { cleanMarkdownContent } from '@renderer/utils/formats'
|
||||
import { QueryClient, QueryClientProvider, useQuery } from '@tanstack/react-query'
|
||||
@@ -9,17 +10,6 @@ import React, { useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import styled from 'styled-components'
|
||||
|
||||
export interface Citation {
|
||||
number: number
|
||||
url: string
|
||||
title?: string
|
||||
hostname?: string
|
||||
content?: string
|
||||
showFavicon?: boolean
|
||||
type?: string
|
||||
metadata?: Record<string, any>
|
||||
}
|
||||
|
||||
interface CitationsListProps {
|
||||
citations: Citation[]
|
||||
}
|
||||
|
||||
@@ -359,9 +359,6 @@ export async function fetchChatCompletion({
|
||||
|
||||
// --- Call AI Completions ---
|
||||
onChunkReceived({ type: ChunkType.LLM_RESPONSE_CREATED })
|
||||
if (enableWebSearch) {
|
||||
onChunkReceived({ type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS })
|
||||
}
|
||||
await AI.completions(
|
||||
{
|
||||
callType: 'chat',
|
||||
|
||||
@@ -43,6 +43,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {})
|
||||
return (chunk: Chunk) => {
|
||||
try {
|
||||
const data = chunk
|
||||
// console.log('data: ', chunk)
|
||||
switch (data.type) {
|
||||
case ChunkType.BLOCK_COMPLETE: {
|
||||
if (callbacks.onComplete) callbacks.onComplete(AssistantMessageStatus.SUCCESS, data?.response)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
|
||||
import type { GroundingMetadata } from '@google/genai'
|
||||
import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit'
|
||||
import type { Citation } from '@renderer/pages/home/Messages/CitationsList'
|
||||
import { WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
|
||||
import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
|
||||
import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage'
|
||||
import { MessageBlockType } from '@renderer/types/newMessage'
|
||||
import type OpenAI from 'openai'
|
||||
@@ -160,9 +159,19 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
|
||||
}
|
||||
}) || []
|
||||
break
|
||||
case WebSearchSource.PERPLEXITY: {
|
||||
formattedCitations =
|
||||
(block.response.results as any[])?.map((result, index) => ({
|
||||
number: index + 1,
|
||||
url: result.url || result, // 兼容旧数据
|
||||
title: result.title || new URL(result).hostname, // 兼容旧数据
|
||||
showFavicon: true,
|
||||
type: 'websearch'
|
||||
})) || []
|
||||
break
|
||||
}
|
||||
case WebSearchSource.GROK:
|
||||
case WebSearchSource.OPENROUTER:
|
||||
case WebSearchSource.PERPLEXITY:
|
||||
formattedCitations =
|
||||
(block.response.results as any[])?.map((url, index) => {
|
||||
try {
|
||||
|
||||
@@ -8,7 +8,15 @@ import { createStreamProcessor, type StreamProcessorCallbacks } from '@renderer/
|
||||
import { estimateMessagesUsage } from '@renderer/services/TokenService'
|
||||
import store from '@renderer/store'
|
||||
import { updateTopicUpdatedAt } from '@renderer/store/assistants'
|
||||
import type { Assistant, ExternalToolResult, FileMetadata, MCPToolResponse, Model, Topic } from '@renderer/types'
|
||||
import {
|
||||
type Assistant,
|
||||
type ExternalToolResult,
|
||||
type FileMetadata,
|
||||
type MCPToolResponse,
|
||||
type Model,
|
||||
type Topic,
|
||||
WebSearchSource
|
||||
} from '@renderer/types'
|
||||
import type {
|
||||
CitationMessageBlock,
|
||||
FileMessageBlock,
|
||||
@@ -353,7 +361,7 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
let thinkingBlockId: string | null = null
|
||||
let imageBlockId: string | null = null
|
||||
let toolBlockId: string | null = null
|
||||
let hasWebSearch = false
|
||||
|
||||
const toolCallIdToBlockIdMap = new Map<string, string>()
|
||||
const notificationService = NotificationService.getInstance()
|
||||
|
||||
@@ -433,8 +441,7 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
const initialChanges: Partial<MessageBlock> = {
|
||||
type: MessageBlockType.MAIN_TEXT,
|
||||
content: accumulatedContent,
|
||||
status: MessageBlockStatus.STREAMING,
|
||||
citationReferences: citationBlockId ? [{ citationBlockId }] : []
|
||||
status: MessageBlockStatus.STREAMING
|
||||
}
|
||||
mainTextBlockId = initialPlaceholderBlockId
|
||||
// 清理占位块
|
||||
@@ -444,8 +451,7 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
|
||||
} else {
|
||||
const newBlock = createMainTextBlock(assistantMsgId, accumulatedContent, {
|
||||
status: MessageBlockStatus.STREAMING,
|
||||
citationReferences: citationBlockId ? [{ citationBlockId }] : []
|
||||
status: MessageBlockStatus.STREAMING
|
||||
})
|
||||
mainTextBlockId = newBlock.id // 立即设置ID,防止竞态条件
|
||||
await handleBlockTransition(newBlock, MessageBlockType.MAIN_TEXT)
|
||||
@@ -453,27 +459,27 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
},
|
||||
onTextComplete: async (finalText) => {
|
||||
if (mainTextBlockId) {
|
||||
let citationBlockSource: WebSearchSource | undefined
|
||||
if (citationBlockId) {
|
||||
const citationBlock = getState().messageBlocks.entities[citationBlockId] as CitationMessageBlock
|
||||
citationBlockSource = citationBlock.response?.source
|
||||
}
|
||||
const changes = {
|
||||
content: finalText,
|
||||
status: MessageBlockStatus.SUCCESS
|
||||
status: MessageBlockStatus.SUCCESS,
|
||||
citationReferences: citationBlockSource ? [{ citationBlockId, citationBlockSource }] : []
|
||||
}
|
||||
cancelThrottledBlockUpdate(mainTextBlockId)
|
||||
dispatch(updateOneBlock({ id: mainTextBlockId, changes }))
|
||||
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
|
||||
mainTextBlockId = null
|
||||
if (!assistant.enableWebSearch) {
|
||||
mainTextBlockId = null
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
`[onTextComplete] Received text.complete but last block was not MAIN_TEXT (was ${lastBlockType}) or lastBlockId is null.`
|
||||
)
|
||||
}
|
||||
if (citationBlockId && !hasWebSearch) {
|
||||
const changes: Partial<CitationMessageBlock> = {
|
||||
status: MessageBlockStatus.SUCCESS
|
||||
}
|
||||
dispatch(updateOneBlock({ id: citationBlockId, changes }))
|
||||
saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState)
|
||||
citationBlockId = null
|
||||
}
|
||||
},
|
||||
onThinkingChunk: async (text, thinking_millsec) => {
|
||||
accumulatedThinking += text
|
||||
@@ -616,15 +622,44 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
}
|
||||
},
|
||||
onLLMWebSearchComplete: async (llmWebSearchResult) => {
|
||||
if (citationBlockId) {
|
||||
hasWebSearch = true
|
||||
const blockId = citationBlockId || initialPlaceholderBlockId
|
||||
if (blockId) {
|
||||
const changes: Partial<CitationMessageBlock> = {
|
||||
type: MessageBlockType.CITATION,
|
||||
response: llmWebSearchResult,
|
||||
status: MessageBlockStatus.SUCCESS
|
||||
}
|
||||
dispatch(updateOneBlock({ id: citationBlockId, changes }))
|
||||
saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState)
|
||||
dispatch(updateOneBlock({ id: blockId, changes }))
|
||||
saveUpdatedBlockToDB(blockId, assistantMsgId, topicId, getState)
|
||||
|
||||
if (mainTextBlockId) {
|
||||
const state = getState()
|
||||
const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId]
|
||||
if (existingMainTextBlock && existingMainTextBlock.type === MessageBlockType.MAIN_TEXT) {
|
||||
const currentRefs = existingMainTextBlock.citationReferences || []
|
||||
const mainTextChanges = {
|
||||
citationReferences: [...currentRefs, { blockId, citationBlockSource: llmWebSearchResult.source }]
|
||||
}
|
||||
dispatch(updateOneBlock({ id: mainTextBlockId, changes: mainTextChanges }))
|
||||
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
|
||||
}
|
||||
mainTextBlockId = null
|
||||
}
|
||||
if (initialPlaceholderBlockId) {
|
||||
citationBlockId = initialPlaceholderBlockId
|
||||
initialPlaceholderBlockId = null
|
||||
}
|
||||
} else {
|
||||
const citationBlock = createCitationBlock(
|
||||
assistantMsgId,
|
||||
{
|
||||
response: llmWebSearchResult
|
||||
},
|
||||
{
|
||||
status: MessageBlockStatus.SUCCESS
|
||||
}
|
||||
)
|
||||
citationBlockId = citationBlock.id
|
||||
if (mainTextBlockId) {
|
||||
const state = getState()
|
||||
const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId]
|
||||
@@ -641,6 +676,7 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
}
|
||||
mainTextBlockId = null
|
||||
}
|
||||
await handleBlockTransition(citationBlock, MessageBlockType.CITATION)
|
||||
}
|
||||
},
|
||||
onImageCreated: async () => {
|
||||
|
||||
@@ -55,7 +55,6 @@ export interface LLMResponseInProgressChunk {
|
||||
response?: Response
|
||||
type: ChunkType.LLM_RESPONSE_IN_PROGRESS
|
||||
}
|
||||
|
||||
export interface TextDeltaChunk {
|
||||
/**
|
||||
* The text content of the chunk
|
||||
|
||||
@@ -728,9 +728,12 @@ export interface QuickPhrase {
|
||||
export interface Citation {
|
||||
number: number
|
||||
url: string
|
||||
hostname: string
|
||||
title?: string
|
||||
hostname?: string
|
||||
content?: string
|
||||
showFavicon?: boolean
|
||||
type?: string
|
||||
metadata?: Record<string, any>
|
||||
}
|
||||
|
||||
export type MathEngine = 'KaTeX' | 'MathJax' | 'none'
|
||||
|
||||
562
src/renderer/src/utils/__tests__/citation.test.ts
Normal file
562
src/renderer/src/utils/__tests__/citation.test.ts
Normal file
@@ -0,0 +1,562 @@
|
||||
import { GroundingSupport } from '@google/genai'
|
||||
import { Citation, WebSearchSource } from '@renderer/types'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import {
|
||||
determineCitationSource,
|
||||
generateCitationTag,
|
||||
mapCitationMarksToTags,
|
||||
normalizeCitationMarks,
|
||||
withCitationTags
|
||||
} from '../citation'
|
||||
|
||||
// Mock dependencies
|
||||
vi.mock('@renderer/utils/formats', () => ({
|
||||
cleanMarkdownContent: vi.fn((content: string) => content.replace(/[*_~`]/g, '')),
|
||||
encodeHTML: vi.fn((str: string) =>
|
||||
str.replace(/[&<>"']/g, (match) => {
|
||||
const entities: { [key: string]: string } = {
|
||||
'&': '&',
|
||||
'<': '<',
|
||||
'>': '>',
|
||||
'"': '"',
|
||||
"'": '''
|
||||
}
|
||||
return entities[match]
|
||||
})
|
||||
)
|
||||
}))
|
||||
|
||||
describe('citation', () => {
|
||||
const createCitationMap = (citations: Citation[]) => new Map(citations.map((c) => [c.number, c]))
|
||||
|
||||
describe('determineCitationSource', () => {
|
||||
it('should find the the citation source', () => {
|
||||
const citationReferences = [{ citationBlockId: 'block1', citationBlockSource: WebSearchSource.OPENAI }]
|
||||
|
||||
const result = determineCitationSource(citationReferences)
|
||||
expect(result).toBe(WebSearchSource.OPENAI)
|
||||
})
|
||||
|
||||
it('should find first valid source in citation references', () => {
|
||||
const citationReferences = [
|
||||
{ citationBlockId: 'block1' }, // no source
|
||||
{ citationBlockId: 'block2', citationBlockSource: WebSearchSource.GEMINI },
|
||||
{ citationBlockId: 'block3', citationBlockSource: WebSearchSource.GEMINI }
|
||||
]
|
||||
|
||||
const result = determineCitationSource(citationReferences)
|
||||
expect(result).toBe(WebSearchSource.GEMINI)
|
||||
})
|
||||
|
||||
it('should return undefined when no sources available', () => {
|
||||
const citationReferences = [
|
||||
{ citationBlockId: 'block1' }, // no source
|
||||
{ citationBlockId: 'block2' } // no source
|
||||
]
|
||||
|
||||
const result = determineCitationSource(citationReferences)
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should return undefined for empty citation references', () => {
|
||||
const result = determineCitationSource([])
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should return undefined for undefined citation references', () => {
|
||||
const result = determineCitationSource(undefined)
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe('withCitationTags', () => {
|
||||
it('should process citations with default source type', () => {
|
||||
const content = 'Test content [1] with citation'
|
||||
const citations: Citation[] = [
|
||||
{
|
||||
number: 1,
|
||||
url: 'https://example.com',
|
||||
title: 'Example'
|
||||
}
|
||||
]
|
||||
|
||||
const result = withCitationTags(content, citations)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example.com)')
|
||||
})
|
||||
|
||||
it('should process citations with OpenAI source type', () => {
|
||||
const content = 'Test content [<sup>1</sup>](https://example.com)'
|
||||
const citations: Citation[] = [
|
||||
{
|
||||
number: 1,
|
||||
url: 'https://example.com',
|
||||
title: 'Example',
|
||||
content: 'Some **content**'
|
||||
}
|
||||
]
|
||||
|
||||
const result = withCitationTags(content, citations, WebSearchSource.OPENAI)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example.com)')
|
||||
})
|
||||
|
||||
it('should process citations with Gemini source type', () => {
|
||||
const content = 'Test content from Gemini'
|
||||
const metadata: GroundingSupport[] = [
|
||||
{
|
||||
segment: { text: 'Test content' },
|
||||
groundingChunkIndices: [0]
|
||||
}
|
||||
]
|
||||
const citations: Citation[] = [
|
||||
{
|
||||
number: 1,
|
||||
url: 'https://example.com',
|
||||
title: 'Example',
|
||||
metadata
|
||||
}
|
||||
]
|
||||
|
||||
const result = withCitationTags(content, citations, WebSearchSource.GEMINI)
|
||||
|
||||
expect(result).toContain('Test content[<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example.com)')
|
||||
})
|
||||
|
||||
it('should handle empty citations array', () => {
|
||||
const content = 'This is test content [1]'
|
||||
const result = withCitationTags(content, [])
|
||||
expect(result).toBe(content)
|
||||
})
|
||||
})
|
||||
|
||||
describe('normalizeCitationMarks with markdown', () => {
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Example 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Example 2' },
|
||||
{ number: 3, url: 'https://example3.com', title: 'Example 3' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
it('should not process citations in inline code', () => {
|
||||
const content = 'Here is `code with [1] citation` and normal [2] citation'
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
// 内联代码中的 [1] 应该保持不变
|
||||
expect(result).toContain('`code with [1] citation`')
|
||||
// 普通文本中的 [2] 应该被处理
|
||||
expect(result).toContain('[cite:2]')
|
||||
})
|
||||
|
||||
it('should not process citations in code blocks', () => {
|
||||
const content = `Text with citation [1]
|
||||
|
||||
\`\`\`python
|
||||
# Python code with [2] reference
|
||||
def func():
|
||||
data = [3, 4, 5] # Array with [1] element reference
|
||||
return data
|
||||
\`\`\`
|
||||
|
||||
\`\`\`bash
|
||||
echo "Command with [2] parameter"
|
||||
\`\`\`
|
||||
|
||||
// Indented code block is not skipped
|
||||
echo "Indented code block [3]"
|
||||
|
||||
Normal text with [3] citation`
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
// 代码块内的内容应该保持原样
|
||||
expect(result).toContain('# Python code with [2] reference')
|
||||
expect(result).toContain('data = [3, 4, 5] # Array with [1] element reference')
|
||||
expect(result).toContain('echo "Command with [2] parameter"')
|
||||
|
||||
// 代码块外的引用应该被处理
|
||||
expect(result).toContain('Text with citation [cite:1]')
|
||||
expect(result).toContain('Indented code block [cite:3]')
|
||||
expect(result).toContain('Normal text with [cite:3]')
|
||||
})
|
||||
|
||||
it('should handle malformed code blocks', () => {
|
||||
const content = `Text with [1]
|
||||
|
||||
\`\`\`unclosed
|
||||
Code block without closing
|
||||
With [2] citation
|
||||
|
||||
Normal text with [3] continues`
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
expect(result).toContain('[cite:1]')
|
||||
expect(result).toContain('[cite:2]')
|
||||
expect(result).toContain('[cite:3]')
|
||||
})
|
||||
|
||||
it('should handle citations in various markdown structures', () => {
|
||||
const content = `Normal citation [1]
|
||||
|
||||
> This is a blockquote with [2] citation
|
||||
> And another line with [3]
|
||||
|
||||
Back to normal **with [1] again**
|
||||
|
||||
# Heading with [3] citation
|
||||
## Subheading with [2] citation
|
||||
|
||||
List:
|
||||
- list item with citation [1]
|
||||
|
||||
Numbered list:
|
||||
1. item with [2]`
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
console.log(result)
|
||||
|
||||
expect(result).toContain('citation [cite:1]')
|
||||
expect(result).toContain('blockquote with [cite:2]')
|
||||
expect(result).toContain('another line with [cite:3]')
|
||||
expect(result).toContain('with [cite:1] again')
|
||||
expect(result).toContain('Heading with [cite:3]')
|
||||
expect(result).toContain('Subheading with [cite:2]')
|
||||
expect(result).toContain('list item with citation [cite:1]')
|
||||
expect(result).toContain('item with [cite:2]')
|
||||
})
|
||||
})
|
||||
|
||||
describe('normalizeCitationMarks simple', () => {
|
||||
describe('OpenAI format citations', () => {
|
||||
it('should normalize OpenAI format citations', () => {
|
||||
const content = 'Text with [<sup>1</sup>](https://example.com) citation'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) {
|
||||
const result = normalizeCitationMarks(content, citationMap, sourceType)
|
||||
expect(result).toBe('Text with [cite:1] citation')
|
||||
}
|
||||
})
|
||||
|
||||
it('should preserve non-matching OpenAI citations', () => {
|
||||
const content = 'Text with [<sup>3</sup>](https://missing.com) citation'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) {
|
||||
const result = normalizeCitationMarks(content, citationMap, sourceType)
|
||||
expect(result).toBe('Text with [<sup>3</sup>](https://missing.com) citation')
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('Perplexity format citations', () => {
|
||||
it('should normalize Perplexity format citations', () => {
|
||||
const content = 'Perplexity citations [<sup>1</sup>](https://example.com)'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example.com', title: 'Example Citation', content: 'Citation content' }
|
||||
]
|
||||
const citationMap = new Map(citations.map((c) => [c.number, c]))
|
||||
|
||||
const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY)
|
||||
expect(normalized).toBe('Perplexity citations [cite:1]')
|
||||
})
|
||||
|
||||
it('should preserve unmatched Perplexity citations', () => {
|
||||
const content = 'Text with [<sup>2</sup>](https://notfound.com) citation'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Example Citation' }]
|
||||
const citationMap = new Map(citations.map((c) => [c.number, c]))
|
||||
|
||||
// 2号引用不存在,应该保持原样
|
||||
const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY)
|
||||
expect(normalized).toBe('Text with [<sup>2</sup>](https://notfound.com) citation')
|
||||
})
|
||||
})
|
||||
|
||||
describe('Gemini format citations', () => {
|
||||
it('should normalize Gemini format citations', () => {
|
||||
const content = 'This is test content from Gemini'
|
||||
const metadata: GroundingSupport[] = [
|
||||
{
|
||||
segment: { text: 'test content' },
|
||||
groundingChunkIndices: [0, 1]
|
||||
}
|
||||
]
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1', metadata },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI)
|
||||
|
||||
expect(result).toBe('This is test content[cite:1][cite:2] from Gemini')
|
||||
})
|
||||
|
||||
it('should handle Gemini citations without metadata', () => {
|
||||
const content = 'Content without metadata'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI)
|
||||
|
||||
expect(result).toBe('Content without metadata')
|
||||
})
|
||||
})
|
||||
|
||||
describe('default format citations', () => {
|
||||
it('should normalize default format citations', () => {
|
||||
const content = 'Text with [1][2] and [3] citations'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' },
|
||||
{ number: 3, url: 'https://example3.com', title: 'Test 3' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
expect(result).toBe('Text with [cite:1][cite:2] and [cite:3] citations')
|
||||
})
|
||||
|
||||
it('should preserve non-matching default format citations', () => {
|
||||
const content = 'Text with [1] and [3] citations'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
expect(result).toBe('Text with [cite:1] and [3] citations')
|
||||
})
|
||||
|
||||
it('should handle nested citation patterns', () => {
|
||||
const content = 'Text with [[1]] and [cite:[2]] patterns'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' }
|
||||
]
|
||||
const citationMap = new Map(citations.map((c) => [c.number, c]))
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap)
|
||||
|
||||
// 最里面的会被处理
|
||||
expect(result).toBe('Text with [[cite:1]] and [cite:[cite:2]] patterns')
|
||||
})
|
||||
|
||||
it('should handle mixed citation formats', () => {
|
||||
const content = 'Text with [1] and [<sup>2</sup>](url) and other [3] formats'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = normalizeCitationMarks(content, citationMap, WebSearchSource.OPENAI)
|
||||
|
||||
expect(result).toBe('Text with [1] and [cite:2] and other [3] formats')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('mapCitationMarksToTags', () => {
|
||||
const createCitationMap = (citations: Citation[]) => new Map(citations.map((c) => [c.number, c]))
|
||||
|
||||
it('should convert cite marks to tags', () => {
|
||||
const content = 'Text with [cite:1] citation'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toContain('with [<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example.com) citation')
|
||||
})
|
||||
|
||||
it('should handle multiple cite marks', () => {
|
||||
const content = 'Text with [cite:1][cite:2] and [cite:3] citations'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' },
|
||||
{ number: 3, url: 'https://example3.com', title: 'Test 3' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toContain('with [<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example1.com)[<sup data-citation=')
|
||||
expect(result).toContain('2</sup>](https://example2.com) and')
|
||||
expect(result).toContain('3</sup>](https://example3.com) citations')
|
||||
})
|
||||
|
||||
it('should preserve non-matching cite marks', () => {
|
||||
const content = 'Text with [cite:1] and [cite:3] citations'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toContain('1</sup>](https://example1.com)')
|
||||
expect(result).toContain('[cite:3]') // Should remain unchanged
|
||||
})
|
||||
|
||||
it('should handle nested cite marks', () => {
|
||||
const content = 'Text with [cite:[cite:1]] and [cite:2] citations'
|
||||
const citations: Citation[] = [
|
||||
{ number: 1, url: 'https://example1.com', title: 'Test 1' },
|
||||
{ number: 2, url: 'https://example2.com', title: 'Test 2' }
|
||||
]
|
||||
const citationMap = createCitationMap(citations)
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toContain('[cite:[<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example1.com)]')
|
||||
expect(result).toContain('2</sup>](https://example2.com)')
|
||||
})
|
||||
|
||||
it('should handle content without cite marks', () => {
|
||||
const content = 'Text without citations'
|
||||
const citationMap = new Map()
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toBe('Text without citations')
|
||||
})
|
||||
|
||||
it('should handle malformed citation numbers', () => {
|
||||
const content = 'Text with [cite:abc] and [cite:] marks'
|
||||
const citationMap = new Map()
|
||||
|
||||
const result = mapCitationMarksToTags(content, citationMap)
|
||||
|
||||
expect(result).toBe('Text with [cite:abc] and [cite:] marks')
|
||||
})
|
||||
})
|
||||
|
||||
describe('generateCitationTag', () => {
|
||||
it('should generate citation tag with valid URL', () => {
|
||||
const citation: Citation = {
|
||||
number: 1,
|
||||
url: 'https://example.com',
|
||||
title: 'Example Title',
|
||||
content: 'Some content here'
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('1</sup>](https://example.com)')
|
||||
expect(result).toContain('Example Title')
|
||||
})
|
||||
|
||||
it('should generate citation tag without URL when invalid', () => {
|
||||
const citation: Citation = {
|
||||
number: 2,
|
||||
url: 'invalid-url',
|
||||
title: 'Test Title'
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('2</sup>]()')
|
||||
expect(result).not.toContain('](invalid-url)')
|
||||
})
|
||||
|
||||
it('should handle citation without URL', () => {
|
||||
const citation: Citation = {
|
||||
number: 3,
|
||||
url: '',
|
||||
title: 'No URL Title'
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('3</sup>]()')
|
||||
})
|
||||
|
||||
it('should use hostname when title is missing', () => {
|
||||
const citation: Citation = {
|
||||
number: 4,
|
||||
url: 'https://example.com',
|
||||
hostname: 'example.com'
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
|
||||
expect(result).toContain('example.com')
|
||||
})
|
||||
|
||||
it('should handle citation with all empty values', () => {
|
||||
const citation: Citation = {
|
||||
number: 6,
|
||||
url: '',
|
||||
title: '',
|
||||
hostname: '',
|
||||
content: ''
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(result).toContain('6</sup>]()')
|
||||
})
|
||||
|
||||
it('should truncate content to 200 characters in data-citation', () => {
|
||||
const longContent = 'a'.repeat(300)
|
||||
const citation: Citation = {
|
||||
number: 1,
|
||||
url: 'https://example.com',
|
||||
title: 'Test',
|
||||
content: longContent
|
||||
}
|
||||
|
||||
const result = generateCitationTag(citation)
|
||||
const match = result.match(/data-citation='([^']+)'/)
|
||||
expect(match).not.toBeNull()
|
||||
if (match) {
|
||||
const citationData = JSON.parse(match[1].replace(/"/g, '"'))
|
||||
expect(citationData.content.length).toBe(200)
|
||||
expect(citationData.content).toBe(longContent.substring(0, 200))
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('performance', () => {
|
||||
it('should handle large content efficiently', () => {
|
||||
const largeContent = 'Test content '.repeat(10000) + '[1]'
|
||||
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
|
||||
|
||||
const start = Date.now()
|
||||
const result = withCitationTags(largeContent, citations)
|
||||
const end = Date.now()
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(end - start).toBeLessThan(100) // Should complete within 100ms
|
||||
})
|
||||
|
||||
it('should handle many citations efficiently', () => {
|
||||
const citations: Citation[] = Array.from({ length: 100 }, (_, i) => ({
|
||||
number: i + 1,
|
||||
url: `https://example${i + 1}.com`,
|
||||
title: `Test ${i + 1}`
|
||||
}))
|
||||
const content = citations.map((c) => `[${c.number}]`).join(' ')
|
||||
|
||||
const start = Date.now()
|
||||
const result = withCitationTags(content, citations)
|
||||
const end = Date.now()
|
||||
|
||||
expect(result).toContain('[<sup data-citation=')
|
||||
expect(end - start).toBeLessThan(100) // Should complete within 200ms
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
|
||||
import {
|
||||
cleanLinkCommas,
|
||||
completeLinks,
|
||||
completionPerplexityLinks,
|
||||
convertLinks,
|
||||
convertLinksToHunyuan,
|
||||
convertLinksToOpenRouter,
|
||||
@@ -88,6 +89,13 @@ describe('linkConverter', () => {
|
||||
})
|
||||
|
||||
describe('convertLinks', () => {
|
||||
it('should convert number links to numbered links', () => {
|
||||
const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)'
|
||||
const result = convertLinks(input, true)
|
||||
expect(result.text).toBe('参考 [<sup>1</sup>](https://example.com/1) 和 [<sup>2</sup>](https://example.com/2)')
|
||||
expect(result.hasBufferedContent).toBe(false)
|
||||
})
|
||||
|
||||
it('should convert links with domain-like text to numbered links', () => {
|
||||
const input = '查看这个网站 [example.com](https://example.com)'
|
||||
const result = convertLinks(input, true)
|
||||
@@ -375,4 +383,13 @@ describe('linkConverter', () => {
|
||||
expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)')
|
||||
})
|
||||
})
|
||||
|
||||
describe('completionPerplexityLinks', () => {
|
||||
it('should complete links with webSearch data', () => {
|
||||
const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
|
||||
const input = '参考 [1] 和 [2]'
|
||||
const result = completionPerplexityLinks(input, webSearch)
|
||||
expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
210
src/renderer/src/utils/citation.ts
Normal file
210
src/renderer/src/utils/citation.ts
Normal file
@@ -0,0 +1,210 @@
|
||||
import { GroundingSupport } from '@google/genai'
|
||||
import { Citation, WebSearchSource } from '@renderer/types'
|
||||
|
||||
import { cleanMarkdownContent, encodeHTML } from './formats'
|
||||
|
||||
/**
|
||||
* 从多个 citationReference 中获取第一个有效的 source
|
||||
* @returns WebSearchSource
|
||||
*/
|
||||
export function determineCitationSource(
|
||||
citationReferences: Array<{ citationBlockId?: string; citationBlockSource?: WebSearchSource }> | undefined
|
||||
): WebSearchSource | undefined {
|
||||
// 从 citationReferences 获取第一个有效的 source
|
||||
if (citationReferences?.length) {
|
||||
const validReference = citationReferences.find((ref) => ref.citationBlockSource)
|
||||
return validReference?.citationBlockSource
|
||||
}
|
||||
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* 把文本内容中的引用标记转换为完整的引用标签
|
||||
* - 标准化引用标记
|
||||
* - 转换标记为用于渲染的标签
|
||||
*
|
||||
* @param content 原始文本内容
|
||||
* @param citations 原始引用列表
|
||||
* @param sourceType 引用来源类型
|
||||
* @returns 处理后的文本内容
|
||||
*/
|
||||
export function withCitationTags(content: string, citations: Citation[], sourceType?: WebSearchSource): string {
|
||||
if (!content || citations.length === 0) return content
|
||||
|
||||
const formattedCitations = citations.map((citation) => ({
|
||||
...citation,
|
||||
content: citation.content ? cleanMarkdownContent(citation.content) : citation.content
|
||||
}))
|
||||
|
||||
const citationMap = new Map(formattedCitations.map((c) => [c.number, c]))
|
||||
|
||||
const normalizedContent = normalizeCitationMarks(content, citationMap, sourceType)
|
||||
|
||||
return mapCitationMarksToTags(normalizedContent, citationMap)
|
||||
}
|
||||
|
||||
/**
 * Normalizes citation marks into the unified [cite:N] format:
 * - OpenAI format: [<sup>N</sup>](url) → [cite:N]
 * - Gemini format: append [cite:N] based on grounding metadata
 * - other formats: [N] → [cite:N]
 *
 * Algorithm:
 * - one pass + regex replacement
 * - skips special contexts such as code blocks
 *
 * @param content original text content
 * @param citationMap citation data keyed by citation number
 * @param sourceType origin of the citations
 * @returns the normalized text content
 */
export function normalizeCitationMarks(
  content: string,
  citationMap: Map<number, Citation>,
  sourceType?: WebSearchSource
): string {
  // Identify code regions to skip. Note: indented code blocks are disabled
  // elsewhere, so only fenced blocks and inline spans need to be excluded.
  const codeBlockRegex = /```[\s\S]*?```|`[^`\n]*`/gm
  const skipRanges: Array<{ start: number; end: number }> = []

  let match
  while ((match = codeBlockRegex.exec(content)) !== null) {
    skipRanges.push({
      start: match.index,
      end: match.index + match[0].length
    })
  }

  // Whether a position falls inside one of the code regions.
  // skipRanges is naturally sorted by start (exec scans left to right).
  const shouldSkip = (pos: number): boolean => {
    for (const range of skipRanges) {
      if (pos >= range.start && pos < range.end) return true
      if (range.start > pos) break // ranges are sorted, so we can stop early
    }
    return false
  }

  // Shared replacement helper: collects all non-skipped matches of `regex`
  // against the current `content`, then applies them back-to-front.
  // NOTE(review): `skipRanges` is computed once from the ORIGINAL content;
  // after the first call mutates `content`, positions checked by `shouldSkip`
  // in later calls (e.g. the Gemini branch) may be stale — confirm whether the
  // replacements can ever shift text relative to a code block.
  const applyReplacements = (regex: RegExp, getReplacementFn: (match: RegExpExecArray) => string | null) => {
    const replacements: Array<{ start: number; end: number; replacement: string }> = []

    regex.lastIndex = 0 // reset regex state (shared /g regexes keep lastIndex)
    let match: RegExpExecArray | null
    while ((match = regex.exec(content)) !== null) {
      if (!shouldSkip(match.index)) {
        const replacement = getReplacementFn(match)
        if (replacement !== null) {
          replacements.push({
            start: match.index,
            end: match.index + match[0].length,
            replacement
          })
        }
      }
    }

    // Replace from the end backwards to avoid shifting earlier offsets.
    replacements.reverse().forEach(({ start, end, replacement }) => {
      content = content.slice(0, start) + replacement + content.slice(end)
    })
  }

  switch (sourceType) {
    case WebSearchSource.OPENAI:
    case WebSearchSource.OPENAI_RESPONSE:
    case WebSearchSource.PERPLEXITY: {
      // OpenAI format: [<sup>N</sup>](url) → [cite:N]
      applyReplacements(/\[<sup>(\d+)<\/sup>\]\([^)]*\)/g, (match) => {
        const citationNum = parseInt(match[1], 10)
        // Drop the mark only into the unified format when we actually have data for it.
        return citationMap.has(citationNum) ? `[cite:${citationNum}]` : null
      })
      break
    }
    case WebSearchSource.GEMINI: {
      // Gemini format: append [cite:N] after grounded text segments, driven by
      // the grounding metadata attached to the first citation.
      const firstCitation = Array.from(citationMap.values())[0]
      if (firstCitation?.metadata) {
        const textReplacements = new Map<string, string>()

        // Collect every text segment that needs tags appended.
        firstCitation.metadata.forEach((support: GroundingSupport) => {
          if (!support.groundingChunkIndices || !support.segment?.text) return

          const citationNums = support.groundingChunkIndices
          const text = support.segment.text
          // Grounding chunk indices are 0-based; citation numbers are 1-based.
          const basicTag = citationNums
            .map((citationNum) => {
              const citation = citationMap.get(citationNum + 1)
              return citation ? `[cite:${citationNum + 1}]` : ''
            })
            .filter(Boolean)
            .join('')

          if (basicTag) {
            textReplacements.set(text, `${text}${basicTag}`)
          }
        })

        // Apply all replacements in one sweep (each segment text is escaped
        // so it can be used literally inside a RegExp).
        textReplacements.forEach((replacement, originalText) => {
          const escapedText = originalText.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
          applyReplacements(new RegExp(escapedText, 'g'), () => replacement)
        })
      }
      break
    }
    default: {
      // Plain numeric format: [N] → [cite:N]
      applyReplacements(/\[(\d+)\]/g, (match) => {
        const citationNum = parseInt(match[1], 10)
        return citationMap.has(citationNum) ? `[cite:${citationNum}]` : null
      })
    }
  }

  return content
}
|
||||
|
||||
/**
|
||||
* 把文本内容中的 [cite:N] 标记转换为用于渲染的标签
|
||||
* @param content 原始文本内容
|
||||
* @param citationMap 引用映射表
|
||||
* @returns 处理后的文本内容
|
||||
*/
|
||||
export function mapCitationMarksToTags(content: string, citationMap: Map<number, Citation>): string {
|
||||
// 统一替换所有 [cite:N] 标记
|
||||
return content.replace(/\[cite:(\d+)\]/g, (match, num) => {
|
||||
const citationNum = parseInt(num, 10)
|
||||
const citation = citationMap.get(citationNum)
|
||||
|
||||
if (citation) {
|
||||
return generateCitationTag(citation)
|
||||
}
|
||||
|
||||
// 如果没找到对应的引用数据,保持原样(应该不会发生)
|
||||
return match
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成单个用于渲染的引用标签
|
||||
* @param citation 引用数据
|
||||
* @returns 渲染后的引用标签
|
||||
*/
|
||||
export function generateCitationTag(citation: Citation): string {
|
||||
const supData = {
|
||||
id: citation.number,
|
||||
url: citation.url,
|
||||
title: citation.title || citation.hostname || '',
|
||||
content: citation.content?.substring(0, 200)
|
||||
}
|
||||
const citationJson = encodeHTML(JSON.stringify(supData))
|
||||
|
||||
// 判断是否为有效链接
|
||||
const isLink = citation.url && citation.url.startsWith('http')
|
||||
|
||||
// 生成链接格式: [<sup data-citation='...'>N</sup>](url)
|
||||
// 或者生成空括号格式: [<sup data-citation='...'>N</sup>]()
|
||||
return `[<sup data-citation='${citationJson}'>${citation.number}</sup>]` + (isLink ? `(${citation.url})` : '()')
|
||||
}
|
||||
@@ -88,34 +88,6 @@ export function removeSvgEmptyLines(text: string): string {
|
||||
})
|
||||
}
|
||||
|
||||
// export function withGeminiGrounding(block: MainTextMessageBlock | TranslationMessageBlock): string {
|
||||
// // TODO
|
||||
// // const citationBlock = findCitationBlockWithGrounding(block)
|
||||
// // const groundingSupports = citationBlock?.groundingMetadata?.groundingSupports
|
||||
|
||||
// const content = block.content
|
||||
|
||||
// // if (!groundingSupports || groundingSupports.length === 0) {
|
||||
// // return content
|
||||
// // }
|
||||
|
||||
// // groundingSupports.forEach((support) => {
|
||||
// // const text = support?.segment?.text
|
||||
// // const indices = support?.groundingChunkIndices
|
||||
|
||||
// // if (!text || !indices) return
|
||||
|
||||
// // const nodes = indices.reduce((acc, index) => {
|
||||
// // acc.push(`<sup>${index + 1}</sup>`)
|
||||
// // return acc
|
||||
// // }, [] as string[])
|
||||
|
||||
// // content = content.replace(text, `${text} ${nodes.join(' ')}`)
|
||||
// // })
|
||||
|
||||
// return content
|
||||
// }
|
||||
|
||||
export function withGenerateImage(message: Message): { content: string; images?: string[] } {
|
||||
const originalContent = getMainTextContent(message)
|
||||
const imagePattern = new RegExp(`!\\[[^\\]]*\\]\\((.*?)\\s*("(?:.*[^"])")?\\s*\\)`)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { WebSearchResponse, WebSearchSource } from '@renderer/types'
|
||||
|
||||
// Counter for numbering links
|
||||
let linkCounter = 1
|
||||
// Buffer to hold incomplete link fragments across chunks
|
||||
@@ -236,11 +238,13 @@ export function convertLinks(
|
||||
}
|
||||
|
||||
// Rule 3: If the link text is not a URL/host, keep the text and add the numbered link
|
||||
if (!isHost(linkText)) {
|
||||
result += `${linkText} [<sup>${counter}</sup>](${url})`
|
||||
} else {
|
||||
// Rule 2: If the link text is a URL/host, replace with numbered link
|
||||
// 增加一个条件:如果 linkText 是纯数字,也直接替换
|
||||
if (isHost(linkText) || /^\d+$/.test(linkText)) {
|
||||
// Rule 2: If the link text is a URL/host or purely digits, replace with numbered link
|
||||
result += `[<sup>${counter}</sup>](${url})`
|
||||
} else {
|
||||
// If the link text is neither a URL/host nor purely digits, keep the text and add the numbered link
|
||||
result += `${linkText} [<sup>${counter}</sup>](${url})`
|
||||
}
|
||||
|
||||
position += match[0].length
|
||||
@@ -337,6 +341,25 @@ export function completeLinks(text: string, webSearch: any[]): string {
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据webSearch结果补全链接,将[num]转换为[num](webSearch[num-1].url)
|
||||
* @param {string} text 原始文本
|
||||
* @param {any[]} webSearch webSearch结果
|
||||
* @returns {string} 补全后的文本
|
||||
*/
|
||||
export function completionPerplexityLinks(text: string, webSearch: any[]): string {
|
||||
return text.replace(/\[(\d+)\]/g, (match, numStr) => {
|
||||
const num = parseInt(numStr)
|
||||
const index = num - 1
|
||||
// 检查 webSearch 数组中是否存在对应的 URL
|
||||
if (index >= 0 && index < webSearch.length && webSearch[index].url) {
|
||||
return `[${num}](${webSearch[index].url})`
|
||||
}
|
||||
// 如果没有找到对应的 URL,保持原样
|
||||
return match
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* 从Markdown文本中提取所有URL
|
||||
* 支持以下格式:
|
||||
@@ -463,8 +486,18 @@ export function extractWebSearchReferences(text: string): Array<{
|
||||
export function smartLinkConverter(
|
||||
text: string,
|
||||
providerType: string = 'openai',
|
||||
resetCounter: boolean = false
|
||||
resetCounter: boolean = false,
|
||||
webSearchResults?: WebSearchResponse
|
||||
): { text: string; hasBufferedContent: boolean } {
|
||||
if (webSearchResults) {
|
||||
const webSearch = webSearchResults.results
|
||||
switch (webSearchResults.source) {
|
||||
case WebSearchSource.PERPLEXITY: {
|
||||
text = completionPerplexityLinks(text, webSearch as any[])
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// 检测文本中的引用模式
|
||||
const references = extractWebSearchReferences(text)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user