fix: websearch block and citation formatting (#7776)

* feat: enhance citation handling for Perplexity web search results

- Implemented formatting for Perplexity citations in MainTextBlock, including data-citation attributes.
- Updated citation processing in message store and thunk to support new citation structure.
- Added utility functions for link completion based on web search results.
- Enhanced tests to verify correct handling of Perplexity citations and links.

* refactor: streamline chunk processing in OpenAIApiClient

- Replaced single choice handling with a loop to process all choices in the chunk.
- Improved handling of content sources, ensuring fallback mechanisms are in place for delta and message fields.
- Enhanced tool call processing to accommodate missing function names and arguments.
- Maintained existing functionality for web search data and reasoning content processing.

* fix: improve citation handling and web search integration

- Enhanced citation formatting to support legacy data compatibility in messageBlock.ts.
- Updated messageThunk.ts to manage main text block references and citation updates more effectively.
- Removed unnecessary web search flag and streamlined block processing logic.

* fix: improve citation transforms to skip code blocks
- Add withCitationTags for better code structure
- Add tests
- Remove outdated code
- Remove the Citation type from @renderer/types/index.ts, since it was not referenced anywhere
- Move the actual Citation type from @renderer/pages/home/Messages/CitationsList.tsx to @renderer/types/index.ts
- Allow text selection in the citation tooltip

* test: update tests

* refactor(messageThunk): streamline citation handling in response processing

- Removed redundant citation block source retrieval during text chunk processing.
- Updated citation references handling to ensure proper inclusion only when available.
- Simplified the logic for managing citation references in both streaming and final text updates.

* refactor: simplify determineCitationSource for backward compatibility

---------

Co-authored-by: one <wangan.cs@gmail.com>
This commit is contained in:
SuYao
2025-07-04 17:03:45 +08:00
committed by GitHub
parent 2fad7c0ff6
commit 134ea51b0f
22 changed files with 1156 additions and 338 deletions

View File

@@ -49,7 +49,9 @@ import {
LLMWebSearchCompleteChunk,
LLMWebSearchInProgressChunk,
MCPToolCreatedChunk,
TextCompleteChunk,
TextDeltaChunk,
ThinkingCompleteChunk,
ThinkingDeltaChunk
} from '@renderer/types/chunk'
import { type Message } from '@renderer/types/newMessage'
@@ -517,7 +519,7 @@ export class AnthropicAPIClient extends BaseApiClient<
return () => {
let accumulatedJson = ''
const toolCalls: Record<number, ToolUseBlock> = {}
const ChunkIdTypeMap: Record<number, ChunkType> = {}
return {
async transform(rawChunk: AnthropicSdkRawChunk, controller: TransformStreamDefaultController<GenericChunk>) {
switch (rawChunk.type) {
@@ -612,6 +614,19 @@ export class AnthropicAPIClient extends BaseApiClient<
toolCalls[rawChunk.index] = contentBlock
break
}
case 'text': {
if (!ChunkIdTypeMap[rawChunk.index]) {
ChunkIdTypeMap[rawChunk.index] = ChunkType.TEXT_DELTA // 用textdelta代表文本块
}
break
}
case 'thinking':
case 'redacted_thinking': {
if (!ChunkIdTypeMap[rawChunk.index]) {
ChunkIdTypeMap[rawChunk.index] = ChunkType.THINKING_DELTA // 用thinkingdelta代表思考块
}
break
}
}
break
}
@@ -646,6 +661,15 @@ export class AnthropicAPIClient extends BaseApiClient<
break
}
case 'content_block_stop': {
if (ChunkIdTypeMap[rawChunk.index] === ChunkType.TEXT_DELTA) {
controller.enqueue({
type: ChunkType.TEXT_COMPLETE
} as TextCompleteChunk)
} else if (ChunkIdTypeMap[rawChunk.index] === ChunkType.THINKING_DELTA) {
controller.enqueue({
type: ChunkType.THINKING_COMPLETE
} as ThinkingCompleteChunk)
}
const toolCall = toolCalls[rawChunk.index]
if (toolCall) {
try {

View File

@@ -564,11 +564,11 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
// Perplexity citations
// @ts-ignore - citations may not be in standard type definitions
if (context.provider?.id === 'perplexity' && chunk.citations && chunk.citations.length > 0) {
if (context.provider?.id === 'perplexity' && chunk.search_results && chunk.search_results.length > 0) {
hasBeenCollectedWebSearch = true
return {
// @ts-ignore - citations may not be in standard type definitions
results: chunk.citations,
results: chunk.search_results,
source: WebSearchSource.PERPLEXITY
}
}
@@ -672,74 +672,21 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
// 处理chunk
if ('choices' in chunk && chunk.choices && chunk.choices.length > 0) {
const choice = chunk.choices[0]
for (const choice of chunk.choices) {
if (!choice) continue
if (!choice) return
// 对于流式响应,使用 delta对于非流式响应使用 message
// 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。
// 如果 delta 为空对象,应当忽略它并回退到 message避免造成内容缺失。
let contentSource: OpenAISdkRawContentSource | null = null
if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) {
contentSource = choice.delta
} else if ('message' in choice) {
contentSource = choice.message
}
if (!contentSource) return
const webSearchData = collectWebSearchData(chunk, contentSource, context)
if (webSearchData) {
controller.enqueue({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
llm_web_search: webSearchData
})
}
// 处理推理内容 (e.g. from OpenRouter DeepSeek-R1)
// @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
const reasoningText = contentSource.reasoning_content || contentSource.reasoning
if (reasoningText) {
controller.enqueue({
type: ChunkType.THINKING_DELTA,
text: reasoningText
})
}
// 处理文本内容
if (contentSource.content) {
controller.enqueue({
type: ChunkType.TEXT_DELTA,
text: contentSource.content
})
}
// 处理工具调用
if (contentSource.tool_calls) {
for (const toolCall of contentSource.tool_calls) {
if ('index' in toolCall) {
const { id, index, function: fun } = toolCall
if (fun?.name) {
toolCalls[index] = {
id: id || '',
function: {
name: fun.name,
arguments: fun.arguments || ''
},
type: 'function'
}
} else if (fun?.arguments) {
toolCalls[index].function.arguments += fun.arguments
}
} else {
toolCalls.push(toolCall)
}
// 对于流式响应,使用 delta对于非流式响应使用 message。
// 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。
// 如果 delta 为空对象,应当忽略它并回退到 message避免造成内容缺失
let contentSource: OpenAISdkRawContentSource | null = null
if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) {
contentSource = choice.delta
} else if ('message' in choice) {
contentSource = choice.message
}
}
// 处理finish_reason发送流结束信号
if ('finish_reason' in choice && choice.finish_reason) {
Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`)
if (!contentSource) continue
const webSearchData = collectWebSearchData(chunk, contentSource, context)
if (webSearchData) {
controller.enqueue({
@@ -747,7 +694,60 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
llm_web_search: webSearchData
})
}
emitCompletionSignals(controller)
// 处理推理内容 (e.g. from OpenRouter DeepSeek-R1)
// @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
const reasoningText = contentSource.reasoning_content || contentSource.reasoning
if (reasoningText) {
controller.enqueue({
type: ChunkType.THINKING_DELTA,
text: reasoningText
})
}
// 处理文本内容
if (contentSource.content) {
controller.enqueue({
type: ChunkType.TEXT_DELTA,
text: contentSource.content
})
}
// 处理工具调用
if (contentSource.tool_calls) {
for (const toolCall of contentSource.tool_calls) {
if ('index' in toolCall) {
const { id, index, function: fun } = toolCall
if (fun?.name) {
toolCalls[index] = {
id: id || '',
function: {
name: fun.name,
arguments: fun.arguments || ''
},
type: 'function'
}
} else if (fun?.arguments) {
toolCalls[index].function.arguments += fun.arguments
}
} else {
toolCalls.push(toolCall)
}
}
}
// 处理finish_reason发送流结束信号
if ('finish_reason' in choice && choice.finish_reason) {
Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`)
const webSearchData = collectWebSearchData(chunk, contentSource, context)
if (webSearchData) {
controller.enqueue({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
llm_web_search: webSearchData
})
}
emitCompletionSignals(controller)
}
}
}
},

View File

@@ -492,6 +492,10 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
case 'response.output_item.added':
if (chunk.item.type === 'function_call') {
outputItems.push(chunk.item)
} else if (chunk.item.type === 'web_search_call') {
controller.enqueue({
type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS
})
}
break
case 'response.reasoning_summary_part.added':

View File

@@ -1,5 +1,5 @@
import Logger from '@renderer/config/logger'
import { ChunkType, TextDeltaChunk } from '@renderer/types/chunk'
import { ChunkType, TextCompleteChunk, TextDeltaChunk } from '@renderer/types/chunk'
import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas'
import { CompletionsContext, CompletionsMiddleware } from '../types'
@@ -38,7 +38,7 @@ export const TextChunkMiddleware: CompletionsMiddleware =
// 用于跨chunk的状态管理
let accumulatedTextContent = ''
let hasEnqueue = false
let hasTextCompleteEventEnqueue = false
const enhancedTextStream = resultFromUpstream.pipeThrough(
new TransformStream<GenericChunk, GenericChunk>({
transform(chunk: GenericChunk, controller) {
@@ -53,30 +53,44 @@ export const TextChunkMiddleware: CompletionsMiddleware =
// 创建新的chunk包含处理后的文本
controller.enqueue(chunk)
} else if (accumulatedTextContent) {
if (chunk.type !== ChunkType.LLM_RESPONSE_COMPLETE) {
controller.enqueue(chunk)
hasEnqueue = true
}
const finalText = accumulatedTextContent
ctx._internal.customState!.accumulatedText = finalText
if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) {
ctx._internal.toolProcessingState.output = finalText
}
// 处理 onResponse 回调 - 发送最终完整文本
if (params.onResponse) {
params.onResponse(finalText, true)
}
} else if (chunk.type === ChunkType.TEXT_COMPLETE) {
const textChunk = chunk as TextCompleteChunk
controller.enqueue({
type: ChunkType.TEXT_COMPLETE,
text: finalText
...textChunk,
text: accumulatedTextContent
})
if (params.onResponse) {
params.onResponse(accumulatedTextContent, true)
}
hasTextCompleteEventEnqueue = true
accumulatedTextContent = ''
if (!hasEnqueue) {
} else if (accumulatedTextContent && !hasTextCompleteEventEnqueue) {
if (chunk.type === ChunkType.LLM_RESPONSE_COMPLETE) {
const finalText = accumulatedTextContent
ctx._internal.customState!.accumulatedText = finalText
if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) {
ctx._internal.toolProcessingState.output = finalText
}
// 处理 onResponse 回调 - 发送最终完整文本
if (params.onResponse) {
params.onResponse(finalText, true)
}
controller.enqueue({
type: ChunkType.TEXT_COMPLETE,
text: finalText
})
controller.enqueue(chunk)
} else {
controller.enqueue({
type: ChunkType.TEXT_COMPLETE,
text: accumulatedTextContent
})
controller.enqueue(chunk)
}
hasTextCompleteEventEnqueue = true
accumulatedTextContent = ''
} else {
// 其他类型的chunk直接传递
controller.enqueue(chunk)

View File

@@ -65,6 +65,16 @@ export const ThinkChunkMiddleware: CompletionsMiddleware =
thinking_millsec: thinkingStartTime > 0 ? Date.now() - thinkingStartTime : 0
}
controller.enqueue(enhancedChunk)
} else if (chunk.type === ChunkType.THINKING_COMPLETE) {
const thinkingCompleteChunk = chunk as ThinkingCompleteChunk
controller.enqueue({
...thinkingCompleteChunk,
text: accumulatedThinkingContent,
thinking_millsec: thinkingStartTime > 0 ? Date.now() - thinkingStartTime : 0
})
hasThinkingContent = false
accumulatedThinkingContent = ''
thinkingStartTime = 0
} else if (hasThinkingContent && thinkingStartTime > 0) {
// 收到任何非THINKING_DELTA的chunk时如果有累积的思考内容生成THINKING_COMPLETE
const thinkingCompleteChunk: ThinkingCompleteChunk = {

View File

@@ -42,7 +42,12 @@ export const WebSearchMiddleware: CompletionsMiddleware =
const providerType = model.provider || 'openai'
// 使用当前可用的Web搜索结果进行链接转换
const text = chunk.text
const result = smartLinkConverter(text, providerType, isFirstChunk)
const result = smartLinkConverter(
text,
providerType,
isFirstChunk,
ctx._internal.webSearchState!.results
)
if (isFirstChunk) {
isFirstChunk = false
}

View File

@@ -32,7 +32,7 @@ const CitationTooltip: React.FC<CitationTooltipProps> = ({ children, citation })
// 自定义悬浮卡片内容
const tooltipContent = useMemo(
() => (
<div>
<div style={{ userSelect: 'text' }}>
<TooltipHeader role="button" aria-label={`Open ${sourceTitle} in new tab`} onClick={handleClick}>
<Favicon hostname={hostname} alt={sourceTitle} />
<TooltipTitle role="heading" aria-level={3} title={sourceTitle}>

View File

@@ -58,7 +58,9 @@ exports[`CitationTooltip > basic rendering > should match snapshot 1`] = `
<div
data-testid="tooltip-content"
>
<div>
<div
style="user-select: text;"
>
<div
aria-label="Open Example Article in new tab"
class="c0"

View File

@@ -1,11 +1,10 @@
import { GroundingSupport } from '@google/genai'
import { useSettings } from '@renderer/hooks/useSettings'
import { getModelUniqId } from '@renderer/services/ModelService'
import type { RootState } from '@renderer/store'
import { selectFormattedCitationsByBlockId } from '@renderer/store/messageBlock'
import { type Model, WebSearchSource } from '@renderer/types'
import { type Model } from '@renderer/types'
import type { MainTextMessageBlock, Message } from '@renderer/types/newMessage'
import { cleanMarkdownContent, encodeHTML } from '@renderer/utils/formats'
import { determineCitationSource, withCitationTags } from '@renderer/utils/citation'
import { Flex } from 'antd'
import React, { useMemo } from 'react'
import { useSelector } from 'react-redux'
@@ -28,113 +27,16 @@ const MainTextBlock: React.FC<Props> = ({ block, citationBlockId, role, mentions
const rawCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, citationBlockId))
const formattedCitations = useMemo(() => {
return rawCitations.map((citation) => ({
...citation,
content: citation.content ? cleanMarkdownContent(citation.content) : citation.content
}))
}, [rawCitations])
const processedContent = useMemo(() => {
let content = block.content
// Update condition to use citationBlockId
if (!block.citationReferences?.length || !citationBlockId || formattedCitations.length === 0) {
return content
if (!block.citationReferences?.length || !citationBlockId || rawCitations.length === 0) {
return block.content
}
switch (block.citationReferences[0].citationBlockSource) {
case WebSearchSource.OPENAI:
case WebSearchSource.OPENAI_RESPONSE: {
formattedCitations.forEach((citation) => {
const citationNum = citation.number
const supData = {
id: citationNum,
url: citation.url,
title: citation.title || citation.hostname || '',
content: citation.content?.substring(0, 200)
}
const citationJson = encodeHTML(JSON.stringify(supData))
// 确定最适合的 source
const sourceType = determineCitationSource(block.citationReferences)
// Handle[<sup>N</sup>](url)
const preFormattedRegex = new RegExp(`\\[<sup>${citationNum}</sup>\\]\\(.*?\\)`, 'g')
const citationTag = `[<sup data-citation='${citationJson}'>${citationNum}</sup>](${citation.url})`
content = content.replace(preFormattedRegex, citationTag)
})
break
}
case WebSearchSource.GEMINI: {
// First pass: Add basic citation marks using metadata
let processedContent = content
const firstCitation = formattedCitations[0]
if (firstCitation?.metadata) {
firstCitation.metadata.forEach((support: GroundingSupport) => {
const citationNums = support.groundingChunkIndices!
if (support.segment) {
const text = support.segment.text!
// 生成引用标记
const basicTag = citationNums
.map((citationNum) => {
const citation = formattedCitations.find((c) => c.number === citationNum + 1)
return citation ? `[<sup>${citationNum + 1}</sup>](${citation.url})` : ''
})
.join('')
// 在文本后面添加引用标记,而不是替换
if (text && basicTag) {
processedContent = processedContent.replace(text, `${text}${basicTag}`)
}
}
})
content = processedContent
}
// Second pass: Replace basic citations with full citation data
formattedCitations.forEach((citation) => {
const citationNum = citation.number
const supData = {
id: citationNum,
url: citation.url,
title: citation.title || citation.hostname || '',
content: citation.content?.substring(0, 200)
}
const citationJson = encodeHTML(JSON.stringify(supData))
// Replace basic citation with full citation including data
const basicCitationRegex = new RegExp(`\\[<sup>${citationNum}</sup>\\]\\(${citation.url}\\)`, 'g')
const fullCitationTag = `[<sup data-citation='${citationJson}'>${citationNum}</sup>](${citation.url})`
content = content.replace(basicCitationRegex, fullCitationTag)
})
break
}
default: {
// FIXME性能问题需要优化
// Replace all citation numbers and pre-formatted links with formatted citations
formattedCitations.forEach((citation) => {
const citationNum = citation.number
const supData = {
id: citationNum,
url: citation.url,
title: citation.title || citation.hostname || '',
content: citation.content?.substring(0, 200)
}
const isLink = citation.url.startsWith('http')
const citationJson = encodeHTML(JSON.stringify(supData))
// Handle both plain references [N] and pre-formatted links [<sup>N</sup>](url)
const plainRefRegex = new RegExp(`\\[${citationNum}\\]`, 'g')
const supTag = `<sup data-citation='${citationJson}'>${citationNum}</sup>`
const citationTag = isLink ? `[${supTag}](${citation.url})` : supTag
content = content.replace(plainRefRegex, citationTag)
})
}
}
return content
}, [block.content, block.citationReferences, citationBlockId, formattedCitations])
return withCitationTags(block.content, rawCitations, sourceType)
}, [block.content, block.citationReferences, citationBlockId, rawCitations])
const ignoreToolUse = useMemo(() => {
return processedContent.replace(toolUseRegex, '')

View File

@@ -48,6 +48,28 @@ vi.mock('@renderer/utils/formats', () => ({
encodeHTML: vi.fn((content: string) => content.replace(/"/g, '&quot;'))
}))
// Mock citation utilities
vi.mock('@renderer/utils/citation', () => ({
withCitationTags: vi.fn((content: string, citations: any[]) => {
// Simple mock implementation that simulates citation processing
if (citations.length > 0) {
return `${content} [processed-citations]`
}
return content
}),
determineCitationSource: vi.fn((citationReferences: any[], citationBlock?: any) => {
// Mock implementation that returns the first valid source from citationReferences
if (citationBlock?.response?.source) {
return citationBlock.response.source
}
if (citationReferences?.length) {
const validReference = citationReferences.find((ref) => ref.citationBlockSource)
return validReference?.citationBlockSource
}
return undefined
})
}))
// Mock services
vi.mock('@renderer/services/ModelService', () => ({
getModelUniqId: vi.fn()
@@ -66,7 +88,8 @@ vi.mock('@renderer/pages/home/Markdown/Markdown', () => ({
describe('MainTextBlock', () => {
// Get references to mocked modules
let mockGetModelUniqId: any
let mockCleanMarkdownContent: any
let mockWithCitationTags: any
let mockDetermineCitationSource: any
// Create a mock store for Provider
const mockStore = configureStore({
@@ -80,9 +103,10 @@ describe('MainTextBlock', () => {
// Get the mocked functions
const { getModelUniqId } = await import('@renderer/services/ModelService')
const { cleanMarkdownContent } = await import('@renderer/utils/formats')
const { withCitationTags, determineCitationSource } = await import('@renderer/utils/citation')
mockGetModelUniqId = getModelUniqId as any
mockCleanMarkdownContent = cleanMarkdownContent as any
mockWithCitationTags = withCitationTags as any
mockDetermineCitationSource = determineCitationSource as any
// Default mock implementations
mockUseSettings.mockReturnValue({ renderInputMessageAsMarkdown: false })
@@ -283,8 +307,16 @@ text after`,
})
it('should process content through format utilities', () => {
const block = createMainTextBlock({ content: 'Content to process' })
mockUseSelector.mockReturnValue([{ id: '1', content: 'Citation content', number: 1 }])
const block = createMainTextBlock({
content: 'Content to process',
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
})
const mockCitations = [{ id: '1', content: 'Citation content', number: 1 }]
// Mock the useSelector calls - first call for citations, second call for citationBlock
mockUseSelector
.mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId
.mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById
renderMainTextBlock({
block,
@@ -292,8 +324,14 @@ text after`,
citationBlockId: 'test-citations'
})
// Verify utility functions are called
expect(mockCleanMarkdownContent).toHaveBeenCalled()
// Verify determineCitationSource was called with correct parameters
expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences)
// Verify citation processing was called with correct parameters
expect(mockWithCitationTags).toHaveBeenCalledWith('Content to process', mockCitations, 'DEFAULT')
// Verify the processed content is rendered
expect(screen.getByText('Markdown: Content to process [processed-citations]')).toBeInTheDocument()
})
})
@@ -308,7 +346,7 @@ text after`,
expect(mockUseSelector).toHaveBeenCalled()
})
it('should integrate with citation system when citations exist', () => {
it('should integrate with citation processing when all conditions are met', () => {
const block = createMainTextBlock({
content: 'Content with citation [1]',
citationReferences: [{ citationBlockSource: WebSearchSource.OPENAI }]
@@ -324,7 +362,11 @@ text after`,
}
]
mockUseSelector.mockReturnValue(mockCitations)
// Mock the useSelector calls - first call for citations, second call for citationBlock
mockUseSelector
.mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId
.mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById
renderMainTextBlock({
block,
role: 'assistant',
@@ -335,28 +377,58 @@ text after`,
expect(mockUseSelector).toHaveBeenCalled()
expect(getRenderedMarkdown()).toBeInTheDocument()
// Verify content processing occurred
expect(mockCleanMarkdownContent).toHaveBeenCalledWith('Citation content')
// Verify determineCitationSource was called
expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences)
// Verify withCitationTags was called with correct parameters
expect(mockWithCitationTags).toHaveBeenCalledWith(
'Content with citation [1]',
mockCitations,
WebSearchSource.OPENAI
)
// Verify the processed content is rendered
expect(screen.getByText('Markdown: Content with citation [1] [processed-citations]')).toBeInTheDocument()
})
it('should handle different citation sources correctly', () => {
const testSources = [WebSearchSource.OPENAI, 'DEFAULT' as any, 'CUSTOM' as any]
it('should skip citation processing when conditions are not met', () => {
const testCases = [
{
name: 'no citationReferences',
block: createMainTextBlock({ content: 'Content [1]' }),
citationBlockId: 'test'
},
{
name: 'no citationBlockId',
block: createMainTextBlock({
content: 'Content [1]',
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
}),
citationBlockId: undefined
},
{
name: 'no citations data',
block: createMainTextBlock({
content: 'Content [1]',
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]
}),
citationBlockId: 'test'
}
]
testSources.forEach((source) => {
const block = createMainTextBlock({
content: `Citation test for ${source}`,
citationReferences: [{ citationBlockSource: source }]
})
mockUseSelector.mockReturnValue([{ id: '1', number: 1, url: 'https://test.com', title: 'Test' }])
testCases.forEach(({ block, citationBlockId }) => {
mockUseSelector.mockReturnValue([]) // No citations
const { unmount } = renderMainTextBlock({
block,
role: 'assistant',
citationBlockId: `test-${source}`
citationBlockId
})
expect(getRenderedMarkdown()).toBeInTheDocument()
// Should render original content without citation processing
expect(screen.getByText(`Markdown: ${block.content}`)).toBeInTheDocument()
unmount()
})
})
@@ -400,51 +472,7 @@ text after`,
})
})
describe('edge cases and robustness', () => {
it('should handle large content without performance issues', () => {
const largeContent = 'A'.repeat(1000) + ' with citations [1]'
const block = createMainTextBlock({ content: largeContent })
const largeCitations = [
{
id: '1',
number: 1,
url: 'https://large.com',
title: 'Large',
content: 'B'.repeat(500)
}
]
mockUseSelector.mockReturnValue(largeCitations)
expect(() => {
renderMainTextBlock({
block,
role: 'assistant',
citationBlockId: 'large-test'
})
}).not.toThrow()
expect(getRenderedMarkdown()).toBeInTheDocument()
})
it('should handle special characters and Unicode gracefully', () => {
const specialContent = '测试内容 🚀 📝 ✨ <>&"\'` [1]'
const block = createMainTextBlock({ content: specialContent })
mockUseSelector.mockReturnValue([{ id: '1', number: 1, title: '特殊字符测试', content: '内容 with 🎉' }])
expect(() => {
renderMainTextBlock({
block,
role: 'assistant',
citationBlockId: 'unicode-test'
})
}).not.toThrow()
expect(getRenderedMarkdown()).toBeInTheDocument()
})
describe('integration and robustness', () => {
it('should handle null and undefined values gracefully', () => {
const block = createMainTextBlock({ content: 'Null safety test' })
@@ -460,7 +488,7 @@ text after`,
expect(getRenderedMarkdown()).toBeInTheDocument()
})
it('should integrate properly with Redux store', () => {
it('should integrate properly with Redux store for citations', () => {
const block = createMainTextBlock({
content: 'Redux integration test',
citationReferences: [{ citationBlockSource: 'DEFAULT' as any }]

View File

@@ -1,5 +1,6 @@
import ContextMenu from '@renderer/components/ContextMenu'
import Favicon from '@renderer/components/Icons/FallbackFavicon'
import { Citation } from '@renderer/types'
import { fetchWebContent } from '@renderer/utils/fetch'
import { cleanMarkdownContent } from '@renderer/utils/formats'
import { QueryClient, QueryClientProvider, useQuery } from '@tanstack/react-query'
@@ -9,17 +10,6 @@ import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
export interface Citation {
number: number
url: string
title?: string
hostname?: string
content?: string
showFavicon?: boolean
type?: string
metadata?: Record<string, any>
}
interface CitationsListProps {
citations: Citation[]
}

View File

@@ -359,9 +359,6 @@ export async function fetchChatCompletion({
// --- Call AI Completions ---
onChunkReceived({ type: ChunkType.LLM_RESPONSE_CREATED })
if (enableWebSearch) {
onChunkReceived({ type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS })
}
await AI.completions(
{
callType: 'chat',

View File

@@ -43,6 +43,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {})
return (chunk: Chunk) => {
try {
const data = chunk
// console.log('data: ', chunk)
switch (data.type) {
case ChunkType.BLOCK_COMPLETE: {
if (callbacks.onComplete) callbacks.onComplete(AssistantMessageStatus.SUCCESS, data?.response)

View File

@@ -1,8 +1,7 @@
import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
import type { GroundingMetadata } from '@google/genai'
import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit'
import type { Citation } from '@renderer/pages/home/Messages/CitationsList'
import { WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage'
import { MessageBlockType } from '@renderer/types/newMessage'
import type OpenAI from 'openai'
@@ -160,9 +159,19 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
}
}) || []
break
case WebSearchSource.PERPLEXITY: {
formattedCitations =
(block.response.results as any[])?.map((result, index) => ({
number: index + 1,
url: result.url || result, // 兼容旧数据
title: result.title || new URL(result).hostname, // 兼容旧数据
showFavicon: true,
type: 'websearch'
})) || []
break
}
case WebSearchSource.GROK:
case WebSearchSource.OPENROUTER:
case WebSearchSource.PERPLEXITY:
formattedCitations =
(block.response.results as any[])?.map((url, index) => {
try {

View File

@@ -8,7 +8,15 @@ import { createStreamProcessor, type StreamProcessorCallbacks } from '@renderer/
import { estimateMessagesUsage } from '@renderer/services/TokenService'
import store from '@renderer/store'
import { updateTopicUpdatedAt } from '@renderer/store/assistants'
import type { Assistant, ExternalToolResult, FileMetadata, MCPToolResponse, Model, Topic } from '@renderer/types'
import {
type Assistant,
type ExternalToolResult,
type FileMetadata,
type MCPToolResponse,
type Model,
type Topic,
WebSearchSource
} from '@renderer/types'
import type {
CitationMessageBlock,
FileMessageBlock,
@@ -353,7 +361,7 @@ const fetchAndProcessAssistantResponseImpl = async (
let thinkingBlockId: string | null = null
let imageBlockId: string | null = null
let toolBlockId: string | null = null
let hasWebSearch = false
const toolCallIdToBlockIdMap = new Map<string, string>()
const notificationService = NotificationService.getInstance()
@@ -433,8 +441,7 @@ const fetchAndProcessAssistantResponseImpl = async (
const initialChanges: Partial<MessageBlock> = {
type: MessageBlockType.MAIN_TEXT,
content: accumulatedContent,
status: MessageBlockStatus.STREAMING,
citationReferences: citationBlockId ? [{ citationBlockId }] : []
status: MessageBlockStatus.STREAMING
}
mainTextBlockId = initialPlaceholderBlockId
// 清理占位块
@@ -444,8 +451,7 @@ const fetchAndProcessAssistantResponseImpl = async (
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
} else {
const newBlock = createMainTextBlock(assistantMsgId, accumulatedContent, {
status: MessageBlockStatus.STREAMING,
citationReferences: citationBlockId ? [{ citationBlockId }] : []
status: MessageBlockStatus.STREAMING
})
mainTextBlockId = newBlock.id // 立即设置ID防止竞态条件
await handleBlockTransition(newBlock, MessageBlockType.MAIN_TEXT)
@@ -453,27 +459,27 @@ const fetchAndProcessAssistantResponseImpl = async (
},
onTextComplete: async (finalText) => {
if (mainTextBlockId) {
let citationBlockSource: WebSearchSource | undefined
if (citationBlockId) {
const citationBlock = getState().messageBlocks.entities[citationBlockId] as CitationMessageBlock
citationBlockSource = citationBlock.response?.source
}
const changes = {
content: finalText,
status: MessageBlockStatus.SUCCESS
status: MessageBlockStatus.SUCCESS,
citationReferences: citationBlockSource ? [{ citationBlockId, citationBlockSource }] : []
}
cancelThrottledBlockUpdate(mainTextBlockId)
dispatch(updateOneBlock({ id: mainTextBlockId, changes }))
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
mainTextBlockId = null
if (!assistant.enableWebSearch) {
mainTextBlockId = null
}
} else {
console.warn(
`[onTextComplete] Received text.complete but last block was not MAIN_TEXT (was ${lastBlockType}) or lastBlockId is null.`
)
}
if (citationBlockId && !hasWebSearch) {
const changes: Partial<CitationMessageBlock> = {
status: MessageBlockStatus.SUCCESS
}
dispatch(updateOneBlock({ id: citationBlockId, changes }))
saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState)
citationBlockId = null
}
},
onThinkingChunk: async (text, thinking_millsec) => {
accumulatedThinking += text
@@ -616,15 +622,44 @@ const fetchAndProcessAssistantResponseImpl = async (
}
},
onLLMWebSearchComplete: async (llmWebSearchResult) => {
if (citationBlockId) {
hasWebSearch = true
const blockId = citationBlockId || initialPlaceholderBlockId
if (blockId) {
const changes: Partial<CitationMessageBlock> = {
type: MessageBlockType.CITATION,
response: llmWebSearchResult,
status: MessageBlockStatus.SUCCESS
}
dispatch(updateOneBlock({ id: citationBlockId, changes }))
saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState)
dispatch(updateOneBlock({ id: blockId, changes }))
saveUpdatedBlockToDB(blockId, assistantMsgId, topicId, getState)
if (mainTextBlockId) {
const state = getState()
const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId]
if (existingMainTextBlock && existingMainTextBlock.type === MessageBlockType.MAIN_TEXT) {
const currentRefs = existingMainTextBlock.citationReferences || []
const mainTextChanges = {
citationReferences: [...currentRefs, { blockId, citationBlockSource: llmWebSearchResult.source }]
}
dispatch(updateOneBlock({ id: mainTextBlockId, changes: mainTextChanges }))
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState)
}
mainTextBlockId = null
}
if (initialPlaceholderBlockId) {
citationBlockId = initialPlaceholderBlockId
initialPlaceholderBlockId = null
}
} else {
const citationBlock = createCitationBlock(
assistantMsgId,
{
response: llmWebSearchResult
},
{
status: MessageBlockStatus.SUCCESS
}
)
citationBlockId = citationBlock.id
if (mainTextBlockId) {
const state = getState()
const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId]
@@ -641,6 +676,7 @@ const fetchAndProcessAssistantResponseImpl = async (
}
mainTextBlockId = null
}
await handleBlockTransition(citationBlock, MessageBlockType.CITATION)
}
},
onImageCreated: async () => {

View File

@@ -55,7 +55,6 @@ export interface LLMResponseInProgressChunk {
response?: Response
type: ChunkType.LLM_RESPONSE_IN_PROGRESS
}
export interface TextDeltaChunk {
/**
* The text content of the chunk

View File

@@ -728,9 +728,12 @@ export interface QuickPhrase {
/**
 * A single citation that can be referenced from message text and rendered as a
 * superscript link.
 */
export interface Citation {
  /** Citation number as it appears in the text (e.g. the `1` in `[1]`). */
  number: number
  /** Target URL; may be empty or a non-http value, in which case no link target is rendered. */
  url: string
  /** Display title; the hostname is used as a fallback when it is missing. */
  title?: string
  /** Hostname of the cited page. */
  hostname?: string
  /** Text content of the cited source. */
  content?: string
  showFavicon?: boolean
  type?: string
  /** Provider-specific metadata attached to the citation. */
  metadata?: Record<string, any>
}
export type MathEngine = 'KaTeX' | 'MathJax' | 'none'

View File

@@ -0,0 +1,562 @@
import { GroundingSupport } from '@google/genai'
import { Citation, WebSearchSource } from '@renderer/types'
import { describe, expect, it, vi } from 'vitest'
import {
determineCitationSource,
generateCitationTag,
mapCitationMarksToTags,
normalizeCitationMarks,
withCitationTags
} from '../citation'
// Replace the formatting helpers with small deterministic stand-ins.
// Everything the mock needs is declared inside the factory so it does not depend on outer scope.
vi.mock('@renderer/utils/formats', () => {
  const htmlEntities: { [key: string]: string } = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;'
  }
  // Strips the markdown marker characters * _ ~ `
  const stripMarkdownMarkers = (content: string) => content.replace(/[*_~`]/g, '')
  // Escapes the five HTML-sensitive characters
  const escapeHtml = (str: string) => str.replace(/[&<>"']/g, (ch) => htmlEntities[ch])
  return {
    cleanMarkdownContent: vi.fn(stripMarkdownMarkers),
    encodeHTML: vi.fn(escapeHtml)
  }
})
describe('citation', () => {
const createCitationMap = (citations: Citation[]) => new Map(citations.map((c) => [c.number, c]))
// determineCitationSource: returns the first available source among the citation references.
describe('determineCitationSource', () => {
  it('should find the citation source', () => {
    const citationReferences = [{ citationBlockId: 'block1', citationBlockSource: WebSearchSource.OPENAI }]
    const result = determineCitationSource(citationReferences)
    expect(result).toBe(WebSearchSource.OPENAI)
  })

  it('should find first valid source in citation references', () => {
    const citationReferences = [
      { citationBlockId: 'block1' }, // no source
      { citationBlockId: 'block2', citationBlockSource: WebSearchSource.GEMINI },
      { citationBlockId: 'block3', citationBlockSource: WebSearchSource.GEMINI }
    ]
    const result = determineCitationSource(citationReferences)
    expect(result).toBe(WebSearchSource.GEMINI)
  })

  it('should return undefined when no sources available', () => {
    const citationReferences = [
      { citationBlockId: 'block1' }, // no source
      { citationBlockId: 'block2' } // no source
    ]
    const result = determineCitationSource(citationReferences)
    expect(result).toBeUndefined()
  })

  it('should return undefined for empty citation references', () => {
    const result = determineCitationSource([])
    expect(result).toBeUndefined()
  })

  it('should return undefined for undefined citation references', () => {
    const result = determineCitationSource(undefined)
    expect(result).toBeUndefined()
  })
})
// withCitationTags end to end: raw content plus citations in, rendered citation tags out.
describe('withCitationTags', () => {
  // Every positive case cites number 1 at https://example.com, so the rendered tail is shared.
  const expectFirstCitationRendered = (rendered: string, openingFragment: string) => {
    expect(rendered).toContain(openingFragment)
    expect(rendered).toContain('1</sup>](https://example.com)')
  }

  it('should process citations with default source type', () => {
    const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Example' }]
    const rendered = withCitationTags('Test content [1] with citation', citations)
    expectFirstCitationRendered(rendered, '[<sup data-citation=')
  })

  it('should process citations with OpenAI source type', () => {
    const citations: Citation[] = [
      { number: 1, url: 'https://example.com', title: 'Example', content: 'Some **content**' }
    ]
    const rendered = withCitationTags(
      'Test content [<sup>1</sup>](https://example.com)',
      citations,
      WebSearchSource.OPENAI
    )
    expectFirstCitationRendered(rendered, '[<sup data-citation=')
  })

  it('should process citations with Gemini source type', () => {
    const metadata: GroundingSupport[] = [{ segment: { text: 'Test content' }, groundingChunkIndices: [0] }]
    const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Example', metadata }]
    const rendered = withCitationTags('Test content from Gemini', citations, WebSearchSource.GEMINI)
    // The tag is appended directly after the grounded segment text
    expectFirstCitationRendered(rendered, 'Test content[<sup data-citation=')
  })

  it('should handle empty citations array', () => {
    const content = 'This is test content [1]'
    expect(withCitationTags(content, [])).toBe(content)
  })
})
// normalizeCitationMarks against markdown input: code spans and fenced blocks are left alone,
// every other markdown structure is processed.
describe('normalizeCitationMarks with markdown', () => {
  const citations: Citation[] = [
    { number: 1, url: 'https://example1.com', title: 'Example 1' },
    { number: 2, url: 'https://example2.com', title: 'Example 2' },
    { number: 3, url: 'https://example3.com', title: 'Example 3' }
  ]
  const citationMap = createCitationMap(citations)

  it('should not process citations in inline code', () => {
    const content = 'Here is `code with [1] citation` and normal [2] citation'
    const result = normalizeCitationMarks(content, citationMap)
    // [1] inside inline code must stay untouched
    expect(result).toContain('`code with [1] citation`')
    // [2] in normal text must be processed
    expect(result).toContain('[cite:2]')
  })

  it('should not process citations in code blocks', () => {
    const content = `Text with citation [1]
\`\`\`python
# Python code with [2] reference
def func():
data = [3, 4, 5] # Array with [1] element reference
return data
\`\`\`
\`\`\`bash
echo "Command with [2] parameter"
\`\`\`
// Indented code block is not skipped
echo "Indented code block [3]"
Normal text with [3] citation`
    const result = normalizeCitationMarks(content, citationMap)
    // Content inside fenced code blocks must stay as-is
    expect(result).toContain('# Python code with [2] reference')
    expect(result).toContain('data = [3, 4, 5] # Array with [1] element reference')
    expect(result).toContain('echo "Command with [2] parameter"')
    // Citations outside code blocks must be processed
    expect(result).toContain('Text with citation [cite:1]')
    expect(result).toContain('Indented code block [cite:3]')
    expect(result).toContain('Normal text with [cite:3]')
  })

  it('should handle malformed code blocks', () => {
    const content = `Text with [1]
\`\`\`unclosed
Code block without closing
With [2] citation
Normal text with [3] continues`
    const result = normalizeCitationMarks(content, citationMap)
    // An unclosed fence is not a code block, so everything is processed
    expect(result).toContain('[cite:1]')
    expect(result).toContain('[cite:2]')
    expect(result).toContain('[cite:3]')
  })

  it('should handle citations in various markdown structures', () => {
    const content = `Normal citation [1]
> This is a blockquote with [2] citation
> And another line with [3]
Back to normal **with [1] again**
# Heading with [3] citation
## Subheading with [2] citation
List:
- list item with citation [1]
Numbered list:
1. item with [2]`
    const result = normalizeCitationMarks(content, citationMap)
    expect(result).toContain('citation [cite:1]')
    expect(result).toContain('blockquote with [cite:2]')
    expect(result).toContain('another line with [cite:3]')
    expect(result).toContain('with [cite:1] again')
    expect(result).toContain('Heading with [cite:3]')
    expect(result).toContain('Subheading with [cite:2]')
    expect(result).toContain('list item with citation [cite:1]')
    expect(result).toContain('item with [cite:2]')
  })
})
// normalizeCitationMarks per source type: each provider format is normalized to [cite:N],
// and markers without a matching citation are preserved.
describe('normalizeCitationMarks simple', () => {
  describe('OpenAI format citations', () => {
    it('should normalize OpenAI format citations', () => {
      const content = 'Text with [<sup>1</sup>](https://example.com) citation'
      const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
      const citationMap = createCitationMap(citations)
      for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) {
        const result = normalizeCitationMarks(content, citationMap, sourceType)
        expect(result).toBe('Text with [cite:1] citation')
      }
    })

    it('should preserve non-matching OpenAI citations', () => {
      const content = 'Text with [<sup>3</sup>](https://missing.com) citation'
      const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
      const citationMap = createCitationMap(citations)
      for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) {
        const result = normalizeCitationMarks(content, citationMap, sourceType)
        expect(result).toBe('Text with [<sup>3</sup>](https://missing.com) citation')
      }
    })
  })

  describe('Perplexity format citations', () => {
    it('should normalize Perplexity format citations', () => {
      const content = 'Perplexity citations [<sup>1</sup>](https://example.com)'
      const citations: Citation[] = [
        { number: 1, url: 'https://example.com', title: 'Example Citation', content: 'Citation content' }
      ]
      const citationMap = createCitationMap(citations)
      const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY)
      expect(normalized).toBe('Perplexity citations [cite:1]')
    })

    it('should preserve unmatched Perplexity citations', () => {
      const content = 'Text with [<sup>2</sup>](https://notfound.com) citation'
      const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Example Citation' }]
      const citationMap = createCitationMap(citations)
      // Citation 2 does not exist, so the marker must stay as-is
      const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY)
      expect(normalized).toBe('Text with [<sup>2</sup>](https://notfound.com) citation')
    })
  })

  describe('Gemini format citations', () => {
    it('should normalize Gemini format citations', () => {
      const content = 'This is test content from Gemini'
      const metadata: GroundingSupport[] = [
        {
          segment: { text: 'test content' },
          groundingChunkIndices: [0, 1]
        }
      ]
      const citations: Citation[] = [
        { number: 1, url: 'https://example1.com', title: 'Test 1', metadata },
        { number: 2, url: 'https://example2.com', title: 'Test 2' }
      ]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI)
      expect(result).toBe('This is test content[cite:1][cite:2] from Gemini')
    })

    it('should handle Gemini citations without metadata', () => {
      const content = 'Content without metadata'
      const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI)
      expect(result).toBe('Content without metadata')
    })
  })

  describe('default format citations', () => {
    it('should normalize default format citations', () => {
      const content = 'Text with [1][2] and [3] citations'
      const citations: Citation[] = [
        { number: 1, url: 'https://example1.com', title: 'Test 1' },
        { number: 2, url: 'https://example2.com', title: 'Test 2' },
        { number: 3, url: 'https://example3.com', title: 'Test 3' }
      ]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap)
      expect(result).toBe('Text with [cite:1][cite:2] and [cite:3] citations')
    })

    it('should preserve non-matching default format citations', () => {
      const content = 'Text with [1] and [3] citations'
      const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap)
      expect(result).toBe('Text with [cite:1] and [3] citations')
    })

    it('should handle nested citation patterns', () => {
      const content = 'Text with [[1]] and [cite:[2]] patterns'
      const citations: Citation[] = [
        { number: 1, url: 'https://example1.com', title: 'Test 1' },
        { number: 2, url: 'https://example2.com', title: 'Test 2' }
      ]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap)
      // Only the innermost marker is processed
      expect(result).toBe('Text with [[cite:1]] and [cite:[cite:2]] patterns')
    })

    it('should handle mixed citation formats', () => {
      const content = 'Text with [1] and [<sup>2</sup>](url) and other [3] formats'
      const citations: Citation[] = [
        { number: 1, url: 'https://example1.com', title: 'Test 1' },
        { number: 2, url: 'https://example2.com', title: 'Test 2' }
      ]
      const citationMap = createCitationMap(citations)
      const result = normalizeCitationMarks(content, citationMap, WebSearchSource.OPENAI)
      expect(result).toBe('Text with [1] and [cite:2] and other [3] formats')
    })
  })
})
// mapCitationMarksToTags: [cite:N] marks become rendered tags; unknown or malformed marks are kept.
// Uses the createCitationMap helper of the enclosing describe instead of redefining it here.
describe('mapCitationMarksToTags', () => {
  it('should convert cite marks to tags', () => {
    const content = 'Text with [cite:1] citation'
    const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
    const citationMap = createCitationMap(citations)
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toContain('with [<sup data-citation=')
    expect(result).toContain('1</sup>](https://example.com) citation')
  })

  it('should handle multiple cite marks', () => {
    const content = 'Text with [cite:1][cite:2] and [cite:3] citations'
    const citations: Citation[] = [
      { number: 1, url: 'https://example1.com', title: 'Test 1' },
      { number: 2, url: 'https://example2.com', title: 'Test 2' },
      { number: 3, url: 'https://example3.com', title: 'Test 3' }
    ]
    const citationMap = createCitationMap(citations)
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toContain('with [<sup data-citation=')
    expect(result).toContain('1</sup>](https://example1.com)[<sup data-citation=')
    expect(result).toContain('2</sup>](https://example2.com) and')
    expect(result).toContain('3</sup>](https://example3.com) citations')
  })

  it('should preserve non-matching cite marks', () => {
    const content = 'Text with [cite:1] and [cite:3] citations'
    const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }]
    const citationMap = createCitationMap(citations)
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toContain('1</sup>](https://example1.com)')
    expect(result).toContain('[cite:3]') // Should remain unchanged
  })

  it('should handle nested cite marks', () => {
    const content = 'Text with [cite:[cite:1]] and [cite:2] citations'
    const citations: Citation[] = [
      { number: 1, url: 'https://example1.com', title: 'Test 1' },
      { number: 2, url: 'https://example2.com', title: 'Test 2' }
    ]
    const citationMap = createCitationMap(citations)
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toContain('[cite:[<sup data-citation=')
    expect(result).toContain('1</sup>](https://example1.com)]')
    expect(result).toContain('2</sup>](https://example2.com)')
  })

  it('should handle content without cite marks', () => {
    const content = 'Text without citations'
    const citationMap = new Map<number, Citation>()
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toBe('Text without citations')
  })

  it('should handle malformed citation numbers', () => {
    const content = 'Text with [cite:abc] and [cite:] marks'
    const citationMap = new Map<number, Citation>()
    const result = mapCitationMarksToTags(content, citationMap)
    expect(result).toBe('Text with [cite:abc] and [cite:] marks')
  })
})
// generateCitationTag: one citation in, one `[<sup data-citation='…'>N</sup>](url)` tag out.
describe('generateCitationTag', () => {
  const TAG_OPENING = '[<sup data-citation='

  it('should generate citation tag with valid URL', () => {
    const tag = generateCitationTag({
      number: 1,
      url: 'https://example.com',
      title: 'Example Title',
      content: 'Some content here'
    })
    expect(tag).toContain(TAG_OPENING)
    expect(tag).toContain('1</sup>](https://example.com)')
    expect(tag).toContain('Example Title')
  })

  it('should generate citation tag without URL when invalid', () => {
    const tag = generateCitationTag({ number: 2, url: 'invalid-url', title: 'Test Title' })
    expect(tag).toContain(TAG_OPENING)
    // A non-http URL yields an empty link target
    expect(tag).toContain('2</sup>]()')
    expect(tag).not.toContain('](invalid-url)')
  })

  it('should handle citation without URL', () => {
    const tag = generateCitationTag({ number: 3, url: '', title: 'No URL Title' })
    expect(tag).toContain(TAG_OPENING)
    expect(tag).toContain('3</sup>]()')
  })

  it('should use hostname when title is missing', () => {
    const tag = generateCitationTag({ number: 4, url: 'https://example.com', hostname: 'example.com' })
    expect(tag).toContain('example.com')
  })

  it('should handle citation with all empty values', () => {
    const tag = generateCitationTag({ number: 6, url: '', title: '', hostname: '', content: '' })
    expect(tag).toContain(TAG_OPENING)
    expect(tag).toContain('6</sup>]()')
  })

  it('should truncate content to 200 characters in data-citation', () => {
    const longContent = 'a'.repeat(300)
    const tag = generateCitationTag({
      number: 1,
      url: 'https://example.com',
      title: 'Test',
      content: longContent
    })
    const attribute = tag.match(/data-citation='([^']+)'/)
    expect(attribute).not.toBeNull()
    if (!attribute) return
    // Undo the mocked HTML encoding of double quotes before parsing the JSON payload
    const payload = JSON.parse(attribute[1].replace(/&quot;/g, '"'))
    expect(payload.content.length).toBe(200)
    expect(payload.content).toBe(longContent.substring(0, 200))
  })
})
// Coarse timing checks: both cases must finish within a 100ms wall-clock budget.
describe('performance', () => {
it('should handle large content efficiently', () => {
// Long content with a single citation mark at the very end
const largeContent = 'Test content '.repeat(10000) + '[1]'
const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }]
const start = Date.now()
const result = withCitationTags(largeContent, citations)
const end = Date.now()
expect(result).toContain('[<sup data-citation=')
expect(end - start).toBeLessThan(100) // Should complete within 100ms
})
it('should handle many citations efficiently', () => {
// 100 citations, each referenced exactly once in the content
const citations: Citation[] = Array.from({ length: 100 }, (_, i) => ({
number: i + 1,
url: `https://example${i + 1}.com`,
title: `Test ${i + 1}`
}))
const content = citations.map((c) => `[${c.number}]`).join(' ')
const start = Date.now()
const result = withCitationTags(content, citations)
const end = Date.now()
expect(result).toContain('[<sup data-citation=')
expect(end - start).toBeLessThan(100) // Should complete within 100ms
})
})
})

View File

@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
import {
cleanLinkCommas,
completeLinks,
completionPerplexityLinks,
convertLinks,
convertLinksToHunyuan,
convertLinksToOpenRouter,
@@ -88,6 +89,13 @@ describe('linkConverter', () => {
})
describe('convertLinks', () => {
it('should convert number links to numbered links', () => {
const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)'
const result = convertLinks(input, true)
expect(result.text).toBe('参考 [<sup>1</sup>](https://example.com/1) 和 [<sup>2</sup>](https://example.com/2)')
expect(result.hasBufferedContent).toBe(false)
})
it('should convert links with domain-like text to numbered links', () => {
const input = '查看这个网站 [example.com](https://example.com)'
const result = convertLinks(input, true)
@@ -375,4 +383,13 @@ describe('linkConverter', () => {
expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)')
})
})
describe('completionPerplexityLinks', () => {
it('should complete links with webSearch data', () => {
const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
const input = '参考 [1] 和 [2]'
const result = completionPerplexityLinks(input, webSearch)
expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)')
})
})
})

View File

@@ -0,0 +1,210 @@
import { GroundingSupport } from '@google/genai'
import { Citation, WebSearchSource } from '@renderer/types'
import { cleanMarkdownContent, encodeHTML } from './formats'
/**
 * Picks the web-search source to use for a set of citation references.
 *
 * The references are scanned in order and the `citationBlockSource` of the first
 * reference that has one is returned.
 *
 * @param citationReferences Citation references to inspect; may be undefined or empty.
 * @returns The first available source, or `undefined` when no reference carries one.
 */
export function determineCitationSource(
  citationReferences: Array<{ citationBlockId?: string; citationBlockSource?: WebSearchSource }> | undefined
): WebSearchSource | undefined {
  const firstWithSource = citationReferences?.find((reference) => Boolean(reference.citationBlockSource))
  return firstWithSource?.citationBlockSource
}
/**
 * Converts the citation marks in a text into complete, renderable citation tags.
 *
 * Two steps:
 * 1. normalize the source-specific marks to `[cite:N]`;
 * 2. turn every `[cite:N]` into the tag used for rendering.
 *
 * @param content Raw text content.
 * @param citations Raw citation list; citation content is cleaned of markdown before use.
 * @param sourceType Web-search source the citations came from.
 * @returns The processed text; `content` is returned untouched when it is empty or there are no citations.
 */
export function withCitationTags(content: string, citations: Citation[], sourceType?: WebSearchSource): string {
  if (!content || citations.length === 0) return content

  // Index citations by number, with markdown stripped from their content
  const citationMap = new Map<number, Citation>()
  for (const citation of citations) {
    const cleanedContent = citation.content ? cleanMarkdownContent(citation.content) : citation.content
    citationMap.set(citation.number, { ...citation, content: cleanedContent })
  }

  const normalizedContent = normalizeCitationMarks(content, citationMap, sourceType)
  return mapCitationMarksToTags(normalizedContent, citationMap)
}
/**
 * Normalizes citation marks to the unified `[cite:N]` format:
 * - OpenAI / OpenAI Response / Perplexity: `[<sup>N</sup>](url)` → `[cite:N]`
 * - Gemini: appends `[cite:N]` after each grounded text segment, based on the
 *   grounding metadata stored on the first citation in the map
 * - any other source: `[N]` → `[cite:N]`
 *
 * Marks that start inside fenced code blocks or inline code spans are left untouched
 * (indented code blocks are not treated as code here). Marks whose number is not in
 * `citationMap` are preserved as-is.
 *
 * @param content Raw text content.
 * @param citationMap Citations indexed by citation number.
 * @param sourceType Web-search source that decides which mark format is expected.
 * @returns The text with citation marks normalized.
 */
export function normalizeCitationMarks(
  content: string,
  citationMap: Map<number, Citation>,
  sourceType?: WebSearchSource
): string {
  // Collects the [start, end) ranges of fenced code blocks and inline code spans in `text`.
  // The regex scans left to right, so the ranges come out sorted and non-overlapping.
  const collectSkipRanges = (text: string): Array<{ start: number; end: number }> => {
    const codeRegex = /```[\s\S]*?```|`[^`\n]*`/gm
    const ranges: Array<{ start: number; end: number }> = []
    let codeMatch: RegExpExecArray | null
    while ((codeMatch = codeRegex.exec(text)) !== null) {
      ranges.push({ start: codeMatch.index, end: codeMatch.index + codeMatch[0].length })
    }
    return ranges
  }

  // Applies one regex pass to `content`, skipping matches that start inside code.
  // Returning null from `getReplacementFn` keeps the match unchanged.
  const applyReplacements = (regex: RegExp, getReplacementFn: (match: RegExpExecArray) => string | null) => {
    // Recomputed on every pass: an earlier pass may have changed `content`, which shifts
    // the code positions. Ranges computed once up front would go stale.
    const skipRanges = collectSkipRanges(content)
    const shouldSkip = (pos: number): boolean => {
      for (const range of skipRanges) {
        if (pos >= range.start && pos < range.end) return true
        if (range.start > pos) break // ranges are sorted, so no later range can contain pos
      }
      return false
    }

    const replacements: Array<{ start: number; end: number; replacement: string }> = []
    regex.lastIndex = 0 // reset the state of the global regex
    let match: RegExpExecArray | null
    while ((match = regex.exec(content)) !== null) {
      if (shouldSkip(match.index)) continue
      const replacement = getReplacementFn(match)
      if (replacement !== null) {
        replacements.push({ start: match.index, end: match.index + match[0].length, replacement })
      }
    }
    if (replacements.length === 0) return

    // Matches are ascending and non-overlapping, so the result is rebuilt in one forward pass
    const parts: string[] = []
    let cursor = 0
    for (const { start, end, replacement } of replacements) {
      parts.push(content.slice(cursor, start), replacement)
      cursor = end
    }
    parts.push(content.slice(cursor))
    content = parts.join('')
  }

  switch (sourceType) {
    case WebSearchSource.OPENAI:
    case WebSearchSource.OPENAI_RESPONSE:
    case WebSearchSource.PERPLEXITY: {
      // OpenAI-style format: [<sup>N</sup>](url) → [cite:N]
      applyReplacements(/\[<sup>(\d+)<\/sup>\]\([^)]*\)/g, (match) => {
        const citationNum = parseInt(match[1], 10)
        return citationMap.has(citationNum) ? `[cite:${citationNum}]` : null
      })
      break
    }
    case WebSearchSource.GEMINI: {
      // Gemini format: add [cite:N] according to the grounding metadata
      const firstCitation = Array.from(citationMap.values())[0]
      const supports = firstCitation?.metadata
      if (Array.isArray(supports)) {
        const textReplacements = new Map<string, string>()
        // Collect every segment text that needs citation marks appended
        supports.forEach((support: GroundingSupport) => {
          if (!support.groundingChunkIndices || !support.segment?.text) return
          const text = support.segment.text
          // Chunk indices are zero-based; citation numbers are index + 1
          const basicTag = support.groundingChunkIndices
            .map((chunkIndex) => (citationMap.get(chunkIndex + 1) ? `[cite:${chunkIndex + 1}]` : ''))
            .filter(Boolean)
            .join('')
          if (basicTag) {
            textReplacements.set(text, `${text}${basicTag}`)
          }
        })
        // Apply one pass per segment; the segment text is escaped so it matches literally
        textReplacements.forEach((replacement, originalText) => {
          const escapedText = originalText.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
          applyReplacements(new RegExp(escapedText, 'g'), () => replacement)
        })
      }
      break
    }
    default: {
      // Plain numeric format: [N] → [cite:N]
      applyReplacements(/\[(\d+)\]/g, (match) => {
        const citationNum = parseInt(match[1], 10)
        return citationMap.has(citationNum) ? `[cite:${citationNum}]` : null
      })
    }
  }
  return content
}
/**
 * Replaces every `[cite:N]` mark in the text with the tag used for rendering.
 *
 * @param content Text containing normalized `[cite:N]` marks.
 * @param citationMap Citations indexed by citation number.
 * @returns The text with known marks replaced; marks without a matching citation are kept as-is.
 */
export function mapCitationMarksToTags(content: string, citationMap: Map<number, Citation>): string {
  const citeMarkPattern = /\[cite:(\d+)\]/g
  return content.replace(citeMarkPattern, (mark: string, digits: string) => {
    const citation = citationMap.get(parseInt(digits, 10))
    // No citation data for this number: leave the mark untouched
    return citation ? generateCitationTag(citation) : mark
  })
}
/**
 * Builds the renderable tag for a single citation.
 *
 * The tag has the form `[<sup data-citation='…'>N</sup>](url)`; the `data-citation`
 * attribute carries the HTML-encoded JSON of the citation (id, url, title, content
 * limited to its first 200 characters). When the URL does not start with `http`,
 * the link target is left empty: `[<sup data-citation='…'>N</sup>]()`.
 *
 * @param citation Citation data.
 * @returns The citation tag.
 */
export function generateCitationTag(citation: Citation): string {
  const { number, url, title, hostname, content } = citation

  const payload = JSON.stringify({
    id: number,
    url,
    title: title || hostname || '',
    content: content?.substring(0, 200)
  })
  const encodedPayload = encodeHTML(payload)

  // Only http(s)-looking URLs are used as the link target
  const linkTarget = url && url.startsWith('http') ? url : ''

  return `[<sup data-citation='${encodedPayload}'>${number}</sup>](${linkTarget})`
}

View File

@@ -88,34 +88,6 @@ export function removeSvgEmptyLines(text: string): string {
})
}
// export function withGeminiGrounding(block: MainTextMessageBlock | TranslationMessageBlock): string {
// // TODO
// // const citationBlock = findCitationBlockWithGrounding(block)
// // const groundingSupports = citationBlock?.groundingMetadata?.groundingSupports
// const content = block.content
// // if (!groundingSupports || groundingSupports.length === 0) {
// // return content
// // }
// // groundingSupports.forEach((support) => {
// // const text = support?.segment?.text
// // const indices = support?.groundingChunkIndices
// // if (!text || !indices) return
// // const nodes = indices.reduce((acc, index) => {
// // acc.push(`<sup>${index + 1}</sup>`)
// // return acc
// // }, [] as string[])
// // content = content.replace(text, `${text} ${nodes.join(' ')}`)
// // })
// return content
// }
export function withGenerateImage(message: Message): { content: string; images?: string[] } {
const originalContent = getMainTextContent(message)
const imagePattern = new RegExp(`!\\[[^\\]]*\\]\\((.*?)\\s*("(?:.*[^"])")?\\s*\\)`)

View File

@@ -1,3 +1,5 @@
import { WebSearchResponse, WebSearchSource } from '@renderer/types'
// Counter for numbering links
let linkCounter = 1
// Buffer to hold incomplete link fragments across chunks
@@ -236,11 +238,13 @@ export function convertLinks(
}
// Rule 3: If the link text is not a URL/host, keep the text and add the numbered link
if (!isHost(linkText)) {
result += `${linkText} [<sup>${counter}</sup>](${url})`
} else {
// Rule 2: If the link text is a URL/host, replace with numbered link
// 增加一个条件:如果 linkText 是纯数字,也直接替换
if (isHost(linkText) || /^\d+$/.test(linkText)) {
// Rule 2: If the link text is a URL/host or purely digits, replace with numbered link
result += `[<sup>${counter}</sup>](${url})`
} else {
// If the link text is neither a URL/host nor purely digits, keep the text and add the numbered link
result += `${linkText} [<sup>${counter}</sup>](${url})`
}
position += match[0].length
@@ -337,6 +341,25 @@ export function completeLinks(text: string, webSearch: any[]): string {
})
}
/**
 * Completes bare numeric citation markers using web-search results:
 * `[N]` becomes `[N](webSearch[N - 1].url)`.
 *
 * Markers are left unchanged when:
 * - there is no result (or no `url`) at index `N - 1`;
 * - the marker is already followed by `(`, i.e. it is already a link — completing it
 *   again would produce `[N](url)(url)`.
 *
 * @param text Raw text.
 * @param webSearch Web-search results; entry `N - 1` provides the URL for marker `[N]`.
 * @returns The text with completed links.
 */
export function completionPerplexityLinks(text: string, webSearch: any[]): string {
  // Nothing to complete without text or results
  if (!text || !Array.isArray(webSearch) || webSearch.length === 0) return text

  // (?!\() skips markers that already have a link target
  return text.replace(/\[(\d+)\](?!\()/g, (match, numStr: string) => {
    const num = parseInt(numStr, 10)
    // Optional chaining covers out-of-range numbers and null/undefined entries
    const url = webSearch[num - 1]?.url
    return url ? `[${num}](${url})` : match
  })
}
/**
* 从Markdown文本中提取所有URL
* 支持以下格式:
@@ -463,8 +486,18 @@ export function extractWebSearchReferences(text: string): Array<{
export function smartLinkConverter(
text: string,
providerType: string = 'openai',
resetCounter: boolean = false
resetCounter: boolean = false,
webSearchResults?: WebSearchResponse
): { text: string; hasBufferedContent: boolean } {
if (webSearchResults) {
const webSearch = webSearchResults.results
switch (webSearchResults.source) {
case WebSearchSource.PERPLEXITY: {
text = completionPerplexityLinks(text, webSearch as any[])
break
}
}
}
// 检测文本中的引用模式
const references = extractWebSearchReferences(text)