From 395fa1b4987494cb1ee1f51b79369394a0cf7836 Mon Sep 17 00:00:00 2001 From: ZhenYi <434836402@qq.com> Date: Tue, 12 May 2026 13:04:40 +0800 Subject: [PATCH] refactor(agent): update AI chat execution, streaming and ReAct logic --- libs/agent/chat/chat_execution.rs | 9 ++-- libs/agent/chat/streaming_execution.rs | 11 +++-- libs/agent/react/mod.rs | 13 ++++++ libs/api/chat/stream.rs | 57 ++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 11 deletions(-) diff --git a/libs/agent/chat/chat_execution.rs b/libs/agent/chat/chat_execution.rs index 0498046..d9007ec 100644 --- a/libs/agent/chat/chat_execution.rs +++ b/libs/agent/chat/chat_execution.rs @@ -217,11 +217,10 @@ pub async fn execute_chat_stream( let has_tool_calls = tools_enabled && !response.tool_calls.is_empty(); if !has_tool_calls { let final_content = response.content.clone(); - // Don't broadcast the done chunk via SSE/NATS — incremental deltas - // already delivered the content; the separate "done" SSE event - // signals completion. Pushing full content again would duplicate it - // in the frontend streaming store. - all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: final_content.clone() }); + // Don't push full content as a chunk — incremental deltas in + // response.chunks (already added above) sum to the same text. + // merge_consecutive_blocks would concatenate delta_sum + full = + // 2× full, causing duplicate content in DB persistence. return Ok(StreamResult { content: final_content, reasoning_content: response.reasoning_content, diff --git a/libs/agent/chat/streaming_execution.rs b/libs/agent/chat/streaming_execution.rs index 993688f..4e9a629 100644 --- a/libs/agent/chat/streaming_execution.rs +++ b/libs/agent/chat/streaming_execution.rs @@ -240,17 +240,16 @@ async fn execute_streaming_tools( async fn handle_final_answer( response: crate::client::StreamResponse, - mut all_chunks: Vec, request: &AiRequest, + all_chunks: Vec, request: &AiRequest, session_id: Uuid, version_id: Option, total_input_tokens: i64, total_output_tokens: i64, session_start: std::time::Instant, ) -> Result { let full_content = response.content.clone(); - // Don't broadcast the done chunk via SSE/NATS — incremental deltas - // already delivered the content; the separate completion event - // signals end of stream. Broadcasting full content again would - // duplicate it in the frontend streaming display. - all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: response.content.clone() }); + // Don't push full content as a chunk — incremental deltas in + // response.chunks (already accumulated above) sum to the same text. + // merge_consecutive_blocks would concatenate delta_sum + full = + // 2× full, causing duplicate content in DB persistence. record_ai_session(&request.cache, &request.db, request.project.id, request.sender.uid, session_id, request.room.id, request.model.id, version_id.unwrap_or_default(), total_input_tokens, total_output_tokens, session_start.elapsed().as_millis() as i64).await; Ok(StreamResult { content: full_content, reasoning_content: response.reasoning_content, input_tokens: total_input_tokens, output_tokens: total_output_tokens, chunks: all_chunks }) } diff --git a/libs/agent/react/mod.rs b/libs/agent/react/mod.rs index 46c48c8..d073003 100644 --- a/libs/agent/react/mod.rs +++ b/libs/agent/react/mod.rs @@ -37,6 +37,19 @@ If local data does not contain the answer, state that clearly before considering - Be precise. Cite issue/PR numbers, commit hashes, or message IDs when available. - State ambiguity or uncertainty explicitly. - Prefer facts over speculation. + +## Rich Output + +You may use ````html` code blocks to render rich HTML content directly on the page. The HTML is rendered inside a Shadow DOM — your ` +
Styled content
+``` + +**IMPORTANT**: Only content inside ````html` code blocks is rendered as HTML. Raw HTML outside code blocks will appear as plain text. Use inline styles or `