From 395fa1b4987494cb1ee1f51b79369394a0cf7836 Mon Sep 17 00:00:00 2001
From: ZhenYi <434836402@qq.com>
Date: Tue, 12 May 2026 13:04:40 +0800
Subject: [PATCH] refactor(agent): update AI chat execution, streaming and
 ReAct logic

---
 libs/agent/chat/chat_execution.rs      |  9 ++--
 libs/agent/chat/streaming_execution.rs | 11 +++--
 libs/agent/react/mod.rs                | 13 ++++++
 libs/api/chat/stream.rs                | 57 ++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 11 deletions(-)
diff --git a/libs/agent/chat/chat_execution.rs b/libs/agent/chat/chat_execution.rs
index 0498046..d9007ec 100644
--- a/libs/agent/chat/chat_execution.rs
+++ b/libs/agent/chat/chat_execution.rs
@@ -217,11 +217,10 @@ pub async fn execute_chat_stream(
         let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
         if !has_tool_calls {
             let final_content = response.content.clone();
-            // Don't broadcast the done chunk via SSE/NATS — incremental deltas
-            // already delivered the content; the separate "done" SSE event
-            // signals completion. Pushing full content again would duplicate it
-            // in the frontend streaming store.
-            all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: final_content.clone() });
+            // Don't push full content as a chunk — incremental deltas in
+            // response.chunks (already added above) sum to the same text.
+            // merge_consecutive_blocks would concatenate delta_sum + full =
+            // 2× full, causing duplicate content in DB persistence.
             return Ok(StreamResult {
                 content: final_content,
                 reasoning_content: response.reasoning_content,
diff --git a/libs/agent/chat/streaming_execution.rs b/libs/agent/chat/streaming_execution.rs
index 993688f..4e9a629 100644
--- a/libs/agent/chat/streaming_execution.rs
+++ b/libs/agent/chat/streaming_execution.rs
@@ -240,17 +240,16 @@ async fn execute_streaming_tools(
 
 async fn handle_final_answer(
     response: crate::client::StreamResponse,
-    mut all_chunks: Vec<StreamChunk>, request: &AiRequest,
+    all_chunks: Vec<StreamChunk>, request: &AiRequest,
     session_id: Uuid, version_id: Option<Uuid>,
     total_input_tokens: i64, total_output_tokens: i64,
     session_start: std::time::Instant,
 ) -> Result<StreamResult> {
     let full_content = response.content.clone();
-    // Don't broadcast the done chunk via SSE/NATS — incremental deltas
-    // already delivered the content; the separate completion event
-    // signals end of stream. Broadcasting full content again would
-    // duplicate it in the frontend streaming display.
-    all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: response.content.clone() });
+    // Don't push full content as a chunk — incremental deltas in
+    // response.chunks (already accumulated by the caller) sum to the same
+    // text. merge_consecutive_blocks would concatenate delta_sum + full =
+    // 2× full, causing duplicate content in DB persistence.
     record_ai_session(&request.cache, &request.db, request.project.id, request.sender.uid, session_id, request.room.id, request.model.id, version_id.unwrap_or_default(), total_input_tokens, total_output_tokens, session_start.elapsed().as_millis() as i64).await;
     Ok(StreamResult { content: full_content, reasoning_content: response.reasoning_content, input_tokens: total_input_tokens, output_tokens: total_output_tokens, chunks: all_chunks })
 }
diff --git a/libs/agent/react/mod.rs b/libs/agent/react/mod.rs
index 46c48c8..d073003 100644
--- a/libs/agent/react/mod.rs
+++ b/libs/agent/react/mod.rs
@@ -37,6 +37,19 @@ If local data does not contain the answer, state that clearly before considering
 - Be precise. Cite issue/PR numbers, commit hashes, or message IDs when available.
 - State ambiguity or uncertainty explicitly.
 - Prefer facts over speculation.
+
+## Rich Output
+
+You may use ````html` code blocks to render rich HTML content directly on the page. The HTML is rendered inside a Shadow DOM — your `<style>` rules are scoped to your block only and will NOT affect the rest of the page.
+
+```html
+<style>
+  .card { padding:16px; border-radius:12px; background:var(--surface-elevated); }
+</style>
+<div class="card">Styled content</div>
+```
+
+**IMPORTANT**: Only content inside ````html` code blocks is rendered as HTML. Raw HTML outside code blocks will appear as plain text. Use inline styles or `<style>` blocks. Theme CSS variables (`--accent`, `--surface-elevated`, `--border-default`, etc.) are available. **JavaScript is NOT allowed.**
 "#;
 
 /// Room-specific system prompt appended when the AI is @mentioned in a chat room.
diff --git a/libs/api/chat/stream.rs b/libs/api/chat/stream.rs
index 4544fd4..f315537 100644
--- a/libs/api/chat/stream.rs
+++ b/libs/api/chat/stream.rs
@@ -5,6 +5,7 @@ use agent::client::types::ChatRequestMessage;
 use agent::client::StreamChunkType;
 use futures::StreamExt;
 use models::ai::{ai_message, ai_conversation, AiMessage};
+use models::agents::{model, model_version};
 use queue::{ChatMessageEvent, ChatStreamChunkEvent};
 use sea_orm::{EntityTrait, QueryFilter, ColumnTrait, QueryOrder, ActiveModelTrait, Set, PaginatorTrait};
 use service::AppService;
@@ -334,6 +335,62 @@ pub fn create_chat_sse_stream(
                     }
                 }
 
+                // Record billing after successful AI response.
+                // `Option::and_then` needs a closure returning `Option`, so it
+                // cannot drive an async lookup; await the two queries in turn.
+                let billing_model = model::Entity::find()
+                    .filter(model::Column::Name.eq(&model_name))
+                    .one(service.db.reader())
+                    .await
+                    .ok()
+                    .flatten();
+
+                // Resolve to active/default version for pricing lookup
+                let billing_version_id = match billing_model {
+                    Some(m) => model_version::Entity::find()
+                        .filter(model_version::Column::ModelId.eq(m.id))
+                        .filter(model_version::Column::Status.eq("active"))
+                        .order_by_desc(model_version::Column::IsDefault)
+                        .order_by_desc(model_version::Column::ReleaseDate)
+                        .one(service.db.reader())
+                        .await
+                        .ok()
+                        .flatten()
+                        .map(|v| v.id),
+                    None => None,
+                };
+
+                if let Some(version_id) = billing_version_id {
+                    match agent::billing::record_ai_usage(
+                        &service.db,
+                        project_id,
+                        user_id,
+                        version_id,
+                        stream_result.input_tokens,
+                        stream_result.output_tokens,
+                    )
+                    .await
+                    {
+                        Ok(agent::billing::BillingResult::Success(record)) => {
+                            tracing::info!(
+                                cost = record.cost,
+                                deducted_from = record.deducted_from.as_str(),
+                                "chat_billing_deducted"
+                            );
+                        }
+                        Ok(agent::billing::BillingResult::InsufficientBalance { .. }) => {
+                            tracing::warn!(
+                                project_id = %project_id,
+                                user_id = %user_id,
+                                "chat_billing_insufficient_balance"
+                            );
+                        }
+                        Err(e) => {
+                            tracing::error!(error = %e, "chat_billing_error");
+                        }
+                    }
+                }
+
                 // Broadcast final chat message with token usage
                 let final_msg = ChatMessageEvent {
                     message_id: user_message_id,