feat: thinking_content column + first-project budget logic

- Add thinking_content column to room_message table
- Migration for thinking_content column
- ws-protocol update with streaming chunk types
- Billing: first project gets $10, first workspace gets $30
- Subsequent projects/workspaces get $0 budget
fix(frontend): ordered chunk rendering + initial scroll-to-bottom
2026-04-26 13:11:06 +08:00 · 2026-04-26 13:10:51 +08:00 · 2026-04-26 13:10:42 +08:00 · 2026-04-26 13:10:26 +08:00
39 changed files with 2846 additions and 2168 deletions
--- a/libs/agent/chat/service.rs
+++ b/libs/agent/chat/service.rs
@ -1,4 +1,5 @@
 use std::pin::Pin;
+use std::sync::Arc;
 use std::time::Duration;
 use models::projects::project_skill;
 use models::rooms::room_ai;
@ -9,7 +10,7 @@ use super::context::RoomMessageContext;
 use super::{AiChunkType, AiRequest, AiStreamChunk, Mention, StreamCallback};
 use crate::client::types::{ChatRequestMessage, ToolCall};
 use crate::client::AiClientConfig;
-use crate::client::{call_stream, call_with_params};
+use crate::client::{call_stream, call_with_params, StreamChunk, StreamChunkType, StreamedToolCall};
 use crate::compact::{CompactConfig, CompactService};
 use crate::embed::EmbedService;
 use crate::error::{AgentError, Result};
@ -17,6 +18,23 @@ use crate::perception::{PerceptionService, SkillEntry, ToolCallEvent};
 use crate::react::{ReactAgent, ReactConfig, DEFAULT_SYSTEM_PROMPT};
 use crate::tool::{ToolCall as AgentToolCall, ToolContext, ToolExecutor, ToolResult, registry::ToolRegistry};

+/// Result from streaming AI response.
+///
+/// Returned by `ChatService::process_stream` when its tool loop ends, either with a
+/// final answer or because the maximum tool depth was reached.
+pub struct StreamResult {
+    /// Assistant text collected by `process_stream`; the final response's text is
+    /// appended last.
+    pub content: String,
+    /// Reasoning text of the final response only; empty when the loop stops at the
+    /// maximum tool depth.
+    pub reasoning_content: String,
+    /// Input-token usage of the final response only; 0 when the loop stops at the
+    /// maximum tool depth.
+    /// NOTE(review): `process` sums usage over every model call, this path does not —
+    /// confirm whether callers (e.g. billing) expect a total here.
+    pub input_tokens: i64,
+    /// Output-token usage of the final response only; 0 when the loop stops at the
+    /// maximum tool depth. Same caveat as `input_tokens`.
+    pub output_tokens: i64,
+    /// All chunks in arrival order — preserves ReAct multi-cycle ordering.
+    /// NOTE(review): the final answer appears both as the per-delta `Answer` chunks
+    /// taken from the client response and as one trailing `Answer` chunk holding the
+    /// full text — confirm consumers do not render or persist it twice.
+    pub chunks: Vec<StreamChunk>,
+}
+
+/// Result from non-streaming AI response.
+///
+/// Returned by `ChatService::process`.
+pub struct ProcessResult {
+    /// Final assistant text, or a bracketed placeholder message when the maximum tool
+    /// depth is reached without any text.
+    pub content: String,
+    /// Input tokens summed over every model call made during the tool loop.
+    pub input_tokens: i64,
+    /// Output tokens summed over every model call made during the tool loop.
+    pub output_tokens: i64,
+}
+
 /// Service for handling AI chat requests in rooms.
 pub struct ChatService {
    ai_base_url: Option<String>,
@ -97,7 +115,7 @@ impl ChatService {
        self.tool_registry.as_ref()
    }

-    pub async fn process(&self, request: AiRequest) -> Result<String> {
+    pub async fn process(&self, request: AiRequest) -> Result<ProcessResult> {
        let tools: Vec<serde_json::Value> = request.tools.clone().unwrap_or_default();
        let tools_enabled = !tools.is_empty();
        let max_tool_depth = request.max_tool_depth;
@ -120,6 +138,8 @@ impl ChatService {
            .and_then(|r| r.max_tokens.map(|v| v as u32))
            .unwrap_or(request.max_tokens as u32);
        let mut tool_depth = 0;
+        let mut input_tokens = 0i64;
+        let mut output_tokens = 0i64;

        let config = AiClientConfig::new(
            self.ai_api_key.clone().unwrap_or_default(),
@ -140,6 +160,8 @@ impl ChatService {
            .await?;

            let text = response.content.clone();
+            input_tokens += response.input_tokens;
+            output_tokens += response.output_tokens;

            if tools_enabled && !response.tool_calls_finished.is_empty() {
                // Build assistant message with tool_calls
@ -176,16 +198,29 @@ impl ChatService {
                    })
                    .collect();

-                let tool_messages = match self.execute_tool_calls(calls, &request).await {
-                    Ok(msgs) => msgs,
-                    Err(e) => {
-                        let err_msg = format!("[Tool call failed: {}]", e);
-                        // Return error as a single tool result per call
-                        response
-                            .tool_calls_finished
-                            .iter()
-                            .map(|_| ChatRequestMessage::tool(Uuid::new_v4().to_string(), &err_msg))
-                            .collect()
+                let tool_messages = {
+                    let mut ctx = ToolContext::new(
+                        request.db.clone(),
+                        request.cache.clone(),
+                        request.config.clone(),
+                        request.room.id,
+                        Some(request.sender.uid),
+                    )
+                    .with_project(request.project.id);
+                    if let Some(ref registry) = self.tool_registry {
+                        ctx.registry_mut().merge(registry.clone());
+                    }
+                    let executor = ToolExecutor::new();
+                    match executor.execute_batch(calls, &mut ctx).await {
+                        Ok(results) => ToolExecutor::to_tool_messages(&results),
+                        Err(e) => {
+                            let err_msg = format!("[Tool call failed: {}]", e);
+                            response
+                                .tool_calls_finished
+                                .iter()
+                                .map(|_| ChatRequestMessage::tool(Uuid::new_v4().to_string(), &err_msg))
+                                .collect()
+                        }
                    }
                };
                messages.extend(tool_messages);
@ -225,22 +260,26 @@ impl ChatService {

                tool_depth += 1;
                if tool_depth >= max_tool_depth {
-                    if text.is_empty() {
-                        return Ok(format!(
+                    let content = if text.is_empty() {
+                        format!(
                            "[AI reached maximum tool depth ({}) — no final answer produced]",
                            max_tool_depth
-                        ));
-                    }
-                    return Ok(text);
+                        )
+                    } else {
+                        text
+                    };
+                    return Ok(ProcessResult { content, input_tokens, output_tokens });
                }
                continue;
            }

-            return Ok(text);
+            return Ok(ProcessResult { content: text, input_tokens, output_tokens });
        }
    }

-    pub async fn process_stream(&self, request: AiRequest, on_chunk: StreamCallback) -> Result<String> {
+    pub async fn process_stream(&self, request: AiRequest, on_chunk: StreamCallback) -> Result<StreamResult> {
+        // Wrap on_chunk in Arc so it can be shared across loop iterations
+        let on_chunk = Arc::new(on_chunk);
        let tools: Vec<serde_json::Value> = request.tools.clone().unwrap_or_default();
        let tools_enabled = !tools.is_empty();
        let max_tool_depth = request.max_tool_depth;
@ -270,13 +309,15 @@ impl ChatService {
        .with_base_url(self.ai_base_url.clone().unwrap_or_else(|| "https://api.openai.com".into()));

        let mut full_content = String::new();
-        let mut has_called_tools = false;
+        let mut all_chunks: Vec<StreamChunk> = Vec::new();
+        // Collect tool calls during streaming, push them incrementally after.
+        let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<StreamedToolCall>();
+
        loop {
-            let chunk_type = if has_called_tools {
-                AiChunkType::Answer
-            } else {
-                AiChunkType::Thinking
-            };
+            let on_chunk_cb = on_chunk.clone();
+            let on_chunk_cb2 = on_chunk_cb.clone();
+            let tx_arc = Arc::new(tx.clone());
+            let tx_arc2 = tx_arc.clone();
            let response = call_stream(
                &messages,
                &model_name,
@ -284,18 +325,36 @@ impl ChatService {
                temperature,
                max_tokens,
                if tools_enabled { Some(&tools) } else { None },
-                |delta| {
-                    let _ = on_chunk(AiStreamChunk {
+                Arc::new(move |delta| {
+                    let fut = on_chunk_cb(AiStreamChunk {
                        content: delta.to_string(),
                        done: false,
-                        chunk_type: chunk_type.clone(),
+                        chunk_type: AiChunkType::Answer,
                    });
-                },
+                    fut
+                }),
+                Arc::new(move |delta| {
+                    let fut = on_chunk_cb2(AiStreamChunk {
+                        content: delta.to_string(),
+                        done: false,
+                        chunk_type: AiChunkType::Thinking,
+                    });
+                    fut
+                }),
+                Arc::new(move |tc: &StreamedToolCall| {
+                    let tx = tx_arc2.clone();
+                    let tc_owned = tc.clone();
+                    Box::pin(async move {
+                        let _ = tx.send(tc_owned);
+                    }) as Pin<Box<dyn std::future::Future<Output = ()> + Send>>
+                }),
            )
            .await?;

+            // Collect chunks from this streaming iteration in order.
+            all_chunks.extend(response.chunks);
+
            let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
-            has_called_tools = true;

            if has_tool_calls {
                // Accumulate the assistant's text before tool calls
@ -321,28 +380,34 @@ impl ChatService {
                    Some(tool_calls.clone()),
                ));

-                // Stream tool call summary to frontend
-                let call_summary: Vec<String> = response
-                    .tool_calls
-                    .iter()
-                    .map(|tc| {
-                        // Truncate long arguments for display
-                        let args_display = if tc.arguments.len() > 100 {
-                            format!("{}...", &tc.arguments[..100])
-                        } else {
-                            tc.arguments.clone()
-                        };
-                        format!("{}({})", tc.name, args_display)
-                    })
-                    .collect();
-                on_chunk(AiStreamChunk {
-                    content: format!("[Calling tools: {}]", call_summary.join(", ")),
-                    done: false,
-                    chunk_type: AiChunkType::ToolCall,
-                })
-                .await;
+                // Push each tool call incrementally to frontend.
+                // Use try_recv() — tx is never dropped so recv() would deadlock.
+                loop {
+                    match rx.try_recv() {
+                        Ok(tc) => {
+                            // Truncate on a char boundary for display. Slicing at a fixed
+                            // byte offset (`[..100]`) panics when the offset falls inside a
+                            // multi-byte UTF-8 character, which non-ASCII tool arguments
+                            // can trigger. ASCII input is truncated exactly as before.
+                            let args_display = match tc.arguments.char_indices().nth(100) {
+                                Some((cut, _)) => format!("{}...", &tc.arguments[..cut]),
+                                None => tc.arguments.clone(),
+                            };
+                            let tool_display = format!("🔧 {}({})", tc.name, args_display);
+                            on_chunk(AiStreamChunk {
+                                content: tool_display.clone(),
+                                done: false,
+                                chunk_type: AiChunkType::ToolCall,
+                            })
+                            .await;
+                            all_chunks.push(StreamChunk {
+                                chunk_type: StreamChunkType::ToolCall,
+                                content: tool_display,
+                            });
+                        }
+                        Err(tokio::sync::mpsc::error::TryRecvError::Empty) => break,
+                        Err(tokio::sync::mpsc::error::TryRecvError::Disconnected) => break,
+                    }
+                }

-                // Execute tools with full arguments from streaming
+                // Execute tools one at a time, push each result incrementally
                let calls: Vec<AgentToolCall> = response
                    .tool_calls
                    .iter()
@ -353,43 +418,71 @@ impl ChatService {
                    })
                    .collect();

-                let tool_messages = match self.execute_tool_calls(calls, &request).await {
-                    Ok(msgs) => {
-                        let result_summary: Vec<String> = msgs
-                            .iter()
-                            .map(|m| {
-                                let text = m.content.as_deref().unwrap_or("[no content]");
-                                if text.len() > 300 {
-                                    format!("{}...", &text[..300])
-                                } else {
-                                    text.to_string()
-                                }
+                let mut tool_messages = Vec::new();
+                for call in &calls {
+                    // Build a fresh tool context per call, scoped to the request's
+                    // project. The non-streaming path in `process` attaches the project
+                    // via `with_project`; without it, tools run from the streaming path
+                    // would not see the project.
+                    let ctx = &mut crate::tool::ToolContext::new(
+                        request.db.clone(),
+                        request.cache.clone(),
+                        request.config.clone(),
+                        request.room.id,
+                        Some(request.sender.uid),
+                    )
+                    .with_project(request.project.id);
+                    if let Some(ref registry) = self.tool_registry {
+                        ctx.registry_mut().merge(registry.clone());
+                    }
+
+                    let executor = crate::tool::ToolExecutor::new();
+                    let results = match executor.execute_batch(vec![call.clone()], ctx).await {
+                        Ok(r) => r,
+                        Err(e) => {
+                            let err_text = format!("[Tool call failed: {}]", e);
+                            tracing::warn!(tool = %call.name, error = %e, "tool_call_failed");
+                            // Do NOT emit tool_result chunks to frontend — show error via tool_call instead
+                            let err_display = format!("❌ {} (failed)", call.name);
+                            on_chunk(AiStreamChunk {
+                                content: err_display.clone(),
+                                done: false,
+                                chunk_type: AiChunkType::ToolCall,
                            })
-                            .collect();
-                        on_chunk(AiStreamChunk {
-                            content: format!("[Tool results: {}]", result_summary.join("; ")),
-                            done: false,
-                            chunk_type: AiChunkType::ToolResult,
-                        })
-                        .await;
-                        msgs
+                            .await;
+                            all_chunks.push(StreamChunk {
+                                chunk_type: StreamChunkType::ToolCall,
+                                content: err_display,
+                            });
+                            tool_messages.push(ChatRequestMessage::tool(&call.id, &err_text));
+                            continue;
+                        }
+                    };
+
+                    for result in &results {
+                        let text = match &result.result {
+                            crate::tool::ToolResult::Ok(v) => v.to_string(),
+                            crate::tool::ToolResult::Error(msg) => msg.clone(),
+                        };
+                        // Truncate the logged preview on a char boundary. Slicing at a
+                        // fixed byte offset (`[..300]`) panics when the offset falls
+                        // inside a multi-byte UTF-8 character of the tool output.
+                        let preview = match text.char_indices().nth(300) {
+                            Some((cut, _)) => format!("{}...", &text[..cut]),
+                            None => text.clone(),
+                        };
+                        tracing::debug!("tool_result: {} — {}", call.name, preview);
+                        // Do NOT emit tool_result chunks to frontend — raw output may contain sensitive data.
+                        // Log server-side only; frontend sees tool_call status via on_chunk below.
                    }
-                    Err(e) => {
-                        let err_text = format!("[Tool call failed: {}]", e);
-                        on_chunk(AiStreamChunk {
-                            content: err_text.clone(),
-                            done: false,
-                            chunk_type: AiChunkType::ToolResult,
-                        })
-                        .await;
-                        // Return error tool messages
-                        response
-                            .tool_calls
-                            .iter()
-                            .map(|tc| ChatRequestMessage::tool(&tc.id, &err_text))
-                            .collect()
-                    }
-                };
+                    let success_display = format!("✅ {}", call.name);
+                    on_chunk(AiStreamChunk {
+                        content: success_display.clone(),
+                        done: false,
+                        chunk_type: AiChunkType::ToolCall,
+                    })
+                    .await;
+                    all_chunks.push(StreamChunk {
+                        chunk_type: StreamChunkType::ToolCall,
+                        content: success_display,
+                    });
+
+                    let msgs = crate::tool::ToolExecutor::to_tool_messages(&results);
+                    tool_messages.extend(msgs);
+                }
                messages.extend(tool_messages);

                // Inject passive-detected skills based on tool calls
@ -427,60 +520,54 @@ impl ChatService {

                tool_depth += 1;
                if tool_depth >= max_tool_depth {
+                    let max_depth_text = format!(
+                        "[AI reached maximum tool depth ({}) — no final answer produced]",
+                        max_tool_depth
+                    );
                    on_chunk(AiStreamChunk {
-                        content: format!(
-                            "[AI reached maximum tool depth ({}) — no final answer produced]",
-                            max_tool_depth
-                        ),
+                        content: max_depth_text.clone(),
                        done: true,
                        chunk_type: AiChunkType::Answer,
                    })
                    .await;
-                    return Ok(full_content);
+                    all_chunks.push(StreamChunk {
+                        chunk_type: StreamChunkType::Answer,
+                        content: max_depth_text,
+                    });
+                    return Ok(StreamResult {
+                        content: full_content,
+                        reasoning_content: String::new(),
+                        input_tokens: 0,
+                        output_tokens: 0,
+                        chunks: all_chunks,
+                    });
                }
                continue;
            }

            // Final answer — accumulate and return
            full_content.push_str(&response.content);
+
            on_chunk(AiStreamChunk {
-                content: response.content,
+                content: response.content.clone(),
                done: true,
                chunk_type: AiChunkType::Answer,
            })
            .await;
-            return Ok(full_content);
+            all_chunks.push(StreamChunk {
+                chunk_type: StreamChunkType::Answer,
+                content: response.content.clone(),
+            });
+            return Ok(StreamResult {
+                content: full_content,
+                reasoning_content: response.reasoning_content,
+                input_tokens: response.input_tokens,
+                output_tokens: response.output_tokens,
+                chunks: all_chunks,
+            });
        }
    }

-    /// Executes a batch of tool calls and returns the tool result messages.
-    async fn execute_tool_calls(
-        &self,
-        calls: Vec<AgentToolCall>,
-        request: &AiRequest,
-    ) -> Result<Vec<ChatRequestMessage>> {
-        let mut ctx = ToolContext::new(
-            request.db.clone(),
-            request.cache.clone(),
-            request.config.clone(),
-            request.room.id,
-            Some(request.sender.uid),
-        )
-        .with_project(request.project.id);
-
-        if let Some(ref registry) = self.tool_registry {
-            ctx.registry_mut().merge(registry.clone());
-        }
-
-        let executor = ToolExecutor::new();
-        let results = executor
-            .execute_batch(calls, &mut ctx)
-            .await
-            .map_err(|e| AgentError::Internal(e.to_string()))?;
-
-        Ok(ToolExecutor::to_tool_messages(&results))
-    }
-
    async fn build_messages(&self, request: &AiRequest) -> Result<Vec<ChatRequestMessage>> {
        let mut messages = Vec::new();

--- a/libs/agent/client/mod.rs
+++ b/libs/agent/client/mod.rs
@ -5,6 +5,8 @@
 pub mod types;
 pub use types::{ChatRequestMessage, ToolCall as ClientToolCall};

+use std::pin::Pin;
+use std::sync::Arc;
 use std::time::Instant;
 use uuid::Uuid;

@ -130,6 +132,8 @@ fn is_retryable_error(err: &AgentError) -> bool {
        || msg.contains("connection timed out")
        || msg.contains("network error")
        || msg.contains("dns error")
+        || msg.contains("error sending request")
+        || msg.contains("Http client error")
        || msg.contains("rate_limit")
        || msg.contains("rate limit")
        || msg.contains("429")
@ -451,17 +455,42 @@ pub struct StreamedToolCall {
    pub arguments: String,
 }

+/// Type of chunk in the streaming response, preserving arrival order.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum StreamChunkType {
+    /// Reasoning ("thinking") text emitted by the model.
+    Thinking,
+    /// Answer text emitted by the model.
+    Answer,
+    /// A tool call requested by the model.
+    ToolCall,
+}
+
+/// A single chunk from the streaming response in arrival order.
+#[derive(Debug, Clone)]
+pub struct StreamChunk {
+    /// What kind of content this chunk carries.
+    pub chunk_type: StreamChunkType,
+    /// Chunk payload. For `Thinking` and `Answer` this is the text fragment. For
+    /// `ToolCall` chunks produced by the streaming client it is a JSON object with
+    /// `id`, `name` and `arguments`; `ChatService::process_stream` additionally pushes
+    /// `ToolCall` chunks whose content is a human-readable status line.
+    pub content: String,
+}
+
 /// Streaming result from rig.
 #[derive(Debug)]
 pub struct StreamResponse {
+    /// Concatenation of all answer text deltas received during the stream.
    pub content: String,
+    /// Input tokens reported by the stream's final item; 0 when no usage was reported.
    pub input_tokens: i64,
+    /// Output tokens reported by the stream's final item; 0 when no usage was reported.
    pub output_tokens: i64,
+    /// Accumulated reasoning/thinking text from the model.
+    pub reasoning_content: String,
    /// Full tool calls with accumulated arguments (not just names)
    pub tool_calls: Vec<StreamedToolCall>,
+    /// All chunks in arrival order — preserves think/answer/tool interleaving.
+    pub chunks: Vec<StreamChunk>,
 }

-/// Run a streaming chat completion.
+/// Async callback: takes a string delta and broadcasts it to the WebSocket.
+/// The returned Future must be awaited by the caller.
+pub type StreamTextCb = Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
+/// Async callback invoked with each reasoning ("thinking") text fragment as it arrives.
+/// Same shape as `StreamTextCb`; the streaming loop awaits the returned future before
+/// reading the next stream item.
+pub type StreamReasoningCb = Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
+/// Async callback invoked with each complete tool call received from the stream.
+/// Partial tool calls that are only flushed when the stream ends are not passed to it.
+pub type StreamToolCallCb = Arc<dyn Fn(&StreamedToolCall) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
+
+/// Run a streaming chat completion with 60s timeout and 5 retries.
+///
+/// Calls `call_stream_once` in a loop, handing each attempt a clone of the three
+/// callbacks. A successful attempt is returned as-is. A failed attempt is retried after
+/// a backoff sleep while the retry state allows it and `is_retryable_error` accepts the
+/// error; otherwise a failure is recorded in the AI metrics and the error is returned.
+///
+/// NOTE(review): callbacks fired during a failed attempt are not undone. If an attempt
+/// fails with a retryable error after deltas were already delivered, the retry restarts
+/// the stream and invokes the same callbacks again, so subscribers may see duplicated
+/// text or tool calls — confirm callers tolerate this.
 pub async fn call_stream(
    messages: &[ChatRequestMessage],
    model_name: &str,
@ -469,7 +498,53 @@ pub async fn call_stream(
    temperature: f32,
    max_tokens: u32,
    tools: Option<&[serde_json::Value]>,
-    mut on_text_delta: impl FnMut(&str),
+    on_text_delta: StreamTextCb,
+    on_reasoning_delta: StreamReasoningCb,
+    on_tool_call: StreamToolCallCb,
+) -> Result<StreamResponse> {
+    let mut state = RetryState::new(5);
+
+    loop {
+        let result = call_stream_once(
+            messages, model_name, config, temperature, max_tokens, tools,
+            on_text_delta.clone(), on_reasoning_delta.clone(), on_tool_call.clone(),
+        )
+        .await;
+
+        match result {
+            Ok(response) => return Ok(response),
+            // Retry only while the retry state allows it and the error is classified as retryable.
+            Err(ref err) if state.should_retry() && is_retryable_error(err) => {
+                let duration = state.backoff_duration();
+                tracing::warn!(
+                    attempt = state.attempt + 1,
+                    max_retries = 5,
+                    backoff_ms = duration.as_millis() as u64,
+                    model = %model_name,
+                    error = %err,
+                    "ai_stream_retry"
+                );
+                tokio::time::sleep(duration).await;
+                state.next();
+            }
+            // Non-retryable error, or retries exhausted: record the failure and give up.
+            Err(err) => {
+                ai_metrics().record_failure();
+                return Err(err);
+            }
+        }
+    }
+}
+
+/// Single attempt of streaming completion with 60s timeout.
+async fn call_stream_once(
+    messages: &[ChatRequestMessage],
+    model_name: &str,
+    config: &AiClientConfig,
+    temperature: f32,
+    max_tokens: u32,
+    tools: Option<&[serde_json::Value]>,
+    on_text_delta: StreamTextCb,
+    on_reasoning_delta: StreamReasoningCb,
+    on_tool_call: StreamToolCallCb,
 ) -> Result<StreamResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
@ -506,107 +581,144 @@ pub async fn call_stream(
        builder = builder.tools(tool_defs);
    }

-    let mut stream = builder
-        .stream()
-        .await
-        .map_err(|e| AgentError::OpenAi(e.to_string()))?;
+    let stream_fut = async {
+        let mut stream = builder
+            .stream()
+            .await
+            .map_err(|e| AgentError::OpenAi(e.to_string()))?;

-    let mut content = String::new();
-    let mut tool_calls: Vec<StreamedToolCall> = Vec::new();
+        let mut content = String::new();
+        let mut reasoning_content = String::new();
+        let mut tool_calls: Vec<StreamedToolCall> = Vec::new();
+        let mut chunks: Vec<StreamChunk> = Vec::new();

-    // Track partial tool calls by internal_call_id for argument accumulation
-    use std::collections::HashMap;
-    let mut partial_tool_calls: HashMap<String, StreamedToolCall> = HashMap::new();
-    let mut stream_finished = false;
+        use std::collections::HashMap;
+        let mut partial_tool_calls: HashMap<String, StreamedToolCall> = HashMap::new();
+        let mut stream_finished = false;

-    use rig::streaming::StreamedAssistantContent;
+        use rig::streaming::StreamedAssistantContent;

-    while let Some(item) = stream.next().await {
-        match item {
-            Ok(StreamedAssistantContent::Text(text)) => {
-                content.push_str(&text.text);
-                on_text_delta(&text.text);
-            }
-            Ok(StreamedAssistantContent::ToolCall {
-                tool_call,
-                internal_call_id,
-            }) => {
-                // Complete tool call - extract arguments from the JSON Value
-                let arguments = match &tool_call.function.arguments {
-                    serde_json::Value::String(s) => s.clone(),
-                    other => serde_json::to_string(other).unwrap_or_else(|_| "{}".to_string()),
-                };
-                tool_calls.push(StreamedToolCall {
-                    id: tool_call.id.clone(),
-                    name: tool_call.function.name.clone(),
-                    arguments,
-                });
-                // Remove from partial if it was being accumulated
-                partial_tool_calls.remove(&internal_call_id);
-            }
-            Ok(StreamedAssistantContent::ToolCallDelta {
-                id,
-                internal_call_id,
-                content,
-            }) => {
-                use rig::streaming::ToolCallDeltaContent;
-                match content {
-                    ToolCallDeltaContent::Name(name) => {
-                        // Start accumulating a new tool call
-                        partial_tool_calls.insert(
-                            internal_call_id.clone(),
-                            StreamedToolCall {
-                                id: id.clone(),
-                                name,
-                                arguments: String::new(),
-                            },
-                        );
-                    }
-                    ToolCallDeltaContent::Delta(delta) => {
-                        // Append to existing partial tool call
-                        if let Some(tc) = partial_tool_calls.get_mut(&internal_call_id) {
-                            tc.arguments.push_str(&delta);
+        while let Some(item) = stream.next().await {
+            match item {
+                Ok(StreamedAssistantContent::Text(text)) => {
+                    content.push_str(&text.text);
+                    on_text_delta(&text.text).await;
+                    chunks.push(StreamChunk {
+                        chunk_type: StreamChunkType::Answer,
+                        content: text.text,
+                    });
+                }
+                Ok(StreamedAssistantContent::ToolCall {
+                    tool_call,
+                    internal_call_id,
+                }) => {
+                    let arguments = match &tool_call.function.arguments {
+                        serde_json::Value::String(s) => s.clone(),
+                        other => serde_json::to_string(other).unwrap_or_else(|_| "{}".to_string()),
+                    };
+                    let tc = StreamedToolCall {
+                        id: tool_call.id.clone(),
+                        name: tool_call.function.name.clone(),
+                        arguments,
+                    };
+                    on_tool_call(&tc).await;
+                    chunks.push(StreamChunk {
+                        chunk_type: StreamChunkType::ToolCall,
+                        content: serde_json::json!({
+                            "id": tc.id,
+                            "name": tc.name,
+                            "arguments": tc.arguments,
+                        }).to_string(),
+                    });
+                    tool_calls.push(tc);
+                    partial_tool_calls.remove(&internal_call_id);
+                }
+                Ok(StreamedAssistantContent::ToolCallDelta {
+                    id,
+                    internal_call_id,
+                    content: delta_content,
+                }) => {
+                    use rig::streaming::ToolCallDeltaContent;
+                    match delta_content {
+                        ToolCallDeltaContent::Name(name) => {
+                            partial_tool_calls.insert(
+                                internal_call_id.clone(),
+                                StreamedToolCall {
+                                    id: id.clone(),
+                                    name,
+                                    arguments: String::new(),
+                                },
+                            );
+                        }
+                        ToolCallDeltaContent::Delta(delta) => {
+                            if let Some(tc) = partial_tool_calls.get_mut(&internal_call_id) {
+                                tc.arguments.push_str(&delta);
+                            }
                        }
                    }
                }
-            }
-            Ok(StreamedAssistantContent::Reasoning(_)) => {}
-            Ok(StreamedAssistantContent::ReasoningDelta { .. }) => {}
-            Ok(StreamedAssistantContent::Final(response)) => {
-                stream_finished = true;
-                // Flush any remaining partial tool calls
-                for (_, tc) in partial_tool_calls.drain() {
-                    tool_calls.push(tc);
+                Ok(StreamedAssistantContent::Reasoning(reasoning)) => {
+                    for part in &reasoning.reasoning {
+                        reasoning_content.push_str(part);
+                        on_reasoning_delta(part).await;
+                        chunks.push(StreamChunk {
+                            chunk_type: StreamChunkType::Thinking,
+                            content: part.clone(),
+                        });
+                    }
                }
-                if let Some(usage) = response.token_usage() {
-                    ai_metrics().record_success(
-                        usage.input_tokens as i64,
-                        usage.output_tokens as i64,
-                        !tool_calls.is_empty(),
-                    );
-                    return Ok(StreamResponse {
-                        content,
-                        input_tokens: usage.input_tokens as i64,
-                        output_tokens: usage.output_tokens as i64,
-                        tool_calls,
+                Ok(StreamedAssistantContent::ReasoningDelta { reasoning, .. }) => {
+                    reasoning_content.push_str(&reasoning);
+                    on_reasoning_delta(&reasoning).await;
+                    chunks.push(StreamChunk {
+                        chunk_type: StreamChunkType::Thinking,
+                        content: reasoning.clone(),
                    });
                }
+                Ok(StreamedAssistantContent::Final(response)) => {
+                    stream_finished = true;
+                    for (_, tc) in partial_tool_calls.drain() {
+                        tool_calls.push(tc);
+                    }
+                    if let Some(usage) = response.token_usage() {
+                        let in_toks = usage.input_tokens as i64;
+                        let out_toks = usage.output_tokens as i64;
+                        ai_metrics().record_success(in_toks, out_toks, !tool_calls.is_empty());
+                        return Ok(StreamResponse {
+                            content,
+                            reasoning_content,
+                            input_tokens: in_toks,
+                            output_tokens: out_toks,
+                            tool_calls,
+                            chunks,
+                        });
+                    }
+                    // Usage not available from Final — fall through to flush
+                }
+                Err(e) => return Err(AgentError::OpenAi(e.to_string())),
            }
-            Err(e) => return Err(AgentError::OpenAi(e.to_string())),
        }
-    }

-    // Flush any remaining partial tool calls (if stream ended without Final)
-    if !stream_finished {
-        for (_, tc) in partial_tool_calls.drain() {
-            tool_calls.push(tc);
+        // Flush any remaining partial tool calls (if stream ended without Final or Final had no usage)
+        if !stream_finished {
+            for (_, tc) in partial_tool_calls.drain() {
+                tool_calls.push(tc);
+            }
        }
+        ai_metrics().record_success(0, 0, !tool_calls.is_empty());
+        Ok(StreamResponse {
+            content,
+            reasoning_content,
+            input_tokens: 0,
+            output_tokens: 0,
+            tool_calls,
+            chunks,
+        })
+    };
+
+    // 60s timeout for the entire stream
+    match tokio::time::timeout(std::time::Duration::from_secs(60), stream_fut).await {
+        Ok(result) => result,
+        Err(_) => Err(AgentError::Timeout { task_id: 0, seconds: 60 }),
    }
-    ai_metrics().record_success(0, 0, !tool_calls.is_empty());
-    Ok(StreamResponse {
-        content,
-        input_tokens: 0,
-        output_tokens: 0,
-        tool_calls,
-    })
 }
--- a/libs/api/admin/billing.rs
+++ b/libs/api/admin/billing.rs
@ -60,7 +60,7 @@ pub async fn admin_workspace_add_credit(
    }

    let ws = service.utils_find_workspace_by_slug(slug.clone()).await?;
-    let billing = service.ensure_workspace_billing(ws.id).await?;
+    let billing = service.ensure_workspace_billing(ws.id, None).await?;
    let now_utc = Utc::now();

    let new_balance = rust_decimal::Decimal::from_f64_retain(
--- a/libs/migrate/lib.rs
+++ b/libs/migrate/lib.rs
@ -2,6 +2,7 @@ pub use sea_orm_migration::prelude::*;

 mod m20260420_000003_add_model_id_to_room_message;
 pub mod m20260421_000001_add_agent_type_to_room_ai;
+pub mod m20260426_000001_add_thinking_content_to_room_message;

 pub async fn execute_sql(manager: &SchemaManager<'_>, sql: &str) -> Result<(), DbErr> {
    for stmt in split_sql_statements(sql) {
@ -89,7 +90,7 @@ impl MigratorTrait for Migrator {
            Box::new(m20260420_000002_add_push_subscription::Migration),
            Box::new(m20260420_000003_add_model_id_to_room_message::Migration),
            Box::new(m20260421_000001_add_agent_type_to_room_ai::Migration),
-            Box::new(m20260420_000003_add_model_id_to_room_message::Migration),
+            Box::new(m20260426_000001_add_thinking_content_to_room_message::Migration),
            // Repo tables
            Box::new(m20250628_000028_create_repo::Migration),
            Box::new(m20250628_000029_create_repo_branch::Migration),
--- a/libs/migrate/m20260426_000001_add_thinking_content_to_room_message.rs
+++ b/libs/migrate/m20260426_000001_add_thinking_content_to_room_message.rs
@ -0,0 +1,30 @@
+//! SeaORM migration: add thinking_content column to room_message
+
+use sea_orm_migration::prelude::*;
+
+/// Migration that adds the nullable `thinking_content` TEXT column to `room_message`.
+pub struct Migration;
+
+impl MigrationName for Migration {
+    /// Identifier under which this migration is recorded.
+    fn name(&self) -> &str {
+        "m20260426_000001_add_thinking_content_to_room_message"
+    }
+}
+
+#[async_trait::async_trait]
+impl MigrationTrait for Migration {
+    /// Applies the bundled SQL script (`ADD COLUMN IF NOT EXISTS`).
+    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
+        let sql = include_str!("sql/m20260426_000001_add_thinking_content_to_room_message.sql");
+        super::execute_sql(manager, sql).await
+    }
+
+    /// Drops the column again; `IF EXISTS` keeps the rollback safe to re-run.
+    ///
+    /// Runs through the same `execute_sql` helper as `up` so both directions
+    /// share one execution path instead of hand-building a statement with a
+    /// hard-coded database backend.
+    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
+        super::execute_sql(
+            manager,
+            "ALTER TABLE room_message DROP COLUMN IF EXISTS thinking_content;",
+        )
+        .await
+    }
+}
--- a/libs/migrate/sql/m20260426_000001_add_thinking_content_to_room_message.sql
+++ b/libs/migrate/sql/m20260426_000001_add_thinking_content_to_room_message.sql
@ -0,0 +1 @@
+ALTER TABLE room_message ADD COLUMN IF NOT EXISTS thinking_content TEXT;
--- a/libs/models/rooms/room_message.rs
+++ b/libs/models/rooms/room_message.rs
@ -19,6 +19,8 @@ pub struct Model {
    pub in_reply_to: Option<MessageId>,
    pub content: String,
    pub content_type: MessageContentType,
+    /// Accumulated AI reasoning/thinking text.
+    pub thinking_content: Option<String>,
    pub edited_at: Option<DateTimeUtc>,
    pub send_at: DateTimeUtc,
    pub revoked: Option<DateTimeUtc>,
--- a/libs/queue/producer.rs
+++ b/libs/queue/producer.rs
@ -188,6 +188,7 @@ impl MessageProducer {
            in_reply_to: None,
            content: String::new(),
            content_type: String::new(),
+            thinking_content: None,
            send_at: chrono::Utc::now(),
            seq: 0,
            display_name: None,
--- a/libs/queue/types.rs
+++ b/libs/queue/types.rs
@ -17,6 +17,9 @@ pub struct RoomMessageEnvelope {
    pub in_reply_to: Option<Uuid>,
    pub content: String,
    pub content_type: String,
+    /// Accumulated AI reasoning/thinking text.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_content: Option<String>,
    pub send_at: DateTime<Utc>,
    pub seq: i64,
    /// Pre-resolved display name for the sender (e.g. AI model name).
@ -34,6 +37,9 @@ pub struct RoomMessageEvent {
    pub in_reply_to: Option<Uuid>,
    pub content: String,
    pub content_type: String,
+    /// Accumulated AI reasoning/thinking text.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_content: Option<String>,
    pub send_at: DateTime<Utc>,
    pub seq: i64,
    pub display_name: Option<String>,
@ -79,6 +85,7 @@ impl From<RoomMessageEnvelope> for RoomMessageEvent {
            in_reply_to: e.in_reply_to,
            content: e.content,
            content_type: e.content_type,
+            thinking_content: e.thinking_content,
            send_at: e.send_at,
            seq: e.seq,
            display_name: e.display_name,
--- a/libs/room/src/connection.rs
+++ b/libs/room/src/connection.rs
@ -826,6 +826,7 @@ pub fn make_persist_fn(
                        thread: Set(env.thread_id),
                        content: Set(env.content.clone()),
                        content_type: Set(content_type),
+                        thinking_content: Set(env.thinking_content.clone()),
                        edited_at: Set(None),
                        send_at: Set(env.send_at.clone()),
                        revoked: Set(None),
--- a/libs/room/src/error.rs
+++ b/libs/room/src/error.rs
@ -32,3 +32,9 @@ impl From<anyhow::Error> for RoomError {
        RoomError::Internal(e.to_string())
    }
 }
+
+/// Converts an agent failure into `RoomError::Internal`, keeping only its display text.
+impl From<agent::error::AgentError> for RoomError {
+    fn from(agent_err: agent::error::AgentError) -> Self {
+        let message = agent_err.to_string();
+        Self::Internal(message)
+    }
+}
--- a/libs/room/src/helpers.rs
+++ b/libs/room/src/helpers.rs
@ -68,6 +68,7 @@ impl From<room_message::Model> for super::RoomMessageResponse {
            thread: value.thread,
            content: value.content,
            content_type: value.content_type.to_string(),
+            thinking_content: value.thinking_content,
            edited_at: value.edited_at,
            send_at: value.send_at,
            revoked: value.revoked,
@ -427,6 +428,7 @@ impl RoomService {
            thread: msg.thread,
            content: msg.content,
            content_type: msg.content_type.to_string(),
+            thinking_content: msg.thinking_content,
            edited_at: msg.edited_at,
            send_at: msg.send_at,
            revoked: msg.revoked,
--- a/libs/room/src/message.rs
+++ b/libs/room/src/message.rs
@ -92,6 +92,7 @@ impl RoomService {
                    in_reply_to: msg.in_reply_to,
                    content: msg.content,
                    content_type: msg.content_type.to_string(),
+                    thinking_content: msg.thinking_content,
                    edited_at: msg.edited_at,
                    send_at: msg.send_at,
                    revoked: msg.revoked,
@ -158,7 +159,7 @@ impl RoomService {
            }
        }

-        let seq = Self::next_room_message_seq_internal(room_id, &self.db, &self.cache).await?;
+        let seq = crate::service::next_room_message_seq_internal(room_id, &self.db, &self.cache).await?;
        let now = Utc::now();
        let id = Uuid::now_v7();
        let project_id = room_model.project;
@ -175,6 +176,7 @@ impl RoomService {
            in_reply_to,
            content: content.clone(),
            content_type: content_type_str.clone(),
+            thinking_content: None,
            send_at: now,
            seq,
            display_name: None,
@ -349,6 +351,7 @@ impl RoomService {
            in_reply_to,
            content: request.content,
            content_type: content_type_str,
+            thinking_content: None,
            edited_at: None,
            send_at: now,
            revoked: None,
--- a/libs/room/src/reaction.rs
+++ b/libs/room/src/reaction.rs
@ -321,6 +321,7 @@ impl RoomService {
                    in_reply_to: msg.in_reply_to,
                    content: msg.content,
                    content_type: msg.content_type.to_string(),
+                    thinking_content: msg.thinking_content,
                    edited_at: msg.edited_at,
                    send_at: msg.send_at,
                    revoked: msg.revoked,
--- a/libs/room/src/search.rs
+++ b/libs/room/src/search.rs
@ -124,6 +124,7 @@ impl RoomService {
                in_reply_to: row.try_get::<Option<MessageId>>("", "in_reply_to").ok().flatten(),
                content: row.try_get::<String>("", "content").unwrap_or_default(),
                content_type,
+                thinking_content: None,
                edited_at: row.try_get::<Option<DateTimeUtc>>("", "edited_at").ok().flatten(),
                send_at: row.try_get::<DateTimeUtc>("", "send_at").unwrap_or_default(),
                revoked: row.try_get::<Option<DateTimeUtc>>("", "revoked").ok().flatten(),
--- a/libs/room/src/service.rs
+++ b/libs/room/src/service.rs
--- a/libs/room/src/service/access.rs
+++ b/libs/room/src/service/access.rs
@ -0,0 +1,73 @@
+use db::database::AppDatabase;
+use models::projects::project_members;
+use models::rooms::room;
+use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
+use uuid::Uuid;
+
+use crate::error::RoomError;
+
+/// Checks whether `user_id` may access the room `room_id`.
+///
+/// Access is granted when the room is public, when the user is a member of
+/// the room, or when the user is a member of the room's project.
+///
+/// # Errors
+/// - `RoomError::NotFound` if the room does not exist.
+/// - `RoomError::NoPower` if the room is private and the user is neither a
+///   room member nor a project member.
+/// - Any other error raised by the underlying lookups is propagated as-is.
+pub async fn check_room_access(
+    db: &AppDatabase,
+    room_id: Uuid,
+    user_id: Uuid,
+) -> Result<(), RoomError> {
+    let room = find_room_or_404(db, room_id).await?;
+
+    if room.public {
+        return Ok(());
+    }
+
+    match require_room_member(db, room_id, user_id).await {
+        Ok(()) => Ok(()),
+        // Not a room member: project membership may still grant access.
+        Err(RoomError::NotFound(_)) => check_project_member(db, room.project, user_id).await,
+        // Anything else (e.g. a failed query) is a real failure, not a "no",
+        // so surface it instead of silently falling through.
+        Err(err) => Err(err),
+    }
+}
+
+/// Succeeds when a `project_members` row links `user_id` to `project_id`;
+/// otherwise fails with `RoomError::NoPower`.
+pub async fn check_project_member(
+    db: &AppDatabase,
+    project_id: Uuid,
+    user_id: Uuid,
+) -> Result<(), RoomError> {
+    let membership = project_members::Entity::find()
+        .filter(project_members::Column::Project.eq(project_id))
+        .filter(project_members::Column::User.eq(user_id))
+        .one(db)
+        .await?;
+
+    match membership {
+        Some(_) => Ok(()),
+        None => Err(RoomError::NoPower),
+    }
+}
+
+/// Succeeds when a `room_member` row links `user_id` to `room_id`; otherwise
+/// fails with `RoomError::NotFound("Room member not found")`.
+pub async fn require_room_member(
+    db: &AppDatabase,
+    room_id: Uuid,
+    user_id: Uuid,
+) -> Result<(), RoomError> {
+    use models::rooms::room_member;
+
+    let membership = room_member::Entity::find()
+        .filter(room_member::Column::Room.eq(room_id))
+        .filter(room_member::Column::User.eq(user_id))
+        .one(db)
+        .await?;
+
+    match membership {
+        Some(_) => Ok(()),
+        None => Err(RoomError::NotFound("Room member not found".to_string())),
+    }
+}
+
+/// Loads the room with the given id, or fails with
+/// `RoomError::NotFound("Room not found")` when no such row exists.
+pub async fn find_room_or_404(db: &AppDatabase, room_id: Uuid) -> Result<room::Model, RoomError> {
+    let found = room::Entity::find_by_id(room_id).one(db).await?;
+    match found {
+        Some(model) => Ok(model),
+        None => Err(RoomError::NotFound("Room not found".to_string())),
+    }
+}
--- a/libs/room/src/service/ai_common.rs
+++ b/libs/room/src/service/ai_common.rs
@ -0,0 +1,77 @@
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use queue::MessageProducer;
+use uuid::Uuid;
+
+use super::sequence::next_room_message_seq_internal;
+use crate::connection::RoomConnectionManager;
+use crate::error::RoomError;
+
+/// Creates an AI-authored text message, publishes it to the queue, broadcasts
+/// it to connected room clients and emits a project-level `NewMessage` event.
+///
+/// Returns the id of the newly created message. Fails if the sequence number
+/// cannot be allocated or the queue publish fails; in both cases nothing is
+/// broadcast.
+///
+/// `_reply_to` is currently unused: the message is always sent with
+/// `in_reply_to: None`.
+pub async fn create_and_publish_ai_message(
+    db: &AppDatabase,
+    cache: &AppCache,
+    queue: &MessageProducer,
+    room_manager: &Arc<RoomConnectionManager>,
+    room_id: Uuid,
+    project_id: Uuid,
+    _reply_to: Uuid,
+    content: String,
+    model_id: Uuid,
+    model_display_name: Option<String>,
+) -> Result<Uuid, RoomError> {
+    let sent_at = Utc::now();
+    let seq = next_room_message_seq_internal(room_id, db, cache).await?;
+    let message_id = Uuid::now_v7();
+
+    // Queue envelope: this is the copy that carries the model id and dedup key.
+    let envelope = queue::RoomMessageEnvelope {
+        id: message_id,
+        dedup_key: Some(format!("{}:{}", room_id, message_id)),
+        room_id,
+        sender_type: String::from("ai"),
+        sender_id: None,
+        model_id: Some(model_id),
+        thread_id: None,
+        content: content.clone(),
+        content_type: String::from("text"),
+        thinking_content: None,
+        send_at: sent_at,
+        seq,
+        in_reply_to: None,
+        display_name: model_display_name.clone(),
+    };
+    queue.publish(room_id, envelope).await?;
+    room_manager.metrics.messages_sent.increment(1);
+
+    // Live event pushed to clients currently connected to the room.
+    let live_event = queue::RoomMessageEvent {
+        id: message_id,
+        room_id,
+        sender_type: String::from("ai"),
+        sender_id: None,
+        thread_id: None,
+        content,
+        content_type: String::from("text"),
+        thinking_content: None,
+        send_at: sent_at,
+        seq,
+        display_name: model_display_name,
+        in_reply_to: None,
+        reactions: None,
+        message_id: None,
+    };
+    room_manager.broadcast(room_id, live_event).await;
+
+    super::notifications::publish_room_event(
+        queue,
+        project_id,
+        crate::RoomEventType::NewMessage,
+        Some(room_id),
+        Some(message_id),
+        Some(seq),
+    );
+
+    Ok(message_id)
+}
--- a/libs/room/src/service/ai_nonstreaming.rs
+++ b/libs/room/src/service/ai_nonstreaming.rs
@ -0,0 +1,94 @@
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room_ai;
+use queue::MessageProducer;
+use sea_orm::{sea_query::Expr, ColumnTrait, EntityTrait, ExprTrait, QueryFilter};
+use uuid::Uuid;
+
+use super::ai_common::create_and_publish_ai_message;
+use crate::connection::RoomConnectionManager;
+use agent::chat::{AiRequest, ChatService};
+
+/// Spawns a background task that runs one non-streaming AI completion for a
+/// room and posts the result as an AI message.
+///
+/// On success the reply is published, the `room_ai` call counter and
+/// last-call timestamp are updated, and token usage is handed to billing.
+/// On failure an `[AI error: …]` message is posted to the room instead.
+/// The function returns as soon as the task is spawned; `lock_guard` is moved
+/// into the task and dropped when it finishes.
+pub async fn process_message_ai_nonstreaming(
+    chat_service: Arc<ChatService>,
+    request: AiRequest,
+    room_id: Uuid,
+    project_id: Uuid,
+    model_id: Uuid,
+    lock_guard: crate::room_ai_queue::RoomAiLockGuard,
+    db: AppDatabase,
+    cache: AppCache,
+    queue: MessageProducer,
+    room_manager: Arc<RoomConnectionManager>,
+) {
+    // `chat_service` is already owned here, so it is moved into the task
+    // directly; no extra `Arc` clone is needed.
+    tokio::spawn(async move {
+        // Keep the guard alive until the task ends.
+        let _lock_guard = lock_guard;
+        let model_display_name = request.model.name.clone();
+        match chat_service.process(request).await {
+            Ok(result) => {
+                if let Err(e) = create_and_publish_ai_message(
+                    &db,
+                    &cache,
+                    &queue,
+                    &room_manager,
+                    room_id,
+                    project_id,
+                    Uuid::now_v7(),
+                    result.content,
+                    model_id,
+                    Some(model_display_name),
+                )
+                .await
+                {
+                    tracing::error!(error = %e, "Failed to create AI message");
+                } else {
+                    let now = Utc::now();
+                    if let Err(e) = room_ai::Entity::update_many()
+                        .col_expr(
+                            room_ai::Column::CallCount,
+                            Expr::col(room_ai::Column::CallCount).add(1),
+                        )
+                        .col_expr(room_ai::Column::LastCallAt, Expr::value(Some(now)))
+                        .filter(room_ai::Column::Room.eq(room_id))
+                        .filter(room_ai::Column::Model.eq(model_id))
+                        .exec(&db)
+                        .await
+                    {
+                        tracing::warn!(error = %e, "Failed to update room_ai call stats");
+                    }
+
+                    // Record billing (non-fatal)
+                    let _ = super::billing::record_ai_usage(
+                        &db,
+                        project_id,
+                        model_id,
+                        result.input_tokens,
+                        result.output_tokens,
+                    )
+                    .await;
+                }
+            }
+            Err(e) => {
+                tracing::error!(error = %e, "AI processing failed");
+                // Best effort: tell the room about the failure, but do not
+                // lose track of it if even that message cannot be published.
+                if let Err(publish_err) = create_and_publish_ai_message(
+                    &db,
+                    &cache,
+                    &queue,
+                    &room_manager,
+                    room_id,
+                    project_id,
+                    Uuid::now_v7(),
+                    format!("[AI error: {}]", e),
+                    model_id,
+                    Some(model_display_name),
+                )
+                .await
+                {
+                    tracing::warn!(error = %publish_err, "Failed to publish AI error message");
+                }
+            }
+        }
+    });
+}
--- a/libs/room/src/service/ai_react_nonstreaming.rs
+++ b/libs/room/src/service/ai_react_nonstreaming.rs
@ -0,0 +1,98 @@
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room_ai;
+use queue::MessageProducer;
+use sea_orm::{sea_query::Expr, ColumnTrait, EntityTrait, ExprTrait, QueryFilter};
+use uuid::Uuid;
+
+use super::ai_common::create_and_publish_ai_message;
+use crate::connection::RoomConnectionManager;
+use agent::chat::{AiRequest, ChatService};
+
+/// Spawns a background task that runs the ReAct agent without streaming and
+/// posts its final answer as an AI message.
+///
+/// Intermediate steps are ignored (the step callback is a no-op). On success
+/// the answer is published, the `room_ai` call counter and last-call
+/// timestamp are updated, and a billing record with zero tokens is written.
+/// On failure an `[AI error: …]` message is posted to the room instead.
+/// `lock_guard` is moved into the task and dropped when it finishes.
+pub async fn process_message_ai_react_nonstreaming(
+    chat_service: Arc<ChatService>,
+    request: AiRequest,
+    room_id: Uuid,
+    project_id: Uuid,
+    model_id: Uuid,
+    lock_guard: crate::room_ai_queue::RoomAiLockGuard,
+    db: AppDatabase,
+    cache: AppCache,
+    queue: MessageProducer,
+    room_manager: Arc<RoomConnectionManager>,
+) {
+    tokio::spawn(async move {
+        // Keep the guard alive until the task ends.
+        let _lock_guard = lock_guard;
+        let model_display_name = request.model.name.clone();
+
+        let final_answer = chat_service
+            .process_react(&request, |_step| {})
+            .await;
+
+        match final_answer {
+            Ok(response) => {
+                if let Err(e) = create_and_publish_ai_message(
+                    &db,
+                    &cache,
+                    &queue,
+                    &room_manager,
+                    room_id,
+                    project_id,
+                    Uuid::now_v7(),
+                    response,
+                    model_id,
+                    Some(model_display_name),
+                )
+                .await
+                {
+                    tracing::error!(error = %e, "Failed to create ReAct AI message");
+                } else {
+                    let now = Utc::now();
+                    if let Err(e) = room_ai::Entity::update_many()
+                        .col_expr(
+                            room_ai::Column::CallCount,
+                            Expr::col(room_ai::Column::CallCount).add(1),
+                        )
+                        .col_expr(room_ai::Column::LastCallAt, Expr::value(Some(now)))
+                        .filter(room_ai::Column::Room.eq(room_id))
+                        .filter(room_ai::Column::Model.eq(model_id))
+                        .exec(&db)
+                        .await
+                    {
+                        tracing::warn!(error = %e, "Failed to update room_ai call stats");
+                    }
+
+                    // Record billing (non-fatal)
+                    // TODO: ReAct agent does not track token counts yet; billing with 0/0
+                    let _ = super::billing::record_ai_usage(
+                        &db,
+                        project_id,
+                        model_id,
+                        0,
+                        0,
+                    )
+                    .await;
+                }
+            }
+            Err(e) => {
+                tracing::error!(error = %e, "ReAct agent failed");
+                // Best effort: tell the room about the failure, but do not
+                // lose track of it if even that message cannot be published.
+                if let Err(publish_err) = create_and_publish_ai_message(
+                    &db,
+                    &cache,
+                    &queue,
+                    &room_manager,
+                    room_id,
+                    project_id,
+                    Uuid::now_v7(),
+                    format!("[AI error: {}]", e),
+                    model_id,
+                    Some(model_display_name),
+                )
+                .await
+                {
+                    tracing::warn!(error = %publish_err, "Failed to publish ReAct AI error message");
+                }
+            }
+        }
+    });
+}
--- a/libs/room/src/service/ai_react_streaming.rs
+++ b/libs/room/src/service/ai_react_streaming.rs
@ -0,0 +1,266 @@
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room_ai;
+use queue::{MessageProducer, ProjectRoomEvent, RoomMessageEnvelope};
+use sea_orm::{sea_query::Expr, ColumnTrait, EntityTrait, ExprTrait, QueryFilter};
+use uuid::Uuid;
+
+use super::sequence::next_room_message_seq_internal;
+use crate::connection::RoomConnectionManager;
+use agent::chat::{AiRequest, ChatService};
+use agent::react::ReactStep;
+
+/// Runs the ReAct agent for a room, streaming every step to connected clients
+/// and persisting the outcome as a single AI message.
+///
+/// The sequence number is allocated up front; if that fails the function logs
+/// and returns without spawning anything. Otherwise a background task:
+///
+/// 1. forwards each agent step as a stream chunk, strictly in the order the
+///    agent produced them;
+/// 2. builds the persisted content from the final answer, or from the
+///    reasoning trace when no answer was produced, appending the error text
+///    if the agent failed;
+/// 3. publishes the message (with the ordered steps serialized as JSON in
+///    `thinking_content`), updates `room_ai` call stats, records billing with
+///    zero tokens, and broadcasts the final message and project event;
+/// 4. closes the stream channel on every exit path.
+///
+/// NOTE(review): unlike `process_message_ai_streaming`, no stream channel is
+/// registered here before chunks are broadcast, yet one is closed at the end.
+/// Confirm whether `register_stream_channel` is required for this path.
+pub async fn process_message_ai_react_streaming(
+    chat_service: Arc<ChatService>,
+    request: AiRequest,
+    room_id: Uuid,
+    project_id: Uuid,
+    model_id: Uuid,
+    lock_guard: crate::room_ai_queue::RoomAiLockGuard,
+    db: AppDatabase,
+    cache: AppCache,
+    queue: MessageProducer,
+    room_manager: Arc<RoomConnectionManager>,
+) {
+    use queue::RoomMessageStreamChunkEvent;
+
+    let streaming_msg_id = Uuid::now_v7();
+    let seq = match next_room_message_seq_internal(room_id, &db, &cache).await {
+        Ok(s) => s,
+        Err(e) => {
+            tracing::error!(error = %e, "Failed to get seq for ReAct streaming");
+            return;
+        }
+    };
+
+    // Timestamp of the message itself; shared by the persisted envelope and
+    // the live broadcast so both copies agree.
+    let now = Utc::now();
+    let sender_type = "ai".to_string();
+    let ai_display_name = request.model.name.clone();
+
+    tokio::spawn(async move {
+        // Keep the guard alive until the task ends.
+        let _lock_guard = lock_guard;
+
+        // Ordered (chunk_type, content) pairs, kept for storage and replay.
+        let steps: Arc<std::sync::Mutex<Vec<(String, String)>>> =
+            Arc::new(std::sync::Mutex::new(Vec::new()));
+        let last_action_name: Arc<std::sync::Mutex<String>> =
+            Arc::new(std::sync::Mutex::new(String::new()));
+        let answer_buffer: Arc<std::sync::Mutex<String>> =
+            Arc::new(std::sync::Mutex::new(String::new()));
+
+        // Chunks go through one channel drained by one task, so clients see
+        // them in production order. `None` is the end-of-stream marker.
+        let (chunk_tx, mut chunk_rx) =
+            tokio::sync::mpsc::unbounded_channel::<Option<RoomMessageStreamChunkEvent>>();
+        let forwarder = {
+            let room_manager = room_manager.clone();
+            tokio::spawn(async move {
+                while let Some(Some(event)) = chunk_rx.recv().await {
+                    room_manager.broadcast_stream_chunk(event).await;
+                }
+            })
+        };
+
+        let on_step = {
+            let steps = steps.clone();
+            let answer_buffer = answer_buffer.clone();
+            let last_action_name = last_action_name.clone();
+            let chunk_tx = chunk_tx.clone();
+            let display_name = ai_display_name.clone();
+            move |step: ReactStep| {
+                let (chunk_type, content) = match &step {
+                    ReactStep::Thought { step: _, thought } => {
+                        ("thinking".to_string(), format!("[Thinking] {}", thought))
+                    }
+                    ReactStep::Action { step: _, action } => {
+                        *last_action_name.lock().unwrap() = action.name.clone();
+                        ("tool_call".to_string(), format!("[Action] Calling `{}` with {:?}", action.name, action.args))
+                    }
+                    ReactStep::Observation {
+                        step: _,
+                        observation: _,
+                    } => {
+                        // Sanitize observation — don't expose raw tool output to frontend
+                        let action_name = last_action_name.lock().unwrap().clone();
+                        ("tool_call".to_string(), format!("[Observation] {} (completed)", action_name))
+                    }
+                    ReactStep::Answer { step: _, answer } => {
+                        ("answer".to_string(), answer.clone())
+                    }
+                };
+
+                let is_answer = matches!(&step, ReactStep::Answer { .. });
+
+                // Record ordered step for storage
+                steps
+                    .lock()
+                    .unwrap()
+                    .push((chunk_type.clone(), content.clone()));
+                if is_answer {
+                    answer_buffer.lock().unwrap().push_str(&content);
+                }
+
+                // A send error only means the forwarder is gone; the step is
+                // still recorded above, so it is safe to ignore.
+                let _ = chunk_tx.send(Some(RoomMessageStreamChunkEvent {
+                    message_id: streaming_msg_id,
+                    room_id,
+                    content,
+                    done: is_answer,
+                    error: None,
+                    display_name: Some(display_name.clone()),
+                    chunk_type: Some(chunk_type),
+                }));
+            }
+        };
+
+        let result = chat_service.process_react(&request, on_step).await;
+
+        // Flush every pending chunk before the final message is broadcast and
+        // the stream channel is closed.
+        let _ = chunk_tx.send(None);
+        if let Err(e) = forwarder.await {
+            tracing::warn!(error = %e, "ReAct stream chunk forwarder failed");
+        }
+
+        let final_content = answer_buffer.lock().unwrap().clone();
+        let all_steps = steps.lock().unwrap().clone();
+        let reasoning: Vec<&str> = all_steps
+            .iter()
+            .filter(|(t, _)| t != "answer")
+            .map(|(_, c)| c.as_str())
+            .collect();
+        let reasoning_chain = reasoning.join("\n");
+
+        let mut content_to_persist = if !final_content.is_empty() {
+            final_content
+        } else if !reasoning_chain.trim().is_empty() {
+            // Count the non-answer steps actually taken (thoughts, actions,
+            // observations).
+            format!(
+                "[Agent ran through {} reasoning steps but did not produce a final answer.]\n{}",
+                reasoning.len(),
+                reasoning_chain.trim_end()
+            )
+        } else {
+            String::from("[No output from reasoning agent]")
+        };
+
+        if let Err(e) = &result {
+            tracing::error!(error = %e, "ReAct streaming failed");
+            // Use the error text verbatim so none of its characters are lost.
+            content_to_persist = format!(
+                "{}\n[Error during reasoning: {}]",
+                content_to_persist.trim_end(),
+                e
+            );
+        }
+
+        let persist_content = content_to_persist.trim().to_string();
+        if persist_content.is_empty() {
+            // Nothing worth persisting, but the stream must still be closed.
+            room_manager.close_stream_channel(streaming_msg_id).await;
+            return;
+        }
+
+        // Serialize ordered steps as JSON for ordered replay.
+        let thinking_content = if all_steps.is_empty() {
+            None
+        } else {
+            let chunks: Vec<_> = all_steps
+                .iter()
+                .map(|(t, c)| {
+                    serde_json::json!({
+                        "type": t,
+                        "content": c,
+                    })
+                })
+                .collect();
+            Some(serde_json::json!({ "__chunks__": chunks }).to_string())
+        };
+
+        let envelope = RoomMessageEnvelope {
+            id: streaming_msg_id,
+            dedup_key: Some(format!("{}:{}", room_id, streaming_msg_id)),
+            room_id,
+            sender_type: sender_type.clone(),
+            sender_id: None,
+            model_id: Some(model_id),
+            thread_id: None,
+            content: persist_content.clone(),
+            content_type: "text".to_string(),
+            thinking_content,
+            send_at: now,
+            seq,
+            in_reply_to: None,
+            display_name: Some(ai_display_name.clone()),
+        };
+
+        if let Err(e) = queue.publish(room_id, envelope).await {
+            tracing::error!(error = %e, "Failed to publish ReAct streaming message");
+        } else {
+            let called_at = Utc::now();
+            if let Err(e) = room_ai::Entity::update_many()
+                .col_expr(
+                    room_ai::Column::CallCount,
+                    Expr::col(room_ai::Column::CallCount).add(1),
+                )
+                .col_expr(room_ai::Column::LastCallAt, Expr::value(Some(called_at)))
+                .filter(room_ai::Column::Room.eq(room_id))
+                .filter(room_ai::Column::Model.eq(model_id))
+                .exec(&db)
+                .await
+            {
+                tracing::warn!(error = %e, "Failed to update room_ai call stats");
+            }
+
+            // Record billing (non-fatal)
+            // TODO: ReAct agent does not track token counts yet; billing with 0/0
+            let _ = super::billing::record_ai_usage(
+                &db,
+                project_id,
+                model_id,
+                0,
+                0,
+            )
+            .await;
+
+            let msg_event = queue::RoomMessageEvent {
+                id: streaming_msg_id,
+                room_id,
+                sender_type: sender_type.clone(),
+                sender_id: None,
+                thread_id: None,
+                content: persist_content,
+                content_type: "text".to_string(),
+                thinking_content: None,
+                // Same timestamp as the persisted envelope.
+                send_at: now,
+                seq,
+                display_name: Some(ai_display_name.clone()),
+                in_reply_to: None,
+                reactions: None,
+                message_id: None,
+            };
+            room_manager.broadcast(room_id, msg_event).await;
+            room_manager.metrics.messages_sent.increment(1);
+
+            let event = ProjectRoomEvent {
+                event_type: crate::RoomEventType::NewMessage.as_str().into(),
+                project_id,
+                room_id: Some(room_id),
+                category_id: None,
+                message_id: Some(streaming_msg_id),
+                seq: Some(seq),
+                timestamp: called_at,
+            };
+            queue
+                .publish_project_room_event(project_id, event)
+                .await;
+        }
+
+        room_manager.close_stream_channel(streaming_msg_id).await;
+    });
+}
--- a/libs/room/src/service/ai_streaming.rs
+++ b/libs/room/src/service/ai_streaming.rs
@ -0,0 +1,274 @@
+use std::pin::Pin;
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room_ai;
+use queue::{MessageProducer, ProjectRoomEvent, RoomMessageEnvelope};
+use sea_orm::{sea_query::Expr, ColumnTrait, EntityTrait, ExprTrait, QueryFilter};
+use uuid::Uuid;
+
+use super::sequence::next_room_message_seq_internal;
+use crate::connection::RoomConnectionManager;
+use agent::chat::{AiRequest, ChatService};
+
+/// Streams an AI reply into a room and persists the final message.
+///
+/// Before returning, this allocates the message id and the room sequence
+/// number, registers a stream channel for the message and broadcasts an empty
+/// "thinking" chunk. The generation itself runs in a spawned task, so the
+/// returned future resolves before the AI has answered.
+///
+/// The spawned task:
+/// - keeps `lock_guard` alive until it finishes,
+/// - broadcasts an AI typing "start" event and re-broadcasts it on a
+///   30-second interval,
+/// - forwards every chunk from `ChatService::process_stream` to the room,
+/// - broadcasts typing "stop" as soon as generation ends, on every path,
+/// - on success publishes the final message (ordered chunks are stored as
+///   JSON in `thinking_content`), then updates `room_ai` call stats, records
+///   billing, broadcasts the message and emits a project room event,
+/// - on generation failure broadcasts a final stream chunk carrying the error,
+/// - closes the stream channel in every case.
+///
+/// If the sequence number cannot be allocated the error is logged and nothing
+/// is spawned.
+pub async fn process_message_ai_streaming(
+    chat_service: Arc<ChatService>,
+    request: AiRequest,
+    room_id: Uuid,
+    project_id: Uuid,
+    model_id: Uuid,
+    lock_guard: crate::room_ai_queue::RoomAiLockGuard,
+    db: AppDatabase,
+    cache: AppCache,
+    queue: MessageProducer,
+    room_manager: Arc<RoomConnectionManager>,
+) {
+    use queue::RoomMessageStreamChunkEvent;
+
+    let streaming_msg_id = Uuid::now_v7();
+    let seq = match next_room_message_seq_internal(room_id, &db, &cache).await {
+        Ok(s) => s,
+        Err(e) => {
+            tracing::error!(error = %e, "Failed to get seq for streaming AI message");
+            return;
+        }
+    };
+
+    let _ = room_manager
+        .register_stream_channel(streaming_msg_id)
+        .await;
+
+    // Empty placeholder chunk so clients can render the message immediately.
+    let initial_event = RoomMessageStreamChunkEvent {
+        message_id: streaming_msg_id,
+        room_id,
+        content: String::new(),
+        done: false,
+        error: None,
+        display_name: Some(request.model.name.clone()),
+        chunk_type: Some("thinking".to_string()),
+    };
+    room_manager.broadcast_stream_chunk(initial_event).await;
+
+    let room_id_inner = room_id;
+    let project_id_inner = project_id;
+    // Timestamp of the message itself; used for both the persisted envelope
+    // and the live broadcast so the two never disagree.
+    let send_at = Utc::now();
+    let sender_type = "ai".to_string();
+    let ai_display_name = request.model.name.clone();
+
+    tokio::spawn(async move {
+        // Hold the room AI lock until this task is done.
+        let _lock_guard = lock_guard;
+        let ai_typing_id = Uuid::parse_str("00000000-0000-0000-0000-000000000001")
+            .expect("hard-coded AI typing UUID literal is valid");
+
+        let room_manager_cb = room_manager.clone();
+        let ai_display_name_for_chunk = ai_display_name.clone();
+
+        // Forwards each streamed chunk to the room as it arrives.
+        let on_chunk = move |chunk: agent::chat::AiStreamChunk| {
+            let room_manager = room_manager_cb.clone();
+            let display_name = ai_display_name_for_chunk.clone();
+            Box::pin(async move {
+                let chunk_type_str = match chunk.chunk_type {
+                    agent::chat::AiChunkType::Thinking => "thinking",
+                    agent::chat::AiChunkType::Answer => "answer",
+                    agent::chat::AiChunkType::ToolCall => "tool_call",
+                    agent::chat::AiChunkType::ToolResult => "tool_result",
+                };
+                let event = RoomMessageStreamChunkEvent {
+                    message_id: streaming_msg_id,
+                    room_id: room_id_inner,
+                    content: chunk.content,
+                    done: chunk.done,
+                    error: None,
+                    display_name: Some(display_name),
+                    chunk_type: Some(chunk_type_str.to_string()),
+                };
+                room_manager.broadcast_stream_chunk(event).await;
+            }) as Pin<Box<dyn std::future::Future<Output = ()> + Send>>
+        };
+
+        let stream_callback: agent::chat::StreamCallback = Box::new(on_chunk);
+
+        let typing_start = queue::TypingEvent {
+            room_id: room_id_inner,
+            user_id: ai_typing_id,
+            username: ai_display_name.clone(),
+            avatar_url: None,
+            action: "start".to_string(),
+            sender_type: Some("ai".to_string()),
+        };
+        room_manager.broadcast_typing(room_id_inner, typing_start.clone()).await;
+
+        // Background task that keeps re-sending the typing "start" event until
+        // it is cancelled through the oneshot channel or aborted.
+        let (typing_cancel_tx, typing_cancel_rx) = tokio::sync::oneshot::channel::<()>();
+        let typing_renew_handle = tokio::spawn({
+            let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
+            interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+            let mgr = room_manager.clone();
+            let rid = room_id_inner;
+            let evt = typing_start.clone();
+            async move {
+                tokio::select! {
+                    _ = typing_cancel_rx => {}
+                    _ = async {
+                        loop {
+                            interval.tick().await;
+                            mgr.broadcast_typing(rid, evt.clone()).await;
+                        }
+                    } => {}
+                }
+            }
+        });
+
+        let outcome = chat_service.process_stream(request, stream_callback).await;
+
+        // Generation is over: stop the typing indicator on every path,
+        // including the one where persisting the final message fails below.
+        let _ = typing_cancel_tx.send(());
+        typing_renew_handle.abort();
+        let typing_stop = queue::TypingEvent {
+            room_id: room_id_inner,
+            user_id: ai_typing_id,
+            username: ai_display_name.clone(),
+            avatar_url: None,
+            action: "stop".to_string(),
+            sender_type: Some("ai".to_string()),
+        };
+        room_manager.broadcast_typing(room_id_inner, typing_stop).await;
+
+        match outcome {
+            Ok(result) => {
+                // Store ordered chunks as JSON in thinking_content for ordered replay.
+                // Uses {"__chunks__": [...]} marker so legacy plain-text still works.
+                let thinking_content = if result.chunks.is_empty() {
+                    None
+                } else {
+                    let chunks_json = serde_json::json!({
+                        "__chunks__": result.chunks.iter().map(|c| {
+                            let type_str = match c.chunk_type {
+                                agent::client::StreamChunkType::Thinking => "thinking",
+                                agent::client::StreamChunkType::Answer => "answer",
+                                agent::client::StreamChunkType::ToolCall => "tool_call",
+                            };
+                            serde_json::json!({
+                                "type": type_str,
+                                "content": c.content,
+                            })
+                        }).collect::<Vec<_>>(),
+                    });
+                    Some(chunks_json.to_string())
+                };
+                let envelope = RoomMessageEnvelope {
+                    id: streaming_msg_id,
+                    dedup_key: Some(format!("{}:{}", room_id_inner, streaming_msg_id)),
+                    room_id: room_id_inner,
+                    sender_type: sender_type.clone(),
+                    sender_id: None,
+                    model_id: Some(model_id),
+                    thread_id: None,
+                    content: result.content.clone(),
+                    content_type: "text".to_string(),
+                    thinking_content: thinking_content.clone(),
+                    send_at,
+                    seq,
+                    in_reply_to: None,
+                    display_name: Some(ai_display_name.clone()),
+                };
+
+                if let Err(e) = queue.publish(room_id_inner, envelope).await {
+                    tracing::error!(error = %e, "Failed to publish streaming AI message");
+                } else {
+                    // Completion time: used for call stats and the project
+                    // event, but not for the message's own `send_at`.
+                    let completed_at = Utc::now();
+                    if let Err(e) = room_ai::Entity::update_many()
+                        .col_expr(
+                            room_ai::Column::CallCount,
+                            Expr::col(room_ai::Column::CallCount).add(1),
+                        )
+                        .col_expr(
+                            room_ai::Column::LastCallAt,
+                            Expr::value(Some(completed_at)),
+                        )
+                        .filter(room_ai::Column::Room.eq(room_id_inner))
+                        .filter(room_ai::Column::Model.eq(model_id))
+                        .exec(&db)
+                        .await
+                    {
+                        tracing::warn!(error = %e, "Failed to update room_ai call stats");
+                    }
+
+                    // Record billing (non-fatal)
+                    let _ = super::billing::record_ai_usage(
+                        &db,
+                        project_id_inner,
+                        model_id,
+                        result.input_tokens,
+                        result.output_tokens,
+                    )
+                    .await;
+
+                    let msg_event = queue::RoomMessageEvent {
+                        id: streaming_msg_id,
+                        room_id: room_id_inner,
+                        sender_type: sender_type.clone(),
+                        sender_id: None,
+                        thread_id: None,
+                        content: result.content,
+                        content_type: "text".to_string(),
+                        thinking_content,
+                        // Same timestamp as the persisted envelope.
+                        send_at,
+                        seq,
+                        display_name: Some(ai_display_name.clone()),
+                        in_reply_to: None,
+                        reactions: None,
+                        message_id: None,
+                    };
+                    room_manager.broadcast(room_id_inner, msg_event).await;
+                    room_manager.metrics.messages_sent.increment(1);
+
+                    let event = ProjectRoomEvent {
+                        event_type: crate::RoomEventType::NewMessage.as_str().into(),
+                        project_id: project_id_inner,
+                        room_id: Some(room_id_inner),
+                        category_id: None,
+                        message_id: Some(streaming_msg_id),
+                        seq: Some(seq),
+                        timestamp: completed_at,
+                    };
+                    queue
+                        .publish_project_room_event(project_id_inner, event)
+                        .await;
+                }
+            }
+            Err(e) => {
+                tracing::error!(error = %e, "AI streaming failed");
+
+                // Terminal chunk so clients can mark the message as failed.
+                let event = RoomMessageStreamChunkEvent {
+                    message_id: streaming_msg_id,
+                    room_id: room_id_inner,
+                    content: String::new(),
+                    done: true,
+                    error: Some(e.to_string()),
+                    display_name: Some(ai_display_name.clone()),
+                    chunk_type: None,
+                };
+                room_manager.broadcast_stream_chunk(event).await;
+            }
+        }
+
+        room_manager.close_stream_channel(streaming_msg_id).await;
+    });
+}
--- a/libs/room/src/service/billing.rs
+++ b/libs/room/src/service/billing.rs
@ -0,0 +1,51 @@
+//! AI usage billing helper for room service.
+//!
+//! Delegates to `agent::billing::record_ai_usage`.
+//! Billing is non-fatal — failures are logged but do not block AI responses.
+
+use db::database::AppDatabase;
+use uuid::Uuid;
+
+use crate::error::RoomError;
+
+/// Charge one AI call's token usage to a project's billing balance.
+///
+/// A call with zero input tokens and zero output tokens is skipped and
+/// reported as success. Anything else is forwarded to
+/// `agent::billing::record_ai_usage`; the outcome is logged at `info` (with
+/// the computed cost) on success and at `warn` on failure.
+///
+/// # Errors
+///
+/// Returns the billing error converted into `RoomError`. Callers are expected
+/// to carry on with the AI response regardless, since billing is treated as a
+/// non-critical side effect.
+pub async fn record_ai_usage(
+    db: &AppDatabase,
+    project_id: Uuid,
+    model_id: Uuid,
+    input_tokens: i64,
+    output_tokens: i64,
+) -> Result<(), RoomError> {
+    let nothing_to_bill = input_tokens == 0 && output_tokens == 0;
+    if nothing_to_bill {
+        return Ok(());
+    }
+
+    let record =
+        agent::billing::record_ai_usage(db, project_id, model_id, input_tokens, output_tokens)
+            .await
+            .map_err(|e| {
+                tracing::warn!(
+                    project_id = %project_id,
+                    model_id = %model_id,
+                    input_tokens = input_tokens,
+                    output_tokens = output_tokens,
+                    error = %e,
+                    "ai_billing_failed_non_fatal"
+                );
+                RoomError::from(e)
+            })?;
+
+    tracing::info!(
+        project_id = %project_id,
+        model_id = %model_id,
+        input_tokens = input_tokens,
+        output_tokens = output_tokens,
+        cost_usd = %record.cost,
+        "ai_usage_recorded"
+    );
+    Ok(())
+}
--- a/libs/room/src/service/history.rs
+++ b/libs/room/src/service/history.rs
@ -0,0 +1,62 @@
+use db::database::AppDatabase;
+use models::rooms::room_ai;
+use models::rooms::room_message::{Column as RmCol, Entity as RoomMessage};
+use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, QueryOrder, QuerySelect};
+use uuid::Uuid;
+
+use crate::error::RoomError;
+
+/// Loads at most `limit` messages of a room, ordered by descending `seq`.
+///
+/// # Errors
+///
+/// Returns the query error converted into `RoomError`.
+pub async fn get_room_history(
+    db: &AppDatabase,
+    room_id: Uuid,
+    limit: usize,
+) -> Result<Vec<models::rooms::room_message::Model>, RoomError> {
+    let newest_first = RoomMessage::find()
+        .filter(RmCol::Room.eq(room_id))
+        .order_by_desc(RmCol::Seq)
+        .limit(limit as u64);
+
+    Ok(newest_first.all(db).await?)
+}
+
+/// Maps each of the given user ids to that user's username.
+///
+/// Ids without a matching user row are simply absent from the result. An
+/// empty `user_ids` slice returns an empty map without querying.
+///
+/// This is best-effort: if the query fails, the error is logged at `warn`
+/// and an empty map is returned instead of propagating the failure.
+pub async fn get_user_names(
+    db: &AppDatabase,
+    user_ids: &[Uuid],
+) -> std::collections::HashMap<Uuid, String> {
+    use models::users::User;
+
+    if user_ids.is_empty() {
+        return std::collections::HashMap::new();
+    }
+
+    let users = match User::find()
+        .filter(models::users::user::Column::Uid.is_in(user_ids.to_vec()))
+        .all(db)
+        .await
+    {
+        Ok(users) => users,
+        Err(e) => {
+            // Keep the best-effort contract, but leave a trace of the failure
+            // instead of silently returning no names.
+            tracing::warn!(
+                error = %e,
+                requested = user_ids.len(),
+                "get_user_names: failed to load users"
+            );
+            return std::collections::HashMap::new();
+        }
+    };
+
+    users
+        .into_iter()
+        .map(|user| (user.uid, user.username))
+        .collect()
+}
+
+/// Looks up one `room_ai` row for the given room, if any exists.
+///
+/// # Errors
+///
+/// Returns the query error converted into `RoomError`.
+pub async fn get_room_ai_config(
+    db: &AppDatabase,
+    room_id: Uuid,
+) -> Result<Option<room_ai::Model>, RoomError> {
+    let for_room = room_ai::Entity::find().filter(room_ai::Column::Room.eq(room_id));
+
+    Ok(for_room.one(db).await?)
+}
+
+/// Placeholder: ignores `_content` and always returns an empty mention list.
+pub async fn extract_mention_context(_content: &str) -> Vec<agent::chat::Mention> {
+    vec![]
+}
--- a/libs/room/src/service/mentions.rs
+++ b/libs/room/src/service/mentions.rs
@ -0,0 +1,48 @@
+use uuid::Uuid;
+
+use super::patterns::{mention_bracket_re, mention_tag_re, user_mention_re};
+
+/// Collects the distinct user UUIDs mentioned in `content`.
+///
+/// Recognised mention formats:
+/// - Legacy: `<user>uuid</user>`
+/// - Legacy: `<mention type="user" id="uuid">label</mention>`
+/// - New: `@[user:uuid:label]`
+///
+/// The three patterns are scanned one after another and each UUID is kept
+/// only the first time it is seen. Ids that do not parse as a UUID, and typed
+/// mentions whose type is not `user`, are ignored.
+pub fn extract_mentions(content: &str) -> Vec<Uuid> {
+    let mut mentioned: Vec<Uuid> = Vec::new();
+
+    // Parses one captured id and appends it unless it is already recorded.
+    let mut remember = |raw: &str| {
+        if let Ok(uuid) = Uuid::parse_str(raw.trim()) {
+            if !mentioned.contains(&uuid) {
+                mentioned.push(uuid);
+            }
+        }
+    };
+
+    // Untyped pattern: capture group 1 is the id.
+    for caps in user_mention_re().captures_iter(content) {
+        if let Some(id) = caps.get(1) {
+            remember(id.as_str());
+        }
+    }
+
+    // Typed patterns: group 1 is the mention type, group 2 the id.
+    for caps in mention_tag_re().captures_iter(content) {
+        match (caps.get(1), caps.get(2)) {
+            (Some(kind), Some(id)) if kind.as_str() == "user" => remember(id.as_str()),
+            _ => {}
+        }
+    }
+
+    for caps in mention_bracket_re().captures_iter(content) {
+        match (caps.get(1), caps.get(2)) {
+            (Some(kind), Some(id)) if kind.as_str() == "user" => remember(id.as_str()),
+            _ => {}
+        }
+    }
+
+    mentioned
+}
--- a/libs/room/src/service/mod.rs
+++ b/libs/room/src/service/mod.rs
@ -0,0 +1,466 @@
+mod access;
+mod billing;
+mod ai_common;
+mod ai_nonstreaming;
+mod ai_react_nonstreaming;
+mod ai_react_streaming;
+mod ai_streaming;
+mod history;
+mod mentions;
+mod notifications;
+mod patterns;
+mod sequence;
+mod workers;
+
+pub use access::{check_room_access, check_project_member, require_room_member, find_room_or_404};
+pub use ai_common::create_and_publish_ai_message;
+pub use ai_nonstreaming::process_message_ai_nonstreaming;
+pub use ai_react_nonstreaming::process_message_ai_react_nonstreaming;
+pub use ai_react_streaming::process_message_ai_react_streaming;
+pub use ai_streaming::process_message_ai_streaming;
+pub use history::{get_room_history, get_user_names, get_room_ai_config, extract_mention_context};
+pub use mentions::extract_mentions;
+pub use notifications::{notify_project_members, publish_room_event};
+pub use sequence::next_room_message_seq_internal;
+pub use workers::{start_workers, spawn_agent_task, spawn_room_workers, PushNotificationFn};
+
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room;
+use models::rooms::room_ai;
+use queue::{MessageProducer, ProjectRoomEvent};
+use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
+use uuid::Uuid;
+
+use crate::connection::{RoomConnectionManager, DedupCache};
+use crate::error::RoomError;
+use agent::chat::{AiRequest, ChatService};
+use agent::embed::EmbedService;
+use agent::TaskService;
+use models::agent_task::AgentType;
+
+use crate::service::patterns::{mention_bracket_re, mention_tag_re};
+
+/// Permit count for the worker semaphore when `RoomService::new` is given no
+/// explicit `max_concurrent_workers`.
+const DEFAULT_MAX_CONCURRENT_WORKERS: usize = 1024;
+
+/// Facade over the room subsystem: bundles the handles (database, cache,
+/// queue, connection manager, optional AI services) that the room service
+/// functions in this module need, and exposes them as methods.
+#[derive(Clone)]
+pub struct RoomService {
+    /// Database handle passed to the access, history and notification helpers.
+    pub db: AppDatabase,
+    /// Cache handle; used for the room AI lock and forwarded to workers.
+    pub cache: AppCache,
+    /// Application configuration; cloned into every `AiRequest`.
+    pub config: config::AppConfig,
+    /// Shared connection manager forwarded to workers and AI processors.
+    pub room_manager: Arc<RoomConnectionManager>,
+    /// Producer used to publish room messages and project room events.
+    pub queue: MessageProducer,
+    /// Redis URL forwarded to the worker start-up functions.
+    pub redis_url: String,
+    /// AI chat backend. When `None`, `process_message_ai` is a no-op.
+    pub chat_service: Option<Arc<ChatService>>,
+    /// Agent task backend. When `None`, `spawn_agent_task` returns an error.
+    pub task_service: Option<Arc<TaskService>>,
+    /// Optional embedding service; stored here, not used by the methods below.
+    pub embed_service: Option<Arc<EmbedService>>,
+    /// Optional push-notification callback; stored here, not used by the
+    /// methods below.
+    pub push_fn: Option<workers::PushNotificationFn>,
+    /// Semaphore sized in `new`; handed to `workers::spawn_agent_task` and
+    /// `workers::spawn_room_workers`.
+    worker_semaphore: Arc<tokio::sync::Semaphore>,
+    /// Dedup cache created in `new`; handed to `workers::start_workers`.
+    dedup_cache: DedupCache,
+}
+
+impl RoomService {
+    /// Assembles a `RoomService` from its collaborators.
+    ///
+    /// `max_concurrent_workers` sizes the worker semaphore and falls back to
+    /// `DEFAULT_MAX_CONCURRENT_WORKERS` when `None`. A fresh dedup cache with
+    /// an initial capacity of 10000 is created for the new service.
+    pub fn new(
+        db: AppDatabase,
+        cache: AppCache,
+        config: config::AppConfig,
+        queue: MessageProducer,
+        room_manager: Arc<RoomConnectionManager>,
+        redis_url: String,
+        chat_service: Option<Arc<ChatService>>,
+        task_service: Option<Arc<TaskService>>,
+        max_concurrent_workers: Option<usize>,
+        push_fn: Option<workers::PushNotificationFn>,
+        embed_service: Option<Arc<EmbedService>>,
+    ) -> Self {
+        let worker_permits = max_concurrent_workers.unwrap_or(DEFAULT_MAX_CONCURRENT_WORKERS);
+        let worker_semaphore = Arc::new(tokio::sync::Semaphore::new(worker_permits));
+        let dedup_cache: DedupCache =
+            Arc::new(dashmap::DashMap::with_capacity_and_hasher(10000, Default::default()));
+
+        Self {
+            db,
+            cache,
+            config,
+            room_manager,
+            queue,
+            redis_url,
+            chat_service,
+            task_service,
+            embed_service,
+            push_fn,
+            worker_semaphore,
+            dedup_cache,
+        }
+    }
+
+    /// Starts the room workers by delegating to `workers::start_workers`,
+    /// passing clones of this service's handles and the given shutdown
+    /// receiver.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever error `workers::start_workers` reports.
+    pub async fn start_workers(
+        &self,
+        shutdown_rx: tokio::sync::broadcast::Receiver<()>,
+    ) -> anyhow::Result<()> {
+        workers::start_workers(
+            self.db.clone(),
+            self.cache.clone(),
+            self.room_manager.clone(),
+            self.queue.clone(),
+            self.redis_url.clone(),
+            self.dedup_cache.clone(),
+            self.task_service.clone(),
+            None, // max_concurrent_workers handled by semaphore
+            shutdown_rx,
+        )
+        .await
+    }
+
+    /// Hands an agent task to `workers::spawn_agent_task`, supplying this
+    /// service's task service, queue, connection manager and worker semaphore.
+    ///
+    /// `_title` is accepted but not used.
+    ///
+    /// # Errors
+    ///
+    /// Fails with "task service not configured" when no task service was
+    /// provided at construction; otherwise returns the worker's own result.
+    pub async fn spawn_agent_task<F, Fut>(
+        &self,
+        project_id: Uuid,
+        agent_type: AgentType,
+        input: String,
+        _title: Option<String>,
+        execute: F,
+    ) -> anyhow::Result<i64>
+    where
+        F: FnOnce(i64, Arc<TaskService>) -> Fut + Send + 'static,
+        Fut: std::future::Future<Output = Result<String, String>> + Send,
+    {
+        let task_service = self
+            .task_service
+            .as_ref()
+            .cloned()
+            .ok_or_else(|| anyhow::anyhow!("task service not configured"))?;
+
+        let queue = self.queue.clone();
+        let room_manager = self.room_manager.clone();
+        let permits = self.worker_semaphore.clone();
+
+        workers::spawn_agent_task(
+            project_id,
+            agent_type,
+            input,
+            task_service,
+            queue,
+            room_manager,
+            permits,
+            execute,
+        )
+        .await
+    }
+
+    /// Spawns the workers for one room by delegating to
+    /// `workers::spawn_room_workers` with clones of this service's database,
+    /// connection manager, queue, Redis URL and worker semaphore.
+    pub fn spawn_room_workers(&self, room_id: uuid::Uuid) {
+        workers::spawn_room_workers(
+            room_id,
+            self.db.clone(),
+            self.room_manager.clone(),
+            self.queue.clone(),
+            self.redis_url.clone(),
+            self.worker_semaphore.clone(),
+        );
+    }
+
+    /// Builds a `ProjectRoomEvent` stamped with the current UTC time and
+    /// publishes it on the project's room event channel.
+    pub async fn publish_room_event(
+        &self,
+        project_id: uuid::Uuid,
+        event_type: super::RoomEventType,
+        room_id: Option<uuid::Uuid>,
+        category_id: Option<uuid::Uuid>,
+        message_id: Option<uuid::Uuid>,
+        seq: Option<i64>,
+    ) {
+        let kind = event_type.as_str().into();
+        let stamped_at = Utc::now();
+
+        self.queue
+            .publish_project_room_event(
+                project_id,
+                ProjectRoomEvent {
+                    event_type: kind,
+                    project_id,
+                    room_id,
+                    category_id,
+                    message_id,
+                    seq,
+                    timestamp: stamped_at,
+                },
+            )
+            .await;
+    }
+
+    /// Forwards to `notifications::notify_project_members` with a clone of
+    /// this service's database handle. Returns nothing, so failures are not
+    /// reported to the caller.
+    pub fn notify_project_members(
+        &self,
+        project_id: uuid::Uuid,
+        notification_type: super::NotificationType,
+        title: String,
+        content: Option<String>,
+        related_room_id: Option<uuid::Uuid>,
+    ) {
+        notifications::notify_project_members(
+            self.db.clone(),
+            project_id,
+            notification_type,
+            title,
+            content,
+            related_room_id,
+        );
+    }
+
+    /// Associated-function wrapper around `mentions::extract_mentions`;
+    /// returns the user UUIDs mentioned in `content`.
+    pub fn extract_mentions(content: &str) -> Vec<Uuid> {
+        mentions::extract_mentions(content)
+    }
+
+    /// Resolves bracket-style `user` mentions in `content` to user ids.
+    ///
+    /// For every match of the bracket mention pattern whose type is `user`:
+    /// - if the id part parses as a UUID it is used directly;
+    /// - otherwise the label (capture group 3), lowercased, is looked up as a
+    ///   username in the database. Each distinct label is queried at most
+    ///   once.
+    ///
+    /// The result contains each user id once, in order of first appearance.
+    /// Resolution is best-effort: a failed lookup is logged at `warn` and the
+    /// mention is skipped; unknown usernames are skipped silently.
+    pub async fn resolve_mentions(&self, content: &str) -> Vec<Uuid> {
+        use models::users::User;
+
+        let mut resolved: Vec<Uuid> = Vec::new();
+        let mut seen_usernames: Vec<String> = Vec::new();
+
+        for cap in mention_bracket_re().captures_iter(content) {
+            let (Some(type_m), Some(id_m)) = (cap.get(1), cap.get(2)) else {
+                continue;
+            };
+            if type_m.as_str() != "user" {
+                continue;
+            }
+
+            // Fast path: the mention already carries the user's UUID.
+            if let Ok(uuid) = Uuid::parse_str(id_m.as_str().trim()) {
+                if !resolved.contains(&uuid) {
+                    resolved.push(uuid);
+                }
+                continue;
+            }
+
+            // Fallback: resolve the label as a username.
+            let Some(label_m) = cap.get(3) else {
+                continue;
+            };
+            let label = label_m.as_str().trim();
+            if label.is_empty() {
+                continue;
+            }
+            let label_lower = label.to_lowercase();
+            if seen_usernames.contains(&label_lower) {
+                continue;
+            }
+            seen_usernames.push(label_lower.clone());
+
+            match User::find()
+                .filter(models::users::user::Column::Username.eq(label_lower.clone()))
+                .one(&self.db)
+                .await
+            {
+                Ok(Some(user)) => {
+                    if !resolved.contains(&user.uid) {
+                        resolved.push(user.uid);
+                    }
+                }
+                Ok(None) => {}
+                Err(e) => {
+                    // Do not fail the whole resolution, but do not hide the
+                    // database error either.
+                    tracing::warn!(
+                        username = %label_lower,
+                        error = %e,
+                        "resolve_mentions: user lookup failed"
+                    );
+                }
+            }
+        }
+
+        resolved
+    }
+
+    /// Delegates to `access::check_room_access` using this service's database
+    /// handle and returns its result unchanged.
+    pub async fn check_room_access(&self, room_id: Uuid, user_id: Uuid) -> Result<(), RoomError> {
+        access::check_room_access(&self.db, room_id, user_id).await
+    }
+
+    /// Delegates to `access::check_project_member` using this service's
+    /// database handle and returns its result unchanged.
+    pub async fn check_project_member(
+        &self,
+        project_id: Uuid,
+        user_id: Uuid,
+    ) -> Result<(), RoomError> {
+        access::check_project_member(&self.db, project_id, user_id).await
+    }
+
+    /// Decides whether the room's AI should answer a message.
+    ///
+    /// - No AI config for the room: `false`.
+    /// - Config with `use_exact` disabled: always `true`.
+    /// - Config with `use_exact` enabled: `true` only when `content` contains
+    ///   an `ai` mention (bracket or tag form) whose id equals the configured
+    ///   model id.
+    ///
+    /// # Errors
+    ///
+    /// Propagates a failure to load the room's AI config.
+    pub async fn should_ai_respond(&self, room_id: Uuid, content: &str) -> Result<bool, RoomError> {
+        let Some(config) = history::get_room_ai_config(&self.db, room_id).await? else {
+            return Ok(false);
+        };
+
+        if !config.use_exact {
+            return Ok(true);
+        }
+
+        let model_id_str = config.model.to_string();
+
+        // Bracket mentions are checked first, then tag mentions.
+        let bracket_hit = mention_bracket_re().captures_iter(content).any(|caps| {
+            matches!(
+                (caps.get(1), caps.get(2)),
+                (Some(kind), Some(id))
+                    if kind.as_str() == "ai" && id.as_str().trim() == model_id_str
+            )
+        });
+        if bracket_hit {
+            return Ok(true);
+        }
+
+        let tag_hit = mention_tag_re().captures_iter(content).any(|caps| {
+            matches!(
+                (caps.get(1), caps.get(2)),
+                (Some(kind), Some(id))
+                    if kind.as_str() == "ai" && id.as_str().trim() == model_id_str
+            )
+        });
+
+        Ok(tag_hit)
+    }
+
+    /// Delegates to `history::get_room_ai_config` using this service's
+    /// database handle; yields `None` when the room has no AI config.
+    pub async fn get_room_ai_config(
+        &self,
+        room_id: Uuid,
+    ) -> Result<Option<room_ai::Model>, RoomError> {
+        history::get_room_ai_config(&self.db, room_id).await
+    }
+
+    /// Delegates to `history::get_user_names` using this service's database
+    /// handle; returns a map from user id to username.
+    pub async fn get_user_names(
+        &self,
+        user_ids: &[Uuid],
+    ) -> std::collections::HashMap<Uuid, String> {
+        history::get_user_names(&self.db, user_ids).await
+    }
+
+    /// Delegates to `access::require_room_member` using this service's
+    /// database handle and returns its result unchanged.
+    pub async fn require_room_member(&self, room_id: Uuid, user_id: Uuid) -> Result<(), RoomError> {
+        access::require_room_member(&self.db, room_id, user_id).await
+    }
+
+    /// Delegates to `access::find_room_or_404` using this service's database
+    /// handle and returns the room model or that helper's error.
+    pub async fn find_room_or_404(&self, room_id: Uuid) -> Result<room::Model, RoomError> {
+        access::find_room_or_404(&self.db, room_id).await
+    }
+
+    /// Builds an `AiRequest` for a room message and dispatches it to the
+    /// matching AI processor.
+    ///
+    /// Returns `Ok(())` without doing anything when no chat service is
+    /// configured, when the room has no AI config, or when the room AI lock
+    /// cannot be acquired.
+    ///
+    /// The model is the first `ai` bracket mention in `content` whose id parses
+    /// as a UUID, falling back to the model in the room's AI config. The
+    /// request carries the 50 most recent room messages and the usernames of
+    /// their senders plus the current sender.
+    ///
+    /// The processor is picked from the config's `agent_type` (`"react"` or
+    /// anything else) and `stream` flag; the acquired lock guard is handed
+    /// over to it.
+    ///
+    /// # Errors
+    ///
+    /// Propagates database errors, and returns `RoomError::NotFound` when the
+    /// project, AI model or sender cannot be found.
+    pub async fn process_message_ai(
+        &self,
+        room_id: Uuid,
+        _message_id: Uuid,
+        sender_id: Uuid,
+        content: String,
+    ) -> Result<(), RoomError> {
+        let chat_service = match self.chat_service.as_ref() {
+            Some(service) => service,
+            None => return Ok(()),
+        };
+
+        let ai_config = match self.get_room_ai_config(room_id).await? {
+            Some(cfg) => cfg,
+            None => return Ok(()),
+        };
+
+        let lock_guard =
+            match crate::room_ai_queue::acquire_room_ai_lock(&self.cache, room_id).await? {
+                Some(guard) => guard,
+                None => return Ok(()),
+            };
+
+        let room = self.find_room_or_404(room_id).await?;
+        let project_id = room.project;
+
+        let project = models::projects::project::Entity::find_by_id(project_id)
+            .one(&self.db)
+            .await?
+            .ok_or_else(|| RoomError::NotFound("Project not found".to_string()))?;
+
+        // First `ai` bracket mention with a parseable UUID wins.
+        let mentioned_model_id = mention_bracket_re()
+            .captures_iter(&content)
+            .find_map(|caps| match (caps.get(1), caps.get(2)) {
+                (Some(kind), Some(id)) if kind.as_str() == "ai" => {
+                    Uuid::parse_str(id.as_str().trim()).ok()
+                }
+                _ => None,
+            });
+
+        let model_id = mentioned_model_id.unwrap_or(ai_config.model);
+        let model = models::agents::model::Entity::find_by_id(model_id)
+            .one(&self.db)
+            .await?
+            .ok_or_else(|| RoomError::NotFound("AI model not found".to_string()))?;
+
+        let sender = models::users::User::find_by_id(sender_id)
+            .one(&self.db)
+            .await?
+            .ok_or_else(|| RoomError::NotFound("Sender not found".to_string()))?;
+
+        let history = history::get_room_history(&self.db, room_id, 50).await?;
+
+        // Everyone who authored a message in the history, plus the sender.
+        let mut user_ids: Vec<Uuid> = history.iter().filter_map(|m| m.sender_id).collect();
+        user_ids.push(sender_id);
+        let user_names = self.get_user_names(&user_ids).await;
+
+        let mentions = history::extract_mention_context(&content).await;
+
+        let use_streaming = ai_config.stream;
+        let is_react = ai_config.agent_type.as_deref() == Some("react");
+
+        let request = AiRequest {
+            db: self.db.clone(),
+            cache: self.cache.clone(),
+            config: self.config.clone(),
+            model,
+            project,
+            sender,
+            room,
+            input: content,
+            mention: mentions,
+            history,
+            user_names,
+            temperature: ai_config.temperature.unwrap_or(0.7),
+            max_tokens: ai_config.max_tokens.unwrap_or(4096) as i32,
+            top_p: 1.0,
+            frequency_penalty: 0.0,
+            presence_penalty: 0.0,
+            think: ai_config.think,
+            tools: Some(chat_service.tools()),
+            max_tool_depth: 1000,
+        };
+
+        match (is_react, use_streaming) {
+            (true, true) => {
+                ai_react_streaming::process_message_ai_react_streaming(
+                    chat_service.clone(),
+                    request,
+                    room_id,
+                    project_id,
+                    model_id,
+                    lock_guard,
+                    self.db.clone(),
+                    self.cache.clone(),
+                    self.queue.clone(),
+                    self.room_manager.clone(),
+                )
+                .await;
+            }
+            (true, false) => {
+                ai_react_nonstreaming::process_message_ai_react_nonstreaming(
+                    chat_service.clone(),
+                    request,
+                    room_id,
+                    project_id,
+                    model_id,
+                    lock_guard,
+                    self.db.clone(),
+                    self.cache.clone(),
+                    self.queue.clone(),
+                    self.room_manager.clone(),
+                )
+                .await;
+            }
+            (false, true) => {
+                ai_streaming::process_message_ai_streaming(
+                    chat_service.clone(),
+                    request,
+                    room_id,
+                    project_id,
+                    model_id,
+                    lock_guard,
+                    self.db.clone(),
+                    self.cache.clone(),
+                    self.queue.clone(),
+                    self.room_manager.clone(),
+                )
+                .await;
+            }
+            (false, false) => {
+                ai_nonstreaming::process_message_ai_nonstreaming(
+                    chat_service.clone(),
+                    request,
+                    room_id,
+                    project_id,
+                    model_id,
+                    lock_guard,
+                    self.db.clone(),
+                    self.cache.clone(),
+                    self.queue.clone(),
+                    self.room_manager.clone(),
+                )
+                .await;
+            }
+        }
+
+        Ok(())
+    }
+}
--- a/libs/room/src/service/notifications.rs
+++ b/libs/room/src/service/notifications.rs
@ -0,0 +1,134 @@
+use chrono::Utc;
+use db::database::AppDatabase;
+use models::projects::project_members;
+use queue::ProjectRoomEvent;
+use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
+use uuid::Uuid;
+
+use crate::error::RoomError;
+
+/// Fan a notification out to every member of a project without blocking the caller.
+///
+/// Spawns a detached Tokio task that loads the `project_members` rows for
+/// `project_id` and inserts one notification row per member. Failures are
+/// logged through `tracing` and are never reported back to the caller.
+pub fn notify_project_members(
+    db: AppDatabase,
+    project_id: Uuid,
+    notification_type: crate::NotificationType,
+    title: String,
+    content: Option<String>,
+    related_room_id: Option<Uuid>,
+) {
+    tokio::spawn(async move {
+        let lookup = project_members::Entity::find()
+            .filter(project_members::Column::Project.eq(project_id))
+            .all(&db)
+            .await;
+        let recipients = match lookup {
+            Err(e) => {
+                tracing::error!(project_id = %project_id, error = %e,
+                    "notify_project_members: failed to fetch members");
+                return;
+            }
+            Ok(rows) => rows,
+        };
+
+        for recipient in recipients {
+            let user_id = recipient.user;
+            let outcome = create_notification_sync(
+                &db,
+                notification_type,
+                user_id,
+                title.clone(),
+                content.clone(),
+                related_room_id,
+                project_id,
+            )
+            .await;
+            // A failure for one member must not stop delivery to the rest.
+            if let Err(e) = outcome {
+                tracing::warn!(user_id = %user_id, project_id = %project_id, error = %e,
+                    "notify_project_members: failed to create notification for user");
+            }
+        }
+    });
+}
+
+/// Insert a single unread, unarchived notification row for `user_id`.
+///
+/// Despite the `_sync` suffix this is an `async fn`: it awaits the insert.
+/// `related_room_id` is written to both the `room` and `related_room_id`
+/// columns, and `created_at` is set to the current UTC time.
+///
+/// # Errors
+/// An insert failure is returned as `RoomError::Database`.
+async fn create_notification_sync(
+    db: &AppDatabase,
+    notification_type: crate::NotificationType,
+    user_id: Uuid,
+    title: String,
+    content: Option<String>,
+    related_room_id: Option<Uuid>,
+    project_id: Uuid,
+) -> Result<(), RoomError> {
+    use crate::NotificationType as Incoming;
+    use models::rooms::room_notifications::{self, NotificationType as Stored};
+    use sea_orm::{ActiveModelTrait, Set};
+
+    // Variant-for-variant translation from the crate-level enum to the model enum.
+    let stored_type = match notification_type {
+        Incoming::Mention => Stored::Mention,
+        Incoming::Invitation => Stored::Invitation,
+        Incoming::RoleChange => Stored::RoleChange,
+        Incoming::RoomCreated => Stored::RoomCreated,
+        Incoming::RoomDeleted => Stored::RoomDeleted,
+        Incoming::SystemAnnouncement => Stored::SystemAnnouncement,
+        Incoming::ProjectInvitation => Stored::ProjectInvitation,
+        Incoming::WorkspaceInvitation => Stored::WorkspaceInvitation,
+    };
+
+    let row = room_notifications::ActiveModel {
+        id: Set(Uuid::now_v7()),
+        room: Set(related_room_id),
+        project: Set(Some(project_id)),
+        user_id: Set(Some(user_id)),
+        notification_type: Set(stored_type),
+        related_message_id: Set(None),
+        related_user_id: Set(None),
+        related_room_id: Set(related_room_id),
+        title: Set(title),
+        content: Set(content),
+        metadata: Set(None),
+        is_read: Set(false),
+        is_archived: Set(false),
+        created_at: Set(Utc::now()),
+        read_at: Set(None),
+        expires_at: Set(None),
+    };
+    // The inserted model is not needed by any caller, so it is discarded.
+    row.insert(db).await.map_err(RoomError::Database)?;
+
+    Ok(())
+}
+
+/// Publish a project-scoped room event without waiting for the result.
+///
+/// Builds a `ProjectRoomEvent` stamped with the current UTC time (its
+/// `category_id` is always `None`) and hands it to
+/// `publish_project_room_event` on a spawned Tokio task, so the caller never
+/// awaits the publish.
+pub fn publish_room_event(
+    queue: &queue::MessageProducer,
+    project_id: Uuid,
+    event_type: crate::RoomEventType,
+    room_id: Option<Uuid>,
+    message_id: Option<Uuid>,
+    seq: Option<i64>,
+) {
+    let payload = ProjectRoomEvent {
+        event_type: event_type.as_str().into(),
+        project_id,
+        room_id,
+        category_id: None,
+        message_id,
+        seq,
+        timestamp: Utc::now(),
+    };
+    // The spawned task needs an owned producer because `queue` is only borrowed.
+    let producer = queue.clone();
+    tokio::spawn(async move {
+        producer.publish_project_room_event(project_id, payload).await;
+    });
+}
--- a/libs/room/src/service/patterns.rs
+++ b/libs/room/src/service/patterns.rs
@ -0,0 +1,30 @@
+use std::sync::LazyLock;
+
+/// Legacy user mention: `<user>uuid</user>` or `<user>username</user>`.
+/// Capture 1 is the inner text with surrounding whitespace excluded.
+static USER_MENTION_RE: LazyLock<regex_lite::Regex> =
+    LazyLock::new(|| regex_lite::Regex::new(r"<user>\s*([^<]+?)\s*</user>").unwrap());
+
+/// Legacy tag mention: `<mention type="..." id="...">label</mention>`.
+/// Captures: 1 = type, 2 = id, 3 = label (may be empty).
+static MENTION_TAG_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
+    let pattern = r#"<mention\s+type="([^"]+)"\s+id="([^"]+)"[^>]*>\s*([^<]*?)\s*</mention>"#;
+    regex_lite::Regex::new(pattern).unwrap()
+});
+
+/// New format: `@[type:id:label]`.
+/// Captures: 1 = type (lowercase ASCII letters), 2 = id (no `:` or `]`), 3 = label (no `]`).
+static MENTION_BRACKET_RE: LazyLock<regex_lite::Regex> =
+    LazyLock::new(|| regex_lite::Regex::new(r"@\[([a-z]+):([^:\]]+):([^\]]+)\]").unwrap());
+
+/// Shared, lazily compiled regex for the legacy `<user>…</user>` mention form.
+pub fn user_mention_re() -> &'static regex_lite::Regex {
+    LazyLock::force(&USER_MENTION_RE)
+}
+
+/// Shared, lazily compiled regex for the legacy `<mention type=… id=…>label</mention>` form.
+pub fn mention_tag_re() -> &'static regex_lite::Regex {
+    LazyLock::force(&MENTION_TAG_RE)
+}
+
+/// Shared, lazily compiled regex for the `@[type:id:label]` mention form.
+pub fn mention_bracket_re() -> &'static regex_lite::Regex {
+    LazyLock::force(&MENTION_BRACKET_RE)
+}
--- a/libs/room/src/service/sequence.rs
+++ b/libs/room/src/service/sequence.rs
@ -0,0 +1,49 @@
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room_message::{Column as RmCol, Entity as RoomMessage};
+use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, QuerySelect};
+use uuid::Uuid;
+
+use crate::error::RoomError;
+
+/// Allocate the next message sequence number for `room_id`.
+///
+/// The counter lives in Redis under `room:seq:{room_id}` and is advanced with
+/// `INCR`. It is reconciled against `MAX(seq)` of the room's messages in two
+/// situations:
+///
+/// * `INCR` returned 1, meaning the key did not exist before this call (new
+///   room, or the key was lost), and
+/// * every 1000th value.
+///
+/// If the database is at or ahead of the Redis value, the key is reset to
+/// `max + 1` and that value is returned, so a counter that restarted cannot
+/// hand out sequence numbers that are already stored.
+///
+/// # Errors
+/// `RoomError::Internal` for Redis failures; database errors propagate via `?`.
+pub async fn next_room_message_seq_internal(
+    room_id: Uuid,
+    db: &AppDatabase,
+    cache: &AppCache,
+) -> Result<i64, RoomError> {
+    let seq_key = format!("room:seq:{}", room_id);
+    let mut conn = cache.conn().await.map_err(|e| {
+        RoomError::Internal(format!("failed to get redis connection for seq: {}", e))
+    })?;
+
+    let seq: i64 = redis::cmd("INCR")
+        .arg(&seq_key)
+        .query_async(&mut conn)
+        .await
+        .map_err(|e| RoomError::Internal(format!("seq INCR: {}", e)))?;
+
+    // Only pay for the DB round-trip on a freshly created key or once per
+    // 1000 messages; every other call returns the Redis value directly.
+    if seq != 1 && seq % 1000 != 0 {
+        return Ok(seq);
+    }
+
+    // `one()` yields `None` when no row comes back and `Some(None)` when the
+    // aggregate is NULL (no messages yet); both are treated as 0.
+    let db_max = RoomMessage::find()
+        .filter(RmCol::Room.eq(room_id))
+        .select_only()
+        .column_as(RmCol::Seq.max(), "max_seq")
+        .into_tuple::<Option<i64>>()
+        .one(db)
+        .await?
+        .flatten()
+        .unwrap_or(0);
+    if db_max < seq {
+        return Ok(seq);
+    }
+
+    // NOTE(review): INCR and SET are separate commands, so concurrent callers
+    // can still interleave between them — confirm whether callers serialise
+    // sequence allocation per room.
+    let next = db_max + 1;
+    let _: String = redis::cmd("SET")
+        .arg(&seq_key)
+        .arg(next)
+        .query_async(&mut conn)
+        .await
+        .map_err(|e| RoomError::Internal(format!("seq SET: {}", e)))?;
+    Ok(next)
+}
--- a/libs/room/src/service/workers.rs
+++ b/libs/room/src/service/workers.rs
@ -0,0 +1,329 @@
+use std::sync::Arc;
+
+use chrono::Utc;
+use db::cache::AppCache;
+use db::database::AppDatabase;
+use models::rooms::room;
+use queue::{AgentTaskEvent, MessageProducer};
+use sea_orm::EntityTrait;
+use uuid::Uuid;
+
+use crate::connection::{
+    extract_get_redis, make_persist_fn, DedupCache, PersistFn, RoomConnectionManager,
+};
+
+/// Callback type for sending push notifications.
+///
+/// The callback is shared behind an `Arc`, must be `Send + Sync`, and returns nothing.
+/// NOTE(review): the meaning of the `(Uuid, String, Option<String>, Option<String>)`
+/// arguments is not visible in this file — presumably recipient id, title, body and a
+/// link; confirm against the implementor.
+pub type PushNotificationFn =
+    Arc<dyn Fn(Uuid, String, Option<String>, Option<String>) + Send + Sync>;
+
+/// Boot the background tasks for every room and project currently in the database,
+/// then block until a shutdown signal arrives.
+///
+/// Spawned tasks:
+/// * one `queue::start_worker` covering all room ids, given the persist callback built
+///   by `make_persist_fn`;
+/// * one `subscribe_room_events` task per room;
+/// * one `subscribe_project_room_events` and one `subscribe_task_events_fn` task per
+///   distinct project;
+/// * a cleanup task that ticks every 300 seconds.
+///
+/// When `shutdown_rx.recv()` returns (with a message or an error), every subscriber and
+/// the cleanup task are aborted, and the queue worker is awaited; it was handed its own
+/// shutdown receiver.
+///
+/// `_cache`, `_task_service` and `_max_concurrent_workers` are accepted but unused here.
+///
+/// # Errors
+/// Returns an error only if the initial room query fails.
+pub async fn start_workers(
+    db: AppDatabase,
+    _cache: AppCache,
+    room_manager: Arc<RoomConnectionManager>,
+    queue: MessageProducer,
+    redis_url: String,
+    dedup_cache: DedupCache,
+    _task_service: Option<Arc<agent::TaskService>>,
+    _max_concurrent_workers: Option<usize>,
+    mut shutdown_rx: tokio::sync::broadcast::Receiver<()>,
+) -> anyhow::Result<()> {
+    let rooms: Vec<room::Model> = room::Entity::find().all(&db).await?;
+    let room_ids: Vec<Uuid> = rooms.iter().map(|r| r.id).collect();
+    // Deduplicate through a set: several rooms can belong to the same project.
+    let distinct_projects: std::collections::HashSet<Uuid> =
+        rooms.iter().map(|r| r.project).collect();
+    let project_ids: Vec<Uuid> = distinct_projects.into_iter().collect();
+
+    tracing::info!(
+        room_count = room_ids.len(),
+        project_count = project_ids.len(),
+        "starting room workers"
+    );
+
+    let persist_fn: PersistFn =
+        make_persist_fn(db.clone(), room_manager.metrics.clone(), dedup_cache.clone());
+    let get_redis: Arc<dyn Fn() -> queue::worker::RedisFuture + Send + Sync> =
+        extract_get_redis(queue.clone());
+
+    // Queue worker: kept out of `handles` because it is awaited, not aborted, on shutdown.
+    let worker_shutdown = shutdown_rx.resubscribe();
+    let worker_room_ids = room_ids.clone();
+    let worker_handle = tokio::spawn(async move {
+        queue::start_worker(worker_room_ids, get_redis, persist_fn, worker_shutdown).await;
+    });
+
+    let mut handles = Vec::new();
+
+    // One room-event subscriber per room.
+    for room_id in room_ids {
+        let manager = room_manager.clone();
+        let url = redis_url.clone();
+        let task_shutdown = shutdown_rx.resubscribe();
+        handles.push(tokio::spawn(async move {
+            crate::connection::subscribe_room_events(url, manager, room_id, task_shutdown).await;
+        }));
+    }
+
+    // One project-room-event subscriber per distinct project.
+    for &project_id in &project_ids {
+        let manager = room_manager.clone();
+        let url = redis_url.clone();
+        let task_shutdown = shutdown_rx.resubscribe();
+        handles.push(tokio::spawn(async move {
+            crate::connection::subscribe_project_room_events(
+                url,
+                manager,
+                project_id,
+                task_shutdown,
+            )
+            .await;
+        }));
+    }
+
+    // One agent-task-event subscriber per distinct project.
+    for project_id in project_ids {
+        let manager = room_manager.clone();
+        let url = redis_url.clone();
+        let task_shutdown = shutdown_rx.resubscribe();
+        handles.push(tokio::spawn(async move {
+            crate::connection::subscribe_task_events_fn(url, manager, project_id, task_shutdown)
+                .await;
+        }));
+    }
+
+    let cleanup_handle = tokio::spawn({
+        let manager = room_manager.clone();
+        let db = db.clone();
+        let dedup_cache = dedup_cache.clone();
+        let mut cleanup_shutdown = shutdown_rx.resubscribe();
+        async move {
+            let mut ticker = tokio::time::interval(tokio::time::Duration::from_secs(300));
+            // Consume the first tick up front so the loop below starts with the next one.
+            ticker.tick().await;
+            loop {
+                tokio::select! {
+                    _ = ticker.tick() => {
+                        manager.cleanup_rate_limit().await;
+                        crate::connection::cleanup_dedup_cache(&dedup_cache);
+                        // A failed room query just skips the pruning for this round.
+                        if let Ok(current_rooms) = room::Entity::find().all(&db).await {
+                            let live_rooms: Vec<_> =
+                                current_rooms.iter().map(|r| r.id).collect();
+                            let live_projects: Vec<_> =
+                                current_rooms.iter().map(|r| r.project).collect();
+                            manager.metrics.cleanup_stale_rooms(&live_rooms).await;
+                            manager.prune_stale_rooms(&live_rooms).await;
+                            manager.prune_stale_projects(&live_projects).await;
+                        }
+                    }
+                    _ = cleanup_shutdown.recv() => {
+                        tracing::info!("cleanup task shutting down");
+                        break;
+                    }
+                }
+            }
+        }
+    });
+    handles.push(cleanup_handle);
+
+    // Any outcome of `recv()` — a message or a receive error — starts the shutdown.
+    let _ = shutdown_rx.recv().await;
+
+    tracing::info!("room workers shutting down");
+
+    for handle in &handles {
+        handle.abort();
+    }
+    let _ = worker_handle.await;
+
+    tracing::info!("room workers stopped");
+    Ok(())
+}
+
+/// Create an agent task, announce it, and run `execute` for it on a background task.
+///
+/// Steps, in order:
+/// 1. persist the task through `task_service.create`;
+/// 2. publish a `"started"` event (status `Running`) and call `task_service.start`;
+/// 3. spawn a detached task that waits for a `worker_semaphore` permit, runs `execute`,
+///    records the outcome with `task_service.complete` / `task_service.fail`, and then
+///    publishes and broadcasts a `"done"` or `"failed"` event.
+///
+/// All three events carry the task's `parent_id`. If the semaphore has been closed,
+/// `execute` is not run and the task is recorded as failed instead of panicking.
+///
+/// The function returns the new task id as soon as the background task is spawned; it
+/// does not wait for `execute` to finish.
+///
+/// # Errors
+/// Returns an error only when creating the task fails. The results of `start`,
+/// `complete` and `fail` are deliberately ignored (best effort).
+pub async fn spawn_agent_task<F, Fut>(
+    project_id: Uuid,
+    agent_type: models::agent_task::AgentType,
+    input: String,
+    task_service: Arc<agent::TaskService>,
+    queue: MessageProducer,
+    room_manager: Arc<RoomConnectionManager>,
+    worker_semaphore: Arc<tokio::sync::Semaphore>,
+    execute: F,
+) -> anyhow::Result<i64>
+where
+    F: FnOnce(i64, Arc<agent::TaskService>) -> Fut + Send + 'static,
+    Fut: std::future::Future<Output = Result<String, String>> + Send,
+{
+    let task = task_service
+        .create(project_id, input, agent_type)
+        .await
+        .map_err(|e| anyhow::anyhow!("create task failed: {}", e))?;
+
+    let task_id = task.id;
+    // Captured once so the terminal events report the same parent as the "started" event.
+    let parent_id = task.parent_id;
+
+    let started_event = AgentTaskEvent {
+        task_id,
+        project_id,
+        parent_id,
+        event: "started".to_string(),
+        message: None,
+        output: None,
+        error: None,
+        status: models::agent_task::TaskStatus::Running.to_string(),
+        timestamp: Utc::now(),
+    };
+    queue
+        .publish_agent_task_event(project_id, started_event)
+        .await;
+
+    let _ = task_service.start(task_id).await;
+
+    tokio::spawn(async move {
+        // Keep the acquire result alive for the whole task so that a permit, once
+        // obtained, is held until the terminal event has been sent.
+        let permit = worker_semaphore.acquire().await;
+        let result = if permit.is_ok() {
+            execute(task_id, task_service.clone()).await
+        } else {
+            // A closed semaphore is reported as a task failure so the task still
+            // reaches a terminal state and emits its "failed" event.
+            Err("worker semaphore closed".to_string())
+        };
+
+        let (event_name, output, error, status) = match result {
+            Ok(output) => {
+                let _ = task_service.complete(task_id, &output).await;
+                ("done", Some(output), None, models::agent_task::TaskStatus::Done)
+            }
+            Err(err) => {
+                let _ = task_service.fail(task_id, &err).await;
+                ("failed", None, Some(err), models::agent_task::TaskStatus::Failed)
+            }
+        };
+        let event = AgentTaskEvent {
+            task_id,
+            project_id,
+            parent_id,
+            event: event_name.to_string(),
+            message: None,
+            output,
+            error,
+            status: status.to_string(),
+            timestamp: Utc::now(),
+        };
+
+        queue
+            .publish_agent_task_event(project_id, event.clone())
+            .await;
+        room_manager.broadcast_agent_task(project_id, event).await;
+        tracing::info!(task_id = task_id, project_id = %project_id, "agent task finished");
+    });
+
+    Ok(task_id)
+}
+
+/// Start the background tasks for a single room.
+///
+/// Three detached Tokio tasks are spawned, in this order:
+/// 1. a `queue::room_worker_task` for the room, started once a `worker_semaphore` permit
+///    is obtained (the permit is held for as long as the worker runs; if the semaphore
+///    is closed the task exits without starting the worker);
+/// 2. a `subscribe_room_events` subscriber, using the receiver returned by
+///    `register_room`;
+/// 3. a `subscribe_project_room_events` subscriber for the room's project, using the
+///    receiver returned by `register_project`. It does nothing if the room cannot be
+///    loaded from the database.
+///
+/// The function itself returns immediately and reports no errors.
+pub fn spawn_room_workers(
+    room_id: uuid::Uuid,
+    db: AppDatabase,
+    room_manager: Arc<RoomConnectionManager>,
+    queue: MessageProducer,
+    redis_url: String,
+    worker_semaphore: Arc<tokio::sync::Semaphore>,
+) {
+    // NOTE(review): a brand-new dedup map is built on every call instead of reusing a
+    // shared `DedupCache` — confirm this is intended.
+    let dedup = Arc::new(dashmap::DashMap::with_capacity_and_hasher(
+        10000,
+        Default::default(),
+    ));
+    let persist_fn: PersistFn = make_persist_fn(db.clone(), room_manager.metrics.clone(), dedup);
+    let get_redis: Arc<dyn Fn() -> queue::worker::RedisFuture + Send + Sync> =
+        extract_get_redis(queue.clone());
+
+    // 1. Queue worker, gated by the semaphore.
+    tokio::spawn(async move {
+        let Ok(_permit) = worker_semaphore.acquire_owned().await else {
+            return;
+        };
+        // NOTE(review): the sender is only used after `room_worker_task` has returned,
+        // so nothing signals this receiver while the worker is running — confirm.
+        let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel::<()>(1);
+        let worker_tag = uuid::Uuid::new_v4().to_string();
+        queue::room_worker_task(room_id, worker_tag, get_redis, persist_fn, shutdown_rx).await;
+        let _ = shutdown_tx.send(());
+    });
+
+    // 2. Room-event subscriber.
+    let room_events_manager = room_manager.clone();
+    let room_events_url = redis_url.clone();
+    tokio::spawn(async move {
+        let shutdown_rx = room_events_manager.register_room(room_id).await;
+        crate::connection::subscribe_room_events(
+            room_events_url,
+            room_events_manager,
+            room_id,
+            shutdown_rx,
+        )
+        .await;
+    });
+
+    // 3. Project-room-event subscriber; the project id comes from the room row.
+    tokio::spawn(async move {
+        let lookup = room::Entity::find_by_id(room_id).one(&db).await;
+        // Both a query error and a missing room end the task silently.
+        let Some(project_id) = lookup.ok().flatten().map(|r| r.project) else {
+            return;
+        };
+        let shutdown_rx = room_manager.register_project(project_id).await;
+        crate::connection::subscribe_project_room_events(
+            redis_url,
+            room_manager,
+            project_id,
+            shutdown_rx,
+        )
+        .await;
+    });
+}
--- a/libs/room/src/types.rs
+++ b/libs/room/src/types.rs
@ -223,6 +223,9 @@ pub struct RoomMessageResponse {
    pub in_reply_to: Option<Uuid>,
    pub content: String,
    pub content_type: String,
+    /// Accumulated AI reasoning/thinking text.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_content: Option<String>,
    pub edited_at: Option<DateTime<Utc>>,
    pub send_at: DateTime<Utc>,
    pub revoked: Option<DateTime<Utc>>,
--- a/libs/service/project/billing.rs
+++ b/libs/service/project/billing.rs
@ -149,9 +149,24 @@ impl AppService {
        }

        let now_utc = Utc::now();
+        // Only first project per user gets initial budget ($10)
+        let initial_balance = if let Some(uid) = user_uid {
+            let existing_projects = models::projects::project::Entity::find()
+                .filter(models::projects::project::Column::CreatedBy.eq(uid))
+                .all(&self.db)
+                .await?;
+            if existing_projects.is_empty() {
+                Decimal::from_f64_retain(DEFAULT_PROJECT_MONTHLY_CREDIT).unwrap_or(Decimal::ZERO)
+            } else {
+                Decimal::ZERO
+            }
+        } else {
+            Decimal::ZERO
+        };
+
        let created = project_billing::ActiveModel {
            project: Set(project_uid),
-            balance: Set(Decimal::from(DEFAULT_PROJECT_MONTHLY_CREDIT as i64)),
+            balance: Set(initial_balance),
            currency: Set("USD".to_string()),
            user: Set(user_uid),
            updated_at: Set(now_utc),
--- a/libs/service/project/init.rs
+++ b/libs/service/project/init.rs
@ -9,6 +9,8 @@ use serde::{Deserialize, Serialize};
 use session::Session;
 use uuid::Uuid;

+const DEFAULT_PROJECT_INITIAL_BALANCE: f64 = 10.0;
+
 #[derive(Deserialize, Serialize, Clone, Debug, utoipa::ToSchema)]
 pub struct ProjectInitParams {
    pub name: String,
@ -94,9 +96,20 @@ impl AppService {
        };
        project_member.insert(&txn).await?;

+        // Only first project per user gets initial budget
+        let existing_projects = project::Entity::find()
+            .filter(project::Column::CreatedBy.eq(user.uid))
+            .all(&self.db)
+            .await?;
+        let initial_balance = if existing_projects.is_empty() {
+            Decimal::from_f64_retain(DEFAULT_PROJECT_INITIAL_BALANCE).unwrap_or(Decimal::ZERO)
+        } else {
+            Decimal::ZERO
+        };
+
        let billing = project_billing::ActiveModel {
            project: Set(_project.id),
-            balance: Set(Decimal::from(200i64)),
+            balance: Set(initial_balance),
            currency: Set("USD".to_string()),
            user: Set(Some(user.uid)),
            updated_at: Set(Utc::now()),
--- a/libs/service/workspace/billing.rs
+++ b/libs/service/workspace/billing.rs
@ -78,7 +78,7 @@ impl AppService {
            .await?
            .ok_or(AppError::NotWorkspaceMember)?;

-        let billing = self.ensure_workspace_billing(ws.id).await?;
+        let billing = self.ensure_workspace_billing(ws.id, Some(user_uid)).await?;
        let now_utc = Utc::now();
        let (month_start, next_month_start) = utc_month_bounds(now_utc)?;

@ -132,7 +132,7 @@ impl AppService {
        let page = std::cmp::max(query.page.unwrap_or(1), 1);
        let per_page = query.per_page.unwrap_or(20).clamp(1, 200);

-        self.ensure_workspace_billing(ws.id).await?;
+        self.ensure_workspace_billing(ws.id, Some(user_uid)).await?;

        let paginator = workspace_billing_history::Entity::find()
            .filter(workspace_billing_history::Column::WorkspaceId.eq(ws.id))
@ -186,7 +186,7 @@ impl AppService {
            return Err(AppError::BadRequest("Amount must be positive".to_string()));
        }

-        let billing = self.ensure_workspace_billing(ws.id).await?;
+        let billing = self.ensure_workspace_billing(ws.id, Some(user_uid)).await?;
        let now_utc = Utc::now();
        let new_balance =
            Decimal::from_f64_retain(billing.balance.to_f64().unwrap_or_default() + params.amount)
@ -221,6 +221,7 @@ impl AppService {
    pub async fn ensure_workspace_billing(
        &self,
        workspace_id: Uuid,
+        user_uid: Option<Uuid>,
    ) -> Result<workspace_billing::Model, AppError> {
        if let Some(billing) = workspace_billing::Entity::find_by_id(workspace_id)
            .one(&self.db)
@ -230,9 +231,25 @@ impl AppService {
        }

        let now_utc = Utc::now();
+        // Only first workspace per user gets initial budget ($30)
+        let initial_balance = if let Some(uid) = user_uid {
+            let existing_workspaces = workspace_membership::Entity::find()
+                .filter(workspace_membership::Column::UserId.eq(uid))
+                .filter(workspace_membership::Column::Status.eq("active"))
+                .all(&self.db)
+                .await?;
+            if existing_workspaces.len() <= 1 {
+                Decimal::from_f64_retain(30.0).unwrap_or(Decimal::ZERO)
+            } else {
+                Decimal::ZERO
+            }
+        } else {
+            Decimal::ZERO
+        };
+
        let created = workspace_billing::ActiveModel {
            workspace_id: Set(workspace_id),
-            balance: Set(Decimal::ZERO),
+            balance: Set(initial_balance),
            currency: Set("USD".to_string()),
            monthly_quota: Set(
                Decimal::from_f64_retain(DEFAULT_MONTHLY_QUOTA).unwrap_or(Decimal::ZERO)
--- a/libs/service/workspace/init.rs
+++ b/libs/service/workspace/init.rs
@ -1,8 +1,10 @@
 use crate::AppService;
 use crate::error::AppError;
 use chrono::Utc;
+use models::Decimal;
 use models::WorkspaceRole;
 use models::workspaces::workspace;
+use models::workspaces::workspace_billing;
 use models::workspaces::workspace_membership;
 use sea_orm::*;
 use serde::{Deserialize, Serialize};
@ -89,6 +91,28 @@ impl AppService {
        };
        membership.insert(&txn).await?;

+        // Create billing record — only first workspace gets $30 initial balance
+        let existing_workspaces = workspace_membership::Entity::find()
+            .filter(workspace_membership::Column::UserId.eq(user.uid))
+            .filter(workspace_membership::Column::Status.eq("active"))
+            .all(&self.db)
+            .await?;
+        let initial_balance = if existing_workspaces.len() <= 1 {
+            Decimal::from_f64_retain(30.0).unwrap_or(Decimal::ZERO)
+        } else {
+            Decimal::ZERO
+        };
+        let billing = workspace_billing::ActiveModel {
+            workspace_id: Set(ws.id),
+            balance: Set(initial_balance),
+            currency: Set("USD".to_string()),
+            monthly_quota: Set(Decimal::from_f64_retain(100.0).unwrap_or(Decimal::ZERO)),
+            total_spent: Set(Decimal::ZERO),
+            updated_at: Set(Utc::now()),
+            created_at: Set(Utc::now()),
+        };
+        billing.insert(&txn).await?;
+
        txn.commit().await?;
        Ok(ws)
    }
--- a/src/components/room/message/MessageBubble.tsx
+++ b/src/components/room/message/MessageBubble.tsx
@ -19,6 +19,107 @@ import { getSenderDisplayName, getSenderUserUid, isUserSender } from '../sender'
 import { MessageReactions } from './MessageReactions';
 import { ReactionPicker } from './ReactionPicker';

+/**
+ * Extract the thinking text from a stored `thinking_content` value.
+ *
+ * If the value is JSON with a `__chunks__` list, the `content` of every `thinking`
+ * chunk is concatenated in order. Any other value (plain text, or JSON without
+ * `__chunks__`) is returned unchanged. Empty input yields `''`.
+ */
+function parseThinkingText(raw: string): string {
+  if (!raw) return '';
+  try {
+    const { __chunks__: chunks } = JSON.parse(raw) as {
+      __chunks__?: Array<{ type: string; content: string }>;
+    };
+    if (chunks) {
+      const thinkingOnly = chunks.filter(({ type }) => type === 'thinking');
+      return thinkingOnly.map(({ content }) => content).join('');
+    }
+  } catch {
+    // Not parseable in the __chunks__ shape — fall through and use the raw text.
+  }
+  return raw;
+}
+
+/**
+ * Read the ordered chunk list out of a stored `thinking_content` value.
+ *
+ * Returns the `__chunks__` list when the value is JSON carrying one, and `null` for
+ * empty input, non-JSON (legacy plain text), or JSON without `__chunks__`.
+ */
+function parseSavedChunks(raw: string | null | undefined): Array<{ type: string; content: string }> | null {
+  if (!raw) return null;
+  try {
+    const { __chunks__: saved } = JSON.parse(raw) as {
+      __chunks__?: Array<{ type: string; content: string }>;
+    };
+    return saved || null;
+  } catch {
+    // Not JSON — legacy plain text
+    return null;
+  }
+}
+
+/**
+ * Render stream chunks in arrival order.
+ *
+ * `tool_call` chunks are dropped. Of the rest, `thinking` chunks stay `thinking` and
+ * every other type is treated as `answer`; consecutive chunks of the same kind are
+ * merged. Each thinking group becomes a collapsible `ThinkingBlock`, each answer group
+ * a `MessageContent`. A streaming cursor is always rendered after the groups.
+ */
+function OrderedStreamChunks({
+  chunks,
+  onMentionClick,
+}: {
+  chunks: Array<{ type: string; content: string }>;
+  onMentionClick?: (type: string, id: string, label: string) => void;
+}) {
+  type Group = { type: 'thinking' | 'answer'; content: string };
+
+  const groups = chunks.reduce<Group[]>((merged, { type, content }) => {
+    if (type === 'tool_call') return merged;
+    const kind: Group['type'] = type === 'thinking' ? 'thinking' : 'answer';
+    const tail = merged[merged.length - 1];
+    if (tail && tail.type === kind) {
+      // Same kind as the previous group: extend it instead of starting a new one.
+      tail.content += content;
+    } else {
+      merged.push({ type: kind, content });
+    }
+    return merged;
+  }, []);
+
+  return (
+    <>
+      {groups.map(({ type, content }, index) => {
+        if (type === 'thinking') {
+          return <ThinkingBlock key={index} content={content} />;
+        }
+        return <MessageContent key={index} content={content} onMentionClick={onMentionClick} />;
+      })}
+      {/* Streaming cursor */}
+      <span className="discord-streaming-cursor" />
+    </>
+  );
+}
+
+/**
+ * Collapsible block showing AI thinking text.
+ *
+ * Starts collapsed; the header button toggles it (there is no auto-expand). The number
+ * in the header is the count of whitespace-separated words in `content`, shown with the
+ * label "tokens".
+ */
+function ThinkingBlock({ content }: { content: string }) {
+  const [expanded, setExpanded] = useState(false);
+  const wordCount = content.split(/\s+/).filter(Boolean).length;
+  return (
+    <div className="mb-2 last:mb-0 rounded-lg border text-sm" style={{ borderColor: 'var(--room-border)', background: 'var(--room-bg)' }}>
+      {/* type="button" keeps the toggle from acting as a submit button inside a form;
+          aria-expanded exposes the open/closed state to assistive technology. */}
+      <button
+        type="button"
+        aria-expanded={expanded}
+        onClick={() => setExpanded(v => !v)}
+        className="flex w-full items-center gap-2 px-3 py-2 text-left transition-colors hover:opacity-80"
+        style={{ color: 'var(--room-text-secondary)' }}
+      >
+        <svg
+          className={cn('size-3.5 transition-transform', expanded && 'rotate-90')}
+          viewBox="0 0 16 16"
+          fill="currentColor"
+        >
+          <path d="M6 4l4 4-4 4" />
+        </svg>
+        <span className="text-xs font-semibold uppercase tracking-wider opacity-70">
+          Thinking
+        </span>
+        <span className="text-[11px] opacity-50">
+          · {wordCount} tokens
+        </span>
+        <svg className="ml-auto size-3.5 opacity-40" viewBox="0 0 16 16" fill="currentColor">
+          <path d={expanded ? 'M4 10l4-4 4 4' : 'M4 6l4 4 4-4'} />
+        </svg>
+      </button>
+      {expanded && (
+        <div className="border-t px-3 py-2 text-sm leading-relaxed whitespace-pre-wrap" style={{ borderColor: 'var(--room-border)', color: 'var(--room-text-subtle)' }}>
+          {content}
+        </div>
+      )}
+    </div>
+  );
+}
+
 // Sender colors — AI Studio clean palette
 const SENDER_COLORS: Record<string, string> = {
  system: '#9ca3af',
@ -81,7 +182,7 @@ export const MessageBubble = memo(function MessageBubble({
  const isEdited = !!message.edited_at;
  useTheme();
  const { user } = useUser();
-  const { wsClient, streamingMessages, streamingThinkingContent, members, pins, pinMessage, unpinMessage } = useRoom();
+  const { wsClient, streamingChunks, members, pins, pinMessage, unpinMessage } = useRoom();
  const avatarUrl = (() => {
    if (message.sender_type === 'ai') return undefined;
    const member = members.find(m => m.user === message.sender_id);
@ -93,15 +194,10 @@ export const MessageBubble = memo(function MessageBubble({
  const isPending = message.isOptimistic === true || message.id.startsWith('temp-') || message.id.startsWith('optimistic-');
  const isPinned = pins.some(p => p.message === message.id);

-  const displayContent = isStreaming && streamingMessages?.has(message.id)
-    ? streamingMessages.get(message.id)!
-    : message.content;
-
-  // Thinking/reasoning content: from streamingThinkingContent while live, or stored thinking_content on message
-  const thinkingContent = isStreaming && streamingThinkingContent?.has(message.id)
-    ? streamingThinkingContent.get(message.id)!
-    : (message.thinking_content ?? '');
  const [thinkingExpanded, setThinkingExpanded] = useState(false);
+  const thinkingContent = isStreaming && streamingChunks?.has(message.id)
+    ? streamingChunks.get(message.id)!.filter(c => c.type === 'thinking').map(c => c.content).join('')
+    : parseThinkingText(message.thinking_content ?? '');

  const handleMentionClick = useCallback(
    (type: string, id: string, label: string) => {
@ -138,7 +234,7 @@ export const MessageBubble = memo(function MessageBubble({
    }
  }, [roomId, message.id, wsClient]);

-  const textContent = displayContent;
+  const textContent = message.content;
  const estimatedLines = textContent.split(/\r?\n/).reduce((total, line) => {
    return total + Math.max(1, Math.ceil(line.trim().length / 90));
  }, 0);
@ -312,51 +408,63 @@ export const MessageBubble = memo(function MessageBubble({
                <div className="text-[15px] leading-[1.4] min-w-0" style={{ color: 'var(--room-text)' }}>
                  {message.content_type === 'text' || message.content_type === 'Text' ? (
                    <div className={cn('relative', isTextCollapsed && 'max-h-[4.5rem] overflow-hidden')}>
-                      {/* Thinking/reasoning section — collapsible, DeepSeek-style */}
-                      {thinkingContent && (
-                        <div className="mb-2 rounded-lg border text-sm" style={{ borderColor: 'var(--room-border)', background: 'var(--room-bg)' }}>
-                          <button
-                            onClick={() => setThinkingExpanded(v => !v)}
-                            className="flex w-full items-center gap-2 px-3 py-2 text-left transition-colors hover:opacity-80"
-                            style={{ color: 'var(--room-text-secondary)' }}
-                          >
-                            <svg
-                              className={cn('size-3.5 transition-transform', thinkingExpanded && 'rotate-90')}
-                              viewBox="0 0 16 16"
-                              fill="currentColor"
-                            >
-                              <path d="M6 4l4 4-4 4" />
-                            </svg>
-                            <span className="text-xs font-semibold uppercase tracking-wider opacity-70">
-                              Thinking
-                            </span>
-                            {thinkingContent && (
-                              <span className="text-[11px] opacity-50">
-                                · {thinkingContent.split(/\s+/).filter(Boolean).length} tokens
-                              </span>
-                            )}
-                            <svg className="ml-auto size-3.5 opacity-40" viewBox="0 0 16 16" fill="currentColor">
-                              <path d={thinkingExpanded ? 'M4 10l4-4 4 4' : 'M4 6l4 4 4-4'} />
-                            </svg>
-                          </button>
-                          {thinkingExpanded && (
-                            <div className="border-t px-3 py-2 text-sm leading-relaxed whitespace-pre-wrap" style={{ borderColor: 'var(--room-border)', color: 'var(--room-text-subtle)' }}>
-                              {thinkingContent}
-                            </div>
-                          )}
-                        </div>
-                      )}
-                      {/* Answer content — always visible */}
-                      {displayContent && (
-                        <MessageContent
-                          content={displayContent}
+                      {/* Streaming: ordered chunks — think/answer interleaved, tool_call hidden */}
+                      {isStreaming && streamingChunks?.has(message.id) ? (
+                        <OrderedStreamChunks
+                          chunks={streamingChunks.get(message.id)!}
                          onMentionClick={handleMentionClick}
                        />
+                      ) : parseSavedChunks(message.thinking_content) ? (
+                        /* Saved ordered chunks — render in original order */
+                        <OrderedStreamChunks
+                          chunks={parseSavedChunks(message.thinking_content)!}
+                          onMentionClick={handleMentionClick}
+                        />
+                      ) : (
+                        /* Legacy: aggregated thinking at top, content at bottom */
+                        <>
+                          {thinkingContent && (
+                            <div className="mb-2 rounded-lg border text-sm" style={{ borderColor: 'var(--room-border)', background: 'var(--room-bg)' }}>
+                              <button
+                                onClick={() => setThinkingExpanded(v => !v)}
+                                className="flex w-full items-center gap-2 px-3 py-2 text-left transition-colors hover:opacity-80"
+                                style={{ color: 'var(--room-text-secondary)' }}
+                              >
+                                <svg
+                                  className={cn('size-3.5 transition-transform', thinkingExpanded && 'rotate-90')}
+                                  viewBox="0 0 16 16"
+                                  fill="currentColor"
+                                >
+                                  <path d="M6 4l4 4-4 4" />
+                                </svg>
+                                <span className="text-xs font-semibold uppercase tracking-wider opacity-70">
+                                  Thinking
+                                </span>
+                                {thinkingContent && (
+                                  <span className="text-[11px] opacity-50">
+                                    · {thinkingContent.split(/\s+/).filter(Boolean).length} tokens
+                                  </span>
+                                )}
+                                <svg className="ml-auto size-3.5 opacity-40" viewBox="0 0 16 16" fill="currentColor">
+                                  <path d={thinkingExpanded ? 'M4 10l4-4 4 4' : 'M4 6l4 4 4-4'} />
+                                </svg>
+                              </button>
+                              {thinkingExpanded && (
+                                <div className="border-t px-3 py-2 text-sm leading-relaxed whitespace-pre-wrap" style={{ borderColor: 'var(--room-border)', color: 'var(--room-text-subtle)' }}>
+                                  {thinkingContent}
+                                </div>
+                              )}
+                            </div>
+                          )}
+                          {message.content && (
+                            <MessageContent
+                              content={message.content}
+                              onMentionClick={handleMentionClick}
+                            />
+                          )}
+                        </>
                      )}

-                      {/* Streaming cursor */}
-                      {isStreaming && <span className="discord-streaming-cursor" />}
-
                      {/* Collapse gradient */}
                      {isTextCollapsed && (
                        <div
--- a/src/components/room/message/MessageList.tsx
+++ b/src/components/room/message/MessageList.tsx
@ -95,13 +95,13 @@ export const MessageList = memo(function MessageList({
  const scrollTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const isRestoringScrollRef = useRef(false);
  const firstVisibleMessageIdRef = useRef<string | null>(null);
-  const isInitialLoadRef = useRef(true);
  const wasNearBottomRef = useRef(true);
+  const didInitialLayoutRef = useRef(false);

-  // Reset initial load flag when switching rooms
+  // Reset scroll flags when switching rooms
  useEffect(() => {
-    isInitialLoadRef.current = true;
    wasNearBottomRef.current = true;
+    didInitialLayoutRef.current = false;
  }, [roomId]);

  const replyMap = useMemo(() => {
@ -153,13 +153,6 @@ export const MessageList = memo(function MessageList({
    return result;
  }, [messages, replyMap]);

-  const scrollToBottom = useCallback((smooth = true) => {
-    const container = scrollContainerRef.current;
-    if (container) {
-      container.scrollTo({ top: container.scrollHeight, behavior: smooth ? 'smooth' : 'auto' });
-    }
-  }, []);
-
  const handleScroll = useCallback(() => {
    const container = scrollContainerRef.current;
    if (!container) return;
@ -192,32 +185,6 @@ export const MessageList = memo(function MessageList({
    };
  }, [handleScroll]);

-  useEffect(() => {
-    if (messages.length === 0) return;
-    const container = scrollContainerRef.current;
-    if (!container) return;
-
-    // On initial load, jump to bottom instantly (no animation)
-    if (isInitialLoadRef.current) {
-      isInitialLoadRef.current = false;
-      wasNearBottomRef.current = true;
-      // Use requestAnimationFrame to wait for virtualizer to layout
-      requestAnimationFrame(() => {
-        requestAnimationFrame(() => {
-          scrollToBottom(false);
-        });
-      });
-      return;
-    }
-
-    // For new messages: auto-scroll only if user was near bottom
-    const distanceFromBottom = container.scrollHeight - container.scrollTop - container.clientHeight;
-    if (distanceFromBottom < 150) {
-      wasNearBottomRef.current = true;
-      requestAnimationFrame(() => scrollToBottom(false));
-    }
-  }, [messages.length, scrollToBottom]);
-
  const virtualizer = useVirtualizer({
    count: rows.length,
    getScrollElement: () => scrollContainerRef.current,
@ -231,6 +198,31 @@ export const MessageList = memo(function MessageList({
    gap: 0,
  });

+  // Scroll the last virtual row into view, aligned to the bottom edge.
+  // `smooth` picks animated vs. instant scrolling; does nothing when there are no rows.
+  const scrollToBottom = useCallback((smooth = true) => {
+    const lastIndex = rows.length - 1;
+    if (lastIndex < 0) return;
+    // scrollToIndex accepts `behavior` ('auto' | 'smooth'); a bare `smooth` key is not
+    // a recognised option and would be ignored, so translate the flag explicitly.
+    virtualizer.scrollToIndex(lastIndex, { align: 'end', behavior: smooth ? 'smooth' : 'auto' });
+  }, [virtualizer, rows.length]);
+
+  // One-time initial scroll-to-bottom, deferred until the virtualizer reports a non-trivial total size
+  useEffect(() => {
+    if (messages.length === 0) return;
+    if (didInitialLayoutRef.current) return;
+
+    const container = scrollContainerRef.current;
+    if (!container) return;
+
+    // Only fire when virtualizer has a meaningful total size
+    if (virtualizer.getTotalSize() < 10) return;
+
+    didInitialLayoutRef.current = true;
+    wasNearBottomRef.current = true;
+    requestAnimationFrame(() => {
+      requestAnimationFrame(() => {
+        scrollToBottom(false);
+      });
+    });
+  }, [virtualizer.getTotalSize(), messages.length, scrollToBottom]);
+
  const virtualItems = virtualizer.getVirtualItems();

  // IntersectionObserver for load more
--- a/src/contexts/room-context.tsx
+++ b/src/contexts/room-context.tsx
@ -98,6 +98,7 @@ function wsMessageToUiMessage(wsMsg: RoomMessagePayload): MessageWithMeta {
    display_content: wsMsg.content,
    is_streaming: false,
    reactions: wsMsg.reactions,
+    thinking_content: wsMsg.thinking_content,
  };
 }

@ -157,9 +158,8 @@ interface RoomContextValue {
  createRoom: (name: string, isPublic: boolean, categoryId?: string) => Promise<RoomResponse>;
  updateRoom: (roomId: string, name?: string, isPublic?: boolean, categoryId?: string) => Promise<void>;
  deleteRoom: (roomId: string) => Promise<void>;
-  streamingMessages: Map<string, string>;
-  /** Streaming thinking/reasoning content keyed by message_id */
-  streamingThinkingContent: Map<string, string>;
+  /** Streaming chunks in arrival order per message_id — preserves think/answer interleaving */
+  streamingChunks: Map<string, Array<{ type: string; content: string }>>;
  /** Active AI stream info for typing indicator */
  activeAiStream: { message_id: string; display_name: string } | null;

@ -440,16 +440,15 @@ export function RoomProvider({



-  const [streamingContent, setStreamingContent] = useState<Map<string, string>>(new Map());
-  const [streamingThinkingContent, setStreamingThinkingContent] = useState<Map<string, string>>(new Map());
+  const [streamingChunks, setStreamingChunks] = useState<Map<string, Array<{ type: string; content: string }>>>(new Map());
  const [activeAiStream, setActiveAiStream] = useState<{ message_id: string; display_name: string } | null>(null);

  // Streaming timeout: if no chunk received for 60s, force-end the stream
  // to prevent UI hanging forever when done=true is never delivered.
  const streamingTimersRef = useRef<Map<string, ReturnType<typeof setTimeout>>>(new Map());

-  // Ref to latest streamingThinkingContent so done handler can read it (setState is async)
-  const streamingThinkingContentRef = useRef<Map<string, string>>(new Map());
+  // Ref to latest streamingChunks so done handler can read accumulated thinking (setState is async)
+  const streamingChunksRef = useRef<Map<string, Array<{ type: string; content: string }>>>(new Map());

  const clearStreamingTimer = useCallback((msgId: string) => {
    const timer = streamingTimersRef.current.get(msgId);
@ -464,15 +463,11 @@ export function RoomProvider({
    const timer = setTimeout(() => {
      // Force-end: mark message as not-streaming and keep whatever content we have
      setActiveAiStream((prev) => prev?.message_id === msgId ? null : prev);
-      setStreamingContent((prev) => {
+      setStreamingChunks((prev) => {
        prev.delete(msgId);
        return new Map(prev);
      });
-      setStreamingThinkingContent((prev) => {
-        prev.delete(msgId);
-        return new Map(prev);
-      });
-      streamingThinkingContentRef.current.delete(msgId);
+      streamingChunksRef.current.delete(msgId);
      setMessages((prev) =>
        prev.map((m) =>
          m.id === msgId && m.is_streaming
@ -514,18 +509,20 @@ export function RoomProvider({
              const existingIdx = prev.findIndex((m) => m.id === payload.id);
              if (existingIdx !== -1) {
                // Message already exists (e.g. created by streaming chunk) —
-                // merge server-side fields (display_name, reactions) that the
+                // merge server-side fields (display_name, reactions, thinking_content) that the
                // chunk didn't have.
                const existing = prev[existingIdx];
                const needsUpdate =
                  (!existing.display_name && payload.display_name) ||
-                  (payload.reactions !== undefined && existing.reactions === undefined);
+                  (payload.reactions !== undefined && existing.reactions === undefined) ||
+                  (payload.thinking_content && !existing.thinking_content);
                if (needsUpdate) {
                  const updated = [...prev];
                  updated[existingIdx] = {
                    ...existing,
                    display_name: payload.display_name ?? existing.display_name,
                    reactions: payload.reactions ?? existing.reactions,
+                    thinking_content: payload.thinking_content ?? existing.thinking_content,
                  };
                  return updated;
                }
@ -571,27 +568,32 @@ export function RoomProvider({
        clearStreamingTimer(chunk.message_id);
        // Set activeAiStream to null since streaming is done
        setActiveAiStream(null);
-        // Clear streaming content maps
-        setStreamingContent((prev) => {
+
+        // Read the ordered chunk list accumulated for this message from the ref.
+        // Only thinking_content is rebuilt from it; the final content comes from the done chunk.
+        const orderedChunks = streamingChunksRef.current.get(chunk.message_id) ?? [];
+        // For thinking_content: concatenate all thinking chunks in order
+        const thinkingText = orderedChunks
+          .filter(c => c.type === 'thinking')
+          .map(c => c.content)
+          .join('');
+
+        // Clear streaming state
+        setStreamingChunks((prev) => {
          prev.delete(chunk.message_id);
          return new Map(prev);
        });
-        setStreamingThinkingContent((prev) => {
-          prev.delete(chunk.message_id);
-          return new Map(prev);
-        });
-        // Finalize message: keep thinking_content from accumulator, set content from done chunk
+
+        // Finalize message with ordered content
        setMessages((prev) =>
          prev.map((m) => {
            if (m.id !== chunk.message_id) return m;
-            // Get thinking_content from the accumulator before it was cleared
-            const tc = streamingThinkingContentRef.current.get(chunk.message_id);
            return {
              ...m,
              content: chunk.content,
              display_content: chunk.content,
              is_streaming: false,
-              thinking_content: tc ?? m.thinking_content,
+              thinking_content: thinkingText || m.thinking_content,
              chunk_type: chunk.chunk_type,
            };
          }),
@ -604,78 +606,36 @@ export function RoomProvider({
          setActiveAiStream({ message_id: chunk.message_id, display_name: chunk.display_name });
        }

-        if (chunk.chunk_type === 'thinking') {
-          // Accumulate thinking content separately
-          setStreamingThinkingContent((prev) => {
-            const next = new Map(prev);
-            const prevContent = next.get(chunk.message_id) ?? '';
-            const newContent =
-              prevContent === '' || !chunk.content.startsWith(prevContent)
-                ? chunk.content
-                : prevContent + chunk.content.slice(prevContent.length);
-            next.set(chunk.message_id, newContent);
-            // Sync ref for done handler access
-            streamingThinkingContentRef.current = new Map(next);
-            return next;
-          });
-          // Ensure message entry exists (with minimal content to show streaming state)
-          setMessages((msgs) => {
-            const idx = msgs.findIndex((m) => m.id === chunk.message_id);
-            if (idx !== -1) return msgs;
-            const newMsg: MessageWithMeta = {
-              id: chunk.message_id,
-              room: chunk.room_id,
-              seq: 0,
-              sender_type: 'ai',
-              display_name: chunk.display_name,
-              content: '',
-              display_content: '',
-              content_type: 'text',
-              send_at: new Date().toISOString(),
-              is_streaming: true,
-              chunk_type: 'thinking',
-            };
-            return [...msgs, newMsg];
-          });
-        } else if (chunk.chunk_type === 'answer') {
-          // Accumulate answer content (existing behavior)
-          setStreamingContent((prev) => {
-            const next = new Map(prev);
-            const prevContent = next.get(chunk.message_id) ?? '';
-            const newContent =
-              prevContent === '' || !chunk.content.startsWith(prevContent)
-                ? chunk.content
-                : prevContent + chunk.content.slice(prevContent.length);
-            next.set(chunk.message_id, newContent);
-            setMessages((msgs) => {
-              const idx = msgs.findIndex((m) => m.id === chunk.message_id);
-              if (idx !== -1) {
-                const m = msgs[idx];
-                if (m.content === newContent && m.is_streaming === true) return msgs;
-                const updated = [...msgs];
-                updated[idx] = { ...m, content: newContent, display_content: newContent };
-                return updated;
-              }
-              if (!newContent) return msgs;
-              const newMsg: MessageWithMeta = {
-                id: chunk.message_id,
-                room: chunk.room_id,
-                seq: 0,
-                sender_type: 'ai',
-                display_name: chunk.display_name,
-                content: newContent,
-                display_content: newContent,
-                content_type: 'text',
-                send_at: new Date().toISOString(),
-                is_streaming: true,
-                chunk_type: chunk.chunk_type,
-              };
-              return [...msgs, newMsg];
-            });
-            return next;
-          });
-        }
-        // tool_call / tool_result: skip content update entirely — don't pollute display
+        // Append chunk to ordered list — preserves think/answer/tool interleaving.
+        setStreamingChunks((prev) => {
+          const next = new Map(prev);
+          const existing = next.get(chunk.message_id) ?? [];
+          const newChunks = [...existing, { type: chunk.chunk_type ?? 'answer', content: chunk.content }];
+          next.set(chunk.message_id, newChunks);
+          // Sync ref for done handler access
+          streamingChunksRef.current = new Map(next);
+          return next;
+        });
+
+        // Ensure message entry exists (with minimal content to show streaming state)
+        setMessages((msgs) => {
+          const idx = msgs.findIndex((m) => m.id === chunk.message_id);
+          if (idx !== -1) return msgs;
+          const newMsg: MessageWithMeta = {
+            id: chunk.message_id,
+            room: chunk.room_id,
+            seq: 0,
+            sender_type: 'ai',
+            display_name: chunk.display_name,
+            content: '',
+            display_content: '',
+            content_type: 'text',
+            send_at: new Date().toISOString(),
+            is_streaming: true,
+            chunk_type: chunk.chunk_type,
+          };
+          return [...msgs, newMsg];
+        });
      }
    },
        onRoomReactionUpdated: (payload: RoomReactionUpdatedPayload) => {
@ -1478,8 +1438,7 @@ export function RoomProvider({
      createRoom,
      updateRoom,
      deleteRoom,
-      streamingMessages: streamingContent,
-      streamingThinkingContent,
+      streamingChunks,
      activeAiStream,
      projectRepos,
      reposLoading,
@ -1534,8 +1493,7 @@ export function RoomProvider({
      createRoom,
      updateRoom,
      deleteRoom,
-      streamingContent,
-      streamingThinkingContent,
+      streamingChunks,
      activeAiStream,
      projectRepos,
      reposLoading,
--- a/src/lib/ws-protocol.ts
+++ b/src/lib/ws-protocol.ts
@ -193,6 +193,8 @@ export interface RoomMessagePayload {
  thread_id?: string;
  content: string;
  content_type: string;
+  /** Accumulated AI reasoning/thinking text. */
+  thinking_content?: string;
  send_at: string;
  seq: number;
  display_name?: string;
Author	SHA1	Message	Date
ZhenYi	07e74c230c	feat: thinking_content column + first-project budget logic - Add thinking_content column to room_message table - Migration for thinking_content column - ws-protocol update with streaming chunk types - Billing: first project gets $10, first workspace gets $30 - Subsequent projects/workspaces get $0 budget [Checks pending: CI / Rust Lint & Check (push): waiting to run; CI / Rust Tests (push): waiting to run; CI / Frontend Lint & Type Check (push): waiting to run; CI / Frontend Build (push): blocked by required conditions]	2026-04-26 13:11:06 +08:00
ZhenYi	0939aa240b	fix(frontend): ordered chunk rendering + initial scroll-to-bottom - OrderedStreamChunks renders think/answer interleaved per arrival order - parseSavedChunks parses stored __chunks__ JSON on page refresh - Tool call chunks hidden from frontend display - Fix streaming join('') instead of join('\n') to avoid per-token newlines - Fix MessageList scroll-to-bottom using virtualizer.scrollToIndex - Remove unused streamingContent/streamingThinkingContent state - Add retryable error patterns for HTTP connection issues	2026-04-26 13:10:51 +08:00
ZhenYi	f5e3da35b0	feat(room): store ordered streaming chunks + billing integration - Save thinking_content as {"__chunks__": [{type, content}]} for replay - Tool call sanitization — don't expose raw results to frontend - Billing record_ai_usage integration - Room service module refactoring into service/ directory	2026-04-26 13:10:42 +08:00
ZhenYi	b4b5538447	feat(agent): add ordered stream chunk collection + retry for HTTP errors - StreamChunk/StreamChunkType types for preserving arrival order - Chunk collection in call_stream_once and process_stream - Add "error sending request" and "Http client error" to retryable errors - StreamResult includes chunks vector for ordered replay	2026-04-26 13:10:26 +08:00
				`@ -0,0 +1 @@`
				`ALTER TABLE room_message ADD COLUMN IF NOT EXISTS thinking_content TEXT;`