refactor(agent): update AI chat execution, streaming and ReAct logic
This commit is contained in:
parent
6921220cc2
commit
395fa1b498
@ -217,11 +217,10 @@ pub async fn execute_chat_stream(
|
|||||||
let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
|
let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
|
||||||
if !has_tool_calls {
|
if !has_tool_calls {
|
||||||
let final_content = response.content.clone();
|
let final_content = response.content.clone();
|
||||||
// Don't broadcast the done chunk via SSE/NATS — incremental deltas
|
// Don't push full content as a chunk — incremental deltas in
|
||||||
// already delivered the content; the separate "done" SSE event
|
// response.chunks (already added above) sum to the same text.
|
||||||
// signals completion. Pushing full content again would duplicate it
|
// merge_consecutive_blocks would concatenate delta_sum + full =
|
||||||
// in the frontend streaming store.
|
// 2× full, causing duplicate content in DB persistence.
|
||||||
all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: final_content.clone() });
|
|
||||||
return Ok(StreamResult {
|
return Ok(StreamResult {
|
||||||
content: final_content,
|
content: final_content,
|
||||||
reasoning_content: response.reasoning_content,
|
reasoning_content: response.reasoning_content,
|
||||||
|
|||||||
@ -240,17 +240,16 @@ async fn execute_streaming_tools(
|
|||||||
|
|
||||||
async fn handle_final_answer(
|
async fn handle_final_answer(
|
||||||
response: crate::client::StreamResponse,
|
response: crate::client::StreamResponse,
|
||||||
mut all_chunks: Vec<StreamChunk>, request: &AiRequest,
|
all_chunks: Vec<StreamChunk>, request: &AiRequest,
|
||||||
session_id: Uuid, version_id: Option<Uuid>,
|
session_id: Uuid, version_id: Option<Uuid>,
|
||||||
total_input_tokens: i64, total_output_tokens: i64,
|
total_input_tokens: i64, total_output_tokens: i64,
|
||||||
session_start: std::time::Instant,
|
session_start: std::time::Instant,
|
||||||
) -> Result<StreamResult> {
|
) -> Result<StreamResult> {
|
||||||
let full_content = response.content.clone();
|
let full_content = response.content.clone();
|
||||||
// Don't broadcast the done chunk via SSE/NATS — incremental deltas
|
// Don't push full content as a chunk — incremental deltas in
|
||||||
// already delivered the content; the separate completion event
|
// response.chunks (already accumulated above) sum to the same text.
|
||||||
// signals end of stream. Broadcasting full content again would
|
// merge_consecutive_blocks would concatenate delta_sum + full =
|
||||||
// duplicate it in the frontend streaming display.
|
// 2× full, causing duplicate content in DB persistence.
|
||||||
all_chunks.push(StreamChunk { chunk_type: StreamChunkType::Answer, content: response.content.clone() });
|
|
||||||
record_ai_session(&request.cache, &request.db, request.project.id, request.sender.uid, session_id, request.room.id, request.model.id, version_id.unwrap_or_default(), total_input_tokens, total_output_tokens, session_start.elapsed().as_millis() as i64).await;
|
record_ai_session(&request.cache, &request.db, request.project.id, request.sender.uid, session_id, request.room.id, request.model.id, version_id.unwrap_or_default(), total_input_tokens, total_output_tokens, session_start.elapsed().as_millis() as i64).await;
|
||||||
Ok(StreamResult { content: full_content, reasoning_content: response.reasoning_content, input_tokens: total_input_tokens, output_tokens: total_output_tokens, chunks: all_chunks })
|
Ok(StreamResult { content: full_content, reasoning_content: response.reasoning_content, input_tokens: total_input_tokens, output_tokens: total_output_tokens, chunks: all_chunks })
|
||||||
}
|
}
|
||||||
|
|||||||
@ -37,6 +37,19 @@ If local data does not contain the answer, state that clearly before considering
|
|||||||
- Be precise. Cite issue/PR numbers, commit hashes, or message IDs when available.
|
- Be precise. Cite issue/PR numbers, commit hashes, or message IDs when available.
|
||||||
- State ambiguity or uncertainty explicitly.
|
- State ambiguity or uncertainty explicitly.
|
||||||
- Prefer facts over speculation.
|
- Prefer facts over speculation.
|
||||||
|
|
||||||
|
## Rich Output
|
||||||
|
|
||||||
|
You may use ````html` code blocks to render rich HTML content directly on the page. The HTML is rendered inside a Shadow DOM — your `<style>` rules are scoped to your block only and will NOT affect the rest of the page.
|
||||||
|
|
||||||
|
```html
|
||||||
|
<style>
|
||||||
|
.card { padding:16px; border-radius:12px; background:var(--surface-elevated); }
|
||||||
|
</style>
|
||||||
|
<div class="card">Styled content</div>
|
||||||
|
```
|
||||||
|
|
||||||
|
**IMPORTANT**: Only content inside ````html` code blocks is rendered as HTML. Raw HTML outside code blocks will appear as plain text. Use inline styles or `<style>` blocks. Theme CSS variables (`--accent`, `--surface-elevated`, `--border-default`, etc.) are available. **JavaScript is NOT allowed.**
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
/// Room-specific system prompt appended when the AI is @mentioned in a chat room.
|
/// Room-specific system prompt appended when the AI is @mentioned in a chat room.
|
||||||
|
|||||||
@ -5,6 +5,7 @@ use agent::client::types::ChatRequestMessage;
|
|||||||
use agent::client::StreamChunkType;
|
use agent::client::StreamChunkType;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use models::ai::{ai_message, ai_conversation, AiMessage};
|
use models::ai::{ai_message, ai_conversation, AiMessage};
|
||||||
|
use models::agents::{model, model_version};
|
||||||
use queue::{ChatMessageEvent, ChatStreamChunkEvent};
|
use queue::{ChatMessageEvent, ChatStreamChunkEvent};
|
||||||
use sea_orm::{EntityTrait, QueryFilter, ColumnTrait, QueryOrder, ActiveModelTrait, Set, PaginatorTrait};
|
use sea_orm::{EntityTrait, QueryFilter, ColumnTrait, QueryOrder, ActiveModelTrait, Set, PaginatorTrait};
|
||||||
use service::AppService;
|
use service::AppService;
|
||||||
@ -334,6 +335,62 @@ pub fn create_chat_sse_stream(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record billing after successful AI response
|
||||||
|
let billing_version_id = model::Entity::find()
|
||||||
|
.filter(model::Column::Name.eq(&model_name))
|
||||||
|
.one(service.db.reader())
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.and_then(|m| {
|
||||||
|
// Resolve to active/default version for pricing lookup
|
||||||
|
let reader = service.db.reader();
|
||||||
|
async move {
|
||||||
|
model_version::Entity::find()
|
||||||
|
.filter(model_version::Column::ModelId.eq(m.id))
|
||||||
|
.filter(model_version::Column::Status.eq("active"))
|
||||||
|
.order_by_desc(model_version::Column::IsDefault)
|
||||||
|
.order_by_desc(model_version::Column::ReleaseDate)
|
||||||
|
.one(reader)
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.map(|v| v.id)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Some(version_id) = billing_version_id {
|
||||||
|
match agent::billing::record_ai_usage(
|
||||||
|
&service.db,
|
||||||
|
project_id,
|
||||||
|
user_id,
|
||||||
|
version_id,
|
||||||
|
stream_result.input_tokens,
|
||||||
|
stream_result.output_tokens,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(agent::billing::BillingResult::Success(record)) => {
|
||||||
|
tracing::info!(
|
||||||
|
cost = record.cost,
|
||||||
|
deducted_from = record.deducted_from.as_str(),
|
||||||
|
"chat_billing_deducted"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(agent::billing::BillingResult::InsufficientBalance { .. }) => {
|
||||||
|
tracing::warn!(
|
||||||
|
project_id = %project_id,
|
||||||
|
user_id = %user_id,
|
||||||
|
"chat_billing_insufficient_balance"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!(error = %e, "chat_billing_error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Broadcast final chat message with token usage
|
// Broadcast final chat message with token usage
|
||||||
let final_msg = ChatMessageEvent {
|
let final_msg = ChatMessageEvent {
|
||||||
message_id: user_message_id,
|
message_id: user_message_id,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user