fix(billing): track actual tokens in ReAct mode instead of hardcoded 0/0

- process_react now returns Result<(String, i64, i64)>: the final content plus the input and output token counts
- Extract token stats from the usage field of the rig Agent's FinalResponse
- Both streaming and non-streaming ReAct modes now bill correctly
This commit is contained in:
ZhenYi 2026-04-28 10:04:54 +08:00
parent 7f927a4b6b
commit 6edacbcdf2
3 changed files with 13 additions and 17 deletions

View File

@ -1065,7 +1065,7 @@ impl ChatService {
.await .await
} }
pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<String> pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<(String, i64, i64)>
where where
C: FnMut(crate::react::ReactStep) + Send, C: FnMut(crate::react::ReactStep) + Send,
{ {
@ -1232,7 +1232,7 @@ impl ChatService {
) )
.await; .await;
Ok(final_content) Ok((final_content, total_input_tokens, total_output_tokens))
} }
} }

View File

@ -33,7 +33,7 @@ pub async fn process_message_ai_react_nonstreaming(
.await; .await;
match final_answer { match final_answer {
Ok(response) => { Ok((response, input_tokens, output_tokens)) => {
if let Err(e) = create_and_publish_ai_message( if let Err(e) = create_and_publish_ai_message(
&db, &db,
&cache, &cache,

View File

@ -155,7 +155,14 @@ pub async fn process_message_ai_react_streaming(
}) })
.await; .await;
let final_content = lock_or_recover(&answer_buffer).clone(); let (final_content, input_tokens, output_tokens, err_msg, should_log) = match result {
Ok((content, input, output)) => (content, input, output, None, false),
Err(e) => {
let msg = format!("[Agent error: {}]", e);
tracing::error!(error = %e, "ReAct streaming failed");
(String::new(), 0, 0, Some(msg), true)
}
};
let all_steps = lock_or_recover(&steps).clone(); let all_steps = lock_or_recover(&steps).clone();
let reasoning_chain: String = all_steps let reasoning_chain: String = all_steps
.iter() .iter()
@ -175,12 +182,6 @@ pub async fn process_message_ai_react_streaming(
} else { } else {
String::from("[No output from reasoning agent]") String::from("[No output from reasoning agent]")
}; };
let (err_msg, should_log) = match &result {
Err(e) => (Some(format!("[Agent error: {}]", e)), true),
_ => (None, false),
};
let content_to_persist = if let Some(msg) = &err_msg { let content_to_persist = if let Some(msg) = &err_msg {
format!( format!(
"{}\n[Error during reasoning: {}]", "{}\n[Error during reasoning: {}]",
@ -192,10 +193,6 @@ pub async fn process_message_ai_react_streaming(
content_to_persist content_to_persist
}; };
if should_log {
tracing::error!(error = %result.as_ref().unwrap_err(), "ReAct streaming failed");
}
let persist_content = content_to_persist.trim().to_string(); let persist_content = content_to_persist.trim().to_string();
if persist_content.is_empty() { if persist_content.is_empty() {
return; return;
@ -254,13 +251,12 @@ pub async fn process_message_ai_react_streaming(
} }
// Record billing (non-fatal) // Record billing (non-fatal)
// TODO: ReAct agent does not track token counts yet; billing with 0/0
if let Err(e) = super::billing::record_ai_usage( if let Err(e) = super::billing::record_ai_usage(
&db, &db,
project_id_inner, project_id_inner,
model_id, model_id,
0, input_tokens,
0, output_tokens,
) )
.await .await
{ {