fix(billing): track actual tokens in ReAct mode instead of hardcoded 0/0

- process_react now returns (String, i64, i64) tuple with token counts - Extract token stats from rig Agent FinalResponse usage field - Both streaming and non-streaming ReAct modes now bill correctly
2026-04-28 10:04:54 +08:00 · 2026-04-28 10:04:54 +08:00 · 6edacbcdf2
commit 6edacbcdf2
parent 7f927a4b6b
3 changed files with 13 additions and 17 deletions
--- a/libs/agent/chat/service.rs
+++ b/libs/agent/chat/service.rs
@ -1065,7 +1065,7 @@ impl ChatService {
            .await
    }
-    pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<String>
+    pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<(String, i64, i64)>
    where
        C: FnMut(crate::react::ReactStep) + Send,
    {
@ -1232,7 +1232,7 @@ impl ChatService {
        )
        .await;
-        Ok(final_content)
+        Ok((final_content, total_input_tokens, total_output_tokens))
    }
 }
--- a/libs/room/src/service/ai_react_nonstreaming.rs
+++ b/libs/room/src/service/ai_react_nonstreaming.rs
@ -33,7 +33,7 @@ pub async fn process_message_ai_react_nonstreaming(
            .await;
        match final_answer {
-            Ok(response) => {
+            Ok((response, input_tokens, output_tokens)) => {
                if let Err(e) = create_and_publish_ai_message(
                    &db,
                    &cache,
--- a/libs/room/src/service/ai_react_streaming.rs
+++ b/libs/room/src/service/ai_react_streaming.rs
@ -155,7 +155,14 @@ pub async fn process_message_ai_react_streaming(
            })
            .await;
-        let final_content = lock_or_recover(&answer_buffer).clone();
+        let (final_content, input_tokens, output_tokens, err_msg, should_log) = match result {
            Ok((content, input, output)) => (content, input, output, None, false),
            Err(e) => {
                let msg = format!("[Agent error: {}]", e);
                tracing::error!(error = %e, "ReAct streaming failed");
                (String::new(), 0, 0, Some(msg), true)
            }
        };
        let all_steps = lock_or_recover(&steps).clone();
        let reasoning_chain: String = all_steps
            .iter()
@ -175,12 +182,6 @@ pub async fn process_message_ai_react_streaming(
        } else {
            String::from("[No output from reasoning agent]")
        };
        let (err_msg, should_log) = match &result {
            Err(e) => (Some(format!("[Agent error: {}]", e)), true),
            _ => (None, false),
        };
        let content_to_persist = if let Some(msg) = &err_msg {
            format!(
                "{}\n[Error during reasoning: {}]",
@ -192,10 +193,6 @@ pub async fn process_message_ai_react_streaming(
            content_to_persist
        };
        if should_log {
            tracing::error!(error = %result.as_ref().unwrap_err(), "ReAct streaming failed");
        }
        let persist_content = content_to_persist.trim().to_string();
        if persist_content.is_empty() {
            return;
@ -254,13 +251,12 @@ pub async fn process_message_ai_react_streaming(
            }
            // Record billing (non-fatal)
            // TODO: ReAct agent does not track token counts yet; billing with 0/0
            if let Err(e) = super::billing::record_ai_usage(
                &db,
                project_id_inner,
                model_id,
-                0,
+                input_tokens,
-                0,
+                output_tokens,
            )
            .await
            {