fix(billing): track actual tokens in ReAct mode instead of hardcoded 0/0
- `process_react` now returns a `(String, i64, i64)` tuple with token counts
- Extract token stats from the rig Agent `FinalResponse` usage field
- Both streaming and non-streaming ReAct modes now bill correctly
This commit is contained in:
parent
7f927a4b6b
commit
6edacbcdf2
@ -1065,7 +1065,7 @@ impl ChatService {
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<String>
|
pub async fn process_react<C>(&self, request: &AiRequest, mut on_chunk: C) -> Result<(String, i64, i64)>
|
||||||
where
|
where
|
||||||
C: FnMut(crate::react::ReactStep) + Send,
|
C: FnMut(crate::react::ReactStep) + Send,
|
||||||
{
|
{
|
||||||
@ -1232,7 +1232,7 @@ impl ChatService {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
Ok(final_content)
|
Ok((final_content, total_input_tokens, total_output_tokens))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -33,7 +33,7 @@ pub async fn process_message_ai_react_nonstreaming(
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
match final_answer {
|
match final_answer {
|
||||||
Ok(response) => {
|
Ok((response, input_tokens, output_tokens)) => {
|
||||||
if let Err(e) = create_and_publish_ai_message(
|
if let Err(e) = create_and_publish_ai_message(
|
||||||
&db,
|
&db,
|
||||||
&cache,
|
&cache,
|
||||||
|
|||||||
@ -155,7 +155,14 @@ pub async fn process_message_ai_react_streaming(
|
|||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let final_content = lock_or_recover(&answer_buffer).clone();
|
let (final_content, input_tokens, output_tokens, err_msg, should_log) = match result {
|
||||||
|
Ok((content, input, output)) => (content, input, output, None, false),
|
||||||
|
Err(e) => {
|
||||||
|
let msg = format!("[Agent error: {}]", e);
|
||||||
|
tracing::error!(error = %e, "ReAct streaming failed");
|
||||||
|
(String::new(), 0, 0, Some(msg), true)
|
||||||
|
}
|
||||||
|
};
|
||||||
let all_steps = lock_or_recover(&steps).clone();
|
let all_steps = lock_or_recover(&steps).clone();
|
||||||
let reasoning_chain: String = all_steps
|
let reasoning_chain: String = all_steps
|
||||||
.iter()
|
.iter()
|
||||||
@ -175,12 +182,6 @@ pub async fn process_message_ai_react_streaming(
|
|||||||
} else {
|
} else {
|
||||||
String::from("[No output from reasoning agent]")
|
String::from("[No output from reasoning agent]")
|
||||||
};
|
};
|
||||||
|
|
||||||
let (err_msg, should_log) = match &result {
|
|
||||||
Err(e) => (Some(format!("[Agent error: {}]", e)), true),
|
|
||||||
_ => (None, false),
|
|
||||||
};
|
|
||||||
|
|
||||||
let content_to_persist = if let Some(msg) = &err_msg {
|
let content_to_persist = if let Some(msg) = &err_msg {
|
||||||
format!(
|
format!(
|
||||||
"{}\n[Error during reasoning: {}]",
|
"{}\n[Error during reasoning: {}]",
|
||||||
@ -192,10 +193,6 @@ pub async fn process_message_ai_react_streaming(
|
|||||||
content_to_persist
|
content_to_persist
|
||||||
};
|
};
|
||||||
|
|
||||||
if should_log {
|
|
||||||
tracing::error!(error = %result.as_ref().unwrap_err(), "ReAct streaming failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
let persist_content = content_to_persist.trim().to_string();
|
let persist_content = content_to_persist.trim().to_string();
|
||||||
if persist_content.is_empty() {
|
if persist_content.is_empty() {
|
||||||
return;
|
return;
|
||||||
@ -254,13 +251,12 @@ pub async fn process_message_ai_react_streaming(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Record billing (non-fatal)
|
// Record billing (non-fatal)
|
||||||
// TODO: ReAct agent does not track token counts yet; billing with 0/0
|
|
||||||
if let Err(e) = super::billing::record_ai_usage(
|
if let Err(e) = super::billing::record_ai_usage(
|
||||||
&db,
|
&db,
|
||||||
project_id_inner,
|
project_id_inner,
|
||||||
model_id,
|
model_id,
|
||||||
0,
|
input_tokens,
|
||||||
0,
|
output_tokens,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user