fix(ai): cap max_output_tokens to 2048 hard limit
- Add resolve_room_max_tokens helper with hard cap at 2048 - Replace unwrap_or(4096) defaults across ai_service and process_ai
This commit is contained in:
parent
78598b4586
commit
54d6f01981
@ -11,14 +11,24 @@ use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
|
|||||||
|
|
||||||
use crate::connection::RoomConnectionManager;
|
use crate::connection::RoomConnectionManager;
|
||||||
use crate::error::RoomError;
|
use crate::error::RoomError;
|
||||||
use crate::service::ai_streaming;
|
|
||||||
use crate::service::ai_nonstreaming;
|
use crate::service::ai_nonstreaming;
|
||||||
|
use crate::service::ai_streaming;
|
||||||
use crate::service::history;
|
use crate::service::history;
|
||||||
use crate::service::patterns::{mention_bracket_re, mention_tag_re};
|
use crate::service::patterns::{mention_bracket_re, mention_tag_re};
|
||||||
use agent::chat::{AiRequest, ChatService};
|
use agent::chat::{AiRequest, ChatService};
|
||||||
use agent::react::ROOM_CONTEXT_PROMPT;
|
use agent::react::ROOM_CONTEXT_PROMPT;
|
||||||
use agent::tool::registry::ToolRegistry;
|
use agent::tool::registry::ToolRegistry;
|
||||||
|
|
||||||
|
/// Output-token limit used when no limit has been configured.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Largest output-token limit this module will ever request, regardless of
/// what has been configured.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective output-token limit from an optional configured value.
///
/// * `None` yields [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`].
/// * `Some(v)` yields `v` clamped into `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// The clamp is performed in `i64` *before* narrowing to `i32`, so values
/// outside the `i32` range are capped like any other oversized (or undersized)
/// value instead of silently falling back to the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    configured.map_or(ROOM_DEFAULT_MAX_OUTPUT_TOKENS, |requested| {
        let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
        // `bounded` is within 1..=cap, so the conversion cannot fail; the
        // fallback only exists to avoid a panic path.
        i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
    })
}
|
||||||
|
|
||||||
/// Service responsible for AI message generation orchestration.
|
/// Service responsible for AI message generation orchestration.
|
||||||
/// Decides which execution path to use (streaming/nonstreaming, ReAct/chat)
|
/// Decides which execution path to use (streaming/nonstreaming, ReAct/chat)
|
||||||
/// and dispatches accordingly.
|
/// and dispatches accordingly.
|
||||||
@ -228,7 +238,7 @@ impl RoomAiService {
|
|||||||
})
|
})
|
||||||
.unwrap_or_else(|| "guest".into());
|
.unwrap_or_else(|| "guest".into());
|
||||||
|
|
||||||
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
|
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
|
||||||
|
|
||||||
let mut request = AiRequest {
|
let mut request = AiRequest {
|
||||||
db: self.db.clone(),
|
db: self.db.clone(),
|
||||||
@ -304,7 +314,7 @@ impl RoomAiService {
|
|||||||
|
|
||||||
// Send the billing error as a visible message in the room
|
// Send the billing error as a visible message in the room
|
||||||
let error_content = format!(
|
let error_content = format!(
|
||||||
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
|
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
|
||||||
);
|
);
|
||||||
let _ = super::ai_common::create_and_publish_ai_message(
|
let _ = super::ai_common::create_and_publish_ai_message(
|
||||||
&self.db,
|
&self.db,
|
||||||
|
|||||||
@ -12,6 +12,16 @@ use agent::react::ROOM_CONTEXT_PROMPT;
|
|||||||
use agent::tool::registry::ToolRegistry;
|
use agent::tool::registry::ToolRegistry;
|
||||||
use models::projects::project_members;
|
use models::projects::project_members;
|
||||||
|
|
||||||
|
/// Output-token limit used when no limit has been configured.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Largest output-token limit this module will ever request, regardless of
/// what has been configured.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective output-token limit from an optional configured value.
///
/// Returns [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`] when `configured` is `None`;
/// otherwise returns the configured value limited to the inclusive range
/// `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// Limiting happens on the `i64` value before it is narrowed to `i32`, so a
/// configured value too large (or too small) for `i32` is still capped rather
/// than being replaced by the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    match configured {
        None => ROOM_DEFAULT_MAX_OUTPUT_TOKENS,
        Some(requested) => {
            let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
            // In range by construction; the fallback merely avoids a panic path.
            i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
        }
    }
}
|
||||||
|
|
||||||
impl RoomService {
|
impl RoomService {
|
||||||
pub async fn process_message_ai(
|
pub async fn process_message_ai(
|
||||||
&self,
|
&self,
|
||||||
@ -127,7 +137,7 @@ impl RoomService {
|
|||||||
})
|
})
|
||||||
.unwrap_or_else(|| "guest".into());
|
.unwrap_or_else(|| "guest".into());
|
||||||
|
|
||||||
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
|
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
|
||||||
|
|
||||||
let mut request = AiRequest {
|
let mut request = AiRequest {
|
||||||
db: self.db.clone(),
|
db: self.db.clone(),
|
||||||
@ -164,7 +174,13 @@ impl RoomService {
|
|||||||
});
|
});
|
||||||
request.history_cutoff_seq = cutoff_seq;
|
request.history_cutoff_seq = cutoff_seq;
|
||||||
|
|
||||||
request.room_preamble = Some(build_room_preamble(&room, &project, &sender, &sender_role, &optimized_history));
|
request.room_preamble = Some(build_room_preamble(
|
||||||
|
&room,
|
||||||
|
&project,
|
||||||
|
&sender,
|
||||||
|
&sender_role,
|
||||||
|
&optimized_history,
|
||||||
|
));
|
||||||
|
|
||||||
let use_streaming = ai_config.stream;
|
let use_streaming = ai_config.stream;
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user