fix(ai): cap max_output_tokens to 2048 hard limit

- Add resolve_room_max_tokens helper with hard cap at 2048
- Replace unwrap_or(4096) defaults across ai_service and process_ai
This commit is contained in:
ZhenYi 2026-05-17 17:32:29 +08:00
parent 78598b4586
commit 54d6f01981
2 changed files with 31 additions and 5 deletions

View File

@ -11,14 +11,24 @@ use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use crate::connection::RoomConnectionManager;
use crate::error::RoomError;
use crate::service::ai_streaming;
use crate::service::ai_nonstreaming;
use crate::service::ai_streaming;
use crate::service::history;
use crate::service::patterns::{mention_bracket_re, mention_tag_re};
use agent::chat::{AiRequest, ChatService};
use agent::react::ROOM_CONTEXT_PROMPT;
use agent::tool::registry::ToolRegistry;
/// Output-token budget used when the room has no configured `max_tokens`.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Upper bound on the output-token budget, regardless of configuration.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective output-token limit for a room AI request.
///
/// * `None` yields [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`].
/// * `Some(v)` yields `v` bounded to `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// The bound is applied while the value is still an `i64`, so a configured
/// value outside the `i32` range saturates to the nearest bound instead of
/// silently falling back to the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    match configured {
        Some(requested) => {
            // Clamp before narrowing: the result always fits in an `i32`.
            let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
            i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
        }
        None => ROOM_DEFAULT_MAX_OUTPUT_TOKENS.clamp(1, ROOM_MAX_OUTPUT_TOKENS_HARD_CAP),
    }
}
/// Service responsible for AI message generation orchestration.
/// Decides which execution path to use (streaming/nonstreaming, ReAct/chat)
/// and dispatches accordingly.
@ -228,7 +238,7 @@ impl RoomAiService {
})
.unwrap_or_else(|| "guest".into());
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
let mut request = AiRequest {
db: self.db.clone(),
@ -304,7 +314,7 @@ impl RoomAiService {
// Send the billing error as a visible message in the room
let error_content = format!(
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
);
let _ = super::ai_common::create_and_publish_ai_message(
&self.db,

View File

@ -12,6 +12,16 @@ use agent::react::ROOM_CONTEXT_PROMPT;
use agent::tool::registry::ToolRegistry;
use models::projects::project_members;
/// Output-token budget used when the room has no configured `max_tokens`.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Upper bound on the output-token budget, regardless of configuration.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective output-token limit for a room AI request.
///
/// * `None` yields [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`].
/// * `Some(v)` yields `v` bounded to `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// The bound is applied while the value is still an `i64`, so a configured
/// value outside the `i32` range saturates to the nearest bound instead of
/// silently falling back to the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    match configured {
        Some(requested) => {
            // Clamp before narrowing: the result always fits in an `i32`.
            let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
            i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
        }
        None => ROOM_DEFAULT_MAX_OUTPUT_TOKENS.clamp(1, ROOM_MAX_OUTPUT_TOKENS_HARD_CAP),
    }
}
impl RoomService {
pub async fn process_message_ai(
&self,
@ -127,7 +137,7 @@ impl RoomService {
})
.unwrap_or_else(|| "guest".into());
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
let mut request = AiRequest {
db: self.db.clone(),
@ -164,7 +174,13 @@ impl RoomService {
});
request.history_cutoff_seq = cutoff_seq;
request.room_preamble = Some(build_room_preamble(&room, &project, &sender, &sender_role, &optimized_history));
request.room_preamble = Some(build_room_preamble(
&room,
&project,
&sender,
&sender_role,
&optimized_history,
));
let use_streaming = ai_config.stream;