fix(ai): cap max_output_tokens to 2048 hard limit
- Add resolve_room_max_tokens helper with hard cap at 2048 - Replace unwrap_or(4096) defaults across ai_service and process_ai
This commit is contained in:
parent
78598b4586
commit
54d6f01981
@ -11,14 +11,24 @@ use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
|
||||
|
||||
use crate::connection::RoomConnectionManager;
|
||||
use crate::error::RoomError;
|
||||
use crate::service::ai_streaming;
|
||||
use crate::service::ai_nonstreaming;
|
||||
use crate::service::ai_streaming;
|
||||
use crate::service::history;
|
||||
use crate::service::patterns::{mention_bracket_re, mention_tag_re};
|
||||
use agent::chat::{AiRequest, ChatService};
|
||||
use agent::react::ROOM_CONTEXT_PROMPT;
|
||||
use agent::tool::registry::ToolRegistry;
|
||||
|
||||
/// Output-token budget used when a room has no configured limit.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Upper bound on the output-token budget, applied regardless of configuration.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective `max_tokens` value for a room AI request.
///
/// * `None` yields [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`].
/// * `Some(v)` yields `v` limited to `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// The limit is applied while the value is still an `i64`, so values that do
/// not fit in an `i32` saturate to the nearest bound (the hard cap for huge
/// values, `1` for very negative ones) instead of silently falling back to
/// the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    match configured {
        Some(requested) => {
            let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
            // `bounded` is within `1..=HARD_CAP`, so the conversion cannot fail;
            // the fallback only exists to avoid a panicking path.
            i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
        }
        None => ROOM_DEFAULT_MAX_OUTPUT_TOKENS,
    }
}
|
||||
|
||||
/// Service responsible for AI message generation orchestration.
|
||||
/// Decides which execution path to use (streaming/nonstreaming, ReAct/chat)
|
||||
/// and dispatches accordingly.
|
||||
@ -228,7 +238,7 @@ impl RoomAiService {
|
||||
})
|
||||
.unwrap_or_else(|| "guest".into());
|
||||
|
||||
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
|
||||
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
|
||||
|
||||
let mut request = AiRequest {
|
||||
db: self.db.clone(),
|
||||
@ -304,7 +314,7 @@ impl RoomAiService {
|
||||
|
||||
// Send the billing error as a visible message in the room
|
||||
let error_content = format!(
|
||||
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
|
||||
"⚠️ Billing Error: Insufficient balance. Your project and personal account do not have enough funds to process this AI request. Please add credits to continue using AI features."
|
||||
);
|
||||
let _ = super::ai_common::create_and_publish_ai_message(
|
||||
&self.db,
|
||||
|
||||
@ -12,6 +12,16 @@ use agent::react::ROOM_CONTEXT_PROMPT;
|
||||
use agent::tool::registry::ToolRegistry;
|
||||
use models::projects::project_members;
|
||||
|
||||
/// Output-token budget used when a room has no configured limit.
const ROOM_DEFAULT_MAX_OUTPUT_TOKENS: i32 = 1024;

/// Upper bound on the output-token budget, applied regardless of configuration.
const ROOM_MAX_OUTPUT_TOKENS_HARD_CAP: i32 = 2048;

/// Resolves the effective `max_tokens` value for a room AI request.
///
/// * `None` yields [`ROOM_DEFAULT_MAX_OUTPUT_TOKENS`].
/// * `Some(v)` yields `v` limited to `1..=ROOM_MAX_OUTPUT_TOKENS_HARD_CAP`.
///
/// The limit is applied while the value is still an `i64`, so values that do
/// not fit in an `i32` saturate to the nearest bound (the hard cap for huge
/// values, `1` for very negative ones) instead of silently falling back to
/// the default.
fn resolve_room_max_tokens(configured: Option<i64>) -> i32 {
    match configured {
        Some(requested) => {
            let bounded = requested.clamp(1, i64::from(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP));
            // `bounded` is within `1..=HARD_CAP`, so the conversion cannot fail;
            // the fallback only exists to avoid a panicking path.
            i32::try_from(bounded).unwrap_or(ROOM_MAX_OUTPUT_TOKENS_HARD_CAP)
        }
        None => ROOM_DEFAULT_MAX_OUTPUT_TOKENS,
    }
}
|
||||
|
||||
impl RoomService {
|
||||
pub async fn process_message_ai(
|
||||
&self,
|
||||
@ -127,7 +137,7 @@ impl RoomService {
|
||||
})
|
||||
.unwrap_or_else(|| "guest".into());
|
||||
|
||||
let max_tokens = ai_config.max_tokens.unwrap_or(4096) as i32;
|
||||
let max_tokens = resolve_room_max_tokens(ai_config.max_tokens);
|
||||
|
||||
let mut request = AiRequest {
|
||||
db: self.db.clone(),
|
||||
@ -164,7 +174,13 @@ impl RoomService {
|
||||
});
|
||||
request.history_cutoff_seq = cutoff_seq;
|
||||
|
||||
request.room_preamble = Some(build_room_preamble(&room, &project, &sender, &sender_role, &optimized_history));
|
||||
request.room_preamble = Some(build_room_preamble(
|
||||
&room,
|
||||
&project,
|
||||
&sender,
|
||||
&sender_role,
|
||||
&optimized_history,
|
||||
));
|
||||
|
||||
let use_streaming = ai_config.stream;
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user