use models::rooms::room_message::Model as RoomMessageModel; use models::users::user::{Column as UserCol, Entity as User}; use sea_orm::{ColumnTrait, EntityTrait, QueryFilter}; use crate::AgentError; use crate::client::call_with_params; use crate::client::types::ChatRequestMessage; use crate::compact::types::{CompactConfig, MessageSummary}; use crate::tokent::{TokenUsage, count_message_text}; const DEFAULT_MODEL_CONTEXT_LIMIT: usize = 128_000; const MODEL_INPUT_RATIO_NUMERATOR: usize = 85; const MODEL_INPUT_RATIO_DENOMINATOR: usize = 100; const MIN_ROUND_SUMMARY_TOKENS: usize = 64; #[derive(Clone, Copy)] enum SummaryKind { Conversation, RoomIncrement, } impl super::CompactService { pub async fn summarize_room_increment( &self, previous_summary: Option<&str>, messages: &[RoomMessageModel], max_summary_tokens: usize, ) -> Result<(String, Option), AgentError> { let user_ids: Vec = messages .iter() .filter_map(|m| m.sender_id) .collect::>() .into_iter() .collect(); let user_name_map = self.get_user_name_map(&user_ids).await?; let blocks = messages .iter() .map(|m| { let sender = if let Some(user_id) = m.sender_id { user_name_map .get(&user_id) .cloned() .unwrap_or_else(|| m.sender_type.to_string()) } else { m.sender_type.to_string() }; format!("[{}] {}: {}", m.send_at, sender, m.content) }) .collect::>(); self.summarize_blocks_with_optional_previous( blocks, previous_summary, max_summary_tokens, SummaryKind::RoomIncrement, ) .await } pub async fn summarize_messages( &self, messages: &[RoomMessageModel], max_summary_tokens: usize, ) -> Result<(String, Option), AgentError> { let user_ids: Vec = messages .iter() .filter_map(|m| m.sender_id) .collect::>() .into_iter() .collect(); let user_name_map = self.get_user_name_map(&user_ids).await?; let blocks = messages .iter() .map(|m| { let sender = if let Some(user_id) = m.sender_id { user_name_map .get(&user_id) .cloned() .unwrap_or_else(|| m.sender_type.to_string()) } else { m.sender_type.to_string() }; format!("[{}] {}: {}", m.send_at, sender, m.content) }) .collect::>(); self.summarize_blocks_with_optional_previous( blocks, None, max_summary_tokens, SummaryKind::Conversation, ) .await } pub fn message_to_summary( m: &RoomMessageModel, user_name_map: &std::collections::HashMap, ) -> MessageSummary { let sender_name = if let Some(user_id) = m.sender_id { user_name_map .get(&user_id) .cloned() .unwrap_or_else(|| m.sender_type.to_string()) } else { m.sender_type.to_string() }; MessageSummary { id: m.id, sender_type: m.sender_type.clone(), sender_id: m.sender_id, sender_name, content: m.content.clone(), content_type: m.content_type.clone(), tool_call_id: None, send_at: m.send_at, } } pub async fn get_user_name_map( &self, user_ids: &[uuid::Uuid], ) -> Result, AgentError> { use std::collections::HashMap; let mut map = HashMap::new(); if !user_ids.is_empty() { let users = User::find() .filter(UserCol::Uid.is_in(user_ids.to_vec())) .all(&self.db) .await .map_err(|e| AgentError::Internal(e.to_string()))?; for user in users { map.insert(user.uid, user.username); } } Ok(map) } async fn summarize_blocks_with_optional_previous( &self, blocks: Vec, previous_summary: Option<&str>, max_summary_tokens: usize, kind: SummaryKind, ) -> Result<(String, Option), AgentError> { let final_budget = Self::final_summary_budget(max_summary_tokens); let input_budget = self.safe_model_input_budget(); let round_budget = Self::round_summary_budget(final_budget, input_budget); let mut total_usage = TokenUsage::default(); let mut has_usage = false; let fitted_chunks = self.split_blocks_to_fit(blocks, input_budget, round_budget, kind, false)?; let mut partial_summaries = Vec::new(); for chunk in fitted_chunks { let prompt = self.build_prompt(kind, false, &chunk, round_budget); let (summary, usage) = self .invoke_summary_prompt(&prompt, round_budget, Self::temperature_for(kind)) .await?; Self::accumulate_usage(&mut total_usage, &mut has_usage, usage); partial_summaries.push(summary); } if let Some(previous) = previous_summary .map(str::trim) .filter(|summary| !summary.is_empty()) { partial_summaries.insert(0, previous.to_string()); } if partial_summaries.is_empty() { return Ok((String::new(), None)); } if partial_summaries.len() == 1 && previous_summary.is_none() { return Ok(( partial_summaries.remove(0), if has_usage { Some(total_usage) } else { None }, )); } let final_summary = self .merge_summary_rounds( partial_summaries, final_budget, round_budget, kind, &mut total_usage, &mut has_usage, ) .await?; Ok(( final_summary, if has_usage { Some(total_usage) } else { None }, )) } async fn merge_summary_rounds( &self, mut summaries: Vec, final_budget: usize, round_budget: usize, kind: SummaryKind, total_usage: &mut TokenUsage, has_usage: &mut bool, ) -> Result { let input_budget = self.safe_model_input_budget(); while summaries.len() > 1 { let current_budget = if summaries.len() <= 2 { final_budget } else { round_budget }; let mut next_round = Vec::new(); let mut idx = 0usize; while idx < summaries.len() { if idx + 1 >= summaries.len() { next_round.push(summaries[idx].clone()); idx += 1; continue; } let pair = vec![summaries[idx].clone(), summaries[idx + 1].clone()]; let fitted_pairs = self.split_blocks_to_fit(pair, input_budget, current_budget, kind, true)?; for pair_text in fitted_pairs { let prompt = self.build_prompt(kind, true, &pair_text, current_budget); let (summary, usage) = self .invoke_summary_prompt(&prompt, current_budget, Self::temperature_for(kind)) .await?; Self::accumulate_usage(total_usage, has_usage, usage); next_round.push(summary); } idx += 2; } summaries = next_round; } summaries .pop() .ok_or_else(|| AgentError::Internal("summary merge produced no output".into())) } async fn invoke_summary_prompt( &self, prompt: &str, max_summary_tokens: usize, temperature: f32, ) -> Result<(String, Option), AgentError> { let response = call_with_params( &[ChatRequestMessage::user(prompt.to_string())], &self.model, &self.ai_client_config, temperature, max_summary_tokens as u32, None, None, None, ) .await .map_err(|e| AgentError::OpenAi(e.to_string()))?; let usage = TokenUsage::from_remote(response.input_tokens as u32, response.output_tokens as u32); Ok((response.content, usage)) } fn split_blocks_to_fit( &self, blocks: Vec, input_budget: usize, max_summary_tokens: usize, kind: SummaryKind, is_merge: bool, ) -> Result, AgentError> { let mut chunks = Vec::new(); self.collect_fitting_chunks( blocks, input_budget, max_summary_tokens, kind, is_merge, &mut chunks, )?; Ok(chunks) } fn collect_fitting_chunks( &self, blocks: Vec, input_budget: usize, max_summary_tokens: usize, kind: SummaryKind, is_merge: bool, chunks: &mut Vec, ) -> Result<(), AgentError> { let body = Self::join_blocks(&blocks, is_merge); let prompt = self.build_prompt(kind, is_merge, &body, max_summary_tokens); if self.estimate_tokens(&prompt) <= input_budget { chunks.push(body); return Ok(()); } if blocks.len() > 1 { let mid = blocks.len() / 2; self.collect_fitting_chunks( blocks[..mid].to_vec(), input_budget, max_summary_tokens, kind, is_merge, chunks, )?; self.collect_fitting_chunks( blocks[mid..].to_vec(), input_budget, max_summary_tokens, kind, is_merge, chunks, )?; return Ok(()); } let single = blocks .into_iter() .next() .ok_or_else(|| AgentError::Internal("cannot split empty summary block".into()))?; let (left, right) = Self::split_text_in_half(&single)?; self.collect_fitting_chunks( vec![left], input_budget, max_summary_tokens, kind, is_merge, chunks, )?; self.collect_fitting_chunks( vec![right], input_budget, max_summary_tokens, kind, is_merge, chunks, )?; Ok(()) } fn build_prompt( &self, kind: SummaryKind, is_merge: bool, body: &str, max_summary_tokens: usize, ) -> String { match (kind, is_merge) { (SummaryKind::Conversation, false) => format!( "Summarise the following conversation concisely, preserving all key facts, \ decisions, and any pending or in-progress work. \ The summary MUST NOT exceed {} tokens. \ Use this format:\n\n\ **Summary:** \n\ **Key decisions:** \n\ **Open items:** \n\n\ Conversation:\n\n{}", max_summary_tokens, body ), (SummaryKind::Conversation, true) => format!( "Merge the following partial conversation summaries into a single concise summary. \ Deduplicate overlap, preserve chronology, and keep all concrete decisions, \ status updates, and unresolved work. The summary MUST NOT exceed {} tokens. \ Use this format:\n\n\ **Summary:** \n\ **Key decisions:** \n\ **Open items:** \n\n\ Partial summaries:\n\n{}", max_summary_tokens, body ), (SummaryKind::RoomIncrement, false) => format!( "Create an incremental room summary from the new messages below. \ Deduplicate repeated messages, clean noise, keep chronological order, and preserve \ decisions, facts, assignments/owners, unresolved questions, and concrete next steps. \ The result MUST NOT exceed {} tokens.\n\n\ Format:\n\ **Summary:** \n\ **Decisions:** \n\ **Owners:** task or 'none'>\n\ **Open items:** \n\n\ New messages:\n\n{}", max_summary_tokens, body ), (SummaryKind::RoomIncrement, true) => format!( "Merge the following partial room summaries into one room summary. Deduplicate overlap, \ keep chronology, preserve decisions, facts, assignments/owners, unresolved questions, \ and concrete next steps. The result MUST NOT exceed {} tokens.\n\n\ Format:\n\ **Summary:** \n\ **Decisions:** \n\ **Owners:** task or 'none'>\n\ **Open items:** \n\n\ Partial summaries:\n\n{}", max_summary_tokens, body ), } } fn join_blocks(blocks: &[String], is_merge: bool) -> String { if is_merge { blocks .iter() .enumerate() .map(|(index, block)| format!("### Partial Summary {}\n{}", index + 1, block)) .collect::>() .join("\n\n") } else { blocks.join("\n") } } fn split_text_in_half(text: &str) -> Result<(String, String), AgentError> { if text.chars().count() < 2 { return Err(AgentError::Internal( "single summary block exceeds input budget and cannot be split".into(), )); } let midpoint = text.len() / 2; let mut split_at = text.floor_char_boundary(midpoint); if split_at == 0 || split_at >= text.len() { split_at = text.ceil_char_boundary(midpoint); } if split_at == 0 || split_at >= text.len() { return Err(AgentError::Internal( "failed to split oversized summary block".into(), )); } Ok((text[..split_at].to_string(), text[split_at..].to_string())) } fn estimate_tokens(&self, text: &str) -> usize { count_message_text(text, &self.model).unwrap_or_else(|_| (text.len() / 4).max(1)) } fn safe_model_input_budget(&self) -> usize { Self::safe_model_input_budget_from_limit(self.model_context_limit) } fn final_summary_budget(max_summary_tokens: usize) -> usize { max_summary_tokens.clamp( CompactConfig::MIN_SUMMARY_TOKENS, CompactConfig::MAX_SUMMARY_TOKENS, ) } fn round_summary_budget(final_budget: usize, input_budget: usize) -> usize { final_budget.min((input_budget / 8).max(MIN_ROUND_SUMMARY_TOKENS)) } fn temperature_for(kind: SummaryKind) -> f32 { match kind { SummaryKind::Conversation => 0.3, SummaryKind::RoomIncrement => 0.2, } } fn safe_model_input_budget_from_limit(model_context_limit: Option) -> usize { let context_limit = model_context_limit .unwrap_or(DEFAULT_MODEL_CONTEXT_LIMIT) .max(1); context_limit .saturating_mul(MODEL_INPUT_RATIO_NUMERATOR) .saturating_div(MODEL_INPUT_RATIO_DENOMINATOR) .max(1) } fn accumulate_usage(total: &mut TokenUsage, has_usage: &mut bool, usage: Option) { if let Some(usage) = usage { total.input_tokens += usage.input_tokens; total.output_tokens += usage.output_tokens; *has_usage = true; } } } #[cfg(test)] mod tests { use super::super::CompactService; #[test] fn room_summary_uses_eighty_five_percent_input_budget() { assert_eq!( CompactService::safe_model_input_budget_from_limit(Some(1000)), 850 ); } #[test] fn oversized_text_is_split_in_half() { let (left, right) = CompactService::split_text_in_half("abcdefgh").unwrap(); assert_eq!(format!("{}{}", left, right), "abcdefgh"); assert!(!left.is_empty()); assert!(!right.is_empty()); } }