gitdataai/libs/agent/compact/summarizer.rs
ZhenYi 8d144ac139 feat(agent): add architect, debugger, implementer, tester, security sub-agent roles
Extend delegation system with 5 new specialized roles alongside
researcher/analyst/reviewer. Each role has curated tool access.
Refactor profile lookup to use profile_for_role_name and update
compact/summarizer and tool context accordingly.
2026-05-18 20:42:57 +08:00

514 lines
17 KiB
Rust

use models::rooms::room_message::Model as RoomMessageModel;
use models::users::user::{Column as UserCol, Entity as User};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use crate::AgentError;
use crate::client::call_with_params;
use crate::client::types::ChatRequestMessage;
use crate::compact::types::{CompactConfig, MessageSummary};
use crate::tokent::{TokenUsage, count_message_text};
/// Context window size, in tokens, assumed when the service has no explicit model context limit.
const DEFAULT_MODEL_CONTEXT_LIMIT: usize = 128_000;
/// Numerator of the share of the context window that may be spent on prompt input (85/100).
const MODEL_INPUT_RATIO_NUMERATOR: usize = 85;
/// Denominator paired with `MODEL_INPUT_RATIO_NUMERATOR`.
const MODEL_INPUT_RATIO_DENOMINATOR: usize = 100;
/// Floor applied to the input-derived cap (`input_budget / 8`) on intermediate summary size.
const MIN_ROUND_SUMMARY_TOKENS: usize = 64;
/// Selects which prompt template and sampling temperature a summary request uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SummaryKind {
    /// Standalone conversation summary, as requested by `summarize_messages`.
    Conversation,
    /// Incremental room summary, as requested by `summarize_room_increment`.
    RoomIncrement,
}
impl super::CompactService {
/// Summarises newly arrived room messages, optionally folding in `previous_summary`.
///
/// Every message is rendered as `[send_at] sender: content`. The sender is the
/// username found for the message's `sender_id`; when there is no id, or no user
/// was found for it, the message's `sender_type` is used instead.
///
/// Returns the summary text together with the token usage reported along the way,
/// if any.
///
/// # Errors
/// Propagates failures from the user lookup and from the summarisation pipeline.
pub async fn summarize_room_increment(
    &self,
    previous_summary: Option<&str>,
    messages: &[RoomMessageModel],
    max_summary_tokens: usize,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    // Look each distinct sender up only once.
    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let names = self.get_user_name_map(&user_ids).await?;

    let mut blocks = Vec::with_capacity(messages.len());
    for message in messages {
        let sender = match message.sender_id.and_then(|id| names.get(&id)) {
            Some(name) => name.clone(),
            None => message.sender_type.to_string(),
        };
        blocks.push(format!(
            "[{}] {}: {}",
            message.send_at, sender, message.content
        ));
    }

    self.summarize_blocks_with_optional_previous(
        blocks,
        previous_summary,
        max_summary_tokens,
        SummaryKind::RoomIncrement,
    )
    .await
}
/// Summarises `messages` as a standalone conversation (no previous summary is merged in).
///
/// Every message is rendered as `[send_at] sender: content`. The sender is the
/// username found for the message's `sender_id`; when there is no id, or no user
/// was found for it, the message's `sender_type` is used instead.
///
/// Returns the summary text together with the token usage reported along the way,
/// if any.
///
/// # Errors
/// Propagates failures from the user lookup and from the summarisation pipeline.
pub async fn summarize_messages(
    &self,
    messages: &[RoomMessageModel],
    max_summary_tokens: usize,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    // Look each distinct sender up only once.
    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let names = self.get_user_name_map(&user_ids).await?;

    let render = |message: &RoomMessageModel| {
        let sender = match message.sender_id.and_then(|id| names.get(&id)) {
            Some(name) => name.clone(),
            None => message.sender_type.to_string(),
        };
        format!("[{}] {}: {}", message.send_at, sender, message.content)
    };
    let blocks: Vec<String> = messages.iter().map(render).collect();

    self.summarize_blocks_with_optional_previous(
        blocks,
        None,
        max_summary_tokens,
        SummaryKind::Conversation,
    )
    .await
}
/// Converts a stored room message into a [`MessageSummary`].
///
/// `sender_name` is the username found in `user_name_map` for the message's
/// `sender_id`; when the id is absent or unknown to the map, the message's
/// `sender_type` rendered as text is used. `tool_call_id` is always `None`.
pub fn message_to_summary(
    m: &RoomMessageModel,
    user_name_map: &std::collections::HashMap<uuid::Uuid, String>,
) -> MessageSummary {
    let sender_name = m
        .sender_id
        .and_then(|user_id| user_name_map.get(&user_id).cloned())
        .unwrap_or_else(|| m.sender_type.to_string());

    MessageSummary {
        id: m.id,
        sender_type: m.sender_type.clone(),
        sender_id: m.sender_id,
        sender_name,
        content: m.content.clone(),
        content_type: m.content_type.clone(),
        tool_call_id: None,
        send_at: m.send_at,
    }
}
/// Loads the usernames for `user_ids` and returns them keyed by user id.
///
/// An empty `user_ids` slice yields an empty map without querying the database.
/// Ids that match no user are simply absent from the result.
///
/// # Errors
/// Returns `AgentError::Internal` carrying the database error text when the query fails.
pub async fn get_user_name_map(
    &self,
    user_ids: &[uuid::Uuid],
) -> Result<std::collections::HashMap<uuid::Uuid, String>, AgentError> {
    if user_ids.is_empty() {
        return Ok(std::collections::HashMap::new());
    }

    let users = User::find()
        .filter(UserCol::Uid.is_in(user_ids.to_vec()))
        .all(&self.db)
        .await
        .map_err(|e| AgentError::Internal(e.to_string()))?;

    Ok(users
        .into_iter()
        .map(|user| (user.uid, user.username))
        .collect())
}
/// Summarises `blocks` chunk by chunk and merges the results (plus an optional
/// previous summary) into one final summary.
///
/// Steps:
/// 1. Derive the final, input and per-round token budgets.
/// 2. Split `blocks` into chunks whose prompts fit the input budget and summarise
///    each chunk with the per-round budget.
/// 3. Put the trimmed, non-blank `previous_summary` (if any) in front of the
///    partial summaries and merge everything down to a single summary.
///
/// When there are no blocks, no model call is made: the result is the trimmed
/// previous summary, or an empty string if there is none. Previously an empty
/// input still produced one empty chunk that was sent to the model.
///
/// Returns the summary and the accumulated token usage; usage is `None` when no
/// call reported any.
///
/// # Errors
/// Propagates chunking, model-call and merge failures.
async fn summarize_blocks_with_optional_previous(
    &self,
    blocks: Vec<String>,
    previous_summary: Option<&str>,
    max_summary_tokens: usize,
    kind: SummaryKind,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let final_budget = Self::final_summary_budget(max_summary_tokens);
    let input_budget = self.safe_model_input_budget();
    let round_budget = Self::round_summary_budget(final_budget, input_budget);

    // A blank previous summary carries no information and is treated as absent.
    let previous = previous_summary
        .map(str::trim)
        .filter(|summary| !summary.is_empty());

    // Nothing to summarise means nothing to send to the model.
    let fitted_chunks = if blocks.is_empty() {
        Vec::new()
    } else {
        self.split_blocks_to_fit(blocks, input_budget, round_budget, kind, false)?
    };

    let mut total_usage = TokenUsage::default();
    let mut has_usage = false;

    // The previous summary goes first so chronology is preserved during merging.
    let mut partial_summaries = Vec::with_capacity(fitted_chunks.len() + 1);
    if let Some(previous) = previous {
        partial_summaries.push(previous.to_string());
    }
    for chunk in fitted_chunks {
        let prompt = self.build_prompt(kind, false, &chunk, round_budget);
        let (summary, usage) = self
            .invoke_summary_prompt(&prompt, round_budget, Self::temperature_for(kind))
            .await?;
        Self::accumulate_usage(&mut total_usage, &mut has_usage, usage);
        partial_summaries.push(summary);
    }

    let final_summary = match partial_summaries.len() {
        0 => return Ok((String::new(), None)),
        // A lone summary needs no merge round.
        1 => partial_summaries.remove(0),
        _ => {
            self.merge_summary_rounds(
                partial_summaries,
                final_budget,
                round_budget,
                kind,
                &mut total_usage,
                &mut has_usage,
            )
            .await?
        }
    };

    Ok((
        final_summary,
        if has_usage { Some(total_usage) } else { None },
    ))
}
/// Repeatedly merges neighbouring summaries until a single summary remains.
///
/// In every round the summaries are taken two at a time; an unpaired trailing
/// summary is carried over unchanged. Each pair is merged with `round_budget`,
/// except when at most two summaries remain, in which case `final_budget` is used.
/// A pair whose merge prompt does not fit the input budget is split into several
/// prompts, each producing its own summary.
///
/// Because such splitting can leave a round with as many (or more) summaries as
/// it started with, the loop is bounded: after `MAX_STALLED_ROUNDS` consecutive
/// rounds that fail to reduce the number of summaries, an error is returned
/// instead of issuing model calls indefinitely.
///
/// Token usage reported by the model calls is added to `total_usage`, and
/// `has_usage` is set once any usage was reported.
///
/// # Errors
/// Propagates chunking and model-call failures, and returns
/// `AgentError::Internal` when the merge does not converge or `summaries` is empty.
async fn merge_summary_rounds(
    &self,
    mut summaries: Vec<String>,
    final_budget: usize,
    round_budget: usize,
    kind: SummaryKind,
    total_usage: &mut TokenUsage,
    has_usage: &mut bool,
) -> Result<String, AgentError> {
    // Oversized inputs can cause a couple of non-shrinking rounds before the
    // summaries become small enough to pair up; beyond that the merge is stuck.
    const MAX_STALLED_ROUNDS: usize = 4;

    let input_budget = self.safe_model_input_budget();
    let mut stalled_rounds = 0usize;

    while summaries.len() > 1 {
        let current_budget = if summaries.len() <= 2 {
            final_budget
        } else {
            round_budget
        };

        let mut next_round = Vec::with_capacity(summaries.len() / 2 + 1);
        for group in summaries.chunks(2) {
            // Odd one out: nothing to merge it with in this round.
            if let [leftover] = group {
                next_round.push(leftover.clone());
                continue;
            }
            let fitted_pairs = self.split_blocks_to_fit(
                group.to_vec(),
                input_budget,
                current_budget,
                kind,
                true,
            )?;
            for pair_text in fitted_pairs {
                let prompt = self.build_prompt(kind, true, &pair_text, current_budget);
                let (summary, usage) = self
                    .invoke_summary_prompt(&prompt, current_budget, Self::temperature_for(kind))
                    .await?;
                Self::accumulate_usage(total_usage, has_usage, usage);
                next_round.push(summary);
            }
        }

        if next_round.len() < summaries.len() {
            stalled_rounds = 0;
        } else {
            stalled_rounds += 1;
            if stalled_rounds >= MAX_STALLED_ROUNDS {
                return Err(AgentError::Internal(
                    "summary merge is not converging within the model input budget".into(),
                ));
            }
        }
        summaries = next_round;
    }

    summaries
        .pop()
        .ok_or_else(|| AgentError::Internal("summary merge produced no output".into()))
}
/// Sends `prompt` to the configured model as a single user message and returns
/// the generated text plus the token usage derived from the response.
///
/// `max_summary_tokens` is passed on as the output-token limit. It is converted
/// to `u32` with saturation, so an oversized value becomes `u32::MAX` instead of
/// being silently truncated. The remaining optional arguments of
/// `call_with_params` are left unset.
///
/// # Errors
/// Returns `AgentError::OpenAi` carrying the client error text when the call fails.
async fn invoke_summary_prompt(
    &self,
    prompt: &str,
    max_summary_tokens: usize,
    temperature: f32,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let max_output_tokens = u32::try_from(max_summary_tokens).unwrap_or(u32::MAX);
    let response = call_with_params(
        &[ChatRequestMessage::user(prompt.to_string())],
        &self.model,
        &self.ai_client_config,
        temperature,
        max_output_tokens,
        None,
        None,
        None,
    )
    .await
    .map_err(|e| AgentError::OpenAi(e.to_string()))?;
    let usage =
        TokenUsage::from_remote(response.input_tokens as u32, response.output_tokens as u32);
    Ok((response.content, usage))
}
/// Groups `blocks` into joined chunks whose full prompt fits within `input_budget`.
///
/// Thin entry point over `collect_fitting_chunks`: it owns the output vector and
/// hands it back once the recursive collection has succeeded.
///
/// # Errors
/// Propagates any error raised while collecting the chunks.
fn split_blocks_to_fit(
    &self,
    blocks: Vec<String>,
    input_budget: usize,
    max_summary_tokens: usize,
    kind: SummaryKind,
    is_merge: bool,
) -> Result<Vec<String>, AgentError> {
    let mut fitted = Vec::new();
    self.collect_fitting_chunks(
        blocks,
        input_budget,
        max_summary_tokens,
        kind,
        is_merge,
        &mut fitted,
    )
    .map(|()| fitted)
}
/// Recursively appends to `chunks` joined bodies whose prompt fits `input_budget`.
///
/// The blocks are joined and wrapped in the prompt for `kind`/`is_merge`. If the
/// estimated token count of that prompt fits, the joined body becomes one chunk.
/// Otherwise the input is halved and each half is processed in order: several
/// blocks are split by block count, and a single block is split by text.
///
/// The block list is divided with `Vec::split_off`, so the strings are moved
/// rather than cloned at each recursion level. The joined body and prompt are
/// released before recursing.
///
/// # Errors
/// Returns `AgentError::Internal` when an over-budget input has no block left to
/// split, and propagates the error from `split_text_in_half` when a single block
/// cannot be divided any further.
fn collect_fitting_chunks(
    &self,
    mut blocks: Vec<String>,
    input_budget: usize,
    max_summary_tokens: usize,
    kind: SummaryKind,
    is_merge: bool,
    chunks: &mut Vec<String>,
) -> Result<(), AgentError> {
    let body = Self::join_blocks(&blocks, is_merge);
    let fits = {
        let prompt = self.build_prompt(kind, is_merge, &body, max_summary_tokens);
        self.estimate_tokens(&prompt) <= input_budget
    };
    if fits {
        chunks.push(body);
        return Ok(());
    }
    // The joined text is no longer needed; free it before going deeper.
    drop(body);

    let (left, right) = if blocks.len() > 1 {
        let mid = blocks.len() / 2;
        let right = blocks.split_off(mid);
        (blocks, right)
    } else {
        let single = blocks
            .pop()
            .ok_or_else(|| AgentError::Internal("cannot split empty summary block".into()))?;
        let (left, right) = Self::split_text_in_half(&single)?;
        (vec![left], vec![right])
    };

    for half in [left, right] {
        self.collect_fitting_chunks(
            half,
            input_budget,
            max_summary_tokens,
            kind,
            is_merge,
            chunks,
        )?;
    }
    Ok(())
}
/// Renders the instruction prompt for one summarisation call.
///
/// The template is chosen by `kind` (conversation or room increment) and by
/// `is_merge` (merging partial summaries or summarising raw messages).
/// `max_summary_tokens` is written into the instruction as the size limit, and
/// `body` is appended after the template's final heading.
fn build_prompt(
    &self,
    kind: SummaryKind,
    is_merge: bool,
    body: &str,
    max_summary_tokens: usize,
) -> String {
    match kind {
        SummaryKind::Conversation => {
            if is_merge {
                format!(
                    "Merge the following partial conversation summaries into a single concise summary. \
                    Deduplicate overlap, preserve chronology, and keep all concrete decisions, \
                    status updates, and unresolved work. The summary MUST NOT exceed {} tokens. \
                    Use this format:\n\n\
                    **Summary:** <one-paragraph overview>\n\
                    **Key decisions:** <bullet list or 'none'>\n\
                    **Open items:** <bullet list or 'none'>\n\n\
                    Partial summaries:\n\n{}",
                    max_summary_tokens, body
                )
            } else {
                format!(
                    "Summarise the following conversation concisely, preserving all key facts, \
                    decisions, and any pending or in-progress work. \
                    The summary MUST NOT exceed {} tokens. \
                    Use this format:\n\n\
                    **Summary:** <one-paragraph overview>\n\
                    **Key decisions:** <bullet list or 'none'>\n\
                    **Open items:** <bullet list or 'none'>\n\n\
                    Conversation:\n\n{}",
                    max_summary_tokens, body
                )
            }
        }
        SummaryKind::RoomIncrement => {
            if is_merge {
                format!(
                    "Merge the following partial room summaries into one room summary. Deduplicate overlap, \
                    keep chronology, preserve decisions, facts, assignments/owners, unresolved questions, \
                    and concrete next steps. The result MUST NOT exceed {} tokens.\n\n\
                    Format:\n\
                    **Summary:** <compact overview>\n\
                    **Decisions:** <bullets or 'none'>\n\
                    **Owners:** <bullets with owner -> task or 'none'>\n\
                    **Open items:** <bullets or 'none'>\n\n\
                    Partial summaries:\n\n{}",
                    max_summary_tokens, body
                )
            } else {
                format!(
                    "Create an incremental room summary from the new messages below. \
                    Deduplicate repeated messages, clean noise, keep chronological order, and preserve \
                    decisions, facts, assignments/owners, unresolved questions, and concrete next steps. \
                    The result MUST NOT exceed {} tokens.\n\n\
                    Format:\n\
                    **Summary:** <compact overview>\n\
                    **Decisions:** <bullets or 'none'>\n\
                    **Owners:** <bullets with owner -> task or 'none'>\n\
                    **Open items:** <bullets or 'none'>\n\n\
                    New messages:\n\n{}",
                    max_summary_tokens, body
                )
            }
        }
    }
}
/// Joins `blocks` into the body text of a prompt.
///
/// Raw message blocks are joined with a single newline. In merge mode each block
/// is given a numbered `### Partial Summary N` heading (starting at 1) and the
/// sections are separated by a blank line.
fn join_blocks(blocks: &[String], is_merge: bool) -> String {
    if !is_merge {
        return blocks.join("\n");
    }

    let sections: Vec<String> = (1usize..)
        .zip(blocks)
        .map(|(number, block)| format!("### Partial Summary {}\n{}", number, block))
        .collect();
    sections.join("\n\n")
}
/// Splits `text` into two non-empty parts near its byte midpoint.
///
/// The cut is placed on the closest character boundary at or below the midpoint;
/// if that would leave one side empty, the closest boundary at or above the
/// midpoint is used instead.
///
/// # Errors
/// Returns `AgentError::Internal` when `text` has fewer than two characters or
/// when no boundary yields two non-empty parts.
fn split_text_in_half(text: &str) -> Result<(String, String), AgentError> {
    // A second character must exist, otherwise there is nothing to divide.
    if text.chars().nth(1).is_none() {
        return Err(AgentError::Internal(
            "single summary block exceeds input budget and cannot be split".into(),
        ));
    }

    let total_bytes = text.len();
    let midpoint = total_bytes / 2;
    let leaves_both_sides = |offset: usize| offset != 0 && offset < total_bytes;

    let lower = text.floor_char_boundary(midpoint);
    let split_at = if leaves_both_sides(lower) {
        lower
    } else {
        text.ceil_char_boundary(midpoint)
    };
    if !leaves_both_sides(split_at) {
        return Err(AgentError::Internal(
            "failed to split oversized summary block".into(),
        ));
    }

    let (head, tail) = text.split_at(split_at);
    Ok((head.to_string(), tail.to_string()))
}
/// Estimates how many tokens `text` occupies for the configured model.
///
/// Uses `count_message_text`; if that fails, falls back to one token per four
/// bytes of text, with a minimum of one token.
fn estimate_tokens(&self, text: &str) -> usize {
    match count_message_text(text, &self.model) {
        Ok(tokens) => tokens,
        Err(_) => std::cmp::max(text.len() / 4, 1),
    }
}
/// Returns the prompt input budget derived from this service's `model_context_limit`.
fn safe_model_input_budget(&self) -> usize {
    let configured_limit = self.model_context_limit;
    Self::safe_model_input_budget_from_limit(configured_limit)
}
/// Clamps the requested summary size into the range
/// `CompactConfig::MIN_SUMMARY_TOKENS..=CompactConfig::MAX_SUMMARY_TOKENS`.
fn final_summary_budget(max_summary_tokens: usize) -> usize {
    let floor = CompactConfig::MIN_SUMMARY_TOKENS;
    let ceiling = CompactConfig::MAX_SUMMARY_TOKENS;
    Ord::clamp(max_summary_tokens, floor, ceiling)
}
/// Computes the token budget for an intermediate (per-round) summary.
///
/// The budget is one eighth of `input_budget`, raised to at least
/// `MIN_ROUND_SUMMARY_TOKENS`, and never larger than `final_budget`.
fn round_summary_budget(final_budget: usize, input_budget: usize) -> usize {
    let input_share = std::cmp::max(input_budget / 8, MIN_ROUND_SUMMARY_TOKENS);
    std::cmp::min(final_budget, input_share)
}
/// Returns the sampling temperature used for the given summary kind:
/// `0.3` for conversations and `0.2` for room increments.
fn temperature_for(kind: SummaryKind) -> f32 {
    let temperature: f32 = match kind {
        SummaryKind::RoomIncrement => 0.2,
        SummaryKind::Conversation => 0.3,
    };
    temperature
}
/// Converts a model context limit into the number of tokens allowed for prompt input.
///
/// A missing limit defaults to `DEFAULT_MODEL_CONTEXT_LIMIT`. The limit is raised
/// to at least 1, scaled by `MODEL_INPUT_RATIO_NUMERATOR / MODEL_INPUT_RATIO_DENOMINATOR`
/// with a saturating multiplication, and the result is again kept at 1 or above.
fn safe_model_input_budget_from_limit(model_context_limit: Option<usize>) -> usize {
    let context_limit = match model_context_limit {
        Some(limit) => limit,
        None => DEFAULT_MODEL_CONTEXT_LIMIT,
    };
    let context_limit = std::cmp::max(context_limit, 1);
    let scaled = context_limit
        .saturating_mul(MODEL_INPUT_RATIO_NUMERATOR)
        .saturating_div(MODEL_INPUT_RATIO_DENOMINATOR);
    std::cmp::max(scaled, 1)
}
/// Adds `usage` to the running `total` and records that usage was reported.
///
/// Does nothing when `usage` is `None`; `has_usage` is only ever set to `true`.
fn accumulate_usage(total: &mut TokenUsage, has_usage: &mut bool, usage: Option<TokenUsage>) {
    let Some(reported) = usage else {
        return;
    };
    total.input_tokens += reported.input_tokens;
    total.output_tokens += reported.output_tokens;
    *has_usage = true;
}
}
#[cfg(test)]
mod tests {
    use super::super::CompactService;

    /// The usable input budget is 85% of the given context limit.
    #[test]
    fn room_summary_uses_eighty_five_percent_input_budget() {
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(1000)),
            850
        );
    }

    /// Tiny limits still yield a budget of at least one token, and a missing
    /// limit falls back to 85% of the 128 000-token default.
    #[test]
    fn input_budget_has_floor_and_default() {
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(0)),
            1
        );
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(1)),
            1
        );
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(None),
            108_800
        );
    }

    /// ASCII text is cut at its midpoint without losing any content.
    #[test]
    fn oversized_text_is_split_in_half() {
        let (left, right) = CompactService::split_text_in_half("abcdefgh").unwrap();
        assert_eq!(format!("{}{}", left, right), "abcdefgh");
        assert!(!left.is_empty());
        assert!(!right.is_empty());
        assert_eq!((left.as_str(), right.as_str()), ("abcd", "efgh"));
    }

    /// The cut never lands inside a multi-byte character, whichever side it is on.
    #[test]
    fn split_respects_character_boundaries() {
        let (left, right) = CompactService::split_text_in_half("\u{1F600}a").unwrap();
        assert_eq!((left.as_str(), right.as_str()), ("\u{1F600}", "a"));

        let (left, right) = CompactService::split_text_in_half("a\u{1F600}").unwrap();
        assert_eq!((left.as_str(), right.as_str()), ("a", "\u{1F600}"));
    }

    /// Inputs with fewer than two characters cannot be divided.
    #[test]
    fn text_shorter_than_two_characters_cannot_be_split() {
        assert!(CompactService::split_text_in_half("").is_err());
        assert!(CompactService::split_text_in_half("\u{e9}").is_err());
    }
}