// gitdataai/libs/agent/compact/summarizer.rs

use models::rooms::room_message::Model as RoomMessageModel;
use models::users::user::{Column as UserCol, Entity as User};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};

use crate::AgentError;
use crate::client::call_with_params;
use crate::client::types::ChatRequestMessage;
use crate::compact::types::{CompactConfig, MessageSummary};
use crate::tokent::{TokenUsage, count_message_text};

/// Context limit (in tokens) used when the service has no `model_context_limit` configured.
const DEFAULT_MODEL_CONTEXT_LIMIT: usize = 128_000;
/// Numerator of the share of the context limit that may be spent on prompt input (85/100).
const MODEL_INPUT_RATIO_NUMERATOR: usize = 85;
/// Denominator of the share of the context limit that may be spent on prompt input (85/100).
const MODEL_INPUT_RATIO_DENOMINATOR: usize = 100;
/// Floor for the input-derived cap (`input_budget / 8`) used when sizing per-round summaries.
const MIN_ROUND_SUMMARY_TOKENS: usize = 64;

/// Selects which prompt template family and sampling temperature a summarisation uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SummaryKind {
    /// Summary of a whole conversation.
    Conversation,
    /// Incremental room summary built from newly arrived messages.
    RoomIncrement,
}

impl super::CompactService {
    pub async fn summarize_room_increment(
        &self,
        previous_summary: Option<&str>,
        messages: &[RoomMessageModel],
        max_summary_tokens: usize,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let user_ids: Vec<uuid::Uuid> = messages
            .iter()
            .filter_map(|m| m.sender_id)
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();

        let user_name_map = self.get_user_name_map(&user_ids).await?;
        let blocks = messages
            .iter()
            .map(|m| {
                let sender = if let Some(user_id) = m.sender_id {
                    user_name_map
                        .get(&user_id)
                        .cloned()
                        .unwrap_or_else(|| m.sender_type.to_string())
                } else {
                    m.sender_type.to_string()
                };
                format!("[{}] {}: {}", m.send_at, sender, m.content)
            })
            .collect::<Vec<_>>();

        self.summarize_blocks_with_optional_previous(
            blocks,
            previous_summary,
            max_summary_tokens,
            SummaryKind::RoomIncrement,
        )
        .await
    }

    pub async fn summarize_messages(
        &self,
        messages: &[RoomMessageModel],
        max_summary_tokens: usize,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let user_ids: Vec<uuid::Uuid> = messages
            .iter()
            .filter_map(|m| m.sender_id)
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();

        let user_name_map = self.get_user_name_map(&user_ids).await?;
        let blocks = messages
            .iter()
            .map(|m| {
                let sender = if let Some(user_id) = m.sender_id {
                    user_name_map
                        .get(&user_id)
                        .cloned()
                        .unwrap_or_else(|| m.sender_type.to_string())
                } else {
                    m.sender_type.to_string()
                };
                format!("[{}] {}: {}", m.send_at, sender, m.content)
            })
            .collect::<Vec<_>>();

        self.summarize_blocks_with_optional_previous(
            blocks,
            None,
            max_summary_tokens,
            SummaryKind::Conversation,
        )
        .await
    }

    pub fn message_to_summary(
        m: &RoomMessageModel,
        user_name_map: &std::collections::HashMap<uuid::Uuid, String>,
    ) -> MessageSummary {
        let sender_name = if let Some(user_id) = m.sender_id {
            user_name_map
                .get(&user_id)
                .cloned()
                .unwrap_or_else(|| m.sender_type.to_string())
        } else {
            m.sender_type.to_string()
        };
        MessageSummary {
            id: m.id,
            sender_type: m.sender_type.clone(),
            sender_id: m.sender_id,
            sender_name,
            content: m.content.clone(),
            content_type: m.content_type.clone(),
            tool_call_id: None,
            send_at: m.send_at,
        }
    }

    /// Loads the usernames for `user_ids` and returns them keyed by user uid.
    ///
    /// Only ids returned by the query appear in the map. An empty `user_ids` slice yields an
    /// empty map without running a query.
    ///
    /// # Errors
    /// A failed query is reported as [`AgentError::Internal`] carrying the error text.
    pub async fn get_user_name_map(
        &self,
        user_ids: &[uuid::Uuid],
    ) -> Result<std::collections::HashMap<uuid::Uuid, String>, AgentError> {
        if user_ids.is_empty() {
            return Ok(std::collections::HashMap::new());
        }

        let rows = User::find()
            .filter(UserCol::Uid.is_in(user_ids.to_vec()))
            .all(&self.db)
            .await
            .map_err(|e| AgentError::Internal(e.to_string()))?;

        Ok(rows
            .into_iter()
            .map(|row| (row.uid, row.username))
            .collect())
    }

    /// Summarises `blocks`, optionally folding in an earlier summary.
    ///
    /// 1. Derives the final, per-round and prompt-input budgets from `max_summary_tokens` and
    ///    the model context limit.
    /// 2. Splits `blocks` into chunks whose prompts fit the input budget and summarises each
    ///    chunk with the per-round budget.
    /// 3. Places the trimmed, non-empty `previous_summary` in front of the chunk summaries and
    ///    merges everything down to one summary.
    ///
    /// When `blocks` is empty no chunk is summarised: the result is the trimmed previous
    /// summary if there is one, otherwise an empty string with no usage. A single resulting
    /// summary is returned as-is without a merge call.
    ///
    /// Returns the summary and the summed token usage (`None` if no call reported usage).
    ///
    /// # Errors
    /// Propagates chunking, model-call and merge failures.
    async fn summarize_blocks_with_optional_previous(
        &self,
        blocks: Vec<String>,
        previous_summary: Option<&str>,
        max_summary_tokens: usize,
        kind: SummaryKind,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let final_budget = Self::final_summary_budget(max_summary_tokens);
        let input_budget = self.safe_model_input_budget();
        let round_budget = Self::round_summary_budget(final_budget, input_budget);
        let mut total_usage = TokenUsage::default();
        let mut has_usage = false;

        // The previous summary (if usable) goes first so chronology is preserved in merges.
        let mut partial_summaries: Vec<String> = previous_summary
            .map(str::trim)
            .filter(|summary| !summary.is_empty())
            .map(String::from)
            .into_iter()
            .collect();

        // Without this guard an empty input still yields one empty chunk, which would send
        // the model a prompt with nothing to summarise.
        if !blocks.is_empty() {
            let fitted_chunks =
                self.split_blocks_to_fit(blocks, input_budget, round_budget, kind, false)?;
            let temperature = Self::temperature_for(kind);

            for chunk in fitted_chunks {
                let prompt = self.build_prompt(kind, false, &chunk, round_budget);
                let (summary, usage) = self
                    .invoke_summary_prompt(&prompt, round_budget, temperature)
                    .await?;
                Self::accumulate_usage(&mut total_usage, &mut has_usage, usage);
                partial_summaries.push(summary);
            }
        }

        let final_summary = match partial_summaries.len() {
            0 => return Ok((String::new(), None)),
            // Nothing to merge with: hand back the only summary unchanged.
            1 => partial_summaries.remove(0),
            _ => {
                self.merge_summary_rounds(
                    partial_summaries,
                    final_budget,
                    round_budget,
                    kind,
                    &mut total_usage,
                    &mut has_usage,
                )
                .await?
            }
        };

        Ok((final_summary, has_usage.then_some(total_usage)))
    }

    /// Merges `summaries` pairwise, round after round, until a single summary remains.
    ///
    /// Each round walks the list in order and merges neighbours two at a time; an unpaired
    /// trailing summary is carried into the next round untouched. Rounds that start with more
    /// than two summaries cap the model output at `round_budget`, otherwise `final_budget` is
    /// used. A pair whose merge prompt exceeds the input budget is split by
    /// `split_blocks_to_fit` and each piece is summarised on its own, so a round may emit as
    /// many summaries as it consumed, or more.
    ///
    /// Token usage reported by the model calls is added to `total_usage` / `has_usage`.
    ///
    /// # Errors
    /// * Propagates chunking and model-call failures.
    /// * Returns [`AgentError::Internal`] when several consecutive rounds fail to reduce the
    ///   number of summaries, or when `summaries` is empty.
    async fn merge_summary_rounds(
        &self,
        mut summaries: Vec<String>,
        final_budget: usize,
        round_budget: usize,
        kind: SummaryKind,
        total_usage: &mut TokenUsage,
        has_usage: &mut bool,
    ) -> Result<String, AgentError> {
        // A single non-shrinking round is normal (oversized inputs are compressed first and
        // merged in the following round). Several in a row mean the pieces will never fit
        // together, and continuing would issue model calls forever.
        const MAX_STALLED_ROUNDS: usize = 4;

        let input_budget = self.safe_model_input_budget();
        let temperature = Self::temperature_for(kind);
        let mut stalled_rounds = 0usize;

        while summaries.len() > 1 {
            let round_input_len = summaries.len();
            let current_budget = if round_input_len <= 2 {
                final_budget
            } else {
                round_budget
            };

            let mut next_round = Vec::with_capacity(round_input_len / 2 + 1);
            // Consume the list so each summary is moved into its pair instead of cloned.
            let mut pending = summaries.into_iter();

            while let Some(first) = pending.next() {
                let Some(second) = pending.next() else {
                    // Odd one out: carry it over to the next round unchanged.
                    next_round.push(first);
                    break;
                };

                let fitted_pairs = self.split_blocks_to_fit(
                    vec![first, second],
                    input_budget,
                    current_budget,
                    kind,
                    true,
                )?;

                for pair_text in fitted_pairs {
                    let prompt = self.build_prompt(kind, true, &pair_text, current_budget);
                    let (summary, usage) = self
                        .invoke_summary_prompt(&prompt, current_budget, temperature)
                        .await?;
                    Self::accumulate_usage(total_usage, has_usage, usage);
                    next_round.push(summary);
                }
            }

            if next_round.len() < round_input_len {
                stalled_rounds = 0;
            } else {
                stalled_rounds += 1;
                if stalled_rounds >= MAX_STALLED_ROUNDS {
                    return Err(AgentError::Internal(
                        "summary merge stalled: partial summaries do not fit the model input budget"
                            .into(),
                    ));
                }
            }

            summaries = next_round;
        }

        summaries
            .pop()
            .ok_or_else(|| AgentError::Internal("summary merge produced no output".into()))
    }

    /// Sends `prompt` to the model as a single user message and returns its reply.
    ///
    /// `max_summary_tokens` is passed to the client as the output-token limit and
    /// `temperature` as the sampling temperature.
    ///
    /// Returns the response content and the token usage built from the response counters.
    ///
    /// # Errors
    /// A failed model call is reported as [`AgentError::OpenAi`] carrying the error text.
    async fn invoke_summary_prompt(
        &self,
        prompt: &str,
        max_summary_tokens: usize,
        temperature: f32,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        // Saturate instead of wrapping: a plain `as u32` cast would silently turn an
        // oversized budget into a small, unrelated limit.
        let max_output_tokens = u32::try_from(max_summary_tokens).unwrap_or(u32::MAX);

        let response = call_with_params(
            &[ChatRequestMessage::user(prompt.to_string())],
            &self.model,
            &self.ai_client_config,
            temperature,
            max_output_tokens,
            None,
            None,
            None,
        )
        .await
        .map_err(|e| AgentError::OpenAi(e.to_string()))?;

        // NOTE(review): the source type of these counters is not visible here; the casts are
        // kept as-is. Confirm they cannot truncate.
        let usage =
            TokenUsage::from_remote(response.input_tokens as u32, response.output_tokens as u32);
        Ok((response.content, usage))
    }

    /// Groups `blocks` into chunk bodies whose rendered prompts fit within `input_budget`.
    ///
    /// Thin entry point around `collect_fitting_chunks`: it owns the output vector and
    /// returns it once the recursive collection has finished.
    ///
    /// # Errors
    /// Propagates any error raised while collecting the chunks.
    fn split_blocks_to_fit(
        &self,
        blocks: Vec<String>,
        input_budget: usize,
        max_summary_tokens: usize,
        kind: SummaryKind,
        is_merge: bool,
    ) -> Result<Vec<String>, AgentError> {
        let mut fitted: Vec<String> = Vec::new();
        let outcome = self.collect_fitting_chunks(
            blocks,
            input_budget,
            max_summary_tokens,
            kind,
            is_merge,
            &mut fitted,
        );
        outcome.map(|()| fitted)
    }

    /// Recursively appends to `chunks` bodies whose prompts fit within `input_budget`.
    ///
    /// The blocks are joined and rendered into a prompt; if its estimated token count fits,
    /// the joined body becomes one chunk. Otherwise the input is halved and both halves are
    /// processed in order: a list of several blocks is split at its middle index, a single
    /// block is split by text via `split_text_in_half`.
    ///
    /// # Errors
    /// Returns [`AgentError::Internal`] when an oversized input contains no block at all, and
    /// propagates the error from `split_text_in_half` when a block cannot be split further.
    fn collect_fitting_chunks(
        &self,
        blocks: Vec<String>,
        input_budget: usize,
        max_summary_tokens: usize,
        kind: SummaryKind,
        is_merge: bool,
        chunks: &mut Vec<String>,
    ) -> Result<(), AgentError> {
        let joined = Self::join_blocks(&blocks, is_merge);
        let candidate_prompt = self.build_prompt(kind, is_merge, &joined, max_summary_tokens);
        if self.estimate_tokens(&candidate_prompt) <= input_budget {
            chunks.push(joined);
            return Ok(());
        }

        // Too large: cut the input in two, by block count when possible, by text otherwise.
        let halves = if blocks.len() > 1 {
            let (front, back) = blocks.split_at(blocks.len() / 2);
            [front.to_vec(), back.to_vec()]
        } else {
            let Some(only) = blocks.into_iter().next() else {
                return Err(AgentError::Internal(
                    "cannot split empty summary block".into(),
                ));
            };
            let (left, right) = Self::split_text_in_half(&only)?;
            [vec![left], vec![right]]
        };

        for half in halves {
            self.collect_fitting_chunks(
                half,
                input_budget,
                max_summary_tokens,
                kind,
                is_merge,
                chunks,
            )?;
        }
        Ok(())
    }

    /// Renders the instruction prompt for one summarisation call.
    ///
    /// One of four templates is chosen from `(kind, is_merge)`:
    /// conversation vs. room-increment, and first-pass summarisation vs. merging of partial
    /// summaries. Every template states `max_summary_tokens` as the size limit the model must
    /// respect, describes the expected output sections, and ends with `body`.
    fn build_prompt(
        &self,
        kind: SummaryKind,
        is_merge: bool,
        body: &str,
        max_summary_tokens: usize,
    ) -> String {
        match (kind, is_merge) {
            // First pass over raw conversation text.
            (SummaryKind::Conversation, false) => format!(
                "Summarise the following conversation concisely, preserving all key facts, \
                 decisions, and any pending or in-progress work. \
                 The summary MUST NOT exceed {} tokens. \
                 Use this format:\n\n\
                 **Summary:** <one-paragraph overview>\n\
                 **Key decisions:** <bullet list or 'none'>\n\
                 **Open items:** <bullet list or 'none'>\n\n\
                 Conversation:\n\n{}",
                max_summary_tokens, body
            ),
            // Merge of partial conversation summaries.
            (SummaryKind::Conversation, true) => format!(
                "Merge the following partial conversation summaries into a single concise summary. \
                 Deduplicate overlap, preserve chronology, and keep all concrete decisions, \
                 status updates, and unresolved work. The summary MUST NOT exceed {} tokens. \
                 Use this format:\n\n\
                 **Summary:** <one-paragraph overview>\n\
                 **Key decisions:** <bullet list or 'none'>\n\
                 **Open items:** <bullet list or 'none'>\n\n\
                 Partial summaries:\n\n{}",
                max_summary_tokens, body
            ),
            // First pass over new room messages.
            (SummaryKind::RoomIncrement, false) => format!(
                "Create an incremental room summary from the new messages below. \
                 Deduplicate repeated messages, clean noise, keep chronological order, and preserve \
                 decisions, facts, assignments/owners, unresolved questions, and concrete next steps. \
                 The result MUST NOT exceed {} tokens.\n\n\
                 Format:\n\
                 **Summary:** <compact overview>\n\
                 **Decisions:** <bullets or 'none'>\n\
                 **Owners:** <bullets with owner -> task or 'none'>\n\
                 **Open items:** <bullets or 'none'>\n\n\
                 New messages:\n\n{}",
                max_summary_tokens, body
            ),
            // Merge of partial room summaries.
            (SummaryKind::RoomIncrement, true) => format!(
                "Merge the following partial room summaries into one room summary. Deduplicate overlap, \
                 keep chronology, preserve decisions, facts, assignments/owners, unresolved questions, \
                 and concrete next steps. The result MUST NOT exceed {} tokens.\n\n\
                 Format:\n\
                 **Summary:** <compact overview>\n\
                 **Decisions:** <bullets or 'none'>\n\
                 **Owners:** <bullets with owner -> task or 'none'>\n\
                 **Open items:** <bullets or 'none'>\n\n\
                 Partial summaries:\n\n{}",
                max_summary_tokens, body
            ),
        }
    }

    /// Concatenates `blocks` into one prompt body.
    ///
    /// Plain blocks are joined with a single newline. In merge mode each block is prefixed
    /// with a numbered `### Partial Summary N` heading (counting from 1) and the sections are
    /// separated by a blank line.
    fn join_blocks(blocks: &[String], is_merge: bool) -> String {
        if !is_merge {
            return blocks.join("\n");
        }

        let mut sections = Vec::with_capacity(blocks.len());
        for (position, block) in blocks.iter().enumerate() {
            sections.push(format!("### Partial Summary {}\n{}", position + 1, block));
        }
        sections.join("\n\n")
    }

    /// Splits `text` into two non-empty parts near its byte midpoint.
    ///
    /// The cut is placed on the closest character boundary at or below the midpoint; when
    /// that would leave the left part empty, the closest boundary at or above the midpoint is
    /// used instead. Concatenating the two parts reproduces `text`.
    ///
    /// # Errors
    /// Returns [`AgentError::Internal`] when `text` has fewer than two characters, or when no
    /// cut position strictly inside the text can be found.
    fn split_text_in_half(text: &str) -> Result<(String, String), AgentError> {
        // Fewer than two characters: there is no second character to start the right half.
        if text.chars().nth(1).is_none() {
            return Err(AgentError::Internal(
                "single summary block exceeds input budget and cannot be split".into(),
            ));
        }

        let midpoint = text.len() / 2;
        let is_interior = |index: usize| index > 0 && index < text.len();

        let split_at = Some(text.floor_char_boundary(midpoint))
            .filter(|&index| is_interior(index))
            .unwrap_or_else(|| text.ceil_char_boundary(midpoint));

        if !is_interior(split_at) {
            return Err(AgentError::Internal(
                "failed to split oversized summary block".into(),
            ));
        }

        let (left, right) = text.split_at(split_at);
        Ok((left.to_string(), right.to_string()))
    }

    /// Estimates how many tokens `text` occupies for the configured model.
    ///
    /// Uses `count_message_text`; if that fails, falls back to one token per four bytes of
    /// text, with a minimum of one.
    fn estimate_tokens(&self, text: &str) -> usize {
        match count_message_text(text, &self.model) {
            Ok(tokens) => tokens,
            Err(_) => std::cmp::max(text.len() / 4, 1),
        }
    }

    /// Prompt-input budget derived from this service's configured model context limit.
    fn safe_model_input_budget(&self) -> usize {
        let configured_limit = self.model_context_limit;
        Self::safe_model_input_budget_from_limit(configured_limit)
    }

    /// Restricts the requested summary size to the range allowed by [`CompactConfig`].
    ///
    /// Values below `CompactConfig::MIN_SUMMARY_TOKENS` are raised to it and values above
    /// `CompactConfig::MAX_SUMMARY_TOKENS` are lowered to it.
    fn final_summary_budget(max_summary_tokens: usize) -> usize {
        let floor = CompactConfig::MIN_SUMMARY_TOKENS;
        let ceiling = CompactConfig::MAX_SUMMARY_TOKENS;
        max_summary_tokens.clamp(floor, ceiling)
    }

    /// Output budget for intermediate (non-final) summarisation rounds.
    ///
    /// The cap is one eighth of `input_budget`, but never less than
    /// `MIN_ROUND_SUMMARY_TOKENS`; the result is that cap or `final_budget`, whichever is
    /// smaller.
    fn round_summary_budget(final_budget: usize, input_budget: usize) -> usize {
        let input_share = input_budget / 8;
        let per_round_cap = std::cmp::max(input_share, MIN_ROUND_SUMMARY_TOKENS);
        std::cmp::min(final_budget, per_round_cap)
    }

    /// Sampling temperature used for model calls of the given summary kind.
    fn temperature_for(kind: SummaryKind) -> f32 {
        const CONVERSATION_TEMPERATURE: f32 = 0.3;
        const ROOM_INCREMENT_TEMPERATURE: f32 = 0.2;

        match kind {
            SummaryKind::RoomIncrement => ROOM_INCREMENT_TEMPERATURE,
            SummaryKind::Conversation => CONVERSATION_TEMPERATURE,
        }
    }

    /// Computes the prompt-input budget for a model context limit.
    ///
    /// A missing limit falls back to `DEFAULT_MODEL_CONTEXT_LIMIT`. The limit (at least 1) is
    /// scaled by `MODEL_INPUT_RATIO_NUMERATOR / MODEL_INPUT_RATIO_DENOMINATOR` using a
    /// saturating multiplication, and the result is never lower than 1.
    fn safe_model_input_budget_from_limit(model_context_limit: Option<usize>) -> usize {
        let context_limit = match model_context_limit {
            Some(limit) => limit,
            None => DEFAULT_MODEL_CONTEXT_LIMIT,
        };
        let context_limit = std::cmp::max(context_limit, 1);

        let scaled = context_limit
            .saturating_mul(MODEL_INPUT_RATIO_NUMERATOR)
            .saturating_div(MODEL_INPUT_RATIO_DENOMINATOR);
        std::cmp::max(scaled, 1)
    }

    /// Adds one call's token usage to the running totals.
    ///
    /// When `usage` is `None` nothing changes. Otherwise its input and output counters are
    /// added to `total` and `has_usage` is set to `true`.
    fn accumulate_usage(total: &mut TokenUsage, has_usage: &mut bool, usage: Option<TokenUsage>) {
        let Some(reported) = usage else {
            return;
        };
        total.input_tokens += reported.input_tokens;
        total.output_tokens += reported.output_tokens;
        *has_usage = true;
    }
}

#[cfg(test)]
mod tests {
    use super::super::CompactService;

    /// The input budget is 85% of an explicit context limit.
    #[test]
    fn room_summary_uses_eighty_five_percent_input_budget() {
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(1000)),
            850
        );
    }

    /// A missing limit uses the default, and tiny limits still yield a budget of at least 1.
    #[test]
    fn input_budget_uses_default_limit_and_never_reaches_zero() {
        let expected_default = super::DEFAULT_MODEL_CONTEXT_LIMIT
            * super::MODEL_INPUT_RATIO_NUMERATOR
            / super::MODEL_INPUT_RATIO_DENOMINATOR;
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(None),
            expected_default
        );
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(0)),
            1
        );
        assert_eq!(
            CompactService::safe_model_input_budget_from_limit(Some(1)),
            1
        );
    }

    /// Splitting keeps all text and leaves both halves non-empty.
    #[test]
    fn oversized_text_is_split_in_half() {
        let (left, right) = CompactService::split_text_in_half("abcdefgh").unwrap();
        assert_eq!(format!("{}{}", left, right), "abcdefgh");
        assert!(!left.is_empty());
        assert!(!right.is_empty());
    }

    /// The cut never lands inside a multi-byte character.
    #[test]
    fn split_respects_multibyte_char_boundaries() {
        // The byte midpoint falls inside the 4-byte emoji, so the cut moves to its end.
        let (left, right) = CompactService::split_text_in_half("\u{1F600}a").unwrap();
        assert_eq!(left, "\u{1F600}");
        assert_eq!(right, "a");

        let (left, right) = CompactService::split_text_in_half("\u{E9}\u{E9}").unwrap();
        assert_eq!(left, "\u{E9}");
        assert_eq!(right, "\u{E9}");
    }

    /// Text with fewer than two characters cannot be split.
    #[test]
    fn text_shorter_than_two_chars_cannot_be_split() {
        assert!(CompactService::split_text_in_half("").is_err());
        assert!(CompactService::split_text_in_half("a").is_err());
        assert!(CompactService::split_text_in_half("\u{1F600}").is_err());
    }

    /// Merge mode numbers the partial summaries; plain mode joins with newlines.
    #[test]
    fn join_blocks_labels_partial_summaries_only_when_merging() {
        let blocks = ["a".to_string(), "b".to_string()];
        assert_eq!(CompactService::join_blocks(&blocks, false), "a\nb");
        assert_eq!(
            CompactService::join_blocks(&blocks, true),
            "### Partial Summary 1\na\n\n### Partial Summary 2\nb"
        );
    }
}