// gitdataai/lib/ai/agent/compression.rs

use crate::error::AiResult;

/// Compression strategy controlling when and how context compaction occurs.
///
/// Build one with [`CompressionStrategy::new`] or [`Default::default`], then
/// refine it with the chainable `with_*` setters. Each setter consumes the
/// strategy and returns the updated value, so the returned value must be kept.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompressionStrategy {
    /// Token threshold that triggers compaction (compared with `>=` in
    /// [`CompressionStrategy::should_compact`]).
    pub threshold_tokens: i64,
    /// Target token count after compaction.
    pub target_tokens: i64,
    /// Number of recent message pairs to always preserve.
    pub preserve_last_n_pairs: usize,
    /// Optional model override for the compaction LLM call.
    ///
    /// Defaults to the empty string. NOTE(review): presumably an empty value
    /// means "no override" — confirm against the code that reads this field.
    pub summary_model: String,
    /// Reserve this many tokens for the compaction prompt itself.
    pub reserve_tokens: i64,
    /// Whether to generate branch summaries when forking.
    pub branch_summarization: bool,
    /// Custom instructions appended to the compaction prompt.
    pub custom_instructions: Option<String>,
    /// Maximum word count for compaction summaries.
    pub max_summary_words: usize,
}

impl Default for CompressionStrategy {
    /// Returns the stock configuration: trigger at 64 000 tokens, aim for
    /// 32 000, keep the last 4 message pairs, reserve 16 384 tokens, enable
    /// branch summarization, and cap summaries at 1500 words. The summary
    /// model is the empty string and no custom instructions are set.
    fn default() -> Self {
        Self {
            threshold_tokens: 64_000,
            target_tokens: 32_000,
            preserve_last_n_pairs: 4,
            summary_model: String::new(),
            reserve_tokens: 16_384,
            branch_summarization: true,
            custom_instructions: None,
            max_summary_words: 1500,
        }
    }
}

impl CompressionStrategy {
    /// Creates a strategy with the given trigger threshold and post-compaction
    /// target; every other field takes its [`Default`] value.
    #[must_use]
    pub fn new(threshold_tokens: i64, target_tokens: i64) -> Self {
        Self {
            threshold_tokens,
            target_tokens,
            ..Default::default()
        }
    }

    /// Sets how many recent message pairs are always preserved.
    #[must_use]
    pub fn with_preserve_last(mut self, n: usize) -> Self {
        self.preserve_last_n_pairs = n;
        self
    }

    /// Sets the model name stored in `summary_model`.
    #[must_use]
    pub fn with_summary_model(mut self, model: impl Into<String>) -> Self {
        self.summary_model = model.into();
        self
    }

    /// Sets the number of tokens reserved for the compaction prompt.
    #[must_use]
    pub fn with_reserve_tokens(mut self, tokens: i64) -> Self {
        self.reserve_tokens = tokens;
        self
    }

    /// Enables or disables branch summarization.
    #[must_use]
    pub fn with_branch_summarization(mut self, enabled: bool) -> Self {
        self.branch_summarization = enabled;
        self
    }

    /// Stores custom instructions, replacing any previously set value.
    #[must_use]
    pub fn with_custom_instructions(
        mut self,
        instructions: impl Into<String>,
    ) -> Self {
        self.custom_instructions = Some(instructions.into());
        self
    }

    /// Sets the maximum word count for compaction summaries.
    #[must_use]
    pub fn with_max_summary_words(mut self, words: usize) -> Self {
        self.max_summary_words = words;
        self
    }

    /// Check whether compaction should be triggered based on current token count.
    ///
    /// Returns `true` when `current_tokens` is at or above `threshold_tokens`.
    #[must_use]
    pub fn should_compact(&self, current_tokens: i64) -> bool {
        current_tokens >= self.threshold_tokens
    }
}

/// Outcome of a compaction or branch-summarization pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactionResult {
    /// The generated summary text.
    pub summary: String,
    /// Number of messages (or branch entries) folded into the summary.
    pub messages_compacted: usize,
    /// Token savings attributed to this pass; always `0` for results built
    /// with [`CompactionResult::branch_summary`].
    pub tokens_saved: i64,
    /// Whether this was a branch summary (vs. standard compaction).
    pub is_branch_summary: bool,
}

impl CompactionResult {
    /// Creates the result of a standard compaction (`is_branch_summary` is
    /// `false`).
    #[must_use]
    pub fn new(
        summary: String,
        messages_compacted: usize,
        tokens_saved: i64,
    ) -> Self {
        Self {
            summary,
            messages_compacted,
            tokens_saved,
            is_branch_summary: false,
        }
    }

    /// Creates the result of a branch summary.
    ///
    /// `entries_summarized` is stored in `messages_compacted`, `tokens_saved`
    /// is set to `0`, and `is_branch_summary` is `true`.
    #[must_use]
    pub fn branch_summary(summary: String, entries_summarized: usize) -> Self {
        Self {
            summary,
            messages_compacted: entries_summarized,
            tokens_saved: 0,
            is_branch_summary: true,
        }
    }
}

/// Build the standard compaction prompt using the stock settings.
///
/// Delegates to [`build_compression_prompt_with_options`] with no custom
/// instructions and a 1500-word summary limit.
pub fn build_compression_prompt(
    existing_summary: Option<&str>,
    messages_text: &str,
) -> String {
    // Word limit used when the caller does not pick one.
    const STANDARD_MAX_WORDS: usize = 1500;
    let no_custom_instructions: Option<&str> = None;

    build_compression_prompt_with_options(
        existing_summary,
        messages_text,
        no_custom_instructions,
        STANDARD_MAX_WORDS,
    )
}

/// Build the compaction prompt with custom instructions and word limit.
///
/// * `existing_summary` — when present, the prompt asks the model to merge it
///   with the new messages; otherwise the prompt asks for a fresh summary.
/// * `messages_text` — the conversation text embedded verbatim in the prompt.
/// * `custom_instructions` — when present, appended after the word-limit
///   sentence under an "Additional instructions:" label.
/// * `max_words` — the word target quoted in the prompt.
pub fn build_compression_prompt_with_options(
    existing_summary: Option<&str>,
    messages_text: &str,
    custom_instructions: Option<&str>,
    max_words: usize,
) -> String {
    // Optional trailer; empty when the caller supplied no extra instructions.
    let extra = match custom_instructions {
        Some(text) => format!("\n\nAdditional instructions: {text}"),
        None => String::new(),
    };

    match existing_summary {
        // Rolling compaction: fold the earlier summary together with the new messages.
        Some(previous) => format!(
            "## Previous Summary\n{previous}\n\n## New Messages\n{messages_text}\n\n\
             Combine the previous summary and the new messages into a concise, \
             single-paragraph summary of the conversation. \
             Preserve facts, decisions, code snippets, and anything essential \
             for continuing work. \
             Target up to {max_words} words.{extra} \
             Output ONLY the summary text, no preamble."
        ),
        // First compaction: summarise the raw conversation.
        None => format!(
            "## Conversation\n{messages_text}\n\n\
             Summarise the conversation above into a concise, single-paragraph summary. \
             Preserve facts, decisions, code snippets, and anything essential \
             for continuing work. \
             Target up to {max_words} words.{extra} \
             Output ONLY the summary text, no preamble."
        ),
    }
}

/// Build a prompt for generating a branch summary.
///
/// Intended for the case where the user forks a conversation from a different
/// point in the session tree: the divergent branch is summarized so its
/// context is preserved.
///
/// `branch_messages` is embedded verbatim under a "Branch Conversation"
/// heading. When `custom_instructions` is present it is appended under an
/// "Additional instructions:" label before the closing output directive.
pub fn build_branch_summary_prompt(
    branch_messages: &str,
    custom_instructions: Option<&str>,
) -> String {
    // Fixed part: the branch text followed by the summarization request.
    let mut prompt = format!(
        "## Branch Conversation\n{branch_messages}\n\n\
         Summarize the conversation branch above. \
         This summary will be used to preserve context when the user \
         navigates away from this branch. \
         Focus on key decisions, unresolved questions, and important context."
    );

    // Optional caller-supplied instructions go right after the request.
    if let Some(instructions) = custom_instructions {
        prompt.push_str("\n\nAdditional instructions: ");
        prompt.push_str(instructions);
    }

    prompt.push_str(" Output ONLY the summary text, no preamble.");
    prompt
}

/// Calculate how many messages to truncate to reach the target token count.
///
/// Walks `message_token_counts` from the oldest entry forward, never touching
/// the final `preserve_last` entries, and stops as soon as the accumulated
/// token count covers the excess `current_total - target`.
///
/// Returns `Ok((truncated, saved))`, where `truncated` is the number of
/// leading messages to drop and `saved` is the accumulated token count of
/// those messages, capped at the excess. The result is `Ok((0, 0))` when there
/// are no more than `preserve_last` messages or when `current_total` is
/// already at or below `target`.
///
/// All token arithmetic saturates, so extreme inputs clamp at the `i64`
/// bounds instead of panicking (debug builds) or wrapping (release builds).
///
/// # Errors
///
/// This function never returns `Err`; the `AiResult` wrapper is part of the
/// existing interface.
pub fn estimate_truncation(
    message_token_counts: &[i64],
    current_total: i64,
    target: i64,
    preserve_last: usize,
) -> AiResult<(usize, i64)> {
    let n = message_token_counts.len();
    if n <= preserve_last {
        return Ok((0, 0));
    }

    // Tokens that must go; never negative, and clamped rather than overflowing
    // when `target` is far below `current_total`.
    let excess = current_total.saturating_sub(target).max(0);

    let mut truncated = 0usize;
    let mut saved = 0i64;

    // `n > preserve_last` is guaranteed by the guard above, so the subtraction
    // cannot underflow.
    for &tokens in message_token_counts.iter().take(n - preserve_last) {
        if saved >= excess {
            break;
        }
        saved = saved.saturating_add(tokens);
        truncated += 1;
    }

    // Report at most the excess, even if the last dropped message overshot it.
    Ok((truncated, saved.min(excess)))
}

/// Calculate compaction parameters for a given set of messages.
///
/// Returns `(messages_to_compact, tokens_saved)` where `messages_to_compact`
/// is the count of oldest messages to summarize, and `tokens_saved` is the
/// estimated token savings.
///
/// Yields `Ok((0, 0))` without further work when
/// `strategy.should_compact(current_total)` is `false`. Otherwise the result
/// comes from `estimate_truncation`, called with `strategy.target_tokens` and
/// twice `strategy.preserve_last_n_pairs` as the number of trailing messages
/// to keep.
///
/// # Errors
///
/// Propagates whatever `estimate_truncation` returns.
pub fn plan_compaction(
    strategy: &CompressionStrategy,
    message_token_counts: &[i64],
    current_total: i64,
) -> AiResult<(usize, i64)> {
    if !strategy.should_compact(current_total) {
        return Ok((0, 0));
    }

    // Pairs → individual messages. Saturate so a very large pair count (e.g.
    // `usize::MAX` meaning "keep everything") stays very large instead of
    // panicking in debug builds or wrapping to a small number in release.
    let preserve_last = strategy.preserve_last_n_pairs.saturating_mul(2);

    estimate_truncation(
        message_token_counts,
        current_total,
        strategy.target_tokens,
        preserve_last,
    )
}