gitdataai/lib/ai/agent/compression.rs
2026-05-30 01:38:40 +08:00

223 lines
6.9 KiB
Rust

use crate::error::AiResult;
/// Tunable policy describing when context compaction is due and how the
/// resulting summary should be shaped.
#[derive(Debug, Clone)]
pub struct CompressionStrategy {
    /// Compaction is due once the context holds at least this many tokens.
    pub threshold_tokens: i64,
    /// Token count the context should be brought down to by compaction.
    pub target_tokens: i64,
    /// How many of the most recent message pairs are never compacted.
    pub preserve_last_n_pairs: usize,
    /// Model to use for the compaction LLM call.
    ///
    /// NOTE(review): the default is an empty string, which presumably means
    /// "no override" — confirm against the code that issues the call.
    pub summary_model: String,
    /// Token budget set aside for the compaction prompt itself.
    pub reserve_tokens: i64,
    /// Enables branch summaries when a conversation is forked.
    pub branch_summarization: bool,
    /// Extra instructions to add to the compaction prompt, if any.
    pub custom_instructions: Option<String>,
    /// Upper bound, in words, requested for a compaction summary.
    pub max_summary_words: usize,
}
impl Default for CompressionStrategy {
fn default() -> Self {
Self {
threshold_tokens: 64_000,
target_tokens: 32_000,
preserve_last_n_pairs: 4,
summary_model: String::new(),
reserve_tokens: 16_384,
branch_summarization: true,
custom_instructions: None,
max_summary_words: 1500,
}
}
}
impl CompressionStrategy {
    /// Creates a strategy with the given trigger threshold and compaction
    /// target; every other setting takes its `Default` value.
    pub fn new(threshold_tokens: i64, target_tokens: i64) -> Self {
        let defaults = Self::default();
        Self {
            threshold_tokens,
            target_tokens,
            ..defaults
        }
    }

    /// Sets how many recent message pairs are always preserved.
    pub fn with_preserve_last(self, n: usize) -> Self {
        Self {
            preserve_last_n_pairs: n,
            ..self
        }
    }

    /// Sets the model used for the compaction call.
    pub fn with_summary_model(self, model: impl Into<String>) -> Self {
        Self {
            summary_model: model.into(),
            ..self
        }
    }

    /// Sets the number of tokens reserved for the compaction prompt.
    pub fn with_reserve_tokens(self, tokens: i64) -> Self {
        Self {
            reserve_tokens: tokens,
            ..self
        }
    }

    /// Turns branch summarization on or off.
    pub fn with_branch_summarization(self, enabled: bool) -> Self {
        Self {
            branch_summarization: enabled,
            ..self
        }
    }

    /// Sets custom instructions, replacing any previously configured ones.
    pub fn with_custom_instructions(self, instructions: impl Into<String>) -> Self {
        Self {
            custom_instructions: Some(instructions.into()),
            ..self
        }
    }

    /// Sets the maximum word count requested for summaries.
    pub fn with_max_summary_words(self, words: usize) -> Self {
        Self {
            max_summary_words: words,
            ..self
        }
    }

    /// Returns `true` when `current_tokens` has reached or exceeded the
    /// configured threshold, i.e. compaction should be triggered.
    pub fn should_compact(&self, current_tokens: i64) -> bool {
        self.threshold_tokens <= current_tokens
    }
}
/// Outcome of a compaction or branch-summarization pass.
#[derive(Clone, Debug)]
pub struct CompactionResult {
    /// Summary text produced by the pass.
    pub summary: String,
    /// Number of messages (or branch entries) the summary stands in for.
    pub messages_compacted: usize,
    /// Tokens saved by the pass; branch summaries record `0` here.
    pub tokens_saved: i64,
    /// `true` for a branch summary, `false` for a standard compaction.
    pub is_branch_summary: bool,
}
impl CompactionResult {
    /// Creates the result of a standard (non-branch) compaction.
    pub fn new(summary: String, messages_compacted: usize, tokens_saved: i64) -> Self {
        let is_branch_summary = false;
        Self {
            is_branch_summary,
            tokens_saved,
            messages_compacted,
            summary,
        }
    }

    /// Creates the result of a branch summary covering `entries_summarized`
    /// entries. Branch summaries always report zero tokens saved.
    pub fn branch_summary(summary: String, entries_summarized: usize) -> Self {
        Self {
            is_branch_summary: true,
            ..Self::new(summary, entries_summarized, 0)
        }
    }
}
/// Builds the standard compaction prompt.
///
/// Convenience wrapper around [`build_compression_prompt_with_options`] that
/// passes no custom instructions and a 1500-word summary limit.
pub fn build_compression_prompt(existing_summary: Option<&str>, messages_text: &str) -> String {
    let no_custom_instructions: Option<&str> = None;
    let default_word_limit: usize = 1500;
    build_compression_prompt_with_options(
        existing_summary,
        messages_text,
        no_custom_instructions,
        default_word_limit,
    )
}
/// Builds the compaction prompt with optional custom instructions and a
/// caller-chosen word limit.
///
/// * `existing_summary` — when present, the prompt asks the model to merge
///   this summary with the new messages; otherwise it asks for a summary of
///   the conversation alone.
/// * `messages_text` — the rendered messages to summarise.
/// * `custom_instructions` — when present, inserted as an
///   "Additional instructions:" paragraph immediately before the closing
///   "Output ONLY …" directive.
/// * `max_words` — the word target stated in the prompt.
pub fn build_compression_prompt_with_options(
    existing_summary: Option<&str>,
    messages_text: &str,
    custom_instructions: Option<&str>,
    max_words: usize,
) -> String {
    let extra = match custom_instructions {
        Some(ci) => format!("\n\nAdditional instructions: {ci}"),
        None => String::new(),
    };

    // Only the opening section and first instruction sentence differ between
    // the "merge with previous summary" and "fresh summary" variants.
    let lead_in = match existing_summary {
        Some(summary) => format!(
            "## Previous Summary\n{summary}\n\n## New Messages\n{messages_text}\n\n\
             Combine the previous summary and the new messages into a concise, \
             single-paragraph summary of the conversation."
        ),
        None => format!(
            "## Conversation\n{messages_text}\n\n\
             Summarise the conversation above into a concise, single-paragraph \
             summary."
        ),
    };

    format!(
        "{lead_in} Preserve facts, decisions, code snippets, and anything \
         essential for continuing work. Target up to {max_words} words.{extra} \
         Output ONLY the summary text, no preamble."
    )
}
/// Builds the prompt used to summarise a conversation branch.
///
/// Used when the user forks a conversation from a different point in the
/// session tree, so that the context of the divergent branch is preserved.
///
/// * `branch_messages` — the rendered messages of the branch.
/// * `custom_instructions` — when present, inserted as an
///   "Additional instructions:" paragraph immediately before the closing
///   "Output ONLY …" directive.
pub fn build_branch_summary_prompt(
    branch_messages: &str,
    custom_instructions: Option<&str>,
) -> String {
    let mut prompt = String::from("## Branch Conversation\n");
    prompt.push_str(branch_messages);
    prompt.push_str("\n\n");
    prompt.push_str(
        "Summarize the conversation branch above. This summary will be used \
         to preserve context when the user navigates away from this branch. \
         Focus on key decisions, unresolved questions, and important context.",
    );
    if let Some(ci) = custom_instructions {
        prompt.push_str("\n\nAdditional instructions: ");
        prompt.push_str(ci);
    }
    prompt.push_str(" Output ONLY the summary text, no preamble.");
    prompt
}
/// Estimates how many leading messages must be truncated to bring the context
/// down to `target` tokens.
///
/// Messages are consumed from the front of `message_token_counts` until the
/// accumulated token count covers the excess (`current_total - target`). The
/// final `preserve_last` entries are never consumed.
///
/// Returns `(truncated, saved)`, where `truncated` is the number of leading
/// messages selected and `saved` is their combined token count, capped at the
/// excess. The result is `(0, 0)` when there is no excess or when every
/// message falls inside the preserved tail.
///
/// All arithmetic saturates, so extreme inputs (e.g. a very negative `target`
/// or very large per-message counts) cannot overflow.
///
/// # Errors
///
/// This implementation never returns `Err`; the `AiResult` wrapper is kept for
/// interface compatibility.
pub fn estimate_truncation(
    message_token_counts: &[i64],
    current_total: i64,
    target: i64,
    preserve_last: usize,
) -> AiResult<(usize, i64)> {
    let n = message_token_counts.len();
    if n <= preserve_last {
        return Ok((0, 0));
    }

    // Saturate instead of overflowing when `target` is far below `current_total`.
    let excess = current_total.saturating_sub(target).max(0);

    let mut truncated = 0usize;
    let mut saved = 0i64;
    // Only messages ahead of the preserved tail are candidates.
    for &tokens in &message_token_counts[..n - preserve_last] {
        if saved >= excess {
            break;
        }
        saved = saved.saturating_add(tokens);
        truncated += 1;
    }

    // Whole messages are removed, so the raw sum may overshoot; report at most
    // the excess that actually needed to be shed.
    Ok((truncated, saved.min(excess)))
}
/// Calculates compaction parameters for a given set of messages.
///
/// Returns `(messages_to_compact, tokens_saved)` where `messages_to_compact`
/// is the count of oldest messages to summarize, and `tokens_saved` is the
/// estimated token savings. When `current_total` is below the strategy's
/// threshold, nothing is planned and `(0, 0)` is returned.
///
/// # Errors
///
/// Propagates any error from `estimate_truncation`.
pub fn plan_compaction(
    strategy: &CompressionStrategy,
    message_token_counts: &[i64],
    current_total: i64,
) -> AiResult<(usize, i64)> {
    if !strategy.should_compact(current_total) {
        return Ok((0, 0));
    }

    // Pairs → individual messages. Saturate so that a very large "preserve
    // everything" setting cannot overflow `usize`.
    let preserve_last = strategy.preserve_last_n_pairs.saturating_mul(2);

    estimate_truncation(
        message_token_counts,
        current_total,
        strategy.target_tokens,
        preserve_last,
    )
}