235 lines
7.0 KiB
Rust
235 lines
7.0 KiB
Rust
use crate::error::AiResult;
|
|
|
|
/// Policy describing when context compaction kicks in and how it is shaped.
///
/// Build one with [`CompressionStrategy::new`] or [`Default::default`] and
/// refine it with the chainable `with_*` methods.
#[derive(Debug, Clone)]
pub struct CompressionStrategy {
    /// Compaction is triggered once the token count reaches this value.
    pub threshold_tokens: i64,
    /// Token count to aim for after compaction.
    pub target_tokens: i64,
    /// How many of the most recent message pairs are always preserved.
    pub preserve_last_n_pairs: usize,
    /// Optional model override for the compaction LLM call.
    ///
    /// NOTE(review): the default is an empty string, which presumably means
    /// "no override" — confirm against the caller.
    pub summary_model: String,
    /// Tokens set aside for the compaction prompt itself.
    pub reserve_tokens: i64,
    /// Whether branch summaries are generated when forking.
    pub branch_summarization: bool,
    /// Extra instructions appended to the compaction prompt, if any.
    pub custom_instructions: Option<String>,
    /// Upper bound on the word count of compaction summaries.
    pub max_summary_words: usize,
}

impl Default for CompressionStrategy {
    /// Defaults: compact at 64 000 tokens down to 32 000, keep the last four
    /// pairs, reserve 16 384 tokens, summarize branches, cap summaries at
    /// 1500 words, with no model override and no custom instructions.
    fn default() -> Self {
        Self {
            threshold_tokens: 64_000,
            target_tokens: 32_000,
            preserve_last_n_pairs: 4,
            summary_model: String::new(),
            reserve_tokens: 16_384,
            branch_summarization: true,
            custom_instructions: None,
            max_summary_words: 1500,
        }
    }
}

impl CompressionStrategy {
    /// Create a strategy with the given trigger threshold and post-compaction
    /// target; every other setting takes its default value.
    pub fn new(threshold_tokens: i64, target_tokens: i64) -> Self {
        Self {
            target_tokens,
            threshold_tokens,
            ..Self::default()
        }
    }

    /// Set how many recent message pairs are always preserved.
    pub fn with_preserve_last(self, n: usize) -> Self {
        Self {
            preserve_last_n_pairs: n,
            ..self
        }
    }

    /// Set the model used for the compaction LLM call.
    pub fn with_summary_model(self, model: impl Into<String>) -> Self {
        Self {
            summary_model: model.into(),
            ..self
        }
    }

    /// Set the number of tokens reserved for the compaction prompt.
    pub fn with_reserve_tokens(self, tokens: i64) -> Self {
        Self {
            reserve_tokens: tokens,
            ..self
        }
    }

    /// Enable or disable branch summaries when forking.
    pub fn with_branch_summarization(self, enabled: bool) -> Self {
        Self {
            branch_summarization: enabled,
            ..self
        }
    }

    /// Attach custom instructions to be appended to the compaction prompt.
    pub fn with_custom_instructions(self, instructions: impl Into<String>) -> Self {
        Self {
            custom_instructions: Some(instructions.into()),
            ..self
        }
    }

    /// Set the maximum word count for compaction summaries.
    pub fn with_max_summary_words(self, words: usize) -> Self {
        Self {
            max_summary_words: words,
            ..self
        }
    }

    /// Report whether `current_tokens` has reached the compaction threshold.
    ///
    /// The comparison is inclusive: a count equal to `threshold_tokens`
    /// triggers compaction.
    pub fn should_compact(&self, current_tokens: i64) -> bool {
        self.threshold_tokens <= current_tokens
    }
}
|
|
|
|
/// Outcome of a summarization pass: either a standard compaction or a branch
/// summary.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The summary text.
    pub summary: String,
    /// Number of messages (or branch entries) the summary stands in for.
    pub messages_compacted: usize,
    /// Token savings attributed to this pass; `0` for branch summaries built
    /// with [`CompactionResult::branch_summary`].
    pub tokens_saved: i64,
    /// Whether this was a branch summary (vs. standard compaction).
    pub is_branch_summary: bool,
}

impl CompactionResult {
    /// Record a standard compaction (`is_branch_summary` is `false`).
    pub fn new(summary: String, messages_compacted: usize, tokens_saved: i64) -> Self {
        Self {
            is_branch_summary: false,
            tokens_saved,
            messages_compacted,
            summary,
        }
    }

    /// Record a branch summary covering `entries_summarized` entries.
    ///
    /// No token savings are reported for branch summaries.
    pub fn branch_summary(summary: String, entries_summarized: usize) -> Self {
        let mut result = Self::new(summary, entries_summarized, 0);
        result.is_branch_summary = true;
        result
    }
}
|
|
|
|
/// Build the compaction prompt for standard context compression.
|
|
pub fn build_compression_prompt(
|
|
existing_summary: Option<&str>,
|
|
messages_text: &str,
|
|
) -> String {
|
|
build_compression_prompt_with_options(
|
|
existing_summary,
|
|
messages_text,
|
|
None,
|
|
1500,
|
|
)
|
|
}
|
|
|
|
/// Build the compaction prompt with custom instructions and word limit.
///
/// * `existing_summary` — when `Some`, the prompt asks the model to merge the
///   previous summary with the new messages; when `None`, it asks for a fresh
///   summary of the conversation.
/// * `messages_text` — the rendered messages to summarize.
/// * `custom_instructions` — optional extra guidance. It is placed in its own
///   paragraph before the closing directive. `None`, empty, or
///   whitespace-only values add nothing to the prompt.
/// * `max_words` — the word target quoted in the prompt.
///
/// Returns the full prompt text.
pub fn build_compression_prompt_with_options(
    existing_summary: Option<&str>,
    messages_text: &str,
    custom_instructions: Option<&str>,
    max_words: usize,
) -> String {
    // Text placed between the word target and the closing directive. Blank
    // instructions are treated like `None` so the prompt never carries a
    // dangling "Additional instructions:" header.
    let separator = match custom_instructions.filter(|ci| !ci.trim().is_empty()) {
        // The trailing blank line keeps the caller's text from running into
        // the "Output ONLY" sentence.
        Some(ci) => format!("\n\nAdditional instructions: {ci}\n\n"),
        None => String::from(" "),
    };

    if let Some(summary) = existing_summary {
        format!(
            "## Previous Summary\n{summary}\n\n## New Messages\n{messages_text}\n\n\
             Combine the previous summary and the new messages into a concise, \
             single-paragraph summary of the conversation. Preserve facts, \
             decisions, code snippets, and anything essential for continuing \
             work. Target up to {max_words} words.{separator}\
             Output ONLY the summary text, no preamble.",
        )
    } else {
        format!(
            "## Conversation\n{messages_text}\n\n\
             Summarise the conversation above into a concise, single-paragraph \
             summary. Preserve facts, decisions, code snippets, and anything \
             essential for continuing work. Target up to {max_words} words.{separator}\
             Output ONLY the summary text, no preamble.",
        )
    }
}
|
|
|
|
/// Build a prompt for generating a branch summary.
///
/// Used when the user forks a conversation from a different point in the
/// session tree. Summarizes the divergent branch so context is preserved.
///
/// * `branch_messages` — the rendered messages of the branch to summarize.
/// * `custom_instructions` — optional extra guidance. It is placed in its own
///   paragraph before the closing directive. `None`, empty, or
///   whitespace-only values add nothing to the prompt.
///
/// Returns the full prompt text.
pub fn build_branch_summary_prompt(
    branch_messages: &str,
    custom_instructions: Option<&str>,
) -> String {
    // Text placed between the guidance sentence and the closing directive.
    // Blank instructions are treated like `None` so the prompt never carries
    // a dangling "Additional instructions:" header.
    let separator = match custom_instructions.filter(|ci| !ci.trim().is_empty()) {
        // The trailing blank line keeps the caller's text from running into
        // the "Output ONLY" sentence.
        Some(ci) => format!("\n\nAdditional instructions: {ci}\n\n"),
        None => String::from(" "),
    };

    format!(
        "## Branch Conversation\n{branch_messages}\n\n\
         Summarize the conversation branch above. This summary will be used \
         to preserve context when the user navigates away from this branch. \
         Focus on key decisions, unresolved questions, and important context.{separator}\
         Output ONLY the summary text, no preamble.",
    )
}
|
|
|
|
/// Calculate how many messages to truncate to reach the target token count.
|
|
pub fn estimate_truncation(
|
|
message_token_counts: &[i64],
|
|
current_total: i64,
|
|
target: i64,
|
|
preserve_last: usize,
|
|
) -> AiResult<(usize, i64)> {
|
|
let n = message_token_counts.len();
|
|
if n <= preserve_last {
|
|
return Ok((0, 0));
|
|
}
|
|
|
|
let excess = (current_total - target).max(0);
|
|
|
|
let mut truncated = 0;
|
|
let mut saved = 0i64;
|
|
let limit = n - preserve_last;
|
|
|
|
for i in 0..limit {
|
|
if saved >= excess {
|
|
break;
|
|
}
|
|
saved += message_token_counts[i];
|
|
truncated += 1;
|
|
}
|
|
|
|
Ok((truncated, saved.min(excess)))
|
|
}
|
|
|
|
/// Calculate compaction parameters for a given set of messages.
|
|
///
|
|
/// Returns `(messages_to_compact, tokens_saved)` where `messages_to_compact`
|
|
/// is the count of oldest messages to summarize, and `tokens_saved` is the
|
|
/// estimated token savings.
|
|
pub fn plan_compaction(
|
|
strategy: &CompressionStrategy,
|
|
message_token_counts: &[i64],
|
|
current_total: i64,
|
|
) -> AiResult<(usize, i64)> {
|
|
if !strategy.should_compact(current_total) {
|
|
return Ok((0, 0));
|
|
}
|
|
|
|
estimate_truncation(
|
|
message_token_counts,
|
|
current_total,
|
|
strategy.target_tokens,
|
|
strategy.preserve_last_n_pairs * 2, // pairs → individual messages
|
|
)
|
|
}
|