use ai::agent::AgentConfig; use ai::agent::RigAgent; use ai::agent::request::AgentRequest; use ai::client::AiClient; use db::sqlx; use tracing::{info, warn}; use uuid::Uuid; use crate::AppService; use crate::error::AppError; const COMPACTION_SYSTEM_PROMPT: &str = r#"You are a conversation context compaction assistant. Your task: summarize the older portion of a conversation so the agent can continue working with only the summary + recent messages. Rules: - Preserve: key decisions, file paths, technical details, user preferences, unresolved questions. - Discard: redundant tool outputs, verbose explanations that were already acted upon, pleasantries. - Write in the same language as the conversation. - Output a concise structured summary using bullet points. - Keep the summary under 800 tokens. - Do NOT answer any questions from the conversation. Only summarize."#; const COMPACTION_TRIGGER_CHARS: usize = 80_000; const RECENT_MESSAGES_TO_KEEP: usize = 10; impl AppService { pub async fn agent_maybe_compact( &self, ai_client: &AiClient, model_name: &str, conversation_id: Uuid, ) -> Result<(), AppError> { let rows: Vec<(Uuid, String, String)> = sqlx::query_as( "SELECT id, role, content \ FROM agent_message \ WHERE conversation = $1 \ AND deleted_at IS NULL \ AND status = 'completed' \ ORDER BY created_at ASC", ) .bind(conversation_id) .fetch_all(self.db.reader()) .await .map_err(|e| AppError::DatabaseError(e.to_string()))?; let total_chars: usize = rows.iter().map(|(_, _, c)| c.len()).sum(); if total_chars < COMPACTION_TRIGGER_CHARS { return Ok(()); } if rows.len() <= RECENT_MESSAGES_TO_KEEP { return Ok(()); } let split_at = rows.len().saturating_sub(RECENT_MESSAGES_TO_KEEP); let older = &rows[..split_at]; let existing_summary: Option = sqlx::query_scalar( "SELECT compacted_summary FROM agent_conversation WHERE id = $1", ) .bind(conversation_id) .fetch_optional(self.db.reader()) .await .map_err(|e| AppError::DatabaseError(e.to_string()))? .flatten(); let mut body = String::new(); if let Some(ref prev) = existing_summary { body.push_str("\n"); body.push_str(prev); body.push_str("\n\n\n"); body.push_str( "Merge the previous summary with the new messages below:\n\n", ); } for (_, role, content) in older { body.push_str(&format!("[{}]: {}\n\n", role, content)); } let summary = match self .agent_run_compaction_llm(ai_client, model_name, &body) .await { Ok(s) => s, Err(e) => { warn!( conversation_id = %conversation_id, error = %e, "compaction LLM call failed, skipping compaction" ); return Ok(()); } }; sqlx::query( "UPDATE agent_conversation \ SET compacted_summary = $1, updated_at = now() \ WHERE id = $2", ) .bind(&summary) .bind(conversation_id) .execute(self.db.writer()) .await .map_err(|e| AppError::DatabaseError(e.to_string()))?; let ids: Vec = older.iter().map(|(id, _, _)| *id).collect(); if !ids.is_empty() { let now = chrono::Utc::now(); sqlx::query( "UPDATE agent_message \ SET deleted_at = $1, updated_at = $1 \ WHERE id = ANY($2::uuid[]) AND deleted_at IS NULL", ) .bind(now) .bind(&ids) .execute(self.db.writer()) .await .map_err(|e| AppError::DatabaseError(e.to_string()))?; } info!( conversation_id = %conversation_id, compacted_count = older.len(), kept_recent = RECENT_MESSAGES_TO_KEEP, "conversation compacted successfully" ); Ok(()) } async fn agent_run_compaction_llm( &self, ai_client: &AiClient, model_name: &str, body: &str, ) -> Result { let config = AgentConfig::new(model_name) .map_err(|e| AppError::AiError(e))? .with_system_prompt(COMPACTION_SYSTEM_PROMPT) .with_temperature(Some(0.2)) .with_max_completion_tokens(Some(1024)) .with_quiet_mode(true); let agent = RigAgent::new(ai_client.clone(), config) .map_err(|e| AppError::AiError(e))?; let request = AgentRequest::new(body); let summary = agent .chat(request, Vec::new()) .await .map_err(|e| AppError::AiError(e))?; let summary = summary.trim().to_string(); if summary.is_empty() { return Err(AppError::InternalServerError( "compaction returned empty summary".to_string(), )); } Ok(summary) } }