//! Auto skill awareness: ambient relevance matching. use super::{SkillActivation, SkillContext, SkillEntry}; use std::collections::HashSet; #[derive(Debug, Clone)] pub struct AutoSkillAwareness { /// Minimum overlap score to consider a skill relevant. min_score: f32, /// Maximum number of auto-selected skills. max_skills: usize, } impl Default for AutoSkillAwareness { fn default() -> Self { Self { min_score: 0.20, max_skills: 3, } } } impl AutoSkillAwareness { pub fn new(min_score: f32, max_skills: usize) -> Self { Self { min_score, max_skills, } } pub async fn detect( &self, current_input: &str, history: &[String], skills: &[SkillEntry], ) -> Vec { if skills.is_empty() { return Vec::new(); } let history_text = history .iter() .rev() .take(5) .map(String::as_str) .collect::>() .join(" "); let corpus = format!("{} {}", current_input, history_text).to_lowercase(); let corpus_keywords = Self::extract_keywords(&corpus); if corpus_keywords.is_empty() { return Vec::new(); } let mut scored = skills .iter() .filter_map(|skill| { let score = Self::score_skill(&corpus_keywords, skill); (score >= self.min_score).then_some((score, skill)) }) .collect::>(); scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); scored .into_iter() .take(self.max_skills) .map(|(score, skill)| { let excerpt = Self::best_excerpt(&corpus, skill); SkillContext::new(skill, SkillActivation::Auto, None, excerpt, Some(score)) }) .collect() } fn extract_keywords(text: &str) -> HashSet { const STOPWORDS: &[&str] = &[ "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can", "to", "of", "in", "for", "on", "with", "at", "by", "from", "as", "or", "and", "but", "if", "not", "no", "so", "this", "that", "these", "those", "it", "its", "i", "you", "we", "they", "what", "which", "who", "when", "where", "why", "how", "all", "each", "every", "more", "most", "some", "such", "only", "same", "than", "too", "very", "just", "also", "now", "here", "there", "then", ]; let mut terms = HashSet::new(); let mut ascii = String::new(); let mut cjk_run = String::new(); for ch in text.chars() { if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' { if !cjk_run.is_empty() { Self::push_cjk_terms(&mut terms, &cjk_run); cjk_run.clear(); } ascii.push(ch); } else if ('\u{4e00}'..='\u{9fff}').contains(&ch) { if !ascii.is_empty() { Self::push_ascii_term(&mut terms, &ascii, STOPWORDS); ascii.clear(); } cjk_run.push(ch); } else { if !ascii.is_empty() { Self::push_ascii_term(&mut terms, &ascii, STOPWORDS); ascii.clear(); } if !cjk_run.is_empty() { Self::push_cjk_terms(&mut terms, &cjk_run); cjk_run.clear(); } } } if !ascii.is_empty() { Self::push_ascii_term(&mut terms, &ascii, STOPWORDS); } if !cjk_run.is_empty() { Self::push_cjk_terms(&mut terms, &cjk_run); } terms } fn push_ascii_term(terms: &mut HashSet, raw: &str, stopwords: &[&str]) { let term = raw .trim_matches(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-') .to_lowercase(); if term.len() >= 3 && !stopwords.contains(&term.as_str()) { terms.insert(term); } } fn push_cjk_terms(terms: &mut HashSet, raw: &str) { let chars = raw.chars().collect::>(); if chars.len() < 2 { return; } for window in chars.windows(2) { terms.insert(window.iter().collect()); } if chars.len() >= 4 { terms.insert(chars.iter().collect()); } } fn score_skill(corpus_keywords: &HashSet, skill: &SkillEntry) -> f32 { let skill_text = format!( "{} {} {}", skill.name, skill.description.as_deref().unwrap_or(""), skill.content.chars().take(800).collect::() ) .to_lowercase(); let skill_keywords = Self::extract_keywords(&skill_text); if skill_keywords.is_empty() { return 0.0; } let overlap = corpus_keywords .iter() .filter(|kw| { skill_keywords .iter() .any(|sk| sk == *kw || (kw.len() >= 4 && sk.contains(kw.as_str()))) }) .count(); let denominator = corpus_keywords.len().min(skill_keywords.len()).max(1); overlap as f32 / denominator as f32 } fn best_excerpt(corpus: &str, skill: &SkillEntry) -> String { let corpus_kws = Self::extract_keywords(corpus); let best_para = skill .content .split('\n') .filter(|para| !para.trim().is_empty()) .map(|para| { let para_kws = Self::extract_keywords(¶.to_lowercase()); let overlap = corpus_kws .iter() .filter(|kw| { para_kws .iter() .any(|pk| pk == *kw || pk.contains(kw.as_str())) }) .count(); (overlap, para) }) .filter(|(score, _)| *score > 0) .max_by_key(|(score, _)| *score); if let Some((_, para)) = best_para { format!("# {} (auto-matched)\n\n{}", skill.name, para.trim()) } else { let excerpt = skill.content.chars().take(300).collect::(); format!("# {} (auto-matched)\n\n{}...", skill.name, excerpt.trim()) } } } #[cfg(test)] mod tests { use super::*; fn skill(slug: &str, name: &str, description: &str, content: &str) -> SkillEntry { SkillEntry { slug: slug.to_string(), name: name.to_string(), description: Some(description.to_string()), content: content.to_string(), } } #[tokio::test] async fn auto_detects_chinese_without_spaces() { let skills = vec![skill( "code-review", "代码审查", "检查代码安全和性能问题", "审查变更,发现 bug、安全漏洞和性能风险。", )]; let found = AutoSkillAwareness::new(0.10, 3) .detect("帮我检查这次代码安全问题", &[], &skills) .await; assert_eq!(found[0].slug, "code-review"); } }