179 lines
6.9 KiB
Rust
179 lines
6.9 KiB
Rust
//! Auto skill awareness — background scanning for skill relevance.
//!
//! Periodically (or on-demand) scans the conversation context to identify
//! which enabled skills might be relevant, based on keyword overlap between
//! the skill's metadata (name, description, content snippets) and the
//! conversation text.
//!
//! This is the "ambient awareness" mode — the agent is always aware of
//! which skills might apply without the user explicitly invoking them.
|
||
|
||
use super::{SkillContext, SkillEntry};
|
||
|
||
/// Tuning knobs for automatic ("ambient") skill detection.
///
/// Both fields are private; build a value with [`AutoSkillAwareness::new`]
/// or rely on the `Default` implementation.
#[derive(Debug, Clone)]
pub struct AutoSkillAwareness {
    /// Lowest keyword-overlap score a skill may have and still be reported;
    /// skills scoring below this threshold are discarded.
    min_score: f32,
    /// Upper bound on how many skills one detection pass may return.
    max_skills: usize,
}
|
||
|
||
impl Default for AutoSkillAwareness {
|
||
fn default() -> Self {
|
||
Self {
|
||
min_score: 0.15,
|
||
max_skills: 3,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl AutoSkillAwareness {
|
||
pub fn new(min_score: f32, max_skills: usize) -> Self {
|
||
Self { min_score, max_skills }
|
||
}
|
||
|
||
/// Detect relevant skills by scoring keyword overlap between skill metadata
|
||
/// and the conversation text (current input + recent history).
|
||
///
|
||
/// Returns up to `max_skills` skills sorted by relevance score.
|
||
pub async fn detect(
|
||
&self,
|
||
current_input: &str,
|
||
history: &[String],
|
||
skills: &[SkillEntry],
|
||
) -> Vec<SkillContext> {
|
||
if skills.is_empty() {
|
||
return Vec::new();
|
||
}
|
||
|
||
// Build a combined corpus from current input and recent history (last 5 messages).
|
||
let history_text: String = history
|
||
.iter()
|
||
.rev()
|
||
.take(5)
|
||
.map(|s| s.as_str())
|
||
.collect::<Vec<_>>()
|
||
.join(" ");
|
||
|
||
let corpus = format!("{} {}", current_input, history_text).to_lowercase();
|
||
|
||
// Extract keywords from the corpus (split on whitespace + strip punctuation).
|
||
let corpus_keywords = Self::extract_keywords(&corpus);
|
||
|
||
if corpus_keywords.is_empty() {
|
||
return Vec::new();
|
||
}
|
||
|
||
// Score each skill.
|
||
let mut scored: Vec<_> = skills
|
||
.iter()
|
||
.map(|skill| {
|
||
let score = Self::score_skill(&corpus_keywords, skill);
|
||
(score, skill)
|
||
})
|
||
.filter(|(score, _)| *score >= self.min_score)
|
||
.collect();
|
||
|
||
// Sort descending by score.
|
||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||
|
||
scored
|
||
.into_iter()
|
||
.take(self.max_skills)
|
||
.map(|(_, skill)| {
|
||
// Extract a short relevant excerpt around the first keyword match.
|
||
let excerpt = Self::best_excerpt(&corpus, skill);
|
||
SkillContext {
|
||
label: format!("Auto skill: {}", skill.name),
|
||
content: excerpt,
|
||
}
|
||
})
|
||
.collect()
|
||
}
|
||
|
||
/// Extracts meaningful keywords from `text`.
///
/// The text is split on whitespace. Each token has leading and trailing
/// non-alphanumeric characters removed and is lowercased; the cleaned token
/// is what gets returned, so `"Docker,"` and `"(docker)"` both yield
/// `"docker"`. Punctuation inside a token (e.g. `"foo-bar"`) is kept.
///
/// A token is discarded when its cleaned form is shorter than three bytes
/// or appears in the stopword list. The length check is in bytes, so a
/// single three-byte CJK character passes it and is then subject only to the
/// stopword list.
///
/// Duplicates are preserved and the original token order is kept.
fn extract_keywords(text: &str) -> Vec<String> {
    // Common English + Chinese stopwords to filter out. Because tokens come
    // from whitespace splitting, the Chinese entries only match tokens that
    // stand alone between whitespace.
    const STOPWORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "can", "to", "of", "in", "for", "on", "with",
        "at", "by", "from", "as", "or", "and", "but", "if", "not", "no", "so",
        "this", "that", "these", "those", "it", "its", "i", "you", "he", "she",
        "we", "they", "what", "which", "who", "when", "where", "why", "how",
        "all", "each", "every", "both", "few", "more", "most", "other", "some",
        "such", "only", "own", "same", "than", "too", "very", "just", "also",
        "now", "here", "there", "then", "once", "again", "always", "ever",
        "的", "了", "是", "在", "我", "你", "他", "她", "它", "们", "这", "那",
        "个", "一", "上", "下", "来", "去", "说", "看", "想", "要", "会", "能",
        "和", "与", "或", "不", "就", "也", "都", "还", "从", "到", "把", "被",
        "让", "给", "用", "做", "为", "以", "及", "等", "很", "太", "比较",
    ];

    text.split_whitespace()
        .filter_map(|raw| {
            // Clean and lowercase first so the stopword test and the returned
            // keyword both use the same normalized form.
            let token = raw
                .trim_matches(|c: char| !c.is_alphanumeric())
                .to_lowercase();
            if token.len() >= 3 && !STOPWORDS.contains(&token.as_str()) {
                Some(token)
            } else {
                None
            }
        })
        .collect()
}
|
||
|
||
/// Score a skill by keyword overlap between the corpus keywords and the skill's
|
||
/// name + description + content (first 500 chars).
|
||
fn score_skill(corpus_keywords: &[String], skill: &SkillEntry) -> f32 {
|
||
let skill_text = format!(
|
||
"{} {}",
|
||
skill.name,
|
||
skill.description.as_deref().unwrap_or("")
|
||
);
|
||
let skill_text = skill_text.to_lowercase();
|
||
let skill_keywords = Self::extract_keywords(&skill_text);
|
||
let content_sample = skill.content.chars().take(500).collect::<String>().to_lowercase();
|
||
let content_keywords = Self::extract_keywords(&content_sample);
|
||
let all_skill_keywords = [&skill_keywords[..], &content_keywords[..]].concat();
|
||
|
||
if all_skill_keywords.is_empty() {
|
||
return 0.0;
|
||
}
|
||
|
||
let overlap: usize = corpus_keywords
|
||
.iter()
|
||
.filter(|kw| all_skill_keywords.iter().any(|sk| sk.contains(kw.as_str()) || kw.as_str().contains(sk.as_str())))
|
||
.count();
|
||
|
||
overlap as f32 / all_skill_keywords.len().max(1) as f32
|
||
}
|
||
|
||
/// Extract the best excerpt from skill content — the paragraph most relevant to the corpus.
|
||
fn best_excerpt(corpus: &str, skill: &SkillEntry) -> String {
|
||
// Try to find a relevant paragraph: one that shares the most keywords with corpus.
|
||
let corpus_kws = Self::extract_keywords(corpus);
|
||
|
||
let best_para = skill
|
||
.content
|
||
.split('\n')
|
||
.filter(|para| !para.trim().is_empty())
|
||
.map(|para| {
|
||
let para_kws = Self::extract_keywords(¶.to_lowercase());
|
||
let overlap: usize = corpus_kws
|
||
.iter()
|
||
.filter(|kw| para_kws.iter().any(|pk| pk.contains(kw.as_str()) || kw.as_str().contains(pk.as_str())))
|
||
.count();
|
||
(overlap, para)
|
||
})
|
||
.filter(|(score, _)| *score > 0)
|
||
.max_by_key(|(score, _)| *score);
|
||
|
||
if let Some((_, para)) = best_para {
|
||
// Return the best paragraph with a header.
|
||
format!("# {} (auto-matched)\n\n{}", skill.name, para.trim())
|
||
} else {
|
||
// Fallback: use first 300 chars of content as excerpt.
|
||
let excerpt = skill.content.chars().take(300).collect::<String>();
|
||
format!("# {} (auto-matched)\n\n{}...", skill.name, excerpt.trim())
|
||
}
|
||
}
|
||
}
|