gitdataai/libs/agent/perception/auto.rs
2026-04-14 19:02:01 +08:00

179 lines
6.9 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Auto skill awareness — background scanning for skill relevance.
//!
//! Periodically (or on-demand) scans the conversation context to identify
//! which enabled skills might be relevant, based on keyword overlap between
//! the skill's metadata (name, description, content snippets) and the
//! conversation text.
//!
//! This is the "ambient awareness" mode — the agent is always aware of
//! which skills might apply without the user explicitly invoking them.
use super::{SkillContext, SkillEntry};
/// Auto skill awareness config.
///
/// Holds the two knobs used when detecting relevant skills: a score
/// threshold that filters candidates and a cap on how many are returned.
#[derive(Debug, Clone)]
pub struct AutoSkillAwareness {
/// Minimum keyword overlap score (nominally 0.0 to 1.0) to consider a skill
/// relevant; skills scoring below this value are dropped.
min_score: f32,
/// Maximum number of skills to inject via auto-awareness.
max_skills: usize,
}
impl Default for AutoSkillAwareness {
    /// Builds the stock configuration: a relevance threshold of 0.15 and at
    /// most three auto-detected skills.
    fn default() -> Self {
        let min_score = 0.15;
        let max_skills = 3;
        Self { min_score, max_skills }
    }
}
impl AutoSkillAwareness {
/// Creates a configuration with an explicit threshold and result cap.
///
/// * `min_score` - lowest overlap score a skill must reach to be reported.
/// * `max_skills` - upper bound on the number of skills reported.
///
/// Both values are stored as given; no validation is performed here.
pub fn new(min_score: f32, max_skills: usize) -> Self {
    Self {
        min_score,
        max_skills,
    }
}
/// Find the skills whose metadata best overlaps the ongoing conversation.
///
/// The conversation corpus is the current input plus the five most recent
/// history entries, lowercased. Every skill is scored against the corpus
/// keywords; skills scoring below `min_score` are discarded, the rest are
/// ordered from highest to lowest score, and at most `max_skills` of them
/// are turned into [`SkillContext`] values.
///
/// Returns an empty vector when `skills` is empty or when the corpus yields
/// no keywords.
pub async fn detect(
    &self,
    current_input: &str,
    history: &[String],
    skills: &[SkillEntry],
) -> Vec<SkillContext> {
    if skills.is_empty() {
        return Vec::new();
    }

    // Newest-first view of at most five history messages; the order is
    // irrelevant for keyword extraction, only the window size matters.
    let recent: Vec<&str> = history.iter().rev().take(5).map(String::as_str).collect();
    let corpus = format!("{} {}", current_input, recent.join(" ")).to_lowercase();

    let corpus_keywords = Self::extract_keywords(&corpus);
    if corpus_keywords.is_empty() {
        return Vec::new();
    }

    // Keep only the skills that reach the configured threshold.
    let mut ranked: Vec<(f32, &SkillEntry)> = Vec::new();
    for skill in skills {
        let score = Self::score_skill(&corpus_keywords, skill);
        if score >= self.min_score {
            ranked.push((score, skill));
        }
    }

    // Highest score first; incomparable scores are treated as equal.
    ranked.sort_by(|lhs, rhs| {
        rhs.0
            .partial_cmp(&lhs.0)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let mut contexts = Vec::with_capacity(ranked.len().min(self.max_skills));
    for (_, skill) in ranked.into_iter().take(self.max_skills) {
        // The excerpt is the part of the skill content that best matches the corpus.
        let excerpt = Self::best_excerpt(&corpus, skill);
        contexts.push(SkillContext {
            label: format!("Auto skill: {}", skill.name),
            content: excerpt,
        });
    }
    contexts
}
/// Extract meaningful keywords from text.
///
/// The text is split on whitespace. Each word has leading and trailing
/// non-alphanumeric characters removed, is lowercased, and is kept only if
/// the trimmed word is at least three bytes long and is not a stopword.
/// The returned keywords are the trimmed, lowercased words, in input order;
/// duplicates are preserved.
///
/// Note that the length check uses the UTF-8 byte length, so a single
/// three-byte character (for example one CJK ideograph) passes it.
fn extract_keywords(text: &str) -> Vec<String> {
    // Common English + Chinese stopwords to filter out.
    // NOTE(review): the empty-string entries below look like non-ASCII
    // stopwords that were lost before this file reached review. They can
    // never match (keywords are at least three bytes long) — confirm against
    // version control and restore the intended entries.
    const STOPWORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "can", "to", "of", "in", "for", "on", "with",
        "at", "by", "from", "as", "or", "and", "but", "if", "not", "no", "so",
        "this", "that", "these", "those", "it", "its", "i", "you", "he", "she",
        "we", "they", "what", "which", "who", "when", "where", "why", "how",
        "all", "each", "every", "both", "few", "more", "most", "other", "some",
        "such", "only", "own", "same", "than", "too", "very", "just", "also",
        "now", "here", "there", "then", "once", "again", "always", "ever",
        "", "", "", "", "", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", "", "", "", "比较",
    ];

    text.split_whitespace()
        .filter_map(|word| {
            // Strip surrounding punctuation so "rust," and "(rust)" both yield "rust".
            let trimmed = word.trim_matches(|c: char| !c.is_alphanumeric());
            if trimmed.len() < 3 {
                return None;
            }
            // Lowercase before the stopword lookup so "The" is filtered like "the".
            let keyword = trimmed.to_lowercase();
            if STOPWORDS.contains(&keyword.as_str()) {
                None
            } else {
                Some(keyword)
            }
        })
        .collect()
}
/// Rate how strongly a skill relates to the conversation keywords.
///
/// Skill keywords are drawn from the skill's name and description plus the
/// first 500 characters of its content. The score is the number of corpus
/// keywords that match at least one skill keyword (either string containing
/// the other), divided by the total number of skill keywords.
///
/// Returns `0.0` when the skill yields no keywords. Because every corpus
/// keyword is counted, including repeats, the ratio is not capped at `1.0`.
fn score_skill(corpus_keywords: &[String], skill: &SkillEntry) -> f32 {
    let description = skill.description.as_deref().unwrap_or("");
    let metadata = format!("{} {}", skill.name, description).to_lowercase();
    let content_head: String = skill.content.chars().take(500).collect();

    // Metadata keywords first, then those from the content sample.
    let mut skill_terms = Self::extract_keywords(&metadata);
    skill_terms.extend(Self::extract_keywords(&content_head.to_lowercase()));
    if skill_terms.is_empty() {
        return 0.0;
    }

    let mut hits: usize = 0;
    for keyword in corpus_keywords {
        let matched = skill_terms
            .iter()
            .any(|term| term.contains(keyword.as_str()) || keyword.contains(term.as_str()));
        if matched {
            hits += 1;
        }
    }

    // `skill_terms` is non-empty here, so the divisor is at least 1.
    hits as f32 / skill_terms.len() as f32
}
/// Extract the best excerpt from skill content: the line most relevant to the corpus.
///
/// The content is split on `'\n'`; each non-blank line is scored by how many
/// corpus keywords match one of the line's keywords (either string containing
/// the other). The highest-scoring line with at least one match is returned
/// under a `# <name> (auto-matched)` header. When several lines tie, the one
/// appearing last in the content is chosen (the behavior of `max_by_key`).
///
/// If no line matches, the first 300 characters of the content are used
/// instead, followed by `...` only when the content was actually cut short.
fn best_excerpt(corpus: &str, skill: &SkillEntry) -> String {
    let corpus_kws = Self::extract_keywords(corpus);

    let best_line = skill
        .content
        .split('\n')
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let line_kws = Self::extract_keywords(&line.to_lowercase());
            let overlap = corpus_kws
                .iter()
                .filter(|kw| {
                    line_kws
                        .iter()
                        .any(|lk| lk.contains(kw.as_str()) || kw.contains(lk.as_str()))
                })
                .count();
            (overlap, line)
        })
        .filter(|(overlap, _)| *overlap > 0)
        .max_by_key(|(overlap, _)| *overlap);

    if let Some((_, line)) = best_line {
        // Return the best line with a header.
        return format!("# {} (auto-matched)\n\n{}", skill.name, line.trim());
    }

    // Fallback: the first 300 chars of content. `by_ref` leaves the remainder
    // in `chars`, so one more `next()` tells whether anything was dropped.
    let mut chars = skill.content.chars();
    let head: String = chars.by_ref().take(300).collect();
    let suffix = if chars.next().is_some() { "..." } else { "" };
    format!("# {} (auto-matched)\n\n{}{}", skill.name, head.trim(), suffix)
}
}