refactor(perception): simplify active/auto detection and deduplication
- Remove activation threshold logic from PassiveSkillAwareness - Add SkillActivation enum with Active/Passive/Vector/Auto variants - Add deduplication via SkillContext.dedupe_key() using rank ordering - Simplify ActiveSkillAwareness with cleaner regex-based detection
This commit is contained in:
parent
1e94c280e9
commit
5c1b14c26a
@ -1,20 +1,13 @@
|
||||
//! Active skill awareness — proactive skill retrieval triggered by explicit user intent.
|
||||
//! Active skill awareness: explicit user intent.
|
||||
//!
|
||||
//! The agent proactively loads a specific skill when the user explicitly references it
|
||||
//! in their message. Patterns include:
|
||||
//!
|
||||
//! - Direct slug mention: "用 code-review", "使用 skill:code-review", "@code-review"
|
||||
//! - Task-based invocation: "帮我 code review", "做一次 security scan"
|
||||
//! - Intent keywords with skill context: "review 我的 PR", "scan for bugs"
|
||||
//!
|
||||
//! This is the highest-priority perception mode — if the user explicitly asks for a
|
||||
//! skill, it always gets injected regardless of auto/passive scores.
|
||||
//! Active detection has the highest priority. It only fires when the user
|
||||
//! directly references a skill by slug, name, mention, or clear "use this"
|
||||
//! wording.
|
||||
|
||||
use super::{SkillContext, SkillEntry};
|
||||
use super::{SkillActivation, SkillContext, SkillEntry, normalize_skill_key};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
/// Active skill awareness that detects explicit skill invocations in user messages.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ActiveSkillAwareness;
|
||||
|
||||
@ -23,150 +16,99 @@ impl ActiveSkillAwareness {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Detect if the user explicitly invoked a skill in their message.
|
||||
///
|
||||
/// Returns the first matching skill, or `None` if no explicit invocation is found.
|
||||
///
|
||||
/// Matching patterns:
|
||||
/// - `用 <slug>` / `使用 <slug>` (Chinese: "use / apply <slug>")
|
||||
/// - `skill:<slug>` (explicit namespace)
|
||||
/// - `@<slug>` (GitHub-style mention)
|
||||
/// - `帮我 <slug>` / `<name> 帮我` (Chinese: "help me <slug>")
|
||||
/// - `做一次 <name>` / `进行一次 <name>` (Chinese: "do a <name>")
|
||||
pub fn detect(&self, input: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let input_lower = input.to_lowercase();
|
||||
|
||||
// Try each matching pattern in priority order.
|
||||
if let Some(skill) = self.match_by_prefix_pattern(&input_lower, skills) {
|
||||
return Some(skill);
|
||||
}
|
||||
|
||||
// Try matching by skill name (for natural language invocations).
|
||||
if let Some(skill) = self.match_by_name(&input_lower, skills) {
|
||||
return Some(skill);
|
||||
}
|
||||
|
||||
// Try matching by slug substring in the message.
|
||||
self.match_by_slug_substring(&input_lower, skills)
|
||||
self.match_by_prefix_pattern(&input_lower, skills)
|
||||
.or_else(|| self.match_by_name(&input_lower, skills))
|
||||
.or_else(|| self.match_by_slug_substring(&input_lower, skills))
|
||||
}
|
||||
|
||||
/// Pattern: "用 code-review", "使用 skill:xxx", "@xxx", "skill:xxx"
|
||||
fn match_by_prefix_pattern(&self, input: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
// Pattern 1: 英文 slug 前缀 "use ", "using ", "apply ", "with "
|
||||
static USE_PAT: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"(?i)^\s*(?:use|using|apply|with)\s+([a-z0-9/_-]+)").unwrap());
|
||||
|
||||
if let Some(caps) = USE_PAT.captures(input) {
|
||||
let slug = caps.get(1)?.as_str().trim();
|
||||
return self.find_skill_by_slug(slug, skills);
|
||||
}
|
||||
|
||||
// Pattern 2: skill:xxx
|
||||
static SKILL_COLON_PAT: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"(?i)skill\s*:\s*([a-z0-9/_-]+)").unwrap());
|
||||
|
||||
if let Some(caps) = SKILL_COLON_PAT.captures(input) {
|
||||
let slug = caps.get(1)?.as_str().trim();
|
||||
return self.find_skill_by_slug(slug, skills);
|
||||
}
|
||||
|
||||
// Pattern 3: @xxx (mention style)
|
||||
static AT_PAT: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"@([a-z0-9][a-z0-9_/-]*[a-z0-9])").unwrap());
|
||||
|
||||
if let Some(caps) = AT_PAT.captures(input) {
|
||||
let slug = caps.get(1)?.as_str().trim();
|
||||
return self.find_skill_by_slug(slug, skills);
|
||||
}
|
||||
|
||||
// Pattern 4: 帮我 xxx, 做一个 xxx, 进行 xxx, 做 xxx
|
||||
static ZH_PAT: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"(?ix)[\u4e00-\u9fff]+\s+(?:帮我|做一个|进行一次|做|使用|用)\s+([a-z0-9][a-z0-9_/-]{0,30})")
|
||||
.unwrap()
|
||||
Regex::new(
|
||||
r"(?i)(?:使用|用|应用|启用|调用|帮我|帮忙|做一次|执行|进行)\s*([a-z0-9][a-z0-9_/\-]{0,60})",
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
if let Some(caps) = ZH_PAT.captures(input) {
|
||||
let slug_or_name = caps.get(1)?.as_str().trim();
|
||||
return self
|
||||
.find_skill_by_slug(slug_or_name, skills)
|
||||
.or_else(|| self.find_skill_by_name(slug_or_name, skills));
|
||||
for pattern in [&USE_PAT, &SKILL_COLON_PAT, &AT_PAT, &ZH_PAT] {
|
||||
if let Some(caps) = pattern.captures(input) {
|
||||
let slug = caps.get(1)?.as_str().trim();
|
||||
if let Some(skill) = self.find_skill_by_slug(slug, skills) {
|
||||
return Some(skill);
|
||||
}
|
||||
if let Some(skill) = self.find_skill_by_name(slug, skills) {
|
||||
return Some(skill);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Match by skill name in natural language (e.g., "code review" → "code-review")
|
||||
fn match_by_name(&self, input: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
for skill in skills {
|
||||
// Normalize skill name to a search pattern: "Code Review" -> "code review"
|
||||
let name_lower = skill.name.to_lowercase();
|
||||
|
||||
// Direct substring match (the skill name appears in the input).
|
||||
if input.contains(&name_lower) {
|
||||
return Some(SkillContext {
|
||||
label: format!("Active skill: {}", skill.name),
|
||||
content: format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
});
|
||||
}
|
||||
|
||||
// Try removing hyphens/underscores: "code-review" contains "code review"
|
||||
let normalized_name = name_lower.replace(['-', '_'], " ");
|
||||
if input.contains(&normalized_name) {
|
||||
return Some(SkillContext {
|
||||
label: format!("Active skill: {}", skill.name),
|
||||
content: format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
});
|
||||
if input.contains(&name_lower) || input.contains(&normalized_name) {
|
||||
return Some(Self::context_from_skill(skill));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Match by slug substring anywhere in the message.
|
||||
fn match_by_slug_substring(&self, input: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
// Remove common command words to isolate the slug.
|
||||
let cleaned = input
|
||||
.replace("please ", "")
|
||||
.replace("帮我", "")
|
||||
.replace("帮我review", "")
|
||||
.replace("帮我 code review", "")
|
||||
.replace("帮我review", "");
|
||||
.replace("帮忙", "")
|
||||
.replace("使用", "")
|
||||
.replace("调用", "")
|
||||
.replace("启用", "");
|
||||
|
||||
for skill in skills {
|
||||
let slug = skill.slug.to_lowercase();
|
||||
// Check if the slug (or any segment of it) appears as a word.
|
||||
let slug = normalize_skill_key(&skill.slug);
|
||||
if cleaned.contains(&slug)
|
||||
|| slug
|
||||
.split('/')
|
||||
.any(|seg| cleaned.contains(seg) && seg.len() > 3)
|
||||
.split(['/', '-'])
|
||||
.any(|seg| seg.len() > 3 && cleaned.contains(seg))
|
||||
{
|
||||
return Some(SkillContext {
|
||||
label: format!("Active skill: {}", skill.name),
|
||||
content: format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
});
|
||||
return Some(Self::context_from_skill(skill));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn find_skill_by_slug(&self, slug: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let slug_lower = slug.to_lowercase();
|
||||
let slug_key = normalize_skill_key(slug);
|
||||
skills
|
||||
.iter()
|
||||
.find(|s| s.slug.to_lowercase() == slug_lower)
|
||||
.map(|skill| SkillContext {
|
||||
label: format!("Active skill: {}", skill.name),
|
||||
content: format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
})
|
||||
.find(|s| normalize_skill_key(&s.slug) == slug_key)
|
||||
.map(Self::context_from_skill)
|
||||
}
|
||||
|
||||
fn find_skill_by_name(&self, name: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let name_lower = name.to_lowercase();
|
||||
let name_lower = name.to_lowercase().replace(['-', '_'], " ");
|
||||
skills
|
||||
.iter()
|
||||
.find(|s| s.name.to_lowercase() == name_lower)
|
||||
.map(|skill| SkillContext {
|
||||
label: format!("Active skill: {}", skill.name),
|
||||
content: format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
})
|
||||
.find(|s| s.name.to_lowercase().replace(['-', '_'], " ") == name_lower)
|
||||
.map(Self::context_from_skill)
|
||||
}
|
||||
|
||||
fn context_from_skill(skill: &SkillEntry) -> SkillContext {
|
||||
SkillContext::new(
|
||||
skill,
|
||||
SkillActivation::Active,
|
||||
None,
|
||||
format!("# {} (actively invoked)\n\n{}", skill.name, skill.content),
|
||||
None,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,28 +1,20 @@
|
||||
//! Auto skill awareness — background scanning for skill relevance.
|
||||
//!
|
||||
//! Periodically (or on-demand) scans the conversation context to identify
|
||||
//! which enabled skills might be relevant, based on keyword overlap between
|
||||
//! the skill's metadata (name, description, content snippets) and the
|
||||
//! conversation text.
|
||||
//!
|
||||
//! This is the "ambient awareness" mode — the agent is always aware of
|
||||
//! which skills might apply without the user explicitly invoking them.
|
||||
//! Auto skill awareness: ambient relevance matching.
|
||||
|
||||
use super::{SkillContext, SkillEntry};
|
||||
use super::{SkillActivation, SkillContext, SkillEntry};
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Auto skill awareness config.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AutoSkillAwareness {
|
||||
/// Minimum keyword overlap score (0.0–1.0) to consider a skill relevant.
|
||||
/// Minimum overlap score to consider a skill relevant.
|
||||
min_score: f32,
|
||||
/// Maximum number of skills to inject via auto-awareness.
|
||||
/// Maximum number of auto-selected skills.
|
||||
max_skills: usize,
|
||||
}
|
||||
|
||||
impl Default for AutoSkillAwareness {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_score: 0.15,
|
||||
min_score: 0.20,
|
||||
max_skills: 3,
|
||||
}
|
||||
}
|
||||
@ -36,10 +28,6 @@ impl AutoSkillAwareness {
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect relevant skills by scoring keyword overlap between skill metadata
|
||||
/// and the conversation text (current input + recent history).
|
||||
///
|
||||
/// Returns up to `max_skills` skills sorted by relevance score.
|
||||
pub async fn detect(
|
||||
&self,
|
||||
current_input: &str,
|
||||
@ -50,129 +38,151 @@ impl AutoSkillAwareness {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Build a combined corpus from current input and recent history (last 5 messages).
|
||||
let history_text: String = history
|
||||
let history_text = history
|
||||
.iter()
|
||||
.rev()
|
||||
.take(5)
|
||||
.map(|s| s.as_str())
|
||||
.map(String::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let corpus = format!("{} {}", current_input, history_text).to_lowercase();
|
||||
|
||||
// Extract keywords from the corpus (split on whitespace + strip punctuation).
|
||||
let corpus_keywords = Self::extract_keywords(&corpus);
|
||||
|
||||
if corpus_keywords.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Score each skill.
|
||||
let mut scored: Vec<_> = skills
|
||||
let mut scored = skills
|
||||
.iter()
|
||||
.map(|skill| {
|
||||
.filter_map(|skill| {
|
||||
let score = Self::score_skill(&corpus_keywords, skill);
|
||||
(score, skill)
|
||||
(score >= self.min_score).then_some((score, skill))
|
||||
})
|
||||
.filter(|(score, _)| *score >= self.min_score)
|
||||
.collect();
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Sort descending by score.
|
||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
scored
|
||||
.into_iter()
|
||||
.take(self.max_skills)
|
||||
.map(|(_, skill)| {
|
||||
// Extract a short relevant excerpt around the first keyword match.
|
||||
.map(|(score, skill)| {
|
||||
let excerpt = Self::best_excerpt(&corpus, skill);
|
||||
SkillContext {
|
||||
label: format!("Auto skill: {}", skill.name),
|
||||
content: excerpt,
|
||||
}
|
||||
SkillContext::new(skill, SkillActivation::Auto, None, excerpt, Some(score))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Extract meaningful keywords from text.
|
||||
fn extract_keywords(text: &str) -> Vec<String> {
|
||||
// Common English + Chinese stopwords to filter out.
|
||||
fn extract_keywords(text: &str) -> HashSet<String> {
|
||||
const STOPWORDS: &[&str] = &[
|
||||
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has",
|
||||
"had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can",
|
||||
"to", "of", "in", "for", "on", "with", "at", "by", "from", "as", "or", "and", "but",
|
||||
"if", "not", "no", "so", "this", "that", "these", "those", "it", "its", "i", "you",
|
||||
"he", "she", "we", "they", "what", "which", "who", "when", "where", "why", "how",
|
||||
"all", "each", "every", "both", "few", "more", "most", "other", "some", "such", "only",
|
||||
"own", "same", "than", "too", "very", "just", "also", "now", "here", "there", "then",
|
||||
"once", "again", "always", "ever", "的", "了", "是", "在", "我", "你", "他", "她",
|
||||
"它", "们", "这", "那", "个", "一", "上", "下", "来", "去", "说", "看", "想", "要",
|
||||
"会", "能", "和", "与", "或", "不", "就", "也", "都", "还", "从", "到", "把", "被",
|
||||
"让", "给", "用", "做", "为", "以", "及", "等", "很", "太", "比较",
|
||||
"we", "they", "what", "which", "who", "when", "where", "why", "how", "all", "each",
|
||||
"every", "more", "most", "some", "such", "only", "same", "than", "too", "very", "just",
|
||||
"also", "now", "here", "there", "then",
|
||||
];
|
||||
|
||||
text.split_whitespace()
|
||||
.filter(|w| {
|
||||
let w_clean = w.trim_matches(|c: char| !c.is_alphanumeric());
|
||||
w_clean.len() >= 3 && !STOPWORDS.contains(&w_clean)
|
||||
})
|
||||
.map(|w| w.to_lowercase())
|
||||
.collect()
|
||||
let mut terms = HashSet::new();
|
||||
let mut ascii = String::new();
|
||||
let mut cjk_run = String::new();
|
||||
|
||||
for ch in text.chars() {
|
||||
if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' {
|
||||
if !cjk_run.is_empty() {
|
||||
Self::push_cjk_terms(&mut terms, &cjk_run);
|
||||
cjk_run.clear();
|
||||
}
|
||||
ascii.push(ch);
|
||||
} else if ('\u{4e00}'..='\u{9fff}').contains(&ch) {
|
||||
if !ascii.is_empty() {
|
||||
Self::push_ascii_term(&mut terms, &ascii, STOPWORDS);
|
||||
ascii.clear();
|
||||
}
|
||||
cjk_run.push(ch);
|
||||
} else {
|
||||
if !ascii.is_empty() {
|
||||
Self::push_ascii_term(&mut terms, &ascii, STOPWORDS);
|
||||
ascii.clear();
|
||||
}
|
||||
if !cjk_run.is_empty() {
|
||||
Self::push_cjk_terms(&mut terms, &cjk_run);
|
||||
cjk_run.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !ascii.is_empty() {
|
||||
Self::push_ascii_term(&mut terms, &ascii, STOPWORDS);
|
||||
}
|
||||
if !cjk_run.is_empty() {
|
||||
Self::push_cjk_terms(&mut terms, &cjk_run);
|
||||
}
|
||||
|
||||
terms
|
||||
}
|
||||
|
||||
/// Score a skill by keyword overlap between the corpus keywords and the skill's
|
||||
/// name + description + content (first 500 chars).
|
||||
fn score_skill(corpus_keywords: &[String], skill: &SkillEntry) -> f32 {
|
||||
let skill_text = format!(
|
||||
"{} {}",
|
||||
skill.name,
|
||||
skill.description.as_deref().unwrap_or("")
|
||||
);
|
||||
let skill_text = skill_text.to_lowercase();
|
||||
let skill_keywords = Self::extract_keywords(&skill_text);
|
||||
let content_sample = skill
|
||||
.content
|
||||
.chars()
|
||||
.take(500)
|
||||
.collect::<String>()
|
||||
fn push_ascii_term(terms: &mut HashSet<String>, raw: &str, stopwords: &[&str]) {
|
||||
let term = raw
|
||||
.trim_matches(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-')
|
||||
.to_lowercase();
|
||||
let content_keywords = Self::extract_keywords(&content_sample);
|
||||
let all_skill_keywords = [&skill_keywords[..], &content_keywords[..]].concat();
|
||||
if term.len() >= 3 && !stopwords.contains(&term.as_str()) {
|
||||
terms.insert(term);
|
||||
}
|
||||
}
|
||||
|
||||
if all_skill_keywords.is_empty() {
|
||||
/// Adds search terms derived from one unbroken run of CJK characters.
///
/// The run is indexed by its overlapping two-character windows. A run of
/// four or more characters is additionally inserted whole as one extra term.
/// A run shorter than two characters contributes nothing.
///
/// * `terms` - set that receives the generated terms.
/// * `raw` - the run of consecutive CJK characters to index.
fn push_cjk_terms(terms: &mut HashSet<String>, raw: &str) {
    let chars: Vec<char> = raw.chars().collect();

    // `windows(2)` yields nothing for runs shorter than two characters, so no
    // explicit length guard is required.
    terms.extend(chars.windows(2).map(|pair| pair.iter().collect::<String>()));

    if chars.len() >= 4 {
        // `raw` already is the whole run; no need to rebuild it from `chars`.
        terms.insert(raw.to_owned());
    }
}
|
||||
|
||||
fn score_skill(corpus_keywords: &HashSet<String>, skill: &SkillEntry) -> f32 {
|
||||
let skill_text = format!(
|
||||
"{} {} {}",
|
||||
skill.name,
|
||||
skill.description.as_deref().unwrap_or(""),
|
||||
skill.content.chars().take(800).collect::<String>()
|
||||
)
|
||||
.to_lowercase();
|
||||
let skill_keywords = Self::extract_keywords(&skill_text);
|
||||
|
||||
if skill_keywords.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let overlap: usize = corpus_keywords
|
||||
let overlap = corpus_keywords
|
||||
.iter()
|
||||
.filter(|kw| {
|
||||
all_skill_keywords
|
||||
skill_keywords
|
||||
.iter()
|
||||
.any(|sk| sk.contains(kw.as_str()) || kw.as_str().contains(sk.as_str()))
|
||||
.any(|sk| sk == *kw || (kw.len() >= 4 && sk.contains(kw.as_str())))
|
||||
})
|
||||
.count();
|
||||
|
||||
overlap as f32 / all_skill_keywords.len().max(1) as f32
|
||||
let denominator = corpus_keywords.len().min(skill_keywords.len()).max(1);
|
||||
overlap as f32 / denominator as f32
|
||||
}
|
||||
|
||||
/// Extract the best excerpt from skill content — the paragraph most relevant to the corpus.
|
||||
fn best_excerpt(corpus: &str, skill: &SkillEntry) -> String {
|
||||
// Try to find a relevant paragraph: one that shares the most keywords with corpus.
|
||||
let corpus_kws = Self::extract_keywords(corpus);
|
||||
|
||||
let best_para = skill
|
||||
.content
|
||||
.split('\n')
|
||||
.filter(|para| !para.trim().is_empty())
|
||||
.map(|para| {
|
||||
let para_kws = Self::extract_keywords(¶.to_lowercase());
|
||||
let overlap: usize = corpus_kws
|
||||
let overlap = corpus_kws
|
||||
.iter()
|
||||
.filter(|kw| {
|
||||
para_kws
|
||||
.iter()
|
||||
.any(|pk| pk.contains(kw.as_str()) || kw.as_str().contains(pk.as_str()))
|
||||
.any(|pk| pk == *kw || pk.contains(kw.as_str()))
|
||||
})
|
||||
.count();
|
||||
(overlap, para)
|
||||
@ -181,12 +191,38 @@ impl AutoSkillAwareness {
|
||||
.max_by_key(|(score, _)| *score);
|
||||
|
||||
if let Some((_, para)) = best_para {
|
||||
// Return the best paragraph with a header.
|
||||
format!("# {} (auto-matched)\n\n{}", skill.name, para.trim())
|
||||
} else {
|
||||
// Fallback: use first 300 chars of content as excerpt.
|
||||
let excerpt = skill.content.chars().take(300).collect::<String>();
|
||||
format!("# {} (auto-matched)\n\n{}...", skill.name, excerpt.trim())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn skill(slug: &str, name: &str, description: &str, content: &str) -> SkillEntry {
|
||||
SkillEntry {
|
||||
slug: slug.to_string(),
|
||||
name: name.to_string(),
|
||||
description: Some(description.to_string()),
|
||||
content: content.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn auto_detects_chinese_without_spaces() {
|
||||
let skills = vec![skill(
|
||||
"code-review",
|
||||
"代码审查",
|
||||
"检查代码安全和性能问题",
|
||||
"审查变更,发现 bug、安全漏洞和性能风险。",
|
||||
)];
|
||||
let found = AutoSkillAwareness::new(0.10, 3)
|
||||
.detect("帮我检查这次代码安全问题", &[], &skills)
|
||||
.await;
|
||||
assert_eq!(found[0].slug, "code-review");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,16 +1,11 @@
|
||||
//! Skill perception system for the AI agent.
|
||||
//!
|
||||
//! Provides three perception modes for injecting relevant skills into the agent's context:
|
||||
//! Skills are injected through three modes:
|
||||
//! - Active: explicit user invocation, highest priority.
|
||||
//! - Passive: tool-call or event driven activation.
|
||||
//! - Auto: ambient keyword relevance.
|
||||
//!
|
||||
//! - **Auto (自动感知)**: Background awareness that scans conversation content for skill
|
||||
//! relevance based on keyword matching and semantic similarity.
|
||||
//!
|
||||
//! - **Active (主动感知)**: Proactive skill retrieval triggered by explicit user intent,
|
||||
//! such as mentioning a skill slug directly in the message. Both keyword and vector-based.
|
||||
//!
|
||||
//! - **Passive (被动感知)**: Reactive skill retrieval triggered by tool-call events,
|
||||
//! such as when the agent mentions a specific skill in its reasoning. Both keyword and
|
||||
//! vector-based.
|
||||
//! Vector search is merged by the message builder as a semantic auto signal.
|
||||
|
||||
pub mod active;
|
||||
pub mod auto;
|
||||
@ -23,23 +18,95 @@ pub use passive::PassiveSkillAwareness;
|
||||
pub use vector::{VectorActiveAwareness, VectorPassiveAwareness};
|
||||
|
||||
use crate::client::ChatRequestMessage;
|
||||
use std::collections::HashSet;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum SkillActivation {
|
||||
Active,
|
||||
Passive,
|
||||
Vector,
|
||||
Auto,
|
||||
}
|
||||
|
||||
impl SkillActivation {
|
||||
fn label(self) -> &'static str {
|
||||
match self {
|
||||
SkillActivation::Active => "Active",
|
||||
SkillActivation::Passive => "Passive",
|
||||
SkillActivation::Vector => "Vector",
|
||||
SkillActivation::Auto => "Auto",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rank(self) -> u8 {
|
||||
match self {
|
||||
SkillActivation::Active => 0,
|
||||
SkillActivation::Passive => 1,
|
||||
SkillActivation::Vector => 2,
|
||||
SkillActivation::Auto => 3,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A chunk of skill context ready to be injected into the message list.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SkillContext {
|
||||
/// Stable skill identifier used for de-duplication across trigger sources.
|
||||
pub slug: String,
|
||||
/// Human-readable label shown to the AI, e.g. "Active skill: code-review"
|
||||
pub label: String,
|
||||
/// The actual skill content to inject.
|
||||
pub content: String,
|
||||
/// How this skill was selected.
|
||||
pub activation: SkillActivation,
|
||||
/// Optional relevance score. Active/passive matches use `None`.
|
||||
pub score: Option<f32>,
|
||||
}
|
||||
|
||||
/// Converts skill context into a system message for injection.
|
||||
impl SkillContext {
|
||||
pub fn new(
|
||||
skill: &SkillEntry,
|
||||
activation: SkillActivation,
|
||||
reason: Option<&str>,
|
||||
content: String,
|
||||
score: Option<f32>,
|
||||
) -> Self {
|
||||
let label = match reason {
|
||||
Some(reason) => format!("{} skill: {} ({})", activation.label(), skill.name, reason),
|
||||
None => format!("{} skill: {}", activation.label(), skill.name),
|
||||
};
|
||||
Self {
|
||||
slug: skill.slug.clone(),
|
||||
label,
|
||||
content,
|
||||
activation,
|
||||
score,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dedupe_key(&self) -> String {
|
||||
if !self.slug.trim().is_empty() {
|
||||
return normalize_skill_key(&self.slug);
|
||||
}
|
||||
normalize_skill_key(&self.label)
|
||||
}
|
||||
|
||||
pub fn to_system_message(self) -> ChatRequestMessage {
|
||||
ChatRequestMessage::system(format!("[{}]\n{}", self.label, self.content))
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalizes a skill slug, name, or label into a canonical lookup key.
///
/// The value is trimmed and lowercased, underscores and spaces become `-`,
/// and every character that is not alphanumeric, `-`, or `/` is dropped.
///
/// Alphanumeric is judged with Unicode rules rather than ASCII-only rules, so
/// non-Latin identifiers such as `代码审查` keep their characters. An
/// ASCII-only filter reduces such identifiers to the empty string, which makes
/// distinct skills share one key; an empty key is also contained in every
/// string, so substring checks against it always succeed.
///
/// Returns an empty string when the input has no characters that survive the
/// filter (for example, whitespace only).
pub fn normalize_skill_key(value: &str) -> String {
    value
        .trim()
        .to_lowercase()
        .chars()
        .filter_map(|c| match c {
            // Word separators are unified to a hyphen.
            '_' | ' ' => Some('-'),
            // Hyphens and path separators are part of the key.
            '-' | '/' => Some(c),
            c if c.is_alphanumeric() => Some(c),
            _ => None,
        })
        .collect()
}
|
||||
|
||||
/// Unified perception service combining all three modes.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerceptionService {
|
||||
@ -77,19 +144,18 @@ impl PerceptionService {
|
||||
enabled_skills: &[SkillEntry],
|
||||
) -> Vec<SkillContext> {
|
||||
let mut results = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
// Active: explicit skill invocation (highest priority)
|
||||
if let Some(skill) = self.active.detect(input, enabled_skills) {
|
||||
seen.insert(skill.dedupe_key());
|
||||
results.push(skill);
|
||||
}
|
||||
|
||||
// Passive: triggered by tool-call events
|
||||
for tc in tool_calls {
|
||||
if let Some(skill) = self.passive.detect(tc, enabled_skills) {
|
||||
if !results
|
||||
.iter()
|
||||
.any(|r: &SkillContext| r.label == skill.label)
|
||||
{
|
||||
if seen.insert(skill.dedupe_key()) {
|
||||
results.push(skill);
|
||||
}
|
||||
}
|
||||
@ -98,10 +164,7 @@ impl PerceptionService {
|
||||
// Auto: keyword-based relevance matching
|
||||
let auto_results = self.auto.detect(input, history, enabled_skills).await;
|
||||
for skill in auto_results {
|
||||
if !results
|
||||
.iter()
|
||||
.any(|r: &SkillContext| r.label == skill.label)
|
||||
{
|
||||
if seen.insert(skill.dedupe_key()) {
|
||||
results.push(skill);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,17 +1,7 @@
|
||||
//! Passive skill awareness — reactive skill retrieval triggered by events.
|
||||
//!
|
||||
//! The agent passively activates a skill when its slug or name appears in:
|
||||
//!
|
||||
//! - Tool call arguments (e.g., a tool is called with a repository name that matches a "git" skill)
|
||||
//! - Tool call results / observations (e.g., a linter reports issues matching a "code-review" skill)
|
||||
//! - System events emitted during the agent loop (e.g., "PR opened" → "pr-review" skill)
|
||||
//!
|
||||
//! This is lower-priority than active but higher than auto — it's triggered by
|
||||
//! specific events rather than ambient relevance scoring.
|
||||
//! Passive skill awareness: tool-call and event driven activation.
|
||||
|
||||
use super::{SkillContext, SkillEntry, ToolCallEvent};
|
||||
use super::{SkillActivation, SkillContext, SkillEntry, ToolCallEvent, normalize_skill_key};
|
||||
|
||||
/// Passive skill awareness triggered by tool-call and event context.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct PassiveSkillAwareness;
|
||||
|
||||
@ -20,123 +10,110 @@ impl PassiveSkillAwareness {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Detect skill activation from tool-call events.
|
||||
///
|
||||
/// The agent can passively "wake up" a skill when:
|
||||
/// - A tool call's name or arguments contain a skill slug or keyword
|
||||
/// - A tool call result mentions a skill name
|
||||
///
|
||||
/// This is primarily driven by tool naming conventions and argument patterns.
|
||||
/// For example, a tool named `git_diff` might passively activate a `git` skill.
|
||||
pub fn detect(&self, event: &ToolCallEvent, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let tool_name = event.tool_name.to_lowercase();
|
||||
let args = event.arguments.to_lowercase();
|
||||
|
||||
if let Some(skill) = Self::match_tool_category(&tool_name, skills) {
|
||||
return Some(Self::context_from_skill(skill, "tool category"));
|
||||
}
|
||||
|
||||
for skill in skills {
|
||||
let slug = skill.slug.to_lowercase();
|
||||
let slug = normalize_skill_key(&skill.slug);
|
||||
let name = skill.name.to_lowercase();
|
||||
|
||||
// Trigger 1: Tool name contains skill slug segment.
|
||||
// e.g., tool "git_blame" → skill "git/*" activates
|
||||
if Self::slug_in_text(&tool_name, &slug) {
|
||||
return Some(Self::context_from_skill(skill, "tool invocation"));
|
||||
}
|
||||
|
||||
// Trigger 2: Tool arguments contain skill slug or name keywords.
|
||||
// e.g., arguments mention "security" → "security/scan" skill
|
||||
if Self::slug_in_text(&args, &slug) || Self::keyword_match(&args, &name) {
|
||||
return Some(Self::context_from_skill(skill, "tool arguments"));
|
||||
}
|
||||
|
||||
// Trigger 3: Common tool prefixes that map to skill categories.
|
||||
if let Some(cat_skill) = Self::match_tool_category(&tool_name, skills) {
|
||||
return Some(cat_skill);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Detect skill activation from a raw text observation (e.g., tool result text).
|
||||
pub fn detect_from_text(&self, text: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let text_lower = text.to_lowercase();
|
||||
|
||||
for skill in skills {
|
||||
let slug = skill.slug.to_lowercase();
|
||||
skills.iter().find_map(|skill| {
|
||||
let slug = normalize_skill_key(&skill.slug);
|
||||
let name = skill.name.to_lowercase();
|
||||
|
||||
if Self::slug_in_text(&text_lower, &slug) || Self::keyword_match(&text_lower, &name) {
|
||||
return Some(Self::context_from_skill(skill, "observation match"));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
(Self::slug_in_text(&text_lower, &slug) || Self::keyword_match(&text_lower, &name))
|
||||
.then(|| Self::context_from_skill(skill, "observation match"))
|
||||
})
|
||||
}
|
||||
|
||||
/// Match common tool name prefixes to skill categories.
|
||||
fn match_tool_category(tool_name: &str, skills: &[SkillEntry]) -> Option<SkillContext> {
|
||||
let category_map = [
|
||||
("git_", "git"),
|
||||
("repo_", "repo"),
|
||||
("issue_", "issue"),
|
||||
("pr_", "pull_request"),
|
||||
("pull_request_", "pull_request"),
|
||||
("code_review", "code-review"),
|
||||
("security_scan", "security"),
|
||||
("linter", "linter"),
|
||||
("test_", "testing"),
|
||||
("deploy_", "deployment"),
|
||||
("docker_", "docker"),
|
||||
("k8s_", "kubernetes"),
|
||||
("db_", "database"),
|
||||
("sql_", "database"),
|
||||
fn match_tool_category<'a>(
|
||||
tool_name: &str,
|
||||
skills: &'a [SkillEntry],
|
||||
) -> Option<&'a SkillEntry> {
|
||||
const CATEGORY_MAP: &[(&str, &[&str])] = &[
|
||||
("git_", &["git"]),
|
||||
("repo_", &["repo", "repository"]),
|
||||
("project_", &["repo", "repository", "project"]),
|
||||
("issue_", &["issue", "triage"]),
|
||||
("list_issues", &["issue", "triage"]),
|
||||
("create_issue", &["issue"]),
|
||||
("update_issue", &["issue"]),
|
||||
("pr_", &["pr", "pull", "pull-request"]),
|
||||
("pull_request_", &["pr", "pull", "pull-request"]),
|
||||
("code_review", &["code-review", "review"]),
|
||||
("security_", &["security", "review"]),
|
||||
("test_", &["test", "testing"]),
|
||||
("read_", &["file", "reader"]),
|
||||
("git_file", &["file", "reader"]),
|
||||
("curl", &["http", "api"]),
|
||||
("project_curl", &["http", "api"]),
|
||||
];
|
||||
|
||||
for (prefix, category) in category_map {
|
||||
for (prefix, categories) in CATEGORY_MAP {
|
||||
if tool_name.starts_with(prefix) {
|
||||
if let Some(skill) = skills.iter().find(|s| {
|
||||
s.slug.to_lowercase().contains(category)
|
||||
|| s.name.to_lowercase().contains(category)
|
||||
if let Some(skill) = skills.iter().find(|skill| {
|
||||
let slug = normalize_skill_key(&skill.slug);
|
||||
let name = skill.name.to_lowercase();
|
||||
categories
|
||||
.iter()
|
||||
.any(|category| slug.contains(category) || name.contains(category))
|
||||
}) {
|
||||
return Some(Self::context_from_skill(skill, "tool category match"));
|
||||
return Some(skill);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// True if the slug (or a significant segment of it) appears in the text.
|
||||
fn slug_in_text(text: &str, slug: &str) -> bool {
|
||||
text.contains(slug)
|
||||
|| slug
|
||||
.split('/')
|
||||
.split(['/', '-'])
|
||||
.filter(|seg| seg.len() >= 3)
|
||||
.any(|seg| text.contains(seg))
|
||||
}
|
||||
|
||||
/// Match skill name keywords against the text (handles multi-word names).
|
||||
fn keyword_match(text: &str, name: &str) -> bool {
|
||||
// For multi-word names, require all significant words to appear.
|
||||
let significant: Vec<_> = name
|
||||
let significant = name
|
||||
.split(|c: char| !c.is_alphanumeric())
|
||||
.filter(|w| w.len() >= 3)
|
||||
.collect();
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if significant.len() >= 2 {
|
||||
significant.iter().all(|w| text.contains(*w))
|
||||
} else {
|
||||
significant.first().map_or(false, |w| text.contains(w))
|
||||
significant.first().is_some_and(|w| text.contains(w))
|
||||
}
|
||||
}
|
||||
|
||||
fn context_from_skill(skill: &SkillEntry, trigger: &str) -> SkillContext {
|
||||
SkillContext {
|
||||
label: format!("Passive skill: {} ({})", skill.name, trigger),
|
||||
content: format!(
|
||||
"# {} (passive — {})\n\n{}",
|
||||
SkillContext::new(
|
||||
skill,
|
||||
SkillActivation::Passive,
|
||||
Some(trigger),
|
||||
format!(
|
||||
"# {} (passive: {})\n\n{}",
|
||||
skill.name, trigger, skill.content
|
||||
),
|
||||
}
|
||||
None,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@
|
||||
|
||||
use crate::client::ChatRequestMessage;
|
||||
use crate::embed::EmbedService;
|
||||
use crate::perception::SkillContext;
|
||||
use crate::perception::{SkillActivation, SkillContext, SkillEntry, normalize_skill_key};
|
||||
|
||||
/// Maximum relevant memories to inject.
|
||||
const MAX_MEMORY_RESULTS: usize = 3;
|
||||
@ -81,13 +81,27 @@ impl VectorActiveAwareness {
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("skill")
|
||||
.to_string();
|
||||
SkillContext {
|
||||
label: format!("[Vector] Skill: {}", name),
|
||||
content: format!(
|
||||
"[Relevant skill (score {:.2})]\n{}",
|
||||
r.score, r.payload.text
|
||||
),
|
||||
}
|
||||
let slug = r
|
||||
.payload
|
||||
.extra
|
||||
.as_ref()
|
||||
.and_then(|v| v.get("slug"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(normalize_skill_key)
|
||||
.unwrap_or_else(|| normalize_skill_key(&name));
|
||||
let skill = SkillEntry {
|
||||
slug,
|
||||
name,
|
||||
description: None,
|
||||
content: r.payload.text.clone(),
|
||||
};
|
||||
SkillContext::new(
|
||||
&skill,
|
||||
SkillActivation::Vector,
|
||||
Some(&format!("score {:.2}", r.score)),
|
||||
format!("[Relevant skill]\n{}", r.payload.text),
|
||||
Some(r.score),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user