feat(fctool): add knowledge base tools for AI
Add repo_doc_index, repo_doc_read, and repo_doc_search function call tools for AI to index, read, and search through documentation repos.
This commit is contained in:
parent
5f12b07120
commit
4ef0d5b570
384
libs/fctool/src/git_tools/kb.rs
Normal file
384
libs/fctool/src/git_tools/kb.rs
Normal file
@ -0,0 +1,384 @@
|
|||||||
|
//! Knowledge-base (documentation) repository tools for AI.
|
||||||
|
//!
|
||||||
|
//! Provides tools for AI to quickly index, read, and search
|
||||||
|
//! through documentation / knowledge-base repositories.
|
||||||
|
|
||||||
|
use super::ctx::GitToolCtx;
|
||||||
|
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
// ── Helpers ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Split a markdown document into its frontmatter block and body.
///
/// Leading whitespace of `raw` is ignored. A frontmatter block is recognised
/// only when the document starts with a line consisting of `---` (trailing
/// whitespace allowed) and a later line consisting of `---` closes it.
/// Requiring both delimiters to sit on their own line keeps a `---` that
/// appears inside a frontmatter value (e.g. `title: a---b`) or a leading
/// `----` rule from being mistaken for a delimiter.
///
/// Returns `(Some(frontmatter), body)` when a block is found. `frontmatter`
/// is the raw text between the delimiters (including the newline that ends
/// the opening line) and `body` is everything after the closing line with
/// leading whitespace removed. Otherwise returns `(None, trimmed_input)`.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    let after_open = match trimmed.strip_prefix("---") {
        Some(rest) => rest,
        None => return (None, trimmed),
    };

    let mut pieces = after_open.split_inclusive('\n');

    // The first piece is whatever follows `---` on the opening line; anything
    // other than whitespace there means this is not a frontmatter delimiter.
    let opening_rest = pieces.next().unwrap_or("");
    if !opening_rest.trim().is_empty() {
        return (None, trimmed);
    }

    // Byte offset (within `after_open`) of the line currently being inspected.
    let mut offset = opening_rest.len();
    for line in pieces {
        if line.trim_end() == "---" {
            let frontmatter = &after_open[..offset];
            let body = after_open[offset + line.len()..].trim_start();
            return (Some(frontmatter), body);
        }
        offset += line.len();
    }

    // Opening delimiter without a closing one: treat the whole text as body.
    (None, trimmed)
}
|
||||||
|
|
||||||
|
/// Extract all headings (lines starting with #) from markdown body.
|
||||||
|
fn extract_headings(body: &str) -> Vec<serde_json::Value> {
|
||||||
|
body.lines()
|
||||||
|
.filter_map(|line| {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.starts_with("# ") {
|
||||||
|
Some(serde_json::json!({ "level": 1, "text": trimmed[2..].trim() }))
|
||||||
|
} else if trimmed.starts_with("## ") {
|
||||||
|
Some(serde_json::json!({ "level": 2, "text": trimmed[3..].trim() }))
|
||||||
|
} else if trimmed.starts_with("### ") {
|
||||||
|
Some(serde_json::json!({ "level": 3, "text": trimmed[4..].trim() }))
|
||||||
|
} else if trimmed.starts_with("#### ") {
|
||||||
|
Some(serde_json::json!({ "level": 4, "text": trimmed[5..].trim() }))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve HEAD to a tree for traversal.
|
||||||
|
fn head_tree(domain: &git::GitDomain) -> Result<git2::Tree<'_>, String> {
|
||||||
|
let repo = domain.repo();
|
||||||
|
let head = repo.head().map_err(|e| format!("no HEAD: {e}"))?;
|
||||||
|
head.peel_to_tree().map_err(|e| format!("no tree: {e}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Tool executors ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Tool: repo_doc_index — list all markdown docs with frontmatter
|
||||||
|
async fn repo_doc_index_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
|
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||||
|
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||||
|
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||||
|
|
||||||
|
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||||
|
let repo = domain.repo();
|
||||||
|
let tree = head_tree(&domain)?;
|
||||||
|
|
||||||
|
let mut docs = Vec::new();
|
||||||
|
let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
|
||||||
|
|
||||||
|
while let Some((current_tree, prefix)) = stack.pop() {
|
||||||
|
for entry in current_tree.iter() {
|
||||||
|
let name = match entry.name() {
|
||||||
|
Some(n) => n,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
let entry_path = if prefix.is_empty() {
|
||||||
|
name.to_string()
|
||||||
|
} else {
|
||||||
|
format!("{}/{}", prefix, name)
|
||||||
|
};
|
||||||
|
match entry.kind() {
|
||||||
|
Some(git2::ObjectType::Tree) => {
|
||||||
|
if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") {
|
||||||
|
if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
|
||||||
|
stack.push((subtree, entry_path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(git2::ObjectType::Blob) => {
|
||||||
|
if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") {
|
||||||
|
if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
|
||||||
|
let raw = String::from_utf8_lossy(blob.content());
|
||||||
|
let (fm_raw, body) = extract_frontmatter(&raw);
|
||||||
|
let metadata: serde_json::Value = fm_raw
|
||||||
|
.and_then(|fm| serde_json::from_str(fm).ok())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let title = metadata
|
||||||
|
.get("title")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from)
|
||||||
|
.or_else(|| {
|
||||||
|
// Fall back to first # heading
|
||||||
|
body.lines()
|
||||||
|
.find(|l| l.trim().starts_with("# "))
|
||||||
|
.map(|l| l.trim()[2..].trim().to_string())
|
||||||
|
});
|
||||||
|
|
||||||
|
let description = metadata
|
||||||
|
.get("description")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from)
|
||||||
|
.or_else(|| {
|
||||||
|
// Fall back to first non-heading non-empty line
|
||||||
|
body.lines()
|
||||||
|
.find(|l| {
|
||||||
|
let t = l.trim();
|
||||||
|
!t.is_empty() && !t.starts_with('#')
|
||||||
|
})
|
||||||
|
.map(|l| l.trim().chars().take(200).collect::<String>())
|
||||||
|
});
|
||||||
|
|
||||||
|
let tags: Vec<String> = metadata
|
||||||
|
.get("tags")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let headings = extract_headings(body);
|
||||||
|
|
||||||
|
docs.push(serde_json::json!({
|
||||||
|
"path": entry_path,
|
||||||
|
"title": title,
|
||||||
|
"description": description,
|
||||||
|
"tags": tags,
|
||||||
|
"headings": headings,
|
||||||
|
"size": raw.len(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by path for consistent ordering
|
||||||
|
docs.sort_by(|a, b| {
|
||||||
|
a["path"].as_str().unwrap_or("").cmp(b["path"].as_str().unwrap_or(""))
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(serde_json::json!({
|
||||||
|
"total": docs.len(),
|
||||||
|
"docs": docs
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tool: repo_doc_read — read a specific document with structure
|
||||||
|
async fn repo_doc_read_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
|
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||||
|
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||||
|
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||||
|
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||||
|
|
||||||
|
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||||
|
let repo = domain.repo();
|
||||||
|
let tree = head_tree(&domain)?;
|
||||||
|
|
||||||
|
// Navigate to the file using git2 path lookup
|
||||||
|
let entry = tree.get_path(std::path::Path::new(path))
|
||||||
|
.map_err(|e| format!("file '{}' not found: {e}", path))?;
|
||||||
|
let blob = entry.to_object(repo).and_then(|o| o.peel_to_blob())
|
||||||
|
.map_err(|e| format!("not a blob: {e}"))?;
|
||||||
|
|
||||||
|
let raw = String::from_utf8_lossy(blob.content());
|
||||||
|
let (fm_raw, body) = extract_frontmatter(&raw);
|
||||||
|
let metadata: serde_json::Value = fm_raw
|
||||||
|
.and_then(|fm| serde_json::from_str(fm).ok())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let title = metadata
|
||||||
|
.get("title")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from)
|
||||||
|
.or_else(|| {
|
||||||
|
body.lines()
|
||||||
|
.find(|l| l.trim().starts_with("# "))
|
||||||
|
.map(|l| l.trim()[2..].trim().to_string())
|
||||||
|
});
|
||||||
|
|
||||||
|
let headings = extract_headings(body);
|
||||||
|
|
||||||
|
Ok(serde_json::json!({
|
||||||
|
"path": path,
|
||||||
|
"title": title,
|
||||||
|
"metadata": metadata,
|
||||||
|
"headings": headings,
|
||||||
|
"content": body.to_string(),
|
||||||
|
"size": raw.len(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tool: repo_doc_search — search through docs content
|
||||||
|
async fn repo_doc_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
|
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||||
|
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||||
|
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||||
|
let keyword = p.get("keyword").and_then(|v| v.as_str()).ok_or("missing keyword")?;
|
||||||
|
let context_lines = p.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
|
||||||
|
|
||||||
|
let keyword_lower = keyword.to_lowercase();
|
||||||
|
|
||||||
|
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||||
|
let repo = domain.repo();
|
||||||
|
let tree = head_tree(&domain)?;
|
||||||
|
|
||||||
|
let mut matches: Vec<serde_json::Value> = Vec::new();
|
||||||
|
let mut matched_files = 0u64;
|
||||||
|
let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
|
||||||
|
|
||||||
|
while let Some((current_tree, prefix)) = stack.pop() {
|
||||||
|
for entry in current_tree.iter() {
|
||||||
|
let name = match entry.name() {
|
||||||
|
Some(n) => n,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
let entry_path = if prefix.is_empty() {
|
||||||
|
name.to_string()
|
||||||
|
} else {
|
||||||
|
format!("{}/{}", prefix, name)
|
||||||
|
};
|
||||||
|
match entry.kind() {
|
||||||
|
Some(git2::ObjectType::Tree) => {
|
||||||
|
if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") {
|
||||||
|
if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
|
||||||
|
stack.push((subtree, entry_path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(git2::ObjectType::Blob) => {
|
||||||
|
if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") || name.ends_with(".txt") {
|
||||||
|
if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
|
||||||
|
let content = String::from_utf8_lossy(blob.content());
|
||||||
|
let lines: Vec<&str> = content.lines().collect();
|
||||||
|
let mut file_hits: Vec<serde_json::Value> = Vec::new();
|
||||||
|
let mut hit_lines = Vec::new();
|
||||||
|
|
||||||
|
for (i, line) in lines.iter().enumerate() {
|
||||||
|
if line.to_lowercase().contains(&keyword_lower) {
|
||||||
|
hit_lines.push(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hit_lines.is_empty() {
|
||||||
|
matched_files += 1;
|
||||||
|
// Merge overlapping context windows
|
||||||
|
let mut windows: Vec<(usize, usize)> = Vec::new();
|
||||||
|
for &line_idx in &hit_lines {
|
||||||
|
let start = line_idx.saturating_sub(context_lines);
|
||||||
|
let end = (line_idx + context_lines + 1).min(lines.len());
|
||||||
|
if let Some(last) = windows.last_mut() {
|
||||||
|
if start <= last.1 {
|
||||||
|
last.1 = end;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
windows.push((start, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (start, end) in windows {
|
||||||
|
let snippet: Vec<String> = lines[start..end]
|
||||||
|
.iter()
|
||||||
|
.map(|l| l.to_string())
|
||||||
|
.collect();
|
||||||
|
file_hits.push(serde_json::json!({
|
||||||
|
"line_start": start + 1,
|
||||||
|
"line_end": end,
|
||||||
|
"snippet": snippet.join("\n"),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
matches.push(serde_json::json!({
|
||||||
|
"path": entry_path,
|
||||||
|
"hit_count": hit_lines.len(),
|
||||||
|
"snippets": file_hits,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(serde_json::json!({
|
||||||
|
"keyword": keyword,
|
||||||
|
"matched_files": matched_files,
|
||||||
|
"total_hits": matches.iter().map(|m| m["hit_count"].as_u64().unwrap_or(0)).sum::<u64>(),
|
||||||
|
"matches": matches,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Registration ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Build one `(key, ToolParam)` pair for a tool schema's property map.
///
/// Takes the parameter name, its schema type, a description and whether it is
/// required. The name is used both as the map key and as `ToolParam::name`;
/// `properties` and `items` are always left as `None`.
macro_rules! param {
    ($key:expr, $kind:expr, $help:expr, $is_required:expr) => {
        (
            $key.into(),
            ToolParam {
                name: $key.into(),
                param_type: $kind.into(),
                description: Some($help.into()),
                required: $is_required,
                properties: None,
                items: None,
            },
        )
    };
}
|
||||||
|
|
||||||
|
pub fn register_git_tools(registry: &mut ToolRegistry) {
|
||||||
|
// repo_doc_index
|
||||||
|
registry.register(
|
||||||
|
ToolDefinition::new("repo_doc_index")
|
||||||
|
.description("Index all documentation files in a knowledge-base repository. Lists every .md/.mdx file with its title, description, tags, and heading structure. Use this first to understand what documents are available.")
|
||||||
|
.parameters(ToolSchema {
|
||||||
|
schema_type: "object".into(),
|
||||||
|
properties: Some(HashMap::from([
|
||||||
|
param!("project_name", "string", "Project name (slug)", true),
|
||||||
|
param!("repo_name", "string", "Repository name", true),
|
||||||
|
])),
|
||||||
|
required: Some(vec!["project_name".into(), "repo_name".into()]),
|
||||||
|
}),
|
||||||
|
ToolHandler::new(|ctx, args| {
|
||||||
|
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||||
|
Box::pin(async move {
|
||||||
|
repo_doc_index_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
// repo_doc_read
|
||||||
|
registry.register(
|
||||||
|
ToolDefinition::new("repo_doc_read")
|
||||||
|
.description("Read a specific document from a knowledge-base repository. Returns the full markdown content plus extracted frontmatter metadata and heading structure. Use this after repo_doc_index to read the documents you need.")
|
||||||
|
.parameters(ToolSchema {
|
||||||
|
schema_type: "object".into(),
|
||||||
|
properties: Some(HashMap::from([
|
||||||
|
param!("project_name", "string", "Project name (slug)", true),
|
||||||
|
param!("repo_name", "string", "Repository name", true),
|
||||||
|
param!("path", "string", "Document file path within the repository", true),
|
||||||
|
])),
|
||||||
|
required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]),
|
||||||
|
}),
|
||||||
|
ToolHandler::new(|ctx, args| {
|
||||||
|
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||||
|
Box::pin(async move {
|
||||||
|
repo_doc_read_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
// repo_doc_search
|
||||||
|
registry.register(
|
||||||
|
ToolDefinition::new("repo_doc_search")
|
||||||
|
.description("Search through all documentation files in a knowledge-base repository for a keyword. Returns matching file paths, hit counts, and context snippets. Use this to find which documents discuss a specific topic.")
|
||||||
|
.parameters(ToolSchema {
|
||||||
|
schema_type: "object".into(),
|
||||||
|
properties: Some(HashMap::from([
|
||||||
|
param!("project_name", "string", "Project name (slug)", true),
|
||||||
|
param!("repo_name", "string", "Repository name", true),
|
||||||
|
param!("keyword", "string", "Search keyword (case-insensitive)", true),
|
||||||
|
param!("context_lines", "integer", "Number of context lines around each match (default: 2)", false),
|
||||||
|
])),
|
||||||
|
required: Some(vec!["project_name".into(), "repo_name".into(), "keyword".into()]),
|
||||||
|
}),
|
||||||
|
ToolHandler::new(|ctx, args| {
|
||||||
|
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||||
|
Box::pin(async move {
|
||||||
|
repo_doc_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user