feat(fctool): add knowledge base tools for AI
Add repo_doc_index, repo_doc_read, and repo_doc_search function call tools for AI to index, read, and search through documentation repos.
This commit is contained in:
parent
5f12b07120
commit
4ef0d5b570
384
libs/fctool/src/git_tools/kb.rs
Normal file
384
libs/fctool/src/git_tools/kb.rs
Normal file
@ -0,0 +1,384 @@
|
||||
//! Knowledge-base (documentation) repository tools for AI.
|
||||
//!
|
||||
//! Provides tools for AI to quickly index, read, and search
|
||||
//! through documentation / knowledge-base repositories.
|
||||
|
||||
use super::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Split a markdown document into its frontmatter block and its body.
///
/// Leading whitespace of `raw` is ignored. Frontmatter is recognised only when
/// the document opens with a line consisting solely of `---` and a later line
/// consisting solely of `---` closes it (trailing whitespace on either
/// delimiter line is tolerated). Requiring the delimiters to sit on their own
/// lines keeps a `---` that appears *inside* a value (e.g. `title: a---b`) or a
/// longer dash run (e.g. a `----` horizontal rule) from being mistaken for a
/// delimiter.
///
/// Returns `(Some(frontmatter), body)` where `frontmatter` is the text between
/// the two delimiters (including the newline that follows the opening `---`)
/// and `body` is everything after the closing delimiter with leading
/// whitespace removed. When no well-formed frontmatter is present the result
/// is `(None, raw.trim_start())`.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    let after_open = match trimmed.strip_prefix("---") {
        Some(rest) => rest,
        None => return (None, trimmed),
    };

    // Byte offset (within `after_open`) of the start of the line being inspected.
    let mut offset = 0;
    for (idx, line) in after_open.split_inclusive('\n').enumerate() {
        if idx == 0 {
            // Remainder of the opening delimiter line: anything other than
            // whitespace means this was not a bare `---` line.
            if !line.trim().is_empty() {
                return (None, trimmed);
            }
        } else if line.trim_end() == "---" {
            let frontmatter = &after_open[..offset];
            // The line starts with the three ASCII dashes, so `offset + 3` is
            // a valid char boundary.
            let body = after_open[offset + 3..].trim_start();
            return (Some(frontmatter), body);
        }
        offset += line.len();
    }

    // Opening delimiter without a closing one: treat the whole text as body.
    (None, trimmed)
}
|
||||
|
||||
/// Extract all headings (lines starting with #) from markdown body.
|
||||
fn extract_headings(body: &str) -> Vec<serde_json::Value> {
|
||||
body.lines()
|
||||
.filter_map(|line| {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with("# ") {
|
||||
Some(serde_json::json!({ "level": 1, "text": trimmed[2..].trim() }))
|
||||
} else if trimmed.starts_with("## ") {
|
||||
Some(serde_json::json!({ "level": 2, "text": trimmed[3..].trim() }))
|
||||
} else if trimmed.starts_with("### ") {
|
||||
Some(serde_json::json!({ "level": 3, "text": trimmed[4..].trim() }))
|
||||
} else if trimmed.starts_with("#### ") {
|
||||
Some(serde_json::json!({ "level": 4, "text": trimmed[5..].trim() }))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Resolve HEAD to a tree for traversal.
|
||||
fn head_tree(domain: &git::GitDomain) -> Result<git2::Tree<'_>, String> {
|
||||
let repo = domain.repo();
|
||||
let head = repo.head().map_err(|e| format!("no HEAD: {e}"))?;
|
||||
head.peel_to_tree().map_err(|e| format!("no tree: {e}"))
|
||||
}
|
||||
|
||||
// ── Tool executors ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Tool: repo_doc_index — list all markdown docs with frontmatter
|
||||
async fn repo_doc_index_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let repo = domain.repo();
|
||||
let tree = head_tree(&domain)?;
|
||||
|
||||
let mut docs = Vec::new();
|
||||
let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
|
||||
|
||||
while let Some((current_tree, prefix)) = stack.pop() {
|
||||
for entry in current_tree.iter() {
|
||||
let name = match entry.name() {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
let entry_path = if prefix.is_empty() {
|
||||
name.to_string()
|
||||
} else {
|
||||
format!("{}/{}", prefix, name)
|
||||
};
|
||||
match entry.kind() {
|
||||
Some(git2::ObjectType::Tree) => {
|
||||
if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") {
|
||||
if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
|
||||
stack.push((subtree, entry_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(git2::ObjectType::Blob) => {
|
||||
if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") {
|
||||
if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
|
||||
let raw = String::from_utf8_lossy(blob.content());
|
||||
let (fm_raw, body) = extract_frontmatter(&raw);
|
||||
let metadata: serde_json::Value = fm_raw
|
||||
.and_then(|fm| serde_json::from_str(fm).ok())
|
||||
.unwrap_or_default();
|
||||
|
||||
let title = metadata
|
||||
.get("title")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.or_else(|| {
|
||||
// Fall back to first # heading
|
||||
body.lines()
|
||||
.find(|l| l.trim().starts_with("# "))
|
||||
.map(|l| l.trim()[2..].trim().to_string())
|
||||
});
|
||||
|
||||
let description = metadata
|
||||
.get("description")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.or_else(|| {
|
||||
// Fall back to first non-heading non-empty line
|
||||
body.lines()
|
||||
.find(|l| {
|
||||
let t = l.trim();
|
||||
!t.is_empty() && !t.starts_with('#')
|
||||
})
|
||||
.map(|l| l.trim().chars().take(200).collect::<String>())
|
||||
});
|
||||
|
||||
let tags: Vec<String> = metadata
|
||||
.get("tags")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let headings = extract_headings(body);
|
||||
|
||||
docs.push(serde_json::json!({
|
||||
"path": entry_path,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"tags": tags,
|
||||
"headings": headings,
|
||||
"size": raw.len(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by path for consistent ordering
|
||||
docs.sort_by(|a, b| {
|
||||
a["path"].as_str().unwrap_or("").cmp(b["path"].as_str().unwrap_or(""))
|
||||
});
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"total": docs.len(),
|
||||
"docs": docs
|
||||
}))
|
||||
}
|
||||
|
||||
/// Tool: repo_doc_read — read a specific document with structure
|
||||
async fn repo_doc_read_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let repo = domain.repo();
|
||||
let tree = head_tree(&domain)?;
|
||||
|
||||
// Navigate to the file using git2 path lookup
|
||||
let entry = tree.get_path(std::path::Path::new(path))
|
||||
.map_err(|e| format!("file '{}' not found: {e}", path))?;
|
||||
let blob = entry.to_object(repo).and_then(|o| o.peel_to_blob())
|
||||
.map_err(|e| format!("not a blob: {e}"))?;
|
||||
|
||||
let raw = String::from_utf8_lossy(blob.content());
|
||||
let (fm_raw, body) = extract_frontmatter(&raw);
|
||||
let metadata: serde_json::Value = fm_raw
|
||||
.and_then(|fm| serde_json::from_str(fm).ok())
|
||||
.unwrap_or_default();
|
||||
|
||||
let title = metadata
|
||||
.get("title")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.or_else(|| {
|
||||
body.lines()
|
||||
.find(|l| l.trim().starts_with("# "))
|
||||
.map(|l| l.trim()[2..].trim().to_string())
|
||||
});
|
||||
|
||||
let headings = extract_headings(body);
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"path": path,
|
||||
"title": title,
|
||||
"metadata": metadata,
|
||||
"headings": headings,
|
||||
"content": body.to_string(),
|
||||
"size": raw.len(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Tool: repo_doc_search — search through docs content
|
||||
async fn repo_doc_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let keyword = p.get("keyword").and_then(|v| v.as_str()).ok_or("missing keyword")?;
|
||||
let context_lines = p.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
|
||||
|
||||
let keyword_lower = keyword.to_lowercase();
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let repo = domain.repo();
|
||||
let tree = head_tree(&domain)?;
|
||||
|
||||
let mut matches: Vec<serde_json::Value> = Vec::new();
|
||||
let mut matched_files = 0u64;
|
||||
let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
|
||||
|
||||
while let Some((current_tree, prefix)) = stack.pop() {
|
||||
for entry in current_tree.iter() {
|
||||
let name = match entry.name() {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
let entry_path = if prefix.is_empty() {
|
||||
name.to_string()
|
||||
} else {
|
||||
format!("{}/{}", prefix, name)
|
||||
};
|
||||
match entry.kind() {
|
||||
Some(git2::ObjectType::Tree) => {
|
||||
if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") {
|
||||
if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
|
||||
stack.push((subtree, entry_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(git2::ObjectType::Blob) => {
|
||||
if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") || name.ends_with(".txt") {
|
||||
if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
|
||||
let content = String::from_utf8_lossy(blob.content());
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
let mut file_hits: Vec<serde_json::Value> = Vec::new();
|
||||
let mut hit_lines = Vec::new();
|
||||
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
if line.to_lowercase().contains(&keyword_lower) {
|
||||
hit_lines.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
if !hit_lines.is_empty() {
|
||||
matched_files += 1;
|
||||
// Merge overlapping context windows
|
||||
let mut windows: Vec<(usize, usize)> = Vec::new();
|
||||
for &line_idx in &hit_lines {
|
||||
let start = line_idx.saturating_sub(context_lines);
|
||||
let end = (line_idx + context_lines + 1).min(lines.len());
|
||||
if let Some(last) = windows.last_mut() {
|
||||
if start <= last.1 {
|
||||
last.1 = end;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
windows.push((start, end));
|
||||
}
|
||||
|
||||
for (start, end) in windows {
|
||||
let snippet: Vec<String> = lines[start..end]
|
||||
.iter()
|
||||
.map(|l| l.to_string())
|
||||
.collect();
|
||||
file_hits.push(serde_json::json!({
|
||||
"line_start": start + 1,
|
||||
"line_end": end,
|
||||
"snippet": snippet.join("\n"),
|
||||
}));
|
||||
}
|
||||
|
||||
matches.push(serde_json::json!({
|
||||
"path": entry_path,
|
||||
"hit_count": hit_lines.len(),
|
||||
"snippets": file_hits,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"keyword": keyword,
|
||||
"matched_files": matched_files,
|
||||
"total_hits": matches.iter().map(|m| m["hit_count"].as_u64().unwrap_or(0)).sum::<u64>(),
|
||||
"matches": matches,
|
||||
}))
|
||||
}
|
||||
|
||||
// ── Registration ───────────────────────────────────────────────────────────────
|
||||
|
||||
/// Build one `(key, ToolParam)` pair for a tool schema's `properties` map.
///
/// Arguments, in order: parameter name, JSON type name, human-readable
/// description, and whether the parameter is required. The name is used both
/// as the map key and as `ToolParam::name`; nested `properties` and `items`
/// are always left as `None`.
macro_rules! param {
    ($key:expr, $kind:expr, $help:expr, $is_required:expr) => {
        (
            $key.into(),
            ToolParam {
                name: $key.into(),
                param_type: $kind.into(),
                description: Some($help.into()),
                required: $is_required,
                properties: None,
                items: None,
            },
        )
    };
}
|
||||
|
||||
pub fn register_git_tools(registry: &mut ToolRegistry) {
|
||||
// repo_doc_index
|
||||
registry.register(
|
||||
ToolDefinition::new("repo_doc_index")
|
||||
.description("Index all documentation files in a knowledge-base repository. Lists every .md/.mdx file with its title, description, tags, and heading structure. Use this first to understand what documents are available.")
|
||||
.parameters(ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(HashMap::from([
|
||||
param!("project_name", "string", "Project name (slug)", true),
|
||||
param!("repo_name", "string", "Repository name", true),
|
||||
])),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into()]),
|
||||
}),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
repo_doc_index_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
// repo_doc_read
|
||||
registry.register(
|
||||
ToolDefinition::new("repo_doc_read")
|
||||
.description("Read a specific document from a knowledge-base repository. Returns the full markdown content plus extracted frontmatter metadata and heading structure. Use this after repo_doc_index to read the documents you need.")
|
||||
.parameters(ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(HashMap::from([
|
||||
param!("project_name", "string", "Project name (slug)", true),
|
||||
param!("repo_name", "string", "Repository name", true),
|
||||
param!("path", "string", "Document file path within the repository", true),
|
||||
])),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]),
|
||||
}),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
repo_doc_read_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
// repo_doc_search
|
||||
registry.register(
|
||||
ToolDefinition::new("repo_doc_search")
|
||||
.description("Search through all documentation files in a knowledge-base repository for a keyword. Returns matching file paths, hit counts, and context snippets. Use this to find which documents discuss a specific topic.")
|
||||
.parameters(ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(HashMap::from([
|
||||
param!("project_name", "string", "Project name (slug)", true),
|
||||
param!("repo_name", "string", "Repository name", true),
|
||||
param!("keyword", "string", "Search keyword (case-insensitive)", true),
|
||||
param!("context_lines", "integer", "Number of context lines around each match (default: 2)", false),
|
||||
])),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "keyword".into()]),
|
||||
}),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
repo_doc_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user