feat(fctool): add knowledge base tools for AI

Add the repo_doc_index, repo_doc_read, and repo_doc_search function-call
tools so that an AI can index, read, and search documentation repositories.
This commit is contained in:
ZhenYi 2026-04-29 09:02:56 +08:00
parent 5f12b07120
commit 4ef0d5b570

View File

@@ -0,0 +1,384 @@
//! Knowledge-base (documentation) repository tools for AI.
//!
//! Provides tools for AI to quickly index, read, and search
//! through documentation / knowledge-base repositories.
use super::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use std::collections::HashMap;
// ── Helpers ────────────────────────────────────────────────────────────────────
/// Split a leading frontmatter block (`--- ... ---`) off markdown content.
///
/// Leading whitespace in `raw` is ignored. When the content starts with `---`
/// and a later line consists solely of `---` (trailing whitespace allowed),
/// returns `(Some(frontmatter), body)`: `frontmatter` is the text between the
/// opening marker and the closing line, and `body` is everything after the
/// closing line with leading whitespace removed. Otherwise returns
/// `(None, content)`, where `content` is `raw` without its leading whitespace.
///
/// The closing fence must sit on its own line, so a `---` embedded in a
/// frontmatter value (e.g. `title: a --- b`) does not end the block early.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    let after_open = match trimmed.strip_prefix("---") {
        Some(rest) => rest,
        None => return (None, trimmed),
    };
    // Walk line by line, tracking the byte offset of each line start so the
    // returned slices borrow directly from `raw`.
    let mut offset = 0;
    for line in after_open.split_inclusive('\n') {
        // `offset > 0` skips the remainder of the opening fence line itself.
        if offset > 0 && line.trim_end() == "---" {
            let frontmatter = &after_open[..offset];
            let body = after_open[offset + line.len()..].trim_start();
            return (Some(frontmatter), body);
        }
        offset += line.len();
    }
    // No closing fence: treat the whole content as body.
    (None, trimmed)
}
/// Extract ATX headings (`#` through `######`) from a markdown body.
///
/// Each heading becomes `{ "level": <1-6>, "text": <heading text> }`, in
/// document order. A heading is a line whose trimmed form starts with one to
/// six `#` characters followed by a space. Lines inside fenced code blocks
/// (triple-backtick or triple-tilde fences) are skipped so that, for example,
/// shell comments in code samples are not reported as headings.
fn extract_headings(body: &str) -> Vec<serde_json::Value> {
    // Marker of the currently open code fence, if any.
    let mut open_fence: Option<&str> = None;
    let mut headings = Vec::new();
    for line in body.lines() {
        let trimmed = line.trim();
        let fence = if trimmed.starts_with("```") {
            Some("```")
        } else if trimmed.starts_with("~~~") {
            Some("~~~")
        } else {
            None
        };
        if let Some(marker) = fence {
            match open_fence {
                None => {
                    // Backticks after the leading run mean this is an inline
                    // code span (e.g. a line that is just a quoted snippet),
                    // not the start of a fenced block.
                    let inline_code =
                        marker == "```" && trimmed.trim_start_matches('`').contains('`');
                    if !inline_code {
                        open_fence = Some(marker);
                    }
                }
                Some(open) if open == marker => open_fence = None,
                // A different marker inside an open fence is just code.
                Some(_) => {}
            }
            continue;
        }
        if open_fence.is_some() {
            continue;
        }
        // `#` is ASCII, so the count is also a valid byte offset into `trimmed`.
        let level = trimmed.bytes().take_while(|&b| b == b'#').count();
        if !(1..=6).contains(&level) {
            continue;
        }
        if let Some(text) = trimmed[level..].strip_prefix(' ') {
            headings.push(serde_json::json!({ "level": level, "text": text.trim() }));
        }
    }
    headings
}
/// Look up the repository's HEAD reference and peel it to its tree.
///
/// Returns `"no HEAD: ..."` when HEAD cannot be resolved and `"no tree: ..."`
/// when the reference cannot be peeled to a tree.
fn head_tree(domain: &git::GitDomain) -> Result<git2::Tree<'_>, String> {
    domain
        .repo()
        .head()
        .map_err(|e| format!("no HEAD: {e}"))?
        .peel_to_tree()
        .map_err(|e| format!("no tree: {e}"))
}
// ── Tool executors ─────────────────────────────────────────────────────────────
/// Tool: repo_doc_index — enumerate the markdown documents at HEAD with a summary of each.
///
/// Reads `project_name` and `repo_name` from `args`, walks the HEAD tree
/// (skipping dot-directories and a few build/vendor directories) and, for
/// every `.md` / `.mdx` / `.markdown` blob, reports its path, title,
/// description, tags, headings and size. Frontmatter is interpreted as JSON;
/// when it is absent or does not parse, the title falls back to the first
/// `# ` heading and the description to the first non-heading, non-empty line
/// (truncated to 200 characters).
///
/// Returns `{ "total": <count>, "docs": [...] }` with docs sorted by path, or
/// an error string when an argument is missing or the repository / HEAD tree
/// cannot be opened.
async fn repo_doc_index_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    /// Directories the walk never descends into.
    fn is_skipped_dir(name: &str) -> bool {
        name.starts_with('.')
            || matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist")
    }

    /// File names treated as markdown documents.
    fn is_markdown(name: &str) -> bool {
        name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown")
    }

    /// Build the index entry for a single document.
    fn summarize(path: String, raw: &str) -> serde_json::Value {
        let (frontmatter, body) = extract_frontmatter(raw);
        let metadata: serde_json::Value = frontmatter
            .and_then(|fm| serde_json::from_str(fm).ok())
            .unwrap_or_default();

        let title = match metadata.get("title").and_then(|v| v.as_str()) {
            Some(explicit) => Some(explicit.to_string()),
            // No usable frontmatter title: use the first level-1 heading.
            None => body
                .lines()
                .map(str::trim)
                .find(|line| line.starts_with("# "))
                .map(|line| line[2..].trim().to_string()),
        };

        let description = match metadata.get("description").and_then(|v| v.as_str()) {
            Some(explicit) => Some(explicit.to_string()),
            // No usable frontmatter description: use the first line of prose.
            None => body
                .lines()
                .map(str::trim)
                .find(|line| !line.is_empty() && !line.starts_with('#'))
                .map(|line| line.chars().take(200).collect::<String>()),
        };

        let tags: Vec<String> = match metadata.get("tags").and_then(|v| v.as_array()) {
            Some(items) => items
                .iter()
                .filter_map(|item| item.as_str())
                .map(String::from)
                .collect(),
            None => Vec::new(),
        };

        let headings = extract_headings(body);
        let size = raw.len();
        serde_json::json!({
            "path": path,
            "title": title,
            "description": description,
            "tags": tags,
            "headings": headings,
            "size": size,
        })
    }

    let params: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|err| err.to_string())?;
    let project_name = params
        .get("project_name")
        .and_then(|v| v.as_str())
        .ok_or("missing project_name")?;
    let repo_name = params
        .get("repo_name")
        .and_then(|v| v.as_str())
        .ok_or("missing repo_name")?;

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let root = head_tree(&domain)?;

    let mut docs = Vec::new();
    // Explicit stack of (tree, path prefix) pairs still to be visited.
    let mut pending: Vec<(git2::Tree<'_>, String)> = vec![(root, String::new())];
    while let Some((dir, dir_path)) = pending.pop() {
        for entry in dir.iter() {
            // Entries whose name is not valid UTF-8 are ignored.
            let name = match entry.name() {
                Some(name) => name,
                None => continue,
            };
            let entry_path = match dir_path.as_str() {
                "" => name.to_string(),
                parent => format!("{parent}/{name}"),
            };
            match entry.kind() {
                Some(git2::ObjectType::Tree) if !is_skipped_dir(name) => {
                    if let Ok(subtree) = entry.to_object(repo).and_then(|obj| obj.peel_to_tree()) {
                        pending.push((subtree, entry_path));
                    }
                }
                Some(git2::ObjectType::Blob) if is_markdown(name) => {
                    if let Ok(blob) = entry.to_object(repo).and_then(|obj| obj.peel_to_blob()) {
                        let raw = String::from_utf8_lossy(blob.content());
                        docs.push(summarize(entry_path, &raw));
                    }
                }
                _ => {}
            }
        }
    }

    // Traversal order depends on the stack, so sort by path for a stable listing.
    docs.sort_by(|lhs, rhs| {
        let left = lhs["path"].as_str().unwrap_or("");
        let right = rhs["path"].as_str().unwrap_or("");
        left.cmp(right)
    });

    let total = docs.len();
    Ok(serde_json::json!({
        "total": total,
        "docs": docs
    }))
}
/// Tool: repo_doc_read — return one document from HEAD together with its structure.
///
/// Reads `project_name`, `repo_name` and `path` from `args`, looks `path` up
/// in the HEAD tree and decodes the blob as (lossy) UTF-8. Frontmatter is
/// interpreted as JSON and returned as `metadata` (`null` when absent or not
/// parseable); the title falls back to the first `# ` heading.
///
/// Returns `{ path, title, metadata, headings, content, size }`, where
/// `content` is the document body without the frontmatter block, or an error
/// string when an argument is missing, the repository / HEAD tree cannot be
/// opened, the path does not exist, or the entry is not a blob.
async fn repo_doc_read_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let params: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|err| err.to_string())?;
    let project_name = params
        .get("project_name")
        .and_then(|v| v.as_str())
        .ok_or("missing project_name")?;
    let repo_name = params
        .get("repo_name")
        .and_then(|v| v.as_str())
        .ok_or("missing repo_name")?;
    let path = params
        .get("path")
        .and_then(|v| v.as_str())
        .ok_or("missing path")?;

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    // Resolve the path inside the tree, then load the object it points at.
    let blob = tree
        .get_path(std::path::Path::new(path))
        .map_err(|e| format!("file '{}' not found: {e}", path))?
        .to_object(repo)
        .and_then(|obj| obj.peel_to_blob())
        .map_err(|e| format!("not a blob: {e}"))?;

    let raw = String::from_utf8_lossy(blob.content());
    let (frontmatter, body) = extract_frontmatter(&raw);
    let metadata: serde_json::Value = frontmatter
        .and_then(|fm| serde_json::from_str(fm).ok())
        .unwrap_or_default();

    let title = match metadata.get("title").and_then(|v| v.as_str()) {
        Some(explicit) => Some(explicit.to_string()),
        // No usable frontmatter title: use the first level-1 heading.
        None => body
            .lines()
            .map(str::trim)
            .find(|line| line.starts_with("# "))
            .map(|line| line[2..].trim().to_string()),
    };
    let headings = extract_headings(body);
    let content = body.to_string();
    let size = raw.len();

    Ok(serde_json::json!({
        "path": path,
        "title": title,
        "metadata": metadata,
        "headings": headings,
        "content": content,
        "size": size,
    }))
}
/// Tool: repo_doc_search — case-insensitive keyword search over documentation files at HEAD.
///
/// Reads `project_name`, `repo_name`, `keyword` and the optional
/// `context_lines` (default 2) from `args`, walks the HEAD tree (skipping
/// dot-directories and a few build/vendor directories) and scans every
/// `.md` / `.mdx` / `.markdown` / `.txt` blob line by line.
///
/// Returns `{ keyword, matched_files, total_hits, matches }`. Each match holds
/// the file path, its hit count and a list of snippets; overlapping or
/// adjacent context windows are merged and `line_start` / `line_end` are
/// 1-based and inclusive.
///
/// Errors with a string when an argument is missing, the keyword is empty
/// (it would otherwise match every line of every file), or the repository /
/// HEAD tree cannot be opened.
async fn repo_doc_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    /// Turn ascending hit line indices into merged half-open `(start, end)` windows.
    fn context_windows(hits: &[usize], context: usize, total_lines: usize) -> Vec<(usize, usize)> {
        let mut windows: Vec<(usize, usize)> = Vec::new();
        for &hit in hits {
            let start = hit.saturating_sub(context);
            // `context` is caller-supplied and may be huge; saturate instead of overflowing.
            let end = hit.saturating_add(context).saturating_add(1).min(total_lines);
            if let Some(last) = windows.last_mut() {
                if start <= last.1 {
                    last.1 = end;
                    continue;
                }
            }
            windows.push((start, end));
        }
        windows
    }

    let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
    let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
    let keyword = p.get("keyword").and_then(|v| v.as_str()).ok_or("missing keyword")?;
    if keyword.is_empty() {
        // An empty needle is contained in every line, which would dump the whole repository.
        return Err("keyword must not be empty".to_string());
    }
    let context_lines = p.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
    let keyword_lower = keyword.to_lowercase();

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    let mut matches: Vec<serde_json::Value> = Vec::new();
    let mut matched_files = 0u64;
    // Explicit stack of (tree, path prefix) pairs still to be visited.
    let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
    while let Some((current_tree, prefix)) = stack.pop() {
        for entry in current_tree.iter() {
            // Entries whose name is not valid UTF-8 are ignored.
            let name = match entry.name() {
                Some(n) => n,
                None => continue,
            };
            let entry_path = if prefix.is_empty() {
                name.to_string()
            } else {
                format!("{}/{}", prefix, name)
            };
            match entry.kind() {
                Some(git2::ObjectType::Tree) => {
                    if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") {
                        if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
                            stack.push((subtree, entry_path));
                        }
                    }
                }
                Some(git2::ObjectType::Blob) => {
                    if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") || name.ends_with(".txt") {
                        if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
                            let content = String::from_utf8_lossy(blob.content());
                            let lines: Vec<&str> = content.lines().collect();
                            // 0-based indices of the lines containing the keyword.
                            let hit_lines: Vec<usize> = lines
                                .iter()
                                .enumerate()
                                .filter(|(_, line)| line.to_lowercase().contains(&keyword_lower))
                                .map(|(i, _)| i)
                                .collect();
                            if !hit_lines.is_empty() {
                                matched_files += 1;
                                let file_hits: Vec<serde_json::Value> =
                                    context_windows(&hit_lines, context_lines, lines.len())
                                        .into_iter()
                                        .map(|(start, end)| {
                                            let snippet = lines[start..end].join("\n");
                                            // `end` is exclusive and 0-based, i.e. the inclusive 1-based last line.
                                            serde_json::json!({
                                                "line_start": start + 1,
                                                "line_end": end,
                                                "snippet": snippet,
                                            })
                                        })
                                        .collect();
                                matches.push(serde_json::json!({
                                    "path": entry_path,
                                    "hit_count": hit_lines.len(),
                                    "snippets": file_hits,
                                }));
                            }
                        }
                    }
                }
                _ => {}
            }
        }
    }

    Ok(serde_json::json!({
        "keyword": keyword,
        "matched_files": matched_files,
        "total_hits": matches.iter().map(|m| m["hit_count"].as_u64().unwrap_or(0)).sum::<u64>(),
        "matches": matches,
    }))
}
// ── Registration ───────────────────────────────────────────────────────────────
/// Build one `(key, ToolParam)` pair for a tool schema's `properties` map.
///
/// Arguments, in order: parameter name, JSON type name, description, and
/// whether the parameter is required. The name is used both as the map key
/// and as the `ToolParam::name`; `properties` and `items` are left as `None`.
macro_rules! param {
    ($key:expr, $kind:expr, $help:expr, $is_required:expr) => {{
        let spec = ToolParam {
            name: $key.into(),
            param_type: $kind.into(),
            description: Some($help.into()),
            required: $is_required,
            properties: None,
            items: None,
        };
        ($key.into(), spec)
    }};
}
pub fn register_git_tools(registry: &mut ToolRegistry) {
// repo_doc_index
registry.register(
ToolDefinition::new("repo_doc_index")
.description("Index all documentation files in a knowledge-base repository. Lists every .md/.mdx file with its title, description, tags, and heading structure. Use this first to understand what documents are available.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_doc_index_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_doc_read
registry.register(
ToolDefinition::new("repo_doc_read")
.description("Read a specific document from a knowledge-base repository. Returns the full markdown content plus extracted frontmatter metadata and heading structure. Use this after repo_doc_index to read the documents you need.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("path", "string", "Document file path within the repository", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_doc_read_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_doc_search
registry.register(
ToolDefinition::new("repo_doc_search")
.description("Search through all documentation files in a knowledge-base repository for a keyword. Returns matching file paths, hit counts, and context snippets. Use this to find which documents discuss a specific topic.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("keyword", "string", "Search keyword (case-insensitive)", true),
param!("context_lines", "integer", "Number of context lines around each match (default: 2)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into(), "keyword".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_doc_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}