diff --git a/libs/fctool/src/git_tools/kb.rs b/libs/fctool/src/git_tools/kb.rs new file mode 100644 index 0000000..f23e765 --- /dev/null +++ b/libs/fctool/src/git_tools/kb.rs @@ -0,0 +1,384 @@ +//! Knowledge-base (documentation) repository tools for AI. +//! +//! Provides tools for AI to quickly index, read, and search +//! through documentation / knowledge-base repositories. + +use super::ctx::GitToolCtx; +use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; +use std::collections::HashMap; + +// ── Helpers ──────────────────────────────────────────────────────────────────── + +/// Extract frontmatter (--- ... ---) from markdown content. +fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) { + let trimmed = raw.trim_start(); + if !trimmed.starts_with("---") { + return (None, trimmed); + } + if let Some(end) = trimmed[3..].find("---") { + let fm = &trimmed[3..end + 3]; + let rest = trimmed[3 + end + 3..].trim_start(); + (Some(fm), rest) + } else { + (None, trimmed) + } +} + +/// Extract all headings (lines starting with #) from markdown body. +fn extract_headings(body: &str) -> Vec { + body.lines() + .filter_map(|line| { + let trimmed = line.trim(); + if trimmed.starts_with("# ") { + Some(serde_json::json!({ "level": 1, "text": trimmed[2..].trim() })) + } else if trimmed.starts_with("## ") { + Some(serde_json::json!({ "level": 2, "text": trimmed[3..].trim() })) + } else if trimmed.starts_with("### ") { + Some(serde_json::json!({ "level": 3, "text": trimmed[4..].trim() })) + } else if trimmed.starts_with("#### ") { + Some(serde_json::json!({ "level": 4, "text": trimmed[5..].trim() })) + } else { + None + } + }) + .collect() +} + +/// Resolve HEAD to a tree for traversal. +fn head_tree(domain: &git::GitDomain) -> Result, String> { + let repo = domain.repo(); + let head = repo.head().map_err(|e| format!("no HEAD: {e}"))?; + head.peel_to_tree().map_err(|e| format!("no tree: {e}")) +} + +// ── Tool executors ───────────────────────────────────────────────────────────── + +/// Tool: repo_doc_index — list all markdown docs with frontmatter +async fn repo_doc_index_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + let mut docs = Vec::new(); + let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; + + while let Some((current_tree, prefix)) = stack.pop() { + for entry in current_tree.iter() { + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + let entry_path = if prefix.is_empty() { + name.to_string() + } else { + format!("{}/{}", prefix, name) + }; + match entry.kind() { + Some(git2::ObjectType::Tree) => { + if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") { + if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) { + stack.push((subtree, entry_path)); + } + } + } + Some(git2::ObjectType::Blob) => { + if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") { + if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) { + let raw = String::from_utf8_lossy(blob.content()); + let (fm_raw, body) = extract_frontmatter(&raw); + let metadata: serde_json::Value = fm_raw + .and_then(|fm| serde_json::from_str(fm).ok()) + .unwrap_or_default(); + + let title = metadata + .get("title") + .and_then(|v| v.as_str()) + .map(String::from) + .or_else(|| { + // Fall back to first # heading + body.lines() + .find(|l| l.trim().starts_with("# ")) + .map(|l| l.trim()[2..].trim().to_string()) + }); + + let description = metadata + .get("description") + .and_then(|v| v.as_str()) + .map(String::from) + .or_else(|| { + // Fall back to first non-heading non-empty line + body.lines() + .find(|l| { + let t = l.trim(); + !t.is_empty() && !t.starts_with('#') + }) + .map(|l| l.trim().chars().take(200).collect::()) + }); + + let tags: Vec = metadata + .get("tags") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect()) + .unwrap_or_default(); + + let headings = extract_headings(body); + + docs.push(serde_json::json!({ + "path": entry_path, + "title": title, + "description": description, + "tags": tags, + "headings": headings, + "size": raw.len(), + })); + } + } + } + _ => {} + } + } + } + + // Sort by path for consistent ordering + docs.sort_by(|a, b| { + a["path"].as_str().unwrap_or("").cmp(b["path"].as_str().unwrap_or("")) + }); + + Ok(serde_json::json!({ + "total": docs.len(), + "docs": docs + })) +} + +/// Tool: repo_doc_read — read a specific document with structure +async fn repo_doc_read_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + // Navigate to the file using git2 path lookup + let entry = tree.get_path(std::path::Path::new(path)) + .map_err(|e| format!("file '{}' not found: {e}", path))?; + let blob = entry.to_object(repo).and_then(|o| o.peel_to_blob()) + .map_err(|e| format!("not a blob: {e}"))?; + + let raw = String::from_utf8_lossy(blob.content()); + let (fm_raw, body) = extract_frontmatter(&raw); + let metadata: serde_json::Value = fm_raw + .and_then(|fm| serde_json::from_str(fm).ok()) + .unwrap_or_default(); + + let title = metadata + .get("title") + .and_then(|v| v.as_str()) + .map(String::from) + .or_else(|| { + body.lines() + .find(|l| l.trim().starts_with("# ")) + .map(|l| l.trim()[2..].trim().to_string()) + }); + + let headings = extract_headings(body); + + Ok(serde_json::json!({ + "path": path, + "title": title, + "metadata": metadata, + "headings": headings, + "content": body.to_string(), + "size": raw.len(), + })) +} + +/// Tool: repo_doc_search — search through docs content +async fn repo_doc_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let keyword = p.get("keyword").and_then(|v| v.as_str()).ok_or("missing keyword")?; + let context_lines = p.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize; + + let keyword_lower = keyword.to_lowercase(); + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + let mut matches: Vec = Vec::new(); + let mut matched_files = 0u64; + let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; + + while let Some((current_tree, prefix)) = stack.pop() { + for entry in current_tree.iter() { + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + let entry_path = if prefix.is_empty() { + name.to_string() + } else { + format!("{}/{}", prefix, name) + }; + match entry.kind() { + Some(git2::ObjectType::Tree) => { + if !name.starts_with('.') && !matches!(name, "node_modules" | "target" | ".git" | ".github" | ".next" | "dist") { + if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) { + stack.push((subtree, entry_path)); + } + } + } + Some(git2::ObjectType::Blob) => { + if name.ends_with(".md") || name.ends_with(".mdx") || name.ends_with(".markdown") || name.ends_with(".txt") { + if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) { + let content = String::from_utf8_lossy(blob.content()); + let lines: Vec<&str> = content.lines().collect(); + let mut file_hits: Vec = Vec::new(); + let mut hit_lines = Vec::new(); + + for (i, line) in lines.iter().enumerate() { + if line.to_lowercase().contains(&keyword_lower) { + hit_lines.push(i); + } + } + + if !hit_lines.is_empty() { + matched_files += 1; + // Merge overlapping context windows + let mut windows: Vec<(usize, usize)> = Vec::new(); + for &line_idx in &hit_lines { + let start = line_idx.saturating_sub(context_lines); + let end = (line_idx + context_lines + 1).min(lines.len()); + if let Some(last) = windows.last_mut() { + if start <= last.1 { + last.1 = end; + continue; + } + } + windows.push((start, end)); + } + + for (start, end) in windows { + let snippet: Vec = lines[start..end] + .iter() + .map(|l| l.to_string()) + .collect(); + file_hits.push(serde_json::json!({ + "line_start": start + 1, + "line_end": end, + "snippet": snippet.join("\n"), + })); + } + + matches.push(serde_json::json!({ + "path": entry_path, + "hit_count": hit_lines.len(), + "snippets": file_hits, + })); + } + } + } + } + _ => {} + } + } + } + + Ok(serde_json::json!({ + "keyword": keyword, + "matched_files": matched_files, + "total_hits": matches.iter().map(|m| m["hit_count"].as_u64().unwrap_or(0)).sum::(), + "matches": matches, + })) +} + +// ── Registration ─────────────────────────────────────────────────────────────── + +macro_rules! param { + ($name:expr, $type:expr, $desc:expr, $required:expr) => { + ( + $name.into(), + ToolParam { + name: $name.into(), + param_type: $type.into(), + description: Some($desc.into()), + required: $required, + properties: None, + items: None, + }, + ) + }; +} + +pub fn register_git_tools(registry: &mut ToolRegistry) { + // repo_doc_index + registry.register( + ToolDefinition::new("repo_doc_index") + .description("Index all documentation files in a knowledge-base repository. Lists every .md/.mdx file with its title, description, tags, and heading structure. Use this first to understand what documents are available.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + ])), + required: Some(vec!["project_name".into(), "repo_name".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_doc_index_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // repo_doc_read + registry.register( + ToolDefinition::new("repo_doc_read") + .description("Read a specific document from a knowledge-base repository. Returns the full markdown content plus extracted frontmatter metadata and heading structure. Use this after repo_doc_index to read the documents you need.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + param!("path", "string", "Document file path within the repository", true), + ])), + required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_doc_read_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // repo_doc_search + registry.register( + ToolDefinition::new("repo_doc_search") + .description("Search through all documentation files in a knowledge-base repository for a keyword. Returns matching file paths, hit counts, and context snippets. Use this to find which documents discuss a specific topic.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + param!("keyword", "string", "Search keyword (case-insensitive)", true), + param!("context_lines", "integer", "Number of context lines around each match (default: 2)", false), + ])), + required: Some(vec!["project_name".into(), "repo_name".into(), "keyword".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_doc_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); +}