diff --git a/libs/fctool/src/git_tools/repo_analysis.rs b/libs/fctool/src/git_tools/repo_analysis.rs new file mode 100644 index 0000000..f7fdf32 --- /dev/null +++ b/libs/fctool/src/git_tools/repo_analysis.rs @@ -0,0 +1,627 @@ +//! Repository analysis tools for AI. +//! +//! Provides function-calling tools that let AI quickly understand +//! repository structure, languages, dependencies, and overview. + +use super::ctx::GitToolCtx; +use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; +use std::collections::HashMap; + +// ── Helpers ──────────────────────────────────────────────────────────────────── + +/// Recognised dependency manifest file names and their parser labels. +const DEPENDENCY_MANIFESTS: &[(&str, &str)] = &[ + ("Cargo.toml", "rust"), + ("package.json", "node"), + ("go.mod", "go"), + ("go.sum", "go"), + ("Gemfile", "ruby"), + ("requirements.txt", "python"), + ("Pipfile", "python"), + ("pyproject.toml", "python"), + ("pom.xml", "java"), + ("build.gradle", "java"), + ("build.gradle.kts", "java"), + ("composer.json", "php"), + ("CMakeLists.txt", "cmake"), + ("Makefile", "make"), +]; + +/// Language detection by file extension (lowercase). +fn ext_to_language(ext: &str) -> Option<&'static str> { + match ext { + "rs" => Some("Rust"), + "go" => Some("Go"), + "py" => Some("Python"), + "js" => Some("JavaScript"), + "jsx" => Some("JSX"), + "ts" => Some("TypeScript"), + "tsx" => Some("TSX"), + "java" => Some("Java"), + "kt" | "kts" => Some("Kotlin"), + "rb" => Some("Ruby"), + "php" => Some("PHP"), + "c" => Some("C"), + "h" => Some("C/C++ Header"), + "cpp" | "cc" | "cxx" => Some("C++"), + "hpp" | "hh" => Some("C++ Header"), + "cs" => Some("C#"), + "swift" => Some("Swift"), + "scala" => Some("Scala"), + "zig" => Some("Zig"), + "sh" | "bash" | "zsh" => Some("Shell"), + "ps1" => Some("PowerShell"), + "sql" => Some("SQL"), + "html" | "htm" => Some("HTML"), + "css" | "scss" | "sass" | "less" => Some("CSS"), + "json" => Some("JSON"), + "yaml" | "yml" => Some("YAML"), + "toml" => Some("TOML"), + "md" => Some("Markdown"), + "dockerfile" | "containerfile" => Some("Dockerfile"), + "proto" => Some("Protobuf"), + "vue" => Some("Vue"), + "svelte" => Some("Svelte"), + "lua" => Some("Lua"), + "dart" => Some("Dart"), + "r" | "R" => Some("R"), + "clj" | "cljs" | "cljc" => Some("Clojure"), + "ex" | "exs" => Some("Elixir"), + "erl" => Some("Erlang"), + "hs" => Some("Haskell"), + _ => None, + } +} + +/// Directories that should be ignored in file-tree scans. +fn is_ignored_dir(name: &str) -> bool { + matches!( + name, + ".git" | "node_modules" | "target" | "dist" | "build" | ".next" + | ".nuxt" | ".output" | ".cache" | "__pycache__" | ".tox" + | "vendor" | ".bundle" | ".gradle" | "bin" | "obj" + | ".svn" | ".hg" | ".idea" | ".vscode" | "coverage" + | ".terraform" | ".serverless" | "deps" | "_build" + | "elm-stuff" | ".stack-work" | ".pytest_cache" + ) +} + +/// Recursively collect file extensions and counts from a git tree. +/// Skips ignored directories and binary-looking files. +fn collect_languages( + repo: &git2::Repository, + tree: &git2::Tree, + prefix: &str, + stats: &mut HashMap, + max_files: u64, +) { + let mut count = 0u64; + let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree.clone(), prefix.to_string())]; + while let Some((current_tree, path)) = stack.pop() { + for entry in current_tree.iter() { + if max_files > 0 && count >= max_files { + return; + } + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + let entry_path = if path.is_empty() { + name.to_string() + } else { + format!("{}/{}", path, name) + }; + match entry.kind() { + Some(git2::ObjectType::Tree) => { + if !is_ignored_dir(name) && !name.starts_with('.') { + if let Ok(subtree) = + entry.to_object(repo).and_then(|o| o.peel_to_tree()) + { + stack.push((subtree, entry_path)); + } + } + } + Some(git2::ObjectType::Blob) => { + count += 1; + if let Some(ext) = name.rsplit('.').next() { + let ext = ext.to_lowercase(); + if let Some(lang) = ext_to_language(&ext) { + let entry = stats + .entry(lang.to_string()) + .or_insert_with(|| (ext, 0)); + entry.1 += 1; + } + } + } + _ => {} + } + } + } +} + +/// Collect a recursive file tree (path + kind) up to a given depth and file limit. +fn collect_file_tree( + repo: &git2::Repository, + tree: &git2::Tree, + prefix: &str, + depth: usize, + max_depth: usize, + max_files: u64, + files: &mut Vec, +) { + if depth > max_depth { + return; + } + for entry in tree.iter() { + if max_files > 0 && files.len() as u64 >= max_files { + return; + } + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + let entry_path = if prefix.is_empty() { + name.to_string() + } else { + format!("{}/{}", prefix, name) + }; + match entry.kind() { + Some(git2::ObjectType::Tree) => { + if !is_ignored_dir(name) && !name.starts_with('.') { + files.push(serde_json::json!({ + "path": entry_path, + "kind": "dir" + })); + if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) { + collect_file_tree(repo, &subtree, &entry_path, depth + 1, max_depth, max_files, files); + } + } + } + Some(git2::ObjectType::Blob) => { + files.push(serde_json::json!({ + "path": entry_path, + "kind": "file" + })); + } + _ => {} + } + } +} + +/// Detect config/manifest files in the root tree and return their names. +fn detect_config_files(tree: &git2::Tree) -> Vec { + let mut configs = Vec::new(); + let known_configs = [ + "Cargo.toml", "package.json", "go.mod", "Gemfile", "README.md", + "Dockerfile", "docker-compose.yml", "docker-compose.yaml", + ".github/workflows", ".gitignore", ".gitattributes", + "Makefile", "CMakeLists.txt", "composer.json", "pyproject.toml", + "requirements.txt", "Pipfile", "pom.xml", "build.gradle", + "build.gradle.kts", "settings.gradle", "settings.gradle.kts", + "tsconfig.json", ".eslintrc.js", ".eslintrc.json", + "prettier.config.js", "prettierrc", "webpack.config.js", + "vite.config.ts", "vite.config.js", "next.config.js", + "nuxt.config.ts", "svelte.config.js", + "rust-toolchain", "rust-toolchain.toml", + "clippy.toml", ".rustfmt.toml", "rustfmt.toml", + "renovate.json", ".renovaterc", ".mergify.yml", + "docker-bake.hcl", ".dockerignore", + "Cargo.lock", "yarn.lock", "package-lock.json", "pnpm-lock.yaml", + "Gemfile.lock", "Cargo.lock", + ]; + for entry in tree.iter() { + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + if known_configs.contains(&name) || name.starts_with('.') && !name.starts_with(".git") { + configs.push(name.to_string()); + } + } + configs.sort(); + configs.dedup(); + configs +} + +/// Parse a dependency manifest file content and return a structured summary. +fn parse_dependencies(content: &str, manifest_name: &str) -> serde_json::Value { + match manifest_name { + "Cargo.toml" => { + // Simple TOML-ish parsing for [dependencies] section + let mut deps = Vec::new(); + let mut in_deps = false; + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("[dependencies]") { + in_deps = true; + continue; + } + if trimmed.starts_with('[') { + in_deps = false; + continue; + } + if in_deps { + if let Some(eq_pos) = trimmed.find('=') { + let name = trimmed[..eq_pos].trim().to_string(); + let version = trimmed[eq_pos + 1..].trim().trim_matches('"').trim_matches('\'').to_string(); + if !name.is_empty() && !name.starts_with('#') { + deps.push(serde_json::json!({ "name": name, "version": version })); + } + } else if !trimmed.is_empty() && !trimmed.starts_with('#') { + // bare dependency name (path/git dep without explicit version) + deps.push(serde_json::json!({ "name": trimmed, "version": null })); + } + } + } + serde_json::json!({ "manifest": "Cargo.toml", "ecosystem": "rust", "dependencies": deps }) + } + "package.json" => { + let mut deps = Vec::new(); + if let Ok(parsed) = serde_json::from_str::(content) { + for section in &["dependencies", "devDependencies", "peerDependencies"] { + if let Some(map) = parsed.get(*section).and_then(|v| v.as_object()) { + for (name, version) in map { + deps.push(serde_json::json!({ + "name": name, + "version": version.as_str().unwrap_or("*"), + "scope": section + })); + } + } + } + } + serde_json::json!({ "manifest": "package.json", "ecosystem": "node", "dependencies": deps }) + } + "go.mod" => { + let mut deps = Vec::new(); + let mut in_require = false; + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("require (") || trimmed == "require (" { + in_require = true; + continue; + } + if trimmed == ")" { + in_require = false; + continue; + } + if in_require { + let parts: Vec<&str> = trimmed.split_whitespace().collect(); + if parts.len() >= 2 { + deps.push(serde_json::json!({ "name": parts[0], "version": parts[1] })); + } + } + } + serde_json::json!({ "manifest": "go.mod", "ecosystem": "go", "dependencies": deps }) + } + "Gemfile" => { + let mut deps = Vec::new(); + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("gem ") { + let rest = trimmed.trim_start_matches("gem "); + let name = rest.split(',').next().unwrap_or(rest).trim().trim_matches('"').trim_matches('\''); + let version = rest.split(',').nth(1).map(|v| v.trim().trim_matches('"').trim_matches('\'')); + deps.push(serde_json::json!({ "name": name, "version": version })); + } + } + serde_json::json!({ "manifest": "Gemfile", "ecosystem": "ruby", "dependencies": deps }) + } + "requirements.txt" => { + let mut deps = Vec::new(); + for line in content.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() && !trimmed.starts_with('#') && !trimmed.starts_with("-r") && !trimmed.starts_with("--") { + if let Some(eq_eq) = trimmed.find("==") { + let name = trimmed[..eq_eq].trim().to_string(); + let version = trimmed[eq_eq + 2..].trim().to_string(); + deps.push(serde_json::json!({ "name": name, "version": version })); + } else { + deps.push(serde_json::json!({ "name": trimmed, "version": null })); + } + } + } + serde_json::json!({ "manifest": "requirements.txt", "ecosystem": "python", "dependencies": deps }) + } + _ => serde_json::json!({ "manifest": manifest_name, "ecosystem": "unknown", "dependencies": [] }), + } +} + +// ── Tool executors ───────────────────────────────────────────────────────────── + +/// Resolve HEAD to a tree for traversal. +fn head_tree(domain: &git::GitDomain) -> Result, String> { + let repo = domain.repo(); + let head = repo.head().map_err(|e| format!("no HEAD: {e}"))?; + head.peel_to_tree().map_err(|e| format!("no tree: {e}")) +} + +/// Resolve HEAD to a commit OID. +fn head_oid(domain: &git::GitDomain) -> Result { + let repo = domain.repo(); + let head = repo.head().map_err(|e| format!("no HEAD: {e}"))?; + head.target() + .map(|o| o.to_string()) + .ok_or_else(|| "HEAD has no target".to_string()) +} + +/// Tool: repo_overview — quick project overview +async fn repo_overview_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + // Default branch + let default_branch = repo + .head() + .ok() + .and_then(|h| h.shorthand().map(|s| s.to_string())) + .unwrap_or_else(|| "unknown".to_string()); + + // Config files in root + let config_files = detect_config_files(&tree); + + // Language stats (up to 5000 files) + let mut lang_stats: HashMap = HashMap::new(); + collect_languages(repo, &tree, "", &mut lang_stats, 5000); + let mut languages: Vec = lang_stats + .into_iter() + .map(|(lang, (_ext, count))| serde_json::json!({ "language": lang, "file_count": count })) + .collect(); + languages.sort_by(|a, b| { + b["file_count"].as_u64().unwrap_or(0) + .cmp(&a["file_count"].as_u64().unwrap_or(0)) + }); + + // Top-level file tree + let mut root_files: Vec = Vec::new(); + collect_file_tree(repo, &tree, "", 0, 1, 100, &mut root_files); + + // Recent commits (last 10) + let head_oid = head_oid(&domain)?; + let recent_commits = domain + .commit_log(Some(&head_oid), 0, 10) + .map_err(|e| e.to_string())?; + let commits: Vec = recent_commits + .iter() + .map(|c| { + serde_json::json!({ + "oid": c.oid.to_string(), + "summary": c.summary, + "author": c.author.name, + "time": c.author.time_secs, + }) + }) + .collect(); + + // Total commit count + let total_commits = domain.commit_total(Some(&head_oid)).unwrap_or(0); + + Ok(serde_json::json!({ + "default_branch": default_branch, + "head_oid": head_oid, + "total_commits": total_commits, + "config_files": config_files, + "languages": languages, + "top_level_entries": root_files, + "recent_commits": commits, + })) +} + +/// Tool: repo_file_tree — recursive file tree with depth/ignore +async fn repo_file_tree_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let max_depth = p.get("max_depth").and_then(|v| v.as_u64()).unwrap_or(3) as usize; + let max_files = p.get("max_files").and_then(|v| v.as_u64()).unwrap_or(200); + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + let mut files = Vec::new(); + collect_file_tree(repo, &tree, "", 0, max_depth, max_files, &mut files); + + Ok(serde_json::json!({ + "total": files.len(), + "entries": files + })) +} + +/// Tool: repo_languages — detailed language breakdown +async fn repo_languages_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let repo = domain.repo(); + let tree = head_tree(&domain)?; + + let mut lang_stats: HashMap = HashMap::new(); + collect_languages(repo, &tree, "", &mut lang_stats, 100_000); + + let mut languages: Vec = lang_stats + .into_iter() + .map(|(lang, (_ext, count))| serde_json::json!({ "language": lang, "file_count": count })) + .collect(); + languages.sort_by(|a, b| { + b["file_count"].as_u64().unwrap_or(0) + .cmp(&a["file_count"].as_u64().unwrap_or(0)) + }); + + Ok(serde_json::json!({ + "total_languages": languages.len(), + "languages": languages + })) +} + +/// Tool: repo_dependencies — parse dependency manifests +async fn repo_dependencies_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { + let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let tree = head_tree(&domain)?; + + // Walk the tree looking for dependency manifests at any depth + let mut manifests: Vec = Vec::new(); + let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; + let repo = domain.repo(); + + while let Some((current_tree, prefix)) = stack.pop() { + for entry in current_tree.iter() { + let name = match entry.name() { + Some(n) => n, + None => continue, + }; + let entry_path = if prefix.is_empty() { + name.to_string() + } else { + format!("{}/{}", prefix, name) + }; + match entry.kind() { + Some(git2::ObjectType::Tree) => { + if !is_ignored_dir(name) && !name.starts_with('.') { + if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) { + stack.push((subtree, entry_path)); + } + } + } + Some(git2::ObjectType::Blob) => { + if DEPENDENCY_MANIFESTS.iter().any(|(fname, _)| *fname == name) { + if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) { + let content = String::from_utf8_lossy(blob.content()); + let manifest_type = DEPENDENCY_MANIFESTS + .iter() + .find(|(fname, _)| *fname == name) + .map(|(_, eco)| eco) + .unwrap_or(&"unknown"); + + let parsed = parse_dependencies(&content, name); + manifests.push(serde_json::json!({ + "path": entry_path, + "ecosystem": manifest_type, + "details": parsed + })); + } + } + } + _ => {} + } + } + } + + Ok(serde_json::json!({ + "manifest_count": manifests.len(), + "manifests": manifests + })) +} + +// ── Registration ─────────────────────────────────────────────────────────────── + +macro_rules! param { + ($name:expr, $type:expr, $desc:expr, $required:expr) => { + ( + $name.into(), + ToolParam { + name: $name.into(), + param_type: $type.into(), + description: Some($desc.into()), + required: $required, + properties: None, + items: None, + }, + ) + }; +} + +pub fn register_git_tools(registry: &mut ToolRegistry) { + // repo_overview + registry.register( + ToolDefinition::new("repo_overview") + .description("Get a quick overview of a repository: default branch, detected config files, language breakdown by file count, top-level directory entries, and recent commits. Ideal for first contact with a repo.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + ])), + required: Some(vec!["project_name".into(), "repo_name".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_overview_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // repo_file_tree + registry.register( + ToolDefinition::new("repo_file_tree") + .description("List files and directories in a repository recursively with configurable depth. Ignores common generated/artifact directories (node_modules, target, .git, etc.). Useful for understanding project layout.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + param!("max_depth", "integer", "Maximum directory depth to traverse (default: 3)", false), + param!("max_files", "integer", "Maximum number of entries to return (default: 200)", false), + ])), + required: Some(vec!["project_name".into(), "repo_name".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_file_tree_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // repo_languages + registry.register( + ToolDefinition::new("repo_languages") + .description("Get a detailed breakdown of programming languages used in a repository, sorted by file count. Scans all files in the repo (up to 100K files) and maps extensions to language names.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + ])), + required: Some(vec!["project_name".into(), "repo_name".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_languages_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // repo_dependencies + registry.register( + ToolDefinition::new("repo_dependencies") + .description("Discover and parse dependency manifests (Cargo.toml, package.json, go.mod, Gemfile, requirements.txt, etc.) in a repository. Returns structured dependency lists per manifest.") + .parameters(ToolSchema { + schema_type: "object".into(), + properties: Some(HashMap::from([ + param!("project_name", "string", "Project name (slug)", true), + param!("repo_name", "string", "Repository name", true), + ])), + required: Some(vec!["project_name".into(), "repo_name".into()]), + }), + ToolHandler::new(|ctx, args| { + let gctx = super::ctx::GitToolCtx::new(ctx); + Box::pin(async move { + repo_dependencies_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); +}