feat(fctool): add repo analysis tools for AI

Add repo_overview, repo_file_tree, repo_languages, and repo_dependencies
function call tools for AI to quickly analyze repository structure,
language breakdown, and dependency manifests.
This commit is contained in:
ZhenYi 2026-04-29 09:02:51 +08:00
parent c2b4553537
commit 5f12b07120

View File

@ -0,0 +1,627 @@
//! Repository analysis tools for AI.
//!
//! Provides function-calling tools that let AI quickly understand
//! repository structure, languages, dependencies, and overview.
use super::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use std::collections::HashMap;
// ── Helpers ────────────────────────────────────────────────────────────────────
/// Recognised dependency manifest file names and their parser labels.
///
/// Each entry is `(file name, ecosystem label)`. `repo_dependencies_exec`
/// compares blob names against the first element (exact, case-sensitive) and
/// reports the second element as the manifest's `ecosystem`.
// NOTE(review): only Cargo.toml, package.json, go.mod, Gemfile and
// requirements.txt have a dedicated branch in `parse_dependencies`; every
// other file listed here falls through to its catch-all arm and is therefore
// reported with an empty dependency list.
const DEPENDENCY_MANIFESTS: &[(&str, &str)] = &[
("Cargo.toml", "rust"),
("package.json", "node"),
("go.mod", "go"),
("go.sum", "go"),
("Gemfile", "ruby"),
("requirements.txt", "python"),
("Pipfile", "python"),
("pyproject.toml", "python"),
("pom.xml", "java"),
("build.gradle", "java"),
("build.gradle.kts", "java"),
("composer.json", "php"),
("CMakeLists.txt", "cmake"),
("Makefile", "make"),
];
/// Maps a file extension to the display name of its language.
///
/// The lookup is an exact, case-sensitive comparison against a fixed table, so
/// callers are expected to lowercase the extension first (the single
/// upper-case key in the table is `"R"`, accepted alongside `"r"`).
///
/// Returns `None` when the extension is not in the table.
fn ext_to_language(ext: &str) -> Option<&'static str> {
    // One row per language: every spelling on the left maps to the name on the right.
    const LANGUAGE_TABLE: &[(&[&str], &str)] = &[
        (&["rs"], "Rust"),
        (&["go"], "Go"),
        (&["py"], "Python"),
        (&["js"], "JavaScript"),
        (&["jsx"], "JSX"),
        (&["ts"], "TypeScript"),
        (&["tsx"], "TSX"),
        (&["java"], "Java"),
        (&["kt", "kts"], "Kotlin"),
        (&["rb"], "Ruby"),
        (&["php"], "PHP"),
        (&["c"], "C"),
        (&["h"], "C/C++ Header"),
        (&["cpp", "cc", "cxx"], "C++"),
        (&["hpp", "hh"], "C++ Header"),
        (&["cs"], "C#"),
        (&["swift"], "Swift"),
        (&["scala"], "Scala"),
        (&["zig"], "Zig"),
        (&["sh", "bash", "zsh"], "Shell"),
        (&["ps1"], "PowerShell"),
        (&["sql"], "SQL"),
        (&["html", "htm"], "HTML"),
        (&["css", "scss", "sass", "less"], "CSS"),
        (&["json"], "JSON"),
        (&["yaml", "yml"], "YAML"),
        (&["toml"], "TOML"),
        (&["md"], "Markdown"),
        (&["dockerfile", "containerfile"], "Dockerfile"),
        (&["proto"], "Protobuf"),
        (&["vue"], "Vue"),
        (&["svelte"], "Svelte"),
        (&["lua"], "Lua"),
        (&["dart"], "Dart"),
        (&["r", "R"], "R"),
        (&["clj", "cljs", "cljc"], "Clojure"),
        (&["ex", "exs"], "Elixir"),
        (&["erl"], "Erlang"),
        (&["hs"], "Haskell"),
    ];

    LANGUAGE_TABLE
        .iter()
        .find(|(spellings, _)| spellings.contains(&ext))
        .map(|&(_, language)| language)
}
/// Reports whether a directory name is on the skip list used by tree scans.
///
/// The list covers VCS metadata, editor folders, dependency caches and build
/// output. Matching is exact and case-sensitive on the single path component
/// passed in.
fn is_ignored_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        ".git",
        "node_modules",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        ".output",
        ".cache",
        "__pycache__",
        ".tox",
        "vendor",
        ".bundle",
        ".gradle",
        "bin",
        "obj",
        ".svn",
        ".hg",
        ".idea",
        ".vscode",
        "coverage",
        ".terraform",
        ".serverless",
        "deps",
        "_build",
        "elm-stuff",
        ".stack-work",
        ".pytest_cache",
    ];

    SKIPPED_DIRS.contains(&name)
}
/// Tallies files per language for every blob reachable from `tree`.
///
/// Walks the tree iteratively. Directories that are on the `is_ignored_dir`
/// list or whose name starts with `.` are not descended into. For each blob,
/// the text after the last `.` in its name is lowercased and looked up with
/// `ext_to_language`. Names without a dot are only recognised when they are
/// `Dockerfile` / `Containerfile` (any case), so an extension-less file called
/// `go` or `c` is not mistaken for source code.
///
/// * `prefix` - accepted for signature compatibility; it does not influence
///   the result.
/// * `stats` - updated in place. Keys are language display names; the value is
///   `(lookup key of the first file seen for that language, file count)`.
/// * `max_files` - stop after this many blobs have been visited, whether or not
///   they mapped to a language. `0` disables the limit.
fn collect_languages(
    repo: &git2::Repository,
    tree: &git2::Tree,
    prefix: &str,
    stats: &mut HashMap<String, (String, u64)>,
    max_files: u64,
) {
    // Language statistics are independent of where a file lives, so no path
    // strings are built during the walk.
    let _ = prefix;

    let mut visited = 0u64;
    let mut pending: Vec<git2::Tree<'_>> = vec![tree.clone()];
    while let Some(current) = pending.pop() {
        for entry in current.iter() {
            if max_files > 0 && visited >= max_files {
                return;
            }
            // Entries whose name is not valid UTF-8 are skipped.
            let name = match entry.name() {
                Some(n) => n,
                None => continue,
            };
            match entry.kind() {
                Some(git2::ObjectType::Tree) => {
                    if is_ignored_dir(name) || name.starts_with('.') {
                        continue;
                    }
                    // Unreadable subtrees are skipped rather than aborting the scan.
                    if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
                        pending.push(subtree);
                    }
                }
                Some(git2::ObjectType::Blob) => {
                    visited += 1;
                    let key = match name.rsplit_once('.') {
                        Some((_, ext)) => ext.to_lowercase(),
                        None => {
                            // No extension: only the container build files are
                            // identified by their bare name.
                            let bare = name.to_lowercase();
                            if bare != "dockerfile" && bare != "containerfile" {
                                continue;
                            }
                            bare
                        }
                    };
                    if let Some(lang) = ext_to_language(&key) {
                        stats
                            .entry(lang.to_string())
                            .or_insert_with(|| (key, 0))
                            .1 += 1;
                    }
                }
                _ => {}
            }
        }
    }
}
/// Appends a depth-first, pre-order listing of `tree` to `files`.
///
/// Every listed entry is a JSON object `{"path": ..., "kind": "dir" | "file"}`
/// where `path` is `prefix` joined to the entry name with `/`. Directories on
/// the `is_ignored_dir` list or starting with `.` are neither listed nor
/// descended into; entries that are not trees or blobs are skipped.
///
/// * `depth` / `max_depth` - the call is a no-op when `depth > max_depth`.
///   Directories found at `max_depth` are listed but not opened, which avoids
///   loading subtrees whose contents would be discarded anyway.
/// * `max_files` - stop once `files` holds this many entries (directories
///   count). `0` disables the limit.
fn collect_file_tree(
    repo: &git2::Repository,
    tree: &git2::Tree,
    prefix: &str,
    depth: usize,
    max_depth: usize,
    max_files: u64,
    files: &mut Vec<serde_json::Value>,
) {
    if depth > max_depth {
        return;
    }
    for entry in tree.iter() {
        if max_files > 0 && files.len() as u64 >= max_files {
            return;
        }
        // Entries whose name is not valid UTF-8 are skipped.
        let name = match entry.name() {
            Some(n) => n,
            None => continue,
        };
        let is_dir = match entry.kind() {
            Some(git2::ObjectType::Tree) => true,
            Some(git2::ObjectType::Blob) => false,
            _ => continue,
        };
        if is_dir && (is_ignored_dir(name) || name.starts_with('.')) {
            continue;
        }

        let entry_path = if prefix.is_empty() {
            name.to_string()
        } else {
            format!("{prefix}/{name}")
        };
        files.push(serde_json::json!({
            "path": entry_path,
            "kind": if is_dir { "dir" } else { "file" }
        }));

        // Only open the subtree when its children can still be listed.
        if is_dir && depth < max_depth {
            if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
                collect_file_tree(
                    repo,
                    &subtree,
                    &entry_path,
                    depth + 1,
                    max_depth,
                    max_files,
                    files,
                );
            }
        }
    }
}
/// Returns the sorted, de-duplicated names of configuration-like entries found
/// directly in `tree` (no recursion).
///
/// An entry is reported when `is_root_config_name` accepts its name. Entries
/// whose name is not valid UTF-8 are skipped.
fn detect_config_files(tree: &git2::Tree) -> Vec<String> {
    let mut configs: Vec<String> = tree
        .iter()
        .filter_map(|entry| {
            entry
                .name()
                .filter(|name| is_root_config_name(name))
                .map(String::from)
        })
        .collect();
    configs.sort();
    configs.dedup();
    configs
}

/// Root entry names that are always treated as configuration.
///
/// Tree entry names are single path components, so only names without `/` can
/// ever match; CI configuration under `.github/` is therefore represented by
/// the `.github` directory itself.
const KNOWN_ROOT_CONFIGS: &[&str] = &[
    "Cargo.toml", "package.json", "go.mod", "Gemfile", "README.md",
    "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
    ".github", ".gitlab-ci.yml", ".gitignore", ".gitattributes",
    "Makefile", "CMakeLists.txt", "composer.json", "pyproject.toml",
    "requirements.txt", "Pipfile", "pom.xml", "build.gradle",
    "build.gradle.kts", "settings.gradle", "settings.gradle.kts",
    "tsconfig.json", ".eslintrc.js", ".eslintrc.json",
    "prettier.config.js", ".prettierrc", "webpack.config.js",
    "vite.config.ts", "vite.config.js", "next.config.js",
    "nuxt.config.ts", "svelte.config.js",
    "rust-toolchain", "rust-toolchain.toml",
    "clippy.toml", ".rustfmt.toml", "rustfmt.toml",
    "renovate.json", ".renovaterc", ".mergify.yml",
    "docker-bake.hcl", ".dockerignore",
    "Cargo.lock", "yarn.lock", "package-lock.json", "pnpm-lock.yaml",
    "Gemfile.lock",
];

/// Decides whether a root entry name counts as a configuration file.
///
/// Accepts every name in `KNOWN_ROOT_CONFIGS`, plus any other dot-prefixed
/// name that does not begin with `.git`. The `.git*` names worth reporting
/// are listed explicitly in the table.
fn is_root_config_name(name: &str) -> bool {
    KNOWN_ROOT_CONFIGS.contains(&name)
        || (name.starts_with('.') && !name.starts_with(".git"))
}
/// Parses the text of a dependency manifest into a structured summary.
///
/// Returns `{"manifest": <manifest_name>, "ecosystem": <label>,
/// "dependencies": [...]}`. Each dependency is `{"name", "version"}` (plus
/// `"scope"` for `package.json`); `version` is `null` when the manifest gives
/// no version for that entry.
///
/// Supported manifests: `Cargo.toml` (the `[dependencies]` table),
/// `package.json`, `go.mod`, `Gemfile` and `requirements.txt`. Any other name
/// yields ecosystem `"unknown"` and an empty dependency list. The parsers are
/// line-oriented heuristics, not full format implementations; malformed input
/// produces fewer entries, never an error.
fn parse_dependencies(content: &str, manifest_name: &str) -> serde_json::Value {
    let (ecosystem, deps) = match manifest_name {
        "Cargo.toml" => ("rust", cargo_toml_deps(content)),
        "package.json" => ("node", package_json_deps(content)),
        "go.mod" => ("go", go_mod_deps(content)),
        "Gemfile" => ("ruby", gemfile_deps(content)),
        "requirements.txt" => ("python", requirements_txt_deps(content)),
        _ => ("unknown", Vec::new()),
    };
    serde_json::json!({ "manifest": manifest_name, "ecosystem": ecosystem, "dependencies": deps })
}

/// Returns the contents of the quoted string literal (`"…"` or `'…'`) that
/// `raw` starts with after leading whitespace, or `None` if there is none.
/// Escape sequences are not interpreted.
fn quoted_literal(raw: &str) -> Option<&str> {
    let raw = raw.trim_start();
    let quote = raw.chars().next().filter(|c| *c == '"' || *c == '\'')?;
    // The opening quote is a single ASCII byte, so slicing at 1 is safe.
    let body = &raw[1..];
    body.find(quote).map(|end| &body[..end])
}

/// Finds the `version = "…"` field of a TOML inline table such as
/// `{ version = "1", features = ["x"] }`.
fn inline_table_version(table: &str) -> Option<&str> {
    const KEY: &str = "version";
    let mut from = 0;
    while let Some(found) = table[from..].find(KEY) {
        let start = from + found;
        let end = start + KEY.len();
        // Reject matches inside longer keys such as `rust-version`.
        let at_key_start = table[..start]
            .chars()
            .next_back()
            .map_or(true, |c| c == '{' || c == ',' || c.is_whitespace());
        if at_key_start {
            if let Some(value) = table[end..].trim_start().strip_prefix('=') {
                return quoted_literal(value);
            }
        }
        from = end;
    }
    None
}

/// Extracts the entries of the `[dependencies]` table of a `Cargo.toml`.
fn cargo_toml_deps(content: &str) -> Vec<serde_json::Value> {
    let mut deps = Vec::new();
    let mut in_deps = false;
    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if line.starts_with('[') {
            in_deps = line.starts_with("[dependencies]");
            continue;
        }
        if !in_deps {
            continue;
        }
        // Lines without `=` are continuations of a multi-line value (for
        // example the items of a `features = [` array), not dependencies.
        let (key, value) = match line.split_once('=') {
            Some((k, v)) => (k.trim().trim_matches('"'), v.trim()),
            None => continue,
        };
        // `serde.workspace = true` style: the crate name is the first segment.
        let (name, field) = match key.split_once('.') {
            Some((n, f)) => (n, Some(f)),
            None => (key, None),
        };
        let looks_like_crate = !name.is_empty()
            && name
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
        if !looks_like_crate {
            continue;
        }
        let version = match field {
            None if value.starts_with('{') => inline_table_version(value),
            None | Some("version") => quoted_literal(value),
            // Path, git or workspace-inherited dependency: no version here.
            Some(_) => None,
        };
        deps.push(serde_json::json!({ "name": name, "version": version }));
    }
    deps
}

/// Extracts runtime, dev and peer dependencies from a `package.json`.
/// Content that is not valid JSON yields an empty list.
fn package_json_deps(content: &str) -> Vec<serde_json::Value> {
    let mut deps = Vec::new();
    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
        for scope in ["dependencies", "devDependencies", "peerDependencies"] {
            if let Some(map) = parsed.get(scope).and_then(|v| v.as_object()) {
                for (name, version) in map {
                    deps.push(serde_json::json!({
                        "name": name,
                        "version": version.as_str().unwrap_or("*"),
                        "scope": scope
                    }));
                }
            }
        }
    }
    deps
}

/// Extracts `require` directives (block and single-line form) from a `go.mod`.
fn go_mod_deps(content: &str) -> Vec<serde_json::Value> {
    let mut deps = Vec::new();
    let mut in_block = false;
    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with("//") {
            continue;
        }
        if line.starts_with("require (") {
            in_block = true;
            continue;
        }
        if line == ")" {
            in_block = false;
            continue;
        }
        let requirement = if in_block {
            Some(line)
        } else {
            line.strip_prefix("require ")
        };
        if let Some(requirement) = requirement {
            // `module version [// indirect]`: only the first two fields matter.
            let mut fields = requirement.split_whitespace();
            if let (Some(module), Some(version)) = (fields.next(), fields.next()) {
                deps.push(serde_json::json!({ "name": module, "version": version }));
            }
        }
    }
    deps
}

/// Extracts `gem` declarations from a `Gemfile`.
fn gemfile_deps(content: &str) -> Vec<serde_json::Value> {
    let mut deps = Vec::new();
    for line in content.lines() {
        let args = match line.trim().strip_prefix("gem ") {
            Some(a) => a,
            None => continue,
        };
        let mut parts = args.split(',');
        let first = parts.next().unwrap_or(args);
        let name = quoted_literal(first).unwrap_or_else(|| first.trim());
        // Only a quoted second argument is a version constraint; anything else
        // (`require: false`, `git: "…"`) is an option.
        let version = parts.next().and_then(quoted_literal);
        deps.push(serde_json::json!({ "name": name, "version": version }));
    }
    deps
}

/// Reports whether `candidate` looks like a plain distribution name,
/// optionally with extras (`requests[security]`), rather than a URL or path.
fn is_requirement_name(candidate: &str) -> bool {
    !candidate.is_empty()
        && candidate
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || "-_.[], ".contains(c))
}

/// Extracts requirements from a pip `requirements.txt`.
///
/// `name==1.0` reports version `1.0`; any other specifier is reported verbatim
/// including its operator (for example `>=2.0,<3`). Option lines (starting with
/// `-`), comments, per-requirement options, environment markers and line
/// continuations are stripped. URL or path requirements are reported verbatim
/// as the name with a `null` version.
fn requirements_txt_deps(content: &str) -> Vec<serde_json::Value> {
    let mut deps = Vec::new();
    for raw in content.lines() {
        let raw = raw.trim();
        if raw.is_empty() || raw.starts_with('#') || raw.starts_with('-') {
            continue;
        }
        // Drop inline comments and per-requirement options such as `--hash=…`.
        let cut = [" #", " --"]
            .iter()
            .filter_map(|marker| raw.find(*marker))
            .min()
            .unwrap_or(raw.len());
        let line = raw[..cut].trim_end_matches('\\').trim_end();
        if line.is_empty() {
            continue;
        }
        // Environment markers follow a `;`.
        let requirement = line.split(';').next().unwrap_or(line).trim_end();
        let operator_at =
            requirement.find(|c: char| matches!(c, '=' | '<' | '>' | '!' | '~'));
        let name = operator_at.map_or(requirement, |i| requirement[..i].trim_end());
        if !is_requirement_name(name) {
            deps.push(serde_json::json!({ "name": line, "version": null }));
            continue;
        }
        let version = operator_at.map(|i| {
            let spec = requirement[i..].trim();
            match spec.strip_prefix("==") {
                Some(exact) if !exact.starts_with('=') => exact.trim_start(),
                _ => spec,
            }
        });
        deps.push(serde_json::json!({ "name": name, "version": version }));
    }
    deps
}
// ── Tool executors ─────────────────────────────────────────────────────────────
/// Looks up the repository's HEAD reference and peels it to the tree it points at.
///
/// Fails with `"no HEAD: …"` when HEAD cannot be read and with `"no tree: …"`
/// when the reference cannot be peeled to a tree.
fn head_tree(domain: &git::GitDomain) -> Result<git2::Tree<'_>, String> {
    match domain.repo().head() {
        Ok(head) => head.peel_to_tree().map_err(|e| format!("no tree: {e}")),
        Err(e) => Err(format!("no HEAD: {e}")),
    }
}
/// Returns the object id HEAD points at, rendered as a string.
///
/// Fails with `"no HEAD: …"` when HEAD cannot be read and with
/// `"HEAD has no target"` when the reference carries no direct target.
fn head_oid(domain: &git::GitDomain) -> Result<String, String> {
    let head = domain.repo().head().map_err(|e| format!("no HEAD: {e}"))?;
    match head.target() {
        Some(oid) => Ok(oid.to_string()),
        None => Err("HEAD has no target".to_string()),
    }
}
/// Tool: repo_overview — quick project overview.
///
/// Expects `args` to be a JSON object with string fields `project_name` and
/// `repo_name`; a missing field fails with `"missing <field>"`.
///
/// Returns a JSON object with:
/// * `default_branch` - shorthand name of HEAD, or `"unknown"`.
/// * `head_oid` / `total_commits` - HEAD's object id and the commit count
///   reported by `commit_total` (0 when that call fails).
/// * `config_files` - root-level configuration entries.
/// * `languages` - `{language, file_count}` for up to 5000 scanned files,
///   ordered by descending count, then by language name so the output is
///   stable across runs.
/// * `top_level_entries` - up to 100 entries found directly in the root tree.
/// * `recent_commits` - the 10 most recent commits reachable from HEAD.
async fn repo_overview_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p
        .get("project_name")
        .and_then(|v| v.as_str())
        .ok_or("missing project_name")?;
    let repo_name = p
        .get("repo_name")
        .and_then(|v| v.as_str())
        .ok_or("missing repo_name")?;

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    // Branch HEAD currently points at.
    let default_branch = repo
        .head()
        .ok()
        .and_then(|h| h.shorthand().map(|s| s.to_string()))
        .unwrap_or_else(|| "unknown".to_string());

    // Config files in root
    let config_files = detect_config_files(&tree);

    // Language stats (up to 5000 files)
    let mut lang_stats: HashMap<String, (String, u64)> = HashMap::new();
    collect_languages(repo, &tree, "", &mut lang_stats, 5000);
    let mut lang_counts: Vec<(String, u64)> = lang_stats
        .into_iter()
        .map(|(lang, (_ext, count))| (lang, count))
        .collect();
    // HashMap iteration order is arbitrary, so break count ties by name.
    lang_counts.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    let languages: Vec<serde_json::Value> = lang_counts
        .into_iter()
        .map(|(lang, count)| serde_json::json!({ "language": lang, "file_count": count }))
        .collect();

    // Top-level entries only: max_depth 0 lists the root without descending,
    // so one large directory cannot use up the 100-entry budget.
    let mut root_files: Vec<serde_json::Value> = Vec::new();
    collect_file_tree(repo, &tree, "", 0, 0, 100, &mut root_files);

    // Recent commits (last 10)
    let head_commit = head_oid(&domain)?;
    let recent_commits = domain
        .commit_log(Some(&head_commit), 0, 10)
        .map_err(|e| e.to_string())?;
    let commits: Vec<serde_json::Value> = recent_commits
        .iter()
        .map(|c| {
            serde_json::json!({
                "oid": c.oid.to_string(),
                "summary": c.summary,
                "author": c.author.name,
                "time": c.author.time_secs,
            })
        })
        .collect();

    // Total commit count; best effort, reported as 0 on failure.
    let total_commits = domain.commit_total(Some(&head_commit)).unwrap_or(0);

    Ok(serde_json::json!({
        "default_branch": default_branch,
        "head_oid": head_commit,
        "total_commits": total_commits,
        "config_files": config_files,
        "languages": languages,
        "top_level_entries": root_files,
        "recent_commits": commits,
    }))
}
/// Tool: repo_file_tree — recursive listing of the tree at HEAD.
///
/// `args` must be a JSON object with string fields `project_name` and
/// `repo_name`. Optional unsigned integers `max_depth` (default 3) and
/// `max_files` (default 200) bound the walk; values that are not unsigned
/// integers fall back to the defaults.
///
/// Returns `{"total": <entry count>, "entries": [...]}` where the entries come
/// from `collect_file_tree`.
async fn repo_file_tree_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let params: serde_json::Map<String, serde_json::Value> = match serde_json::from_value(args) {
        Ok(map) => map,
        Err(e) => return Err(e.to_string()),
    };

    let project_name = match params.get("project_name").and_then(serde_json::Value::as_str) {
        Some(value) => value,
        None => return Err("missing project_name".into()),
    };
    let repo_name = match params.get("repo_name").and_then(serde_json::Value::as_str) {
        Some(value) => value,
        None => return Err("missing repo_name".into()),
    };
    let depth_limit = params
        .get("max_depth")
        .and_then(serde_json::Value::as_u64)
        .map_or(3, |d| d as usize);
    let entry_limit = params
        .get("max_files")
        .and_then(serde_json::Value::as_u64)
        .unwrap_or(200);

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let root = head_tree(&domain)?;

    let mut entries = Vec::new();
    collect_file_tree(domain.repo(), &root, "", 0, depth_limit, entry_limit, &mut entries);

    let total = entries.len();
    Ok(serde_json::json!({
        "total": total,
        "entries": entries
    }))
}
/// Tool: repo_languages — detailed language breakdown.
///
/// Expects `args` to be a JSON object with string fields `project_name` and
/// `repo_name`; a missing field fails with `"missing <field>"`.
///
/// Scans up to 100 000 files of the tree at HEAD and returns
/// `{"total_languages": n, "languages": [{language, file_count}, ...]}`. The
/// list is ordered by descending file count, then by language name so the
/// output is stable across runs.
async fn repo_languages_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p
        .get("project_name")
        .and_then(|v| v.as_str())
        .ok_or("missing project_name")?;
    let repo_name = p
        .get("repo_name")
        .and_then(|v| v.as_str())
        .ok_or("missing repo_name")?;

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    let mut lang_stats: HashMap<String, (String, u64)> = HashMap::new();
    collect_languages(repo, &tree, "", &mut lang_stats, 100_000);

    let mut lang_counts: Vec<(String, u64)> = lang_stats
        .into_iter()
        .map(|(lang, (_ext, count))| (lang, count))
        .collect();
    // HashMap iteration order is arbitrary, so break count ties by name.
    lang_counts.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    let languages: Vec<serde_json::Value> = lang_counts
        .into_iter()
        .map(|(lang, count)| serde_json::json!({ "language": lang, "file_count": count }))
        .collect();

    Ok(serde_json::json!({
        "total_languages": languages.len(),
        "languages": languages
    }))
}
/// Tool: repo_dependencies — find and parse dependency manifests.
///
/// `args` must be a JSON object with string fields `project_name` and
/// `repo_name`. The tree at HEAD is walked at every depth, skipping
/// directories on the `is_ignored_dir` list and dot-directories. Each blob
/// whose name appears in `DEPENDENCY_MANIFESTS` is decoded as lossy UTF-8 and
/// handed to `parse_dependencies`; blobs that cannot be loaded are skipped.
///
/// Returns `{"manifest_count": n, "manifests": [{path, ecosystem, details}]}`.
async fn repo_dependencies_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let params: serde_json::Map<String, serde_json::Value> = match serde_json::from_value(args) {
        Ok(map) => map,
        Err(e) => return Err(e.to_string()),
    };
    let project_name = match params.get("project_name").and_then(serde_json::Value::as_str) {
        Some(value) => value,
        None => return Err("missing project_name".into()),
    };
    let repo_name = match params.get("repo_name").and_then(serde_json::Value::as_str) {
        Some(value) => value,
        None => return Err("missing repo_name".into()),
    };

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let root = head_tree(&domain)?;
    let repo = domain.repo();

    let mut found: Vec<serde_json::Value> = Vec::new();
    // Explicit stack of (tree, path of that tree) still to be scanned.
    let mut pending: Vec<(git2::Tree<'_>, String)> = vec![(root, String::new())];
    while let Some((dir, dir_path)) = pending.pop() {
        for entry in dir.iter() {
            let name = match entry.name() {
                Some(n) => n,
                None => continue,
            };
            let entry_path = if dir_path.is_empty() {
                name.to_string()
            } else {
                format!("{}/{}", dir_path, name)
            };
            match entry.kind() {
                Some(git2::ObjectType::Tree) => {
                    if is_ignored_dir(name) || name.starts_with('.') {
                        continue;
                    }
                    if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
                        pending.push((subtree, entry_path));
                    }
                }
                Some(git2::ObjectType::Blob) => {
                    // Only blobs named like a known manifest are read.
                    let ecosystem = match DEPENDENCY_MANIFESTS
                        .iter()
                        .find(|(fname, _)| *fname == name)
                    {
                        Some((_, eco)) => eco,
                        None => continue,
                    };
                    let blob = match entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
                        Ok(b) => b,
                        Err(_) => continue,
                    };
                    let text = String::from_utf8_lossy(blob.content());
                    let details = parse_dependencies(&text, name);
                    found.push(serde_json::json!({
                        "path": entry_path,
                        "ecosystem": ecosystem,
                        "details": details
                    }));
                }
                _ => {}
            }
        }
    }

    Ok(serde_json::json!({
        "manifest_count": found.len(),
        "manifests": found
    }))
}
// ── Registration ───────────────────────────────────────────────────────────────
/// Builds one `(key, ToolParam)` pair for a tool schema's `properties` map.
///
/// Arguments, in order: parameter name, JSON-schema type name, human-readable
/// description, and whether the parameter is required. The name is used both
/// as the map key and as `ToolParam::name`; `properties` and `items` are always
/// left as `None`, so this only describes scalar parameters.
macro_rules! param {
($name:expr, $type:expr, $desc:expr, $required:expr) => {
(
// Map key; converted with `.into()` to whatever key type the caller's map expects.
$name.into(),
ToolParam {
name: $name.into(),
param_type: $type.into(),
description: Some($desc.into()),
required: $required,
properties: None,
items: None,
},
)
};
}
/// Registers the four repository-analysis tools on `registry`:
/// `repo_overview`, `repo_file_tree`, `repo_languages` and `repo_dependencies`.
///
/// Every tool takes an object schema with required string parameters
/// `project_name` and `repo_name`; `repo_file_tree` additionally accepts the
/// optional integers `max_depth` and `max_files`. Each handler wraps the
/// incoming context in a `GitToolCtx`, runs the matching `*_exec` function and
/// maps its `String` error into `agent::ToolError::ExecutionError`.
///
/// Note that required-ness is stated twice per tool: once in each `param!`
/// flag and once in the schema's `required` list. Keep the two in sync.
pub fn register_git_tools(registry: &mut ToolRegistry) {
// repo_overview
registry.register(
ToolDefinition::new("repo_overview")
.description("Get a quick overview of a repository: default branch, detected config files, language breakdown by file count, top-level directory entries, and recent commits. Ideal for first contact with a repo.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
// Build the git context outside the future so only `gctx` and `args` are moved into it.
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_overview_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_file_tree
registry.register(
ToolDefinition::new("repo_file_tree")
.description("List files and directories in a repository recursively with configurable depth. Ignores common generated/artifact directories (node_modules, target, .git, etc.). Useful for understanding project layout.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
// Optional limits; the defaults quoted here are applied in `repo_file_tree_exec`.
param!("max_depth", "integer", "Maximum directory depth to traverse (default: 3)", false),
param!("max_files", "integer", "Maximum number of entries to return (default: 200)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_file_tree_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_languages
registry.register(
ToolDefinition::new("repo_languages")
.description("Get a detailed breakdown of programming languages used in a repository, sorted by file count. Scans all files in the repo (up to 100K files) and maps extensions to language names.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_languages_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_dependencies
registry.register(
ToolDefinition::new("repo_dependencies")
.description("Discover and parse dependency manifests (Cargo.toml, package.json, go.mod, Gemfile, requirements.txt, etc.) in a repository. Returns structured dependency lists per manifest.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_dependencies_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}