feat(fctool): add repo utility tools for AI

Add repo_search, repo_readme, repo_commit_log, repo_contributors,
and repo_diff_summary function call tools for AI to search code,
read README, query commit history, list contributors, and diff revisions.
This commit is contained in:
ZhenYi 2026-04-29 09:03:03 +08:00
parent 4ef0d5b570
commit a5704c9730

View File

@@ -0,0 +1,512 @@
//! General-purpose repository utility tools for AI.
//!
//! Code search, README reading, commit history, contributors, and diff summaries.
use super::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use std::collections::HashMap;
// ── Helpers ────────────────────────────────────────────────────────────────────
/// Returns the tree that the repository's current HEAD reference peels to.
///
/// # Errors
/// Yields `"no HEAD: …"` when HEAD cannot be looked up and `"no tree: …"`
/// when the reference cannot be peeled to a tree.
fn head_tree(domain: &git::GitDomain) -> Result<git2::Tree<'_>, String> {
    domain
        .repo()
        .head()
        .map_err(|err| format!("no HEAD: {err}"))?
        .peel_to_tree()
        .map_err(|err| format!("no tree: {err}"))
}
/// Returns the object id HEAD points at, rendered as a string.
///
/// # Errors
/// Yields `"no HEAD: …"` when HEAD cannot be looked up and
/// `"HEAD has no target"` when the reference carries no direct target.
fn head_oid(domain: &git::GitDomain) -> Result<String, String> {
    let repo = domain.repo();
    let head_ref = repo.head().map_err(|err| format!("no HEAD: {err}"))?;
    match head_ref.target() {
        Some(oid) => Ok(oid.to_string()),
        None => Err("HEAD has no target".to_string()),
    }
}
/// Reports whether `name` is one of the directory names skipped during repository walks.
///
/// The comparison is an exact, case-sensitive match on the single path component.
fn is_ignored_dir(name: &str) -> bool {
    const IGNORED_DIRS: &[&str] = &[
        ".git",
        "node_modules",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        ".output",
        ".cache",
        "__pycache__",
        ".tox",
        "vendor",
        ".bundle",
        ".gradle",
        "bin",
        "obj",
        ".svn",
        ".hg",
        ".idea",
        ".vscode",
        "coverage",
        ".terraform",
        ".serverless",
        "deps",
        "_build",
        "elm-stuff",
        ".stack-work",
        ".pytest_cache",
    ];
    IGNORED_DIRS.contains(&name)
}
/// Reports whether the file name carries an extension from the known binary list.
///
/// Only the text after the final `.` is inspected, and it is compared
/// ASCII-case-insensitively (`logo.PNG` counts as binary). A name with no `.`
/// has no extension and is never treated as binary, so extension-less files
/// such as `bin`, `node` or `a` are not mistaken for binary artifacts.
fn is_binary_ext(name: &str) -> bool {
    let ext = match name.rsplit_once('.') {
        Some((_, ext)) => ext.to_ascii_lowercase(),
        // No dot at all: the whole name is the stem, not an extension.
        None => return false,
    };
    matches!(
        ext.as_str(),
        "png" | "jpg" | "jpeg" | "gif" | "webp" | "ico" | "svg" | "bmp"
            | "mp3" | "mp4" | "wav" | "avi" | "mov" | "mkv" | "webm"
            | "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar"
            | "exe" | "dll" | "so" | "dylib" | "o" | "a" | "lib"
            | "woff" | "woff2" | "ttf" | "otf" | "eot"
            | "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx"
            | "sqlite" | "db" | "bin" | "dat" | "pyc" | "class"
            | "wasm" | "node"
    )
}
/// Resolves a revision string to a commit OID via `GitDomain::resolve_rev`.
///
/// # Errors
/// The underlying error is stringified and returned unchanged.
fn resolve_commit_oid(domain: &git::GitDomain, rev: &str) -> Result<git::commit::types::CommitOid, String> {
    match domain.resolve_rev(rev) {
        Ok(oid) => Ok(oid),
        Err(err) => Err(err.to_string()),
    }
}
// ── Tool executors ─────────────────────────────────────────────────────────────
/// Merges the context windows around each hit into non-overlapping ranges.
///
/// `hit_lines` holds ascending 0-based line indices. Each returned pair is a
/// half-open `(start, end)` range of 0-based line indices clamped to
/// `total_lines`. Saturating arithmetic keeps an oversized `context_lines`
/// from overflowing.
fn merge_context_windows(
    hit_lines: &[usize],
    context_lines: usize,
    total_lines: usize,
) -> Vec<(usize, usize)> {
    let mut windows: Vec<(usize, usize)> = Vec::new();
    for &line_idx in hit_lines {
        let start = line_idx.saturating_sub(context_lines);
        let end = line_idx
            .saturating_add(context_lines)
            .saturating_add(1)
            .min(total_lines);
        if let Some(last) = windows.last_mut() {
            // Touching or overlapping the previous window: extend it instead.
            if start <= last.1 {
                last.1 = end;
                continue;
            }
        }
        windows.push((start, end));
    }
    windows
}

/// Tool: repo_search — search code content across the repo.
///
/// Walks the tree at HEAD depth-first and reports every text blob that has a
/// line containing `keyword` (case-insensitive), with merged context snippets.
///
/// Arguments (JSON object):
/// - `project_name`, `repo_name` (required strings): repository to open.
/// - `keyword` (required, non-empty string): text to look for.
/// - `context_lines` (optional, default 2): lines of context on each side of a hit.
/// - `max_results` (optional, default 50): maximum number of matching files returned.
///
/// Directories in the ignore list or starting with `.` are not descended
/// into. Blobs with a known binary extension or flagged binary by git are skipped.
///
/// # Errors
/// Returns an error string when a required argument is missing, when the
/// keyword is empty, or when the repository or its HEAD tree cannot be opened.
async fn repo_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
    let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
    let keyword = p.get("keyword").and_then(|v| v.as_str()).ok_or("missing keyword")?;
    // An empty needle is contained in every line and would dump the whole repo.
    if keyword.is_empty() {
        return Err("keyword must not be empty".to_string());
    }
    let context_lines = p.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
    let max_results = p.get("max_results").and_then(|v| v.as_u64()).unwrap_or(50) as usize;
    let keyword_lower = keyword.to_lowercase();

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    let mut results: Vec<serde_json::Value> = Vec::new();
    let mut matched_files = 0usize;
    let mut total_hits = 0usize;

    // Explicit stack of (tree, path prefix) pairs instead of recursion.
    let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
    'outer: while let Some((current_tree, prefix)) = stack.pop() {
        for entry in current_tree.iter() {
            if results.len() >= max_results {
                break 'outer;
            }
            // Entries whose name is not valid UTF-8 are skipped.
            let name = match entry.name() {
                Some(n) => n,
                None => continue,
            };
            let entry_path = if prefix.is_empty() {
                name.to_string()
            } else {
                format!("{}/{}", prefix, name)
            };
            match entry.kind() {
                Some(git2::ObjectType::Tree) => {
                    if !is_ignored_dir(name) && !name.starts_with('.') {
                        if let Ok(subtree) = entry.to_object(repo).and_then(|o| o.peel_to_tree()) {
                            stack.push((subtree, entry_path));
                        }
                    }
                }
                Some(git2::ObjectType::Blob) => {
                    if is_binary_ext(name) {
                        continue;
                    }
                    // Unreadable blobs are skipped rather than failing the search.
                    let blob = match entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
                        Ok(b) => b,
                        Err(_) => continue,
                    };
                    if blob.is_binary() {
                        continue;
                    }
                    let content = String::from_utf8_lossy(blob.content());
                    let lines: Vec<&str> = content.lines().collect();
                    let hit_lines: Vec<usize> = lines
                        .iter()
                        .enumerate()
                        .filter(|(_, line)| line.to_lowercase().contains(&keyword_lower))
                        .map(|(idx, _)| idx)
                        .collect();
                    if hit_lines.is_empty() {
                        continue;
                    }
                    matched_files += 1;
                    total_hits += hit_lines.len();

                    let snippets: Vec<serde_json::Value> =
                        merge_context_windows(&hit_lines, context_lines, lines.len())
                            .into_iter()
                            .map(|(start, end)| {
                                // `end` is exclusive and 0-based, which equals the
                                // inclusive 1-based number of the last line.
                                serde_json::json!({
                                    "line_start": start + 1,
                                    "line_end": end,
                                    "snippet": lines[start..end].join("\n"),
                                })
                            })
                            .collect();
                    results.push(serde_json::json!({
                        "path": entry_path,
                        "hit_count": hit_lines.len(),
                        "snippets": snippets,
                    }));
                }
                _ => {}
            }
        }
    }

    Ok(serde_json::json!({
        "keyword": keyword,
        "matched_files": matched_files,
        "total_hits": total_hits,
        "results": results,
    }))
}
/// Tool: repo_readme — get README content.
///
/// Looks in the root of the HEAD tree for a README. The well-known spellings
/// are tried first, in priority order. If none of them exists, the root
/// entries are scanned again case-insensitively, so `readme.md` or
/// `Readme.rst` are found too.
///
/// Arguments (JSON object): `project_name` and `repo_name` (required strings).
///
/// Returns `filename`, `content` (lossy UTF-8) and `size` (byte length of
/// `content`). When no README exists, returns `Ok` with null `filename` and
/// `content` and an `error` message.
///
/// # Errors
/// Returns an error string when a required argument is missing or when the
/// repository or its HEAD tree cannot be opened.
async fn repo_readme_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
    let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();
    let tree = head_tree(&domain)?;

    // Common README filenames, highest priority first.
    let candidates = ["README.md", "README.MD", "README.markdown", "README.rst", "README.txt", "README"];
    let mut found: Option<(String, String)> = None;

    // Pass 1: exact spelling.
    for candidate in &candidates {
        if let Ok(entry) = tree.get_path(std::path::Path::new(candidate)) {
            if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
                let content = String::from_utf8_lossy(blob.content()).to_string();
                found = Some((candidate.to_string(), content));
                break;
            }
        }
    }

    // Pass 2: same priority order, ignoring ASCII case. Reports the actual
    // file name stored in the tree.
    if found.is_none() {
        'fallback: for candidate in &candidates {
            for entry in tree.iter() {
                let name = match entry.name() {
                    Some(n) => n,
                    None => continue,
                };
                if !name.eq_ignore_ascii_case(candidate) {
                    continue;
                }
                if let Ok(blob) = entry.to_object(repo).and_then(|o| o.peel_to_blob()) {
                    let content = String::from_utf8_lossy(blob.content()).to_string();
                    found = Some((name.to_string(), content));
                    break 'fallback;
                }
            }
        }
    }

    match found {
        Some((filename, content)) => Ok(serde_json::json!({
            "filename": filename,
            "content": content,
            "size": content.len(),
        })),
        None => Ok(serde_json::json!({
            "filename": null,
            "content": null,
            "error": "No README file found in repository root",
        })),
    }
}
/// Tool: repo_commit_log — filtered commit history.
///
/// Reads commits starting at HEAD and keeps those whose author name contains
/// `author` and whose message contains `keyword` (both optional,
/// case-insensitive substring tests), returning at most `limit` of them
/// (default 20).
///
/// Filtering runs on an already-fetched batch. When a filter is present the
/// batch is enlarged to `max(limit * 5, 200)` commits, so matches older than
/// that batch are not seen.
async fn repo_commit_log_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let params: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = params
        .get("project_name")
        .and_then(|v| v.as_str())
        .ok_or("missing project_name")?;
    let repo_name = params
        .get("repo_name")
        .and_then(|v| v.as_str())
        .ok_or("missing repo_name")?;
    let author = params.get("author").and_then(|v| v.as_str());
    let keyword = params.get("keyword").and_then(|v| v.as_str());
    let limit = params.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize;

    let domain = ctx.open_repo(project_name, repo_name).await?;
    let tip = head_oid(&domain)?;

    // Over-fetch only when something will be filtered out afterwards.
    let fetch_limit = match (author, keyword) {
        (None, None) => limit,
        _ => limit.saturating_mul(5).max(200),
    };
    let commits = domain
        .commit_log(Some(&tip), 0, fetch_limit)
        .map_err(|e| e.to_string())?;

    let wanted_author = author.map(|a| a.to_lowercase());
    let wanted_keyword = keyword.map(|k| k.to_lowercase());

    let mut selected: Vec<serde_json::Value> = Vec::new();
    for commit in commits.iter() {
        if selected.len() >= limit {
            break;
        }
        if let Some(ref needle) = wanted_author {
            if !commit.author.name.to_lowercase().contains(needle) {
                continue;
            }
        }
        if let Some(ref needle) = wanted_keyword {
            if !commit.message.to_lowercase().contains(needle) {
                continue;
            }
        }
        let oid = commit.oid.to_string();
        selected.push(serde_json::json!({
            "oid": oid,
            "short_oid": oid.get(..7).unwrap_or(&oid),
            "summary": commit.summary,
            "author": commit.author.name,
            "author_email": commit.author.email,
            "time": commit.author.time_secs,
        }));
    }

    Ok(serde_json::json!({
        "total": selected.len(),
        "commits": selected,
    }))
}
/// Tool: repo_contributors — contributor statistics.
///
/// Aggregates the commit log reachable from HEAD (the log call is capped at
/// 10000 entries) by author email. For each email it reports the author name
/// taken from the first commit encountered, the commit count, and the
/// earliest and latest author timestamps.
///
/// Contributors are sorted by commit count, highest first. Ties are broken by
/// email in ascending order so the output, and the set kept by `limit`, is the
/// same on every call. `limit` (optional, default 0) truncates the list; 0
/// keeps everyone. `total_contributors` is the number of entries returned.
///
/// # Errors
/// Returns an error string when a required argument is missing, or when the
/// repository, HEAD, or commit log cannot be read.
async fn repo_contributors_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    /// Running totals for one author email.
    struct Tally {
        name: serde_json::Value,
        commit_count: u64,
        first_commit_time: i64,
        last_commit_time: i64,
    }

    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
    let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
    let limit = p.get("limit").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
    let domain = ctx.open_repo(project_name, repo_name).await?;
    let head_oid = head_oid(&domain)?;
    // Walk all commits (up to 10000)
    let commits = domain.commit_log(Some(&head_oid), 0, 10000).map_err(|e| e.to_string())?;

    // Aggregate by author email (more reliable than name)
    let mut tallies: HashMap<String, Tally> = HashMap::new();
    for c in &commits {
        let t = c.author.time_secs;
        let tally = tallies.entry(c.author.email.clone()).or_insert_with(|| Tally {
            name: serde_json::json!(c.author.name),
            commit_count: 0,
            first_commit_time: t,
            last_commit_time: t,
        });
        tally.commit_count += 1;
        tally.first_commit_time = tally.first_commit_time.min(t);
        tally.last_commit_time = tally.last_commit_time.max(t);
    }

    let mut ranked: Vec<(String, Tally)> = tallies.into_iter().collect();
    // HashMap iteration order is arbitrary, so ties need an explicit secondary key.
    ranked.sort_by(|(email_a, a), (email_b, b)| {
        b.commit_count
            .cmp(&a.commit_count)
            .then_with(|| email_a.cmp(email_b))
    });
    if limit > 0 {
        ranked.truncate(limit);
    }

    let contributors: Vec<serde_json::Value> = ranked
        .into_iter()
        .map(|(email, tally)| {
            serde_json::json!({
                "name": tally.name,
                "email": email,
                "commit_count": tally.commit_count,
                "first_commit_time": tally.first_commit_time,
                "last_commit_time": tally.last_commit_time,
            })
        })
        .collect();

    Ok(serde_json::json!({
        "total_contributors": contributors.len(),
        "contributors": contributors,
    }))
}
/// Tool: repo_diff_summary — change summary between two revisions.
///
/// Diffs the tree of `from_rev` against the tree of `to_rev` (default
/// `"HEAD"`). Reports the overall files-changed, insertion and deletion
/// counts plus, for each delta, its old path, new path and status.
///
/// Rename/copy detection is run on the diff before it is summarized. Without
/// it a tree-to-tree diff reports a moved file as a delete plus an add, and
/// the `renamed`/`copied` statuses could never appear.
///
/// # Errors
/// Returns an error string when a required argument is missing, a revision
/// cannot be resolved to a commit, or any git operation fails.
async fn repo_diff_summary_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
    let p: serde_json::Map<String, serde_json::Value> =
        serde_json::from_value(args).map_err(|e| e.to_string())?;
    let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
    let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
    let from_rev = p.get("from_rev").and_then(|v| v.as_str()).ok_or("missing from_rev")?;
    let to_rev = p.get("to_rev").and_then(|v| v.as_str()).unwrap_or("HEAD");
    let domain = ctx.open_repo(project_name, repo_name).await?;
    let repo = domain.repo();

    let from_oid = resolve_commit_oid(&domain, from_rev)?;
    let to_oid = resolve_commit_oid(&domain, to_rev)?;
    let from_commit = repo
        .find_commit(from_oid.to_oid().map_err(|e| e.to_string())?)
        .map_err(|e| format!("from_rev not found: {e}"))?;
    let to_commit = repo
        .find_commit(to_oid.to_oid().map_err(|e| e.to_string())?)
        .map_err(|e| format!("to_rev not found: {e}"))?;
    let from_tree = from_commit.tree().map_err(|e| e.to_string())?;
    let to_tree = to_commit.tree().map_err(|e| e.to_string())?;

    let mut diff = repo
        .diff_tree_to_tree(Some(&from_tree), Some(&to_tree), None)
        .map_err(|e| e.to_string())?;
    // Fold matching delete/add pairs into renames before computing stats.
    diff.find_similar(None)
        .map_err(|e| format!("rename detection failed: {e}"))?;

    let stats = diff.stats().map_err(|e| e.to_string())?;
    let files_changed = stats.files_changed();
    let insertions = stats.insertions();
    let deletions = stats.deletions();

    // Collect per-file stats in a single pass over the deltas.
    let files: Vec<serde_json::Value> = diff
        .deltas()
        .map(|delta| {
            let old_path = delta
                .old_file()
                .path()
                .map(|path| path.to_string_lossy().to_string());
            let new_path = delta
                .new_file()
                .path()
                .map(|path| path.to_string_lossy().to_string());
            let status = match delta.status() {
                git2::Delta::Added => "added",
                git2::Delta::Deleted => "deleted",
                git2::Delta::Modified => "modified",
                git2::Delta::Renamed => "renamed",
                git2::Delta::Copied => "copied",
                _ => "other",
            };
            serde_json::json!({
                "old_path": old_path,
                "new_path": new_path,
                "status": status,
            })
        })
        .collect();

    Ok(serde_json::json!({
        "from": from_rev,
        "to": to_rev,
        "files_changed": files_changed,
        "insertions": insertions,
        "deletions": deletions,
        "files": files,
    }))
}
// ── Registration ───────────────────────────────────────────────────────────────
/// Expands to a `(key, ToolParam)` tuple for use as one entry of a schema's
/// property map.
///
/// Arguments, in order: parameter name, type string, description, and whether
/// the parameter is required. The name expression is expanded twice (once for
/// the tuple key, once for `ToolParam::name`). `properties` and `items` are
/// always `None`.
macro_rules! param {
    ($key:expr, $kind:expr, $help:expr, $is_required:expr) => {
        (
            $key.into(),
            ToolParam {
                name: $key.into(),
                param_type: $kind.into(),
                description: Some($help.into()),
                required: $is_required,
                properties: None,
                items: None,
            },
        )
    };
}
pub fn register_git_tools(registry: &mut ToolRegistry) {
// repo_search
registry.register(
ToolDefinition::new("repo_search")
.description("Search all files in a repository for a keyword (case-insensitive). Returns matching file paths, hit counts, and context snippets. Skips binary files and generated directories. Use this to find where a function, variable, or concept is defined or used.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("keyword", "string", "Search keyword (case-insensitive)", true),
param!("context_lines", "integer", "Number of context lines around each match (default: 2)", false),
param!("max_results", "integer", "Maximum number of matching files to return (default: 50)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into(), "keyword".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_readme
registry.register(
ToolDefinition::new("repo_readme")
.description("Read the README file from a repository. Automatically finds README.md, README.markdown, README.rst, README.txt, or README. Returns the full content. Use this as the first step to understand what a project is about.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_readme_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_commit_log
registry.register(
ToolDefinition::new("repo_commit_log")
.description("Get commit history with optional filters. Filter by author name (partial match), keyword in commit message, or limit the number of results. Use this to understand recent activity, find who made specific changes, or trace feature development.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("author", "string", "Filter by author name (partial match, case-insensitive)", false),
param!("keyword", "string", "Filter by keyword in commit message (case-insensitive)", false),
param!("limit", "integer", "Maximum number of commits to return (default: 20)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_commit_log_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_contributors
registry.register(
ToolDefinition::new("repo_contributors")
.description("List repository contributors sorted by commit count. Shows each contributor's name, email, commit count, and first/last commit timestamps. Use this to understand who is involved in a project and their contribution levels.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("limit", "integer", "Maximum number of contributors to return (0 = all, default: 0)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_contributors_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// repo_diff_summary
registry.register(
ToolDefinition::new("repo_diff_summary")
.description("Get a summary of changes between two revisions (commits, branches, or tags). Shows files changed, insertions, deletions, and per-file status (added/modified/deleted/renamed). Use this to understand what changed between versions.")
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some(HashMap::from([
param!("project_name", "string", "Project name (slug)", true),
param!("repo_name", "string", "Repository name", true),
param!("from_rev", "string", "Source revision (commit SHA, branch name, or tag)", true),
param!("to_rev", "string", "Target revision (default: HEAD)", false),
])),
required: Some(vec!["project_name".into(), "repo_name".into(), "from_rev".into()]),
}),
ToolHandler::new(|ctx, args| {
let gctx = super::ctx::GitToolCtx::new(ctx);
Box::pin(async move {
repo_diff_summary_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}