//! git_grep — search repository files for patterns. use crate::file_tools::MAX_FILE_SIZE; use crate::git_tools::ctx::GitToolCtx; use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; use regex::RegexBuilder; use std::collections::HashMap; /// Text file extensions to search (skip binary files). const TEXT_EXTS: &[&str] = &[ "rs", "toml", "yaml", "yml", "json", "jsonc", "js", "jsx", "ts", "tsx", "css", "scss", "less", "html", "htm", "xml", "svg", "vue", "svelte", "py", "rb", "go", "java", "kt", "swift", "c", "cpp", "h", "hpp", "cs", "php", "pl", "sh", "bash", "zsh", "fish", "ps1", "bat", "cmd", "sql", "md", "markdown", "rst", "txt", "log", "ini", "cfg", "conf", "dockerfile", "makefile", "cmake", "gradle", "properties", "env", "proto", "graphql", "vue", "lock", ]; fn is_text_ext(path: &str) -> bool { let lower = path.to_lowercase(); TEXT_EXTS.iter().any(|&e| lower.ends_with(&format!(".{}", e))) } fn is_binary_content(data: &[u8]) -> bool { data.iter().take(8192).any(|&b| b == 0) } async fn git_grep_exec( ctx: GitToolCtx, args: serde_json::Value, ) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p .get("project_name") .and_then(|v| v.as_str()) .ok_or("missing project_name")?; let repo_name = p .get("repo_name") .and_then(|v| v.as_str()) .ok_or("missing repo_name")?; let rev = p .get("rev") .and_then(|v| v.as_str()) .map(String::from) .unwrap_or_else(|| "HEAD".to_string()); let pattern = p .get("pattern") .and_then(|v| v.as_str()) .ok_or("missing pattern")?; let glob = p.get("glob").and_then(|v| v.as_str()).map(String::from); let is_regex = p .get("is_regex") .and_then(|v| v.as_bool()) .unwrap_or(true); let context_lines = p .get("context_lines") .and_then(|v| v.as_u64()) .unwrap_or(0) as usize; let max_results = p .get("max_results") .and_then(|v| v.as_u64()) .unwrap_or(100) as usize; let domain = ctx.open_repo(project_name, repo_name).await?; // Resolve revision to commit oid let commit_oid = if rev.len() >= 40 { git::commit::types::CommitOid::new(&rev) } else { domain .commit_get_prefix(&rev) .map_err(|e| e.to_string())? .oid }; let regex = if is_regex { RegexBuilder::new(pattern) .case_insensitive(true) .build() .map_err(|e| format!("invalid regex '{}': {}", pattern, e))? } else { // Escape for literal search RegexBuilder::new(®ex::escape(pattern)) .case_insensitive(true) .build() .map_err(|e| e.to_string())? }; // Recursive tree walk using git2 let repo = domain.repo(); let commit = repo .find_commit(commit_oid.to_oid().map_err(|e| e.to_string())?) .map_err(|e| e.to_string())?; let tree = commit.tree().map_err(|e| e.to_string())?; let mut results: Vec = Vec::new(); // Stack: (tree, current_path_prefix) let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; while let Some((current_tree, current_prefix)) = stack.pop() { for entry in current_tree.iter() { let name = entry.name().unwrap_or_default(); if name.is_empty() { continue; } let path: String = if current_prefix.is_empty() { name.to_string() } else { format!("{}/{}", current_prefix, name) }; if entry.kind() == Some(git2::ObjectType::Tree) { if let Some(subtree) = entry.to_object(&repo).ok().and_then(|o| o.into_tree().ok()) { stack.push((subtree, path)); } continue; } if entry.kind() != Some(git2::ObjectType::Blob) { continue; } // Glob filter if let Some(ref g) = glob { if !glob_match(&path, g) { continue; } } else if !is_text_ext(&path) { continue; } // Read blob content let blob = match entry.to_object(&repo).ok().and_then(|o| o.into_blob().ok()) { Some(b) => b, None => continue, }; let size = blob.size(); if size == 0 || size > MAX_FILE_SIZE { continue; } let data = blob.content(); if is_binary_content(data) { continue; } let content = match String::from_utf8(data.to_vec()) { Ok(s) => s, Err(_) => continue, }; // Search line by line let lines: Vec<&str> = content.lines().collect(); for (line_idx, line) in lines.iter().enumerate() { if regex.is_match(line) { let start = line_idx.saturating_sub(context_lines); let end = (line_idx + context_lines + 1).min(lines.len()); let context: Vec = lines[start..end] .iter() .enumerate() .map(|(i, l)| { let line_num = start + i + 1; let prefix = if start + i == line_idx { ">" } else { " " }; format!("{}{}: {}", prefix, line_num, l) }) .collect(); results.push(serde_json::json!({ "file": path, "line_number": line_idx + 1, "match": line, "context": context.join("\n"), })); if results.len() >= max_results { return Ok(serde_json::json!({ "query": pattern, "rev": rev, "total_matches": results.len(), "truncated": true, "results": results })); } } } } } Ok(serde_json::json!({ "query": pattern, "rev": rev, "total_matches": results.len(), "truncated": false, "results": results })) } fn glob_match(path: &str, pattern: &str) -> bool { // Simple glob: support *, ?, ** let parts: Vec<&str> = pattern.split('/').collect(); let path_parts: Vec<&str> = path.split('/').collect(); let _path_lower = path.to_lowercase(); let pattern_lower = pattern.to_lowercase(); fn matches_part(path_part: &str, pattern_part: &str) -> bool { if pattern_part.is_empty() || pattern_part == "*" { return true; } if pattern_part == "**" { return true; } if let Some(star) = pattern_part.find('*') { let (prefix, suffix) = pattern_part.split_at(star); let suffix = if suffix.starts_with('*') { &suffix[1..] } else { suffix }; if !prefix.is_empty() && !path_part.starts_with(prefix) { return false; } if !suffix.is_empty() && !path_part.ends_with(suffix) { return false; } return true; } path_part == pattern_part } if parts.len() == 1 { // Simple glob pattern on filename only let file_name = path_parts.last().unwrap_or(&""); return matches_part(file_name, &pattern_lower); } // Multi-part glob let mut pi = 0; for part in &parts { while pi < path_parts.len() { if matches_part(path_parts[pi], part) { pi += 1; break; } if *part != "**" { return false; } pi += 1; } } true } pub fn register_grep_tools(registry: &mut ToolRegistry) { let p = HashMap::from([ ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None, }), ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None, }), ("pattern".into(), ToolParam { name: "pattern".into(), param_type: "string".into(), description: Some("Search pattern (regex or literal string)".into()), required: true, properties: None, items: None, }), ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision to search in (branch, tag, commit). Default: HEAD".into()), required: false, properties: None, items: None, }), ("glob".into(), ToolParam { name: "glob".into(), param_type: "string".into(), description: Some("File glob pattern to filter (e.g. *.rs, src/**/*.ts)".into()), required: false, properties: None, items: None, }), ("is_regex".into(), ToolParam { name: "is_regex".into(), param_type: "boolean".into(), description: Some("If true, pattern is a regex. If false, literal string. Default: true".into()), required: false, properties: None, items: None, }), ("context_lines".into(), ToolParam { name: "context_lines".into(), param_type: "integer".into(), description: Some("Number of surrounding lines to include for each match. Default: 0".into()), required: false, properties: None, items: None, }), ("max_results".into(), ToolParam { name: "max_results".into(), param_type: "integer".into(), description: Some("Maximum number of matches to return. Default: 100".into()), required: false, properties: None, items: None, }), ]); let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "pattern".into()]), }; registry.register( ToolDefinition::new("git_grep") .description("Search for a text pattern across all files in a repository at a given revision. Supports regex, glob filtering, and line-level context. Skips binary files automatically.") .parameters(schema), ToolHandler::new(|ctx, args| { let gctx = GitToolCtx::new(ctx); Box::pin(async move { git_grep_exec(gctx, args) .await .map_err(agent::ToolError::ExecutionError) }) }), ); }