diff --git a/Cargo.lock b/Cargo.lock index dc60e72..b287ca0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2757,6 +2757,31 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "fctool" +version = "0.2.9" +dependencies = [ + "agent", + "base64 0.22.1", + "chrono", + "csv", + "db", + "git", + "git2", + "models", + "pulldown-cmark 0.12.2", + "quick-xml 0.37.5", + "regex", + "reqwest 0.13.2", + "sea-orm", + "serde", + "serde_json", + "sqlparser", + "tokio", + "tracing", + "uuid", +] + [[package]] name = "fdeflate" version = "0.3.7" @@ -7705,6 +7730,7 @@ dependencies = [ "db", "deadpool-redis", "email", + "fctool", "flate2", "futures", "git", diff --git a/Cargo.toml b/Cargo.toml index 812b2a3..b759e9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ members = [ "libs/agent", "libs/migrate", "libs/agent-tool-derive", + "libs/fctool", "apps/migrate", "apps/app", "apps/adminrpc", @@ -50,6 +51,7 @@ observability = { path = "libs/observability" } avatar = { path = "libs/avatar" } migrate = { path = "libs/migrate" } session_manager = { path = "libs/session_manager" } +fctool = { path = "libs/fctool" } sea-query = "1.0.0-rc.31" diff --git a/libs/fctool/Cargo.toml b/libs/fctool/Cargo.toml new file mode 100644 index 0000000..0010c5c --- /dev/null +++ b/libs/fctool/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "fctool" +version.workspace = true +edition.workspace = true +authors.workspace = true +description.workspace = true +repository.workspace = true +readme.workspace = true +homepage.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true +documentation.workspace = true + +[lib] +path = "src/lib.rs" +name = "fctool" + +[dependencies] +agent = { workspace = true } +git = { workspace = true } +models = { workspace = true } +db = { workspace = true } +sea-orm = { workspace = true, features = [] } +git2 = { workspace = true } + +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +base64 = { workspace = true } +chrono = { workspace = true, features = ["serde"] } +uuid = { workspace = true, features = ["serde", "v7"] } +reqwest = { workspace = true, features = ["json", "native-tls"] } +regex = { workspace = true } +csv = { workspace = true } +quick-xml = { workspace = true } +sqlparser = { workspace = true } +pulldown-cmark = { workspace = true } +tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } +tracing = { workspace = true } diff --git a/libs/service/file_tools/csv.rs b/libs/fctool/src/file_tools/csv.rs similarity index 99% rename from libs/service/file_tools/csv.rs rename to libs/fctool/src/file_tools/csv.rs index 78db2f9..2971082 100644 --- a/libs/service/file_tools/csv.rs +++ b/libs/fctool/src/file_tools/csv.rs @@ -51,7 +51,7 @@ async fn read_csv_exec( let domain = ctx.open_repo(project_name, repo_name).await?; - let commit_oid = if rev.len() >= 40 { + let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { git::commit::types::CommitOid::new(&rev) } else { domain diff --git a/libs/service/file_tools/grep.rs b/libs/fctool/src/file_tools/grep.rs similarity index 99% rename from libs/service/file_tools/grep.rs rename to libs/fctool/src/file_tools/grep.rs index 972d065..96e867a 100644 --- a/libs/service/file_tools/grep.rs +++ b/libs/fctool/src/file_tools/grep.rs @@ -67,7 +67,7 @@ async fn git_grep_exec( let domain = ctx.open_repo(project_name, repo_name).await?; // Resolve revision to commit oid - let commit_oid = if rev.len() >= 40 { + let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { git::commit::types::CommitOid::new(&rev) } else { domain diff --git a/libs/service/file_tools/json.rs b/libs/fctool/src/file_tools/json.rs similarity index 99% rename from libs/service/file_tools/json.rs rename to libs/fctool/src/file_tools/json.rs index de0a364..11770bc 100644 --- a/libs/service/file_tools/json.rs +++ b/libs/fctool/src/file_tools/json.rs @@ -130,7 +130,7 @@ async fn read_json_exec( let domain = ctx.open_repo(project_name, repo_name).await?; - let commit_oid = if rev.len() >= 40 { + let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { git::commit::types::CommitOid::new(&rev) } else { domain diff --git a/libs/service/file_tools/markdown.rs b/libs/fctool/src/file_tools/markdown.rs similarity index 99% rename from libs/service/file_tools/markdown.rs rename to libs/fctool/src/file_tools/markdown.rs index a3fa466..c301e42 100644 --- a/libs/service/file_tools/markdown.rs +++ b/libs/fctool/src/file_tools/markdown.rs @@ -41,7 +41,7 @@ async fn read_markdown_exec( let domain = ctx.open_repo(project_name, repo_name).await?; - let commit_oid = if rev.len() >= 40 { + let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { git::commit::types::CommitOid::new(&rev) } else { domain diff --git a/libs/service/file_tools/mod.rs b/libs/fctool/src/file_tools/mod.rs similarity index 100% rename from libs/service/file_tools/mod.rs rename to libs/fctool/src/file_tools/mod.rs diff --git a/libs/service/file_tools/sql.rs b/libs/fctool/src/file_tools/sql.rs similarity index 98% rename from libs/service/file_tools/sql.rs rename to libs/fctool/src/file_tools/sql.rs index 0f6c3e3..5d69da4 100644 --- a/libs/service/file_tools/sql.rs +++ b/libs/fctool/src/file_tools/sql.rs @@ -33,7 +33,7 @@ async fn read_sql_exec( let domain = ctx.open_repo(project_name, repo_name).await?; - let commit_oid = if rev.len() >= 40 { + let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { git::commit::types::CommitOid::new(&rev) } else { domain diff --git a/libs/fctool/src/git_tools/blob.rs b/libs/fctool/src/git_tools/blob.rs new file mode 100644 index 0000000..33f6486 --- /dev/null +++ b/libs/fctool/src/git_tools/blob.rs @@ -0,0 +1,273 @@ +//! Git blob tools — raw object-level operations on blob OIDs. + +use super::ctx::GitToolCtx; +use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; +use base64::Engine; +use std::collections::HashMap; + +async fn git_blob_info_exec( + ctx: GitToolCtx, + args: serde_json::Value, +) -> Result { + let p: serde_json::Map = + serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let commit_oid = resolve_oid(&domain, oid)?; + let info = domain.blob_get(&commit_oid).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "oid": info.oid.to_string(), + "size": info.size, + "is_binary": info.is_binary, + })) +} + +async fn git_blob_exists_exec( + ctx: GitToolCtx, + args: serde_json::Value, +) -> Result { + let p: serde_json::Map = + serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let commit_oid = resolve_oid(&domain, oid)?; + let exists = domain.blob_exists(&commit_oid); + + Ok(serde_json::json!({ "oid": commit_oid.to_string(), "exists": exists })) +} + +async fn git_blob_content_exec( + ctx: GitToolCtx, + args: serde_json::Value, +) -> Result { + let p: serde_json::Map = + serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?; + let max_size = p.get("max_size").and_then(|v| v.as_u64()).unwrap_or(1_048_576) as usize; // 1MB default + + let domain = ctx.open_repo(project_name, repo_name).await?; + let commit_oid = resolve_oid(&domain, oid)?; + let blob = domain.blob_content(&commit_oid).map_err(|e| e.to_string())?; + + if blob.size > max_size { + return Err(format!( + "blob too large ({} bytes), max {} bytes. Use a smaller max_size or retrieve the raw OID.", + blob.size, max_size + )); + } + + let (content, is_binary) = if blob.is_binary { + (base64::engine::general_purpose::STANDARD.encode(&blob.content), true) + } else { + (String::from_utf8_lossy(&blob.content).to_string(), false) + }; + + Ok(serde_json::json!({ + "oid": blob.oid.to_string(), + "size": blob.size, + "is_binary": is_binary, + "content": content, + })) +} + +async fn git_blob_create_exec( + ctx: GitToolCtx, + args: serde_json::Value, +) -> Result { + let p: serde_json::Map = + serde_json::from_value(args).map_err(|e| e.to_string())?; + let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; + let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?; + let content = p.get("content").and_then(|v| v.as_str()).ok_or("missing content")?; + let encoding = p.get("encoding").and_then(|v| v.as_str()).unwrap_or("utf-8"); + + let data = match encoding { + "base64" => base64::engine::general_purpose::STANDARD + .decode(content) + .map_err(|e| format!("invalid base64: {}", e))?, + "utf-8" => content.as_bytes().to_vec(), + other => return Err(format!("unsupported encoding '{}'. Use 'utf-8' or 'base64'.", other)), + }; + + let domain = ctx.open_repo(project_name, repo_name).await?; + let oid = domain.blob_create(&data).map_err(|e| e.to_string())?; + let info = domain.blob_get(&oid).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "oid": info.oid.to_string(), + "size": info.size, + "is_binary": info.is_binary, + })) +} + +fn resolve_oid( + domain: &git::GitDomain, + rev: &str, +) -> Result { + if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { + Ok(git::commit::types::CommitOid::new(rev)) + } else { + domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid) + } +} + +pub fn register_git_tools(registry: &mut ToolRegistry) { + // git_blob_info + let p = HashMap::from([ + ("project_name".into(), ToolParam { + name: "project_name".into(), param_type: "string".into(), + description: Some("Project name (slug)".into()), + required: true, properties: None, items: None, + }), + ("repo_name".into(), ToolParam { + name: "repo_name".into(), param_type: "string".into(), + description: Some("Repository name".into()), + required: true, properties: None, items: None, + }), + ("oid".into(), ToolParam { + name: "oid".into(), param_type: "string".into(), + description: Some("Blob OID (full 40-char hex or short prefix)".into()), + required: true, properties: None, items: None, + }), + ]); + let schema = ToolSchema { + schema_type: "object".into(), + properties: Some(p), + required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]), + }; + registry.register( + ToolDefinition::new("git_blob_info") + .description("Get metadata about a git blob by its OID. Returns size and whether the blob is binary.") + .parameters(schema), + ToolHandler::new(|ctx, args| { + let gctx = GitToolCtx::new(ctx); + Box::pin(async move { + git_blob_info_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // git_blob_exists + let p = HashMap::from([ + ("project_name".into(), ToolParam { + name: "project_name".into(), param_type: "string".into(), + description: Some("Project name (slug)".into()), + required: true, properties: None, items: None, + }), + ("repo_name".into(), ToolParam { + name: "repo_name".into(), param_type: "string".into(), + description: Some("Repository name".into()), + required: true, properties: None, items: None, + }), + ("oid".into(), ToolParam { + name: "oid".into(), param_type: "string".into(), + description: Some("Blob OID to check".into()), + required: true, properties: None, items: None, + }), + ]); + let schema = ToolSchema { + schema_type: "object".into(), + properties: Some(p), + required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]), + }; + registry.register( + ToolDefinition::new("git_blob_exists") + .description("Check whether a git blob exists in the repository by its OID.") + .parameters(schema), + ToolHandler::new(|ctx, args| { + let gctx = GitToolCtx::new(ctx); + Box::pin(async move { + git_blob_exists_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // git_blob_content + let p = HashMap::from([ + ("project_name".into(), ToolParam { + name: "project_name".into(), param_type: "string".into(), + description: Some("Project name (slug)".into()), + required: true, properties: None, items: None, + }), + ("repo_name".into(), ToolParam { + name: "repo_name".into(), param_type: "string".into(), + description: Some("Repository name".into()), + required: true, properties: None, items: None, + }), + ("oid".into(), ToolParam { + name: "oid".into(), param_type: "string".into(), + description: Some("Blob OID to retrieve content for".into()), + required: true, properties: None, items: None, + }), + ("max_size".into(), ToolParam { + name: "max_size".into(), param_type: "integer".into(), + description: Some("Maximum blob size in bytes (default: 1MB)".into()), + required: false, properties: None, items: None, + }), + ]); + let schema = ToolSchema { + schema_type: "object".into(), + properties: Some(p), + required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]), + }; + registry.register( + ToolDefinition::new("git_blob_content") + .description("Retrieve the raw content of a git blob by its OID. Binary content is base64-encoded. Limits to 1MB by default.") + .parameters(schema), + ToolHandler::new(|ctx, args| { + let gctx = GitToolCtx::new(ctx); + Box::pin(async move { + git_blob_content_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); + + // git_blob_create + let p = HashMap::from([ + ("project_name".into(), ToolParam { + name: "project_name".into(), param_type: "string".into(), + description: Some("Project name (slug)".into()), + required: true, properties: None, items: None, + }), + ("repo_name".into(), ToolParam { + name: "repo_name".into(), param_type: "string".into(), + description: Some("Repository name".into()), + required: true, properties: None, items: None, + }), + ("content".into(), ToolParam { + name: "content".into(), param_type: "string".into(), + description: Some("Blob content (utf-8 string or base64-encoded bytes)".into()), + required: true, properties: None, items: None, + }), + ("encoding".into(), ToolParam { + name: "encoding".into(), param_type: "string".into(), + description: Some("Encoding of content: 'utf-8' (default) or 'base64'".into()), + required: false, properties: None, items: None, + }), + ]); + let schema = ToolSchema { + schema_type: "object".into(), + properties: Some(p), + required: Some(vec!["project_name".into(), "repo_name".into(), "content".into()]), + }; + registry.register( + ToolDefinition::new("git_blob_create") + .description("Create a new git blob in the repository. Writes the raw content to the object database and returns the new blob OID. Supports both utf-8 text and base64-encoded binary content.") + .parameters(schema), + ToolHandler::new(|ctx, args| { + let gctx = GitToolCtx::new(ctx); + Box::pin(async move { + git_blob_create_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) + }) + }), + ); +} diff --git a/libs/service/git_tools/branch.rs b/libs/fctool/src/git_tools/branch.rs similarity index 97% rename from libs/service/git_tools/branch.rs rename to libs/fctool/src/git_tools/branch.rs index 263e782..1ae5887 100644 --- a/libs/service/git_tools/branch.rs +++ b/libs/fctool/src/git_tools/branch.rs @@ -35,8 +35,10 @@ async fn git_branch_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul let info = domain.branch_get(name).map_err(|e| e.to_string())?; let ahead_behind = if let Some(ref upstream) = info.upstream { - let (ahead, behind) = domain.branch_ahead_behind(name, upstream).unwrap_or((0, 0)); - Some(serde_json::json!({ "ahead": ahead, "behind": behind })) + match domain.branch_ahead_behind(name, upstream) { + Ok((ahead, behind)) => Some(serde_json::json!({ "ahead": ahead, "behind": behind })), + Err(e) => Some(serde_json::json!({ "error": e.to_string() })), + } } else { None }; Ok(serde_json::json!({ diff --git a/libs/service/git_tools/commit.rs b/libs/fctool/src/git_tools/commit.rs similarity index 95% rename from libs/service/git_tools/commit.rs rename to libs/fctool/src/git_tools/commit.rs index 875ee0a..f3a4eaf 100644 --- a/libs/service/git_tools/commit.rs +++ b/libs/fctool/src/git_tools/commit.rs @@ -47,6 +47,16 @@ async fn git_log_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result Result { + if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { + domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string()) + } else { + domain.commit_get_prefix(rev).map_err(|e| e.to_string()) + } +} + async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; @@ -54,11 +64,7 @@ async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result= 40 { - domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())? - } else { - domain.commit_get_prefix(rev).map_err(|e| e.to_string())? - }; + let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?; let refs = domain.commit_refs(&meta.oid).map_err(|e| e.to_string())?; @@ -128,11 +134,7 @@ async fn git_commit_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul let rev = p.get("rev").and_then(|v| v.as_str()).ok_or("missing rev")?; let domain = ctx.open_repo(project_name, repo_name).await?; - let meta = if rev.len() >= 40 { - domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())? - } else { - domain.commit_get_prefix(rev).map_err(|e| e.to_string())? - }; + let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?; Ok(flatten_commit(&meta)) } @@ -195,9 +197,11 @@ async fn git_reflog_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result = entries.iter() .take(limit) .map(|e| { - let ts = e.time_secs; + // Convert to UTC by subtracting the timezone offset, consistent + // with all other timestamp conversions in this module. + let ts = e.time_secs - (e.offset_minutes as i64 * 60); let time_str = chrono::Utc.timestamp_opt(ts, 0).single() - .map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", ts)); + .map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", e.time_secs)); serde_json::json!({ "oid_new": e.oid_new.to_string(), "oid_old": e.oid_old.to_string(), "committer_name": e.committer_name, "committer_email": e.committer_email, diff --git a/libs/service/git_tools/ctx.rs b/libs/fctool/src/git_tools/ctx.rs similarity index 100% rename from libs/service/git_tools/ctx.rs rename to libs/fctool/src/git_tools/ctx.rs diff --git a/libs/service/git_tools/diff.rs b/libs/fctool/src/git_tools/diff.rs similarity index 91% rename from libs/service/git_tools/diff.rs rename to libs/fctool/src/git_tools/diff.rs index 01c6e28..98c1648 100644 --- a/libs/service/git_tools/diff.rs +++ b/libs/fctool/src/git_tools/diff.rs @@ -17,7 +17,7 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result Result { - if rev.len() >= 40 { + if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { Ok(git::commit::types::CommitOid::new(rev)) } else { domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid) @@ -68,8 +68,14 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result = result.deltas.iter().map(|d| { - serde_json::json!({ "path": d.new_file.path, "status": format!("{:?}", d.status), "is_binary": d.new_file.is_binary }) + let (path, is_binary) = if d.status == DiffDeltaStatus::Deleted { + (d.old_file.path.clone(), d.old_file.is_binary) + } else { + (d.new_file.path.clone(), d.new_file.is_binary) + }; + serde_json::json!({ "path": path, "status": format!("{:?}", d.status), "is_binary": is_binary }) }).collect(); Ok(serde_json::json!({ @@ -87,22 +93,16 @@ async fn git_diff_stats_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result let domain = ctx.open_repo(project_name, repo_name).await?; - let stats = if base.len() >= 40 && head.len() >= 40 { - domain.diff_stats(&git::commit::types::CommitOid::new(base), &git::commit::types::CommitOid::new(head)) - .map_err(|e| e.to_string())? - } else { - let b = if base.len() >= 40 { - git::commit::types::CommitOid::new(base) + let resolve = |rev: &str| -> Result { + if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { + Ok(git::commit::types::CommitOid::new(rev)) } else { - domain.commit_get_prefix(base).map_err(|e| e.to_string())?.oid - }; - let h = if head.len() >= 40 { - git::commit::types::CommitOid::new(head) - } else { - domain.commit_get_prefix(head).map_err(|e| e.to_string())?.oid - }; - domain.diff_stats(&b, &h).map_err(|e| e.to_string())? + domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid) + } }; + let b = resolve(base).map_err(|e| e.to_string())?; + let h = resolve(head).map_err(|e| e.to_string())?; + let stats = domain.diff_stats(&b, &h).map_err(|e| e.to_string())?; Ok(serde_json::json!({ "files_changed": stats.files_changed, @@ -121,11 +121,11 @@ async fn git_blame_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result= 40 { - git::commit::types::CommitOid::new(&rev) + let oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { + Ok(git::commit::types::CommitOid::new(&rev)) } else { - domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid - }; + domain.commit_get_prefix(&rev).map_err(|e| e.to_string()).map(|m| m.oid) + }?; use git::blame::ops::BlameOptions; let mut bopts = BlameOptions::new(); diff --git a/libs/service/git_tools/mod.rs b/libs/fctool/src/git_tools/mod.rs similarity index 91% rename from libs/service/git_tools/mod.rs rename to libs/fctool/src/git_tools/mod.rs index f2f08ed..08b8783 100644 --- a/libs/service/git_tools/mod.rs +++ b/libs/fctool/src/git_tools/mod.rs @@ -3,6 +3,7 @@ //! Each module defines async exec functions + a `register_git_tools()` call. //! All tools take `project_name` + `repo_name` as required params. +pub mod blob; pub mod branch; pub mod commit; pub mod ctx; @@ -16,6 +17,7 @@ pub fn register_all(registry: &mut agent::ToolRegistry) { commit::register_git_tools(registry); branch::register_git_tools(registry); diff::register_git_tools(registry); + blob::register_git_tools(registry); tree::register_git_tools(registry); tag::register_git_tools(registry); } diff --git a/libs/service/git_tools/tag.rs b/libs/fctool/src/git_tools/tag.rs similarity index 91% rename from libs/service/git_tools/tag.rs rename to libs/fctool/src/git_tools/tag.rs index 48c1be9..eed8808 100644 --- a/libs/service/git_tools/tag.rs +++ b/libs/fctool/src/git_tools/tag.rs @@ -16,12 +16,19 @@ async fn git_tag_list_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result = match pattern { Some(ref pat) => { let pat_lower = pat.to_lowercase(); - let has_wildcard = pat.contains('*'); + // Convert glob pattern (only * wildcards) to regex for proper matching. + // "*" matches any sequence of characters. + let regex_pat = pat_lower + .split('*') + .map(|s| regex::escape(s)) + .collect::>() + .join(".*"); + let re = regex::Regex::new(&format!("^{}$", regex_pat)) + .ok(); all_tags.iter() .filter(|t| { let n = t.name.to_lowercase(); - if has_wildcard { n.contains(&pat_lower.replace('*', "")) } - else { n.contains(&pat_lower) } + re.as_ref().map(|r| r.is_match(&n)).unwrap_or(false) }) .map(|t| tag_to_json(t)) .collect() diff --git a/libs/service/git_tools/tree.rs b/libs/fctool/src/git_tools/tree.rs similarity index 94% rename from libs/service/git_tools/tree.rs rename to libs/fctool/src/git_tools/tree.rs index 68e948b..c07a3a1 100644 --- a/libs/service/git_tools/tree.rs +++ b/libs/fctool/src/git_tools/tree.rs @@ -5,6 +5,16 @@ use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; use base64::Engine; use std::collections::HashMap; +/// Resolve a rev string to a commit OID. Tries full OID first (exactly 40 hex chars), +/// falls back to prefix lookup (branch, tag, short hash). +fn resolve_commit_oid(domain: &git::GitDomain, rev: &str) -> Result { + if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) { + Ok(git::commit::types::CommitOid::new(rev)) + } else { + domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid) + } +} + async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?; @@ -13,11 +23,7 @@ async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resu let rev = p.get("rev").and_then(|v| v.as_str()).map(|s| s.to_string()).unwrap_or_else(|| "HEAD".to_string()); let domain = ctx.open_repo(project_name, repo_name).await?; - let oid = if rev.len() >= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid - }; + let oid = resolve_commit_oid(&domain, &rev)?; let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?; let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?; @@ -46,11 +52,7 @@ async fn git_tree_ls_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid - }; + let commit_oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?; let entries = match dir_path { Some(ref dp) => { @@ -102,11 +104,7 @@ async fn git_blob_get_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid - }; + let oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?; let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?; let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?; diff --git a/libs/service/git_tools/types.rs b/libs/fctool/src/git_tools/types.rs similarity index 96% rename from libs/service/git_tools/types.rs rename to libs/fctool/src/git_tools/types.rs index 7c66d3a..0c2c152 100644 --- a/libs/service/git_tools/types.rs +++ b/libs/fctool/src/git_tools/types.rs @@ -217,16 +217,24 @@ pub struct DiffFileOut { impl DiffFileOut { pub fn from_delta(delta: &DiffDelta) -> Self { - // For deleted files, use old_file.path; for all others, use new_file.path. - let path = match delta.status { - DiffDeltaStatus::Deleted => delta.old_file.path.clone(), - _ => delta.new_file.path.clone(), + // For deleted files, use old_file for all metadata; for all others, use new_file. + let (path, is_binary, size) = match delta.status { + DiffDeltaStatus::Deleted => ( + delta.old_file.path.clone(), + delta.old_file.is_binary, + delta.old_file.size, + ), + _ => ( + delta.new_file.path.clone(), + delta.new_file.is_binary, + delta.new_file.size, + ), }; Self { path, status: format!("{:?}", delta.status), - is_binary: delta.new_file.is_binary, - size: delta.new_file.size, + is_binary, + size, } } } diff --git a/libs/fctool/src/lib.rs b/libs/fctool/src/lib.rs new file mode 100644 index 0000000..e8cb8e9 --- /dev/null +++ b/libs/fctool/src/lib.rs @@ -0,0 +1,5 @@ +//! AI agent function-call tools: git operations, file parsing/search, and project management. + +pub mod git_tools; +pub mod file_tools; +pub mod project_tools; diff --git a/libs/service/project_tools/arxiv.rs b/libs/fctool/src/project_tools/arxiv.rs similarity index 100% rename from libs/service/project_tools/arxiv.rs rename to libs/fctool/src/project_tools/arxiv.rs diff --git a/libs/service/project_tools/boards.rs b/libs/fctool/src/project_tools/boards.rs similarity index 100% rename from libs/service/project_tools/boards.rs rename to libs/fctool/src/project_tools/boards.rs diff --git a/libs/service/project_tools/curl.rs b/libs/fctool/src/project_tools/curl.rs similarity index 100% rename from libs/service/project_tools/curl.rs rename to libs/fctool/src/project_tools/curl.rs diff --git a/libs/service/project_tools/issues.rs b/libs/fctool/src/project_tools/issues.rs similarity index 100% rename from libs/service/project_tools/issues.rs rename to libs/fctool/src/project_tools/issues.rs diff --git a/libs/service/project_tools/members.rs b/libs/fctool/src/project_tools/members.rs similarity index 100% rename from libs/service/project_tools/members.rs rename to libs/fctool/src/project_tools/members.rs diff --git a/libs/service/project_tools/mod.rs b/libs/fctool/src/project_tools/mod.rs similarity index 100% rename from libs/service/project_tools/mod.rs rename to libs/fctool/src/project_tools/mod.rs diff --git a/libs/service/project_tools/repos.rs b/libs/fctool/src/project_tools/repos.rs similarity index 94% rename from libs/service/project_tools/repos.rs rename to libs/fctool/src/project_tools/repos.rs index e5eba91..0b39e60 100644 --- a/libs/service/project_tools/repos.rs +++ b/libs/fctool/src/project_tools/repos.rs @@ -23,6 +23,14 @@ pub async fn list_repos_exec( let project_id = ctx.project_id(); let db = ctx.db(); + // Resolve project name so the AI can use it for git_tools operations + let project = models::projects::project::Entity::find_by_id(project_id) + .one(db) + .await + .map_err(|e| ToolError::ExecutionError(e.to_string()))? + .ok_or_else(|| ToolError::ExecutionError("Project not found".into()))?; + let project_name = project.name.clone(); + let repos = repo::Entity::find() .filter(repo::Column::Project.eq(project_id)) .order_by_asc(repo::Column::RepoName) @@ -36,6 +44,7 @@ pub async fn list_repos_exec( serde_json::json!({ "id": r.id.to_string(), "name": r.repo_name, + "project_name": project_name, "description": r.description, "default_branch": r.default_branch, "is_private": r.is_private, @@ -293,8 +302,9 @@ pub async fn create_commit_exec( let repo_name = args .get("repo_name") + .or_else(|| args.get("name")) .and_then(|v| v.as_str()) - .ok_or_else(|| ToolError::ExecutionError("repo_name is required".into()))?; + .ok_or_else(|| ToolError::ExecutionError("repo_name (or name) is required".into()))?; let message = args .get("message") @@ -308,10 +318,12 @@ pub async fn create_commit_exec( .unwrap_or("main") .to_string(); - // Validate branch: no path traversal, no slashes - if branch.contains("..") || branch.contains('/') || branch.contains('\\') || branch.is_empty() { + // Validate branch: no path traversal, no backslashes, not empty, no lock files + if branch.contains("..") || branch.contains('\\') || branch.is_empty() + || branch.ends_with(".lock") || branch.starts_with('-') + { return Err(ToolError::ExecutionError( - "Invalid branch name: must not contain path separators or '..'".into(), + "Invalid branch name: must not contain '..' or backslashes, and must not be empty".into(), )); } @@ -574,9 +586,14 @@ pub fn create_commit_tool_definition() -> ToolDefinition { let mut p = HashMap::new(); p.insert("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), - description: Some("Repository name (required).".into()), + description: Some("Repository name. Can also use 'name' as alias. Required.".into()), required: true, properties: None, items: None, }); + p.insert("name".into(), ToolParam { + name: "name".into(), param_type: "string".into(), + description: Some("Alias for repo_name. Use the same value as returned by project_list_repos.".into()), + required: false, properties: None, items: None, + }); p.insert("branch".into(), ToolParam { name: "branch".into(), param_type: "string".into(), description: Some("Branch to commit to. Defaults to 'main'. Optional.".into()), diff --git a/libs/service/Cargo.toml b/libs/service/Cargo.toml index d200b37..a6bacf5 100644 --- a/libs/service/Cargo.toml +++ b/libs/service/Cargo.toml @@ -17,6 +17,7 @@ name = "service" [dependencies] config = { workspace = true } agent = { workspace = true } +fctool = { workspace = true } db = { workspace = true } models = { workspace = true } email = { workspace = true } diff --git a/libs/service/file_tools/excel.rs b/libs/service/file_tools/excel.rs deleted file mode 100644 index 3b32b87..0000000 --- a/libs/service/file_tools/excel.rs +++ /dev/null @@ -1,184 +0,0 @@ -//! read_excel — parse and query Excel files (.xlsx, .xls). - -use crate::file_tools::MAX_FILE_SIZE; -use crate::git_tools::ctx::GitToolCtx; -use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; -use calamine::{open_workbook, Reader, Xlsx}; -use futures::FutureExt; -use std::collections::HashMap; - -async fn read_excel_exec( - ctx: GitToolCtx, - args: serde_json::Value, -) -> Result { - let p: serde_json::Map = - serde_json::from_value(args).map_err(|e| e.to_string())?; - - let project_name = p - .get("project_name") - .and_then(|v| v.as_str()) - .ok_or("missing project_name")?; - let repo_name = p - .get("repo_name") - .and_then(|v| v.as_str()) - .ok_or("missing repo_name")?; - let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; - let rev = p - .get("rev") - .and_then(|v| v.as_str()) - .map(String::from) - .unwrap_or_else(|| "HEAD".to_string()); - let sheet_name = p.get("sheet_name").and_then(|v| v.as_str()).map(String::from); - let sheet_index = p.get("sheet_index").and_then(|v| v.as_u64()).map(|v| v as usize); - let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize; - let limit = p - .get("limit") - .and_then(|v| v.as_u64()) - .unwrap_or(100) as usize; - let has_header = p - .get("has_header") - .and_then(|v| v.as_bool()) - .unwrap_or(true); - - let domain = ctx.open_repo(project_name, repo_name).await?; - - let commit_oid = if rev.len() >= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain - .commit_get_prefix(&rev) - .map_err(|e| e.to_string())? - .oid - }; - - let entry = domain - .tree_entry_by_path_from_commit(&commit_oid, path) - .map_err(|e| e.to_string())?; - let blob = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?; - let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; - - let data = &content.content; - if data.len() > MAX_FILE_SIZE { - return Err(format!( - "file too large ({} bytes), max {} bytes", - data.len(), - MAX_FILE_SIZE - )); - } - - // Use cursor-based reading to avoid tempfile - let cursor = std::io::Cursor::new(data.clone()); - let mut workbook: Xlsx>> = - open_workbook(cursor).map_err(|e| format!("failed to open Excel: {}", e))?; - - let sheet_names = workbook.sheet_names().to_vec(); - - // Determine which sheet to read - let sheet_idx = match (sheet_name.clone(), sheet_index) { - (Some(name), _) => sheet_names - .iter() - .position(|n| n == &name) - .ok_or_else(|| format!("sheet '{}' not found. Available: {:?}", name, sheet_names))?, - (_, Some(idx)) => { - if idx >= sheet_names.len() { - return Err(format!( - "sheet index {} out of range (0..{})", - idx, - sheet_names.len() - )); - } - idx - } - _ => 0, - }; - - let range = workbook - .worksheet_range_at(sheet_idx) - .map_err(|e| format!("failed to read sheet: {}", e))?; - - let rows: Vec> = range - .rows() - .skip(if has_header { offset + 1 } else { offset }) - .take(limit) - .map(|row| { - row.iter() - .map(|cell| { - use calamine::Data; - match cell { - Data::Int(i) => serde_json::Value::Number((*i).into()), - Data::Float(f) => { - serde_json::json!(f) - } - Data::String(s) => serde_json::Value::String(s.clone()), - Data::Bool(b) => serde_json::Value::Bool(*b), - Data::DateTime(dt) => { - serde_json::Value::String(format!("{:?}", dt)) - } - Data::DateTimeIso(s) => serde_json::Value::String(s.clone()), - Data::DurationIso(s) => serde_json::Value::String(s.clone()), - Data::Error(e) => serde_json::json!({ "error": format!("{:?}", e) }), - Data::Empty => serde_json::Value::Null, - } - }) - .collect() - }) - .collect(); - - let header_row: Vec = if has_header { - range - .rows() - .next() - .map(|row| { - row.iter() - .map(|c| { - if let calamine::Data::String(s) = c { - s.clone() - } else { - String::new() - } - }) - .collect() - }) - .unwrap_or_default() - } else { - vec![] - }; - - Ok(serde_json::json!({ - "path": path, - "rev": rev, - "sheets": sheet_names, - "active_sheet": sheet_names.get(sheet_idx).cloned(), - "sheet_index": sheet_idx, - "headers": header_row, - "rows": rows, - "row_count": rows.len(), - "total_rows": range.rows().count().saturating_sub(if has_header { 1 } else { 0 }), - })) -} - -pub fn register_excel_tools(registry: &mut ToolRegistry) { - let p = HashMap::from([ - ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), - ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), - ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path within the repository (supports .xlsx, .xls)".into()), required: true, properties: None, items: None }), - ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), - ("sheet_name".into(), ToolParam { name: "sheet_name".into(), param_type: "string".into(), description: Some("Sheet name to read. Defaults to first sheet.".into()), required: false, properties: None, items: None }), - ("sheet_index".into(), ToolParam { name: "sheet_index".into(), param_type: "integer".into(), description: Some("Sheet index (0-based). Ignored if sheet_name is set.".into()), required: false, properties: None, items: None }), - ("has_header".into(), ToolParam { name: "has_header".into(), param_type: "boolean".into(), description: Some("If true, first row is column headers (default: true)".into()), required: false, properties: None, items: None }), - ("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of rows to skip (default: 0)".into()), required: false, properties: None, items: None }), - ("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum rows to return (default: 100)".into()), required: false, properties: None, items: None }), - ]); - let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; - registry.register( - ToolDefinition::new("read_excel") - .description("Parse and query Excel spreadsheets (.xlsx, .xls). Returns sheet names, headers, and rows with support for sheet selection and pagination.") - .parameters(schema), - ToolHandler::new(|ctx, args| { - let gctx = GitToolCtx::new(ctx); - Box::pin(async move { - read_excel_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) - }) - }), - ); -} diff --git a/libs/service/file_tools/pdf.rs b/libs/service/file_tools/pdf.rs deleted file mode 100644 index 2cd5261..0000000 --- a/libs/service/file_tools/pdf.rs +++ /dev/null @@ -1,244 +0,0 @@ -//! read_pdf — extract text from PDF files. - -use crate::file_tools::MAX_FILE_SIZE; -use crate::git_tools::ctx::GitToolCtx; -use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; -use futures::FutureExt; -use lopdf::{Document, Object, ObjectId}; -use std::collections::HashMap; - -/// Extract text content from a PDF page's content stream. -fn extract_page_text(doc: &Document, page_id: ObjectId) -> String { - let mut text = String::new(); - - // Get page dictionary - let page_dict = match doc.get(page_id) { - Ok(dict) => dict, - Err(_) => return text, - }; - - // Get content streams (can be a single stream or array) - let content_streams = match page_dict.get(b"Contents") { - Ok(obj) => obj.clone(), - Err(_) => return text, - }; - - let stream_ids: Vec = match &content_streams { - Object::Reference(id) => vec![*id], - Object::Array(arr) => arr - .iter() - .filter_map(|o| { - if let Object::Reference(id) = o { - Some(*id) - } else { - None - } - }) - .collect(), - _ => return text, - }; - - for stream_id in stream_ids { - if let Ok((_, stream)) = doc.get_stream(stream_id) { - // Decode the stream - if let Ok(decompressed) = stream.decompressed_content() { - text.push_str(&extract_text_from_content(&decompress_pdf_stream(&decompressed))); - text.push('\n'); - } - } - } - - text -} - -/// Very simple PDF content stream text extraction. -/// Handles Tj, TJ, Td, T*, ', " operators. -fn extract_text_from_content(content: &[u8]) -> String { - let data = String::from_utf8_lossy(content); - let mut result = String::new(); - let mut in_parens = false; - let mut current_text = String::new(); - let mut last_was_tj = false; - - let mut chars = data.chars().peekable(); - - while let Some(c) = chars.next() { - match c { - '(' => { - in_parens = true; - current_text.clear(); - } - ')' if in_parens => { - in_parens = false; - if !current_text.is_empty() { - if last_was_tj { - // TJ operator: subtract current text width offset - } - result.push_str(¤t_text); - result.push(' '); - last_was_tj = false; - } - } - c if in_parens => { - if c == '\\' { - if let Some(escaped) = chars.next() { - match escaped { - 'n' => current_text.push('\n'), - 'r' => current_text.push('\r'), - 't' => current_text.push('\t'), - _ => current_text.push(escaped), - } - } - } else { - current_text.push(c); - } - } - '%' => { - // Comment, skip to end of line - while let Some(nc) = chars.next() { - if nc == '\n' || nc == '\r' { - break; - } - } - } - _ => {} - } - } - - // Clean up excessive newlines - let lines: Vec<&str> = result.lines().map(|l| l.trim()).filter(|l| !l.is_empty()).collect(); - lines.join("\n") -} - -fn decompress_pdf_stream(data: &[u8]) -> Vec { - // Try to detect and decompress flate/zlib streams - if data.len() < 2 { - return data.to_vec(); - } - - // Simple zlib check: zlib-wrapped deflate starts with 0x78 - if data.starts_with(&[0x78]) || data.starts_with(&[0x08, 0x1b]) { - if let Ok(decoded) = flate2::read::ZlibDecoder::new(data).bytes().collect::, _>>() { - return decoded; - } - } - - // Try raw deflate - if let Ok(decoded) = flate2::read::DeflateDecoder::new(data).bytes().collect::, _>>() { - return decoded; - } - - data.to_vec() -} - -async fn read_pdf_exec( - ctx: GitToolCtx, - args: serde_json::Value, -) -> Result { - let p: serde_json::Map = - serde_json::from_value(args).map_err(|e| e.to_string())?; - - let project_name = p - .get("project_name") - .and_then(|v| v.as_str()) - .ok_or("missing project_name")?; - let repo_name = p - .get("repo_name") - .and_then(|v| v.as_str()) - .ok_or("missing repo_name")?; - let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; - let rev = p - .get("rev") - .and_then(|v| v.as_str()) - .map(String::from) - .unwrap_or_else(|| "HEAD".to_string()); - let page_start = p.get("page_start").and_then(|v| v.as_u64()).map(|v| v as usize); - let page_end = p.get("page_end").and_then(|v| v.as_u64()).map(|v| v as usize); - let max_pages = p - .get("max_pages") - .and_then(|v| v.as_u64()) - .unwrap_or(20) as usize; - - let domain = ctx.open_repo(project_name, repo_name).await?; - - let commit_oid = if rev.len() >= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain - .commit_get_prefix(&rev) - .map_err(|e| e.to_string())? - .oid - }; - - let entry = domain - .tree_entry_by_path_from_commit(&commit_oid, path) - .map_err(|e| e.to_string())?; - let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; - - let data = &content.content; - if data.len() > MAX_FILE_SIZE { - return Err(format!( - "file too large ({} bytes), max {} bytes", - data.len(), - MAX_FILE_SIZE - )); - } - - let doc = Document::load_from_mem(data) - .map_err(|e| format!("failed to parse PDF: {}", e))?; - - // Get all page references - let pages: Vec = doc - .pages - .values() - .cloned() - .collect(); - - let total_pages = pages.len(); - - let start = page_start.unwrap_or(0).min(total_pages.saturating_sub(1)); - let end = page_end.unwrap_or(start + max_pages).min(total_pages); - - let mut page_texts: Vec = Vec::new(); - - for (i, page_id) in pages.iter().enumerate().skip(start).take(end - start) { - let text = extract_page_text(&doc, *page_id); - page_texts.push(serde_json::json!({ - "page": i + 1, - "text": text, - "char_count": text.chars().count(), - })); - } - - Ok(serde_json::json!({ - "path": path, - "rev": rev, - "total_pages": total_pages, - "extracted_pages": page_texts.len(), - "pages": page_texts, - })) -} - -pub fn register_pdf_tools(registry: &mut ToolRegistry) { - let p = HashMap::from([ - ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), - ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), - ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the PDF document".into()), required: true, properties: None, items: None }), - ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), - ("page_start".into(), ToolParam { name: "page_start".into(), param_type: "integer".into(), description: Some("1-based starting page number (default: 1)".into()), required: false, properties: None, items: None }), - ("page_end".into(), ToolParam { name: "page_end".into(), param_type: "integer".into(), description: Some("1-based ending page number (default: page_start + 20)".into()), required: false, properties: None, items: None }), - ("max_pages".into(), ToolParam { name: "max_pages".into(), param_type: "integer".into(), description: Some("Maximum number of pages to extract (default: 20)".into()), required: false, properties: None, items: None }), - ]); - let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; - registry.register( - ToolDefinition::new("read_pdf") - .description("Extract text content from PDF files. Returns page-by-page text extraction with character counts. Supports page range selection.") - .parameters(schema), - ToolHandler::new(|ctx, args| { - let gctx = GitToolCtx::new(ctx); - Box::pin(async move { - read_pdf_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) - }) - }), - ); -} diff --git a/libs/service/file_tools/ppt.rs b/libs/service/file_tools/ppt.rs deleted file mode 100644 index ae969ac..0000000 --- a/libs/service/file_tools/ppt.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! read_ppt — extract text from PowerPoint files (.pptx). - -use crate::file_tools::MAX_FILE_SIZE; -use crate::git_tools::ctx::GitToolCtx; -use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; -use futures::FutureExt; -use std::collections::HashMap; -use zip::ZipArchive; - -async fn read_ppt_exec( - ctx: GitToolCtx, - args: serde_json::Value, -) -> Result { - let p: serde_json::Map = - serde_json::from_value(args).map_err(|e| e.to_string())?; - - let project_name = p - .get("project_name") - .and_then(|v| v.as_str()) - .ok_or("missing project_name")?; - let repo_name = p - .get("repo_name") - .and_then(|v| v.as_str()) - .ok_or("missing repo_name")?; - let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; - let rev = p - .get("rev") - .and_then(|v| v.as_str()) - .map(String::from) - .unwrap_or_else(|| "HEAD".to_string()); - let slide_start = p.get("slide_start").and_then(|v| v.as_u64()).map(|v| v as usize); - let slide_end = p.get("slide_end").and_then(|v| v.as_u64()).map(|v| v as usize); - let include_notes = p - .get("include_notes") - .and_then(|v| v.as_bool()) - .unwrap_or(false); - - let domain = ctx.open_repo(project_name, repo_name).await?; - - let commit_oid = if rev.len() >= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain - .commit_get_prefix(&rev) - .map_err(|e| e.to_string())? - .oid - }; - - let entry = domain - .tree_entry_by_path_from_commit(&commit_oid, path) - .map_err(|e| e.to_string())?; - let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; - - let data = &content.content; - if data.len() > MAX_FILE_SIZE { - return Err(format!( - "file too large ({} bytes), max {} bytes", - data.len(), - MAX_FILE_SIZE - )); - } - - let cursor = std::io::Cursor::new(data.clone()); - let mut archive = - ZipArchive::new(cursor).map_err(|e| format!("failed to read PPTX ZIP: {}", e))?; - - let mut slides: Vec = Vec::new(); - - // Collect all slide file names - let mut slide_files: Vec = (1..=1000) - .filter_map(|i| { - let name = format!("ppt/slides/slide{}.xml", i); - if archive.by_name(&name).is_ok() { - Some(name) - } else { - None - } - }) - .collect(); - - let total_slides = slide_files.len(); - let start = slide_start.unwrap_or(0).min(total_slides.saturating_sub(1)); - let end = slide_end.unwrap_or(start + 50).min(total_slides); - - for slide_file in slide_files.iter().skip(start).take(end - start) { - let slide_idx = slides.len() + start + 1; - - let mut file = archive - .by_name(slide_file) - .map_err(|e| format!("failed to read slide {}: {}", slide_file, e))?; - let mut xml_content = String::new(); - use std::io::Read; - file.read_to_string(&mut xml_content) - .map_err(|e| e.to_string())?; - - // Extract text from slide XML - let text = extract_text_from_pptx_xml(&xml_content); - - // Optionally extract notes - let notes = if include_notes { - let notes_file = format!("ppt/notesSlides/notesSlide{}.xml", slide_idx); - if let Ok(mut notes_file) = archive.by_name(¬es_file) { - let mut notes_xml = String::new(); - if notes_file.read_to_string(&mut notes_xml).is_ok() { - Some(extract_text_from_pptx_xml(¬es_xml)) - } else { - None - } - } else { - None - } - } else { - None - }; - - slides.push(serde_json::json!({ - "slide": slide_idx, - "text": text.clone(), - "char_count": text.chars().count(), - "notes": notes, - })); - } - - Ok(serde_json::json!({ - "path": path, - "rev": rev, - "total_slides": total_slides, - "extracted_slides": slides.len(), - "slides": slides, - })) -} - -/// Extract text content from PPTX slide XML using simple tag extraction. -fn extract_text_from_pptx_xml(xml: &str) -> String { - // PPTX uses tags for text content - let mut results: Vec<&str> = Vec::new(); - let mut last_end = 0; - - while let Some(start) = xml[last_end..].find("') { - let content_start = abs_start + tag_end + 1; - if let Some(end_tag) = xml[content_start..].find("") { - let text = &xml[content_start..content_start + end_tag]; - let trimmed = text.trim(); - if !trimmed.is_empty() { - results.push(trimmed); - } - last_end = content_start + end_tag + 7; // len of - } else { - break; - } - } else { - break; - } - } - - // Also try tags (notes slides use Word namespaces) - let mut last_end = 0; - while let Some(start) = xml[last_end..].find("') { - let content_start = abs_start + tag_end + 1; - if let Some(end_tag) = xml[content_start..].find("") { - let text = &xml[content_start..content_start + end_tag]; - let trimmed = text.trim(); - if !trimmed.is_empty() && !results.contains(&trimmed) { - results.push(trimmed); - } - last_end = content_start + end_tag + 6; // len of - } else { - break; - } - } else { - break; - } - } - - results.join(" ") -} - -pub fn register_ppt_tools(registry: &mut ToolRegistry) { - let p = HashMap::from([ - ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), - ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), - ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .pptx document".into()), required: true, properties: None, items: None }), - ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), - ("slide_start".into(), ToolParam { name: "slide_start".into(), param_type: "integer".into(), description: Some("1-based starting slide number (default: 1)".into()), required: false, properties: None, items: None }), - ("slide_end".into(), ToolParam { name: "slide_end".into(), param_type: "integer".into(), description: Some("1-based ending slide number".into()), required: false, properties: None, items: None }), - ("include_notes".into(), ToolParam { name: "include_notes".into(), param_type: "boolean".into(), description: Some("Include speaker notes (default: false)".into()), required: false, properties: None, items: None }), - ]); - let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; - registry.register( - ToolDefinition::new("read_ppt") - .description("Extract text content from PowerPoint presentations (.pptx). Returns slide-by-slide text with character counts. Supports slide range selection and speaker notes.") - .parameters(schema), - ToolHandler::new(|ctx, args| { - let gctx = GitToolCtx::new(ctx); - Box::pin(async move { - read_ppt_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) - }) - }), - ); -} diff --git a/libs/service/file_tools/word.rs b/libs/service/file_tools/word.rs deleted file mode 100644 index b88007e..0000000 --- a/libs/service/file_tools/word.rs +++ /dev/null @@ -1,184 +0,0 @@ -//! read_word — parse and extract text from Word documents (.docx) via zip+xml. - -use crate::file_tools::MAX_FILE_SIZE; -use crate::git_tools::ctx::GitToolCtx; -use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; -use futures::FutureExt; -use quick_xml::events::Event; -use quick_xml::Reader; -use std::collections::HashMap; -use zip::ZipArchive; - -async fn read_word_exec( - ctx: GitToolCtx, - args: serde_json::Value, -) -> Result { - let p: serde_json::Map = - serde_json::from_value(args).map_err(|e| e.to_string())?; - - let project_name = p - .get("project_name") - .and_then(|v| v.as_str()) - .ok_or("missing project_name")?; - let repo_name = p - .get("repo_name") - .and_then(|v| v.as_str()) - .ok_or("missing repo_name")?; - let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; - let rev = p - .get("rev") - .and_then(|v| v.as_str()) - .map(String::from) - .unwrap_or_else(|| "HEAD".to_string()); - let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize; - let limit = p - .get("limit") - .and_then(|v| v.as_u64()) - .unwrap_or(200) as usize; - let sections_only = p - .get("sections_only") - .and_then(|v| v.as_bool()) - .unwrap_or(false); - - let domain = ctx.open_repo(project_name, repo_name).await?; - - let commit_oid = if rev.len() >= 40 { - git::commit::types::CommitOid::new(&rev) - } else { - domain - .commit_get_prefix(&rev) - .map_err(|e| e.to_string())? - .oid - }; - - let entry = domain - .tree_entry_by_path_from_commit(&commit_oid, path) - .map_err(|e| e.to_string())?; - let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; - - let data = &content.content; - if data.len() > MAX_FILE_SIZE { - return Err(format!( - "file too large ({} bytes), max {} bytes", - data.len(), - MAX_FILE_SIZE - )); - } - - // DOCX is a ZIP archive. Read word/document.xml from it. - let cursor = std::io::Cursor::new(data); - let mut archive = ZipArchive::new(cursor).map_err(|e| { - format!( - "failed to open docx as ZIP archive: {}. Make sure the file is a valid .docx document.", - e - ) - })?; - - let doc_xml = { - let file = if let Ok(f) = archive.by_name("word/document.xml") { - f - } else { - archive.by_name("document.xml") - .map_err(|_| "docx archive does not contain word/document.xml or document.xml")? - }; - let mut s = String::new(); - let mut reader = std::io::BufReader::new(file); - std::io::Read::read_to_string(&mut reader, &mut s) - .map_err(|e| format!("failed to read document.xml: {}", e))?; - s - }; - - // Parse paragraphs from elements - let mut reader = Reader::from_str(&doc_xml); - reader.config_mut().trim_text(false); - - let mut paragraphs: Vec = Vec::new(); - let mut buf = Vec::new(); - let mut in_paragraph = false; - let mut current_text = String::new(); - - loop { - match reader.read_event_into(&mut buf) { - Ok(Event::Start(e)) => { - if e.name().as_ref() == b"w:p" { - in_paragraph = true; - current_text.clear(); - } - } - Ok(Event::Text(e)) => { - if in_paragraph { - let txt = e.unescape().map(|s| s.into_owned()).unwrap_or_default(); - current_text.push_str(&txt); - } - } - Ok(Event::End(e)) => { - if e.name().as_ref() == b"w:p" && in_paragraph { - in_paragraph = false; - let text = current_text.trim().to_string(); - if !text.is_empty() { - paragraphs.push(text); - } - } - } - Ok(Event::Eof) => break, - _ => {} - } - buf.clear(); - } - - let total = paragraphs.len(); - - let body: Vec = if sections_only { - paragraphs - .iter() - .enumerate() - .filter(|(_, text)| { - text.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) - && text.chars().filter(|&c| c == ' ').count() < text.len() / 2 - && text.len() < 200 - }) - .skip(offset) - .take(limit) - .map(|(i, t)| serde_json::json!({ "index": i, "text": t })) - .collect() - } else { - paragraphs - .iter() - .skip(offset) - .take(limit) - .enumerate() - .map(|(i, t)| serde_json::json!({ "index": offset + i, "text": t })) - .collect() - }; - - Ok(serde_json::json!({ - "path": path, - "rev": rev, - "paragraph_count": total, - "paragraphs": body, - })) -} - -pub fn register_word_tools(registry: &mut ToolRegistry) { - let p = HashMap::from([ - ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), - ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), - ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .docx document".into()), required: true, properties: None, items: None }), - ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), - ("sections_only".into(), ToolParam { name: "sections_only".into(), param_type: "boolean".into(), description: Some("If true, extract only section/heading-like paragraphs (short lines starting with uppercase)".into()), required: false, properties: None, items: None }), - ("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of paragraphs to skip (default: 0)".into()), required: false, properties: None, items: None }), - ("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum paragraphs to return (default: 200)".into()), required: false, properties: None, items: None }), - ]); - let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; - registry.register( - ToolDefinition::new("read_word") - .description("Parse and extract text from Word documents (.docx). Returns paragraphs with index and text content. Supports pagination.") - .parameters(schema), - ToolHandler::new(|ctx, args| { - let gctx = GitToolCtx::new(ctx); - Box::pin(async move { - read_word_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) - }) - }), - ); -} diff --git a/libs/service/lib.rs b/libs/service/lib.rs index 49e7093..941ce95 100644 --- a/libs/service/lib.rs +++ b/libs/service/lib.rs @@ -196,9 +196,9 @@ impl AppService { tracing::info!(url = %base_url, "AI chat enabled"); let ai_client_config = AiClientConfig::new(api_key).with_base_url(&base_url); let mut registry = ToolRegistry::new(); - git_tools::register_all(&mut registry); - file_tools::register_all(&mut registry); - project_tools::register_all(&mut registry); + fctool::git_tools::register_all(&mut registry); + fctool::file_tools::register_all(&mut registry); + fctool::project_tools::register_all(&mut registry); let mut chat_svc = ChatService::new() .with_ai_client_config(ai_client_config) .with_tool_registry(registry); @@ -324,12 +324,9 @@ impl AppService { pub mod agent; pub mod auth; pub mod error; -pub mod file_tools; pub mod git; -pub mod git_tools; pub mod issue; pub mod project; -pub mod project_tools; pub mod pull_request; pub mod search; pub mod skill;