refactor(fctool): extract tool modules into standalone fctool crate

Move git_tools, file_tools, and project_tools from libs/service into a
new libs/fctool crate with correct workspace dependencies. Fixes the
rev.len() >= 40 bug in all git tool resolve functions (OID check needs
exact 40-char hex, not just >= 40). Adds 4 new git blob tools
(blob_get, blob_exists, blob_content, blob_create). Fixes parameter
naming inconsistency in repos.rs and adds project_name to list_repos
output. Removes unused excel/pdf/ppt/word file tools.
This commit is contained in:
ZhenYi 2026-04-26 23:58:16 +08:00
parent 0e53f4a69f
commit c7a8bc0458
32 changed files with 455 additions and 890 deletions

26
Cargo.lock generated
View File

@ -2757,6 +2757,31 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "fctool"
version = "0.2.9"
dependencies = [
"agent",
"base64 0.22.1",
"chrono",
"csv",
"db",
"git",
"git2",
"models",
"pulldown-cmark 0.12.2",
"quick-xml 0.37.5",
"regex",
"reqwest 0.13.2",
"sea-orm",
"serde",
"serde_json",
"sqlparser",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "fdeflate"
version = "0.3.7"
@ -7705,6 +7730,7 @@ dependencies = [
"db",
"deadpool-redis",
"email",
"fctool",
"flate2",
"futures",
"git",

View File

@ -19,6 +19,7 @@ members = [
"libs/agent",
"libs/migrate",
"libs/agent-tool-derive",
"libs/fctool",
"apps/migrate",
"apps/app",
"apps/adminrpc",
@ -50,6 +51,7 @@ observability = { path = "libs/observability" }
avatar = { path = "libs/avatar" }
migrate = { path = "libs/migrate" }
session_manager = { path = "libs/session_manager" }
fctool = { path = "libs/fctool" }
sea-query = "1.0.0-rc.31"

39
libs/fctool/Cargo.toml Normal file
View File

@ -0,0 +1,39 @@
[package]
name = "fctool"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[lib]
path = "src/lib.rs"
name = "fctool"
[dependencies]
agent = { workspace = true }
git = { workspace = true }
models = { workspace = true }
db = { workspace = true }
sea-orm = { workspace = true, features = [] }
git2 = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
base64 = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
uuid = { workspace = true, features = ["serde", "v7"] }
reqwest = { workspace = true, features = ["json", "native-tls"] }
regex = { workspace = true }
csv = { workspace = true }
quick-xml = { workspace = true }
sqlparser = { workspace = true }
pulldown-cmark = { workspace = true }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tracing = { workspace = true }

View File

@ -51,7 +51,7 @@ async fn read_csv_exec(
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
git::commit::types::CommitOid::new(&rev)
} else {
domain

View File

@ -67,7 +67,7 @@ async fn git_grep_exec(
let domain = ctx.open_repo(project_name, repo_name).await?;
// Resolve revision to commit oid
let commit_oid = if rev.len() >= 40 {
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
git::commit::types::CommitOid::new(&rev)
} else {
domain

View File

@ -130,7 +130,7 @@ async fn read_json_exec(
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
git::commit::types::CommitOid::new(&rev)
} else {
domain

View File

@ -41,7 +41,7 @@ async fn read_markdown_exec(
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
git::commit::types::CommitOid::new(&rev)
} else {
domain

View File

@ -33,7 +33,7 @@ async fn read_sql_exec(
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
git::commit::types::CommitOid::new(&rev)
} else {
domain

View File

@ -0,0 +1,273 @@
//! Git blob tools — raw object-level operations on blob OIDs.
use super::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use base64::Engine;
use std::collections::HashMap;
async fn git_blob_info_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = resolve_oid(&domain, oid)?;
let info = domain.blob_get(&commit_oid).map_err(|e| e.to_string())?;
Ok(serde_json::json!({
"oid": info.oid.to_string(),
"size": info.size,
"is_binary": info.is_binary,
}))
}
async fn git_blob_exists_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = resolve_oid(&domain, oid)?;
let exists = domain.blob_exists(&commit_oid);
Ok(serde_json::json!({ "oid": commit_oid.to_string(), "exists": exists }))
}
async fn git_blob_content_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
let max_size = p.get("max_size").and_then(|v| v.as_u64()).unwrap_or(1_048_576) as usize; // 1MB default
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = resolve_oid(&domain, oid)?;
let blob = domain.blob_content(&commit_oid).map_err(|e| e.to_string())?;
if blob.size > max_size {
return Err(format!(
"blob too large ({} bytes), max {} bytes. Use a smaller max_size or retrieve the raw OID.",
blob.size, max_size
));
}
let (content, is_binary) = if blob.is_binary {
(base64::engine::general_purpose::STANDARD.encode(&blob.content), true)
} else {
(String::from_utf8_lossy(&blob.content).to_string(), false)
};
Ok(serde_json::json!({
"oid": blob.oid.to_string(),
"size": blob.size,
"is_binary": is_binary,
"content": content,
}))
}
async fn git_blob_create_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
let content = p.get("content").and_then(|v| v.as_str()).ok_or("missing content")?;
let encoding = p.get("encoding").and_then(|v| v.as_str()).unwrap_or("utf-8");
let data = match encoding {
"base64" => base64::engine::general_purpose::STANDARD
.decode(content)
.map_err(|e| format!("invalid base64: {}", e))?,
"utf-8" => content.as_bytes().to_vec(),
other => return Err(format!("unsupported encoding '{}'. Use 'utf-8' or 'base64'.", other)),
};
let domain = ctx.open_repo(project_name, repo_name).await?;
let oid = domain.blob_create(&data).map_err(|e| e.to_string())?;
let info = domain.blob_get(&oid).map_err(|e| e.to_string())?;
Ok(serde_json::json!({
"oid": info.oid.to_string(),
"size": info.size,
"is_binary": info.is_binary,
}))
}
fn resolve_oid(
domain: &git::GitDomain,
rev: &str,
) -> Result<git::commit::types::CommitOid, String> {
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
Ok(git::commit::types::CommitOid::new(rev))
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
}
}
pub fn register_git_tools(registry: &mut ToolRegistry) {
// git_blob_info
let p = HashMap::from([
("project_name".into(), ToolParam {
name: "project_name".into(), param_type: "string".into(),
description: Some("Project name (slug)".into()),
required: true, properties: None, items: None,
}),
("repo_name".into(), ToolParam {
name: "repo_name".into(), param_type: "string".into(),
description: Some("Repository name".into()),
required: true, properties: None, items: None,
}),
("oid".into(), ToolParam {
name: "oid".into(), param_type: "string".into(),
description: Some("Blob OID (full 40-char hex or short prefix)".into()),
required: true, properties: None, items: None,
}),
]);
let schema = ToolSchema {
schema_type: "object".into(),
properties: Some(p),
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
};
registry.register(
ToolDefinition::new("git_blob_info")
.description("Get metadata about a git blob by its OID. Returns size and whether the blob is binary.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
git_blob_info_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// git_blob_exists
let p = HashMap::from([
("project_name".into(), ToolParam {
name: "project_name".into(), param_type: "string".into(),
description: Some("Project name (slug)".into()),
required: true, properties: None, items: None,
}),
("repo_name".into(), ToolParam {
name: "repo_name".into(), param_type: "string".into(),
description: Some("Repository name".into()),
required: true, properties: None, items: None,
}),
("oid".into(), ToolParam {
name: "oid".into(), param_type: "string".into(),
description: Some("Blob OID to check".into()),
required: true, properties: None, items: None,
}),
]);
let schema = ToolSchema {
schema_type: "object".into(),
properties: Some(p),
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
};
registry.register(
ToolDefinition::new("git_blob_exists")
.description("Check whether a git blob exists in the repository by its OID.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
git_blob_exists_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// git_blob_content
let p = HashMap::from([
("project_name".into(), ToolParam {
name: "project_name".into(), param_type: "string".into(),
description: Some("Project name (slug)".into()),
required: true, properties: None, items: None,
}),
("repo_name".into(), ToolParam {
name: "repo_name".into(), param_type: "string".into(),
description: Some("Repository name".into()),
required: true, properties: None, items: None,
}),
("oid".into(), ToolParam {
name: "oid".into(), param_type: "string".into(),
description: Some("Blob OID to retrieve content for".into()),
required: true, properties: None, items: None,
}),
("max_size".into(), ToolParam {
name: "max_size".into(), param_type: "integer".into(),
description: Some("Maximum blob size in bytes (default: 1MB)".into()),
required: false, properties: None, items: None,
}),
]);
let schema = ToolSchema {
schema_type: "object".into(),
properties: Some(p),
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
};
registry.register(
ToolDefinition::new("git_blob_content")
.description("Retrieve the raw content of a git blob by its OID. Binary content is base64-encoded. Limits to 1MB by default.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
git_blob_content_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
// git_blob_create
let p = HashMap::from([
("project_name".into(), ToolParam {
name: "project_name".into(), param_type: "string".into(),
description: Some("Project name (slug)".into()),
required: true, properties: None, items: None,
}),
("repo_name".into(), ToolParam {
name: "repo_name".into(), param_type: "string".into(),
description: Some("Repository name".into()),
required: true, properties: None, items: None,
}),
("content".into(), ToolParam {
name: "content".into(), param_type: "string".into(),
description: Some("Blob content (utf-8 string or base64-encoded bytes)".into()),
required: true, properties: None, items: None,
}),
("encoding".into(), ToolParam {
name: "encoding".into(), param_type: "string".into(),
description: Some("Encoding of content: 'utf-8' (default) or 'base64'".into()),
required: false, properties: None, items: None,
}),
]);
let schema = ToolSchema {
schema_type: "object".into(),
properties: Some(p),
required: Some(vec!["project_name".into(), "repo_name".into(), "content".into()]),
};
registry.register(
ToolDefinition::new("git_blob_create")
.description("Create a new git blob in the repository. Writes the raw content to the object database and returns the new blob OID. Supports both utf-8 text and base64-encoded binary content.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
git_blob_create_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}

View File

@ -35,8 +35,10 @@ async fn git_branch_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul
let info = domain.branch_get(name).map_err(|e| e.to_string())?;
let ahead_behind = if let Some(ref upstream) = info.upstream {
let (ahead, behind) = domain.branch_ahead_behind(name, upstream).unwrap_or((0, 0));
Some(serde_json::json!({ "ahead": ahead, "behind": behind }))
match domain.branch_ahead_behind(name, upstream) {
Ok((ahead, behind)) => Some(serde_json::json!({ "ahead": ahead, "behind": behind })),
Err(e) => Some(serde_json::json!({ "error": e.to_string() })),
}
} else { None };
Ok(serde_json::json!({

View File

@ -47,6 +47,16 @@ async fn git_log_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_
Ok(serde_json::to_value(result).map_err(|e| e.to_string())?)
}
/// Resolve a rev string to commit metadata. Tries full OID first (exactly 40 hex chars),
/// falls back to prefix lookup (branch, tag, short hash).
fn resolve_commit(domain: &git::GitDomain, rev: &str) -> Result<git::commit::types::CommitMeta, String> {
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string())
}
}
async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
@ -54,11 +64,7 @@ async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
let rev = p.get("rev").and_then(|v| v.as_str()).ok_or("missing rev")?;
let domain = ctx.open_repo(project_name, repo_name).await?;
let meta = if rev.len() >= 40 {
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())?
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string())?
};
let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?;
let refs = domain.commit_refs(&meta.oid).map_err(|e| e.to_string())?;
@ -128,11 +134,7 @@ async fn git_commit_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul
let rev = p.get("rev").and_then(|v| v.as_str()).ok_or("missing rev")?;
let domain = ctx.open_repo(project_name, repo_name).await?;
let meta = if rev.len() >= 40 {
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())?
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string())?
};
let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?;
Ok(flatten_commit(&meta))
}
@ -195,9 +197,11 @@ async fn git_reflog_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<ser
let result: Vec<_> = entries.iter()
.take(limit)
.map(|e| {
let ts = e.time_secs;
// Convert to UTC by subtracting the timezone offset, consistent
// with all other timestamp conversions in this module.
let ts = e.time_secs - (e.offset_minutes as i64 * 60);
let time_str = chrono::Utc.timestamp_opt(ts, 0).single()
.map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", ts));
.map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", e.time_secs));
serde_json::json!({
"oid_new": e.oid_new.to_string(), "oid_old": e.oid_old.to_string(),
"committer_name": e.committer_name, "committer_email": e.committer_email,

View File

@ -17,7 +17,7 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
let domain = ctx.open_repo(project_name, repo_name).await?;
let resolve = |rev: &str| -> Result<git::commit::types::CommitOid, String> {
if rev.len() >= 40 {
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
Ok(git::commit::types::CommitOid::new(rev))
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
@ -68,8 +68,14 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
}
};
use git::diff::types::DiffDeltaStatus;
let files: Vec<_> = result.deltas.iter().map(|d| {
serde_json::json!({ "path": d.new_file.path, "status": format!("{:?}", d.status), "is_binary": d.new_file.is_binary })
let (path, is_binary) = if d.status == DiffDeltaStatus::Deleted {
(d.old_file.path.clone(), d.old_file.is_binary)
} else {
(d.new_file.path.clone(), d.new_file.is_binary)
};
serde_json::json!({ "path": path, "status": format!("{:?}", d.status), "is_binary": is_binary })
}).collect();
Ok(serde_json::json!({
@ -87,22 +93,16 @@ async fn git_diff_stats_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result
let domain = ctx.open_repo(project_name, repo_name).await?;
let stats = if base.len() >= 40 && head.len() >= 40 {
domain.diff_stats(&git::commit::types::CommitOid::new(base), &git::commit::types::CommitOid::new(head))
.map_err(|e| e.to_string())?
} else {
let b = if base.len() >= 40 {
git::commit::types::CommitOid::new(base)
let resolve = |rev: &str| -> Result<git::commit::types::CommitOid, String> {
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
Ok(git::commit::types::CommitOid::new(rev))
} else {
domain.commit_get_prefix(base).map_err(|e| e.to_string())?.oid
};
let h = if head.len() >= 40 {
git::commit::types::CommitOid::new(head)
} else {
domain.commit_get_prefix(head).map_err(|e| e.to_string())?.oid
};
domain.diff_stats(&b, &h).map_err(|e| e.to_string())?
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
}
};
let b = resolve(base).map_err(|e| e.to_string())?;
let h = resolve(head).map_err(|e| e.to_string())?;
let stats = domain.diff_stats(&b, &h).map_err(|e| e.to_string())?;
Ok(serde_json::json!({
"files_changed": stats.files_changed,
@ -121,11 +121,11 @@ async fn git_blame_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serd
let to_line = p.get("to_line").and_then(|v| v.as_u64().map(|n| n as u32));
let domain = ctx.open_repo(project_name, repo_name).await?;
let oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
let oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
Ok(git::commit::types::CommitOid::new(&rev))
} else {
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
};
domain.commit_get_prefix(&rev).map_err(|e| e.to_string()).map(|m| m.oid)
}?;
use git::blame::ops::BlameOptions;
let mut bopts = BlameOptions::new();

View File

@ -3,6 +3,7 @@
//! Each module defines async exec functions + a `register_git_tools()` call.
//! All tools take `project_name` + `repo_name` as required params.
pub mod blob;
pub mod branch;
pub mod commit;
pub mod ctx;
@ -16,6 +17,7 @@ pub fn register_all(registry: &mut agent::ToolRegistry) {
commit::register_git_tools(registry);
branch::register_git_tools(registry);
diff::register_git_tools(registry);
blob::register_git_tools(registry);
tree::register_git_tools(registry);
tag::register_git_tools(registry);
}

View File

@ -16,12 +16,19 @@ async fn git_tag_list_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<s
let result: Vec<_> = match pattern {
Some(ref pat) => {
let pat_lower = pat.to_lowercase();
let has_wildcard = pat.contains('*');
// Convert glob pattern (only * wildcards) to regex for proper matching.
// "*" matches any sequence of characters.
let regex_pat = pat_lower
.split('*')
.map(|s| regex::escape(s))
.collect::<Vec<_>>()
.join(".*");
let re = regex::Regex::new(&format!("^{}$", regex_pat))
.ok();
all_tags.iter()
.filter(|t| {
let n = t.name.to_lowercase();
if has_wildcard { n.contains(&pat_lower.replace('*', "")) }
else { n.contains(&pat_lower) }
re.as_ref().map(|r| r.is_match(&n)).unwrap_or(false)
})
.map(|t| tag_to_json(t))
.collect()

View File

@ -5,6 +5,16 @@ use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use base64::Engine;
use std::collections::HashMap;
/// Resolve a rev string to a commit OID. Tries full OID first (exactly 40 hex chars),
/// falls back to prefix lookup (branch, tag, short hash).
fn resolve_commit_oid(domain: &git::GitDomain, rev: &str) -> Result<git::commit::types::CommitOid, String> {
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
Ok(git::commit::types::CommitOid::new(rev))
} else {
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
}
}
async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
@ -13,11 +23,7 @@ async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resu
let rev = p.get("rev").and_then(|v| v.as_str()).map(|s| s.to_string()).unwrap_or_else(|| "HEAD".to_string());
let domain = ctx.open_repo(project_name, repo_name).await?;
let oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
};
let oid = resolve_commit_oid(&domain, &rev)?;
let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?;
let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;
@ -46,11 +52,7 @@ async fn git_tree_ls_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<se
let rev = p.get("rev").and_then(|v| v.as_str()).map(|s| s.to_string()).unwrap_or_else(|| "HEAD".to_string());
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
};
let commit_oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?;
let entries = match dir_path {
Some(ref dp) => {
@ -102,11 +104,7 @@ async fn git_blob_get_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<s
let rev = p.get("rev").and_then(|v| v.as_str()).map(String::from).unwrap_or_else(|| "HEAD".to_string());
let domain = ctx.open_repo(project_name, repo_name).await?;
let oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
};
let oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?;
let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?;
let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;

View File

@ -217,16 +217,24 @@ pub struct DiffFileOut {
impl DiffFileOut {
pub fn from_delta(delta: &DiffDelta) -> Self {
// For deleted files, use old_file.path; for all others, use new_file.path.
let path = match delta.status {
DiffDeltaStatus::Deleted => delta.old_file.path.clone(),
_ => delta.new_file.path.clone(),
// For deleted files, use old_file for all metadata; for all others, use new_file.
let (path, is_binary, size) = match delta.status {
DiffDeltaStatus::Deleted => (
delta.old_file.path.clone(),
delta.old_file.is_binary,
delta.old_file.size,
),
_ => (
delta.new_file.path.clone(),
delta.new_file.is_binary,
delta.new_file.size,
),
};
Self {
path,
status: format!("{:?}", delta.status),
is_binary: delta.new_file.is_binary,
size: delta.new_file.size,
is_binary,
size,
}
}
}

5
libs/fctool/src/lib.rs Normal file
View File

@ -0,0 +1,5 @@
//! AI agent function-call tools: git operations, file parsing/search, and project management.
pub mod git_tools;
pub mod file_tools;
pub mod project_tools;

View File

@ -23,6 +23,14 @@ pub async fn list_repos_exec(
let project_id = ctx.project_id();
let db = ctx.db();
// Resolve project name so the AI can use it for git_tools operations
let project = models::projects::project::Entity::find_by_id(project_id)
.one(db)
.await
.map_err(|e| ToolError::ExecutionError(e.to_string()))?
.ok_or_else(|| ToolError::ExecutionError("Project not found".into()))?;
let project_name = project.name.clone();
let repos = repo::Entity::find()
.filter(repo::Column::Project.eq(project_id))
.order_by_asc(repo::Column::RepoName)
@ -36,6 +44,7 @@ pub async fn list_repos_exec(
serde_json::json!({
"id": r.id.to_string(),
"name": r.repo_name,
"project_name": project_name,
"description": r.description,
"default_branch": r.default_branch,
"is_private": r.is_private,
@ -293,8 +302,9 @@ pub async fn create_commit_exec(
let repo_name = args
.get("repo_name")
.or_else(|| args.get("name"))
.and_then(|v| v.as_str())
.ok_or_else(|| ToolError::ExecutionError("repo_name is required".into()))?;
.ok_or_else(|| ToolError::ExecutionError("repo_name (or name) is required".into()))?;
let message = args
.get("message")
@ -308,10 +318,12 @@ pub async fn create_commit_exec(
.unwrap_or("main")
.to_string();
// Validate branch: no path traversal, no slashes
if branch.contains("..") || branch.contains('/') || branch.contains('\\') || branch.is_empty() {
// Validate branch: no path traversal, no backslashes, not empty, no lock files
if branch.contains("..") || branch.contains('\\') || branch.is_empty()
|| branch.ends_with(".lock") || branch.starts_with('-')
{
return Err(ToolError::ExecutionError(
"Invalid branch name: must not contain path separators or '..'".into(),
"Invalid branch name: must not contain '..' or backslashes, and must not be empty".into(),
));
}
@ -574,9 +586,14 @@ pub fn create_commit_tool_definition() -> ToolDefinition {
let mut p = HashMap::new();
p.insert("repo_name".into(), ToolParam {
name: "repo_name".into(), param_type: "string".into(),
description: Some("Repository name (required).".into()),
description: Some("Repository name. Can also use 'name' as alias. Required.".into()),
required: true, properties: None, items: None,
});
p.insert("name".into(), ToolParam {
name: "name".into(), param_type: "string".into(),
description: Some("Alias for repo_name. Use the same value as returned by project_list_repos.".into()),
required: false, properties: None, items: None,
});
p.insert("branch".into(), ToolParam {
name: "branch".into(), param_type: "string".into(),
description: Some("Branch to commit to. Defaults to 'main'. Optional.".into()),

View File

@ -17,6 +17,7 @@ name = "service"
[dependencies]
config = { workspace = true }
agent = { workspace = true }
fctool = { workspace = true }
db = { workspace = true }
models = { workspace = true }
email = { workspace = true }

View File

@ -1,184 +0,0 @@
//! read_excel — parse and query Excel files (.xlsx, .xls).
use crate::file_tools::MAX_FILE_SIZE;
use crate::git_tools::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use calamine::{open_workbook, Reader, Xlsx};
use futures::FutureExt;
use std::collections::HashMap;
async fn read_excel_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p
.get("project_name")
.and_then(|v| v.as_str())
.ok_or("missing project_name")?;
let repo_name = p
.get("repo_name")
.and_then(|v| v.as_str())
.ok_or("missing repo_name")?;
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
let rev = p
.get("rev")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| "HEAD".to_string());
let sheet_name = p.get("sheet_name").and_then(|v| v.as_str()).map(String::from);
let sheet_index = p.get("sheet_index").and_then(|v| v.as_u64()).map(|v| v as usize);
let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let limit = p
.get("limit")
.and_then(|v| v.as_u64())
.unwrap_or(100) as usize;
let has_header = p
.get("has_header")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain
.commit_get_prefix(&rev)
.map_err(|e| e.to_string())?
.oid
};
let entry = domain
.tree_entry_by_path_from_commit(&commit_oid, path)
.map_err(|e| e.to_string())?;
let blob = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
let data = &content.content;
if data.len() > MAX_FILE_SIZE {
return Err(format!(
"file too large ({} bytes), max {} bytes",
data.len(),
MAX_FILE_SIZE
));
}
// Use cursor-based reading to avoid tempfile
let cursor = std::io::Cursor::new(data.clone());
let mut workbook: Xlsx<std::io::Cursor<Vec<u8>>> =
open_workbook(cursor).map_err(|e| format!("failed to open Excel: {}", e))?;
let sheet_names = workbook.sheet_names().to_vec();
// Determine which sheet to read
let sheet_idx = match (sheet_name.clone(), sheet_index) {
(Some(name), _) => sheet_names
.iter()
.position(|n| n == &name)
.ok_or_else(|| format!("sheet '{}' not found. Available: {:?}", name, sheet_names))?,
(_, Some(idx)) => {
if idx >= sheet_names.len() {
return Err(format!(
"sheet index {} out of range (0..{})",
idx,
sheet_names.len()
));
}
idx
}
_ => 0,
};
let range = workbook
.worksheet_range_at(sheet_idx)
.map_err(|e| format!("failed to read sheet: {}", e))?;
let rows: Vec<Vec<serde_json::Value>> = range
.rows()
.skip(if has_header { offset + 1 } else { offset })
.take(limit)
.map(|row| {
row.iter()
.map(|cell| {
use calamine::Data;
match cell {
Data::Int(i) => serde_json::Value::Number((*i).into()),
Data::Float(f) => {
serde_json::json!(f)
}
Data::String(s) => serde_json::Value::String(s.clone()),
Data::Bool(b) => serde_json::Value::Bool(*b),
Data::DateTime(dt) => {
serde_json::Value::String(format!("{:?}", dt))
}
Data::DateTimeIso(s) => serde_json::Value::String(s.clone()),
Data::DurationIso(s) => serde_json::Value::String(s.clone()),
Data::Error(e) => serde_json::json!({ "error": format!("{:?}", e) }),
Data::Empty => serde_json::Value::Null,
}
})
.collect()
})
.collect();
let header_row: Vec<String> = if has_header {
range
.rows()
.next()
.map(|row| {
row.iter()
.map(|c| {
if let calamine::Data::String(s) = c {
s.clone()
} else {
String::new()
}
})
.collect()
})
.unwrap_or_default()
} else {
vec![]
};
Ok(serde_json::json!({
"path": path,
"rev": rev,
"sheets": sheet_names,
"active_sheet": sheet_names.get(sheet_idx).cloned(),
"sheet_index": sheet_idx,
"headers": header_row,
"rows": rows,
"row_count": rows.len(),
"total_rows": range.rows().count().saturating_sub(if has_header { 1 } else { 0 }),
}))
}
pub fn register_excel_tools(registry: &mut ToolRegistry) {
let p = HashMap::from([
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path within the repository (supports .xlsx, .xls)".into()), required: true, properties: None, items: None }),
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
("sheet_name".into(), ToolParam { name: "sheet_name".into(), param_type: "string".into(), description: Some("Sheet name to read. Defaults to first sheet.".into()), required: false, properties: None, items: None }),
("sheet_index".into(), ToolParam { name: "sheet_index".into(), param_type: "integer".into(), description: Some("Sheet index (0-based). Ignored if sheet_name is set.".into()), required: false, properties: None, items: None }),
("has_header".into(), ToolParam { name: "has_header".into(), param_type: "boolean".into(), description: Some("If true, first row is column headers (default: true)".into()), required: false, properties: None, items: None }),
("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of rows to skip (default: 0)".into()), required: false, properties: None, items: None }),
("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum rows to return (default: 100)".into()), required: false, properties: None, items: None }),
]);
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
registry.register(
ToolDefinition::new("read_excel")
.description("Parse and query Excel spreadsheets (.xlsx, .xls). Returns sheet names, headers, and rows with support for sheet selection and pagination.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
read_excel_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}

View File

@ -1,244 +0,0 @@
//! read_pdf — extract text from PDF files.
use crate::file_tools::MAX_FILE_SIZE;
use crate::git_tools::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use futures::FutureExt;
use lopdf::{Document, Object, ObjectId};
use std::collections::HashMap;
/// Extract text content from a PDF page's content stream.
fn extract_page_text(doc: &Document, page_id: ObjectId) -> String {
let mut text = String::new();
// Get page dictionary
let page_dict = match doc.get(page_id) {
Ok(dict) => dict,
Err(_) => return text,
};
// Get content streams (can be a single stream or array)
let content_streams = match page_dict.get(b"Contents") {
Ok(obj) => obj.clone(),
Err(_) => return text,
};
let stream_ids: Vec<ObjectId> = match &content_streams {
Object::Reference(id) => vec![*id],
Object::Array(arr) => arr
.iter()
.filter_map(|o| {
if let Object::Reference(id) = o {
Some(*id)
} else {
None
}
})
.collect(),
_ => return text,
};
for stream_id in stream_ids {
if let Ok((_, stream)) = doc.get_stream(stream_id) {
// Decode the stream
if let Ok(decompressed) = stream.decompressed_content() {
text.push_str(&extract_text_from_content(&decompress_pdf_stream(&decompressed)));
text.push('\n');
}
}
}
text
}
/// Very simple PDF content stream text extraction.
/// Handles Tj, TJ, Td, T*, ', " operators.
fn extract_text_from_content(content: &[u8]) -> String {
let data = String::from_utf8_lossy(content);
let mut result = String::new();
let mut in_parens = false;
let mut current_text = String::new();
let mut last_was_tj = false;
let mut chars = data.chars().peekable();
while let Some(c) = chars.next() {
match c {
'(' => {
in_parens = true;
current_text.clear();
}
')' if in_parens => {
in_parens = false;
if !current_text.is_empty() {
if last_was_tj {
// TJ operator: subtract current text width offset
}
result.push_str(&current_text);
result.push(' ');
last_was_tj = false;
}
}
c if in_parens => {
if c == '\\' {
if let Some(escaped) = chars.next() {
match escaped {
'n' => current_text.push('\n'),
'r' => current_text.push('\r'),
't' => current_text.push('\t'),
_ => current_text.push(escaped),
}
}
} else {
current_text.push(c);
}
}
'%' => {
// Comment, skip to end of line
while let Some(nc) = chars.next() {
if nc == '\n' || nc == '\r' {
break;
}
}
}
_ => {}
}
}
// Clean up excessive newlines
let lines: Vec<&str> = result.lines().map(|l| l.trim()).filter(|l| !l.is_empty()).collect();
lines.join("\n")
}
fn decompress_pdf_stream(data: &[u8]) -> Vec<u8> {
// Try to detect and decompress flate/zlib streams
if data.len() < 2 {
return data.to_vec();
}
// Simple zlib check: zlib-wrapped deflate starts with 0x78
if data.starts_with(&[0x78]) || data.starts_with(&[0x08, 0x1b]) {
if let Ok(decoded) = flate2::read::ZlibDecoder::new(data).bytes().collect::<Result<Vec<_>, _>>() {
return decoded;
}
}
// Try raw deflate
if let Ok(decoded) = flate2::read::DeflateDecoder::new(data).bytes().collect::<Result<Vec<_>, _>>() {
return decoded;
}
data.to_vec()
}
async fn read_pdf_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p
.get("project_name")
.and_then(|v| v.as_str())
.ok_or("missing project_name")?;
let repo_name = p
.get("repo_name")
.and_then(|v| v.as_str())
.ok_or("missing repo_name")?;
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
let rev = p
.get("rev")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| "HEAD".to_string());
let page_start = p.get("page_start").and_then(|v| v.as_u64()).map(|v| v as usize);
let page_end = p.get("page_end").and_then(|v| v.as_u64()).map(|v| v as usize);
let max_pages = p
.get("max_pages")
.and_then(|v| v.as_u64())
.unwrap_or(20) as usize;
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain
.commit_get_prefix(&rev)
.map_err(|e| e.to_string())?
.oid
};
let entry = domain
.tree_entry_by_path_from_commit(&commit_oid, path)
.map_err(|e| e.to_string())?;
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
let data = &content.content;
if data.len() > MAX_FILE_SIZE {
return Err(format!(
"file too large ({} bytes), max {} bytes",
data.len(),
MAX_FILE_SIZE
));
}
let doc = Document::load_from_mem(data)
.map_err(|e| format!("failed to parse PDF: {}", e))?;
// Get all page references
let pages: Vec<ObjectId> = doc
.pages
.values()
.cloned()
.collect();
let total_pages = pages.len();
let start = page_start.unwrap_or(0).min(total_pages.saturating_sub(1));
let end = page_end.unwrap_or(start + max_pages).min(total_pages);
let mut page_texts: Vec<serde_json::Value> = Vec::new();
for (i, page_id) in pages.iter().enumerate().skip(start).take(end - start) {
let text = extract_page_text(&doc, *page_id);
page_texts.push(serde_json::json!({
"page": i + 1,
"text": text,
"char_count": text.chars().count(),
}));
}
Ok(serde_json::json!({
"path": path,
"rev": rev,
"total_pages": total_pages,
"extracted_pages": page_texts.len(),
"pages": page_texts,
}))
}
pub fn register_pdf_tools(registry: &mut ToolRegistry) {
let p = HashMap::from([
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the PDF document".into()), required: true, properties: None, items: None }),
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
("page_start".into(), ToolParam { name: "page_start".into(), param_type: "integer".into(), description: Some("1-based starting page number (default: 1)".into()), required: false, properties: None, items: None }),
("page_end".into(), ToolParam { name: "page_end".into(), param_type: "integer".into(), description: Some("1-based ending page number (default: page_start + 20)".into()), required: false, properties: None, items: None }),
("max_pages".into(), ToolParam { name: "max_pages".into(), param_type: "integer".into(), description: Some("Maximum number of pages to extract (default: 20)".into()), required: false, properties: None, items: None }),
]);
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
registry.register(
ToolDefinition::new("read_pdf")
.description("Extract text content from PDF files. Returns page-by-page text extraction with character counts. Supports page range selection.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
read_pdf_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}

View File

@ -1,204 +0,0 @@
//! read_ppt — extract text from PowerPoint files (.pptx).
use crate::file_tools::MAX_FILE_SIZE;
use crate::git_tools::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use futures::FutureExt;
use std::collections::HashMap;
use zip::ZipArchive;
async fn read_ppt_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p
.get("project_name")
.and_then(|v| v.as_str())
.ok_or("missing project_name")?;
let repo_name = p
.get("repo_name")
.and_then(|v| v.as_str())
.ok_or("missing repo_name")?;
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
let rev = p
.get("rev")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| "HEAD".to_string());
let slide_start = p.get("slide_start").and_then(|v| v.as_u64()).map(|v| v as usize);
let slide_end = p.get("slide_end").and_then(|v| v.as_u64()).map(|v| v as usize);
let include_notes = p
.get("include_notes")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain
.commit_get_prefix(&rev)
.map_err(|e| e.to_string())?
.oid
};
let entry = domain
.tree_entry_by_path_from_commit(&commit_oid, path)
.map_err(|e| e.to_string())?;
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
let data = &content.content;
if data.len() > MAX_FILE_SIZE {
return Err(format!(
"file too large ({} bytes), max {} bytes",
data.len(),
MAX_FILE_SIZE
));
}
let cursor = std::io::Cursor::new(data.clone());
let mut archive =
ZipArchive::new(cursor).map_err(|e| format!("failed to read PPTX ZIP: {}", e))?;
let mut slides: Vec<serde_json::Value> = Vec::new();
// Collect all slide file names
let mut slide_files: Vec<String> = (1..=1000)
.filter_map(|i| {
let name = format!("ppt/slides/slide{}.xml", i);
if archive.by_name(&name).is_ok() {
Some(name)
} else {
None
}
})
.collect();
let total_slides = slide_files.len();
let start = slide_start.unwrap_or(0).min(total_slides.saturating_sub(1));
let end = slide_end.unwrap_or(start + 50).min(total_slides);
for slide_file in slide_files.iter().skip(start).take(end - start) {
let slide_idx = slides.len() + start + 1;
let mut file = archive
.by_name(slide_file)
.map_err(|e| format!("failed to read slide {}: {}", slide_file, e))?;
let mut xml_content = String::new();
use std::io::Read;
file.read_to_string(&mut xml_content)
.map_err(|e| e.to_string())?;
// Extract text from slide XML
let text = extract_text_from_pptx_xml(&xml_content);
// Optionally extract notes
let notes = if include_notes {
let notes_file = format!("ppt/notesSlides/notesSlide{}.xml", slide_idx);
if let Ok(mut notes_file) = archive.by_name(&notes_file) {
let mut notes_xml = String::new();
if notes_file.read_to_string(&mut notes_xml).is_ok() {
Some(extract_text_from_pptx_xml(&notes_xml))
} else {
None
}
} else {
None
}
} else {
None
};
slides.push(serde_json::json!({
"slide": slide_idx,
"text": text.clone(),
"char_count": text.chars().count(),
"notes": notes,
}));
}
Ok(serde_json::json!({
"path": path,
"rev": rev,
"total_slides": total_slides,
"extracted_slides": slides.len(),
"slides": slides,
}))
}
/// Extract text content from PPTX slide XML using simple tag extraction.
fn extract_text_from_pptx_xml(xml: &str) -> String {
// PPTX uses <a:t> tags for text content
let mut results: Vec<&str> = Vec::new();
let mut last_end = 0;
while let Some(start) = xml[last_end..].find("<a:t") {
let abs_start = last_end + start;
if let Some(tag_end) = xml[abs_start..].find('>') {
let content_start = abs_start + tag_end + 1;
if let Some(end_tag) = xml[content_start..].find("</a:t>") {
let text = &xml[content_start..content_start + end_tag];
let trimmed = text.trim();
if !trimmed.is_empty() {
results.push(trimmed);
}
last_end = content_start + end_tag + 7; // len of </a:t>
} else {
break;
}
} else {
break;
}
}
// Also try <w:t> tags (notes slides use Word namespaces)
let mut last_end = 0;
while let Some(start) = xml[last_end..].find("<w:t") {
let abs_start = last_end + start;
if let Some(tag_end) = xml[abs_start..].find('>') {
let content_start = abs_start + tag_end + 1;
if let Some(end_tag) = xml[content_start..].find("</w:t>") {
let text = &xml[content_start..content_start + end_tag];
let trimmed = text.trim();
if !trimmed.is_empty() && !results.contains(&trimmed) {
results.push(trimmed);
}
last_end = content_start + end_tag + 6; // len of </w:t>
} else {
break;
}
} else {
break;
}
}
results.join(" ")
}
pub fn register_ppt_tools(registry: &mut ToolRegistry) {
let p = HashMap::from([
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .pptx document".into()), required: true, properties: None, items: None }),
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
("slide_start".into(), ToolParam { name: "slide_start".into(), param_type: "integer".into(), description: Some("1-based starting slide number (default: 1)".into()), required: false, properties: None, items: None }),
("slide_end".into(), ToolParam { name: "slide_end".into(), param_type: "integer".into(), description: Some("1-based ending slide number".into()), required: false, properties: None, items: None }),
("include_notes".into(), ToolParam { name: "include_notes".into(), param_type: "boolean".into(), description: Some("Include speaker notes (default: false)".into()), required: false, properties: None, items: None }),
]);
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
registry.register(
ToolDefinition::new("read_ppt")
.description("Extract text content from PowerPoint presentations (.pptx). Returns slide-by-slide text with character counts. Supports slide range selection and speaker notes.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
read_ppt_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}

View File

@ -1,184 +0,0 @@
//! read_word — parse and extract text from Word documents (.docx) via zip+xml.
use crate::file_tools::MAX_FILE_SIZE;
use crate::git_tools::ctx::GitToolCtx;
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
use futures::FutureExt;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::collections::HashMap;
use zip::ZipArchive;
async fn read_word_exec(
ctx: GitToolCtx,
args: serde_json::Value,
) -> Result<serde_json::Value, String> {
let p: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(args).map_err(|e| e.to_string())?;
let project_name = p
.get("project_name")
.and_then(|v| v.as_str())
.ok_or("missing project_name")?;
let repo_name = p
.get("repo_name")
.and_then(|v| v.as_str())
.ok_or("missing repo_name")?;
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
let rev = p
.get("rev")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| "HEAD".to_string());
let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let limit = p
.get("limit")
.and_then(|v| v.as_u64())
.unwrap_or(200) as usize;
let sections_only = p
.get("sections_only")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let domain = ctx.open_repo(project_name, repo_name).await?;
let commit_oid = if rev.len() >= 40 {
git::commit::types::CommitOid::new(&rev)
} else {
domain
.commit_get_prefix(&rev)
.map_err(|e| e.to_string())?
.oid
};
let entry = domain
.tree_entry_by_path_from_commit(&commit_oid, path)
.map_err(|e| e.to_string())?;
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
let data = &content.content;
if data.len() > MAX_FILE_SIZE {
return Err(format!(
"file too large ({} bytes), max {} bytes",
data.len(),
MAX_FILE_SIZE
));
}
// DOCX is a ZIP archive. Read word/document.xml from it.
let cursor = std::io::Cursor::new(data);
let mut archive = ZipArchive::new(cursor).map_err(|e| {
format!(
"failed to open docx as ZIP archive: {}. Make sure the file is a valid .docx document.",
e
)
})?;
let doc_xml = {
let file = if let Ok(f) = archive.by_name("word/document.xml") {
f
} else {
archive.by_name("document.xml")
.map_err(|_| "docx archive does not contain word/document.xml or document.xml")?
};
let mut s = String::new();
let mut reader = std::io::BufReader::new(file);
std::io::Read::read_to_string(&mut reader, &mut s)
.map_err(|e| format!("failed to read document.xml: {}", e))?;
s
};
// Parse paragraphs from <w:p> elements
let mut reader = Reader::from_str(&doc_xml);
reader.config_mut().trim_text(false);
let mut paragraphs: Vec<String> = Vec::new();
let mut buf = Vec::new();
let mut in_paragraph = false;
let mut current_text = String::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => {
if e.name().as_ref() == b"w:p" {
in_paragraph = true;
current_text.clear();
}
}
Ok(Event::Text(e)) => {
if in_paragraph {
let txt = e.unescape().map(|s| s.into_owned()).unwrap_or_default();
current_text.push_str(&txt);
}
}
Ok(Event::End(e)) => {
if e.name().as_ref() == b"w:p" && in_paragraph {
in_paragraph = false;
let text = current_text.trim().to_string();
if !text.is_empty() {
paragraphs.push(text);
}
}
}
Ok(Event::Eof) => break,
_ => {}
}
buf.clear();
}
let total = paragraphs.len();
let body: Vec<serde_json::Value> = if sections_only {
paragraphs
.iter()
.enumerate()
.filter(|(_, text)| {
text.chars().next().map(|c| c.is_uppercase()).unwrap_or(false)
&& text.chars().filter(|&c| c == ' ').count() < text.len() / 2
&& text.len() < 200
})
.skip(offset)
.take(limit)
.map(|(i, t)| serde_json::json!({ "index": i, "text": t }))
.collect()
} else {
paragraphs
.iter()
.skip(offset)
.take(limit)
.enumerate()
.map(|(i, t)| serde_json::json!({ "index": offset + i, "text": t }))
.collect()
};
Ok(serde_json::json!({
"path": path,
"rev": rev,
"paragraph_count": total,
"paragraphs": body,
}))
}
pub fn register_word_tools(registry: &mut ToolRegistry) {
let p = HashMap::from([
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .docx document".into()), required: true, properties: None, items: None }),
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
("sections_only".into(), ToolParam { name: "sections_only".into(), param_type: "boolean".into(), description: Some("If true, extract only section/heading-like paragraphs (short lines starting with uppercase)".into()), required: false, properties: None, items: None }),
("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of paragraphs to skip (default: 0)".into()), required: false, properties: None, items: None }),
("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum paragraphs to return (default: 200)".into()), required: false, properties: None, items: None }),
]);
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
registry.register(
ToolDefinition::new("read_word")
.description("Parse and extract text from Word documents (.docx). Returns paragraphs with index and text content. Supports pagination.")
.parameters(schema),
ToolHandler::new(|ctx, args| {
let gctx = GitToolCtx::new(ctx);
Box::pin(async move {
read_word_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
})
}),
);
}

View File

@ -196,9 +196,9 @@ impl AppService {
tracing::info!(url = %base_url, "AI chat enabled");
let ai_client_config = AiClientConfig::new(api_key).with_base_url(&base_url);
let mut registry = ToolRegistry::new();
git_tools::register_all(&mut registry);
file_tools::register_all(&mut registry);
project_tools::register_all(&mut registry);
fctool::git_tools::register_all(&mut registry);
fctool::file_tools::register_all(&mut registry);
fctool::project_tools::register_all(&mut registry);
let mut chat_svc = ChatService::new()
.with_ai_client_config(ai_client_config)
.with_tool_registry(registry);
@ -324,12 +324,9 @@ impl AppService {
pub mod agent;
pub mod auth;
pub mod error;
pub mod file_tools;
pub mod git;
pub mod git_tools;
pub mod issue;
pub mod project;
pub mod project_tools;
pub mod pull_request;
pub mod search;
pub mod skill;