refactor(fctool): extract tool modules into standalone fctool crate
Move git_tools, file_tools, and project_tools from libs/service into a new libs/fctool crate with correct workspace dependencies. Fixes the rev.len() >= 40 bug in all git tool resolve functions (OID check needs exact 40-char hex, not just >= 40). Adds 4 new git blob tools (blob_get, blob_exists, blob_content, blob_create). Fixes parameter naming inconsistency in repos.rs and adds project_name to list_repos output. Removes unused excel/pdf/ppt/word file tools.
This commit is contained in:
parent
0e53f4a69f
commit
c7a8bc0458
26
Cargo.lock
generated
26
Cargo.lock
generated
@ -2757,6 +2757,31 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fctool"
|
||||
version = "0.2.9"
|
||||
dependencies = [
|
||||
"agent",
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"csv",
|
||||
"db",
|
||||
"git",
|
||||
"git2",
|
||||
"models",
|
||||
"pulldown-cmark 0.12.2",
|
||||
"quick-xml 0.37.5",
|
||||
"regex",
|
||||
"reqwest 0.13.2",
|
||||
"sea-orm",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sqlparser",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fdeflate"
|
||||
version = "0.3.7"
|
||||
@ -7705,6 +7730,7 @@ dependencies = [
|
||||
"db",
|
||||
"deadpool-redis",
|
||||
"email",
|
||||
"fctool",
|
||||
"flate2",
|
||||
"futures",
|
||||
"git",
|
||||
|
||||
@ -19,6 +19,7 @@ members = [
|
||||
"libs/agent",
|
||||
"libs/migrate",
|
||||
"libs/agent-tool-derive",
|
||||
"libs/fctool",
|
||||
"apps/migrate",
|
||||
"apps/app",
|
||||
"apps/adminrpc",
|
||||
@ -50,6 +51,7 @@ observability = { path = "libs/observability" }
|
||||
avatar = { path = "libs/avatar" }
|
||||
migrate = { path = "libs/migrate" }
|
||||
session_manager = { path = "libs/session_manager" }
|
||||
fctool = { path = "libs/fctool" }
|
||||
|
||||
sea-query = "1.0.0-rc.31"
|
||||
|
||||
|
||||
39
libs/fctool/Cargo.toml
Normal file
39
libs/fctool/Cargo.toml
Normal file
@ -0,0 +1,39 @@
|
||||
[package]
|
||||
name = "fctool"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
readme.workspace = true
|
||||
homepage.workspace = true
|
||||
license.workspace = true
|
||||
keywords.workspace = true
|
||||
categories.workspace = true
|
||||
documentation.workspace = true
|
||||
|
||||
[lib]
|
||||
path = "src/lib.rs"
|
||||
name = "fctool"
|
||||
|
||||
[dependencies]
|
||||
agent = { workspace = true }
|
||||
git = { workspace = true }
|
||||
models = { workspace = true }
|
||||
db = { workspace = true }
|
||||
sea-orm = { workspace = true, features = [] }
|
||||
git2 = { workspace = true }
|
||||
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
uuid = { workspace = true, features = ["serde", "v7"] }
|
||||
reqwest = { workspace = true, features = ["json", "native-tls"] }
|
||||
regex = { workspace = true }
|
||||
csv = { workspace = true }
|
||||
quick-xml = { workspace = true }
|
||||
sqlparser = { workspace = true }
|
||||
pulldown-cmark = { workspace = true }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tracing = { workspace = true }
|
||||
@ -51,7 +51,7 @@ async fn read_csv_exec(
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
@ -67,7 +67,7 @@ async fn git_grep_exec(
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
// Resolve revision to commit oid
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
@ -130,7 +130,7 @@ async fn read_json_exec(
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
@ -41,7 +41,7 @@ async fn read_markdown_exec(
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
@ -33,7 +33,7 @@ async fn read_sql_exec(
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
let commit_oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
273
libs/fctool/src/git_tools/blob.rs
Normal file
273
libs/fctool/src/git_tools/blob.rs
Normal file
@ -0,0 +1,273 @@
|
||||
//! Git blob tools — raw object-level operations on blob OIDs.
|
||||
|
||||
use super::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use base64::Engine;
|
||||
use std::collections::HashMap;
|
||||
|
||||
async fn git_blob_info_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let commit_oid = resolve_oid(&domain, oid)?;
|
||||
let info = domain.blob_get(&commit_oid).map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"oid": info.oid.to_string(),
|
||||
"size": info.size,
|
||||
"is_binary": info.is_binary,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn git_blob_exists_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let commit_oid = resolve_oid(&domain, oid)?;
|
||||
let exists = domain.blob_exists(&commit_oid);
|
||||
|
||||
Ok(serde_json::json!({ "oid": commit_oid.to_string(), "exists": exists }))
|
||||
}
|
||||
|
||||
async fn git_blob_content_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let oid = p.get("oid").and_then(|v| v.as_str()).ok_or("missing oid")?;
|
||||
let max_size = p.get("max_size").and_then(|v| v.as_u64()).unwrap_or(1_048_576) as usize; // 1MB default
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let commit_oid = resolve_oid(&domain, oid)?;
|
||||
let blob = domain.blob_content(&commit_oid).map_err(|e| e.to_string())?;
|
||||
|
||||
if blob.size > max_size {
|
||||
return Err(format!(
|
||||
"blob too large ({} bytes), max {} bytes. Use a smaller max_size or retrieve the raw OID.",
|
||||
blob.size, max_size
|
||||
));
|
||||
}
|
||||
|
||||
let (content, is_binary) = if blob.is_binary {
|
||||
(base64::engine::general_purpose::STANDARD.encode(&blob.content), true)
|
||||
} else {
|
||||
(String::from_utf8_lossy(&blob.content).to_string(), false)
|
||||
};
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"oid": blob.oid.to_string(),
|
||||
"size": blob.size,
|
||||
"is_binary": is_binary,
|
||||
"content": content,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn git_blob_create_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
let repo_name = p.get("repo_name").and_then(|v| v.as_str()).ok_or("missing repo_name")?;
|
||||
let content = p.get("content").and_then(|v| v.as_str()).ok_or("missing content")?;
|
||||
let encoding = p.get("encoding").and_then(|v| v.as_str()).unwrap_or("utf-8");
|
||||
|
||||
let data = match encoding {
|
||||
"base64" => base64::engine::general_purpose::STANDARD
|
||||
.decode(content)
|
||||
.map_err(|e| format!("invalid base64: {}", e))?,
|
||||
"utf-8" => content.as_bytes().to_vec(),
|
||||
other => return Err(format!("unsupported encoding '{}'. Use 'utf-8' or 'base64'.", other)),
|
||||
};
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let oid = domain.blob_create(&data).map_err(|e| e.to_string())?;
|
||||
let info = domain.blob_get(&oid).map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"oid": info.oid.to_string(),
|
||||
"size": info.size,
|
||||
"is_binary": info.is_binary,
|
||||
}))
|
||||
}
|
||||
|
||||
fn resolve_oid(
|
||||
domain: &git::GitDomain,
|
||||
rev: &str,
|
||||
) -> Result<git::commit::types::CommitOid, String> {
|
||||
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(git::commit::types::CommitOid::new(rev))
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_git_tools(registry: &mut ToolRegistry) {
|
||||
// git_blob_info
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam {
|
||||
name: "project_name".into(), param_type: "string".into(),
|
||||
description: Some("Project name (slug)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("repo_name".into(), ToolParam {
|
||||
name: "repo_name".into(), param_type: "string".into(),
|
||||
description: Some("Repository name".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("oid".into(), ToolParam {
|
||||
name: "oid".into(), param_type: "string".into(),
|
||||
description: Some("Blob OID (full 40-char hex or short prefix)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
]);
|
||||
let schema = ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(p),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
|
||||
};
|
||||
registry.register(
|
||||
ToolDefinition::new("git_blob_info")
|
||||
.description("Get metadata about a git blob by its OID. Returns size and whether the blob is binary.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
git_blob_info_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
// git_blob_exists
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam {
|
||||
name: "project_name".into(), param_type: "string".into(),
|
||||
description: Some("Project name (slug)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("repo_name".into(), ToolParam {
|
||||
name: "repo_name".into(), param_type: "string".into(),
|
||||
description: Some("Repository name".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("oid".into(), ToolParam {
|
||||
name: "oid".into(), param_type: "string".into(),
|
||||
description: Some("Blob OID to check".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
]);
|
||||
let schema = ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(p),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
|
||||
};
|
||||
registry.register(
|
||||
ToolDefinition::new("git_blob_exists")
|
||||
.description("Check whether a git blob exists in the repository by its OID.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
git_blob_exists_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
// git_blob_content
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam {
|
||||
name: "project_name".into(), param_type: "string".into(),
|
||||
description: Some("Project name (slug)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("repo_name".into(), ToolParam {
|
||||
name: "repo_name".into(), param_type: "string".into(),
|
||||
description: Some("Repository name".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("oid".into(), ToolParam {
|
||||
name: "oid".into(), param_type: "string".into(),
|
||||
description: Some("Blob OID to retrieve content for".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("max_size".into(), ToolParam {
|
||||
name: "max_size".into(), param_type: "integer".into(),
|
||||
description: Some("Maximum blob size in bytes (default: 1MB)".into()),
|
||||
required: false, properties: None, items: None,
|
||||
}),
|
||||
]);
|
||||
let schema = ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(p),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "oid".into()]),
|
||||
};
|
||||
registry.register(
|
||||
ToolDefinition::new("git_blob_content")
|
||||
.description("Retrieve the raw content of a git blob by its OID. Binary content is base64-encoded. Limits to 1MB by default.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
git_blob_content_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
// git_blob_create
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam {
|
||||
name: "project_name".into(), param_type: "string".into(),
|
||||
description: Some("Project name (slug)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("repo_name".into(), ToolParam {
|
||||
name: "repo_name".into(), param_type: "string".into(),
|
||||
description: Some("Repository name".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("content".into(), ToolParam {
|
||||
name: "content".into(), param_type: "string".into(),
|
||||
description: Some("Blob content (utf-8 string or base64-encoded bytes)".into()),
|
||||
required: true, properties: None, items: None,
|
||||
}),
|
||||
("encoding".into(), ToolParam {
|
||||
name: "encoding".into(), param_type: "string".into(),
|
||||
description: Some("Encoding of content: 'utf-8' (default) or 'base64'".into()),
|
||||
required: false, properties: None, items: None,
|
||||
}),
|
||||
]);
|
||||
let schema = ToolSchema {
|
||||
schema_type: "object".into(),
|
||||
properties: Some(p),
|
||||
required: Some(vec!["project_name".into(), "repo_name".into(), "content".into()]),
|
||||
};
|
||||
registry.register(
|
||||
ToolDefinition::new("git_blob_create")
|
||||
.description("Create a new git blob in the repository. Writes the raw content to the object database and returns the new blob OID. Supports both utf-8 text and base64-encoded binary content.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
git_blob_create_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -35,8 +35,10 @@ async fn git_branch_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul
|
||||
let info = domain.branch_get(name).map_err(|e| e.to_string())?;
|
||||
|
||||
let ahead_behind = if let Some(ref upstream) = info.upstream {
|
||||
let (ahead, behind) = domain.branch_ahead_behind(name, upstream).unwrap_or((0, 0));
|
||||
Some(serde_json::json!({ "ahead": ahead, "behind": behind }))
|
||||
match domain.branch_ahead_behind(name, upstream) {
|
||||
Ok((ahead, behind)) => Some(serde_json::json!({ "ahead": ahead, "behind": behind })),
|
||||
Err(e) => Some(serde_json::json!({ "error": e.to_string() })),
|
||||
}
|
||||
} else { None };
|
||||
|
||||
Ok(serde_json::json!({
|
||||
@ -47,6 +47,16 @@ async fn git_log_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_
|
||||
Ok(serde_json::to_value(result).map_err(|e| e.to_string())?)
|
||||
}
|
||||
|
||||
/// Resolve a rev string to commit metadata. Tries full OID first (exactly 40 hex chars),
|
||||
/// falls back to prefix lookup (branch, tag, short hash).
|
||||
fn resolve_commit(domain: &git::GitDomain, rev: &str) -> Result<git::commit::types::CommitMeta, String> {
|
||||
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
@ -54,11 +64,7 @@ async fn git_show_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
|
||||
let rev = p.get("rev").and_then(|v| v.as_str()).ok_or("missing rev")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let meta = if rev.len() >= 40 {
|
||||
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())?
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string())?
|
||||
};
|
||||
let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?;
|
||||
|
||||
let refs = domain.commit_refs(&meta.oid).map_err(|e| e.to_string())?;
|
||||
|
||||
@ -128,11 +134,7 @@ async fn git_commit_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resul
|
||||
let rev = p.get("rev").and_then(|v| v.as_str()).ok_or("missing rev")?;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let meta = if rev.len() >= 40 {
|
||||
domain.commit_get(&git::commit::types::CommitOid::new(rev)).map_err(|e| e.to_string())?
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string())?
|
||||
};
|
||||
let meta = resolve_commit(&domain, rev).map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(flatten_commit(&meta))
|
||||
}
|
||||
@ -195,9 +197,11 @@ async fn git_reflog_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<ser
|
||||
let result: Vec<_> = entries.iter()
|
||||
.take(limit)
|
||||
.map(|e| {
|
||||
let ts = e.time_secs;
|
||||
// Convert to UTC by subtracting the timezone offset, consistent
|
||||
// with all other timestamp conversions in this module.
|
||||
let ts = e.time_secs - (e.offset_minutes as i64 * 60);
|
||||
let time_str = chrono::Utc.timestamp_opt(ts, 0).single()
|
||||
.map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", ts));
|
||||
.map(|dt| dt.to_rfc3339()).unwrap_or_else(|| format!("{}", e.time_secs));
|
||||
serde_json::json!({
|
||||
"oid_new": e.oid_new.to_string(), "oid_old": e.oid_old.to_string(),
|
||||
"committer_name": e.committer_name, "committer_email": e.committer_email,
|
||||
@ -17,7 +17,7 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let resolve = |rev: &str| -> Result<git::commit::types::CommitOid, String> {
|
||||
if rev.len() >= 40 {
|
||||
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(git::commit::types::CommitOid::new(rev))
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
|
||||
@ -68,8 +68,14 @@ async fn git_diff_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde
|
||||
}
|
||||
};
|
||||
|
||||
use git::diff::types::DiffDeltaStatus;
|
||||
let files: Vec<_> = result.deltas.iter().map(|d| {
|
||||
serde_json::json!({ "path": d.new_file.path, "status": format!("{:?}", d.status), "is_binary": d.new_file.is_binary })
|
||||
let (path, is_binary) = if d.status == DiffDeltaStatus::Deleted {
|
||||
(d.old_file.path.clone(), d.old_file.is_binary)
|
||||
} else {
|
||||
(d.new_file.path.clone(), d.new_file.is_binary)
|
||||
};
|
||||
serde_json::json!({ "path": path, "status": format!("{:?}", d.status), "is_binary": is_binary })
|
||||
}).collect();
|
||||
|
||||
Ok(serde_json::json!({
|
||||
@ -87,22 +93,16 @@ async fn git_diff_stats_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let stats = if base.len() >= 40 && head.len() >= 40 {
|
||||
domain.diff_stats(&git::commit::types::CommitOid::new(base), &git::commit::types::CommitOid::new(head))
|
||||
.map_err(|e| e.to_string())?
|
||||
} else {
|
||||
let b = if base.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(base)
|
||||
let resolve = |rev: &str| -> Result<git::commit::types::CommitOid, String> {
|
||||
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(git::commit::types::CommitOid::new(rev))
|
||||
} else {
|
||||
domain.commit_get_prefix(base).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
let h = if head.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(head)
|
||||
} else {
|
||||
domain.commit_get_prefix(head).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
domain.diff_stats(&b, &h).map_err(|e| e.to_string())?
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
|
||||
}
|
||||
};
|
||||
let b = resolve(base).map_err(|e| e.to_string())?;
|
||||
let h = resolve(head).map_err(|e| e.to_string())?;
|
||||
let stats = domain.diff_stats(&b, &h).map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"files_changed": stats.files_changed,
|
||||
@ -121,11 +121,11 @@ async fn git_blame_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serd
|
||||
let to_line = p.get("to_line").and_then(|v| v.as_u64().map(|n| n as u32));
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
let oid = if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(git::commit::types::CommitOid::new(&rev))
|
||||
} else {
|
||||
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
domain.commit_get_prefix(&rev).map_err(|e| e.to_string()).map(|m| m.oid)
|
||||
}?;
|
||||
|
||||
use git::blame::ops::BlameOptions;
|
||||
let mut bopts = BlameOptions::new();
|
||||
@ -3,6 +3,7 @@
|
||||
//! Each module defines async exec functions + a `register_git_tools()` call.
|
||||
//! All tools take `project_name` + `repo_name` as required params.
|
||||
|
||||
pub mod blob;
|
||||
pub mod branch;
|
||||
pub mod commit;
|
||||
pub mod ctx;
|
||||
@ -16,6 +17,7 @@ pub fn register_all(registry: &mut agent::ToolRegistry) {
|
||||
commit::register_git_tools(registry);
|
||||
branch::register_git_tools(registry);
|
||||
diff::register_git_tools(registry);
|
||||
blob::register_git_tools(registry);
|
||||
tree::register_git_tools(registry);
|
||||
tag::register_git_tools(registry);
|
||||
}
|
||||
@ -16,12 +16,19 @@ async fn git_tag_list_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<s
|
||||
let result: Vec<_> = match pattern {
|
||||
Some(ref pat) => {
|
||||
let pat_lower = pat.to_lowercase();
|
||||
let has_wildcard = pat.contains('*');
|
||||
// Convert glob pattern (only * wildcards) to regex for proper matching.
|
||||
// "*" matches any sequence of characters.
|
||||
let regex_pat = pat_lower
|
||||
.split('*')
|
||||
.map(|s| regex::escape(s))
|
||||
.collect::<Vec<_>>()
|
||||
.join(".*");
|
||||
let re = regex::Regex::new(&format!("^{}$", regex_pat))
|
||||
.ok();
|
||||
all_tags.iter()
|
||||
.filter(|t| {
|
||||
let n = t.name.to_lowercase();
|
||||
if has_wildcard { n.contains(&pat_lower.replace('*', "")) }
|
||||
else { n.contains(&pat_lower) }
|
||||
re.as_ref().map(|r| r.is_match(&n)).unwrap_or(false)
|
||||
})
|
||||
.map(|t| tag_to_json(t))
|
||||
.collect()
|
||||
@ -5,6 +5,16 @@ use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use base64::Engine;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Resolve a rev string to a commit OID. Tries full OID first (exactly 40 hex chars),
|
||||
/// falls back to prefix lookup (branch, tag, short hash).
|
||||
fn resolve_commit_oid(domain: &git::GitDomain, rev: &str) -> Result<git::commit::types::CommitOid, String> {
|
||||
if rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(git::commit::types::CommitOid::new(rev))
|
||||
} else {
|
||||
domain.commit_get_prefix(rev).map_err(|e| e.to_string()).map(|m| m.oid)
|
||||
}
|
||||
}
|
||||
|
||||
async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||
@ -13,11 +23,7 @@ async fn git_file_content_exec(ctx: GitToolCtx, args: serde_json::Value) -> Resu
|
||||
let rev = p.get("rev").and_then(|v| v.as_str()).map(|s| s.to_string()).unwrap_or_else(|| "HEAD".to_string());
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
let oid = resolve_commit_oid(&domain, &rev)?;
|
||||
|
||||
let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?;
|
||||
let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;
|
||||
@ -46,11 +52,7 @@ async fn git_tree_ls_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<se
|
||||
let rev = p.get("rev").and_then(|v| v.as_str()).map(|s| s.to_string()).unwrap_or_else(|| "HEAD".to_string());
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
let commit_oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?;
|
||||
|
||||
let entries = match dir_path {
|
||||
Some(ref dp) => {
|
||||
@ -102,11 +104,7 @@ async fn git_blob_get_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<s
|
||||
let rev = p.get("rev").and_then(|v| v.as_str()).map(String::from).unwrap_or_else(|| "HEAD".to_string());
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
let oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain.commit_get_prefix(&rev).map_err(|e| e.to_string())?.oid
|
||||
};
|
||||
let oid = resolve_commit_oid(&domain, &rev).map_err(|e| e.to_string())?;
|
||||
|
||||
let entry = domain.tree_entry_by_path_from_commit(&oid, path).map_err(|e| e.to_string())?;
|
||||
let blob_info = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;
|
||||
@ -217,16 +217,24 @@ pub struct DiffFileOut {
|
||||
|
||||
impl DiffFileOut {
|
||||
pub fn from_delta(delta: &DiffDelta) -> Self {
|
||||
// For deleted files, use old_file.path; for all others, use new_file.path.
|
||||
let path = match delta.status {
|
||||
DiffDeltaStatus::Deleted => delta.old_file.path.clone(),
|
||||
_ => delta.new_file.path.clone(),
|
||||
// For deleted files, use old_file for all metadata; for all others, use new_file.
|
||||
let (path, is_binary, size) = match delta.status {
|
||||
DiffDeltaStatus::Deleted => (
|
||||
delta.old_file.path.clone(),
|
||||
delta.old_file.is_binary,
|
||||
delta.old_file.size,
|
||||
),
|
||||
_ => (
|
||||
delta.new_file.path.clone(),
|
||||
delta.new_file.is_binary,
|
||||
delta.new_file.size,
|
||||
),
|
||||
};
|
||||
Self {
|
||||
path,
|
||||
status: format!("{:?}", delta.status),
|
||||
is_binary: delta.new_file.is_binary,
|
||||
size: delta.new_file.size,
|
||||
is_binary,
|
||||
size,
|
||||
}
|
||||
}
|
||||
}
|
||||
5
libs/fctool/src/lib.rs
Normal file
5
libs/fctool/src/lib.rs
Normal file
@ -0,0 +1,5 @@
|
||||
//! AI agent function-call tools: git operations, file parsing/search, and project management.
|
||||
|
||||
pub mod git_tools;
|
||||
pub mod file_tools;
|
||||
pub mod project_tools;
|
||||
@ -23,6 +23,14 @@ pub async fn list_repos_exec(
|
||||
let project_id = ctx.project_id();
|
||||
let db = ctx.db();
|
||||
|
||||
// Resolve project name so the AI can use it for git_tools operations
|
||||
let project = models::projects::project::Entity::find_by_id(project_id)
|
||||
.one(db)
|
||||
.await
|
||||
.map_err(|e| ToolError::ExecutionError(e.to_string()))?
|
||||
.ok_or_else(|| ToolError::ExecutionError("Project not found".into()))?;
|
||||
let project_name = project.name.clone();
|
||||
|
||||
let repos = repo::Entity::find()
|
||||
.filter(repo::Column::Project.eq(project_id))
|
||||
.order_by_asc(repo::Column::RepoName)
|
||||
@ -36,6 +44,7 @@ pub async fn list_repos_exec(
|
||||
serde_json::json!({
|
||||
"id": r.id.to_string(),
|
||||
"name": r.repo_name,
|
||||
"project_name": project_name,
|
||||
"description": r.description,
|
||||
"default_branch": r.default_branch,
|
||||
"is_private": r.is_private,
|
||||
@ -293,8 +302,9 @@ pub async fn create_commit_exec(
|
||||
|
||||
let repo_name = args
|
||||
.get("repo_name")
|
||||
.or_else(|| args.get("name"))
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| ToolError::ExecutionError("repo_name is required".into()))?;
|
||||
.ok_or_else(|| ToolError::ExecutionError("repo_name (or name) is required".into()))?;
|
||||
|
||||
let message = args
|
||||
.get("message")
|
||||
@ -308,10 +318,12 @@ pub async fn create_commit_exec(
|
||||
.unwrap_or("main")
|
||||
.to_string();
|
||||
|
||||
// Validate branch: no path traversal, no slashes
|
||||
if branch.contains("..") || branch.contains('/') || branch.contains('\\') || branch.is_empty() {
|
||||
// Validate branch: no path traversal, no backslashes, not empty, no lock files
|
||||
if branch.contains("..") || branch.contains('\\') || branch.is_empty()
|
||||
|| branch.ends_with(".lock") || branch.starts_with('-')
|
||||
{
|
||||
return Err(ToolError::ExecutionError(
|
||||
"Invalid branch name: must not contain path separators or '..'".into(),
|
||||
"Invalid branch name: must not contain '..' or backslashes, and must not be empty".into(),
|
||||
));
|
||||
}
|
||||
|
||||
@ -574,9 +586,14 @@ pub fn create_commit_tool_definition() -> ToolDefinition {
|
||||
let mut p = HashMap::new();
|
||||
p.insert("repo_name".into(), ToolParam {
|
||||
name: "repo_name".into(), param_type: "string".into(),
|
||||
description: Some("Repository name (required).".into()),
|
||||
description: Some("Repository name. Can also use 'name' as alias. Required.".into()),
|
||||
required: true, properties: None, items: None,
|
||||
});
|
||||
p.insert("name".into(), ToolParam {
|
||||
name: "name".into(), param_type: "string".into(),
|
||||
description: Some("Alias for repo_name. Use the same value as returned by project_list_repos.".into()),
|
||||
required: false, properties: None, items: None,
|
||||
});
|
||||
p.insert("branch".into(), ToolParam {
|
||||
name: "branch".into(), param_type: "string".into(),
|
||||
description: Some("Branch to commit to. Defaults to 'main'. Optional.".into()),
|
||||
@ -17,6 +17,7 @@ name = "service"
|
||||
[dependencies]
|
||||
config = { workspace = true }
|
||||
agent = { workspace = true }
|
||||
fctool = { workspace = true }
|
||||
db = { workspace = true }
|
||||
models = { workspace = true }
|
||||
email = { workspace = true }
|
||||
|
||||
@ -1,184 +0,0 @@
|
||||
//! read_excel — parse and query Excel files (.xlsx, .xls).
|
||||
|
||||
use crate::file_tools::MAX_FILE_SIZE;
|
||||
use crate::git_tools::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use calamine::{open_workbook, Reader, Xlsx};
|
||||
use futures::FutureExt;
|
||||
use std::collections::HashMap;
|
||||
|
||||
async fn read_excel_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
|
||||
let project_name = p
|
||||
.get("project_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing project_name")?;
|
||||
let repo_name = p
|
||||
.get("repo_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing repo_name")?;
|
||||
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||
let rev = p
|
||||
.get("rev")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| "HEAD".to_string());
|
||||
let sheet_name = p.get("sheet_name").and_then(|v| v.as_str()).map(String::from);
|
||||
let sheet_index = p.get("sheet_index").and_then(|v| v.as_u64()).map(|v| v as usize);
|
||||
let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
|
||||
let limit = p
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(100) as usize;
|
||||
let has_header = p
|
||||
.get("has_header")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
.commit_get_prefix(&rev)
|
||||
.map_err(|e| e.to_string())?
|
||||
.oid
|
||||
};
|
||||
|
||||
let entry = domain
|
||||
.tree_entry_by_path_from_commit(&commit_oid, path)
|
||||
.map_err(|e| e.to_string())?;
|
||||
let blob = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?;
|
||||
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
|
||||
|
||||
let data = &content.content;
|
||||
if data.len() > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({} bytes), max {} bytes",
|
||||
data.len(),
|
||||
MAX_FILE_SIZE
|
||||
));
|
||||
}
|
||||
|
||||
// Use cursor-based reading to avoid tempfile
|
||||
let cursor = std::io::Cursor::new(data.clone());
|
||||
let mut workbook: Xlsx<std::io::Cursor<Vec<u8>>> =
|
||||
open_workbook(cursor).map_err(|e| format!("failed to open Excel: {}", e))?;
|
||||
|
||||
let sheet_names = workbook.sheet_names().to_vec();
|
||||
|
||||
// Determine which sheet to read
|
||||
let sheet_idx = match (sheet_name.clone(), sheet_index) {
|
||||
(Some(name), _) => sheet_names
|
||||
.iter()
|
||||
.position(|n| n == &name)
|
||||
.ok_or_else(|| format!("sheet '{}' not found. Available: {:?}", name, sheet_names))?,
|
||||
(_, Some(idx)) => {
|
||||
if idx >= sheet_names.len() {
|
||||
return Err(format!(
|
||||
"sheet index {} out of range (0..{})",
|
||||
idx,
|
||||
sheet_names.len()
|
||||
));
|
||||
}
|
||||
idx
|
||||
}
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
let range = workbook
|
||||
.worksheet_range_at(sheet_idx)
|
||||
.map_err(|e| format!("failed to read sheet: {}", e))?;
|
||||
|
||||
let rows: Vec<Vec<serde_json::Value>> = range
|
||||
.rows()
|
||||
.skip(if has_header { offset + 1 } else { offset })
|
||||
.take(limit)
|
||||
.map(|row| {
|
||||
row.iter()
|
||||
.map(|cell| {
|
||||
use calamine::Data;
|
||||
match cell {
|
||||
Data::Int(i) => serde_json::Value::Number((*i).into()),
|
||||
Data::Float(f) => {
|
||||
serde_json::json!(f)
|
||||
}
|
||||
Data::String(s) => serde_json::Value::String(s.clone()),
|
||||
Data::Bool(b) => serde_json::Value::Bool(*b),
|
||||
Data::DateTime(dt) => {
|
||||
serde_json::Value::String(format!("{:?}", dt))
|
||||
}
|
||||
Data::DateTimeIso(s) => serde_json::Value::String(s.clone()),
|
||||
Data::DurationIso(s) => serde_json::Value::String(s.clone()),
|
||||
Data::Error(e) => serde_json::json!({ "error": format!("{:?}", e) }),
|
||||
Data::Empty => serde_json::Value::Null,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let header_row: Vec<String> = if has_header {
|
||||
range
|
||||
.rows()
|
||||
.next()
|
||||
.map(|row| {
|
||||
row.iter()
|
||||
.map(|c| {
|
||||
if let calamine::Data::String(s) = c {
|
||||
s.clone()
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"path": path,
|
||||
"rev": rev,
|
||||
"sheets": sheet_names,
|
||||
"active_sheet": sheet_names.get(sheet_idx).cloned(),
|
||||
"sheet_index": sheet_idx,
|
||||
"headers": header_row,
|
||||
"rows": rows,
|
||||
"row_count": rows.len(),
|
||||
"total_rows": range.rows().count().saturating_sub(if has_header { 1 } else { 0 }),
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn register_excel_tools(registry: &mut ToolRegistry) {
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
||||
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
|
||||
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path within the repository (supports .xlsx, .xls)".into()), required: true, properties: None, items: None }),
|
||||
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
|
||||
("sheet_name".into(), ToolParam { name: "sheet_name".into(), param_type: "string".into(), description: Some("Sheet name to read. Defaults to first sheet.".into()), required: false, properties: None, items: None }),
|
||||
("sheet_index".into(), ToolParam { name: "sheet_index".into(), param_type: "integer".into(), description: Some("Sheet index (0-based). Ignored if sheet_name is set.".into()), required: false, properties: None, items: None }),
|
||||
("has_header".into(), ToolParam { name: "has_header".into(), param_type: "boolean".into(), description: Some("If true, first row is column headers (default: true)".into()), required: false, properties: None, items: None }),
|
||||
("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of rows to skip (default: 0)".into()), required: false, properties: None, items: None }),
|
||||
("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum rows to return (default: 100)".into()), required: false, properties: None, items: None }),
|
||||
]);
|
||||
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
|
||||
registry.register(
|
||||
ToolDefinition::new("read_excel")
|
||||
.description("Parse and query Excel spreadsheets (.xlsx, .xls). Returns sheet names, headers, and rows with support for sheet selection and pagination.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
read_excel_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -1,244 +0,0 @@
|
||||
//! read_pdf — extract text from PDF files.
|
||||
|
||||
use crate::file_tools::MAX_FILE_SIZE;
|
||||
use crate::git_tools::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use futures::FutureExt;
|
||||
use lopdf::{Document, Object, ObjectId};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Extract text content from a PDF page's content stream.
|
||||
fn extract_page_text(doc: &Document, page_id: ObjectId) -> String {
|
||||
let mut text = String::new();
|
||||
|
||||
// Get page dictionary
|
||||
let page_dict = match doc.get(page_id) {
|
||||
Ok(dict) => dict,
|
||||
Err(_) => return text,
|
||||
};
|
||||
|
||||
// Get content streams (can be a single stream or array)
|
||||
let content_streams = match page_dict.get(b"Contents") {
|
||||
Ok(obj) => obj.clone(),
|
||||
Err(_) => return text,
|
||||
};
|
||||
|
||||
let stream_ids: Vec<ObjectId> = match &content_streams {
|
||||
Object::Reference(id) => vec![*id],
|
||||
Object::Array(arr) => arr
|
||||
.iter()
|
||||
.filter_map(|o| {
|
||||
if let Object::Reference(id) = o {
|
||||
Some(*id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
_ => return text,
|
||||
};
|
||||
|
||||
for stream_id in stream_ids {
|
||||
if let Ok((_, stream)) = doc.get_stream(stream_id) {
|
||||
// Decode the stream
|
||||
if let Ok(decompressed) = stream.decompressed_content() {
|
||||
text.push_str(&extract_text_from_content(&decompress_pdf_stream(&decompressed)));
|
||||
text.push('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
text
|
||||
}
|
||||
|
||||
/// Very simple PDF content stream text extraction.
|
||||
/// Handles Tj, TJ, Td, T*, ', " operators.
|
||||
fn extract_text_from_content(content: &[u8]) -> String {
|
||||
let data = String::from_utf8_lossy(content);
|
||||
let mut result = String::new();
|
||||
let mut in_parens = false;
|
||||
let mut current_text = String::new();
|
||||
let mut last_was_tj = false;
|
||||
|
||||
let mut chars = data.chars().peekable();
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
match c {
|
||||
'(' => {
|
||||
in_parens = true;
|
||||
current_text.clear();
|
||||
}
|
||||
')' if in_parens => {
|
||||
in_parens = false;
|
||||
if !current_text.is_empty() {
|
||||
if last_was_tj {
|
||||
// TJ operator: subtract current text width offset
|
||||
}
|
||||
result.push_str(¤t_text);
|
||||
result.push(' ');
|
||||
last_was_tj = false;
|
||||
}
|
||||
}
|
||||
c if in_parens => {
|
||||
if c == '\\' {
|
||||
if let Some(escaped) = chars.next() {
|
||||
match escaped {
|
||||
'n' => current_text.push('\n'),
|
||||
'r' => current_text.push('\r'),
|
||||
't' => current_text.push('\t'),
|
||||
_ => current_text.push(escaped),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
current_text.push(c);
|
||||
}
|
||||
}
|
||||
'%' => {
|
||||
// Comment, skip to end of line
|
||||
while let Some(nc) = chars.next() {
|
||||
if nc == '\n' || nc == '\r' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up excessive newlines
|
||||
let lines: Vec<&str> = result.lines().map(|l| l.trim()).filter(|l| !l.is_empty()).collect();
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
fn decompress_pdf_stream(data: &[u8]) -> Vec<u8> {
|
||||
// Try to detect and decompress flate/zlib streams
|
||||
if data.len() < 2 {
|
||||
return data.to_vec();
|
||||
}
|
||||
|
||||
// Simple zlib check: zlib-wrapped deflate starts with 0x78
|
||||
if data.starts_with(&[0x78]) || data.starts_with(&[0x08, 0x1b]) {
|
||||
if let Ok(decoded) = flate2::read::ZlibDecoder::new(data).bytes().collect::<Result<Vec<_>, _>>() {
|
||||
return decoded;
|
||||
}
|
||||
}
|
||||
|
||||
// Try raw deflate
|
||||
if let Ok(decoded) = flate2::read::DeflateDecoder::new(data).bytes().collect::<Result<Vec<_>, _>>() {
|
||||
return decoded;
|
||||
}
|
||||
|
||||
data.to_vec()
|
||||
}
|
||||
|
||||
async fn read_pdf_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
|
||||
let project_name = p
|
||||
.get("project_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing project_name")?;
|
||||
let repo_name = p
|
||||
.get("repo_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing repo_name")?;
|
||||
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||
let rev = p
|
||||
.get("rev")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| "HEAD".to_string());
|
||||
let page_start = p.get("page_start").and_then(|v| v.as_u64()).map(|v| v as usize);
|
||||
let page_end = p.get("page_end").and_then(|v| v.as_u64()).map(|v| v as usize);
|
||||
let max_pages = p
|
||||
.get("max_pages")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(20) as usize;
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
.commit_get_prefix(&rev)
|
||||
.map_err(|e| e.to_string())?
|
||||
.oid
|
||||
};
|
||||
|
||||
let entry = domain
|
||||
.tree_entry_by_path_from_commit(&commit_oid, path)
|
||||
.map_err(|e| e.to_string())?;
|
||||
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
|
||||
|
||||
let data = &content.content;
|
||||
if data.len() > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({} bytes), max {} bytes",
|
||||
data.len(),
|
||||
MAX_FILE_SIZE
|
||||
));
|
||||
}
|
||||
|
||||
let doc = Document::load_from_mem(data)
|
||||
.map_err(|e| format!("failed to parse PDF: {}", e))?;
|
||||
|
||||
// Get all page references
|
||||
let pages: Vec<ObjectId> = doc
|
||||
.pages
|
||||
.values()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let total_pages = pages.len();
|
||||
|
||||
let start = page_start.unwrap_or(0).min(total_pages.saturating_sub(1));
|
||||
let end = page_end.unwrap_or(start + max_pages).min(total_pages);
|
||||
|
||||
let mut page_texts: Vec<serde_json::Value> = Vec::new();
|
||||
|
||||
for (i, page_id) in pages.iter().enumerate().skip(start).take(end - start) {
|
||||
let text = extract_page_text(&doc, *page_id);
|
||||
page_texts.push(serde_json::json!({
|
||||
"page": i + 1,
|
||||
"text": text,
|
||||
"char_count": text.chars().count(),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"path": path,
|
||||
"rev": rev,
|
||||
"total_pages": total_pages,
|
||||
"extracted_pages": page_texts.len(),
|
||||
"pages": page_texts,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn register_pdf_tools(registry: &mut ToolRegistry) {
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
||||
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
|
||||
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the PDF document".into()), required: true, properties: None, items: None }),
|
||||
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
|
||||
("page_start".into(), ToolParam { name: "page_start".into(), param_type: "integer".into(), description: Some("1-based starting page number (default: 1)".into()), required: false, properties: None, items: None }),
|
||||
("page_end".into(), ToolParam { name: "page_end".into(), param_type: "integer".into(), description: Some("1-based ending page number (default: page_start + 20)".into()), required: false, properties: None, items: None }),
|
||||
("max_pages".into(), ToolParam { name: "max_pages".into(), param_type: "integer".into(), description: Some("Maximum number of pages to extract (default: 20)".into()), required: false, properties: None, items: None }),
|
||||
]);
|
||||
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
|
||||
registry.register(
|
||||
ToolDefinition::new("read_pdf")
|
||||
.description("Extract text content from PDF files. Returns page-by-page text extraction with character counts. Supports page range selection.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
read_pdf_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -1,204 +0,0 @@
|
||||
//! read_ppt — extract text from PowerPoint files (.pptx).
|
||||
|
||||
use crate::file_tools::MAX_FILE_SIZE;
|
||||
use crate::git_tools::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use futures::FutureExt;
|
||||
use std::collections::HashMap;
|
||||
use zip::ZipArchive;
|
||||
|
||||
async fn read_ppt_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
|
||||
let project_name = p
|
||||
.get("project_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing project_name")?;
|
||||
let repo_name = p
|
||||
.get("repo_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing repo_name")?;
|
||||
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||
let rev = p
|
||||
.get("rev")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| "HEAD".to_string());
|
||||
let slide_start = p.get("slide_start").and_then(|v| v.as_u64()).map(|v| v as usize);
|
||||
let slide_end = p.get("slide_end").and_then(|v| v.as_u64()).map(|v| v as usize);
|
||||
let include_notes = p
|
||||
.get("include_notes")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
.commit_get_prefix(&rev)
|
||||
.map_err(|e| e.to_string())?
|
||||
.oid
|
||||
};
|
||||
|
||||
let entry = domain
|
||||
.tree_entry_by_path_from_commit(&commit_oid, path)
|
||||
.map_err(|e| e.to_string())?;
|
||||
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
|
||||
|
||||
let data = &content.content;
|
||||
if data.len() > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({} bytes), max {} bytes",
|
||||
data.len(),
|
||||
MAX_FILE_SIZE
|
||||
));
|
||||
}
|
||||
|
||||
let cursor = std::io::Cursor::new(data.clone());
|
||||
let mut archive =
|
||||
ZipArchive::new(cursor).map_err(|e| format!("failed to read PPTX ZIP: {}", e))?;
|
||||
|
||||
let mut slides: Vec<serde_json::Value> = Vec::new();
|
||||
|
||||
// Collect all slide file names
|
||||
let mut slide_files: Vec<String> = (1..=1000)
|
||||
.filter_map(|i| {
|
||||
let name = format!("ppt/slides/slide{}.xml", i);
|
||||
if archive.by_name(&name).is_ok() {
|
||||
Some(name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total_slides = slide_files.len();
|
||||
let start = slide_start.unwrap_or(0).min(total_slides.saturating_sub(1));
|
||||
let end = slide_end.unwrap_or(start + 50).min(total_slides);
|
||||
|
||||
for slide_file in slide_files.iter().skip(start).take(end - start) {
|
||||
let slide_idx = slides.len() + start + 1;
|
||||
|
||||
let mut file = archive
|
||||
.by_name(slide_file)
|
||||
.map_err(|e| format!("failed to read slide {}: {}", slide_file, e))?;
|
||||
let mut xml_content = String::new();
|
||||
use std::io::Read;
|
||||
file.read_to_string(&mut xml_content)
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
// Extract text from slide XML
|
||||
let text = extract_text_from_pptx_xml(&xml_content);
|
||||
|
||||
// Optionally extract notes
|
||||
let notes = if include_notes {
|
||||
let notes_file = format!("ppt/notesSlides/notesSlide{}.xml", slide_idx);
|
||||
if let Ok(mut notes_file) = archive.by_name(¬es_file) {
|
||||
let mut notes_xml = String::new();
|
||||
if notes_file.read_to_string(&mut notes_xml).is_ok() {
|
||||
Some(extract_text_from_pptx_xml(¬es_xml))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
slides.push(serde_json::json!({
|
||||
"slide": slide_idx,
|
||||
"text": text.clone(),
|
||||
"char_count": text.chars().count(),
|
||||
"notes": notes,
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"path": path,
|
||||
"rev": rev,
|
||||
"total_slides": total_slides,
|
||||
"extracted_slides": slides.len(),
|
||||
"slides": slides,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Extract text content from PPTX slide XML using simple tag extraction.
|
||||
fn extract_text_from_pptx_xml(xml: &str) -> String {
|
||||
// PPTX uses <a:t> tags for text content
|
||||
let mut results: Vec<&str> = Vec::new();
|
||||
let mut last_end = 0;
|
||||
|
||||
while let Some(start) = xml[last_end..].find("<a:t") {
|
||||
let abs_start = last_end + start;
|
||||
if let Some(tag_end) = xml[abs_start..].find('>') {
|
||||
let content_start = abs_start + tag_end + 1;
|
||||
if let Some(end_tag) = xml[content_start..].find("</a:t>") {
|
||||
let text = &xml[content_start..content_start + end_tag];
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() {
|
||||
results.push(trimmed);
|
||||
}
|
||||
last_end = content_start + end_tag + 7; // len of </a:t>
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Also try <w:t> tags (notes slides use Word namespaces)
|
||||
let mut last_end = 0;
|
||||
while let Some(start) = xml[last_end..].find("<w:t") {
|
||||
let abs_start = last_end + start;
|
||||
if let Some(tag_end) = xml[abs_start..].find('>') {
|
||||
let content_start = abs_start + tag_end + 1;
|
||||
if let Some(end_tag) = xml[content_start..].find("</w:t>") {
|
||||
let text = &xml[content_start..content_start + end_tag];
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() && !results.contains(&trimmed) {
|
||||
results.push(trimmed);
|
||||
}
|
||||
last_end = content_start + end_tag + 6; // len of </w:t>
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
results.join(" ")
|
||||
}
|
||||
|
||||
pub fn register_ppt_tools(registry: &mut ToolRegistry) {
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
||||
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
|
||||
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .pptx document".into()), required: true, properties: None, items: None }),
|
||||
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
|
||||
("slide_start".into(), ToolParam { name: "slide_start".into(), param_type: "integer".into(), description: Some("1-based starting slide number (default: 1)".into()), required: false, properties: None, items: None }),
|
||||
("slide_end".into(), ToolParam { name: "slide_end".into(), param_type: "integer".into(), description: Some("1-based ending slide number".into()), required: false, properties: None, items: None }),
|
||||
("include_notes".into(), ToolParam { name: "include_notes".into(), param_type: "boolean".into(), description: Some("Include speaker notes (default: false)".into()), required: false, properties: None, items: None }),
|
||||
]);
|
||||
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
|
||||
registry.register(
|
||||
ToolDefinition::new("read_ppt")
|
||||
.description("Extract text content from PowerPoint presentations (.pptx). Returns slide-by-slide text with character counts. Supports slide range selection and speaker notes.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
read_ppt_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -1,184 +0,0 @@
|
||||
//! read_word — parse and extract text from Word documents (.docx) via zip+xml.
|
||||
|
||||
use crate::file_tools::MAX_FILE_SIZE;
|
||||
use crate::git_tools::ctx::GitToolCtx;
|
||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||
use futures::FutureExt;
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
use std::collections::HashMap;
|
||||
use zip::ZipArchive;
|
||||
|
||||
async fn read_word_exec(
|
||||
ctx: GitToolCtx,
|
||||
args: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let p: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||
|
||||
let project_name = p
|
||||
.get("project_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing project_name")?;
|
||||
let repo_name = p
|
||||
.get("repo_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing repo_name")?;
|
||||
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
||||
let rev = p
|
||||
.get("rev")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| "HEAD".to_string());
|
||||
let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
|
||||
let limit = p
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(200) as usize;
|
||||
let sections_only = p
|
||||
.get("sections_only")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
let domain = ctx.open_repo(project_name, repo_name).await?;
|
||||
|
||||
let commit_oid = if rev.len() >= 40 {
|
||||
git::commit::types::CommitOid::new(&rev)
|
||||
} else {
|
||||
domain
|
||||
.commit_get_prefix(&rev)
|
||||
.map_err(|e| e.to_string())?
|
||||
.oid
|
||||
};
|
||||
|
||||
let entry = domain
|
||||
.tree_entry_by_path_from_commit(&commit_oid, path)
|
||||
.map_err(|e| e.to_string())?;
|
||||
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
|
||||
|
||||
let data = &content.content;
|
||||
if data.len() > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({} bytes), max {} bytes",
|
||||
data.len(),
|
||||
MAX_FILE_SIZE
|
||||
));
|
||||
}
|
||||
|
||||
// DOCX is a ZIP archive. Read word/document.xml from it.
|
||||
let cursor = std::io::Cursor::new(data);
|
||||
let mut archive = ZipArchive::new(cursor).map_err(|e| {
|
||||
format!(
|
||||
"failed to open docx as ZIP archive: {}. Make sure the file is a valid .docx document.",
|
||||
e
|
||||
)
|
||||
})?;
|
||||
|
||||
let doc_xml = {
|
||||
let file = if let Ok(f) = archive.by_name("word/document.xml") {
|
||||
f
|
||||
} else {
|
||||
archive.by_name("document.xml")
|
||||
.map_err(|_| "docx archive does not contain word/document.xml or document.xml")?
|
||||
};
|
||||
let mut s = String::new();
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
std::io::Read::read_to_string(&mut reader, &mut s)
|
||||
.map_err(|e| format!("failed to read document.xml: {}", e))?;
|
||||
s
|
||||
};
|
||||
|
||||
// Parse paragraphs from <w:p> elements
|
||||
let mut reader = Reader::from_str(&doc_xml);
|
||||
reader.config_mut().trim_text(false);
|
||||
|
||||
let mut paragraphs: Vec<String> = Vec::new();
|
||||
let mut buf = Vec::new();
|
||||
let mut in_paragraph = false;
|
||||
let mut current_text = String::new();
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
Ok(Event::Start(e)) => {
|
||||
if e.name().as_ref() == b"w:p" {
|
||||
in_paragraph = true;
|
||||
current_text.clear();
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(e)) => {
|
||||
if in_paragraph {
|
||||
let txt = e.unescape().map(|s| s.into_owned()).unwrap_or_default();
|
||||
current_text.push_str(&txt);
|
||||
}
|
||||
}
|
||||
Ok(Event::End(e)) => {
|
||||
if e.name().as_ref() == b"w:p" && in_paragraph {
|
||||
in_paragraph = false;
|
||||
let text = current_text.trim().to_string();
|
||||
if !text.is_empty() {
|
||||
paragraphs.push(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
_ => {}
|
||||
}
|
||||
buf.clear();
|
||||
}
|
||||
|
||||
let total = paragraphs.len();
|
||||
|
||||
let body: Vec<serde_json::Value> = if sections_only {
|
||||
paragraphs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, text)| {
|
||||
text.chars().next().map(|c| c.is_uppercase()).unwrap_or(false)
|
||||
&& text.chars().filter(|&c| c == ' ').count() < text.len() / 2
|
||||
&& text.len() < 200
|
||||
})
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.map(|(i, t)| serde_json::json!({ "index": i, "text": t }))
|
||||
.collect()
|
||||
} else {
|
||||
paragraphs
|
||||
.iter()
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.enumerate()
|
||||
.map(|(i, t)| serde_json::json!({ "index": offset + i, "text": t }))
|
||||
.collect()
|
||||
};
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"path": path,
|
||||
"rev": rev,
|
||||
"paragraph_count": total,
|
||||
"paragraphs": body,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn register_word_tools(registry: &mut ToolRegistry) {
|
||||
let p = HashMap::from([
|
||||
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
||||
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
|
||||
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .docx document".into()), required: true, properties: None, items: None }),
|
||||
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
|
||||
("sections_only".into(), ToolParam { name: "sections_only".into(), param_type: "boolean".into(), description: Some("If true, extract only section/heading-like paragraphs (short lines starting with uppercase)".into()), required: false, properties: None, items: None }),
|
||||
("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of paragraphs to skip (default: 0)".into()), required: false, properties: None, items: None }),
|
||||
("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum paragraphs to return (default: 200)".into()), required: false, properties: None, items: None }),
|
||||
]);
|
||||
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
|
||||
registry.register(
|
||||
ToolDefinition::new("read_word")
|
||||
.description("Parse and extract text from Word documents (.docx). Returns paragraphs with index and text content. Supports pagination.")
|
||||
.parameters(schema),
|
||||
ToolHandler::new(|ctx, args| {
|
||||
let gctx = GitToolCtx::new(ctx);
|
||||
Box::pin(async move {
|
||||
read_word_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -196,9 +196,9 @@ impl AppService {
|
||||
tracing::info!(url = %base_url, "AI chat enabled");
|
||||
let ai_client_config = AiClientConfig::new(api_key).with_base_url(&base_url);
|
||||
let mut registry = ToolRegistry::new();
|
||||
git_tools::register_all(&mut registry);
|
||||
file_tools::register_all(&mut registry);
|
||||
project_tools::register_all(&mut registry);
|
||||
fctool::git_tools::register_all(&mut registry);
|
||||
fctool::file_tools::register_all(&mut registry);
|
||||
fctool::project_tools::register_all(&mut registry);
|
||||
let mut chat_svc = ChatService::new()
|
||||
.with_ai_client_config(ai_client_config)
|
||||
.with_tool_registry(registry);
|
||||
@ -324,12 +324,9 @@ impl AppService {
|
||||
pub mod agent;
|
||||
pub mod auth;
|
||||
pub mod error;
|
||||
pub mod file_tools;
|
||||
pub mod git;
|
||||
pub mod git_tools;
|
||||
pub mod issue;
|
||||
pub mod project;
|
||||
pub mod project_tools;
|
||||
pub mod pull_request;
|
||||
pub mod search;
|
||||
pub mod skill;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user