//! read_excel — parse and query Excel files (.xlsx, .xls). use crate::file_tools::MAX_FILE_SIZE; use crate::git_tools::ctx::GitToolCtx; use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; use calamine::{open_workbook, Reader, Xlsx}; use futures::FutureExt; use std::collections::HashMap; async fn read_excel_exec( ctx: GitToolCtx, args: serde_json::Value, ) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p .get("project_name") .and_then(|v| v.as_str()) .ok_or("missing project_name")?; let repo_name = p .get("repo_name") .and_then(|v| v.as_str()) .ok_or("missing repo_name")?; let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; let rev = p .get("rev") .and_then(|v| v.as_str()) .map(String::from) .unwrap_or_else(|| "HEAD".to_string()); let sheet_name = p.get("sheet_name").and_then(|v| v.as_str()).map(String::from); let sheet_index = p.get("sheet_index").and_then(|v| v.as_u64()).map(|v| v as usize); let offset = p.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize; let limit = p .get("limit") .and_then(|v| v.as_u64()) .unwrap_or(100) as usize; let has_header = p .get("has_header") .and_then(|v| v.as_bool()) .unwrap_or(true); let domain = ctx.open_repo(project_name, repo_name).await?; let commit_oid = if rev.len() >= 40 { git::commit::types::CommitOid::new(&rev) } else { domain .commit_get_prefix(&rev) .map_err(|e| e.to_string())? .oid }; let entry = domain .tree_entry_by_path_from_commit(&commit_oid, path) .map_err(|e| e.to_string())?; let blob = domain.blob_get(&entry.oid).map_err(|e| e.to_string())?; let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; let data = &content.content; if data.len() > MAX_FILE_SIZE { return Err(format!( "file too large ({} bytes), max {} bytes", data.len(), MAX_FILE_SIZE )); } // Use cursor-based reading to avoid tempfile let cursor = std::io::Cursor::new(data.clone()); let mut workbook: Xlsx>> = open_workbook(cursor).map_err(|e| format!("failed to open Excel: {}", e))?; let sheet_names = workbook.sheet_names().to_vec(); // Determine which sheet to read let sheet_idx = match (sheet_name.clone(), sheet_index) { (Some(name), _) => sheet_names .iter() .position(|n| n == &name) .ok_or_else(|| format!("sheet '{}' not found. Available: {:?}", name, sheet_names))?, (_, Some(idx)) => { if idx >= sheet_names.len() { return Err(format!( "sheet index {} out of range (0..{})", idx, sheet_names.len() )); } idx } _ => 0, }; let range = workbook .worksheet_range_at(sheet_idx) .map_err(|e| format!("failed to read sheet: {}", e))?; let rows: Vec> = range .rows() .skip(if has_header { offset + 1 } else { offset }) .take(limit) .map(|row| { row.iter() .map(|cell| { use calamine::Data; match cell { Data::Int(i) => serde_json::Value::Number((*i).into()), Data::Float(f) => { serde_json::json!(f) } Data::String(s) => serde_json::Value::String(s.clone()), Data::Bool(b) => serde_json::Value::Bool(*b), Data::DateTime(dt) => { serde_json::Value::String(format!("{:?}", dt)) } Data::DateTimeIso(s) => serde_json::Value::String(s.clone()), Data::DurationIso(s) => serde_json::Value::String(s.clone()), Data::Error(e) => serde_json::json!({ "error": format!("{:?}", e) }), Data::Empty => serde_json::Value::Null, } }) .collect() }) .collect(); let header_row: Vec = if has_header { range .rows() .next() .map(|row| { row.iter() .map(|c| { if let calamine::Data::String(s) = c { s.clone() } else { String::new() } }) .collect() }) .unwrap_or_default() } else { vec![] }; Ok(serde_json::json!({ "path": path, "rev": rev, "sheets": sheet_names, "active_sheet": sheet_names.get(sheet_idx).cloned(), "sheet_index": sheet_idx, "headers": header_row, "rows": rows, "row_count": rows.len(), "total_rows": range.rows().count().saturating_sub(if has_header { 1 } else { 0 }), })) } pub fn register_excel_tools(registry: &mut ToolRegistry) { let p = HashMap::from([ ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path within the repository (supports .xlsx, .xls)".into()), required: true, properties: None, items: None }), ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), ("sheet_name".into(), ToolParam { name: "sheet_name".into(), param_type: "string".into(), description: Some("Sheet name to read. Defaults to first sheet.".into()), required: false, properties: None, items: None }), ("sheet_index".into(), ToolParam { name: "sheet_index".into(), param_type: "integer".into(), description: Some("Sheet index (0-based). Ignored if sheet_name is set.".into()), required: false, properties: None, items: None }), ("has_header".into(), ToolParam { name: "has_header".into(), param_type: "boolean".into(), description: Some("If true, first row is column headers (default: true)".into()), required: false, properties: None, items: None }), ("offset".into(), ToolParam { name: "offset".into(), param_type: "integer".into(), description: Some("Number of rows to skip (default: 0)".into()), required: false, properties: None, items: None }), ("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum rows to return (default: 100)".into()), required: false, properties: None, items: None }), ]); let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; registry.register( ToolDefinition::new("read_excel") .description("Parse and query Excel spreadsheets (.xlsx, .xls). Returns sheet names, headers, and rows with support for sheet selection and pagination.") .parameters(schema), ToolHandler::new(|ctx, args| { let gctx = GitToolCtx::new(ctx); Box::pin(async move { read_excel_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) }) }), ); }