//! read_ppt — extract text from PowerPoint files (.pptx). use crate::file_tools::MAX_FILE_SIZE; use crate::git_tools::ctx::GitToolCtx; use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; use futures::FutureExt; use std::collections::HashMap; use zip::ZipArchive; async fn read_ppt_exec( ctx: GitToolCtx, args: serde_json::Value, ) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p .get("project_name") .and_then(|v| v.as_str()) .ok_or("missing project_name")?; let repo_name = p .get("repo_name") .and_then(|v| v.as_str()) .ok_or("missing repo_name")?; let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?; let rev = p .get("rev") .and_then(|v| v.as_str()) .map(String::from) .unwrap_or_else(|| "HEAD".to_string()); let slide_start = p.get("slide_start").and_then(|v| v.as_u64()).map(|v| v as usize); let slide_end = p.get("slide_end").and_then(|v| v.as_u64()).map(|v| v as usize); let include_notes = p .get("include_notes") .and_then(|v| v.as_bool()) .unwrap_or(false); let domain = ctx.open_repo(project_name, repo_name).await?; let commit_oid = if rev.len() >= 40 { git::commit::types::CommitOid::new(&rev) } else { domain .commit_get_prefix(&rev) .map_err(|e| e.to_string())? .oid }; let entry = domain .tree_entry_by_path_from_commit(&commit_oid, path) .map_err(|e| e.to_string())?; let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; let data = &content.content; if data.len() > MAX_FILE_SIZE { return Err(format!( "file too large ({} bytes), max {} bytes", data.len(), MAX_FILE_SIZE )); } let cursor = std::io::Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).map_err(|e| format!("failed to read PPTX ZIP: {}", e))?; let mut slides: Vec = Vec::new(); // Collect all slide file names let mut slide_files: Vec = (1..=1000) .filter_map(|i| { let name = format!("ppt/slides/slide{}.xml", i); if archive.by_name(&name).is_ok() { Some(name) } else { None } }) .collect(); let total_slides = slide_files.len(); let start = slide_start.unwrap_or(0).min(total_slides.saturating_sub(1)); let end = slide_end.unwrap_or(start + 50).min(total_slides); for slide_file in slide_files.iter().skip(start).take(end - start) { let slide_idx = slides.len() + start + 1; let mut file = archive .by_name(slide_file) .map_err(|e| format!("failed to read slide {}: {}", slide_file, e))?; let mut xml_content = String::new(); use std::io::Read; file.read_to_string(&mut xml_content) .map_err(|e| e.to_string())?; // Extract text from slide XML let text = extract_text_from_pptx_xml(&xml_content); // Optionally extract notes let notes = if include_notes { let notes_file = format!("ppt/notesSlides/notesSlide{}.xml", slide_idx); if let Ok(mut notes_file) = archive.by_name(¬es_file) { let mut notes_xml = String::new(); if notes_file.read_to_string(&mut notes_xml).is_ok() { Some(extract_text_from_pptx_xml(¬es_xml)) } else { None } } else { None } } else { None }; slides.push(serde_json::json!({ "slide": slide_idx, "text": text.clone(), "char_count": text.chars().count(), "notes": notes, })); } Ok(serde_json::json!({ "path": path, "rev": rev, "total_slides": total_slides, "extracted_slides": slides.len(), "slides": slides, })) } /// Extract text content from PPTX slide XML using simple tag extraction. fn extract_text_from_pptx_xml(xml: &str) -> String { // PPTX uses tags for text content let mut results: Vec<&str> = Vec::new(); let mut last_end = 0; while let Some(start) = xml[last_end..].find("') { let content_start = abs_start + tag_end + 1; if let Some(end_tag) = xml[content_start..].find("") { let text = &xml[content_start..content_start + end_tag]; let trimmed = text.trim(); if !trimmed.is_empty() { results.push(trimmed); } last_end = content_start + end_tag + 7; // len of } else { break; } } else { break; } } // Also try tags (notes slides use Word namespaces) let mut last_end = 0; while let Some(start) = xml[last_end..].find("') { let content_start = abs_start + tag_end + 1; if let Some(end_tag) = xml[content_start..].find("") { let text = &xml[content_start..content_start + end_tag]; let trimmed = text.trim(); if !trimmed.is_empty() && !results.contains(&trimmed) { results.push(trimmed); } last_end = content_start + end_tag + 6; // len of } else { break; } } else { break; } } results.join(" ") } pub fn register_ppt_tools(registry: &mut ToolRegistry) { let p = HashMap::from([ ("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }), ("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }), ("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the .pptx document".into()), required: true, properties: None, items: None }), ("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }), ("slide_start".into(), ToolParam { name: "slide_start".into(), param_type: "integer".into(), description: Some("1-based starting slide number (default: 1)".into()), required: false, properties: None, items: None }), ("slide_end".into(), ToolParam { name: "slide_end".into(), param_type: "integer".into(), description: Some("1-based ending slide number".into()), required: false, properties: None, items: None }), ("include_notes".into(), ToolParam { name: "include_notes".into(), param_type: "boolean".into(), description: Some("Include speaker notes (default: false)".into()), required: false, properties: None, items: None }), ]); let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) }; registry.register( ToolDefinition::new("read_ppt") .description("Extract text content from PowerPoint presentations (.pptx). Returns slide-by-slide text with character counts. Supports slide range selection and speaker notes.") .parameters(schema), ToolHandler::new(|ctx, args| { let gctx = GitToolCtx::new(ctx); Box::pin(async move { read_ppt_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) }) }), ); }