//! read_markdown — parse and analyze Markdown files. use crate::file_tools::MAX_FILE_SIZE; use crate::git_tools::ctx::GitToolCtx; use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema}; use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Parser, Tag, TagEnd}; use std::collections::HashMap; async fn read_markdown_exec( ctx: GitToolCtx, args: serde_json::Value, ) -> Result { let p: serde_json::Map = serde_json::from_value(args).map_err(|e| e.to_string())?; let project_name = p .get("project_name") .and_then(|v| v.as_str()) .ok_or("missing project_name")?; let repo_name = p .get("repo_name") .and_then(|v| v.as_str()) .ok_or("missing repo_name")?; let path = p .get("path") .and_then(|v| v.as_str()) .ok_or("missing path")?; let rev = p .get("rev") .and_then(|v| v.as_str()) .map(String::from) .unwrap_or_else(|| "HEAD".to_string()); let include_code = p .get("include_code") .and_then(|v| v.as_bool()) .unwrap_or(true); let sections_only = p .get("sections_only") .and_then(|v| v.as_bool()) .unwrap_or(false); let domain = ctx.open_repo(project_name, repo_name).await?; let commit_oid = if rev.len() >= 40 { git::commit::types::CommitOid::new(&rev) } else { domain .commit_get_prefix(&rev) .map_err(|e| e.to_string())? .oid }; let entry = domain .tree_entry_by_path_from_commit(&commit_oid, path) .map_err(|e| e.to_string())?; let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?; let data = &content.content; if data.len() > MAX_FILE_SIZE { return Err(format!( "file too large ({} bytes), max {} bytes", data.len(), MAX_FILE_SIZE )); } let text = String::from_utf8_lossy(data); let parser = Parser::new(&text); let mut sections: Vec = Vec::new(); let mut code_blocks: Vec = Vec::new(); let mut links: Vec = Vec::new(); let mut images: Vec = Vec::new(); let mut current_heading_level: Option = None; let mut current_heading_text = String::new(); let mut in_code_block = false; let mut code_block_lang = String::new(); let mut code_block_content = String::new(); let mut toc: Vec = Vec::new(); for event in parser { match event { Event::Start(Tag::Heading { level, .. }) => { current_heading_level = Some(match level { HeadingLevel::H1 => 1, HeadingLevel::H2 => 2, HeadingLevel::H3 => 3, HeadingLevel::H4 => 4, HeadingLevel::H5 => 5, HeadingLevel::H6 => 6, }); current_heading_text.clear(); } Event::End(TagEnd::Heading(level)) => { let lvl = match level { HeadingLevel::H1 => 1, HeadingLevel::H2 => 2, HeadingLevel::H3 => 3, HeadingLevel::H4 => 4, HeadingLevel::H5 => 5, HeadingLevel::H6 => 6, }; let heading = current_heading_text.trim().to_string(); if !heading.is_empty() { let section = serde_json::json!({ "level": lvl, "title": heading, }); toc.push(section.clone()); if !sections_only { sections.push(serde_json::json!({ "level": lvl, "title": heading, "content": "", })); } } current_heading_level = None; } Event::Text(text) => { if in_code_block { code_block_content.push_str(&text); code_block_content.push('\n'); } else if let Some(_) = current_heading_level { current_heading_text.push_str(&text); current_heading_text.push(' '); } } Event::Code(code) => { code_blocks.push(serde_json::json!({ "language": "", "code": code.as_ref(), })); } Event::Start(Tag::CodeBlock(kind)) => { in_code_block = true; code_block_content.clear(); code_block_lang = match kind { CodeBlockKind::Fenced(info) => info.as_ref().to_string(), CodeBlockKind::Indented => String::new(), }; } Event::End(TagEnd::CodeBlock) => { in_code_block = false; if include_code { code_blocks.push(serde_json::json!({ "language": code_block_lang, "code": code_block_content.trim().to_string(), })); } code_block_lang.clear(); } Event::Start(Tag::Link { dest_url, .. }) => { links.push(serde_json::json!({ "url": dest_url.to_string() })); } Event::Start(Tag::Image { dest_url, .. }) => { images.push(serde_json::json!({ "url": dest_url.to_string() })); } _ => {} } } // Build outline (h1/h2/h3 only) let outline: Vec = toc .iter() .filter(|s| { let lvl = s.get("level").and_then(|v| v.as_u64()).unwrap_or(0) as u32; lvl <= 3 }) .cloned() .collect(); Ok(serde_json::json!({ "path": path, "rev": rev, "stats": { "chars": text.chars().count(), "words": text.split_whitespace().count(), "lines": text.lines().count(), "headings": toc.len(), "code_blocks": code_blocks.len(), "links": links.len(), "images": images.len(), }, "outline": outline, "headings": toc, "code_blocks": if include_code { code_blocks } else { vec![] }, "links": links, "images": images, })) } pub fn register_markdown_tools(registry: &mut ToolRegistry) { let p = HashMap::from([ ( "project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None, }, ), ( "repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None, }, ), ( "path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the Markdown file".into()), required: true, properties: None, items: None, }, ), ( "rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None, }, ), ( "sections_only".into(), ToolParam { name: "sections_only".into(), param_type: "boolean".into(), description: Some( "If true, return only section headings (outline). Default: false".into(), ), required: false, properties: None, items: None, }, ), ( "include_code".into(), ToolParam { name: "include_code".into(), param_type: "boolean".into(), description: Some("Include code blocks in result. Default: true".into()), required: false, properties: None, items: None, }, ), ]); let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec![ "project_name".into(), "repo_name".into(), "path".into(), ]), }; registry.register( ToolDefinition::new("read_markdown") .description("Parse and analyze a Markdown file. Returns document statistics, heading outline, code blocks with languages, links, and images.") .parameters(schema), ToolHandler::new(|ctx, args| { let gctx = GitToolCtx::new(ctx); Box::pin(async move { read_markdown_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError) }) }), ); }