//! Repository skill scanner.
//!
//! Scans repositories for SKILL.md files and upserts skill records.

use crate::error::AppError;
use chrono::Utc;
use git2::Repository;
use models::ActiveModelTrait;
use models::projects::project_skill::ActiveModel as SkillActiveModel;
use models::projects::project_skill::Column as C;
use models::projects::project_skill::Entity as SkillEntity;
use models::repos::repo::Model as RepoModel;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set};
use sha1::Digest;
use std::path::Path;
use uuid::Uuid;

/// Skill root directories (relative to the repository root), each paired with
/// the system name recorded for skills found beneath it.
const SKILL_ROOTS: &[(&str, &str)] = &[(".claude/skills", "claude"), (".codex/skills", "codex")];

/// System name recorded for root-level skill packs (`<dir>/SKILL.md`).
const ROOT_SKILL_SYSTEM: &str = "root";

/// Returns `false` for directory names that a scan must not walk into.
fn should_descend_dir(name: &str) -> bool {
    name != ".git"
}

/// Skill discovery result from a single repository.
#[derive(Debug)]
pub struct DiscoveredSkill {
    /// URL-safe slug derived from the directory name.
    pub slug: String,
    /// Human-readable name (from frontmatter or slug).
    pub name: String,
    /// Short description (from frontmatter).
    pub description: Option<String>,
    /// Raw markdown body after the frontmatter.
    pub content: String,
    /// Parsed frontmatter as JSON.
    pub metadata: serde_json::Value,
    /// Relative path of the SKILL.md file in the repository.
    pub relative_path: Option<String>,
    /// Skill system/source directory, e.g. "claude" or "codex".
    pub system: Option<String>,
    /// Git commit SHA where this skill was found. `parse_skill_file` leaves it
    /// `None`; it is filled in later by whoever knows the commit.
    pub commit_sha: Option<String>,
    /// Git blob SHA-1 of the SKILL.md file.
    pub blob_hash: Option<String>,
}

/// Compute the git blob SHA-1 hash of `content`.
///
/// Format: "blob {len}\0{data}", hex-encoded.
fn git_blob_hash(content: &[u8]) -> String {
    let mut hasher = sha1::Sha1::new();
    hasher.update(format!("blob {}\0", content.len()).as_bytes());
    hasher.update(content);
    hex::encode(hasher.finalize())
}

/// Parse a frontmatter block into JSON.
///
/// JSON is tried first, then YAML. Missing or unparseable frontmatter yields
/// `serde_json::Value::Null`.
fn parse_frontmatter(frontmatter: Option<&str>) -> serde_json::Value {
    let Some(fm) = frontmatter else {
        return serde_json::Value::Null;
    };
    serde_json::from_str::<serde_json::Value>(fm)
        .ok()
        .or_else(|| serde_yaml::from_str(fm).ok())
        .unwrap_or_default()
}

/// Parse a SKILL.md file and extract metadata + content.
///
/// `name` comes from the frontmatter `name` key when it is a string, otherwise
/// from `slug` with `-` and `_` replaced by spaces. The location fields
/// (`relative_path`, `system`, `commit_sha`, `blob_hash`) are left as `None`
/// for the caller to fill in.
fn parse_skill_file(slug: &str, raw: &str) -> DiscoveredSkill {
    let (frontmatter, content) = extract_frontmatter(raw);
    let metadata = parse_frontmatter(frontmatter);

    let name = metadata
        .get("name")
        .and_then(|v| v.as_str())
        .map(String::from)
        .unwrap_or_else(|| slug.replace(['-', '_'], " "));
    let description = metadata
        .get("description")
        .and_then(|v| v.as_str())
        .map(String::from);

    DiscoveredSkill {
        slug: slug.to_string(),
        name,
        description,
        content: content.trim().to_string(),
        metadata,
        relative_path: None,
        system: None,
        commit_sha: None,
        blob_hash: None,
    }
}

/// Split frontmatter (--- ... ---) from markdown content.
///
/// Returns `(Some(frontmatter), body)` when the input (after leading
/// whitespace) opens with `---` and a later line consists solely of `---`
/// (trailing whitespace allowed). Otherwise returns `(None, trimmed_input)`.
///
/// The closing delimiter must sit on its own line so that a `---` embedded in
/// a frontmatter value (e.g. `description: before --- after`) does not cut the
/// block short.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    let Some(after_open) = trimmed.strip_prefix("---") else {
        return (None, trimmed);
    };

    let mut offset = 0;
    for (idx, line) in after_open.split_inclusive('\n').enumerate() {
        // idx 0 is the remainder of the opening delimiter line, never the closer.
        if idx > 0 && line.trim_end() == "---" {
            let frontmatter = &after_open[..offset];
            let rest = after_open[offset + line.len()..].trim_start();
            return (Some(frontmatter), rest);
        }
        offset += line.len();
    }

    (None, trimmed)
}

/// Recursively scan supported skill roots for `SKILL.md` files (filesystem walk, non-bare repos).
/// The skill slug is `{short_repo_id}/{system}/{relative_skill_dir}`.
pub fn scan_repo_for_skills( repo_path: &Path, repo_id: Uuid, ) -> Result, AppError> { let repo_id_prefix = &repo_id.to_string()[..8]; let mut discovered = Vec::new(); for (root, system) in SKILL_ROOTS { let root_path = repo_path.join(root); if root_path.exists() { scan_skill_root_fs(&root_path, repo_id_prefix, system, root, &mut discovered); } } scan_root_skill_pack_fs(repo_path, repo_id_prefix, &mut discovered); Ok(discovered) } fn scan_skill_root_fs( root_path: &Path, repo_id_prefix: &str, system: &str, root: &str, discovered: &mut Vec, ) { let mut stack = vec![root_path.to_path_buf()]; while let Some(dir) = stack.pop() { let entries = match std::fs::read_dir(&dir) { Ok(e) => e, Err(_) => continue, }; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { stack.push(path); continue; } if !is_skill_file_name(&path) { continue; } let Some(parent) = path.parent() else { continue; }; let relative_skill_dir = parent .strip_prefix(root_path) .ok() .and_then(path_to_slug) .filter(|s| !s.is_empty()); let Some(relative_skill_dir) = relative_skill_dir else { continue; }; let slug = format!("{}/{}/{}", repo_id_prefix, system, relative_skill_dir); if let Ok(raw) = std::fs::read(&path) { let blob_hash = git_blob_hash(&raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(&raw)); skill.blob_hash = Some(blob_hash); skill.system = Some(system.to_string()); skill.relative_path = Some(format!("{}/{}/SKILL.md", root, relative_skill_dir)); skill.metadata = enrich_metadata(skill.metadata, system, skill.relative_path.as_deref()); discovered.push(skill); } } } } fn scan_root_skill_pack_fs( repo_path: &Path, repo_id_prefix: &str, discovered: &mut Vec, ) { let entries = match std::fs::read_dir(repo_path) { Ok(entries) => entries, Err(_) => return, }; for entry in entries.flatten() { let path = entry.path(); if !path.is_dir() { continue; } let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) else { continue; }; if dir_name == ".git" || 
dir_name == ".claude" || dir_name == ".codex" { continue; } let skill_file = path.join("SKILL.md"); if !skill_file.exists() { continue; } let relative_skill_dir = slugify_segment(dir_name); if relative_skill_dir.is_empty() { continue; } let slug = format!("{}/{}", repo_id_prefix, relative_skill_dir); if let Ok(raw) = std::fs::read(&skill_file) { let blob_hash = git_blob_hash(&raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(&raw)); skill.blob_hash = Some(blob_hash); skill.system = Some(ROOT_SKILL_SYSTEM.to_string()); skill.relative_path = Some(format!("{}/SKILL.md", relative_skill_dir)); skill.metadata = enrich_metadata( skill.metadata, ROOT_SKILL_SYSTEM, skill.relative_path.as_deref(), ); discovered.push(skill); } } } /// Scan git tree objects for `SKILL.md` files (works for bare repos). /// Traverses the HEAD commit tree using libgit2, reading blob content from objects. pub fn scan_repo_tree_for_skills( git_repo: &Repository, repo_id: Uuid, ) -> Result, AppError> { let repo_id_prefix = &repo_id.to_string()[..8]; let head = git_repo .head() .map_err(|e| AppError::InternalServerError(format!("no HEAD: {e}")))?; let tree = head .peel_to_tree() .map_err(|e| AppError::InternalServerError(format!("no tree: {e}")))?; let mut discovered = Vec::new(); let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; while let Some((current_tree, prefix)) = stack.pop() { for entry in current_tree.iter() { let name = match entry.name() { Some(n) => n, None => continue, }; let entry_path = if prefix.is_empty() { name.to_string() } else { format!("{}/{}", prefix, name) }; match entry.kind() { Some(git2::ObjectType::Tree) => { if should_descend_dir(name) { if let Ok(subtree) = entry.to_object(git_repo).and_then(|o| o.peel_to_tree()) { stack.push((subtree, entry_path)); } } } Some(git2::ObjectType::Blob) if name.eq_ignore_ascii_case("SKILL.md") => { let Some((system, relative_skill_dir, legacy_slug)) = skill_location_from_path(&entry_path) else { 
continue; }; let slug = if legacy_slug { format!("{}/{}", repo_id_prefix, relative_skill_dir) } else { format!("{}/{}/{}", repo_id_prefix, system, relative_skill_dir) }; if let Ok(blob) = entry.to_object(git_repo).and_then(|o| o.peel_to_blob()) { let raw = blob.content(); let blob_hash = git_blob_hash(raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(raw)); skill.blob_hash = Some(blob_hash); skill.system = Some(system.to_string()); skill.relative_path = Some(entry_path.clone()); skill.metadata = enrich_metadata(skill.metadata, system, skill.relative_path.as_deref()); discovered.push(skill); } } _ => {} } } } Ok(discovered) } fn is_skill_file_name(path: &Path) -> bool { path.file_name() .and_then(|n| n.to_str()) .is_some_and(|name| name.eq_ignore_ascii_case("SKILL.md")) } fn path_to_slug(path: &Path) -> Option { let parts: Vec = path .components() .filter_map(|c| c.as_os_str().to_str()) .map(slugify_segment) .filter(|s| !s.is_empty()) .collect(); (!parts.is_empty()).then(|| parts.join("/")) } fn slugify_segment(input: &str) -> String { let mut out = String::with_capacity(input.len()); let mut last_dash = false; for ch in input.chars() { let ch = ch.to_ascii_lowercase(); if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' { out.push(ch); last_dash = false; } else if !last_dash { out.push('-'); last_dash = true; } } out.trim_matches('-').to_string() } fn skill_location_from_path(path: &str) -> Option<(&'static str, String, bool)> { let normalized = path.replace('\\', "/"); for (root, system) in SKILL_ROOTS { let prefix = format!("{}/", root); let suffix = "/SKILL.md"; if normalized.starts_with(&prefix) && normalized.ends_with(suffix) { let relative = &normalized[prefix.len()..normalized.len() - suffix.len()]; let slug = relative .split('/') .map(slugify_segment) .filter(|s| !s.is_empty()) .collect::>() .join("/"); if !slug.is_empty() { return Some((*system, slug, false)); } } } let suffix = "/SKILL.md"; if normalized.ends_with(suffix) && 
!normalized.starts_with('.') { let relative = &normalized[..normalized.len() - suffix.len()]; if !relative.contains('/') { let slug = slugify_segment(relative); if !slug.is_empty() { return Some((ROOT_SKILL_SYSTEM, slug, true)); } } } None } fn enrich_metadata( mut metadata: serde_json::Value, system: &str, relative_path: Option<&str>, ) -> serde_json::Value { if !metadata.is_object() { metadata = serde_json::json!({}); } if let Some(obj) = metadata.as_object_mut() { obj.entry("system") .or_insert_with(|| serde_json::Value::String(system.to_string())); if let Some(relative_path) = relative_path { obj.entry("path") .or_insert_with(|| serde_json::Value::String(relative_path.to_string())); } } metadata } #[cfg(test)] mod tests { use super::{scan_repo_for_skills, scan_repo_tree_for_skills}; use git2::{Repository, Signature}; use std::fs; use tempfile::tempdir; use uuid::Uuid; #[test] fn tree_scan_discovers_skills_under_hidden_dirs() { let temp = tempdir().expect("tempdir"); let repo = Repository::init(temp.path()).expect("init repo"); let skill_dir = temp .path() .join(".claude") .join("skills") .join("demo-skill"); fs::create_dir_all(&skill_dir).expect("create skill dir"); fs::write( skill_dir.join("SKILL.md"), "---\nname: Demo Skill\ndescription: test\n---\ncontent", ) .expect("write skill"); let mut index = repo.index().expect("index"); index .add_path(std::path::Path::new(".claude/skills/demo-skill/SKILL.md")) .expect("add skill"); index.write().expect("write index"); let tree_id = index.write_tree().expect("write tree"); let tree = repo.find_tree(tree_id).expect("find tree"); let sig = Signature::now("tester", "tester@example.com").expect("signature"); repo.commit(Some("HEAD"), &sig, &sig, "add skill", &tree, &[]) .expect("commit"); let discovered = scan_repo_tree_for_skills(&repo, Uuid::nil()).expect("scan tree"); assert_eq!(discovered.len(), 1); assert_eq!(discovered[0].name, "Demo Skill"); assert_eq!(discovered[0].slug, "00000000/claude/demo-skill"); 
assert_eq!( discovered[0].metadata["system"], serde_json::Value::String("claude".into()) ); } #[test] fn tree_scan_keeps_claude_and_codex_skills_separate() { let temp = tempdir().expect("tempdir"); let repo = Repository::init(temp.path()).expect("init repo"); for root in [".claude", ".codex"] { let skill_dir = temp.path().join(root).join("skills").join("review"); fs::create_dir_all(&skill_dir).expect("create skill dir"); fs::write( skill_dir.join("SKILL.md"), format!("---\nname: {} Review\n---\ncontent", root), ) .expect("write skill"); } let mut index = repo.index().expect("index"); index .add_path(std::path::Path::new(".claude/skills/review/SKILL.md")) .expect("add claude skill"); index .add_path(std::path::Path::new(".codex/skills/review/SKILL.md")) .expect("add codex skill"); index.write().expect("write index"); let tree_id = index.write_tree().expect("write tree"); let tree = repo.find_tree(tree_id).expect("find tree"); let sig = Signature::now("tester", "tester@example.com").expect("signature"); repo.commit(Some("HEAD"), &sig, &sig, "add skills", &tree, &[]) .expect("commit"); let mut slugs = scan_repo_tree_for_skills(&repo, Uuid::nil()) .expect("scan tree") .into_iter() .map(|s| s.slug) .collect::>(); slugs.sort(); assert_eq!( slugs, vec!["00000000/claude/review", "00000000/codex/review"] ); } #[test] fn scans_root_level_skill_pack_layout() { let temp = tempdir().expect("tempdir"); let repo = Repository::init(temp.path()).expect("init repo"); let skill_dir = temp.path().join("code-review"); fs::create_dir_all(&skill_dir).expect("create skill dir"); fs::write(skill_dir.join("SKILL.md"), "# Code Review\n\ncontent").expect("write skill"); let mut index = repo.index().expect("index"); index .add_path(std::path::Path::new("code-review/SKILL.md")) .expect("add skill"); index.write().expect("write index"); let tree_id = index.write_tree().expect("write tree"); let tree = repo.find_tree(tree_id).expect("find tree"); let sig = Signature::now("tester", 
"tester@example.com").expect("signature"); repo.commit(Some("HEAD"), &sig, &sig, "add skill", &tree, &[]) .expect("commit"); let fs_discovered = scan_repo_for_skills(temp.path(), Uuid::nil()).expect("scan fs"); let tree_discovered = scan_repo_tree_for_skills(&repo, Uuid::nil()).expect("scan tree"); assert_eq!(fs_discovered.len(), 1); assert_eq!(tree_discovered.len(), 1); assert_eq!(fs_discovered[0].slug, "00000000/code-review"); assert_eq!(tree_discovered[0].slug, "00000000/code-review"); assert_eq!( tree_discovered[0].metadata["system"], serde_json::Value::String("root".into()) ); } } /// Scan a git2::Repository for skills and upsert them into the database. /// Uses filesystem walk for normal repos, git tree traversal for bare repos. pub async fn scan_and_sync_skills( db: &db::database::AppDatabase, project_uuid: Uuid, repo: &RepoModel, ) -> Result { // Open with git2 to get the actual workdir let git_repo = match Repository::open(&repo.storage_path) { Ok(r) => r, Err(e) => { tracing::warn!("failed to open git repo {}: {:?}", repo.storage_path, e); return Ok(ScanSyncResult { discovered: 0, created: 0, updated: 0, removed: 0, }); } }; let commit_sha = git_repo .head() .ok() .and_then(|h| h.target()) .map(|oid| oid.to_string()); // For bare repos (no workdir), scan git tree objects directly let mut discovered = if git_repo.is_bare() || git_repo.workdir().is_none() { match scan_repo_tree_for_skills(&git_repo, repo.id) { Ok(skills) => skills, Err(e) => { tracing::warn!("tree scan failed for repo {}: {:?}", repo.storage_path, e); vec![] } } } else { let workdir = git_repo.workdir().unwrap(); scan_repo_for_skills(workdir, repo.id)? }; // Fill in commit_sha for discovered skills for skill in &mut discovered { skill.commit_sha = commit_sha.clone(); } sync_discovered_skills(db, project_uuid, repo.id, discovered).await } /// Sync discovered skills by stable slug. 
async fn sync_discovered_skills( db: &db::database::AppDatabase, project_uuid: Uuid, repo_id: Uuid, discovered: Vec, ) -> Result { if discovered.is_empty() { return Ok(ScanSyncResult { discovered: 0, created: 0, updated: 0, removed: 0, }); } let now = Utc::now(); let mut created = 0i64; let mut updated = 0i64; // Deduplicate by slug. The slug includes repo prefix + skill system + relative skill path. let mut deduped: std::collections::HashMap = std::collections::HashMap::new(); for skill in discovered { match deduped.get(&skill.slug) { Some(existing) => { // Keep the one with the later commit_sha if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) { deduped.insert(skill.slug.clone(), skill); } } None => { deduped.insert(skill.slug.clone(), skill); } } } // Query existing skills for this repo let existing: Vec<_> = SkillEntity::find() .filter(C::ProjectUuid.eq(project_uuid)) .filter(C::Source.eq("repo")) .filter(C::RepoId.eq(repo_id)) .all(db) .await?; let existing_by_slug: std::collections::HashMap<_, _> = existing.into_iter().map(|s| (s.slug.clone(), s)).collect(); let mut seen_keys = std::collections::HashSet::new(); let discovered_count = deduped.len() as i64; for (key, skill) in deduped { seen_keys.insert(key.clone()); let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default(); if let Some(existing_skill) = existing_by_slug.get(&key) { if existing_skill.content != skill.content || existing_skill.metadata != json_meta || existing_skill.commit_sha != skill.commit_sha || existing_skill.blob_hash != skill.blob_hash || existing_skill.name != skill.name || existing_skill.description != skill.description { let mut active: SkillActiveModel = existing_skill.clone().into(); active.name = Set(skill.name); active.description = Set(skill.description); active.content = Set(skill.content); active.metadata = Set(json_meta); active.commit_sha = Set(skill.commit_sha); active.blob_hash = 
Set(skill.blob_hash); active.updated_at = Set(now); active.update(db).await?; updated += 1; } } else { let active = SkillActiveModel { id: Set(0), project_uuid: Set(project_uuid), slug: Set(skill.slug), name: Set(skill.name), description: Set(skill.description), source: Set("repo".to_string()), repo_id: Set(Some(repo_id)), commit_sha: Set(skill.commit_sha), blob_hash: Set(skill.blob_hash), content: Set(skill.content), metadata: Set(json_meta), enabled: Set(true), created_by: Set(None), created_at: Set(now), updated_at: Set(now), }; active.insert(db).await?; created += 1; } } // Remove skills that no longer exist in the repo let mut removed = 0i64; for (key, old_skill) in existing_by_slug { if !seen_keys.contains(&key) { SkillEntity::delete_by_id(old_skill.id).exec(db).await?; removed += 1; } } Ok(ScanSyncResult { discovered: discovered_count, created, updated, removed, }) } /// Result of a scan + sync operation. #[derive(Debug)] pub struct ScanSyncResult { pub discovered: i64, pub created: i64, pub updated: i64, pub removed: i64, }