//! Repository skill scanner. //! //! Scans repositories for SKILL.md files and upserts skill records. use crate::error::AppError; use chrono::Utc; use git2::Repository; use models::ActiveModelTrait; use models::projects::project_skill::ActiveModel as SkillActiveModel; use models::projects::project_skill::Column as C; use models::projects::project_skill::Entity as SkillEntity; use models::repos::repo::Model as RepoModel; use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set}; use sha1::Digest; use std::path::Path; use uuid::Uuid; /// Skill discovery result from a single repository. #[derive(Debug)] pub struct DiscoveredSkill { /// URL-safe slug derived from the directory name. pub slug: String, /// Human-readable name (from frontmatter or slug). pub name: String, /// Short description (from frontmatter). pub description: Option, /// Raw markdown body after the frontmatter. pub content: String, /// Parsed frontmatter as JSON. pub metadata: serde_json::Value, /// Git commit SHA where this skill was found (git hook path only). pub commit_sha: Option, /// Git blob SHA-1 of the SKILL.md file. pub blob_hash: Option, } /// Compute the git blob SHA-1 hash of `content`. /// Format: "blob {len}\0{data}" fn git_blob_hash(content: &[u8]) -> String { let size = content.len(); let header = format!("blob {}\0", size); let mut hasher = sha1::Sha1::new(); hasher.update(header.as_bytes()); hasher.update(content); hex::encode(hasher.finalize()) } /// Parse a SKILL.md file and extract metadata + content. 
/// Builds a [`DiscoveredSkill`] for `slug` from the raw file text `raw`.
///
/// The frontmatter, when present, is decoded as JSON; text that fails to decode
/// yields default (null) metadata instead of an error. `name` comes from the
/// `name` metadata key, falling back to the slug with `-` and `_` replaced by
/// spaces. `description` comes from the `description` key only. `commit_sha`
/// and `blob_hash` are left as `None` for the caller to fill in.
fn parse_skill_file(slug: &str, raw: &str) -> DiscoveredSkill {
    let (frontmatter, body) = extract_frontmatter(raw);

    // NOTE(review): the frontmatter is decoded as JSON, so YAML-style
    // frontmatter ends up as null metadata — confirm this is intended.
    let metadata: serde_json::Value = frontmatter
        .and_then(|fm| serde_json::from_str(fm).ok())
        .unwrap_or_default();

    let name = metadata
        .get("name")
        .and_then(|v| v.as_str())
        .map_or_else(|| slug.replace(['-', '_'], " "), String::from);

    let description = metadata
        .get("description")
        .and_then(|v| v.as_str())
        .map(String::from);

    DiscoveredSkill {
        slug: slug.to_string(),
        name,
        description,
        content: body.trim().to_string(),
        metadata,
        commit_sha: None,
        blob_hash: None,
    }
}

/// Split frontmatter (--- ... ---) from markdown content.
///
/// Returns `(frontmatter, rest)`. `frontmatter` is the text between the
/// opening and closing `---` (surrounding newlines included) and `rest` is
/// everything after the closing delimiter with leading whitespace removed.
/// When `raw` does not start with `---`, or no closing delimiter exists, the
/// result is `(None, raw.trim_start())`.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    const DELIM: &str = "---";

    let trimmed = raw.trim_start();
    let Some(after_open) = trimmed.strip_prefix(DELIM) else {
        return (None, trimmed);
    };

    // Prefer a closing delimiter that starts a line, so a `---` embedded in a
    // frontmatter value does not cut the frontmatter short. Fall back to the
    // first `---` anywhere to keep accepting single-line frontmatter.
    let close = after_open
        .find("\n---")
        .map(|newline_at| newline_at + 1)
        .or_else(|| after_open.find(DELIM));

    match close {
        Some(end) => {
            let frontmatter = &after_open[..end];
            let rest = after_open[end + DELIM.len()..].trim_start();
            (Some(frontmatter), rest)
        }
        None => (None, trimmed),
    }
}

/// Recursively scan `repo_path` for `SKILL.md` files (filesystem walk, non-bare repos).
/// The skill slug is `{short_repo_id}/{parent_dir_name}` to ensure uniqueness across repos.
pub fn scan_repo_for_skills( repo_path: &Path, repo_id: Uuid, ) -> Result, AppError> { let repo_id_prefix = &repo_id.to_string()[..8]; let mut discovered = Vec::new(); let mut stack = vec![repo_path.to_path_buf()]; while let Some(dir) = stack.pop() { let entries = match std::fs::read_dir(&dir) { Ok(e) => e, Err(_) => continue, }; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { stack.push(path); } else if path .file_name() .and_then(|n| n.to_str()) .map(|s| s.to_lowercase()) == Some("skill.md".to_string()) { if let Some(dir_name) = path.parent() .and_then(|p| p.file_name()) .and_then(|n| n.to_str()) .filter(|s| !s.starts_with('.')) { let slug = format!("{}/{}", repo_id_prefix, dir_name); if let Ok(raw) = std::fs::read(&path) { let blob_hash = git_blob_hash(&raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(&raw)); skill.blob_hash = Some(blob_hash); discovered.push(skill); } } } } } Ok(discovered) } /// Scan git tree objects for `SKILL.md` files (works for bare repos). /// Traverses the HEAD commit tree using libgit2, reading blob content from objects. 
pub fn scan_repo_tree_for_skills( git_repo: &Repository, repo_id: Uuid, ) -> Result, AppError> { let repo_id_prefix = &repo_id.to_string()[..8]; let head = git_repo .head() .map_err(|e| AppError::InternalServerError(format!("no HEAD: {e}")))?; let tree = head .peel_to_tree() .map_err(|e| AppError::InternalServerError(format!("no tree: {e}")))?; let mut discovered = Vec::new(); // Stack: (tree, path_prefix relative to root) let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())]; while let Some((current_tree, prefix)) = stack.pop() { for entry in current_tree.iter() { let name = match entry.name() { Some(n) => n, None => continue, }; let entry_path = if prefix.is_empty() { name.to_string() } else { format!("{}/{}", prefix, name) }; match entry.kind() { Some(git2::ObjectType::Tree) => { if !name.starts_with('.') { if let Ok(subtree) = entry.to_object(git_repo).and_then(|o| o.peel_to_tree()) { stack.push((subtree, entry_path)); } } } Some(git2::ObjectType::Blob) if name.to_lowercase() == "skill.md" => { // Derive skill name from parent directory let dir_name = std::path::Path::new(&entry_path) .parent() .and_then(|p| p.file_name()) .and_then(|n| n.to_str()) .filter(|s| !s.starts_with('.')); let Some(dir_name) = dir_name else { continue }; let slug = format!("{}/{}", repo_id_prefix, dir_name); if let Ok(blob) = entry.to_object(git_repo).and_then(|o| o.peel_to_blob()) { let raw = blob.content(); let blob_hash = git_blob_hash(raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(raw)); skill.blob_hash = Some(blob_hash); discovered.push(skill); } } _ => {} } } } Ok(discovered) } /// Scan a git2::Repository for skills and upsert them into the database. /// Uses filesystem walk for normal repos, git tree traversal for bare repos. 
pub async fn scan_and_sync_skills( db: &db::database::AppDatabase, project_uuid: Uuid, repo: &RepoModel, ) -> Result { // Open with git2 to get the actual workdir let git_repo = match Repository::open(&repo.storage_path) { Ok(r) => r, Err(e) => { tracing::warn!("failed to open git repo {}: {:?}", repo.storage_path, e); return Ok(ScanSyncResult { discovered: 0, created: 0, updated: 0, removed: 0, }); } }; let commit_sha = git_repo.head().ok().and_then(|h| h.target()).map(|oid| oid.to_string()); // For bare repos (no workdir), scan git tree objects directly let mut discovered = if git_repo.is_bare() || git_repo.workdir().is_none() { match scan_repo_tree_for_skills(&git_repo, repo.id) { Ok(skills) => skills, Err(e) => { tracing::warn!("tree scan failed for repo {}: {:?}", repo.storage_path, e); vec![] } } } else { let workdir = git_repo.workdir().unwrap(); scan_repo_for_skills(workdir, repo.id)? }; // Fill in commit_sha for discovered skills for skill in &mut discovered { skill.commit_sha = commit_sha.clone(); } sync_discovered_skills(db, project_uuid, repo.id, discovered).await } /// Sync discovered skills with deduplication by {repo_id}+{blob_hash}. 
async fn sync_discovered_skills( db: &db::database::AppDatabase, project_uuid: Uuid, repo_id: Uuid, discovered: Vec, ) -> Result { if discovered.is_empty() { return Ok(ScanSyncResult { discovered: 0, created: 0, updated: 0, removed: 0, }); } let now = Utc::now(); let mut created = 0i64; let mut updated = 0i64; // Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha let mut deduped: std::collections::HashMap = std::collections::HashMap::new(); for skill in discovered { let key = format!("{}:{}", repo_id, skill.blob_hash.as_ref().unwrap_or(&skill.slug)); match deduped.get(&key) { Some(existing) => { // Keep the one with the later commit_sha if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) { deduped.insert(key, skill); } } None => { deduped.insert(key, skill); } } } // Query existing skills for this repo let existing: Vec<_> = SkillEntity::find() .filter(C::ProjectUuid.eq(project_uuid)) .filter(C::Source.eq("repo")) .filter(C::RepoId.eq(repo_id)) .all(db) .await?; let existing_by_hash: std::collections::HashMap<_, _> = existing .into_iter() .map(|s| { let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default()); (key, s) }) .collect(); let mut seen_keys = std::collections::HashSet::new(); let discovered_count = deduped.len() as i64; for (key, skill) in deduped { seen_keys.insert(key.clone()); let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default(); if let Some(existing_skill) = existing_by_hash.get(&key) { if existing_skill.content != skill.content || existing_skill.metadata != json_meta || existing_skill.commit_sha != skill.commit_sha { let mut active: SkillActiveModel = existing_skill.clone().into(); active.content = Set(skill.content); active.metadata = Set(json_meta); active.commit_sha = Set(skill.commit_sha); active.blob_hash = Set(skill.blob_hash); active.updated_at = Set(now); active.update(db).await?; updated += 1; } } else { let 
active = SkillActiveModel { id: Set(0), project_uuid: Set(project_uuid), slug: Set(skill.slug), name: Set(skill.name), description: Set(skill.description), source: Set("repo".to_string()), repo_id: Set(Some(repo_id)), commit_sha: Set(skill.commit_sha), blob_hash: Set(skill.blob_hash), content: Set(skill.content), metadata: Set(json_meta), enabled: Set(true), created_by: Set(None), created_at: Set(now), updated_at: Set(now), }; active.insert(db).await?; created += 1; } } // Remove skills that no longer exist in the repo let mut removed = 0i64; for (key, old_skill) in existing_by_hash { if !seen_keys.contains(&key) { SkillEntity::delete_by_id(old_skill.id).exec(db).await?; removed += 1; } } Ok(ScanSyncResult { discovered: discovered_count, created, updated, removed, }) } /// Result of a scan + sync operation. #[derive(Debug)] pub struct ScanSyncResult { pub discovered: i64, pub created: i64, pub updated: i64, pub removed: i64, }