//! Repository skill scanner. //! //! Scans repositories for SKILL.md files and upserts skill records. use crate::error::AppError; use chrono::Utc; use models::ActiveModelTrait; use models::projects::project_skill::ActiveModel as SkillActiveModel; use models::projects::project_skill::Column as C; use models::projects::project_skill::Entity as SkillEntity; use models::repos::repo::Model as RepoModel; use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set}; use sha1::Digest; use std::path::Path; use uuid::Uuid; /// Skill discovery result from a single repository. #[derive(Debug)] pub struct DiscoveredSkill { /// URL-safe slug derived from the directory name. pub slug: String, /// Human-readable name (from frontmatter or slug). pub name: String, /// Short description (from frontmatter). pub description: Option, /// Raw markdown body after the frontmatter. pub content: String, /// Parsed frontmatter as JSON. pub metadata: serde_json::Value, /// Git commit SHA where this skill was found (git hook path only). pub commit_sha: Option, /// Git blob SHA-1 of the SKILL.md file. pub blob_hash: Option, } /// Compute the git blob SHA-1 hash of `content`. /// Format: "blob {len}\0{data}" fn git_blob_hash(content: &[u8]) -> String { let size = content.len(); let header = format!("blob {}\0", size); let mut hasher = sha1::Sha1::new(); hasher.update(header.as_bytes()); hasher.update(content); hex::encode(hasher.finalize()) } /// Parse a SKILL.md file and extract metadata + content. 
fn parse_skill_file(slug: &str, raw: &str) -> DiscoveredSkill {
    let (frontmatter, body) = extract_frontmatter(raw);

    // Decode the frontmatter; anything missing or unparseable collapses to
    // `Value::Null`, and every lookup below then falls through to a default.
    // NOTE(review): Markdown frontmatter is conventionally YAML — confirm the
    // SKILL.md files in these repos really carry JSON frontmatter.
    let metadata = frontmatter
        .and_then(|fm| serde_json::from_str::<serde_json::Value>(fm).ok())
        .unwrap_or(serde_json::Value::Null);

    // Prefer the explicit frontmatter name; otherwise derive a readable one
    // from the slug by turning separators into spaces.
    let name = match metadata.get("name").and_then(serde_json::Value::as_str) {
        Some(explicit) => explicit.to_owned(),
        None => slug.replace('-', " ").replace('_', " "),
    };

    let description = metadata
        .get("description")
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned);

    DiscoveredSkill {
        slug: slug.to_string(),
        name,
        description,
        content: body.trim().to_string(),
        metadata,
        // Populated later by the callers that know the git context.
        commit_sha: None,
        blob_hash: None,
    }
}

/// Split frontmatter (--- ... ---) from markdown content.
///
/// Returns `(Some(frontmatter), body)` when an opening and closing `---`
/// fence are present, otherwise `(None, trimmed_input)`.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    match trimmed.strip_prefix("---") {
        // No opening fence: the whole document is body.
        None => (None, trimmed),
        Some(after_open) => match after_open.find("---") {
            // Opening fence without a closing one: treat as plain body.
            None => (None, trimmed),
            Some(close) => {
                let fm = &after_open[..close];
                let body = after_open[close + 3..].trim_start();
                (Some(fm), body)
            }
        },
    }
}

/// Recursively scan `repo_path` for `SKILL.md` files.
/// The skill slug is `{short_repo_id}/{parent_dir_name}` to ensure uniqueness across repos.
pub fn scan_repo_for_skills( repo_path: &Path, repo_id: Uuid, ) -> Result, AppError> { let repo_id_prefix = &repo_id.to_string()[..8]; let mut discovered = Vec::new(); let mut stack = vec![repo_path.to_path_buf()]; while let Some(dir) = stack.pop() { let entries = match std::fs::read_dir(&dir) { Ok(e) => e, Err(_) => continue, }; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { stack.push(path); } else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") { if let Some(dir_name) = path.parent() .and_then(|p| p.file_name()) .and_then(|n| n.to_str()) .filter(|s| !s.starts_with('.')) { let slug = format!("{}/{}", repo_id_prefix, dir_name); if let Ok(raw) = std::fs::read(&path) { let blob_hash = git_blob_hash(&raw); let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(&raw)); skill.blob_hash = Some(blob_hash); discovered.push(skill); } } } } } Ok(discovered) } /// Scan a git2::Repository for skills and upsert them into the database. /// Called from the git hook sync path. 
pub async fn scan_and_sync_skills( db: &db::database::AppDatabase, project_uuid: Uuid, repo: &RepoModel, ) -> Result { // Resolve the repo path let storage_path = Path::new(&repo.storage_path); let discovered = scan_repo_for_skills(storage_path, repo.id)?; if discovered.is_empty() { return Ok(ScanSyncResult { discovered: 0, created: 0, updated: 0, removed: 0, }); } let now = Utc::now(); let mut created = 0i64; let mut updated = 0i64; // Collect all repo-sourced skills in this repo for this project let existing: Vec<_> = SkillEntity::find() .filter(C::ProjectUuid.eq(project_uuid)) .filter(C::Source.eq("repo")) .filter(C::RepoId.eq(repo.id)) .all(db) .await?; let existing_by_slug: std::collections::HashMap<_, _> = existing .into_iter() .map(|s| (s.slug.clone(), s)) .collect(); let mut seen_slugs = std::collections::HashSet::new(); let discovered_count = discovered.len() as i64; for skill in discovered { seen_slugs.insert(skill.slug.clone()); let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default(); if let Some(existing_skill) = existing_by_slug.get(&skill.slug) { if existing_skill.content != skill.content || existing_skill.metadata != json_meta || existing_skill.blob_hash != skill.blob_hash { let mut active: SkillActiveModel = existing_skill.clone().into(); active.content = Set(skill.content); active.metadata = Set(json_meta); active.commit_sha = Set(skill.commit_sha.clone()); active.blob_hash = Set(skill.blob_hash.clone()); active.updated_at = Set(now); active.update(db).await?; updated += 1; } } else { let active = SkillActiveModel { id: Set(0), project_uuid: Set(project_uuid), slug: Set(skill.slug.clone()), name: Set(skill.name), description: Set(skill.description), source: Set("repo".to_string()), repo_id: Set(Some(repo.id)), commit_sha: Set(skill.commit_sha.clone()), blob_hash: Set(skill.blob_hash.clone()), content: Set(skill.content), metadata: Set(json_meta), enabled: Set(true), created_by: Set(None), created_at: Set(now), updated_at: 
Set(now), }; active.insert(db).await?; created += 1; } } // Remove skills that no longer exist in the repo let mut removed = 0i64; for (slug, old_skill) in existing_by_slug { if !seen_slugs.contains(&slug) { SkillEntity::delete_by_id(old_skill.id).exec(db).await?; removed += 1; } } Ok(ScanSyncResult { discovered: discovered_count, created, updated, removed, }) } /// Result of a scan + sync operation. #[derive(Debug)] pub struct ScanSyncResult { pub discovered: i64, pub created: i64, pub updated: i64, pub removed: i64, }