diff --git a/libs/service/skill/scan.rs b/libs/service/skill/scan.rs index 37d901c..a2e1835 100644 --- a/libs/service/skill/scan.rs +++ b/libs/service/skill/scan.rs @@ -24,17 +24,16 @@ impl AppService { project_uid: Uuid, _caller_uid: Uuid, ) -> Result { - // Collect all repo IDs for this project - let repos: Vec<_> = RepoEntity::find() - .filter(RCol::Project.eq(project_uid)) - .all(&self.db) - .await?; - let mut total_created = 0i64; let mut total_updated = 0i64; let mut total_removed = 0i64; let mut total_discovered = 0i64; + let repos: Vec<_> = RepoEntity::find() + .filter(RCol::Project.eq(project_uid)) + .all(&self.db) + .await?; + for repo in repos { let result = scanner::scan_and_sync_skills(&self.db, project_uid, &repo).await?; total_created += result.created; diff --git a/libs/service/skill/scanner.rs b/libs/service/skill/scanner.rs index a4ce7f3..c4aba5c 100644 --- a/libs/service/skill/scanner.rs +++ b/libs/service/skill/scanner.rs @@ -4,6 +4,7 @@ use crate::error::AppError; use chrono::Utc; +use git2::Repository; use models::ActiveModelTrait; use models::projects::project_skill::ActiveModel as SkillActiveModel; use models::projects::project_skill::Column as C; @@ -108,7 +109,12 @@ pub fn scan_repo_for_skills( let path = entry.path(); if path.is_dir() { stack.push(path); - } else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") { + } else if path + .file_name() + .and_then(|n| n.to_str()) + .map(|s| s.to_lowercase()) + == Some("skill.md".to_string()) + { if let Some(dir_name) = path.parent() .and_then(|p| p.file_name()) .and_then(|n| n.to_str()) @@ -136,10 +142,40 @@ pub async fn scan_and_sync_skills( project_uuid: Uuid, repo: &RepoModel, ) -> Result { - // Resolve the repo path - let storage_path = Path::new(&repo.storage_path); - let discovered = scan_repo_for_skills(storage_path, repo.id)?; + // Open with git2 to get the actual workdir + let git_repo = match Repository::open(&repo.storage_path) { + Ok(r) => r, + Err(e) => { + tracing::warn!("failed to open git repo {}: {:?}", repo.storage_path, e); + return Ok(ScanSyncResult { + discovered: 0, + created: 0, + updated: 0, + removed: 0, + }); + } + }; + let workdir = git_repo.workdir().map(|p| p.to_path_buf()).unwrap_or_else(|| Path::new(&repo.storage_path).to_path_buf()); + let commit_sha = git_repo.head().ok().and_then(|h| h.target()).map(|oid| oid.to_string()); + + let mut discovered = scan_repo_for_skills(&workdir, repo.id)?; + + // Fill in commit_sha for discovered skills + for skill in &mut discovered { + skill.commit_sha = commit_sha.clone(); + } + + sync_discovered_skills(db, project_uuid, repo.id, discovered).await +} + +/// Sync discovered skills with deduplication by {repo_id}+{blob_hash}. +async fn sync_discovered_skills( + db: &db::database::AppDatabase, + project_uuid: Uuid, + repo_id: Uuid, + discovered: Vec, +) -> Result { if discovered.is_empty() { return Ok(ScanSyncResult { discovered: 0, @@ -153,37 +189,56 @@ pub async fn scan_and_sync_skills( let mut created = 0i64; let mut updated = 0i64; - // Collect all repo-sourced skills in this repo for this project + // Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha + let mut deduped: std::collections::HashMap = std::collections::HashMap::new(); + for skill in discovered { + let key = format!("{}:{}", repo_id, skill.blob_hash.as_ref().unwrap_or(&skill.slug)); + match deduped.get(&key) { + Some(existing) => { + // Keep the one with the later commit_sha + if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) { + deduped.insert(key, skill); + } + } + None => { + deduped.insert(key, skill); + } + } + } + + // Query existing skills for this repo let existing: Vec<_> = SkillEntity::find() .filter(C::ProjectUuid.eq(project_uuid)) .filter(C::Source.eq("repo")) - .filter(C::RepoId.eq(repo.id)) + .filter(C::RepoId.eq(repo_id)) .all(db) .await?; - let existing_by_slug: std::collections::HashMap<_, _> = existing + let existing_by_hash: std::collections::HashMap<_, _> = existing .into_iter() - .map(|s| (s.slug.clone(), s)) + .map(|s| { + let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default()); + (key, s) + }) .collect(); - let mut seen_slugs = std::collections::HashSet::new(); - - let discovered_count = discovered.len() as i64; - for skill in discovered { - seen_slugs.insert(skill.slug.clone()); + let mut seen_keys = std::collections::HashSet::new(); + let discovered_count = deduped.len() as i64; + for (key, skill) in deduped { + seen_keys.insert(key.clone()); let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default(); - if let Some(existing_skill) = existing_by_slug.get(&skill.slug) { + if let Some(existing_skill) = existing_by_hash.get(&key) { if existing_skill.content != skill.content || existing_skill.metadata != json_meta - || existing_skill.blob_hash != skill.blob_hash + || existing_skill.commit_sha != skill.commit_sha { let mut active: SkillActiveModel = existing_skill.clone().into(); active.content = Set(skill.content); active.metadata = Set(json_meta); - active.commit_sha = Set(skill.commit_sha.clone()); - active.blob_hash = Set(skill.blob_hash.clone()); + active.commit_sha = Set(skill.commit_sha); + active.blob_hash = Set(skill.blob_hash); active.updated_at = Set(now); active.update(db).await?; updated += 1; @@ -192,13 +247,13 @@ pub async fn scan_and_sync_skills( let active = SkillActiveModel { id: Set(0), project_uuid: Set(project_uuid), - slug: Set(skill.slug.clone()), + slug: Set(skill.slug), name: Set(skill.name), description: Set(skill.description), source: Set("repo".to_string()), - repo_id: Set(Some(repo.id)), - commit_sha: Set(skill.commit_sha.clone()), - blob_hash: Set(skill.blob_hash.clone()), + repo_id: Set(Some(repo_id)), + commit_sha: Set(skill.commit_sha), + blob_hash: Set(skill.blob_hash), content: Set(skill.content), metadata: Set(json_meta), enabled: Set(true), @@ -213,8 +268,8 @@ pub async fn scan_and_sync_skills( // Remove skills that no longer exist in the repo let mut removed = 0i64; - for (slug, old_skill) in existing_by_slug { - if !seen_slugs.contains(&slug) { + for (key, old_skill) in existing_by_hash { + if !seen_keys.contains(&key) { SkillEntity::delete_by_id(old_skill.id).exec(db).await?; removed += 1; }