fix(skill): deduplicate skills by repo_id+blob_hash
- Change deduplication key from slug to {repo_id}+{blob_hash}
- Keep latest version by commit_sha when duplicates found
- Use git2 to open repos and get correct workdir and commit_sha
- Fix case-insensitive SKILL.md detection in scanner
This commit is contained in:
parent
18917b6de1
commit
2db7934596
@ -24,17 +24,16 @@ impl AppService {
|
||||
project_uid: Uuid,
|
||||
_caller_uid: Uuid,
|
||||
) -> Result<scanner::ScanSyncResult, AppError> {
|
||||
// Collect all repo IDs for this project
|
||||
let repos: Vec<_> = RepoEntity::find()
|
||||
.filter(RCol::Project.eq(project_uid))
|
||||
.all(&self.db)
|
||||
.await?;
|
||||
|
||||
let mut total_created = 0i64;
|
||||
let mut total_updated = 0i64;
|
||||
let mut total_removed = 0i64;
|
||||
let mut total_discovered = 0i64;
|
||||
|
||||
let repos: Vec<_> = RepoEntity::find()
|
||||
.filter(RCol::Project.eq(project_uid))
|
||||
.all(&self.db)
|
||||
.await?;
|
||||
|
||||
for repo in repos {
|
||||
let result = scanner::scan_and_sync_skills(&self.db, project_uid, &repo).await?;
|
||||
total_created += result.created;
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
|
||||
use crate::error::AppError;
|
||||
use chrono::Utc;
|
||||
use git2::Repository;
|
||||
use models::ActiveModelTrait;
|
||||
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
||||
use models::projects::project_skill::Column as C;
|
||||
@ -108,7 +109,12 @@ pub fn scan_repo_for_skills(
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
stack.push(path);
|
||||
} else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") {
|
||||
} else if path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(|s| s.to_lowercase())
|
||||
== Some("skill.md".to_string())
|
||||
{
|
||||
if let Some(dir_name) = path.parent()
|
||||
.and_then(|p| p.file_name())
|
||||
.and_then(|n| n.to_str())
|
||||
@ -136,10 +142,40 @@ pub async fn scan_and_sync_skills(
|
||||
project_uuid: Uuid,
|
||||
repo: &RepoModel,
|
||||
) -> Result<ScanSyncResult, AppError> {
|
||||
// Resolve the repo path
|
||||
let storage_path = Path::new(&repo.storage_path);
|
||||
let discovered = scan_repo_for_skills(storage_path, repo.id)?;
|
||||
// Open with git2 to get the actual workdir
|
||||
let git_repo = match Repository::open(&repo.storage_path) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to open git repo {}: {:?}", repo.storage_path, e);
|
||||
return Ok(ScanSyncResult {
|
||||
discovered: 0,
|
||||
created: 0,
|
||||
updated: 0,
|
||||
removed: 0,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let workdir = git_repo.workdir().map(|p| p.to_path_buf()).unwrap_or_else(|| Path::new(&repo.storage_path).to_path_buf());
|
||||
let commit_sha = git_repo.head().ok().and_then(|h| h.target()).map(|oid| oid.to_string());
|
||||
|
||||
let mut discovered = scan_repo_for_skills(&workdir, repo.id)?;
|
||||
|
||||
// Fill in commit_sha for discovered skills
|
||||
for skill in &mut discovered {
|
||||
skill.commit_sha = commit_sha.clone();
|
||||
}
|
||||
|
||||
sync_discovered_skills(db, project_uuid, repo.id, discovered).await
|
||||
}
|
||||
|
||||
/// Sync discovered skills with deduplication by {repo_id}+{blob_hash}.
|
||||
async fn sync_discovered_skills(
|
||||
db: &db::database::AppDatabase,
|
||||
project_uuid: Uuid,
|
||||
repo_id: Uuid,
|
||||
discovered: Vec<DiscoveredSkill>,
|
||||
) -> Result<ScanSyncResult, AppError> {
|
||||
if discovered.is_empty() {
|
||||
return Ok(ScanSyncResult {
|
||||
discovered: 0,
|
||||
@ -153,37 +189,56 @@ pub async fn scan_and_sync_skills(
|
||||
let mut created = 0i64;
|
||||
let mut updated = 0i64;
|
||||
|
||||
// Collect all repo-sourced skills in this repo for this project
|
||||
// Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha
|
||||
let mut deduped: std::collections::HashMap<String, DiscoveredSkill> = std::collections::HashMap::new();
|
||||
for skill in discovered {
|
||||
let key = format!("{}:{}", repo_id, skill.blob_hash.as_ref().unwrap_or(&skill.slug));
|
||||
match deduped.get(&key) {
|
||||
Some(existing) => {
|
||||
// Keep the one with the later commit_sha
|
||||
if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) {
|
||||
deduped.insert(key, skill);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
deduped.insert(key, skill);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Query existing skills for this repo
|
||||
let existing: Vec<_> = SkillEntity::find()
|
||||
.filter(C::ProjectUuid.eq(project_uuid))
|
||||
.filter(C::Source.eq("repo"))
|
||||
.filter(C::RepoId.eq(repo.id))
|
||||
.filter(C::RepoId.eq(repo_id))
|
||||
.all(db)
|
||||
.await?;
|
||||
|
||||
let existing_by_slug: std::collections::HashMap<_, _> = existing
|
||||
let existing_by_hash: std::collections::HashMap<_, _> = existing
|
||||
.into_iter()
|
||||
.map(|s| (s.slug.clone(), s))
|
||||
.map(|s| {
|
||||
let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default());
|
||||
(key, s)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut seen_slugs = std::collections::HashSet::new();
|
||||
|
||||
let discovered_count = discovered.len() as i64;
|
||||
for skill in discovered {
|
||||
seen_slugs.insert(skill.slug.clone());
|
||||
let mut seen_keys = std::collections::HashSet::new();
|
||||
|
||||
let discovered_count = deduped.len() as i64;
|
||||
for (key, skill) in deduped {
|
||||
seen_keys.insert(key.clone());
|
||||
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
||||
|
||||
if let Some(existing_skill) = existing_by_slug.get(&skill.slug) {
|
||||
if let Some(existing_skill) = existing_by_hash.get(&key) {
|
||||
if existing_skill.content != skill.content
|
||||
|| existing_skill.metadata != json_meta
|
||||
|| existing_skill.blob_hash != skill.blob_hash
|
||||
|| existing_skill.commit_sha != skill.commit_sha
|
||||
{
|
||||
let mut active: SkillActiveModel = existing_skill.clone().into();
|
||||
active.content = Set(skill.content);
|
||||
active.metadata = Set(json_meta);
|
||||
active.commit_sha = Set(skill.commit_sha.clone());
|
||||
active.blob_hash = Set(skill.blob_hash.clone());
|
||||
active.commit_sha = Set(skill.commit_sha);
|
||||
active.blob_hash = Set(skill.blob_hash);
|
||||
active.updated_at = Set(now);
|
||||
active.update(db).await?;
|
||||
updated += 1;
|
||||
@ -192,13 +247,13 @@ pub async fn scan_and_sync_skills(
|
||||
let active = SkillActiveModel {
|
||||
id: Set(0),
|
||||
project_uuid: Set(project_uuid),
|
||||
slug: Set(skill.slug.clone()),
|
||||
slug: Set(skill.slug),
|
||||
name: Set(skill.name),
|
||||
description: Set(skill.description),
|
||||
source: Set("repo".to_string()),
|
||||
repo_id: Set(Some(repo.id)),
|
||||
commit_sha: Set(skill.commit_sha.clone()),
|
||||
blob_hash: Set(skill.blob_hash.clone()),
|
||||
repo_id: Set(Some(repo_id)),
|
||||
commit_sha: Set(skill.commit_sha),
|
||||
blob_hash: Set(skill.blob_hash),
|
||||
content: Set(skill.content),
|
||||
metadata: Set(json_meta),
|
||||
enabled: Set(true),
|
||||
@ -213,8 +268,8 @@ pub async fn scan_and_sync_skills(
|
||||
|
||||
// Remove skills that no longer exist in the repo
|
||||
let mut removed = 0i64;
|
||||
for (slug, old_skill) in existing_by_slug {
|
||||
if !seen_slugs.contains(&slug) {
|
||||
for (key, old_skill) in existing_by_hash {
|
||||
if !seen_keys.contains(&key) {
|
||||
SkillEntity::delete_by_id(old_skill.id).exec(db).await?;
|
||||
removed += 1;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user