fix(skill): deduplicate skills by repo_id+blob_hash
- Change deduplication key from slug to {repo_id}+{blob_hash}
- Keep latest version by commit_sha when duplicates found
- Use git2 to open repos and get correct workdir and commit_sha
- Fix case-insensitive SKILL.md detection in scanner
This commit is contained in:
parent
18917b6de1
commit
2db7934596
@ -24,17 +24,16 @@ impl AppService {
|
|||||||
project_uid: Uuid,
|
project_uid: Uuid,
|
||||||
_caller_uid: Uuid,
|
_caller_uid: Uuid,
|
||||||
) -> Result<scanner::ScanSyncResult, AppError> {
|
) -> Result<scanner::ScanSyncResult, AppError> {
|
||||||
// Collect all repo IDs for this project
|
|
||||||
let repos: Vec<_> = RepoEntity::find()
|
|
||||||
.filter(RCol::Project.eq(project_uid))
|
|
||||||
.all(&self.db)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
let mut total_created = 0i64;
|
let mut total_created = 0i64;
|
||||||
let mut total_updated = 0i64;
|
let mut total_updated = 0i64;
|
||||||
let mut total_removed = 0i64;
|
let mut total_removed = 0i64;
|
||||||
let mut total_discovered = 0i64;
|
let mut total_discovered = 0i64;
|
||||||
|
|
||||||
|
let repos: Vec<_> = RepoEntity::find()
|
||||||
|
.filter(RCol::Project.eq(project_uid))
|
||||||
|
.all(&self.db)
|
||||||
|
.await?;
|
||||||
|
|
||||||
for repo in repos {
|
for repo in repos {
|
||||||
let result = scanner::scan_and_sync_skills(&self.db, project_uid, &repo).await?;
|
let result = scanner::scan_and_sync_skills(&self.db, project_uid, &repo).await?;
|
||||||
total_created += result.created;
|
total_created += result.created;
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
use crate::error::AppError;
|
use crate::error::AppError;
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
|
use git2::Repository;
|
||||||
use models::ActiveModelTrait;
|
use models::ActiveModelTrait;
|
||||||
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
||||||
use models::projects::project_skill::Column as C;
|
use models::projects::project_skill::Column as C;
|
||||||
@ -108,7 +109,12 @@ pub fn scan_repo_for_skills(
|
|||||||
let path = entry.path();
|
let path = entry.path();
|
||||||
if path.is_dir() {
|
if path.is_dir() {
|
||||||
stack.push(path);
|
stack.push(path);
|
||||||
} else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") {
|
} else if path
|
||||||
|
.file_name()
|
||||||
|
.and_then(|n| n.to_str())
|
||||||
|
.map(|s| s.to_lowercase())
|
||||||
|
== Some("skill.md".to_string())
|
||||||
|
{
|
||||||
if let Some(dir_name) = path.parent()
|
if let Some(dir_name) = path.parent()
|
||||||
.and_then(|p| p.file_name())
|
.and_then(|p| p.file_name())
|
||||||
.and_then(|n| n.to_str())
|
.and_then(|n| n.to_str())
|
||||||
@ -136,10 +142,40 @@ pub async fn scan_and_sync_skills(
|
|||||||
project_uuid: Uuid,
|
project_uuid: Uuid,
|
||||||
repo: &RepoModel,
|
repo: &RepoModel,
|
||||||
) -> Result<ScanSyncResult, AppError> {
|
) -> Result<ScanSyncResult, AppError> {
|
||||||
// Resolve the repo path
|
// Open with git2 to get the actual workdir
|
||||||
let storage_path = Path::new(&repo.storage_path);
|
let git_repo = match Repository::open(&repo.storage_path) {
|
||||||
let discovered = scan_repo_for_skills(storage_path, repo.id)?;
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!("failed to open git repo {}: {:?}", repo.storage_path, e);
|
||||||
|
return Ok(ScanSyncResult {
|
||||||
|
discovered: 0,
|
||||||
|
created: 0,
|
||||||
|
updated: 0,
|
||||||
|
removed: 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let workdir = git_repo.workdir().map(|p| p.to_path_buf()).unwrap_or_else(|| Path::new(&repo.storage_path).to_path_buf());
|
||||||
|
let commit_sha = git_repo.head().ok().and_then(|h| h.target()).map(|oid| oid.to_string());
|
||||||
|
|
||||||
|
let mut discovered = scan_repo_for_skills(&workdir, repo.id)?;
|
||||||
|
|
||||||
|
// Fill in commit_sha for discovered skills
|
||||||
|
for skill in &mut discovered {
|
||||||
|
skill.commit_sha = commit_sha.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
sync_discovered_skills(db, project_uuid, repo.id, discovered).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sync discovered skills with deduplication by {repo_id}+{blob_hash}.
|
||||||
|
async fn sync_discovered_skills(
|
||||||
|
db: &db::database::AppDatabase,
|
||||||
|
project_uuid: Uuid,
|
||||||
|
repo_id: Uuid,
|
||||||
|
discovered: Vec<DiscoveredSkill>,
|
||||||
|
) -> Result<ScanSyncResult, AppError> {
|
||||||
if discovered.is_empty() {
|
if discovered.is_empty() {
|
||||||
return Ok(ScanSyncResult {
|
return Ok(ScanSyncResult {
|
||||||
discovered: 0,
|
discovered: 0,
|
||||||
@ -153,37 +189,56 @@ pub async fn scan_and_sync_skills(
|
|||||||
let mut created = 0i64;
|
let mut created = 0i64;
|
||||||
let mut updated = 0i64;
|
let mut updated = 0i64;
|
||||||
|
|
||||||
// Collect all repo-sourced skills in this repo for this project
|
// Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha
|
||||||
|
let mut deduped: std::collections::HashMap<String, DiscoveredSkill> = std::collections::HashMap::new();
|
||||||
|
for skill in discovered {
|
||||||
|
let key = format!("{}:{}", repo_id, skill.blob_hash.as_ref().unwrap_or(&skill.slug));
|
||||||
|
match deduped.get(&key) {
|
||||||
|
Some(existing) => {
|
||||||
|
// Keep the one with the later commit_sha
|
||||||
|
if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) {
|
||||||
|
deduped.insert(key, skill);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
deduped.insert(key, skill);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query existing skills for this repo
|
||||||
let existing: Vec<_> = SkillEntity::find()
|
let existing: Vec<_> = SkillEntity::find()
|
||||||
.filter(C::ProjectUuid.eq(project_uuid))
|
.filter(C::ProjectUuid.eq(project_uuid))
|
||||||
.filter(C::Source.eq("repo"))
|
.filter(C::Source.eq("repo"))
|
||||||
.filter(C::RepoId.eq(repo.id))
|
.filter(C::RepoId.eq(repo_id))
|
||||||
.all(db)
|
.all(db)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let existing_by_slug: std::collections::HashMap<_, _> = existing
|
let existing_by_hash: std::collections::HashMap<_, _> = existing
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|s| (s.slug.clone(), s))
|
.map(|s| {
|
||||||
|
let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default());
|
||||||
|
(key, s)
|
||||||
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut seen_slugs = std::collections::HashSet::new();
|
let mut seen_keys = std::collections::HashSet::new();
|
||||||
|
|
||||||
let discovered_count = discovered.len() as i64;
|
|
||||||
for skill in discovered {
|
|
||||||
seen_slugs.insert(skill.slug.clone());
|
|
||||||
|
|
||||||
|
let discovered_count = deduped.len() as i64;
|
||||||
|
for (key, skill) in deduped {
|
||||||
|
seen_keys.insert(key.clone());
|
||||||
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
||||||
|
|
||||||
if let Some(existing_skill) = existing_by_slug.get(&skill.slug) {
|
if let Some(existing_skill) = existing_by_hash.get(&key) {
|
||||||
if existing_skill.content != skill.content
|
if existing_skill.content != skill.content
|
||||||
|| existing_skill.metadata != json_meta
|
|| existing_skill.metadata != json_meta
|
||||||
|| existing_skill.blob_hash != skill.blob_hash
|
|| existing_skill.commit_sha != skill.commit_sha
|
||||||
{
|
{
|
||||||
let mut active: SkillActiveModel = existing_skill.clone().into();
|
let mut active: SkillActiveModel = existing_skill.clone().into();
|
||||||
active.content = Set(skill.content);
|
active.content = Set(skill.content);
|
||||||
active.metadata = Set(json_meta);
|
active.metadata = Set(json_meta);
|
||||||
active.commit_sha = Set(skill.commit_sha.clone());
|
active.commit_sha = Set(skill.commit_sha);
|
||||||
active.blob_hash = Set(skill.blob_hash.clone());
|
active.blob_hash = Set(skill.blob_hash);
|
||||||
active.updated_at = Set(now);
|
active.updated_at = Set(now);
|
||||||
active.update(db).await?;
|
active.update(db).await?;
|
||||||
updated += 1;
|
updated += 1;
|
||||||
@ -192,13 +247,13 @@ pub async fn scan_and_sync_skills(
|
|||||||
let active = SkillActiveModel {
|
let active = SkillActiveModel {
|
||||||
id: Set(0),
|
id: Set(0),
|
||||||
project_uuid: Set(project_uuid),
|
project_uuid: Set(project_uuid),
|
||||||
slug: Set(skill.slug.clone()),
|
slug: Set(skill.slug),
|
||||||
name: Set(skill.name),
|
name: Set(skill.name),
|
||||||
description: Set(skill.description),
|
description: Set(skill.description),
|
||||||
source: Set("repo".to_string()),
|
source: Set("repo".to_string()),
|
||||||
repo_id: Set(Some(repo.id)),
|
repo_id: Set(Some(repo_id)),
|
||||||
commit_sha: Set(skill.commit_sha.clone()),
|
commit_sha: Set(skill.commit_sha),
|
||||||
blob_hash: Set(skill.blob_hash.clone()),
|
blob_hash: Set(skill.blob_hash),
|
||||||
content: Set(skill.content),
|
content: Set(skill.content),
|
||||||
metadata: Set(json_meta),
|
metadata: Set(json_meta),
|
||||||
enabled: Set(true),
|
enabled: Set(true),
|
||||||
@ -213,8 +268,8 @@ pub async fn scan_and_sync_skills(
|
|||||||
|
|
||||||
// Remove skills that no longer exist in the repo
|
// Remove skills that no longer exist in the repo
|
||||||
let mut removed = 0i64;
|
let mut removed = 0i64;
|
||||||
for (slug, old_skill) in existing_by_slug {
|
for (key, old_skill) in existing_by_hash {
|
||||||
if !seen_slugs.contains(&slug) {
|
if !seen_keys.contains(&key) {
|
||||||
SkillEntity::delete_by_id(old_skill.id).exec(db).await?;
|
SkillEntity::delete_by_id(old_skill.id).exec(db).await?;
|
||||||
removed += 1;
|
removed += 1;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user