fix(git): deduplicate skills by repo_id+blob_hash in hook sync

- Apply same deduplication logic as service scanner
- Keep latest version by commit_sha when duplicates found
- Fix type error: Ok("skill.md") → Some("skill.md".to_string())
This commit is contained in:
ZhenYi 2026-04-28 21:28:19 +08:00
parent 2db7934596
commit b3fb027848

View File

@ -8,11 +8,11 @@ pub mod tag;
use db::cache::AppCache;
use db::database::AppDatabase;
use models::projects::project_skill::{Column as SkillCol, Entity as SkillEntity};
use models::projects::project_skill::ActiveModel as SkillActiveModel;
use models::projects::project_skill::{Column as SkillCol, Entity as SkillEntity};
use models::repos::repo::Model as RepoModel;
use models::RepoId;
use models::ActiveModelTrait;
use models::RepoId;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set};
use std::collections::HashMap;
use std::path::Path;
@ -43,8 +43,14 @@ fn scan_skills_from_dir(
let path = entry.path();
if path.is_dir() {
stack.push(path);
} else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") {
if let Some(dir_name) = path.parent()
} else if path
.file_name()
.and_then(|n| n.to_str())
.map(|x| x.to_lowercase())
== Some("skill.md".to_string())
{
if let Some(dir_name) = path
.parent()
.and_then(|p| p.file_name())
.and_then(|n| n.to_str())
.filter(|s| !s.starts_with('.'))
@ -135,11 +141,7 @@ pub struct HookMetaDataSync {
}
impl HookMetaDataSync {
pub fn new(
db: AppDatabase,
cache: AppCache,
repo: RepoModel,
) -> Result<Self, crate::GitError> {
pub fn new(db: AppDatabase, cache: AppCache, repo: RepoModel) -> Result<Self, crate::GitError> {
let domain = GitDomain::from_model(repo.clone())?;
Ok(Self {
db,
@ -185,18 +187,16 @@ impl HookMetaDataSync {
/// Full sync pipeline (no locking — caller is responsible).
async fn sync_work(&self) -> Result<(), crate::GitError> {
let mut txn = self
.db
.begin()
.await
.map_err(|e| crate::GitError::IoError(format!("failed to begin transaction: {}", e)))?;
let mut txn =
self.db.begin().await.map_err(|e| {
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
})?;
self.sync_refs(&mut txn).await?;
self.sync_commits(&mut txn).await?;
self.sync_tags(&mut txn).await?;
self.sync_lfs_objects(&mut txn).await?;
self.run_fsck_and_rollback_if_corrupt(&mut txn)
.await?;
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
txn.commit().await.map_err(|e| {
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
@ -210,14 +210,12 @@ impl HookMetaDataSync {
/// Fsck only work (no locking — caller is responsible).
async fn fsck_work(&self) -> Result<(), crate::GitError> {
let mut txn = self
.db
.begin()
.await
.map_err(|e| crate::GitError::IoError(format!("failed to begin transaction: {}", e)))?;
let mut txn =
self.db.begin().await.map_err(|e| {
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
})?;
self.run_fsck_and_rollback_if_corrupt(&mut txn)
.await?;
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
txn.commit().await.map_err(|e| {
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
@ -332,22 +330,40 @@ impl HookMetaDataSync {
}
};
let existing_by_slug: HashMap<_, _> = existing
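// Deduplicate by {repo_id}+{blob_hash} (falls back to the slug when blob_hash is missing).
// On a key collision the entry whose commit_sha string compares greater is kept; a missing
// commit_sha compares as the empty string.
// NOTE(review): this is a lexicographic comparison of the SHA text, not commit chronology,
// so it does not necessarily keep the most recent commit — confirm the intended ordering.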
let mut deduped: std::collections::HashMap<String, DiscoveredSkill> = std::collections::HashMap::new();
for skill in discovered {
let key = format!("{}:{}", self.repo.id, skill.blob_hash.as_ref().unwrap_or(&skill.slug));
match deduped.get(&key) {
Some(existing) => {
if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) {
deduped.insert(key, skill);
}
}
None => {
deduped.insert(key, skill);
}
}
}
let existing_by_hash: HashMap<_, _> = existing
.into_iter()
.map(|s| (s.slug.clone(), s))
.map(|s| {
let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default());
(key, s)
})
.collect();
let mut seen_slugs = std::collections::HashSet::new();
let mut seen_keys = std::collections::HashSet::new();
for skill in discovered {
seen_slugs.insert(skill.slug.clone());
for (key, skill) in deduped {
seen_keys.insert(key.clone());
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
if let Some(existing_skill) = existing_by_slug.get(&skill.slug) {
if let Some(existing_skill) = existing_by_hash.get(&key) {
if existing_skill.content != skill.content
|| existing_skill.metadata != json_meta
|| existing_skill.commit_sha.as_ref() != skill.commit_sha.as_ref()
|| existing_skill.blob_hash.as_ref() != skill.blob_hash.as_ref()
|| existing_skill.commit_sha != skill.commit_sha
{
let mut active: SkillActiveModel = existing_skill.clone().into();
active.content = Set(skill.content);
@ -383,16 +399,25 @@ impl HookMetaDataSync {
}
}
for (slug, old_skill) in existing_by_slug {
if !seen_slugs.contains(&slug) {
if SkillEntity::delete_by_id(old_skill.id).exec(&self.db).await.is_ok() {
for (key, old_skill) in existing_by_hash {
if !seen_keys.contains(&key) {
if SkillEntity::delete_by_id(old_skill.id)
.exec(&self.db)
.await
.is_ok()
{
removed += 1;
}
}
}
if created > 0 || updated > 0 || removed > 0 {
tracing::info!("skills synced created={} updated={} removed={}", created, updated, removed);
tracing::info!(
"skills synced created={} updated={} removed={}",
created,
updated,
removed
);
}
}
}