fix(git): deduplicate skills by repo_id+blob_hash in hook sync
- Apply the same deduplication logic as the service scanner
- When duplicates are found, keep the entry whose commit_sha compares greatest (string comparison; a missing commit_sha is treated as empty)
- Fix type error: Ok("skill.md") → Some("skill.md".to_string())
This commit is contained in:
parent
2db7934596
commit
b3fb027848
@ -8,11 +8,11 @@ pub mod tag;
|
||||
|
||||
use db::cache::AppCache;
|
||||
use db::database::AppDatabase;
|
||||
use models::projects::project_skill::{Column as SkillCol, Entity as SkillEntity};
|
||||
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
||||
use models::projects::project_skill::{Column as SkillCol, Entity as SkillEntity};
|
||||
use models::repos::repo::Model as RepoModel;
|
||||
use models::RepoId;
|
||||
use models::ActiveModelTrait;
|
||||
use models::RepoId;
|
||||
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
@ -43,8 +43,14 @@ fn scan_skills_from_dir(
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
stack.push(path);
|
||||
} else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") {
|
||||
if let Some(dir_name) = path.parent()
|
||||
} else if path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(|x| x.to_lowercase())
|
||||
== Some("skill.md".to_string())
|
||||
{
|
||||
if let Some(dir_name) = path
|
||||
.parent()
|
||||
.and_then(|p| p.file_name())
|
||||
.and_then(|n| n.to_str())
|
||||
.filter(|s| !s.starts_with('.'))
|
||||
@ -135,11 +141,7 @@ pub struct HookMetaDataSync {
|
||||
}
|
||||
|
||||
impl HookMetaDataSync {
|
||||
pub fn new(
|
||||
db: AppDatabase,
|
||||
cache: AppCache,
|
||||
repo: RepoModel,
|
||||
) -> Result<Self, crate::GitError> {
|
||||
pub fn new(db: AppDatabase, cache: AppCache, repo: RepoModel) -> Result<Self, crate::GitError> {
|
||||
let domain = GitDomain::from_model(repo.clone())?;
|
||||
Ok(Self {
|
||||
db,
|
||||
@ -185,18 +187,16 @@ impl HookMetaDataSync {
|
||||
|
||||
/// Full sync pipeline (no locking — caller is responsible).
|
||||
async fn sync_work(&self) -> Result<(), crate::GitError> {
|
||||
let mut txn = self
|
||||
.db
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| crate::GitError::IoError(format!("failed to begin transaction: {}", e)))?;
|
||||
let mut txn =
|
||||
self.db.begin().await.map_err(|e| {
|
||||
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
|
||||
})?;
|
||||
|
||||
self.sync_refs(&mut txn).await?;
|
||||
self.sync_commits(&mut txn).await?;
|
||||
self.sync_tags(&mut txn).await?;
|
||||
self.sync_lfs_objects(&mut txn).await?;
|
||||
self.run_fsck_and_rollback_if_corrupt(&mut txn)
|
||||
.await?;
|
||||
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
|
||||
|
||||
txn.commit().await.map_err(|e| {
|
||||
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
|
||||
@ -210,14 +210,12 @@ impl HookMetaDataSync {
|
||||
|
||||
/// Fsck only work (no locking — caller is responsible).
|
||||
async fn fsck_work(&self) -> Result<(), crate::GitError> {
|
||||
let mut txn = self
|
||||
.db
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| crate::GitError::IoError(format!("failed to begin transaction: {}", e)))?;
|
||||
let mut txn =
|
||||
self.db.begin().await.map_err(|e| {
|
||||
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
|
||||
})?;
|
||||
|
||||
self.run_fsck_and_rollback_if_corrupt(&mut txn)
|
||||
.await?;
|
||||
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
|
||||
|
||||
txn.commit().await.map_err(|e| {
|
||||
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
|
||||
@ -332,22 +330,40 @@ impl HookMetaDataSync {
|
||||
}
|
||||
};
|
||||
|
||||
let existing_by_slug: HashMap<_, _> = existing
|
||||
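// Deduplicate by "{repo_id}:{blob_hash}" (falling back to the slug when blob_hash is None); on a key collision keep the entry whose commit_sha string compares greater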
|
||||
let mut deduped: std::collections::HashMap<String, DiscoveredSkill> = std::collections::HashMap::new();
|
||||
for skill in discovered {
|
||||
let key = format!("{}:{}", self.repo.id, skill.blob_hash.as_ref().unwrap_or(&skill.slug));
|
||||
match deduped.get(&key) {
|
||||
Some(existing) => {
|
||||
if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) {
|
||||
deduped.insert(key, skill);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
deduped.insert(key, skill);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let existing_by_hash: HashMap<_, _> = existing
|
||||
.into_iter()
|
||||
.map(|s| (s.slug.clone(), s))
|
||||
.map(|s| {
|
||||
let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default());
|
||||
(key, s)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut seen_slugs = std::collections::HashSet::new();
|
||||
let mut seen_keys = std::collections::HashSet::new();
|
||||
|
||||
for skill in discovered {
|
||||
seen_slugs.insert(skill.slug.clone());
|
||||
for (key, skill) in deduped {
|
||||
seen_keys.insert(key.clone());
|
||||
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
||||
|
||||
if let Some(existing_skill) = existing_by_slug.get(&skill.slug) {
|
||||
if let Some(existing_skill) = existing_by_hash.get(&key) {
|
||||
if existing_skill.content != skill.content
|
||||
|| existing_skill.metadata != json_meta
|
||||
|| existing_skill.commit_sha.as_ref() != skill.commit_sha.as_ref()
|
||||
|| existing_skill.blob_hash.as_ref() != skill.blob_hash.as_ref()
|
||||
|| existing_skill.commit_sha != skill.commit_sha
|
||||
{
|
||||
let mut active: SkillActiveModel = existing_skill.clone().into();
|
||||
active.content = Set(skill.content);
|
||||
@ -383,16 +399,25 @@ impl HookMetaDataSync {
|
||||
}
|
||||
}
|
||||
|
||||
for (slug, old_skill) in existing_by_slug {
|
||||
if !seen_slugs.contains(&slug) {
|
||||
if SkillEntity::delete_by_id(old_skill.id).exec(&self.db).await.is_ok() {
|
||||
for (key, old_skill) in existing_by_hash {
|
||||
if !seen_keys.contains(&key) {
|
||||
if SkillEntity::delete_by_id(old_skill.id)
|
||||
.exec(&self.db)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
removed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if created > 0 || updated > 0 || removed > 0 {
|
||||
tracing::info!("skills synced created={} updated={} removed={}", created, updated, removed);
|
||||
tracing::info!(
|
||||
"skills synced created={} updated={} removed={}",
|
||||
created,
|
||||
updated,
|
||||
removed
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user