From 3faaff62209e441b9825b7d7c6c1357c3fe0fce5 Mon Sep 17 00:00:00 2001 From: ZhenYi <434836402@qq.com> Date: Mon, 18 May 2026 20:43:16 +0800 Subject: [PATCH] refactor(git): expand hook sync with skill scanning and multi-root discovery Update sync module to support .claude/skills and .codex/skills roots, add system/source tracking to discovered skills, and refactor migration path for the new SQL-file based migrator. --- libs/git/hook/sync/mod.rs | 269 ++++++++++++++++++++++++++++++-------- 1 file changed, 216 insertions(+), 53 deletions(-) diff --git a/libs/git/hook/sync/mod.rs b/libs/git/hook/sync/mod.rs index a1c11ad..8ce746e 100644 --- a/libs/git/hook/sync/mod.rs +++ b/libs/git/hook/sync/mod.rs @@ -23,12 +23,16 @@ use crate::GitDomain; use sha1::Digest; +const SKILL_ROOTS: &[(&str, &str)] = &[(".claude/skills", "claude"), (".codex/skills", "codex")]; +const ROOT_SKILL_SYSTEM: &str = "root"; + fn should_descend_dir(name: &str) -> bool { name != ".git" } -/// Recursively scan `base` for files named `SKILL.md`. -/// The skill slug is `{short_repo_id}/{parent_dir_name}` to ensure uniqueness across repos. +/// Recursively scan supported skill locations for files named `SKILL.md`. +/// Root-level skill packs keep the legacy slug `{short_repo_id}/{skill_dir}`. +/// System skills use `{short_repo_id}/{system}/{relative_skill_dir}`. fn scan_skills_from_dir( base: &Path, repo_id: &RepoId, @@ -36,8 +40,34 @@ fn scan_skills_from_dir( ) -> Result, std::io::Error> { let repo_id_prefix = &repo_id.to_string()[..8]; let mut discovered = Vec::new(); - let mut stack = vec![base.to_path_buf()]; + for (root, system) in SKILL_ROOTS { + let root_path = base.join(root); + if root_path.exists() { + scan_skill_root_from_dir( + &root_path, + repo_id_prefix, + system, + root, + commit_sha, + &mut discovered, + ); + } + } + scan_root_skill_pack_from_dir(base, repo_id_prefix, commit_sha, &mut discovered); + + Ok(discovered) +} + +fn scan_skill_root_from_dir( + root_path: &Path, + repo_id_prefix: &str, + system: &str, + root: &str, + commit_sha: &str, + discovered: &mut Vec, +) { + let mut stack = vec![root_path.to_path_buf()]; while let Some(dir) = stack.pop() { let entries = match std::fs::read_dir(&dir) { Ok(e) => e, @@ -47,30 +77,82 @@ fn scan_skills_from_dir( let path = entry.path(); if path.is_dir() { stack.push(path); - } else if path - .file_name() - .and_then(|n| n.to_str()) - .map(|x| x.to_lowercase()) - == Some("skill.md".to_string()) - { - if let Some(dir_name) = path - .parent() - .and_then(|p| p.file_name()) - .and_then(|n| n.to_str()) - { - let slug = format!("{}/{}", repo_id_prefix, dir_name); - if let Ok(raw) = std::fs::read(&path) { - let blob_hash = git_blob_hash(&raw); - let mut skill = parse_skill_content(&slug, &raw); - skill.commit_sha = Some(commit_sha.to_string()); - skill.blob_hash = Some(blob_hash); - discovered.push(skill); - } - } + continue; + } + if !is_skill_file_name(&path) { + continue; + } + let Some(parent) = path.parent() else { + continue; + }; + let relative_skill_dir = parent + .strip_prefix(root_path) + .ok() + .and_then(path_to_slug) + .filter(|s| !s.is_empty()); + let Some(relative_skill_dir) = relative_skill_dir else { + continue; + }; + let slug = format!("{}/{}/{}", repo_id_prefix, system, relative_skill_dir); + if let Ok(raw) = std::fs::read(&path) { + let blob_hash = git_blob_hash(&raw); + let mut skill = parse_skill_content(&slug, &raw); + skill.commit_sha = Some(commit_sha.to_string()); + skill.blob_hash = Some(blob_hash); + skill.metadata = enrich_metadata( + skill.metadata, + system, + Some(&format!("{}/{}/SKILL.md", root, relative_skill_dir)), + ); + discovered.push(skill); } } } - Ok(discovered) +} + +fn scan_root_skill_pack_from_dir( + base: &Path, + repo_id_prefix: &str, + commit_sha: &str, + discovered: &mut Vec, +) { + let entries = match std::fs::read_dir(base) { + Ok(e) => e, + Err(_) => return, + }; + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_dir() { + continue; + } + let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + if dir_name == ".git" || dir_name == ".claude" || dir_name == ".codex" { + continue; + } + let skill_file = path.join("SKILL.md"); + if !skill_file.exists() { + continue; + } + let relative_skill_dir = slugify_segment(dir_name); + if relative_skill_dir.is_empty() { + continue; + } + let slug = format!("{}/{}", repo_id_prefix, relative_skill_dir); + if let Ok(raw) = std::fs::read(&skill_file) { + let blob_hash = git_blob_hash(&raw); + let mut skill = parse_skill_content(&slug, &raw); + skill.commit_sha = Some(commit_sha.to_string()); + skill.blob_hash = Some(blob_hash); + skill.metadata = enrich_metadata( + skill.metadata, + ROOT_SKILL_SYSTEM, + Some(&format!("{}/SKILL.md", relative_skill_dir)), + ); + discovered.push(skill); + } + } } fn git_blob_hash(content: &[u8]) -> String { @@ -126,6 +208,57 @@ struct DiscoveredSkill { blob_hash: Option, } +fn is_skill_file_name(path: &Path) -> bool { + path.file_name() + .and_then(|n| n.to_str()) + .is_some_and(|name| name.eq_ignore_ascii_case("SKILL.md")) +} + +fn path_to_slug(path: &Path) -> Option { + let parts: Vec = path + .components() + .filter_map(|c| c.as_os_str().to_str()) + .map(slugify_segment) + .filter(|s| !s.is_empty()) + .collect(); + (!parts.is_empty()).then(|| parts.join("/")) +} + +fn slugify_segment(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + let mut last_dash = false; + for ch in input.chars() { + let ch = ch.to_ascii_lowercase(); + if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' { + out.push(ch); + last_dash = false; + } else if !last_dash { + out.push('-'); + last_dash = true; + } + } + out.trim_matches('-').to_string() +} + +fn enrich_metadata( + mut metadata: serde_json::Value, + system: &str, + relative_path: Option<&str>, +) -> serde_json::Value { + if !metadata.is_object() { + metadata = serde_json::json!({}); + } + if let Some(obj) = metadata.as_object_mut() { + obj.entry("system") + .or_insert_with(|| serde_json::Value::String(system.to_string())); + if let Some(relative_path) = relative_path { + obj.entry("path") + .or_insert_with(|| serde_json::Value::String(relative_path.to_string())); + } + } + metadata +} + fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) { let trimmed = raw.trim_start(); if !trimmed.starts_with("---") { @@ -175,20 +308,25 @@ fn scan_skills_from_tree( } } } - Some(git2::ObjectType::Blob) if name.to_lowercase() == "skill.md" => { - let dir_name = std::path::Path::new(&entry_path) - .parent() - .and_then(|p| p.file_name()) - .and_then(|n| n.to_str()); - let Some(dir_name) = dir_name else { continue }; + Some(git2::ObjectType::Blob) if name.eq_ignore_ascii_case("SKILL.md") => { + let Some((system, relative_skill_dir, legacy_slug)) = + skill_location_from_path(&entry_path) + else { + continue; + }; - let slug = format!("{}/{}", repo_id_prefix, dir_name); + let slug = if legacy_slug { + format!("{}/{}", repo_id_prefix, relative_skill_dir) + } else { + format!("{}/{}/{}", repo_id_prefix, system, relative_skill_dir) + }; if let Ok(blob) = entry.to_object(git_repo).and_then(|o| o.peel_to_blob()) { let raw = blob.content(); let blob_hash = git_blob_hash(raw); let mut skill = parse_skill_content(&slug, raw); skill.commit_sha = Some(commit_sha.to_string()); skill.blob_hash = Some(blob_hash); + skill.metadata = enrich_metadata(skill.metadata, system, Some(&entry_path)); discovered.push(skill); } } @@ -200,6 +338,39 @@ fn scan_skills_from_tree( Ok(discovered) } +fn skill_location_from_path(path: &str) -> Option<(&'static str, String, bool)> { + let normalized = path.replace('\\', "/"); + for (root, system) in SKILL_ROOTS { + let prefix = format!("{}/", root); + let suffix = "/SKILL.md"; + if normalized.starts_with(&prefix) && normalized.ends_with(suffix) { + let relative = &normalized[prefix.len()..normalized.len() - suffix.len()]; + let slug = relative + .split('/') + .map(slugify_segment) + .filter(|s| !s.is_empty()) + .collect::>() + .join("/"); + if !slug.is_empty() { + return Some((*system, slug, false)); + } + } + } + + let suffix = "/SKILL.md"; + if normalized.ends_with(suffix) && !normalized.starts_with('.') { + let relative = &normalized[..normalized.len() - suffix.len()]; + if !relative.contains('/') { + let slug = slugify_segment(relative); + if !slug.is_empty() { + return Some((ROOT_SKILL_SYSTEM, slug, true)); + } + } + } + + None +} + #[derive(Clone)] pub struct HookMetaDataSync { pub db: AppDatabase, @@ -414,40 +585,27 @@ impl HookMetaDataSync { } }; - // Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha + // Deduplicate by stable slug. Blob hash changes when content changes and must not be the + // upsert key because project_skill has a unique (project_uuid, slug) constraint. let mut deduped: std::collections::HashMap = std::collections::HashMap::new(); for skill in discovered { - let key = if let Some(ref hash) = skill.blob_hash { - format!("{}:{}", self.repo.id, hash) - } else { - format!("{}:{}:slug", self.repo.id, skill.slug) - }; - match deduped.get(&key) { + match deduped.get(&skill.slug) { Some(existing) => { if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) { - deduped.insert(key, skill); + deduped.insert(skill.slug.clone(), skill); } } None => { - deduped.insert(key, skill); + deduped.insert(skill.slug.clone(), skill); } } } - let existing_by_hash: HashMap<_, _> = existing - .into_iter() - .map(|s| { - let key = format!( - "{}:{}", - s.repo_id.unwrap_or_default(), - s.blob_hash.clone().unwrap_or_default() - ); - (key, s) - }) - .collect(); + let existing_by_slug: HashMap<_, _> = + existing.into_iter().map(|s| (s.slug.clone(), s)).collect(); let mut seen_keys = std::collections::HashSet::new(); @@ -455,12 +613,17 @@ impl HookMetaDataSync { seen_keys.insert(key.clone()); let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default(); - if let Some(existing_skill) = existing_by_hash.get(&key) { + if let Some(existing_skill) = existing_by_slug.get(&key) { if existing_skill.content != skill.content || existing_skill.metadata != json_meta || existing_skill.commit_sha != skill.commit_sha + || existing_skill.blob_hash != skill.blob_hash + || existing_skill.name != skill.name + || existing_skill.description != skill.description { let mut active: SkillActiveModel = existing_skill.clone().into(); + active.name = Set(skill.name); + active.description = Set(skill.description); active.content = Set(skill.content); active.metadata = Set(json_meta); active.commit_sha = Set(skill.commit_sha); @@ -494,7 +657,7 @@ impl HookMetaDataSync { } } - for (key, old_skill) in existing_by_hash { + for (key, old_skill) in existing_by_slug { if !seen_keys.contains(&key) { if SkillEntity::delete_by_id(old_skill.id) .exec(&self.db)