Add scan_repo_tree_for_skills and scan_skills_from_tree functions that traverse git objects directly instead of the filesystem, enabling skill discovery in bare repositories created via git2::Repository::init_bare.
493 lines
16 KiB
Rust
493 lines
16 KiB
Rust
pub mod branch;
|
|
pub mod commit;
|
|
pub mod fsck;
|
|
pub mod gc;
|
|
pub mod lfs;
|
|
pub mod lock;
|
|
pub mod tag;
|
|
|
|
use db::cache::AppCache;
|
|
use db::database::AppDatabase;
|
|
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
|
use models::projects::project_skill::{Column as SkillCol, Entity as SkillEntity};
|
|
use models::repos::repo::Model as RepoModel;
|
|
use models::ActiveModelTrait;
|
|
use models::RepoId;
|
|
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set};
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
use crate::GitDomain;
|
|
|
|
// ── Skill discovery (local, no service crate dependency) ────────────────────────
|
|
|
|
use sha1::Digest;
|
|
|
|
/// Recursively scan `base` for files named `SKILL.md`.
|
|
/// The skill slug is `{short_repo_id}/{parent_dir_name}` to ensure uniqueness across repos.
|
|
fn scan_skills_from_dir(
|
|
base: &Path,
|
|
repo_id: &RepoId,
|
|
commit_sha: &str,
|
|
) -> Result<Vec<DiscoveredSkill>, std::io::Error> {
|
|
let repo_id_prefix = &repo_id.to_string()[..8];
|
|
let mut discovered = Vec::new();
|
|
let mut stack = vec![base.to_path_buf()];
|
|
|
|
while let Some(dir) = stack.pop() {
|
|
let entries = match std::fs::read_dir(&dir) {
|
|
Ok(e) => e,
|
|
Err(_) => continue,
|
|
};
|
|
for entry in entries.flatten() {
|
|
let path = entry.path();
|
|
if path.is_dir() {
|
|
stack.push(path);
|
|
} else if path
|
|
.file_name()
|
|
.and_then(|n| n.to_str())
|
|
.map(|x| x.to_lowercase())
|
|
== Some("skill.md".to_string())
|
|
{
|
|
if let Some(dir_name) = path
|
|
.parent()
|
|
.and_then(|p| p.file_name())
|
|
.and_then(|n| n.to_str())
|
|
.filter(|s| !s.starts_with('.'))
|
|
{
|
|
let slug = format!("{}/{}", repo_id_prefix, dir_name);
|
|
if let Ok(raw) = std::fs::read(&path) {
|
|
let blob_hash = git_blob_hash(&raw);
|
|
let mut skill = parse_skill_content(&slug, &raw);
|
|
skill.commit_sha = Some(commit_sha.to_string());
|
|
skill.blob_hash = Some(blob_hash);
|
|
discovered.push(skill);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(discovered)
|
|
}
|
|
|
|
fn git_blob_hash(content: &[u8]) -> String {
|
|
let size = content.len();
|
|
let header = format!("blob {}\0", size);
|
|
let mut hasher = sha1::Sha1::new();
|
|
hasher.update(header.as_bytes());
|
|
hasher.update(content);
|
|
hex::encode(hasher.finalize())
|
|
}
|
|
|
|
fn parse_skill_content(slug: &str, raw: &[u8]) -> DiscoveredSkill {
|
|
let content = String::from_utf8_lossy(raw);
|
|
let (frontmatter, body) = extract_frontmatter(&content);
|
|
let metadata: serde_json::Value = frontmatter
|
|
.and_then(|fm| serde_json::from_str(fm).ok())
|
|
.unwrap_or_default();
|
|
|
|
let name = metadata
|
|
.get("name")
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from)
|
|
.unwrap_or_else(|| slug.replace('-', " ").replace('_', " "));
|
|
|
|
let description = metadata
|
|
.get("description")
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from);
|
|
|
|
DiscoveredSkill {
|
|
slug: slug.to_string(),
|
|
name,
|
|
description,
|
|
content: body.trim().to_string(),
|
|
metadata,
|
|
commit_sha: None,
|
|
blob_hash: None,
|
|
}
|
|
}
|
|
|
|
struct DiscoveredSkill {
|
|
slug: String,
|
|
name: String,
|
|
description: Option<String>,
|
|
content: String,
|
|
metadata: serde_json::Value,
|
|
commit_sha: Option<String>,
|
|
blob_hash: Option<String>,
|
|
}
|
|
|
|
/// Split a document into its `---`-fenced frontmatter and the remaining body.
///
/// Leading whitespace of `raw` is ignored. If the text then starts with `---`
/// and a second `---` follows, returns the text between the two fences
/// (untrimmed) and the body after the closing fence with its leading
/// whitespace removed. Otherwise returns `None` and the whole text (minus
/// leading whitespace) as the body.
///
/// The closing fence is the first `---` found after the opening one; it does
/// not have to sit on a line of its own.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    const FENCE: &str = "---";

    let text = raw.trim_start();
    let fenced = text
        .strip_prefix(FENCE)
        .and_then(|after_open| after_open.split_once(FENCE));

    match fenced {
        Some((front, remainder)) => (Some(front), remainder.trim_start()),
        None => (None, text),
    }
}
|
|
|
|
/// Scan git tree objects for `SKILL.md` files (works for bare repos).
|
|
fn scan_skills_from_tree(
|
|
git_repo: &git2::Repository,
|
|
repo_id: &RepoId,
|
|
commit_sha: &str,
|
|
) -> Result<Vec<DiscoveredSkill>, String> {
|
|
let repo_id_prefix = &repo_id.to_string()[..8];
|
|
let head = git_repo.head().map_err(|e| format!("no HEAD: {e}"))?;
|
|
let tree = head.peel_to_tree().map_err(|e| format!("no tree: {e}"))?;
|
|
|
|
let mut discovered = Vec::new();
|
|
let mut stack: Vec<(git2::Tree<'_>, String)> = vec![(tree, String::new())];
|
|
|
|
while let Some((current_tree, prefix)) = stack.pop() {
|
|
for entry in current_tree.iter() {
|
|
let name = match entry.name() {
|
|
Some(n) => n,
|
|
None => continue,
|
|
};
|
|
let entry_path = if prefix.is_empty() {
|
|
name.to_string()
|
|
} else {
|
|
format!("{}/{}", prefix, name)
|
|
};
|
|
|
|
match entry.kind() {
|
|
Some(git2::ObjectType::Tree) => {
|
|
if !name.starts_with('.') {
|
|
if let Ok(subtree) = entry.to_object(git_repo).and_then(|o| o.peel_to_tree()) {
|
|
stack.push((subtree, entry_path));
|
|
}
|
|
}
|
|
}
|
|
Some(git2::ObjectType::Blob) if name.to_lowercase() == "skill.md" => {
|
|
let dir_name = std::path::Path::new(&entry_path)
|
|
.parent()
|
|
.and_then(|p| p.file_name())
|
|
.and_then(|n| n.to_str())
|
|
.filter(|s| !s.starts_with('.'));
|
|
let Some(dir_name) = dir_name else { continue };
|
|
|
|
let slug = format!("{}/{}", repo_id_prefix, dir_name);
|
|
if let Ok(blob) = entry.to_object(git_repo).and_then(|o| o.peel_to_blob()) {
|
|
let raw = blob.content();
|
|
let blob_hash = git_blob_hash(raw);
|
|
let mut skill = parse_skill_content(&slug, raw);
|
|
skill.commit_sha = Some(commit_sha.to_string());
|
|
skill.blob_hash = Some(blob_hash);
|
|
discovered.push(skill);
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(discovered)
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct HookMetaDataSync {
|
|
pub db: AppDatabase,
|
|
pub cache: AppCache,
|
|
pub repo: RepoModel,
|
|
pub domain: GitDomain,
|
|
}
|
|
|
|
impl HookMetaDataSync {
|
|
pub fn new(db: AppDatabase, cache: AppCache, repo: RepoModel) -> Result<Self, crate::GitError> {
|
|
let domain = GitDomain::from_model(repo.clone())?;
|
|
Ok(Self {
|
|
db,
|
|
cache,
|
|
repo,
|
|
domain,
|
|
})
|
|
}
|
|
|
|
/// Full sync with lock. Caller (worker) manages locking.
|
|
pub async fn sync(&self) -> Result<(), crate::GitError> {
|
|
let lock_value = self.acquire_lock().await?;
|
|
|
|
let res = self.sync_work().await;
|
|
|
|
if let Err(ref e) = res {
|
|
tracing::error!("sync failed error={}", e);
|
|
}
|
|
|
|
let _ = self.release_lock(&lock_value).await;
|
|
res
|
|
}
|
|
|
|
/// Fsck only with lock. Caller manages locking.
|
|
pub async fn fsck_only(&self) -> Result<(), crate::GitError> {
|
|
let lock_value = self.acquire_lock().await?;
|
|
|
|
let res = self.fsck_work().await;
|
|
|
|
let _ = self.release_lock(&lock_value).await;
|
|
res
|
|
}
|
|
|
|
/// GC only with lock. Caller manages locking.
|
|
pub async fn gc_only(&self) -> Result<(), crate::GitError> {
|
|
let lock_value = self.acquire_lock().await?;
|
|
|
|
let res = self.gc_work().await;
|
|
|
|
let _ = self.release_lock(&lock_value).await;
|
|
res
|
|
}
|
|
|
|
/// Full sync pipeline (no locking — caller is responsible).
|
|
async fn sync_work(&self) -> Result<(), crate::GitError> {
|
|
let mut txn =
|
|
self.db.begin().await.map_err(|e| {
|
|
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
|
|
})?;
|
|
|
|
self.sync_refs(&mut txn).await?;
|
|
self.sync_commits(&mut txn).await?;
|
|
self.sync_tags(&mut txn).await?;
|
|
self.sync_lfs_objects(&mut txn).await?;
|
|
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
|
|
|
|
txn.commit().await.map_err(|e| {
|
|
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
|
|
})?;
|
|
|
|
self.run_gc().await?;
|
|
self.sync_skills().await;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Fsck only work (no locking — caller is responsible).
|
|
async fn fsck_work(&self) -> Result<(), crate::GitError> {
|
|
let mut txn =
|
|
self.db.begin().await.map_err(|e| {
|
|
crate::GitError::IoError(format!("failed to begin transaction: {}", e))
|
|
})?;
|
|
|
|
self.run_fsck_and_rollback_if_corrupt(&mut txn).await?;
|
|
|
|
txn.commit().await.map_err(|e| {
|
|
crate::GitError::IoError(format!("failed to commit transaction: {}", e))
|
|
})?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// GC only work (no locking — caller is responsible).
|
|
async fn gc_work(&self) -> Result<(), crate::GitError> {
|
|
self.run_gc().await
|
|
}
|
|
|
|
/// Returns a list of (branch_name, oid) for all local branches.
|
|
pub fn list_branch_tips(&self) -> Vec<(String, String)> {
|
|
let repo = self.domain.repo();
|
|
let mut tips = Vec::new();
|
|
if let Ok(refs) = repo.references() {
|
|
for ref_result in refs {
|
|
if let Ok(r) = ref_result {
|
|
if r.is_branch() && !r.is_remote() {
|
|
if let Some(name) = r.name() {
|
|
let branch = name.strip_prefix("refs/heads/").unwrap_or(name);
|
|
if let Some(target) = r.target() {
|
|
tips.push((branch.to_string(), target.to_string()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
tips
|
|
}
|
|
|
|
/// Returns a list of (tag_name, oid) for all tags.
|
|
pub fn list_tag_tips(&self) -> Vec<(String, String)> {
|
|
let repo = self.domain.repo();
|
|
let mut tips = Vec::new();
|
|
if let Ok(refs) = repo.references() {
|
|
for ref_result in refs {
|
|
if let Ok(r) = ref_result {
|
|
if r.is_tag() {
|
|
if let Some(name) = r.name() {
|
|
let tag = name.strip_prefix("refs/tags/").unwrap_or(name);
|
|
if let Some(target) = r.target() {
|
|
tips.push((tag.to_string(), target.to_string()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
tips
|
|
}
|
|
|
|
/// Scan the repository for `SKILL.md` files and sync skills to the project.
|
|
/// Best-effort — failures are logged but do not fail the sync.
|
|
pub async fn sync_skills(&self) {
|
|
let project_uid = self.repo.project;
|
|
let git_repo = self.domain.repo();
|
|
|
|
let commit_sha = git_repo
|
|
.head()
|
|
.ok()
|
|
.and_then(|h| h.target())
|
|
.map(|oid| oid.to_string())
|
|
.unwrap_or_default();
|
|
|
|
let repo_id = self.repo.id;
|
|
let is_bare = git_repo.is_bare() || git_repo.workdir().is_none();
|
|
|
|
let discovered = if is_bare {
|
|
// Bare repo: scan git tree objects directly
|
|
let git_repo_ref = self.domain.repo();
|
|
match scan_skills_from_tree(git_repo_ref, &repo_id, &commit_sha) {
|
|
Ok(d) => d,
|
|
Err(e) => {
|
|
tracing::warn!("failed to scan skills from tree error={}", e);
|
|
return;
|
|
}
|
|
}
|
|
} else {
|
|
// Normal repo: walk filesystem
|
|
let repo_root = git_repo.workdir().unwrap().to_path_buf();
|
|
match tokio::task::spawn_blocking(move || {
|
|
scan_skills_from_dir(&repo_root, &repo_id, &commit_sha)
|
|
})
|
|
.await
|
|
{
|
|
Ok(Ok(d)) => d,
|
|
Ok(Err(e)) => {
|
|
tracing::warn!("failed to scan skills directory error={}", e);
|
|
return;
|
|
}
|
|
Err(e) => {
|
|
tracing::warn!("spawn_blocking join error error={}", e);
|
|
return;
|
|
}
|
|
}
|
|
};
|
|
|
|
if discovered.is_empty() {
|
|
return;
|
|
}
|
|
|
|
let now = chrono::Utc::now();
|
|
let mut created = 0i64;
|
|
let mut updated = 0i64;
|
|
let mut removed = 0i64;
|
|
|
|
let existing: Vec<_> = match SkillEntity::find()
|
|
.filter(SkillCol::ProjectUuid.eq(project_uid))
|
|
.filter(SkillCol::Source.eq("repo"))
|
|
.filter(SkillCol::RepoId.eq(self.repo.id))
|
|
.all(&self.db)
|
|
.await
|
|
{
|
|
Ok(e) => e,
|
|
Err(e) => {
|
|
tracing::warn!("failed to query existing skills error={}", e);
|
|
return;
|
|
}
|
|
};
|
|
|
|
// Deduplicate by {repo_id}+{blob_hash}, keep latest by commit_sha
|
|
let mut deduped: std::collections::HashMap<String, DiscoveredSkill> = std::collections::HashMap::new();
|
|
for skill in discovered {
|
|
let key = format!("{}:{}", self.repo.id, skill.blob_hash.as_ref().unwrap_or(&skill.slug));
|
|
match deduped.get(&key) {
|
|
Some(existing) => {
|
|
if skill.commit_sha.as_ref().unwrap_or(&String::new()) > existing.commit_sha.as_ref().unwrap_or(&String::new()) {
|
|
deduped.insert(key, skill);
|
|
}
|
|
}
|
|
None => {
|
|
deduped.insert(key, skill);
|
|
}
|
|
}
|
|
}
|
|
|
|
let existing_by_hash: HashMap<_, _> = existing
|
|
.into_iter()
|
|
.map(|s| {
|
|
let key = format!("{}:{}", s.repo_id.unwrap_or_default(), s.blob_hash.clone().unwrap_or_default());
|
|
(key, s)
|
|
})
|
|
.collect();
|
|
|
|
let mut seen_keys = std::collections::HashSet::new();
|
|
|
|
for (key, skill) in deduped {
|
|
seen_keys.insert(key.clone());
|
|
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
|
|
|
if let Some(existing_skill) = existing_by_hash.get(&key) {
|
|
if existing_skill.content != skill.content
|
|
|| existing_skill.metadata != json_meta
|
|
|| existing_skill.commit_sha != skill.commit_sha
|
|
{
|
|
let mut active: SkillActiveModel = existing_skill.clone().into();
|
|
active.content = Set(skill.content);
|
|
active.metadata = Set(json_meta);
|
|
active.commit_sha = Set(skill.commit_sha);
|
|
active.blob_hash = Set(skill.blob_hash);
|
|
active.updated_at = Set(now);
|
|
if active.update(&self.db).await.is_ok() {
|
|
updated += 1;
|
|
}
|
|
}
|
|
} else {
|
|
let active = SkillActiveModel {
|
|
id: Set(0),
|
|
project_uuid: Set(project_uid),
|
|
slug: Set(skill.slug.clone()),
|
|
name: Set(skill.name),
|
|
description: Set(skill.description),
|
|
source: Set("repo".to_string()),
|
|
repo_id: Set(Some(self.repo.id)),
|
|
commit_sha: Set(skill.commit_sha),
|
|
blob_hash: Set(skill.blob_hash),
|
|
content: Set(skill.content),
|
|
metadata: Set(json_meta),
|
|
enabled: Set(true),
|
|
created_by: Set(None),
|
|
created_at: Set(now),
|
|
updated_at: Set(now),
|
|
};
|
|
if SkillEntity::insert(active).exec(&self.db).await.is_ok() {
|
|
created += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (key, old_skill) in existing_by_hash {
|
|
if !seen_keys.contains(&key) {
|
|
if SkillEntity::delete_by_id(old_skill.id)
|
|
.exec(&self.db)
|
|
.await
|
|
.is_ok()
|
|
{
|
|
removed += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if created > 0 || updated > 0 || removed > 0 {
|
|
tracing::info!(
|
|
"skills synced created={} updated={} removed={}",
|
|
created,
|
|
updated,
|
|
removed
|
|
);
|
|
}
|
|
}
|
|
}
|