239 lines
7.7 KiB
Rust
239 lines
7.7 KiB
Rust
//! Repository skill scanner.
|
|
//!
|
|
//! Scans repositories for SKILL.md files and upserts skill records.
|
|
|
|
use crate::error::AppError;
|
|
use chrono::Utc;
|
|
use models::ActiveModelTrait;
|
|
use models::projects::project_skill::ActiveModel as SkillActiveModel;
|
|
use models::projects::project_skill::Column as C;
|
|
use models::projects::project_skill::Entity as SkillEntity;
|
|
use models::repos::repo::Model as RepoModel;
|
|
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Set};
|
|
use sha1::Digest;
|
|
use std::path::Path;
|
|
use uuid::Uuid;
|
|
|
|
/// Skill discovery result from a single repository.
///
/// Produced by `parse_skill_file` / `scan_repo_for_skills`; not yet persisted.
#[derive(Debug)]
pub struct DiscoveredSkill {
    /// URL-safe slug derived from the directory name.
    /// Format is `{short_repo_id}/{parent_dir_name}` when produced by the scanner.
    pub slug: String,
    /// Human-readable name (from frontmatter `name`, or the slug with `-`/`_`
    /// replaced by spaces when frontmatter has no name).
    pub name: String,
    /// Short description (from frontmatter `description`, if present).
    pub description: Option<String>,
    /// Raw markdown body after the frontmatter, trimmed of surrounding whitespace.
    pub content: String,
    /// Parsed frontmatter as JSON; JSON `null` when absent or unparsable.
    pub metadata: serde_json::Value,
    /// Git commit SHA where this skill was found (git hook path only).
    /// The filesystem scanner leaves this `None`.
    pub commit_sha: Option<String>,
    /// Git blob SHA-1 of the SKILL.md file bytes; filled in by the scanner.
    pub blob_hash: Option<String>,
}
|
|
|
|
/// Compute the git blob SHA-1 hash of `content`.
|
|
/// Format: "blob {len}\0{data}"
|
|
fn git_blob_hash(content: &[u8]) -> String {
|
|
let size = content.len();
|
|
let header = format!("blob {}\0", size);
|
|
let mut hasher = sha1::Sha1::new();
|
|
hasher.update(header.as_bytes());
|
|
hasher.update(content);
|
|
hex::encode(hasher.finalize())
|
|
}
|
|
|
|
/// Parse a SKILL.md file and extract metadata + content.
|
|
fn parse_skill_file(slug: &str, raw: &str) -> DiscoveredSkill {
|
|
let (frontmatter, content) = extract_frontmatter(raw);
|
|
|
|
let metadata: serde_json::Value = frontmatter
|
|
.map(|fm| serde_json::from_str(fm).unwrap_or_default())
|
|
.unwrap_or_default();
|
|
|
|
let name = metadata
|
|
.get("name")
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from)
|
|
.unwrap_or_else(|| slug.replace('-', " ").replace('_', " "));
|
|
|
|
let description = metadata
|
|
.get("description")
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from);
|
|
|
|
DiscoveredSkill {
|
|
slug: slug.to_string(),
|
|
name,
|
|
description,
|
|
content: content.trim().to_string(),
|
|
metadata,
|
|
commit_sha: None,
|
|
blob_hash: None,
|
|
}
|
|
}
|
|
|
|
/// Split frontmatter (`--- ... ---`) from markdown content.
///
/// Returns `(frontmatter, body)`. The closing `---` must begin a line
/// (`"\n---"`), so a `---` embedded inside a frontmatter value no longer
/// terminates the block early (the old `find("---")` did). When there is no
/// opening or closing fence, the whole (left-trimmed) input is the body.
fn extract_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let trimmed = raw.trim_start();
    let after_open = match trimmed.strip_prefix("---") {
        Some(rest) => rest,
        None => return (None, trimmed),
    };
    // Search for a closing fence at the start of a line, not just any "---".
    match after_open.find("\n---") {
        Some(pos) => {
            // Keep the surrounding newlines inside the frontmatter slice,
            // matching the previous output for well-formed input.
            let fm = &after_open[..pos + 1];
            let rest = after_open[pos + 4..].trim_start();
            (Some(fm), rest)
        }
        // Unterminated fence: treat everything as body.
        None => (None, trimmed),
    }
}
|
|
|
|
/// Recursively scan `repo_path` for `SKILL.md` files.
|
|
/// The skill slug is `{short_repo_id}/{parent_dir_name}` to ensure uniqueness across repos.
|
|
pub fn scan_repo_for_skills(
|
|
repo_path: &Path,
|
|
repo_id: Uuid,
|
|
) -> Result<Vec<DiscoveredSkill>, AppError> {
|
|
let repo_id_prefix = &repo_id.to_string()[..8];
|
|
let mut discovered = Vec::new();
|
|
let mut stack = vec![repo_path.to_path_buf()];
|
|
|
|
while let Some(dir) = stack.pop() {
|
|
let entries = match std::fs::read_dir(&dir) {
|
|
Ok(e) => e,
|
|
Err(_) => continue,
|
|
};
|
|
for entry in entries.flatten() {
|
|
let path = entry.path();
|
|
if path.is_dir() {
|
|
stack.push(path);
|
|
} else if path.file_name().and_then(|n| n.to_str()) == Some("SKILL.md") {
|
|
if let Some(dir_name) = path.parent()
|
|
.and_then(|p| p.file_name())
|
|
.and_then(|n| n.to_str())
|
|
.filter(|s| !s.starts_with('.'))
|
|
{
|
|
let slug = format!("{}/{}", repo_id_prefix, dir_name);
|
|
if let Ok(raw) = std::fs::read(&path) {
|
|
let blob_hash = git_blob_hash(&raw);
|
|
let mut skill = parse_skill_file(&slug, &String::from_utf8_lossy(&raw));
|
|
skill.blob_hash = Some(blob_hash);
|
|
discovered.push(skill);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(discovered)
|
|
}
|
|
|
|
/// Scan a git2::Repository for skills and upsert them into the database.
|
|
/// Called from the git hook sync path.
|
|
pub async fn scan_and_sync_skills(
|
|
db: &db::database::AppDatabase,
|
|
project_uuid: Uuid,
|
|
repo: &RepoModel,
|
|
) -> Result<ScanSyncResult, AppError> {
|
|
// Resolve the repo path
|
|
let storage_path = Path::new(&repo.storage_path);
|
|
let discovered = scan_repo_for_skills(storage_path, repo.id)?;
|
|
|
|
if discovered.is_empty() {
|
|
return Ok(ScanSyncResult {
|
|
discovered: 0,
|
|
created: 0,
|
|
updated: 0,
|
|
removed: 0,
|
|
});
|
|
}
|
|
|
|
let now = Utc::now();
|
|
let mut created = 0i64;
|
|
let mut updated = 0i64;
|
|
|
|
// Collect all repo-sourced skills in this repo for this project
|
|
let existing: Vec<_> = SkillEntity::find()
|
|
.filter(C::ProjectUuid.eq(project_uuid))
|
|
.filter(C::Source.eq("repo"))
|
|
.filter(C::RepoId.eq(repo.id))
|
|
.all(db)
|
|
.await?;
|
|
|
|
let existing_by_slug: std::collections::HashMap<_, _> = existing
|
|
.into_iter()
|
|
.map(|s| (s.slug.clone(), s))
|
|
.collect();
|
|
|
|
let mut seen_slugs = std::collections::HashSet::new();
|
|
|
|
let discovered_count = discovered.len() as i64;
|
|
for skill in discovered {
|
|
seen_slugs.insert(skill.slug.clone());
|
|
|
|
let json_meta = serde_json::to_value(&skill.metadata).unwrap_or_default();
|
|
|
|
if let Some(existing_skill) = existing_by_slug.get(&skill.slug) {
|
|
if existing_skill.content != skill.content
|
|
|| existing_skill.metadata != json_meta
|
|
|| existing_skill.blob_hash != skill.blob_hash
|
|
{
|
|
let mut active: SkillActiveModel = existing_skill.clone().into();
|
|
active.content = Set(skill.content);
|
|
active.metadata = Set(json_meta);
|
|
active.commit_sha = Set(skill.commit_sha.clone());
|
|
active.blob_hash = Set(skill.blob_hash.clone());
|
|
active.updated_at = Set(now);
|
|
active.update(db).await?;
|
|
updated += 1;
|
|
}
|
|
} else {
|
|
let active = SkillActiveModel {
|
|
id: Set(0),
|
|
project_uuid: Set(project_uuid),
|
|
slug: Set(skill.slug.clone()),
|
|
name: Set(skill.name),
|
|
description: Set(skill.description),
|
|
source: Set("repo".to_string()),
|
|
repo_id: Set(Some(repo.id)),
|
|
commit_sha: Set(skill.commit_sha.clone()),
|
|
blob_hash: Set(skill.blob_hash.clone()),
|
|
content: Set(skill.content),
|
|
metadata: Set(json_meta),
|
|
enabled: Set(true),
|
|
created_by: Set(None),
|
|
created_at: Set(now),
|
|
updated_at: Set(now),
|
|
};
|
|
active.insert(db).await?;
|
|
created += 1;
|
|
}
|
|
}
|
|
|
|
// Remove skills that no longer exist in the repo
|
|
let mut removed = 0i64;
|
|
for (slug, old_skill) in existing_by_slug {
|
|
if !seen_slugs.contains(&slug) {
|
|
SkillEntity::delete_by_id(old_skill.id).exec(db).await?;
|
|
removed += 1;
|
|
}
|
|
}
|
|
|
|
Ok(ScanSyncResult {
|
|
discovered: discovered_count,
|
|
created,
|
|
updated,
|
|
removed,
|
|
})
|
|
}
|
|
|
|
/// Result of a scan + sync operation.
#[derive(Debug)]
pub struct ScanSyncResult {
    /// Total number of SKILL.md files found in the repository.
    pub discovered: i64,
    /// Number of new skill records inserted.
    pub created: i64,
    /// Number of existing records updated (content, metadata, or blob hash changed).
    pub updated: i64,
    /// Number of records deleted because their SKILL.md no longer exists in the repo.
    pub removed: i64,
}
|