use std::collections::HashMap; use db::{database::AppDatabase, sqlx}; use uuid::Uuid; use crate::{bare::GitBare, cmd::oid::ObjectId, errors::GitError}; fn language_from_extension(ext: &str) -> Option<&str> { match ext { "rs" => Some("Rust"), "ts" | "tsx" => Some("TypeScript"), "js" | "jsx" | "mjs" | "cjs" => Some("JavaScript"), "py" | "pyi" => Some("Python"), "go" => Some("Go"), "java" => Some("Java"), "kt" | "kts" => Some("Kotlin"), "c" | "h" => Some("C"), "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Some("C++"), "cs" => Some("C#"), "rb" => Some("Ruby"), "php" => Some("PHP"), "swift" => Some("Swift"), "scala" => Some("Scala"), "lua" => Some("Lua"), "r" | "R" => Some("R"), "sql" => Some("SQL"), "sh" | "bash" => Some("Shell"), "ps1" => Some("PowerShell"), "dart" => Some("Dart"), "el" | "lisp" => Some("Emacs Lisp"), "clj" | "cljs" => Some("Clojure"), "hs" => Some("Haskell"), "ex" | "exs" => Some("Elixir"), "erl" => Some("Erlang"), "vue" => Some("Vue"), "svelte" => Some("Svelte"), "css" | "scss" | "sass" | "less" => Some("CSS"), "html" | "htm" => Some("HTML"), "xml" | "xsl" | "xsd" => Some("XML"), "json" | "jsonl" => Some("JSON"), "yaml" | "yml" => Some("YAML"), "toml" => Some("TOML"), "md" | "markdown" => Some("Markdown"), "dockerfile" => Some("Dockerfile"), "proto" => Some("Protocol Buffers"), "tf" => Some("HCL"), "zig" => Some("Zig"), "nim" => Some("Nim"), "v" => Some("V"), "wasm" => Some("WebAssembly"), "glsl" => Some("GLSL"), "cu" | "cuh" => Some("CUDA"), "makefile" => Some("Makefile"), _ => None, } } fn language_from_filename(name: &str) -> Option<&str> { let lower = name.to_ascii_lowercase(); match lower.as_str() { "makefile" | "gnumakefile" => Some("Makefile"), "dockerfile" => Some("Dockerfile"), "cmakelists.txt" => Some("CMake"), "cargo.toml" => Some("TOML"), "package.json" => Some("JSON"), "tsconfig.json" => Some("JSON"), ".gitignore" | ".gitattributes" => Some("Gitignore"), _ => None, } } fn collect_language_stats( bare: &GitBare, ) -> Result, GitError> { let repo = bare.gix_repo()?; let head_id = repo.head_id().map_err(|e| { GitError::Internal(format!("failed to resolve HEAD: {}", e)) })?; let commit = repo.find_commit(head_id.detach()).map_err(|e| { GitError::Internal(format!("failed to find HEAD commit: {}", e)) })?; let decoded = commit.decode().map_err(|e| { GitError::Internal(format!("failed to decode commit: {}", e)) })?; let tree_oid = ObjectId::new(decoded.tree().to_hex().to_string()); let mut stats: HashMap = HashMap::new(); walk_tree(bare, &tree_oid, &mut stats)?; Ok(stats) } fn walk_tree( bare: &GitBare, tree_oid: &ObjectId, stats: &mut HashMap, ) -> Result<(), GitError> { let entries = bare.tree_entries(tree_oid.clone())?; for entry in entries { if entry.kind == crate::cmd::tree::TreeKind::Tree { walk_tree(bare, &entry.oid, stats)?; continue; } if entry.kind == crate::cmd::tree::TreeKind::LfsPointer { continue; } if entry.is_binary { continue; } let language = language_from_filename(&entry.name).or_else(|| { let ext = entry.name.rsplit('.').next().unwrap_or(""); language_from_extension(ext) }); if let Some(lang) = language { let size = blob_size(bare, &entry.oid)?; *stats.entry(lang.to_string()).or_insert(0) += size; } } Ok(()) } fn blob_size(bare: &GitBare, oid: &ObjectId) -> Result { let repo = bare.gix_repo()?; let gix_id: gix::hash::ObjectId = oid .try_into() .map_err(|e| GitError::Internal(format!("invalid oid: {}", e)))?; let header = repo.find_header(gix_id).map_err(|e| { GitError::Internal(format!("blob header not found: {}", e)) })?; Ok(header.size() as u64) } #[tracing::instrument(skip(db, bare), fields(repo_id = %repo_id))] pub async fn sync_languages( db: &AppDatabase, bare: &GitBare, repo_id: Uuid, ) -> Result<(), GitError> { let stats = collect_language_stats(bare)?; if stats.is_empty() { return Ok(()); } let total_bytes: u64 = stats.values().sum(); let pool = db.writer(); let mut tx = pool.begin().await.map_err(|e| { GitError::Internal(format!("failed to begin tx: {}", e)) })?; sqlx::query("DELETE FROM repo_language WHERE repo = $1") .bind(repo_id) .execute(&mut *tx) .await .map_err(|e| { GitError::Internal(format!("failed to delete repo_language: {}", e)) })?; for (language, bytes) in &stats { let percentage = if total_bytes > 0 { (*bytes as f32 / total_bytes as f32) * 100.0 } else { 0.0 }; sqlx::query( "INSERT INTO repo_language (repo, language, bytes, percentage) VALUES ($1, $2, $3, $4)" ) .bind(repo_id) .bind(language) .bind(*bytes as i64) .bind(percentage) .execute(&mut *tx) .await .map_err(|e| GitError::Internal(format!("failed to insert repo_language: {}", e)))?; } tx.commit().await.map_err(|e| { GitError::Internal(format!("failed to commit tx: {}", e)) })?; tracing::info!( repo_id = %repo_id, languages = stats.len(), total_bytes, "language stats synced" ); Ok(()) }