use crate::AppService; use crate::error::AppError; use crate::git::BlobInfo; use crate::git_spawn; use base64::{Engine, engine::general_purpose::STANDARD as BASE64}; use redis::AsyncCommands; use serde::{Deserialize, Serialize}; use session::Session; const BLOB_CACHE_SIZE_LIMIT: usize = 512 * 1024; const README_SIZE_LIMIT: usize = 1024 * 1024; const README_VARIANTS: &[(&str, bool, bool)] = &[ ("README.md", true, true), ("README.markdown", true, true), ("README.mkd", true, true), ("README.mkdn", true, true), ("README.mdown", true, true), ("README.rst", false, true), ("README.adoc", false, true), ("README.txt", true, true), ("README.md.txt", true, true), ("readme.md", true, true), ("Readme.md", true, true), ("README.MD", true, true), ("readme.markdown", true, true), ("Readme", false, true), ("readme", false, true), ("README", false, true), ("readme.rst", false, true), ("readme.txt", false, true), ("README.md.orig", true, true), ("README.md.bak", true, true), ("docs/README.md", true, false), ("doc/README.md", true, false), ("docs/README", false, false), ("doc/README", false, false), ("docs/README.markdown", true, false), ("doc/README.markdown", true, false), ("docs/readme.md", true, false), ("doc/readme.md", true, false), (".github/README.md", true, false), ("wiki/README.md", true, false), ("site/README.md", true, false), ]; #[derive(Debug, Clone, Deserialize, utoipa::ToSchema)] pub struct BlobGetQuery { #[serde(default)] pub oid: String, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct BlobInfoResponse { pub oid: String, pub size: usize, pub is_binary: bool, } impl From for BlobInfoResponse { fn from(b: BlobInfo) -> Self { Self { oid: b.oid.to_string(), size: b.size, is_binary: b.is_binary, } } } #[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] pub struct BlobContentResponse { pub oid: String, pub size: usize, pub is_binary: bool, pub content: String, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct BlobExistsResponse { pub oid: String, pub exists: bool, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct BlobIsBinaryResponse { pub oid: String, pub is_binary: bool, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct BlobSizeResponse { pub oid: String, pub size: usize, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct BlobCreateResponse { pub oid: String, pub size: usize, } #[derive(Debug, Clone, Deserialize, utoipa::ToSchema)] pub struct BlobCreateRequest { pub data: String, } #[derive(Debug, Clone, Deserialize, utoipa::ToSchema)] pub struct GitReadmeQuery { pub r#ref: Option, } #[derive(Debug, Clone, Serialize, utoipa::ToSchema)] pub struct GitReadmeResponse { pub path: Option, pub content: Option, pub size: Option, pub encoding: Option, #[serde(default)] pub truncated: bool, #[serde(default)] pub is_binary: bool, } impl AppService { pub async fn git_readme( &self, namespace: String, repo_name: String, query: GitReadmeQuery, ctx: &Session, ) -> Result { let repo = self .utils_find_repo(namespace.clone(), repo_name.clone(), ctx) .await?; let rev = query.r#ref.unwrap_or_else(|| "HEAD".to_string()); let tree_oid: git::CommitOid = { let rev_clone = rev.clone(); git_spawn!(repo, domain -> { domain.resolve_rev(&rev_clone) })? .into() }; let (root_blobs, subdirs): ( std::collections::HashMap, std::collections::HashMap, ) = { let oid = tree_oid; git_spawn!(repo, domain -> { let entries = domain.tree_list(&oid)?; let mut blobs: std::collections::HashMap = Default::default(); let mut dirs: std::collections::HashMap = Default::default(); for entry in entries { let name_lower = entry.name.to_lowercase(); if entry.kind == "tree" { dirs.insert(name_lower, (entry.name.clone(), entry.oid)); } else if entry.kind == "blob" { blobs.insert(name_lower, (entry.name.clone(), entry.oid)); } } Ok::<_, AppError>((blobs, dirs)) })? }; let subdir_blobs: std::collections::HashMap< String, std::collections::HashMap, > = { let repo_clone = repo.clone(); let subdirs_clone = subdirs.clone(); let mut result: std::collections::HashMap< String, std::collections::HashMap, > = Default::default(); for (subdir_lower, (subdir_original, subdir_oid)) in subdirs_clone.clone() { let interested = matches!( subdir_lower.as_str(), "docs" | "doc" | ".github" | "wiki" | "site" ); if !interested { continue; } let oid = subdir_oid; let repo_inner = repo_clone.clone(); let entries: std::collections::HashMap = git_spawn!(repo_inner, domain -> { let entries = domain.tree_list(&oid)?; Ok::, AppError>( entries.into_iter() .filter(|e| e.kind == "blob") .map(|e| (e.name.to_lowercase(), (e.name.clone(), e.oid))) .collect(), ) })?; result.insert(subdir_original.clone(), entries); } result }; #[derive(Clone)] struct Candidate { path: String, oid: git::CommitOid, score: isize, } let mut best: Option = None; for &(variant, is_markdown, is_root) in README_VARIANTS { let lookup = variant.to_lowercase(); let found: Option<(String, git::CommitOid)> = if is_root { root_blobs.get(&lookup).map(|(n, o)| (n.clone(), o.clone())) } else { lookup.split_once('/').and_then(|(subdir, rest)| { subdir_blobs.get(subdir).and_then(|subdir_map| { subdir_map.get(rest).map(|(n, o)| (n.clone(), o.clone())) }) }) }; let Some((_blob_name, oid)) = found else { continue; }; let score = if is_root { 1000 } else { 0 } + if is_markdown { 100 } else { 0 } - variant.len() as isize; let better = best.as_ref().map(|b| score > b.score).unwrap_or(true); if better { best = Some(Candidate { path: variant.to_string(), oid, score, }); } } let Some(candidate) = best else { return Ok(GitReadmeResponse { path: None, content: None, size: None, encoding: None, truncated: false, is_binary: false, }); }; let (raw_bytes, is_binary, total_size) = { let oid = candidate.oid; git_spawn!(repo, domain -> { let content = domain.blob_content(&oid)?; Ok::<_, AppError>((content.content, content.is_binary, content.size)) })? }; if is_binary { return Ok(GitReadmeResponse { path: Some(candidate.path), content: None, size: Some(total_size), encoding: Some("binary".to_string()), truncated: false, is_binary: true, }); } let truncated = raw_bytes.len() > README_SIZE_LIMIT; let to_encode: Vec = if truncated { let mut cut = raw_bytes[..README_SIZE_LIMIT].to_vec(); while !cut.is_empty() && std::str::from_utf8(&cut).is_err() { cut.pop(); } cut } else { raw_bytes }; let (content_b64, is_binary_final, encoding) = match std::str::from_utf8(&to_encode) { Ok(_) => (BASE64.encode(&to_encode), false, "base64".to_string()), Err(_) => (BASE64.encode(&to_encode), true, "binary".to_string()), }; Ok(GitReadmeResponse { path: Some(candidate.path), content: Some(content_b64), size: Some(total_size), encoding: Some(encoding), truncated, is_binary: is_binary_final, }) } pub async fn git_blob_get( &self, namespace: String, repo_name: String, query: BlobGetQuery, ctx: &Session, ) -> Result { let repo = self.utils_find_repo(namespace, repo_name, ctx).await?; let oid_str = query.oid.clone(); let info = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo)?; let oid = git::CommitOid::new(&oid_str); domain.blob_get(&oid) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; Ok(BlobInfoResponse::from(info)) } pub async fn git_blob_exists( &self, namespace: String, repo_name: String, query: BlobGetQuery, ctx: &Session, ) -> Result { let repo = self.utils_find_repo(namespace, repo_name, ctx).await?; let oid_str = query.oid.clone(); let exists = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo)?; let oid = git::CommitOid::new(&oid_str); Ok::<_, git::GitError>(domain.blob_exists(&oid)) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; Ok(BlobExistsResponse { oid: query.oid, exists, }) } pub async fn git_blob_is_binary( &self, namespace: String, repo_name: String, query: BlobGetQuery, ctx: &Session, ) -> Result { let repo = self.utils_find_repo(namespace, repo_name, ctx).await?; let oid_str = query.oid.clone(); let is_binary = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo)?; let oid = git::CommitOid::new(&oid_str); domain.blob_is_binary(&oid) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; Ok(BlobIsBinaryResponse { oid: query.oid, is_binary, }) } pub async fn git_blob_content( &self, namespace: String, repo_name: String, query: BlobGetQuery, ctx: &Session, ) -> Result { let repo = self .utils_find_repo(namespace.clone(), repo_name.clone(), ctx) .await?; let cache_key = format!("git:blob:{}:{}:{}", namespace, repo_name, query.oid); if let Ok(mut conn) = self.cache.conn().await { if let Ok(cached) = conn.get::<_, String>(cache_key.clone()).await { if let Ok(cached) = serde_json::from_str::(&cached) { return Ok(cached); } } } let repo_clone = repo.clone(); let oid_str = query.oid.clone(); let content = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo_clone)?; let oid = git::CommitOid::new(&oid_str); domain.blob_content(&oid) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; let response = BlobContentResponse { oid: query.oid.clone(), size: content.size, is_binary: content.is_binary, content: BASE64.encode(&content.content), }; // Only cache blobs smaller than the size limit to prevent memory exhaustion if response.size < BLOB_CACHE_SIZE_LIMIT { if let Ok(mut conn) = self.cache.conn().await { if let Err(e) = conn .set_ex::( cache_key, serde_json::to_string(&response).unwrap_or_default(), 60 * 60, ) .await { slog::debug!(self.logs, "cache set failed (non-fatal): {}", e); } } } Ok(response) } pub async fn git_blob_size( &self, namespace: String, repo_name: String, query: BlobGetQuery, ctx: &Session, ) -> Result { let repo = self.utils_find_repo(namespace, repo_name, ctx).await?; let oid_str = query.oid.clone(); let size = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo)?; let oid = git::CommitOid::new(&oid_str); domain.blob_size(&oid) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; Ok(BlobSizeResponse { oid: query.oid, size, }) } pub async fn git_blob_create( &self, namespace: String, repo_name: String, request: BlobCreateRequest, ctx: &Session, ) -> Result { let repo = self.utils_find_repo(namespace, repo_name, ctx).await?; let data = BASE64 .decode(&request.data) .map_err(|_| AppError::InternalServerError("invalid base64 data".to_string()))?; let repo_clone = repo.clone(); let data_clone = data.clone(); let oid = tokio::task::spawn_blocking(move || { let domain = git::GitDomain::from_model(repo_clone)?; domain.blob_create(&data_clone) }) .await .map_err(|e| AppError::InternalServerError(format!("Task join error: {}", e)))? .map_err(AppError::from)?; Ok(BlobCreateResponse { oid: oid.to_string(), size: data.len(), }) } }