//! Archive operations. //! //! Generates .tar, .tar.gz, and .zip archives from git trees with caching support. use std::fs; use std::io::{Cursor, Write}; use std::path::PathBuf; use flate2::Compression; use flate2::write::GzEncoder; use crate::archive::types::{ArchiveEntry, ArchiveFormat, ArchiveOptions, ArchiveSummary}; use crate::commit::types::CommitOid; use crate::{GitDomain, GitError, GitResult}; impl GitDomain { /// Directory where cached archives are stored. fn archive_cache_dir(&self) -> PathBuf { PathBuf::from(self.repo().path()).join(".git-archives") } /// Path to the cached archive file for a given commit/format/options. fn archive_cache_path( &self, commit_oid: &CommitOid, format: ArchiveFormat, opts: &ArchiveOptions, ) -> PathBuf { let ext = match format { ArchiveFormat::Tar => "tar", ArchiveFormat::TarGz => "tar.gz", ArchiveFormat::Zip => "zip", }; let key = opts.cache_key(); self.archive_cache_dir() .join(format!("{}{}.{}", commit_oid.as_str(), key, ext)) } /// Ensure the cache directory exists. fn ensure_archive_cache_dir(&self) -> GitResult<()> { let dir = self.archive_cache_dir(); if !dir.exists() { fs::create_dir_all(&dir).map_err(|e| GitError::IoError(e.to_string()))?; } Ok(()) } /// Generate a plain tar archive from a commit's tree. /// Caches the result after first build. pub fn archive_tar( &self, commit_oid: &CommitOid, opts: Option, ) -> GitResult> { let opts = opts.unwrap_or_default(); let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::Tar, &opts); if cache_path.exists() { return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string())); } let tree = self.tree_from_commit(commit_oid)?; let mut buf = Vec::new(); let base = opts.prefix.as_deref().unwrap_or(""); self.walk_tar(&mut buf, &tree, base, &opts)?; self.ensure_archive_cache_dir()?; fs::write(&cache_path, &buf).map_err(|e| GitError::IoError(e.to_string()))?; Ok(buf) } /// Generate a tar.gz archive from a commit's tree. /// Caches the result after first build. pub fn archive_tar_gz( &self, commit_oid: &CommitOid, opts: Option, ) -> GitResult> { let opts = opts.unwrap_or_default(); let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::TarGz, &opts); if cache_path.exists() { return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string())); } let tree = self.tree_from_commit(commit_oid)?; let mut buf = Vec::new(); { let encoder = GzEncoder::new(&mut buf, Compression::default()); let mut builder = tar::Builder::new(encoder); let base = opts.prefix.as_deref().unwrap_or(""); self.walk_tar_builder(&mut builder, &tree, base, &opts)?; let encoder = builder .into_inner() .map_err(|e| GitError::Internal(e.to_string()))?; encoder .finish() .map_err(|e| GitError::Internal(e.to_string()))?; } self.ensure_archive_cache_dir()?; fs::write(&cache_path, &buf).map_err(|e| GitError::IoError(e.to_string()))?; Ok(buf) } /// Generate a zip archive from a commit's tree. /// Caches the result after first build. pub fn archive_zip( &self, commit_oid: &CommitOid, opts: Option, ) -> GitResult> { let opts = opts.unwrap_or_default(); let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::Zip, &opts); if cache_path.exists() { return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string())); } let tree = self.tree_from_commit(commit_oid)?; let mut zip_buf = Vec::new(); let base = opts.prefix.as_deref().unwrap_or(""); self.walk_zip(&mut zip_buf, &tree, base, &opts)?; self.ensure_archive_cache_dir()?; fs::write(&cache_path, &zip_buf).map_err(|e| GitError::IoError(e.to_string()))?; Ok(zip_buf) } /// Generate an archive in the specified format. /// Results are cached keyed by (commit_oid, format, options). pub fn archive( &self, commit_oid: &CommitOid, format: ArchiveFormat, opts: Option, ) -> GitResult> { match format { ArchiveFormat::Tar => self.archive_tar(commit_oid, opts), ArchiveFormat::TarGz => self.archive_tar_gz(commit_oid, opts), ArchiveFormat::Zip => self.archive_zip(commit_oid, opts), } } /// List all entries that would be included in an archive. pub fn archive_list( &self, commit_oid: &CommitOid, opts: Option, ) -> GitResult> { let tree = self.tree_from_commit(commit_oid)?; let opts = opts.unwrap_or_default(); let mut entries = Vec::new(); self.collect_tree_entries(&mut entries, &tree, "", 0, &opts)?; Ok(entries) } pub fn archive_summary( &self, commit_oid: &CommitOid, format: ArchiveFormat, opts: Option, ) -> GitResult { let entries = self.archive_list(commit_oid, opts)?; let total_size: u64 = entries.iter().map(|e| e.size).sum(); Ok(ArchiveSummary { commit_oid: commit_oid.to_string(), format, total_entries: entries.len(), total_size, }) } pub fn archive_cached( &self, commit_oid: &CommitOid, format: ArchiveFormat, opts: Option, ) -> bool { let opts = opts.unwrap_or_default(); self.archive_cache_path(commit_oid, format, &opts).exists() } /// Invalidate (delete) a cached archive, if it exists. /// Call this when you need a fresh build after the repo state changes. pub fn archive_invalidate( &self, commit_oid: &CommitOid, format: ArchiveFormat, opts: Option, ) -> GitResult { let opts = opts.unwrap_or_default(); let path = self.archive_cache_path(commit_oid, format, &opts); if path.exists() { fs::remove_file(&path).map_err(|e| GitError::IoError(e.to_string()))?; Ok(true) } else { Ok(false) } } /// List all cached archive paths for a given commit. pub fn archive_cache_list(&self, commit_oid: &CommitOid) -> GitResult> { let dir = self.archive_cache_dir(); if !dir.exists() { return Ok(Vec::new()); } let prefix = commit_oid.as_str(); let mut paths = Vec::new(); for entry in fs::read_dir(&dir).map_err(|e| GitError::IoError(e.to_string()))? { let entry = entry.map_err(|e| GitError::IoError(e.to_string()))?; let name = entry.file_name(); let name = name.to_string_lossy(); if name.starts_with(prefix) { paths.push(entry.path()); } } Ok(paths) } /// Invalidate all cached archives for a given commit. pub fn archive_invalidate_all(&self, commit_oid: &CommitOid) -> GitResult { let paths = self.archive_cache_list(commit_oid)?; let count = paths.len(); for p in paths { fs::remove_file(&p).map_err(|e| GitError::IoError(e.to_string()))?; } Ok(count) } fn tree_from_commit(&self, commit_oid: &CommitOid) -> GitResult> { let oid = commit_oid .to_oid() .map_err(|_| GitError::InvalidOid(commit_oid.to_string()))?; let commit = self .repo() .find_commit(oid) .map_err(|e| GitError::Internal(e.to_string()))?; self.repo() .find_tree(commit.tree_id()) .map_err(|e| GitError::Internal(e.to_string())) } fn walk_tar( &self, buf: &mut Vec, tree: &git2::Tree<'_>, base: &str, opts: &ArchiveOptions, ) -> GitResult<()> { for entry in tree.iter() { let name = entry.name().unwrap_or(""); let full_path = if base.is_empty() { name.to_string() } else { format!("{}/{}", base, name) }; if !self.entry_passes_filter(&full_path, opts) { continue; } let oid = entry.id(); let obj = match self.repo().find_object(oid, None) { Ok(o) => o, Err(_) => continue, }; let mode = entry.filemode() as u32; if obj.kind() == Some(git2::ObjectType::Tree) { if opts .max_depth .map_or(true, |d| full_path.matches('/').count() < d) { let sub_tree = self .repo() .find_tree(oid) .map_err(|e| GitError::Internal(e.to_string()))?; self.walk_tar(buf, &sub_tree, &full_path, opts)?; } } else { let blob = match obj.as_blob() { Some(b) => b, None => continue, }; let content = blob.content(); let size = content.len() as u64; let mut header = [0u8; 512]; let path_bytes = full_path.as_bytes(); // tar USTAR format: prefix (≤155) + "/" + name (≤100) = max 255 bytes. // Split at the last "/" that keeps prefix ≤ 155. Fall back to truncation error. const NAME_MAX: usize = 100; const PREFIX_MAX: usize = 155; if path_bytes.len() <= NAME_MAX { // Fits directly in name field. header[..path_bytes.len()].copy_from_slice(path_bytes); } else if path_bytes.len() <= PREFIX_MAX + 1 + NAME_MAX { // Find last "/" that leaves prefix ≤ PREFIX_MAX. let split_at = path_bytes[..path_bytes.len() - NAME_MAX] .iter() .rposition(|&b| b == b'/') .map(|pos| pos + 1) .unwrap_or(0); let prefix_len = split_at; let name_len = path_bytes.len() - split_at; if prefix_len > PREFIX_MAX || name_len > NAME_MAX { return Err(GitError::Internal(format!( "path too long for tar format: {}", full_path ))); } header[..prefix_len].copy_from_slice(&path_bytes[..prefix_len]); header[prefix_len..prefix_len + 1].copy_from_slice(b"/"); header[prefix_len + 1..prefix_len + 1 + name_len] .copy_from_slice(&path_bytes[prefix_len..]); } else { return Err(GitError::Internal(format!( "path too long for tar format: {}", full_path ))); } let mode_octal = format!("{:o}", mode & 0o777); header[100..108].copy_from_slice(mode_octal.as_bytes()); let size_octal = format!("{:o}", size); if size_octal.len() > 12 { return Err(GitError::Internal(format!( "file size {} exceeds maximum for tar format (12-byte octal field)", size ))); } header[124..136].copy_from_slice(size_octal.as_bytes()); header[136..148].copy_from_slice(b"0 "); header[148..156].copy_from_slice(b" "); header[156] = b'0'; header[257..265].copy_from_slice(b"ustar\0"); // Calculate checksum: sum all 512 bytes with checksum field filled with spaces. let sum: u32 = header.iter().map(|&b| b as u32).sum::(); // tar spec: 8-byte checksum field, formatted as 6 octal digits + space + null. let sum_octal = format!("{:06o} \0", sum); header[148..156].copy_from_slice(sum_octal.as_bytes()); buf.write_all(&header) .map_err(|e| GitError::IoError(e.to_string()))?; buf.write_all(content) .map_err(|e| GitError::IoError(e.to_string()))?; let written = 512 + content.len(); let padding = (512 - written % 512) % 512; if padding > 0 { buf.write_all(&vec![0u8; padding]) .map_err(|e| GitError::IoError(e.to_string()))?; } } } Ok(()) } fn walk_tar_builder( &self, builder: &mut tar::Builder>>, tree: &git2::Tree<'_>, base: &str, opts: &ArchiveOptions, ) -> GitResult<()> { for entry in tree.iter() { let name = entry.name().unwrap_or(""); let full_path = if base.is_empty() { name.to_string() } else { format!("{}/{}", base, name) }; if !self.entry_passes_filter(&full_path, opts) { continue; } let oid = entry.id(); let obj = match self.repo().find_object(oid, None) { Ok(o) => o, Err(_) => continue, }; let mode = entry.filemode() as u32; if obj.kind() == Some(git2::ObjectType::Tree) { if opts .max_depth .map_or(true, |d| full_path.matches('/').count() < d) { let sub_tree = self .repo() .find_tree(oid) .map_err(|e| GitError::Internal(e.to_string()))?; self.walk_tar_builder(builder, &sub_tree, &full_path, opts)?; } } else { let blob = match obj.as_blob() { Some(b) => b, None => continue, }; let content = blob.content(); let mut header = tar::Header::new_gnu(); header .set_path(&full_path) .map_err(|e| GitError::Internal(e.to_string()))?; header.set_size(content.len() as u64); header.set_mode(mode & 0o755); header.set_cksum(); builder .append(&header, content) .map_err(|e| GitError::Internal(e.to_string()))?; } } Ok(()) } fn walk_zip( &self, zip_buf: &mut Vec, tree: &git2::Tree<'_>, base: &str, opts: &ArchiveOptions, ) -> GitResult<()> { let cursor = Cursor::new(zip_buf); let mut zip = zip::ZipWriter::new(cursor); zip = self.walk_zip_impl(zip, tree, base, opts)?; let _cursor = zip .finish() .map_err(|e| GitError::Internal(e.to_string()))?; Ok(()) } fn walk_zip_impl<'a>( &'a self, mut zip: zip::ZipWriter>>, tree: &git2::Tree<'_>, base: &str, opts: &ArchiveOptions, ) -> GitResult>>> { for entry in tree.iter() { let name = entry.name().unwrap_or(""); let full_path = if base.is_empty() { name.to_string() } else { format!("{}/{}", base, name) }; if !self.entry_passes_filter(&full_path, opts) { continue; } let oid = entry.id(); let obj = match self.repo().find_object(oid, None) { Ok(o) => o, Err(_) => continue, }; let mode = entry.filemode() as u32; if obj.kind() == Some(git2::ObjectType::Tree) { if opts .max_depth .map_or(true, |d| full_path.matches('/').count() < d) { let sub_tree = self .repo() .find_tree(oid) .map_err(|e| GitError::Internal(e.to_string()))?; zip = self.walk_zip_impl(zip, &sub_tree, &full_path, opts)?; } } else { let blob = match obj.as_blob() { Some(b) => b, None => continue, }; let content = blob.content(); let options = zip::write::SimpleFileOptions::default() .compression_method(zip::CompressionMethod::Deflated) .unix_permissions(mode & 0o755); zip.start_file(&full_path, options) .map_err(|e| GitError::Internal(e.to_string()))?; zip.write_all(content) .map_err(|e| GitError::Internal(e.to_string()))?; } } Ok(zip) } fn collect_tree_entries( &self, entries: &mut Vec, tree: &git2::Tree<'_>, prefix: &str, depth: usize, opts: &ArchiveOptions, ) -> GitResult<()> { for entry in tree.iter() { let name = entry.name().unwrap_or(""); let full_path = if prefix.is_empty() { name.to_string() } else { format!("{}/{}", prefix, name) }; if !self.entry_passes_filter(&full_path, opts) { continue; } if opts.max_depth.map_or(false, |d| depth >= d) { continue; } let oid = entry.id(); let obj = match self.repo().find_object(oid, None) { Ok(o) => o, Err(_) => continue, }; let mode = entry.filemode() as u32; let size = obj.as_blob().map(|b| b.size() as u64).unwrap_or(0); if obj.kind() == Some(git2::ObjectType::Tree) { let sub_tree = self .repo() .find_tree(oid) .map_err(|e| GitError::Internal(e.to_string()))?; self.collect_tree_entries(entries, &sub_tree, &full_path, depth + 1, opts)?; } else { entries.push(ArchiveEntry { path: full_path, oid: oid.to_string(), size, mode, }); } } Ok(()) } fn entry_passes_filter(&self, full_path: &str, opts: &ArchiveOptions) -> bool { if let Some(ref filter) = opts.path_filter { if !full_path.starts_with(filter) { return false; } } true } }