//! Archive operations.
//!
//! Generates .tar, .tar.gz, and .zip archives from git trees with caching support.
|
use std::fs;
|
|
use std::io::{Cursor, Write};
|
|
use std::path::PathBuf;
|
|
|
|
use flate2::Compression;
|
|
use flate2::write::GzEncoder;
|
|
|
|
use crate::archive::types::{ArchiveEntry, ArchiveFormat, ArchiveOptions, ArchiveSummary};
|
|
use crate::commit::types::CommitOid;
|
|
use crate::{GitDomain, GitError, GitResult};
|
|
|
|
impl GitDomain {
|
|
/// Directory where cached archives are stored.
|
|
fn archive_cache_dir(&self) -> PathBuf {
|
|
PathBuf::from(self.repo().path()).join(".git-archives")
|
|
}
|
|
|
|
/// Path to the cached archive file for a given commit/format/options.
|
|
fn archive_cache_path(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
format: ArchiveFormat,
|
|
opts: &ArchiveOptions,
|
|
) -> PathBuf {
|
|
let ext = match format {
|
|
ArchiveFormat::Tar => "tar",
|
|
ArchiveFormat::TarGz => "tar.gz",
|
|
ArchiveFormat::Zip => "zip",
|
|
};
|
|
let key = opts.cache_key();
|
|
self.archive_cache_dir()
|
|
.join(format!("{}{}.{}", commit_oid.as_str(), key, ext))
|
|
}
|
|
|
|
/// Ensure the cache directory exists.
|
|
fn ensure_archive_cache_dir(&self) -> GitResult<()> {
|
|
let dir = self.archive_cache_dir();
|
|
if !dir.exists() {
|
|
fs::create_dir_all(&dir).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Generate a plain tar archive from a commit's tree.
|
|
/// Caches the result after first build.
|
|
pub fn archive_tar(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<Vec<u8>> {
|
|
let opts = opts.unwrap_or_default();
|
|
let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::Tar, &opts);
|
|
|
|
if cache_path.exists() {
|
|
return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string()));
|
|
}
|
|
|
|
let tree = self.tree_from_commit(commit_oid)?;
|
|
let mut buf = Vec::new();
|
|
let base = opts.prefix.as_deref().unwrap_or("");
|
|
self.walk_tar(&mut buf, &tree, base, &opts)?;
|
|
|
|
self.ensure_archive_cache_dir()?;
|
|
fs::write(&cache_path, &buf).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
|
|
Ok(buf)
|
|
}
|
|
|
|
/// Generate a tar.gz archive from a commit's tree.
|
|
/// Caches the result after first build.
|
|
pub fn archive_tar_gz(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<Vec<u8>> {
|
|
let opts = opts.unwrap_or_default();
|
|
let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::TarGz, &opts);
|
|
|
|
if cache_path.exists() {
|
|
return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string()));
|
|
}
|
|
|
|
let tree = self.tree_from_commit(commit_oid)?;
|
|
let mut buf = Vec::new();
|
|
{
|
|
let encoder = GzEncoder::new(&mut buf, Compression::default());
|
|
let mut builder = tar::Builder::new(encoder);
|
|
let base = opts.prefix.as_deref().unwrap_or("");
|
|
self.walk_tar_builder(&mut builder, &tree, base, &opts)?;
|
|
let encoder = builder
|
|
.into_inner()
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
encoder
|
|
.finish()
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
}
|
|
|
|
self.ensure_archive_cache_dir()?;
|
|
fs::write(&cache_path, &buf).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
|
|
Ok(buf)
|
|
}
|
|
|
|
/// Generate a zip archive from a commit's tree.
|
|
/// Caches the result after first build.
|
|
pub fn archive_zip(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<Vec<u8>> {
|
|
let opts = opts.unwrap_or_default();
|
|
let cache_path = self.archive_cache_path(commit_oid, ArchiveFormat::Zip, &opts);
|
|
|
|
if cache_path.exists() {
|
|
return fs::read(&cache_path).map_err(|e| GitError::IoError(e.to_string()));
|
|
}
|
|
|
|
let tree = self.tree_from_commit(commit_oid)?;
|
|
let mut zip_buf = Vec::new();
|
|
let base = opts.prefix.as_deref().unwrap_or("");
|
|
self.walk_zip(&mut zip_buf, &tree, base, &opts)?;
|
|
|
|
self.ensure_archive_cache_dir()?;
|
|
fs::write(&cache_path, &zip_buf).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
|
|
Ok(zip_buf)
|
|
}
|
|
|
|
/// Generate an archive in the specified format.
|
|
/// Results are cached keyed by (commit_oid, format, options).
|
|
pub fn archive(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
format: ArchiveFormat,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<Vec<u8>> {
|
|
match format {
|
|
ArchiveFormat::Tar => self.archive_tar(commit_oid, opts),
|
|
ArchiveFormat::TarGz => self.archive_tar_gz(commit_oid, opts),
|
|
ArchiveFormat::Zip => self.archive_zip(commit_oid, opts),
|
|
}
|
|
}
|
|
|
|
/// List all entries that would be included in an archive.
|
|
pub fn archive_list(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<Vec<ArchiveEntry>> {
|
|
let tree = self.tree_from_commit(commit_oid)?;
|
|
let opts = opts.unwrap_or_default();
|
|
let mut entries = Vec::new();
|
|
self.collect_tree_entries(&mut entries, &tree, "", 0, &opts)?;
|
|
Ok(entries)
|
|
}
|
|
|
|
pub fn archive_summary(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
format: ArchiveFormat,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<ArchiveSummary> {
|
|
let entries = self.archive_list(commit_oid, opts)?;
|
|
let total_size: u64 = entries.iter().map(|e| e.size).sum();
|
|
Ok(ArchiveSummary {
|
|
commit_oid: commit_oid.to_string(),
|
|
format,
|
|
total_entries: entries.len(),
|
|
total_size,
|
|
})
|
|
}
|
|
|
|
pub fn archive_cached(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
format: ArchiveFormat,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> bool {
|
|
let opts = opts.unwrap_or_default();
|
|
self.archive_cache_path(commit_oid, format, &opts).exists()
|
|
}
|
|
|
|
/// Invalidate (delete) a cached archive, if it exists.
|
|
/// Call this when you need a fresh build after the repo state changes.
|
|
pub fn archive_invalidate(
|
|
&self,
|
|
commit_oid: &CommitOid,
|
|
format: ArchiveFormat,
|
|
opts: Option<ArchiveOptions>,
|
|
) -> GitResult<bool> {
|
|
let opts = opts.unwrap_or_default();
|
|
let path = self.archive_cache_path(commit_oid, format, &opts);
|
|
if path.exists() {
|
|
fs::remove_file(&path).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
Ok(true)
|
|
} else {
|
|
Ok(false)
|
|
}
|
|
}
|
|
|
|
/// List all cached archive paths for a given commit.
|
|
pub fn archive_cache_list(&self, commit_oid: &CommitOid) -> GitResult<Vec<PathBuf>> {
|
|
let dir = self.archive_cache_dir();
|
|
if !dir.exists() {
|
|
return Ok(Vec::new());
|
|
}
|
|
let prefix = commit_oid.as_str();
|
|
let mut paths = Vec::new();
|
|
for entry in fs::read_dir(&dir).map_err(|e| GitError::IoError(e.to_string()))? {
|
|
let entry = entry.map_err(|e| GitError::IoError(e.to_string()))?;
|
|
let name = entry.file_name();
|
|
let name = name.to_string_lossy();
|
|
if name.starts_with(prefix) {
|
|
paths.push(entry.path());
|
|
}
|
|
}
|
|
Ok(paths)
|
|
}
|
|
|
|
/// Invalidate all cached archives for a given commit.
|
|
pub fn archive_invalidate_all(&self, commit_oid: &CommitOid) -> GitResult<usize> {
|
|
let paths = self.archive_cache_list(commit_oid)?;
|
|
let count = paths.len();
|
|
for p in paths {
|
|
fs::remove_file(&p).map_err(|e| GitError::IoError(e.to_string()))?;
|
|
}
|
|
Ok(count)
|
|
}
|
|
|
|
fn tree_from_commit(&self, commit_oid: &CommitOid) -> GitResult<git2::Tree<'_>> {
|
|
let oid = commit_oid
|
|
.to_oid()
|
|
.map_err(|_| GitError::InvalidOid(commit_oid.to_string()))?;
|
|
let commit = self
|
|
.repo()
|
|
.find_commit(oid)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
self.repo()
|
|
.find_tree(commit.tree_id())
|
|
.map_err(|e| GitError::Internal(e.to_string()))
|
|
}
|
|
|
|
fn walk_tar(
|
|
&self,
|
|
buf: &mut Vec<u8>,
|
|
tree: &git2::Tree<'_>,
|
|
base: &str,
|
|
opts: &ArchiveOptions,
|
|
) -> GitResult<()> {
|
|
for entry in tree.iter() {
|
|
let name = entry.name().unwrap_or("");
|
|
let full_path = if base.is_empty() {
|
|
name.to_string()
|
|
} else {
|
|
format!("{}/{}", base, name)
|
|
};
|
|
|
|
if !self.entry_passes_filter(&full_path, opts) {
|
|
continue;
|
|
}
|
|
|
|
let oid = entry.id();
|
|
let obj = match self.repo().find_object(oid, None) {
|
|
Ok(o) => o,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
let mode = entry.filemode() as u32;
|
|
if obj.kind() == Some(git2::ObjectType::Tree) {
|
|
if opts
|
|
.max_depth
|
|
.map_or(true, |d| full_path.matches('/').count() < d)
|
|
{
|
|
let sub_tree = self
|
|
.repo()
|
|
.find_tree(oid)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
self.walk_tar(buf, &sub_tree, &full_path, opts)?;
|
|
}
|
|
} else {
|
|
let blob = match obj.as_blob() {
|
|
Some(b) => b,
|
|
None => continue,
|
|
};
|
|
let content = blob.content();
|
|
let size = content.len() as u64;
|
|
|
|
let mut header = [0u8; 512];
|
|
let path_bytes = full_path.as_bytes();
|
|
// tar USTAR format: prefix (≤155) + "/" + name (≤100) = max 255 bytes.
|
|
// Split at the last "/" that keeps prefix ≤ 155. Fall back to truncation error.
|
|
const NAME_MAX: usize = 100;
|
|
const PREFIX_MAX: usize = 155;
|
|
if path_bytes.len() <= NAME_MAX {
|
|
// Fits directly in name field.
|
|
header[..path_bytes.len()].copy_from_slice(path_bytes);
|
|
} else if path_bytes.len() <= PREFIX_MAX + 1 + NAME_MAX {
|
|
// Find last "/" that leaves prefix ≤ PREFIX_MAX.
|
|
let split_at = path_bytes[..path_bytes.len() - NAME_MAX]
|
|
.iter()
|
|
.rposition(|&b| b == b'/')
|
|
.map(|pos| pos + 1)
|
|
.unwrap_or(0);
|
|
let prefix_len = split_at;
|
|
let name_len = path_bytes.len() - split_at;
|
|
if prefix_len > PREFIX_MAX || name_len > NAME_MAX {
|
|
return Err(GitError::Internal(format!(
|
|
"path too long for tar format: {}",
|
|
full_path
|
|
)));
|
|
}
|
|
header[..prefix_len].copy_from_slice(&path_bytes[..prefix_len]);
|
|
header[prefix_len..prefix_len + 1].copy_from_slice(b"/");
|
|
header[prefix_len + 1..prefix_len + 1 + name_len]
|
|
.copy_from_slice(&path_bytes[prefix_len..]);
|
|
} else {
|
|
return Err(GitError::Internal(format!(
|
|
"path too long for tar format: {}",
|
|
full_path
|
|
)));
|
|
}
|
|
let mode_octal = format!("{:o}", mode & 0o777);
|
|
header[100..108].copy_from_slice(mode_octal.as_bytes());
|
|
let size_octal = format!("{:o}", size);
|
|
if size_octal.len() > 12 {
|
|
return Err(GitError::Internal(format!(
|
|
"file size {} exceeds maximum for tar format (12-byte octal field)",
|
|
size
|
|
)));
|
|
}
|
|
header[124..136].copy_from_slice(size_octal.as_bytes());
|
|
header[136..148].copy_from_slice(b"0 ");
|
|
header[148..156].copy_from_slice(b" ");
|
|
header[156] = b'0';
|
|
header[257..265].copy_from_slice(b"ustar\0");
|
|
|
|
// Calculate checksum: sum all 512 bytes with checksum field filled with spaces.
|
|
let sum: u32 = header.iter().map(|&b| b as u32).sum::<u32>();
|
|
// tar spec: 8-byte checksum field, formatted as 6 octal digits + space + null.
|
|
let sum_octal = format!("{:06o} \0", sum);
|
|
header[148..156].copy_from_slice(sum_octal.as_bytes());
|
|
|
|
buf.write_all(&header)
|
|
.map_err(|e| GitError::IoError(e.to_string()))?;
|
|
buf.write_all(content)
|
|
.map_err(|e| GitError::IoError(e.to_string()))?;
|
|
let written = 512 + content.len();
|
|
let padding = (512 - written % 512) % 512;
|
|
if padding > 0 {
|
|
buf.write_all(&vec![0u8; padding])
|
|
.map_err(|e| GitError::IoError(e.to_string()))?;
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn walk_tar_builder(
|
|
&self,
|
|
builder: &mut tar::Builder<GzEncoder<&mut Vec<u8>>>,
|
|
tree: &git2::Tree<'_>,
|
|
base: &str,
|
|
opts: &ArchiveOptions,
|
|
) -> GitResult<()> {
|
|
for entry in tree.iter() {
|
|
let name = entry.name().unwrap_or("");
|
|
let full_path = if base.is_empty() {
|
|
name.to_string()
|
|
} else {
|
|
format!("{}/{}", base, name)
|
|
};
|
|
|
|
if !self.entry_passes_filter(&full_path, opts) {
|
|
continue;
|
|
}
|
|
|
|
let oid = entry.id();
|
|
let obj = match self.repo().find_object(oid, None) {
|
|
Ok(o) => o,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
let mode = entry.filemode() as u32;
|
|
if obj.kind() == Some(git2::ObjectType::Tree) {
|
|
if opts
|
|
.max_depth
|
|
.map_or(true, |d| full_path.matches('/').count() < d)
|
|
{
|
|
let sub_tree = self
|
|
.repo()
|
|
.find_tree(oid)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
self.walk_tar_builder(builder, &sub_tree, &full_path, opts)?;
|
|
}
|
|
} else {
|
|
let blob = match obj.as_blob() {
|
|
Some(b) => b,
|
|
None => continue,
|
|
};
|
|
let content = blob.content();
|
|
|
|
let mut header = tar::Header::new_gnu();
|
|
header
|
|
.set_path(&full_path)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
header.set_size(content.len() as u64);
|
|
header.set_mode(mode & 0o755);
|
|
header.set_cksum();
|
|
|
|
builder
|
|
.append(&header, content)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn walk_zip(
|
|
&self,
|
|
zip_buf: &mut Vec<u8>,
|
|
tree: &git2::Tree<'_>,
|
|
base: &str,
|
|
opts: &ArchiveOptions,
|
|
) -> GitResult<()> {
|
|
let cursor = Cursor::new(zip_buf);
|
|
let mut zip = zip::ZipWriter::new(cursor);
|
|
zip = self.walk_zip_impl(zip, tree, base, opts)?;
|
|
let _cursor = zip
|
|
.finish()
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
Ok(())
|
|
}
|
|
|
|
fn walk_zip_impl<'a>(
|
|
&'a self,
|
|
mut zip: zip::ZipWriter<Cursor<&'a mut Vec<u8>>>,
|
|
tree: &git2::Tree<'_>,
|
|
base: &str,
|
|
opts: &ArchiveOptions,
|
|
) -> GitResult<zip::ZipWriter<Cursor<&'a mut Vec<u8>>>> {
|
|
for entry in tree.iter() {
|
|
let name = entry.name().unwrap_or("");
|
|
let full_path = if base.is_empty() {
|
|
name.to_string()
|
|
} else {
|
|
format!("{}/{}", base, name)
|
|
};
|
|
|
|
if !self.entry_passes_filter(&full_path, opts) {
|
|
continue;
|
|
}
|
|
|
|
let oid = entry.id();
|
|
let obj = match self.repo().find_object(oid, None) {
|
|
Ok(o) => o,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
let mode = entry.filemode() as u32;
|
|
if obj.kind() == Some(git2::ObjectType::Tree) {
|
|
if opts
|
|
.max_depth
|
|
.map_or(true, |d| full_path.matches('/').count() < d)
|
|
{
|
|
let sub_tree = self
|
|
.repo()
|
|
.find_tree(oid)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
zip = self.walk_zip_impl(zip, &sub_tree, &full_path, opts)?;
|
|
}
|
|
} else {
|
|
let blob = match obj.as_blob() {
|
|
Some(b) => b,
|
|
None => continue,
|
|
};
|
|
let content = blob.content();
|
|
let options = zip::write::SimpleFileOptions::default()
|
|
.compression_method(zip::CompressionMethod::Deflated)
|
|
.unix_permissions(mode & 0o755);
|
|
|
|
zip.start_file(&full_path, options)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
zip.write_all(content)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
}
|
|
}
|
|
Ok(zip)
|
|
}
|
|
|
|
fn collect_tree_entries(
|
|
&self,
|
|
entries: &mut Vec<ArchiveEntry>,
|
|
tree: &git2::Tree<'_>,
|
|
prefix: &str,
|
|
depth: usize,
|
|
opts: &ArchiveOptions,
|
|
) -> GitResult<()> {
|
|
for entry in tree.iter() {
|
|
let name = entry.name().unwrap_or("");
|
|
let full_path = if prefix.is_empty() {
|
|
name.to_string()
|
|
} else {
|
|
format!("{}/{}", prefix, name)
|
|
};
|
|
|
|
if !self.entry_passes_filter(&full_path, opts) {
|
|
continue;
|
|
}
|
|
|
|
if opts.max_depth.map_or(false, |d| depth >= d) {
|
|
continue;
|
|
}
|
|
|
|
let oid = entry.id();
|
|
let obj = match self.repo().find_object(oid, None) {
|
|
Ok(o) => o,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
let mode = entry.filemode() as u32;
|
|
let size = obj.as_blob().map(|b| b.size() as u64).unwrap_or(0);
|
|
if obj.kind() == Some(git2::ObjectType::Tree) {
|
|
let sub_tree = self
|
|
.repo()
|
|
.find_tree(oid)
|
|
.map_err(|e| GitError::Internal(e.to_string()))?;
|
|
self.collect_tree_entries(entries, &sub_tree, &full_path, depth + 1, opts)?;
|
|
} else {
|
|
entries.push(ArchiveEntry {
|
|
path: full_path,
|
|
oid: oid.to_string(),
|
|
size,
|
|
mode,
|
|
});
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn entry_passes_filter(&self, full_path: &str, opts: &ArchiveOptions) -> bool {
|
|
if let Some(ref filter) = opts.path_filter {
|
|
if !full_path.starts_with(filter) {
|
|
return false;
|
|
}
|
|
}
|
|
true
|
|
}
|
|
}
|