gitdataai/lib/git/http/lfs.rs
2026-05-30 01:38:40 +08:00

727 lines
23 KiB
Rust

use std::{collections::HashMap, path::PathBuf};
use actix_web::{HttpResponse, web};
use cache::AppCache;
use db::database::AppDatabase;
use model::repos::{
RepoModel, repo_lfs_lock::RepoLfsLockModel,
repo_lfs_object::RepoLfsObjectModel,
};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::{errors::GitError, http::handler::is_valid_lfs_oid};
/// Value advertised as `expires_in` on every action returned by the batch
/// methods. NOTE(review): presumably seconds, per the Git LFS batch API —
/// confirm it matches the lifetime used by the token store.
const LFS_AUTH_TOKEN_EXPIRY: u64 = 3600;
/// Largest object accepted by the batch and upload paths, in bytes (50 GiB).
const LFS_MAX_OBJECT_SIZE: i64 = 50 * 1024 * 1024 * 1024;
/// Body of an LFS batch request, as consumed by `LfsHandler::batch` and
/// `LfsHandler::batch_with_auth`.
#[derive(Deserialize, Serialize)]
pub struct BatchRequest {
/// Requested operation; the batch methods accept only `"upload"` or
/// `"download"`.
pub operation: String,
/// Objects the client wants to transfer.
pub objects: Vec<LfsObjectReq>,
/// Transfer adapters offered by the client. Not read by the code in this
/// file; the response always names `"basic"`.
#[serde(skip_serializing_if = "Option::is_none")]
pub transfers: Option<Vec<String>>,
/// Optional ref the request relates to. Not read by the code in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub r#ref: Option<LfsRef>,
/// Optional hash algorithm name; echoed back unchanged in the response.
#[serde(skip_serializing_if = "Option::is_none")]
pub hash_algo: Option<String>,
}
/// Ref descriptor that may accompany a [`BatchRequest`].
#[derive(Deserialize, Serialize)]
pub struct LfsRef {
/// Name of the ref (presumably a fully-qualified Git ref such as
/// `refs/heads/main` — nothing in this file inspects it).
pub name: String,
}
/// One object entry inside a [`BatchRequest`].
#[derive(Deserialize, Serialize, Clone)]
pub struct LfsObjectReq {
/// Object ID supplied by the client; used to look the object up in
/// `repo_lfs_object` and to build the transfer URL.
pub oid: String,
/// Object size in bytes as claimed by the client; checked against
/// `LFS_MAX_OBJECT_SIZE` and echoed back in the response.
pub size: i64,
}
/// Body of an LFS batch response.
#[derive(Serialize)]
pub struct BatchResponse {
/// Transfer adapter chosen by the server; the batch methods always set
/// `"basic"`.
pub transfer: String,
/// One entry per requested object, in request order.
pub objects: Vec<LfsObjectResponse>,
/// Hash algorithm name copied from the request; omitted from the JSON when
/// `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub hash_algo: Option<String>,
}
/// Per-object result inside a [`BatchResponse`].
///
/// `None` fields are omitted from the serialized JSON.
#[derive(Serialize)]
pub struct LfsObjectResponse {
/// Object ID, copied from the request entry.
pub oid: String,
/// Object size, copied from the request entry.
pub size: i64,
/// Set to `Some(true)` on non-error entries and left `None` on error
/// entries by the batch methods.
#[serde(skip_serializing_if = "Option::is_none")]
pub authenticated: Option<bool>,
/// Transfer actions keyed by operation name (`"upload"` / `"download"`).
/// `None` when there is nothing to transfer, e.g. an upload of an object
/// the repository already has.
#[serde(skip_serializing_if = "Option::is_none")]
pub actions: Option<HashMap<String, LfsAction>>,
/// Per-object error; the batch methods set it when a download is requested
/// for an object that is not recorded for the repository.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<LfsError>,
}
/// Describes how the client should perform one transfer.
///
/// `None` fields are omitted from the serialized JSON.
#[derive(Serialize)]
pub struct LfsAction {
/// URL to send the transfer request to.
pub href: String,
/// Extra HTTP headers the client should send; the batch methods put an
/// `authorization: Bearer <token>` entry here.
#[serde(skip_serializing_if = "Option::is_none")]
pub header: Option<HashMap<String, String>>,
/// Relative lifetime of the action; the batch methods set it to
/// `LFS_AUTH_TOKEN_EXPIRY`.
#[serde(skip_serializing_if = "Option::is_none")]
pub expires_in: Option<i64>,
/// Absolute expiry timestamp; never populated by the code in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub expires_at: Option<String>,
}
/// Per-object error attached to an [`LfsObjectResponse`].
#[derive(Serialize)]
pub struct LfsError {
/// HTTP-style status code; the batch methods use `404` for objects that do
/// not exist.
pub code: i32,
/// Human-readable description of the failure.
pub message: String,
}
/// Request body for creating a lock.
///
/// NOTE(review): presumably the route handler forwards `oid` to
/// `LfsHandler::lock_object`; the caller is not visible in this file.
#[derive(Deserialize)]
pub struct CreateLockRequest {
/// Object ID to lock.
pub oid: String,
}
/// Serialized view of one row of `repo_lfs_lock`.
#[derive(Serialize)]
pub struct LockResponse {
/// Lock identifier.
pub id: Uuid,
/// Value of the lock's `path` column; `lock_object` stores the object ID
/// there.
pub path: String,
/// User who holds the lock.
pub locked_by: Uuid,
/// Lock creation time formatted as an RFC 3339 string.
pub locked_at: String,
}
/// LFS operations (batch, upload, download, locking) scoped to a single
/// repository.
pub struct LfsHandler {
/// Base directory under which the `.lfs` object store is kept.
pub storage_path: PathBuf,
/// Repository the handler operates on; its `id` scopes every query and its
/// `name` appears in object URLs.
pub model: RepoModel,
/// Namespace segment placed before the repository name in object URLs.
pub namespace: String,
/// Database handle providing the reader and writer connections used by the
/// queries in this file.
pub db: AppDatabase,
}
impl LfsHandler {
pub fn new(
storage_path: PathBuf,
model: RepoModel,
namespace: String,
db: AppDatabase,
) -> Self {
Self {
storage_path,
model,
namespace,
db,
}
}
/// Returns the root of the LFS store: `<storage_path>/.lfs`.
fn get_lfs_storage_path(&self) -> PathBuf {
    let mut lfs_root = self.storage_path.clone();
    lfs_root.push(".lfs");
    lfs_root
}
/// Returns the on-disk location of an object:
/// `<lfs root>/objects/<first two bytes of oid>/<oid>`.
///
/// The shard prefix is taken with a checked slice, so an OID shorter than two
/// bytes (or one that cannot be split at byte 2) falls back to using the whole
/// OID as the shard name instead of panicking. Callers are still expected to
/// validate the OID before calling this.
fn get_object_path(&self, oid: &str) -> PathBuf {
    let shard = oid.get(..2).unwrap_or(oid);
    let mut path = self.get_lfs_storage_path();
    path.push("objects");
    path.push(shard);
    path.push(oid);
    path
}
/// Formats the transfer URL for an object:
/// `<base_url>/<namespace>/<repo name>.git/info/lfs/objects/<oid>`.
fn build_object_url(&self, base_url: &str, oid: &str) -> String {
    let namespace = &self.namespace;
    let repo_name = &self.model.name;
    format!("{base_url}/{namespace}/{repo_name}.git/info/lfs/objects/{oid}")
}
pub async fn batch(
&self,
req: BatchRequest,
base_url: &str,
) -> Result<BatchResponse, GitError> {
let operation = req.operation.as_str();
if operation != "upload" && operation != "download" {
return Err(GitError::InvalidOid(format!(
"Invalid operation: {}",
operation
)));
}
for obj in &req.objects {
if obj.size > LFS_MAX_OBJECT_SIZE {
return Err(GitError::InvalidOid(format!(
"Object size {} exceeds maximum allowed size {}",
obj.size, LFS_MAX_OBJECT_SIZE
)));
}
}
let oids: Vec<String> =
req.objects.iter().map(|o| o.oid.clone()).collect();
let existing: Vec<RepoLfsObjectModel> =
sqlx::query_as::<_, RepoLfsObjectModel>(
"SELECT repo, oid, size_bytes, storage_key, created_at \
FROM repo_lfs_object \
WHERE oid = ANY($1) AND repo = $2",
)
.bind(&oids)
.bind(self.model.id)
.fetch_all(self.db.reader())
.await
.map_err(|e| GitError::Internal(e.to_string()))?;
let existing_map: HashMap<&str, &RepoLfsObjectModel> =
existing.iter().map(|m| (m.oid.as_str(), m)).collect();
let mut response_objects = Vec::with_capacity(req.objects.len());
for obj in req.objects {
let existing = existing_map.get(obj.oid.as_str());
let mut actions = HashMap::new();
match operation {
"upload" => {
if existing.is_none() {
let upload_url =
self.build_object_url(base_url, &obj.oid);
let token = Uuid::now_v7().to_string();
let mut headers = HashMap::new();
headers.insert(
"authorization".to_string(),
format!("Bearer {}", token),
);
actions.insert(
"upload".to_string(),
LfsAction {
href: upload_url,
header: Some(headers),
expires_in: Some(LFS_AUTH_TOKEN_EXPIRY as i64),
expires_at: None,
},
);
}
}
"download" => match existing {
Some(_) => {
let download_url =
self.build_object_url(base_url, &obj.oid);
let token = Uuid::now_v7().to_string();
let mut headers = HashMap::new();
headers.insert(
"authorization".to_string(),
format!("Bearer {}", token),
);
actions.insert(
"download".to_string(),
LfsAction {
href: download_url,
header: Some(headers),
expires_in: Some(LFS_AUTH_TOKEN_EXPIRY as i64),
expires_at: None,
},
);
}
None => {
response_objects.push(LfsObjectResponse {
oid: obj.oid,
size: obj.size,
authenticated: None,
actions: None,
error: Some(LfsError {
code: 404,
message: "Object does not exist".to_string(),
}),
});
continue;
}
},
_ => {}
}
response_objects.push(LfsObjectResponse {
oid: obj.oid,
size: obj.size,
authenticated: Some(true),
actions: if actions.is_empty() {
None
} else {
Some(actions)
},
error: None,
});
}
Ok(BatchResponse {
transfer: "basic".to_string(),
objects: response_objects,
hash_algo: req.hash_algo,
})
}
pub async fn batch_with_auth(
&self,
req: BatchRequest,
base_url: &str,
user_id: uuid::Uuid,
cache: &AppCache,
) -> Result<BatchResponse, GitError> {
let operation = req.operation.as_str();
if operation != "upload" && operation != "download" {
return Err(GitError::InvalidOid(format!(
"Invalid operation: {}",
operation
)));
}
for obj in &req.objects {
if obj.size > LFS_MAX_OBJECT_SIZE {
return Err(GitError::InvalidOid(format!(
"Object size {} exceeds maximum allowed size {}",
obj.size, LFS_MAX_OBJECT_SIZE
)));
}
}
let oids: Vec<String> =
req.objects.iter().map(|o| o.oid.clone()).collect();
let existing: Vec<RepoLfsObjectModel> =
sqlx::query_as::<_, RepoLfsObjectModel>(
"SELECT repo, oid, size_bytes, storage_key, created_at \
FROM repo_lfs_object \
WHERE oid = ANY($1) AND repo = $2",
)
.bind(&oids)
.bind(self.model.id)
.fetch_all(self.db.reader())
.await
.map_err(|e| GitError::Internal(e.to_string()))?;
let existing_map: HashMap<&str, &RepoLfsObjectModel> =
existing.iter().map(|m| (m.oid.as_str(), m)).collect();
let mut response_objects = Vec::with_capacity(req.objects.len());
for obj in req.objects {
let existing = existing_map.get(obj.oid.as_str());
let mut actions = HashMap::new();
match operation {
"upload" => {
if existing.is_none() {
let upload_url =
self.build_object_url(base_url, &obj.oid);
let token = Uuid::now_v7().to_string();
crate::http::lfs_routes::store_lfs_token(
cache,
&token,
self.model.id,
user_id,
"upload",
)
.await;
let mut headers = HashMap::new();
headers.insert(
"authorization".to_string(),
format!("Bearer {}", token),
);
actions.insert(
"upload".to_string(),
LfsAction {
href: upload_url,
header: Some(headers),
expires_in: Some(LFS_AUTH_TOKEN_EXPIRY as i64),
expires_at: None,
},
);
}
}
"download" => match existing {
Some(_) => {
let download_url =
self.build_object_url(base_url, &obj.oid);
let token = Uuid::now_v7().to_string();
crate::http::lfs_routes::store_lfs_token(
cache,
&token,
self.model.id,
user_id,
"download",
)
.await;
let mut headers = HashMap::new();
headers.insert(
"authorization".to_string(),
format!("Bearer {}", token),
);
actions.insert(
"download".to_string(),
LfsAction {
href: download_url,
header: Some(headers),
expires_in: Some(LFS_AUTH_TOKEN_EXPIRY as i64),
expires_at: None,
},
);
}
None => {
response_objects.push(LfsObjectResponse {
oid: obj.oid,
size: obj.size,
authenticated: None,
actions: None,
error: Some(LfsError {
code: 404,
message: "Object does not exist".to_string(),
}),
});
continue;
}
},
_ => {}
}
response_objects.push(LfsObjectResponse {
oid: obj.oid,
size: obj.size,
authenticated: Some(true),
actions: if actions.is_empty() {
None
} else {
Some(actions)
},
error: None,
});
}
Ok(BatchResponse {
transfer: "basic".to_string(),
objects: response_objects,
hash_algo: req.hash_algo,
})
}
pub async fn upload_object(
&self,
oid: &str,
payload: web::Payload,
) -> Result<HttpResponse, GitError> {
if !is_valid_lfs_oid(oid) {
return Err(GitError::InvalidOid(format!(
"Invalid OID format: {}",
oid
)));
}
let object_path = self.get_object_path(oid);
if let Some(parent) = object_path.parent() {
tokio::fs::create_dir_all(parent).await.map_err(|e| {
GitError::Internal(format!("Failed to create directory: {}", e))
})?;
}
let temp_path = object_path.with_extension("tmp");
let mut file =
tokio::fs::File::create(&temp_path).await.map_err(|e| {
GitError::Internal(format!("Failed to create temp file: {}", e))
})?;
use futures_util::stream::StreamExt;
use sha2::Digest;
use tokio::io::AsyncWriteExt;
let mut payload = payload;
let mut size = 0i64;
let mut hasher = sha2::Sha256::new();
while let Some(chunk) = payload.next().await {
let chunk = chunk.map_err(|e| {
GitError::Internal(format!("Payload error: {}", e))
})?;
size += chunk.len() as i64;
if size > LFS_MAX_OBJECT_SIZE {
let _ = tokio::fs::remove_file(&temp_path).await;
return Err(GitError::InvalidOid(format!(
"Object size exceeds maximum allowed size {}",
LFS_MAX_OBJECT_SIZE
)));
}
hasher.update(&chunk);
if let Err(e) = file.write_all(&chunk).await {
let _ = tokio::fs::remove_file(&temp_path).await;
return Err(GitError::Internal(format!(
"Failed to write file: {}",
e
)));
}
}
file.flush().await.map_err(|e| {
GitError::Internal(format!("Failed to flush file: {}", e))
})?;
drop(file);
let hash_bytes = hasher.finalize();
let calculated_oid = hex::encode(hash_bytes.as_slice());
if calculated_oid != oid {
let _ = tokio::fs::remove_file(&temp_path).await;
return Err(GitError::InvalidOid(format!(
"OID mismatch: expected {}, got {}",
oid, calculated_oid
)));
}
if let Err(e) = tokio::fs::rename(&temp_path, &object_path).await {
let _ = tokio::fs::remove_file(&temp_path).await;
return Err(GitError::Internal(format!(
"Failed to move file: {}",
e
)));
}
let now = chrono::Utc::now();
sqlx::query(
"INSERT INTO repo_lfs_object (repo, oid, size_bytes, storage_key, created_at) \
VALUES ($1, $2, $3, $4, $5)",
)
.bind(self.model.id)
.bind(oid)
.bind(size)
.bind(object_path.to_string_lossy().to_string())
.bind(now)
.execute(self.db.writer())
.await
.map_err(|e| GitError::Internal(e.to_string()))?;
Ok(HttpResponse::Ok().finish())
}
/// Streams a stored object back to the client.
///
/// Looks the object up in `repo_lfs_object` for this repository, checks that
/// its recorded `storage_key` lies inside this handler's LFS directory, and
/// returns the file as an `application/octet-stream` body read in 64 KiB
/// chunks. `Content-Length` is taken from the recorded `size_bytes`.
///
/// # Errors
/// * `GitError::InvalidOid` if `oid` is malformed.
/// * `GitError::NotFound` if no row exists for this repository and OID.
/// * `GitError::AuthFailed` if the recorded path is outside the LFS directory.
/// * `GitError::Internal` on database errors or if the file cannot be opened.
pub async fn download_object(
    &self,
    oid: &str,
) -> Result<HttpResponse, GitError> {
    use actix_web::body::BodyStream;
    use futures_util::stream;
    use tokio::io::AsyncReadExt;

    if !is_valid_lfs_oid(oid) {
        return Err(GitError::InvalidOid(format!(
            "Invalid OID format: {}",
            oid
        )));
    }
    let obj = sqlx::query_as::<_, RepoLfsObjectModel>(
        "SELECT repo, oid, size_bytes, storage_key, created_at \
         FROM repo_lfs_object \
         WHERE oid = $1 AND repo = $2",
    )
    .bind(oid)
    .bind(self.model.id)
    .fetch_optional(self.db.reader())
    .await
    .map_err(|e| GitError::Internal(e.to_string()))?
    .ok_or_else(|| GitError::NotFound("Object not found".to_string()))?;

    let expected_base = self.get_lfs_storage_path();
    let obj_path = PathBuf::from(&obj.storage_key);
    // A prefix match alone is not containment: `<base>/../x` starts with
    // `<base>` component-wise but resolves outside it. Only the part after the
    // prefix is inspected, so a base path that itself contains `..` still works.
    let inside_storage = obj_path
        .strip_prefix(&expected_base)
        .map(|relative| {
            !relative
                .components()
                .any(|part| matches!(part, std::path::Component::ParentDir))
        })
        .unwrap_or(false);
    if !inside_storage {
        tracing::error!(
            "LFS object path outside storage directory: {}",
            obj.storage_key
        );
        return Err(GitError::AuthFailed(
            "Invalid object path".to_string(),
        ));
    }

    let file = tokio::fs::File::open(&obj_path).await.map_err(|e| {
        GitError::Internal(format!("Failed to open file: {}", e))
    })?;

    let chunk_size: usize = 65536;
    // Each step reads up to `chunk_size` bytes; a zero-length read ends the
    // stream. The buffer is handed to `Bytes`, so a fresh one is needed per
    // chunk.
    let stream = stream::unfold(file, move |mut file| async move {
        let mut buffer = vec![0u8; chunk_size];
        match file.read(&mut buffer).await {
            Ok(0) => None,
            Ok(n) => {
                buffer.truncate(n);
                Some((
                    Ok::<_, std::io::Error>(actix_web::web::Bytes::from(
                        buffer,
                    )),
                    file,
                ))
            }
            Err(e) => Some((Err(e), file)),
        }
    });
    Ok(HttpResponse::Ok()
        .content_type("application/octet-stream")
        .insert_header(("Content-Length", obj.size_bytes.to_string()))
        .body(BodyStream::new(stream)))
}
/// Creates a lock for `oid` held by `user_id`.
///
/// Inserts a row into `repo_lfs_lock` with a new v7 UUID, storing the OID in
/// the `path` column and leaving `ref_name` NULL.
///
/// # Errors
/// * `GitError::InvalidOid` if `oid` is malformed.
/// * `GitError::Locked` if the insert hits a unique-constraint violation.
/// * `GitError::Internal` on any other database error.
pub async fn lock_object(
    &self,
    oid: &str,
    user_id: uuid::Uuid,
) -> Result<LockResponse, GitError> {
    if !is_valid_lfs_oid(oid) {
        return Err(GitError::InvalidOid(format!(
            "Invalid OID format: {}",
            oid
        )));
    }
    let now = chrono::Utc::now();
    let lock_id = Uuid::now_v7();
    let result = sqlx::query(
        "INSERT INTO repo_lfs_lock (id, repo, path, locked_by, ref_name, created_at) \
         VALUES ($1, $2, $3, $4, NULL, $5)",
    )
    .bind(lock_id)
    .bind(self.model.id)
    .bind(oid)
    .bind(user_id)
    .bind(now)
    .execute(self.db.writer())
    .await;

    match result {
        Ok(_) => Ok(LockResponse {
            id: lock_id,
            path: oid.to_string(),
            locked_by: user_id,
            locked_at: now.to_rfc3339(),
        }),
        Err(e) => {
            // Prefer the structured SQLSTATE (23505 = unique_violation): the
            // display text depends on the server's message locale and does not
            // necessarily contain the code.
            let unique_by_code = e
                .as_database_error()
                .and_then(|db_err| db_err.code())
                .map_or(false, |code| code == "23505");
            // Previous text-based detection, kept as a fallback.
            let err_msg = e.to_string();
            if unique_by_code
                || err_msg.contains("duplicate key")
                || err_msg.contains("23505")
            {
                return Err(GitError::Locked("Already locked".to_string()));
            }
            Err(GitError::Internal(format!("DB error: {}", e)))
        }
    }
}
/// Releases the lock identified by `lock_id` on behalf of `user_id`.
///
/// A lock may be released by the user who holds it or by the repository's
/// creator (`model.created_by`).
///
/// # Errors
/// * `GitError::NotFound` if `lock_id` is not a UUID or no such lock exists in
///   this repository.
/// * `GitError::PermissionDenied` if the caller is neither the lock holder nor
///   the repository creator.
/// * `GitError::Internal` on database errors.
pub async fn unlock_object(
    &self,
    lock_id: &str,
    user_id: uuid::Uuid,
) -> Result<(), GitError> {
    let lock_uuid = Uuid::parse_str(lock_id)
        .map_err(|_| GitError::NotFound("Invalid lock ID".to_string()))?;
    let existing = sqlx::query_as::<_, RepoLfsLockModel>(
        "SELECT id, repo, path, locked_by, ref_name, created_at \
         FROM repo_lfs_lock \
         WHERE id = $1 AND repo = $2",
    )
    .bind(lock_uuid)
    .bind(self.model.id)
    .fetch_optional(self.db.reader())
    .await
    .map_err(|e| GitError::Internal(e.to_string()))?
    .ok_or_else(|| GitError::NotFound("Lock not found".to_string()))?;

    // Both checks are about the *caller*. Comparing the lock holder to the
    // repository creator instead would let anyone release a lock held by the
    // creator, and would stop the creator from releasing other users' locks.
    let caller_holds_lock = existing.locked_by == user_id;
    let caller_is_repo_creator = user_id == self.model.created_by;
    if !caller_holds_lock && !caller_is_repo_creator {
        return Err(GitError::PermissionDenied(
            "Not allowed to unlock".to_string(),
        ));
    }

    sqlx::query("DELETE FROM repo_lfs_lock WHERE id = $1 AND repo = $2")
        .bind(lock_uuid)
        .bind(self.model.id)
        .execute(self.db.writer())
        .await
        .map_err(|e| GitError::Internal(e.to_string()))?;
    Ok(())
}
/// Lists the locks of this repository, optionally restricted to those whose
/// `path` column equals `maybe_oid`.
///
/// # Errors
/// `GitError::Internal` if the database query fails.
pub async fn list_locks(
    &self,
    maybe_oid: Option<&str>,
) -> Result<Vec<LockResponse>, GitError> {
    let fetched = match maybe_oid {
        Some(oid) => {
            sqlx::query_as::<_, RepoLfsLockModel>(
                "SELECT id, repo, path, locked_by, ref_name, created_at \
                 FROM repo_lfs_lock \
                 WHERE repo = $1 AND path = $2",
            )
            .bind(self.model.id)
            .bind(oid)
            .fetch_all(self.db.reader())
            .await
        }
        None => {
            sqlx::query_as::<_, RepoLfsLockModel>(
                "SELECT id, repo, path, locked_by, ref_name, created_at \
                 FROM repo_lfs_lock \
                 WHERE repo = $1",
            )
            .bind(self.model.id)
            .fetch_all(self.db.reader())
            .await
        }
    };
    let lock_rows: Vec<RepoLfsLockModel> =
        fetched.map_err(|e| GitError::Internal(e.to_string()))?;

    let mut locks = Vec::with_capacity(lock_rows.len());
    for row in lock_rows {
        locks.push(LockResponse {
            id: row.id,
            path: row.path,
            locked_by: row.locked_by,
            locked_at: row.created_at.to_rfc3339(),
        });
    }
    Ok(locks)
}
/// Fetches a single lock of this repository by its id.
///
/// # Errors
/// * `GitError::NotFound` if `lock_id` is not a UUID or no such lock exists in
///   this repository.
/// * `GitError::Internal` if the database query fails.
pub async fn get_lock(
    &self,
    lock_id: &str,
) -> Result<LockResponse, GitError> {
    let lock_uuid = match Uuid::parse_str(lock_id) {
        Ok(parsed) => parsed,
        Err(_) => {
            return Err(GitError::NotFound("Invalid lock ID".to_string()));
        }
    };
    let found = sqlx::query_as::<_, RepoLfsLockModel>(
        "SELECT id, repo, path, locked_by, ref_name, created_at \
         FROM repo_lfs_lock \
         WHERE id = $1 AND repo = $2",
    )
    .bind(lock_uuid)
    .bind(self.model.id)
    .fetch_optional(self.db.reader())
    .await
    .map_err(|e| GitError::Internal(e.to_string()))?;

    match found {
        Some(row) => Ok(LockResponse {
            id: row.id,
            path: row.path,
            locked_by: row.locked_by,
            locked_at: row.created_at.to_rfc3339(),
        }),
        None => Err(GitError::NotFound("Lock not found".to_string())),
    }
}
}