//! Git hook worker pool: dequeues hook tasks (sync / fsck / gc) from Redis
//! queues and executes them with CPU- and concurrency-aware throttling.
pub mod log;
|
|
pub mod metrics;
|
|
pub mod redis;
|
|
pub mod types;
|
|
|
|
use db::cache::AppCache;
|
|
use db::database::AppDatabase;
|
|
use deadpool_redis::cluster::Pool as RedisPool;
|
|
use sea_orm::EntityTrait;
|
|
use slog::Logger;
|
|
use std::sync::Arc;
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
use tokio::sync::Semaphore;
|
|
use tokio::task::{JoinSet, spawn_blocking};
|
|
use tokio_util::sync::CancellationToken;
|
|
|
|
use crate::hook::pool::log::LogStream;
|
|
use crate::hook::pool::metrics::CpuMonitor;
|
|
use crate::hook::pool::redis::RedisConsumer;
|
|
use crate::hook::pool::types::{HookTask, PoolConfig, PoolMetrics, TaskType};
|
|
use crate::hook::sync::HookMetaDataSync;
|
|
|
|
/// Worker pool that consumes Git hook tasks (sync / fsck / gc) from Redis
/// queues and executes them under CPU- and concurrency-based throttling.
pub struct GitHookPool {
    // Pool tuning knobs: queue prefixes, concurrency limits, worker id, etc.
    config: PoolConfig,
    // Application database handle (exposes a reader and transactions).
    db: AppDatabase,
    // Application cache handle, passed through to HookMetaDataSync.
    cache: AppCache,
    // Structured logger for pool-level events.
    logger: Logger,
    // Samples CPU load to decide whether a new task may be accepted.
    cpu_monitor: CpuMonitor,
    // Redis list consumer used to dequeue / ack / nak task payloads.
    consumer: RedisConsumer,
    // Publishes per-task log lines to a Redis channel.
    log_stream: LogStream,
    // Number of tasks currently executing.
    running_count: Arc<AtomicU64>,
    // Lifetime count of successfully completed tasks.
    total_processed: Arc<AtomicU64>,
    // Lifetime count of failed tasks.
    total_failed: Arc<AtomicU64>,
    // Caps concurrent task execution.
    // NOTE(review): sized from num_cpus::get() in `new`, independently of
    // `config.max_concurrent` — confirm both limits are intentional.
    semaphore: Arc<Semaphore>,
    // Shared HTTP client used for webhook dispatch.
    http: Arc<reqwest::Client>,
}
|
|
|
|
impl GitHookPool {
|
|
pub async fn new(
|
|
config: PoolConfig,
|
|
db: AppDatabase,
|
|
cache: AppCache,
|
|
redis_pool: RedisPool,
|
|
logger: Logger,
|
|
http: Arc<reqwest::Client>,
|
|
) -> Result<Self, crate::GitError> {
|
|
let consumer = RedisConsumer::new(
|
|
redis_pool.clone(),
|
|
config.redis_list_prefix.clone(),
|
|
config.redis_block_timeout_secs,
|
|
logger.clone(),
|
|
);
|
|
|
|
let log_stream = LogStream::new(
|
|
config.redis_log_channel.clone(),
|
|
config.worker_id.clone(),
|
|
Arc::new(redis_pool),
|
|
);
|
|
|
|
Ok(Self {
|
|
config,
|
|
db,
|
|
cache,
|
|
logger,
|
|
cpu_monitor: CpuMonitor::new(),
|
|
consumer,
|
|
log_stream,
|
|
running_count: Arc::new(AtomicU64::new(0)),
|
|
total_processed: Arc::new(AtomicU64::new(0)),
|
|
total_failed: Arc::new(AtomicU64::new(0)),
|
|
semaphore: Arc::new(Semaphore::new(num_cpus::get())),
|
|
http,
|
|
})
|
|
}
|
|
|
|
    /// Main pool loop: polls the Redis task queues and spawns one worker per
    /// dequeued task until `cancel` fires, then drains all in-flight tasks
    /// before returning.
    pub async fn run(self: Arc<Self>, cancel: CancellationToken) {
        let mut join_set = JoinSet::<()>::new();
        let cancel_clone = cancel.clone();

        // Task types to poll
        let task_types = [TaskType::Sync, TaskType::Fsck, TaskType::Gc];

        loop {
            tokio::select! {
                _ = cancel_clone.cancelled() => {
                    slog::info!(self.logger, "pool received shutdown signal, draining {} tasks", join_set.len());
                    // Wait for every spawned worker to finish before exiting.
                    while join_set.join_next().await.is_some() {}
                    slog::info!(self.logger, "pool shutdown complete");
                    break;
                }

                // Pacing delay between poll rounds.
                _ = tokio::time::sleep(tokio::time::Duration::from_millis(100)) => {}
            }

            // Back off when the CPU monitor says load or the running-task
            // count is too high.
            let running = self.running_count.load(Ordering::Relaxed) as usize;
            let can_accept = self
                .cpu_monitor
                .can_accept_task(
                    self.config.max_concurrent,
                    self.config.cpu_threshold,
                    running,
                )
                .await;

            if !can_accept {
                tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
                continue;
            }

            // Poll each task type in round-robin fashion
            for task_type in &task_types {
                let result = self.consumer.next(&task_type.to_string()).await;

                let (task, task_json) = match result {
                    Ok(Some(pair)) => pair,
                    Ok(None) => continue, // timeout, try next queue
                    Err(e) => {
                        // Redis error: back off and restart the outer loop.
                        slog::warn!(self.logger, "failed to dequeue task: {}", e);
                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
                        break;
                    }
                };

                let self_clone = self.clone();

                // Compute queue/work keys for ACK/NAK
                let queue_key = format!(
                    "{}:{}",
                    self_clone.config.redis_list_prefix,
                    task_type.to_string()
                );
                let work_key = format!("{}:work", queue_key);

                // The semaphore is only closed on shutdown; skip in that case.
                let permit = match self_clone.semaphore.clone().acquire_owned().await {
                    Ok(p) => p,
                    Err(_) => continue,
                };

                let self_clone2 = self.clone();
                self_clone2.running_count.fetch_add(1, Ordering::Relaxed);
                let logger_clone = self_clone2.logger.clone();
                let counter_clone = self_clone2.running_count.clone();
                join_set.spawn(async move {
                    // Run the task body on a blocking thread and trap panics
                    // so one bad task cannot take down the pool.
                    // NOTE(review): a panicked task is neither ACKed nor NAKed,
                    // so its payload stays on the work list — confirm a reaper
                    // reclaims stuck entries.
                    let panicked = match spawn_blocking(move || {
                        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                            tokio::runtime::Handle::current().block_on(async {
                                self_clone2
                                    .execute_task_body(task, task_json, queue_key, work_key)
                                    .await
                            })
                        }))
                    })
                    .await
                    {
                        Ok(Ok(Ok(()))) => false, // spawn_blocking Ok, catch_unwind Ok, body Ok
                        Ok(Ok(Err(_))) => true, // spawn_blocking Ok, catch_unwind Ok, body Err(()) — never hit
                        Ok(Err(_)) => true, // spawn_blocking Ok, catch_unwind Err = panic
                        Err(_) => true, // spawn_blocking Err = thread aborted
                    };
                    // Release concurrency slot and running counter regardless
                    // of outcome.
                    drop(permit);
                    counter_clone.fetch_sub(1, Ordering::Relaxed);
                    if panicked {
                        slog::error!(logger_clone, "task panicked";);
                    }
                });

                // Only process one task per loop iteration to avoid overwhelming the pool
                break;
            }
        }
    }
|
|
|
|
async fn execute_task_body(
|
|
&self,
|
|
task: HookTask,
|
|
task_json: String,
|
|
queue_key: String,
|
|
work_key: String,
|
|
) -> Result<(), ()> {
|
|
slog::info!(self.logger, "task started";
|
|
"task_id" => &task.id,
|
|
"task_type" => %task.task_type,
|
|
"repo_id" => &task.repo_id,
|
|
"worker_id" => &self.config.worker_id
|
|
);
|
|
|
|
self.log_stream
|
|
.info(
|
|
&task.id,
|
|
&task.repo_id,
|
|
&format!("task started: {}", task.task_type),
|
|
)
|
|
.await;
|
|
|
|
let result = match task.task_type {
|
|
TaskType::Sync => self.run_sync(&task).await,
|
|
TaskType::Fsck => self.run_fsck(&task).await,
|
|
TaskType::Gc => self.run_gc(&task).await,
|
|
};
|
|
|
|
let consumer = self.consumer.clone();
|
|
match result {
|
|
Ok(()) => {
|
|
if let Err(e) = consumer.ack(&work_key, &task_json).await {
|
|
slog::warn!(self.logger, "failed to ack task: {}", e);
|
|
}
|
|
self.total_processed.fetch_add(1, Ordering::Relaxed);
|
|
self.log_stream
|
|
.info(&task.id, &task.repo_id, "task completed")
|
|
.await;
|
|
}
|
|
Err(e) => {
|
|
if let Err(e) = consumer.nak(&work_key, &queue_key, &task_json).await {
|
|
slog::warn!(self.logger, "failed to nak task: {}", e);
|
|
}
|
|
self.total_failed.fetch_add(1, Ordering::Relaxed);
|
|
self.log_stream
|
|
.error(&task.id, &task.repo_id, &format!("task failed: {}", e))
|
|
.await;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
    /// Handles a `Sync` task: refreshes repo metadata and dispatches push /
    /// tag-push webhooks for any branch or tag tips that changed.
    ///
    /// Flow: load the repo row, snapshot branch/tag tips (phase 1), run the
    /// metadata sync (phase 2), snapshot tips again (phase 3), then diff the
    /// two snapshots and fire one webhook per changed ref.
    async fn run_sync(&self, task: &HookTask) -> Result<(), crate::GitError> {
        let repo_id = models::Uuid::parse_str(&task.repo_id)
            .map_err(|_| crate::GitError::Internal("invalid repo_id uuid".into()))?;

        let repo = models::repos::repo::Entity::find_by_id(repo_id)
            .one(self.db.reader())
            .await
            .map_err(crate::GitError::from)?
            .ok_or_else(|| crate::GitError::NotFound(format!("repo {} not found", repo_id)))?;

        let db_clone = self.db.clone();
        let cache_clone = self.cache.clone();
        let repo_clone = repo.clone();
        let logger_clone = self.logger.clone();

        // Phase 1: capture before branch/tag tips.
        // Tips are (ref_name, oid) pairs; collected on a blocking thread.
        let before_tips: (Vec<(String, String)>, Vec<(String, String)>) =
            tokio::task::spawn_blocking({
                let db = db_clone.clone();
                let cache = cache_clone.clone();
                let repo = repo_clone.clone();
                let logger = logger_clone.clone();
                move || {
                    let sync = HookMetaDataSync::new(db, cache, repo, logger)?;
                    Ok::<_, crate::GitError>((sync.list_branch_tips(), sync.list_tag_tips()))
                }
            })
            .await
            .map_err(|e| crate::GitError::Internal(format!("spawn_blocking failed: {}", e)))??;

        // Phase 2: run sync (async operation).
        // Runs on a blocking thread; the async sync body is driven via the
        // runtime handle.
        let sync_result: Result<(), crate::GitError> = tokio::task::spawn_blocking({
            let db = db_clone.clone();
            let cache = cache_clone.clone();
            let repo = repo_clone.clone();
            let logger = logger_clone.clone();
            move || {
                let sync = HookMetaDataSync::new(db, cache, repo, logger)?;
                tokio::runtime::Handle::current().block_on(async { sync.sync().await })
            }
        })
        .await
        .map_err(|e| crate::GitError::Internal(format!("spawn_blocking failed: {}", e)))?;

        // Abort before dispatching webhooks if the sync itself failed.
        sync_result?;

        // Phase 3: capture after branch/tag tips.
        let after_tips: (Vec<(String, String)>, Vec<(String, String)>) =
            tokio::task::spawn_blocking({
                let db = db_clone.clone();
                let cache = cache_clone.clone();
                let repo = repo_clone.clone();
                let logger = logger_clone.clone();
                move || {
                    let sync = HookMetaDataSync::new(db, cache, repo, logger)?;
                    Ok::<_, crate::GitError>((sync.list_branch_tips(), sync.list_tag_tips()))
                }
            })
            .await
            .map_err(|e| crate::GitError::Internal(format!("spawn_blocking failed: {}", e)))??;

        let (before_branch_tips, before_tag_tips) = before_tips;
        let (after_branch_tips, after_tag_tips) = after_tips;

        let repo_uuid = repo.id.to_string();
        let repo_name = repo.repo_name.clone();
        let default_branch = repo.default_branch.clone();

        // Resolve namespace = project.name
        let namespace = models::projects::Project::find_by_id(repo.project)
            .one(self.db.reader())
            .await
            .map_err(|e| crate::GitError::Internal(format!("failed to fetch project: {}", e)))?
            .map(|p| p.name)
            .unwrap_or_default();

        let logger = self.logger.clone();
        let http = self.http.clone();
        let db = self.db.clone();

        // Dispatch branch push webhooks.
        // NOTE(review): only refs present AFTER the sync are diffed, so
        // branch deletions emit no event — confirm that is intended.
        for (branch, after_oid) in &after_branch_tips {
            let before_oid = before_branch_tips
                .iter()
                .find(|(n, _)| n == branch)
                .map(|(_, o)| o.as_str());
            // A branch with no "before" entry is treated as new (changed).
            let changed = before_oid.map(|o| o != after_oid.as_str()).unwrap_or(true);
            if changed {
                // NOTE(review): "0" is the sentinel for "no previous OID";
                // Git convention is usually the all-zero 40-char OID — verify
                // webhook consumers accept this form.
                let before_oid = before_oid.map_or("0", |v| v).to_string();
                let after = after_oid.clone();
                let branch_name = branch.clone();

                slog::info!(logger, "detected push on branch"; "branch" => &branch_name, "before" => &before_oid, "after" => &after);

                let http = http.clone();
                let db = db.clone();
                let logs = logger.clone();
                let ru = repo_uuid.clone();
                let ns = namespace.clone();
                let rn = repo_name.clone();
                let db_branch = default_branch.clone();

                // Fire-and-forget: webhook delivery is detached from the task.
                tokio::spawn(async move {
                    crate::hook::webhook_dispatch::dispatch_repo_webhooks(
                        &db,
                        &http,
                        &logs,
                        &ru,
                        &ns,
                        &rn,
                        &db_branch,
                        "",
                        "",
                        crate::hook::webhook_dispatch::WebhookEventKind::Push {
                            r#ref: format!("refs/heads/{}", branch_name),
                            before: before_oid,
                            after,
                            // NOTE(review): commit list is always empty here —
                            // confirm consumers do not rely on it.
                            commits: vec![],
                        },
                    )
                    .await;
                });
            }
        }

        // Dispatch tag push webhooks.
        for (tag, after_oid) in &after_tag_tips {
            let before_oid = before_tag_tips
                .iter()
                .find(|(n, _)| n == tag)
                .map(|(_, o)| o.as_str());
            let is_new = before_oid.is_none();
            // Unlike branches, an absent "before" is tracked separately here;
            // the combined condition is equivalent (new OR moved).
            let was_updated = before_oid.map(|o| o != after_oid.as_str()).unwrap_or(false);
            if is_new || was_updated {
                let before_oid = before_oid.map_or("0", |v| v).to_string();
                let after = after_oid.clone();
                let tag_name = tag.clone();

                slog::info!(logger, "detected tag push"; "tag" => &tag_name, "before" => &before_oid, "after" => &after);

                let http = http.clone();
                let db = db.clone();
                let logs = logger.clone();
                let ru = repo_uuid.clone();
                let ns = namespace.clone();
                let rn = repo_name.clone();
                let db_branch = default_branch.clone();

                tokio::spawn(async move {
                    crate::hook::webhook_dispatch::dispatch_repo_webhooks(
                        &db,
                        &http,
                        &logs,
                        &ru,
                        &ns,
                        &rn,
                        &db_branch,
                        "",
                        "",
                        crate::hook::webhook_dispatch::WebhookEventKind::TagPush {
                            r#ref: format!("refs/tags/{}", tag_name),
                            before: before_oid,
                            after,
                        },
                    )
                    .await;
                });
            }
        }

        Ok(())
    }
|
|
|
|
async fn run_fsck(&self, task: &HookTask) -> Result<(), crate::GitError> {
|
|
let repo_id = models::Uuid::parse_str(&task.repo_id)
|
|
.map_err(|_| crate::GitError::Internal("invalid repo_id uuid".into()))?;
|
|
|
|
let repo = models::repos::repo::Entity::find_by_id(repo_id)
|
|
.one(self.db.reader())
|
|
.await
|
|
.map_err(crate::GitError::from)?
|
|
.ok_or_else(|| crate::GitError::NotFound(format!("repo {} not found", repo_id)))?;
|
|
|
|
self.log_stream
|
|
.info(&task.id, &task.repo_id, "running fsck")
|
|
.await;
|
|
|
|
let db_clone = self.db.clone();
|
|
let cache_clone = self.cache.clone();
|
|
let logger_clone = self.logger.clone();
|
|
|
|
tokio::task::spawn_blocking(move || -> Result<(), crate::GitError> {
|
|
tokio::runtime::Handle::current().block_on(async move {
|
|
let sync =
|
|
HookMetaDataSync::new(db_clone.clone(), cache_clone, repo, logger_clone)?;
|
|
let mut txn = db_clone.begin().await.map_err(crate::GitError::from)?;
|
|
sync.run_fsck_and_rollback_if_corrupt(&mut txn).await
|
|
})
|
|
})
|
|
.await
|
|
.map_err(|e| crate::GitError::Internal(format!("spawn_blocking failed: {}", e)))??;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn run_gc(&self, task: &HookTask) -> Result<(), crate::GitError> {
|
|
let repo_id = models::Uuid::parse_str(&task.repo_id)
|
|
.map_err(|_| crate::GitError::Internal("invalid repo_id uuid".into()))?;
|
|
|
|
let repo = models::repos::repo::Entity::find_by_id(repo_id)
|
|
.one(self.db.reader())
|
|
.await
|
|
.map_err(crate::GitError::from)?
|
|
.ok_or_else(|| crate::GitError::NotFound(format!("repo {} not found", repo_id)))?;
|
|
|
|
self.log_stream
|
|
.info(&task.id, &task.repo_id, "running gc")
|
|
.await;
|
|
|
|
let db_clone = self.db.clone();
|
|
let cache_clone = self.cache.clone();
|
|
let logger_clone = self.logger.clone();
|
|
|
|
tokio::task::spawn_blocking(move || -> Result<(), crate::GitError> {
|
|
tokio::runtime::Handle::current().block_on(async move {
|
|
let sync = HookMetaDataSync::new(db_clone, cache_clone, repo, logger_clone)?;
|
|
sync.run_gc().await
|
|
})
|
|
})
|
|
.await
|
|
.map_err(|e| crate::GitError::Internal(format!("spawn_blocking failed: {}", e)))??;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn metrics(&self) -> PoolMetrics {
|
|
let running = self.running_count.load(Ordering::Relaxed) as usize;
|
|
PoolMetrics {
|
|
running,
|
|
max_concurrent: self.config.max_concurrent,
|
|
cpu_usage: 0.0,
|
|
total_processed: self.total_processed.load(Ordering::Relaxed),
|
|
total_failed: self.total_failed.load(Ordering::Relaxed),
|
|
can_accept: running < self.config.max_concurrent,
|
|
}
|
|
}
|
|
|
|
pub fn can_accept_task_sync(&self) -> bool {
|
|
let running = self.running_count.load(Ordering::Relaxed) as usize;
|
|
running < self.config.max_concurrent
|
|
}
|
|
|
|
    /// Borrows the per-task Redis log stream, so callers outside the pool
    /// can publish task-scoped log lines.
    pub fn log_stream(&self) -> &LogStream {
        &self.log_stream
    }
|
|
}
|