gitdataai/libs/git/hook/pool/redis.rs

use crate::error::GitError;
use crate::hook::pool::types::HookTask;
use deadpool_redis::cluster::Connection as RedisConn;
use slog::Logger;
use std::time::Duration;

/// Redis List consumer using BLMOVE for atomic move-from-queue-to-work pattern.
///
/// For a given task type the consumer reads from the list `<prefix>:<task_type>`
/// and parks in-flight tasks in `<prefix>:<task_type>:work` until they are
/// acknowledged (removed) or negatively acknowledged (moved back to the queue).
pub struct RedisConsumer {
    /// Connection pool for the Redis cluster; every operation checks a
    /// connection out of this pool.
    pool: deadpool_redis::cluster::Pool,
    /// Hash-tag-prefixed key prefix, e.g. "{hook}".
    prefix: String,
    /// Value passed as the BLMOVE timeout argument, in seconds.
    block_timeout_secs: u64,
    /// Structured logger used for dequeue, discard and requeue events.
    logger: Logger,
}

/// Maximum time to wait for a connection to be handed out by the pool before
/// the operation fails with a "redis pool get timed out" error.
const POOL_GET_TIMEOUT: Duration = Duration::from_secs(5);

impl RedisConsumer {
    pub fn new(
        pool: deadpool_redis::cluster::Pool,
        prefix: String,
        block_timeout_secs: u64,
        logger: Logger,
    ) -> Self {
        Self {
            pool,
            prefix,
            block_timeout_secs,
            logger,
        }
    }

    /// Atomically moves a task from the main queue to the work queue using BLMOVE.
    /// Blocks up to `block_timeout_secs` waiting for a task.
    ///
    /// Returns `Some((HookTask, task_json))` where `task_json` is the raw JSON string
    /// needed for LREM on ACK. Returns `None` if the blocking timed out.
    pub async fn next(&self, task_type: &str) -> Result<Option<(HookTask, String)>, GitError> {
        let queue_key = format!("{}:{}", self.prefix, task_type);
        let work_key = format!("{}:{}:work", self.prefix, task_type);

        let redis = tokio::time::timeout(POOL_GET_TIMEOUT, self.pool.get())
            .await
            .map_err(|_| GitError::Internal("redis pool get timed out".into()))?
            .map_err(|e| GitError::Internal(format!("redis pool get failed: {}", e)))?;

        let mut conn: RedisConn = redis;

        // BLMOVE source destination <LEFT|RIGHT> <LEFT|RIGHT> timeout
        let task_json: Option<String> = redis::cmd("BLMOVE")
            .arg(&queue_key)
            .arg(&work_key)
            .arg("RIGHT")
            .arg("LEFT")
            .arg(self.block_timeout_secs)
            .query_async(&mut conn)
            .await
            .map_err(|e| GitError::Internal(format!("BLMOVE failed: {}", e)))?;

        match task_json {
            Some(json) => {
                match serde_json::from_str::<HookTask>(&json) {
                    Ok(task) => {
                        slog::debug!(self.logger, "task dequeued";
                            "task_id" => %task.id,
                            "task_type" => %task.task_type,
                            "queue" => %queue_key
                        );
                        Ok(Some((task, json)))
                    }
                    Err(e) => {
                        // Malformed task — remove from work queue and discard
                        slog::warn!(self.logger, "malformed task JSON, discarding";
                            "error" => %e,
                            "queue" => %work_key
                        );
                        let _ = self.ack_raw(&work_key, &json).await;
                        Ok(None)
                    }
                }
            }
            None => {
                // Timed out, no task available
                Ok(None)
            }
        }
    }

    /// Acknowledge a task: remove it from the work queue (LREM).
    pub async fn ack(&self, work_key: &str, task_json: &str) -> Result<(), GitError> {
        self.ack_raw(work_key, task_json).await
    }

    async fn ack_raw(&self, work_key: &str, task_json: &str) -> Result<(), GitError> {
        let redis = tokio::time::timeout(POOL_GET_TIMEOUT, self.pool.get())
            .await
            .map_err(|_| GitError::Internal("redis pool get timed out".into()))?
            .map_err(|e| GitError::Internal(format!("redis pool get failed: {}", e)))?;

        let mut conn: RedisConn = redis;

        let _: i64 = redis::cmd("LREM")
            .arg(work_key)
            .arg(-1)
            .arg(task_json)
            .query_async(&mut conn)
            .await
            .map_err(|e| GitError::Internal(format!("LREM failed: {}", e)))?;

        Ok(())
    }

    /// Negative acknowledge (retry): remove from work queue and push back to main queue.
    pub async fn nak(
        &self,
        work_key: &str,
        queue_key: &str,
        task_json: &str,
    ) -> Result<(), GitError> {
        self.nak_with_retry(work_key, queue_key, task_json, task_json).await
    }

    /// Negative acknowledge with a different (updated) task JSON — used to
    /// requeue with an incremented retry_count.
    /// Uses a Lua script for atomic LREM + LPUSH to prevent task loss on crash.
    pub async fn nak_with_retry(
        &self,
        work_key: &str,
        queue_key: &str,
        old_task_json: &str,
        new_task_json: &str,
    ) -> Result<(), GitError> {
        let redis = tokio::time::timeout(POOL_GET_TIMEOUT, self.pool.get())
            .await
            .map_err(|_| GitError::Internal("redis pool get timed out".into()))?
            .map_err(|e| GitError::Internal(format!("redis pool get failed: {}", e)))?;

        let mut conn: RedisConn = redis;

        // Atomic: remove from work queue AND push to retry queue in one script.
        // If the process crashes mid-script, either both happen or neither — no lost tasks.
        let script = r#"
            redis.call("LREM", KEYS[1], 1, ARGV[1])
            redis.call("LPUSH", KEYS[2], ARGV[2])
            return 1
        "#;

        let _: i32 = redis::Script::new(script)
            .key(work_key)
            .key(queue_key)
            .arg(old_task_json)
            .arg(new_task_json)
            .invoke_async(&mut conn)
            .await
            .map_err(|e| GitError::Internal(format!("nak script failed: {}", e)))?;

        slog::warn!(self.logger, "task nack'd and requeued queue={}", queue_key);

        Ok(())
    }

    pub fn prefix(&self) -> &str {
        &self.prefix
    }
}

impl Clone for RedisConsumer {
    /// Builds a new consumer by cloning each field of `self` in turn.
    fn clone(&self) -> Self {
        // Destructure so that adding a field to the struct forces this impl
        // to be revisited.
        let Self {
            pool,
            prefix,
            block_timeout_secs,
            logger,
        } = self;

        Self {
            pool: pool.clone(),
            prefix: prefix.clone(),
            block_timeout_secs: *block_timeout_secs,
            logger: logger.clone(),
        }
    }
}