gitdataai/libs/service/agent/code_review.rs

//! AI-powered code review service.
//!
//! Analyzes PR diffs and posts structured review comments.
//!
//! Triggered automatically on PR creation (if `repo.ai_code_review_enabled`) or
//! manually via the `trigger_ai_code_review` API.

use crate::AppService;
use crate::error::AppError;
use chrono::Utc;
use models::agents::ModelStatus;
use models::agents::model::{Column as MColumn, Entity as MEntity};
use models::pull_request::pull_request_review_comment;
use models::repos::repo;
use sea_orm::*;
use serde::{Deserialize, Serialize};
use session::Session;
use utoipa::ToSchema;
use uuid::Uuid;

use super::billing::BillingRecord;

/// Author id stored on review comments created by this service (the nil UUID).
const AI_BOT_UUID: Uuid = Uuid::nil();

/// Severity attached to a single AI review comment.
///
/// Serialized and deserialized in lowercase (`"info"`, `"warning"`, `"error"`),
/// which matches the values the review prompt asks the model to emit.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReviewSeverity {
    /// Informational remark.
    Info,
    /// Something that should probably be changed.
    Warning,
    /// A defect that should be fixed.
    Error,
}

impl std::fmt::Display for ReviewSeverity {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ReviewSeverity::Info => write!(f, "info"),
            ReviewSeverity::Warning => write!(f, "warning"),
            ReviewSeverity::Error => write!(f, "error"),
        }
    }
}

/// One review comment as parsed from the AI model's response.
///
/// The field names mirror the JSON object shape requested in the review prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeReviewComment {
    /// File path the comment refers to (e.g. `src/main.rs`).
    pub path: String,
    /// Line number in the new version of the file; `None` for general comments.
    pub line: Option<i64>,
    /// Line number in the old version of the file (the prompt asks for it only on deleted lines).
    pub old_line: Option<i64>,
    /// Diff side as requested from the model: `"RIGHT"` (addition) or `"LEFT"` (deletion).
    pub side: Option<String>,
    /// Markdown-formatted comment text.
    pub body: String,
    /// Severity assigned to the comment.
    pub severity: ReviewSeverity,
}

/// Renders the comment as a two-line Markdown snippet: a header with the
/// severity and `path:line` location, followed by the comment body.
impl std::fmt::Display for CodeReviewComment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A missing line number renders as an empty string after the colon.
        let location = match self.line {
            Some(number) => number.to_string(),
            None => String::new(),
        };

        writeln!(f, "**{}** | `{}:{}`", self.severity, self.path, location)?;
        f.write_str(&self.body)?;
        f.write_str("\n")
    }
}

/// Result of an AI code review run.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct TriggerCodeReviewResponse {
    /// Number of review comments that were successfully stored.
    pub comments_posted: usize,
    /// Summary of each stored comment.
    pub comments: Vec<CommentCreated>,
    /// Billing record for the AI call, when one is available.
    pub billing: Option<BillingRecord>,
}

/// Summary of one review comment that was stored for the pull request.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct CommentCreated {
    /// File path the comment was attached to (empty if the stored row had none).
    pub path: String,
    /// Line number the comment was attached to, if any.
    pub line: Option<i64>,
    /// Severity label (`info`, `warning` or `error`).
    pub severity: String,
}

/// Request body for manually triggering an AI code review.
///
/// NOTE(review): presumably these fields are forwarded to
/// `AppService::trigger_ai_code_review` by the HTTP handler — confirm at the call site.
#[derive(Debug, Clone, Deserialize, ToSchema)]
pub struct TriggerCodeReviewRequest {
    /// Pull request number to review; optional.
    pub pr_number: Option<i64>,
    /// AI model to use; optional.
    pub model_id: Option<Uuid>,
}

impl AppService {
    pub async fn trigger_ai_code_review(
        &self,
        namespace: String,
        repo_name: String,
        pr_number: Option<i64>,
        model_id: Option<Uuid>,
        ctx: &Session,
    ) -> Result<TriggerCodeReviewResponse, AppError> {
        let repo = self
            .utils_find_repo(namespace.clone(), repo_name.clone(), ctx)
            .await?;
        self.trigger_ai_code_review_internal(namespace, repo_name, pr_number, model_id, repo)
            .await
    }

    pub async fn trigger_ai_code_review_internal(
        &self,
        _namespace: String,
        _repo_name: String,
        pr_number: Option<i64>,
        model_id: Option<Uuid>,
        repo: repo::Model,
    ) -> Result<TriggerCodeReviewResponse, AppError> {
        let pr = match pr_number {
            Some(n) => models::pull_request::pull_request::Entity::find()
                .filter(models::pull_request::pull_request::Column::Repo.eq(repo.id))
                .filter(models::pull_request::pull_request::Column::Number.eq(n))
                .one(&self.db)
                .await?
                .ok_or_else(|| AppError::NotFound("Pull request not found".to_string()))?,
            None => {
                // Get the most recently created open PR
                models::pull_request::pull_request::Entity::find()
                    .filter(models::pull_request::pull_request::Column::Repo.eq(repo.id))
                    .order_by_desc(models::pull_request::pull_request::Column::CreatedAt)
                    .one(&self.db)
                    .await?
                    .ok_or_else(|| AppError::NotFound("No open pull request found".to_string()))?
            }
        };

        // Fetch the model first so we can use its context_length for diff truncation
        let model = match model_id {
            Some(id) => MEntity::find_by_id(id)
                .one(&self.db)
                .await?
                .ok_or(AppError::NotFound("Model not found".to_string()))?,
            None => MEntity::find()
                .filter(MColumn::Status.eq(ModelStatus::Active.to_string()))
                .order_by_asc(MColumn::Name)
                .one(&self.db)
                .await?
                .ok_or_else(|| {
                    AppError::InternalServerError(
                        "No active AI model found. Please configure an AI model first.".into(),
                    )
                })?,
        };

        let diff = self
            .get_pr_diff_for_review(&repo, &pr, &model.name, model.context_length)
            .await?;

        let prompt = build_code_review_prompt(&pr, &diff);

        let ai_response = call_ai_model(&model.name, &prompt, &self.config).await?;

        // Record billing (non-fatal — log warning but don't fail the review).
        let billing = self
            .record_ai_usage(
                repo.project,
                model.id,
                ai_response.input_tokens,
                ai_response.output_tokens,
            )
            .await
            .inspect_err(|e| {
                slog::warn!(
                    self.logs,
                    "failed to record AI billing for code review";
                    "project" => %repo.project,
                    "error" => ?e
                );
            })
            .ok();

        let comments = parse_ai_response(&ai_response.content);

        if comments.is_empty() {
            return Ok(TriggerCodeReviewResponse {
                comments_posted: 0,
                comments: vec![],
                billing: None,
            });
        }

        let mut created = Vec::new();
        let now = Utc::now();

        for comment in &comments {
            let max_id: Option<Option<i64>> = pull_request_review_comment::Entity::find()
                .filter(pull_request_review_comment::Column::Repo.eq(repo.id))
                .filter(pull_request_review_comment::Column::Number.eq(pr.number))
                .select_only()
                .column_as(pull_request_review_comment::Column::Id.max(), "max_id")
                .into_tuple::<Option<i64>>()
                .one(&self.db)
                .await?;
            let comment_id = max_id.flatten().unwrap_or(0) + 1;

            let body = format!(
                "🤖 **AI Review** | *{severity}*\n\n{body}",
                severity = comment.severity,
                body = comment.body
            );

            let active = pull_request_review_comment::ActiveModel {
                repo: Set(repo.id),
                number: Set(pr.number),
                id: Set(comment_id),
                review: Set(None),
                path: Set(Some(comment.path.clone())),
                side: Set(comment.side.clone()),
                line: Set(comment.line),
                old_line: Set(comment.old_line),
                body: Set(body),
                author: Set(AI_BOT_UUID),
                resolved: Set(false),
                in_reply_to: Set(None),
                created_at: Set(now),
                updated_at: Set(now),
            };

            match active.insert(&self.db).await {
                Ok(created_comment) => {
                    created.push(CommentCreated {
                        path: created_comment.path.unwrap_or_default(),
                        line: created_comment.line,
                        severity: comment.severity.to_string(),
                    });
                }
                Err(e) => {
                    slog::warn!(
                        self.logs,
                        "failed to create AI review comment";
                        "path" => %comment.path,
                        "error" => ?e
                    );
                }
            }
        }

        Ok(TriggerCodeReviewResponse {
            comments_posted: created.len(),
            comments: created,
            billing,
        })
    }

    async fn get_pr_diff_for_review(
        &self,
        repo: &repo::Model,
        pr: &models::pull_request::pull_request::Model,
        model_name: &str,
        context_limit: i64,
    ) -> Result<String, AppError> {
        let (base_oid, head_oid) = tokio::task::spawn_blocking({
            let base = pr.base.clone();
            let head = pr.head.clone();
            let repo_model = repo.clone();
            move || -> Result<(git2::Oid, git2::Oid), AppError> {
                let domain = crate::git::GitDomain::from_model(repo_model)?;
                let base_commit_oid = domain
                    .branch_target(&base)
                    .map_err(|e| crate::git::GitError::Internal(e.to_string()))?
                    .ok_or_else(|| {
                        crate::git::GitError::NotFound(format!("Branch '{}' not found", base))
                    })?;
                let head_commit_oid = domain
                    .branch_target(&head)
                    .map_err(|e| crate::git::GitError::Internal(e.to_string()))?
                    .ok_or_else(|| {
                        crate::git::GitError::NotFound(format!("Branch '{}' not found", head))
                    })?;
                let base_oid = base_commit_oid
                    .to_oid()
                    .map_err(|e| crate::git::GitError::Internal(e.to_string()))?;
                let head_oid = head_commit_oid
                    .to_oid()
                    .map_err(|e| crate::git::GitError::Internal(e.to_string()))?;
                Ok((base_oid, head_oid))
            }
        })
        .await
        .map_err(|e| AppError::InternalServerError(format!("Task join error: {e}")))?
        .map_err(AppError::from)?;

        // Get the unified diff as raw patch text
        let diff_text = tokio::task::spawn_blocking({
            let repo_model = repo.clone();
            let base = base_oid.to_string();
            let head = head_oid.to_string();
            move || -> Result<String, AppError> {
                let domain = crate::git::GitDomain::from_model(repo_model)?;

                let base_oid = git2::Oid::from_str(&base)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))?;
                let head_oid = git2::Oid::from_str(&head)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))?;

                let old_tree = domain
                    .repo()
                    .find_tree(base_oid)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))?;
                let new_tree = domain
                    .repo()
                    .find_tree(head_oid)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))?;

                let diff = domain
                    .repo()
                    .diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))?;

                // Print as unified patch
                let mut patch_buf: Vec<u8> = Vec::new();
                diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
                    patch_buf.extend_from_slice(line.content());
                    patch_buf.push(b'\n');
                    true
                })
                .map_err(|e| AppError::InternalServerError(e.to_string()))?;

                String::from_utf8(patch_buf)
                    .map_err(|e| AppError::InternalServerError(e.to_string()))
            }
        })
        .await
        .map_err(|e| AppError::InternalServerError(format!("Task join error: {e}")))?
        .map_err(AppError::from)?;

        // Truncate if too large to stay within token budget.
        // Reserve 4096 tokens for output + system overhead (~512 tokens).
        let reserve = 4608;
        match agent::tokent::truncate_to_token_budget(
            &diff_text,
            model_name,
            context_limit as usize,
            reserve,
        ) {
            Ok(truncated) if truncated.len() < diff_text.len() => {
                let chars_shown = truncated.len();
                Ok(format!(
                    "[Diff truncated — showing first {} chars to fit token budget]\n{}",
                    chars_shown, truncated
                ))
            }
            _ => Ok(diff_text),
        }
    }
}

/// Builds the prompt sent to the AI model for reviewing a pull request.
///
/// The prompt contains the PR title, description (or `(no description)`), base
/// and head branch names, the diff inside a single fenced `diff` block, and the
/// instructions describing the JSON array the model must answer with.
fn build_code_review_prompt(pr: &models::pull_request::pull_request::Model, diff: &str) -> String {
    // Output contract for the model. The example block is fenced on both sides so
    // the instructions that follow it are not read as part of the example.
    let severity_note = r#"Respond with a JSON array of review comments. Each comment must have:
- "path": string (file path, e.g. "src/main.rs")
- "line": number | null (line number in the new version, null for general comments)
- "old_line": number | null (only for deleted lines)
- "side": "RIGHT" | "LEFT" | null ("RIGHT" = addition, "LEFT" = deletion)
- "body": string (Markdown-formatted comment text)
- "severity": "info" | "warning" | "error"

Example:
```json
[
  {"path": "src/main.rs", "line": 42, "side": "RIGHT", "body": "Consider using a constant here.", "severity": "info"}
]
```

Only suggest fixes that are correct and safe. Do not suggest style preferences unless the codebase has an explicit style guide. Focus on bugs, security issues, performance problems, and code clarity.
"#;

    // The diff is embedded exactly once; repeating it would double the token cost
    // and defeat the truncation applied by the caller.
    format!(
        r#"You are a senior code reviewer. Review the following pull request.

## Pull Request
Title: {title}
Description: {body}
Base branch: {base}
Head branch: {head}

## Diff (unified format: +added, -removed)
```diff
{diff}
```

{severity_note}"#,
        title = pr.title,
        body = pr.body.as_deref().unwrap_or("(no description)"),
        base = pr.base,
        head = pr.head,
        diff = diff,
        severity_note = severity_note,
    )
}

/// Sends `prompt` as a single user message to the model named `model_name`.
///
/// The API key comes from `app_config`; the base URL falls back to
/// `https://api.openai.com` when none is configured.
///
/// # Errors
/// Returns `AppError::InternalServerError` when the API key is missing or the
/// call itself fails.
async fn call_ai_model(
    model_name: &str,
    prompt: &str,
    app_config: &config::AppConfig,
) -> Result<agent::AiCallResponse, AppError> {
    use async_openai::types::chat::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
        ChatCompletionRequestUserMessageContent,
    };

    let api_key = match app_config.ai_api_key() {
        Ok(key) => key,
        Err(e) => {
            return Err(AppError::InternalServerError(format!(
                "AI API key not configured: {}",
                e
            )));
        }
    };
    let base_url = app_config
        .ai_basic_url()
        .unwrap_or_else(|_| "https://api.openai.com".into());
    let client_config = agent::AiClientConfig::new(api_key).with_base_url(base_url);

    // The whole prompt travels as one user message.
    let user_message = ChatCompletionRequestUserMessage {
        content: ChatCompletionRequestUserMessageContent::Text(prompt.to_string()),
        ..Default::default()
    };
    let messages = vec![ChatCompletionRequestMessage::User(user_message)];

    let outcome =
        agent::call_with_params(&messages, model_name, &client_config, 0.2, 8192, None, None)
            .await;
    match outcome {
        Ok(response) => Ok(response),
        Err(e) => Err(AppError::InternalServerError(format!(
            "AI call failed: {}",
            e
        ))),
    }
}

/// Turns the raw model response into review comments.
///
/// Parsing is attempted in order of decreasing strictness:
/// 1. the fenced (or bare) JSON found by `extract_json_block`, or the whole
///    response if none is found;
/// 2. the span from the first `[` to the last `]`, which covers a JSON array
///    surrounded by prose but not wrapped in a code fence;
/// 3. the line-based heuristic in `extract_fallback_comments`.
fn parse_ai_response(response: &str) -> Vec<CodeReviewComment> {
    // Try to extract a JSON code block from the response
    let json_str = extract_json_block(response).unwrap_or_else(|| response.to_string());
    if let Ok(comments) = serde_json::from_str::<Vec<CodeReviewComment>>(&json_str) {
        return comments;
    }

    // `[` and `]` are single-byte, so both indices are valid slice boundaries.
    if let (Some(start), Some(end)) = (response.find('['), response.rfind(']')) {
        if start < end {
            if let Ok(comments) =
                serde_json::from_str::<Vec<CodeReviewComment>>(&response[start..=end])
            {
                return comments;
            }
        }
    }

    // Try to salvage by looking for common patterns
    extract_fallback_comments(response)
}

/// Extracts the text most likely to be the JSON payload of a model response.
///
/// Fenced code blocks are scanned in a single pass:
/// * the first non-empty block tagged `json` (case-insensitive) is returned;
/// * otherwise the first non-empty untagged block is returned;
/// * blocks tagged with any other language are ignored.
///
/// A block left unclosed at the end of the text counts as closed there. If no
/// suitable block exists, the trimmed text is returned when it starts with
/// `[`; otherwise `None`.
///
/// Every line of the returned block is trimmed, which cannot change the meaning
/// of valid JSON.
fn extract_json_block(text: &str) -> Option<String> {
    /// What the info string on the opening fence says about a block.
    #[derive(Clone, Copy)]
    enum Fence {
        Json,
        Untagged,
        Other,
    }

    let mut open: Option<Fence> = None;
    let mut buf = String::new();
    let mut first_untagged: Option<String> = None;

    // The trailing virtual fence closes a block that was left open at EOF.
    for line in text.lines().map(str::trim).chain(std::iter::once("```")) {
        let Some(info) = line.strip_prefix("```") else {
            // Ordinary line: keep it only while inside a block that may hold JSON.
            if matches!(open, Some(Fence::Json | Fence::Untagged)) {
                buf.push_str(line);
                buf.push('\n');
            }
            continue;
        };
        let info = info.trim();

        if let Some(kind) = open.take() {
            // This fence closes the current block.
            let body = buf.trim();
            if !body.is_empty() {
                match kind {
                    Fence::Json => return Some(body.to_string()),
                    Fence::Untagged => {
                        if first_untagged.is_none() {
                            first_untagged = Some(body.to_string());
                        }
                    }
                    Fence::Other => {}
                }
            }
            buf.clear();
            if info.is_empty() {
                continue;
            }
            // A tagged fence while a block was open: the previous block was never
            // closed properly, so treat this line as the start of a new block.
        }

        open = Some(if info.is_empty() {
            Fence::Untagged
        } else if info.eq_ignore_ascii_case("json") {
            Fence::Json
        } else {
            Fence::Other
        });
    }

    // Try inline JSON array at start
    first_untagged.or_else(|| {
        let trimmed = text.trim();
        trimmed.starts_with('[').then(|| trimmed.to_string())
    })
}

fn extract_fallback_comments(text: &str) -> Vec<CodeReviewComment> {
    // Simple heuristic: look for lines like "path: src/main.rs" or "src/main.rs:42"
    let mut comments = Vec::new();
    let mut current_path = String::new();
    let mut current_line: Option<i64> = None;
    let mut current_side: Option<String> = None;
    let mut current_severity = ReviewSeverity::Info;
    let mut current_body = String::new();
    let mut in_body = false;

    for line in text.lines() {
        let line = line.trim();
        if line.is_empty() {
            if in_body && !current_body.is_empty() {
                current_body.push_str(line);
                current_body.push('\n');
            }
            continue;
        }

        // Detect path + line pattern
        if let Some((path, rest)) = line.split_once(':') {
            if rest.trim().parse::<i64>().is_ok() {
                // Flush previous comment
                if !current_path.is_empty() && !current_body.trim().is_empty() {
                    comments.push(CodeReviewComment {
                        path: current_path.clone(),
                        line: current_line,
                        old_line: None,
                        side: current_side.clone(),
                        body: current_body.trim().to_string(),
                        severity: current_severity.clone(),
                    });
                }
                current_path = path.trim().to_string();
                current_line = rest.trim().parse().ok();
                current_side = None;
                current_body.clear();
                in_body = true;
                continue;
            }
        }

        // Detect severity markers
        let has_severity = line.contains("[error]")
            || line.starts_with("**Error**")
            || line.contains("[warning]")
            || line.starts_with("**Warning**");

        if line.contains("[error]") || line.starts_with("**Error**") {
            current_severity = ReviewSeverity::Error;
        } else if line.contains("[warning]") || line.starts_with("**Warning**") {
            current_severity = ReviewSeverity::Warning;
        }

        if in_body || has_severity {
            current_body.push_str(line);
            current_body.push('\n');
        }
    }

    // Flush last
    if !current_path.is_empty() && !current_body.trim().is_empty() {
        comments.push(CodeReviewComment {
            path: current_path,
            line: current_line,
            old_line: None,
            side: current_side,
            body: current_body.trim().to_string(),
            severity: current_severity,
        });
    }

    comments
}