gitdataai/libs/agent/orao/observe.rs

//! Observe phase: LLM-driven multi-channel environment perception.
//!
//! The Observe phase gives the LLM a set of read-only observation tools and
//! instructs it to explore the environment. All file/git/system access goes
//! through function calls (tools), never direct filesystem operations.
//!
//! After exploration, the LLM produces a structured [`PerceptionSnapshot`]
//! summarizing the current state of the project.

use rig::agent::AgentBuilder;
use rig::client::CompletionClient;
use rig::completion::Prompt;

use crate::client::AiClientConfig;
use crate::error::AgentError;

use super::types::{ActionResult, PerceptionSnapshot};

/// System prompt (agent preamble) for the ORAO Observe phase.
///
/// [`observe`] installs this text as the agent's preamble. It instructs the
/// model to explore the environment through tools and then reply with a
/// markdown summary whose sections start with `### ` headings.
///
/// The heading names matter: `parse_observation_summary` splits the reply on
/// lines starting with `### `, and `store_section` routes each section by
/// keywords in its (lower-cased) heading (`git`, `project` + `structure`,
/// `file`, `error` / `issue` / `warning`). Headings that match none of these,
/// such as "Previous Action Result", are stored as free-form notes. Keep the
/// headings below in sync with that matching logic when editing this prompt.
const OBSERVE_SYSTEM_PROMPT: &str = r#"You are an expert software engineering agent using the ORAO (Observe-Reason-Act-Observe) framework.

## Your Role: OBSERVE Phase

You are currently in the OBSERVE phase. Your task is to explore the project environment
and gather all relevant information using the available tools.

## What to Observe

Use the tools provided to you to check:

1. **Git status**: What branch are we on? What files have changed? Any uncommitted work?
2. **Project structure**: What directories and key files exist?
3. **Code content**: Read relevant source files to understand the codebase state.
4. **Errors/warnings**: Check build output, test results, linter output for issues.
5. **Configuration**: Check project config files (Cargo.toml, package.json, etc.) if relevant.

## Rules

- Use tools to explore — do NOT guess or assume file contents.
- Focus on information relevant to the task at hand.
- Be thorough but efficient: 3-8 tool calls is typical.
- After gathering information, summarize your findings clearly.

## Output Format

After you have finished observing, provide a summary with these sections:

### Git Status
[Current branch, changed files, commit status]

### Project Structure
[Key directories and files relevant to the task]

### Key Files
[Important files you read, with brief notes on their content]

### Errors / Issues
[Any errors, warnings, or problems detected]

### Previous Action Result
[If a previous action was executed, describe its outcome]"#;

/// Run the Observe phase: let the LLM explore the environment via tools.
///
/// Builds an agent from `config` and `model_name`, gives it
/// [`OBSERVE_SYSTEM_PROMPT`] as preamble together with the supplied `tools`,
/// prompts it with the task goal (plus the previous action's outcome, if any),
/// and parses the model's final text reply into a [`PerceptionSnapshot`].
///
/// All environment access goes through the provided `tools` — this function
/// performs no direct filesystem operations itself.
///
/// # Parameters
///
/// - `config`: client configuration used to build the underlying rig client.
/// - `model_name`: name of the completion model to request from the client.
/// - `task_goal`: free-text description of the task, embedded in the user prompt.
/// - `previous_result`: outcome of the previously executed action, if any. It is
///   summarized in the prompt and then moved into the returned snapshot.
/// - `tools`: observation tools handed to the agent. Ownership is taken
///   (caller must clone if they need to reuse them).
/// - `max_turns`: turn limit, passed both as the agent's default and on this
///   specific prompt request.
///
/// # Errors
///
/// Returns [`AgentError::OpenAi`] carrying the stringified prompt error if the
/// prompt request fails.
pub async fn observe(
    config: &AiClientConfig,
    model_name: &str,
    task_goal: &str,
    previous_result: Option<ActionResult>,
    tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>>,
    max_turns: usize,
) -> Result<PerceptionSnapshot, AgentError> {
    // Borrow the previous result for prompt building; it is moved into the
    // snapshot further down.
    let user_prompt = build_observe_prompt(task_goal, previous_result.as_ref());

    let client = config.build_rig_client();
    let model = client.completion_model(model_name);

    let agent = AgentBuilder::new(model)
        .preamble(OBSERVE_SYSTEM_PROMPT)
        .tools(tools)
        .default_max_turns(max_turns)
        .build();

    // NOTE(review): `max_turns` is set here as well as via `default_max_turns`
    // above; one of the two is presumably redundant — confirm against rig docs.
    let response = agent
        .prompt(&user_prompt)
        .max_turns(max_turns)
        .extended_details()
        .await
        .map_err(|e: rig::completion::PromptError| AgentError::OpenAi(e.to_string()))?;

    // Build snapshot from the LLM's final summary
    let summary = response.output;
    let snapshot = parse_observation_summary(&summary, previous_result);

    Ok(snapshot)
}

/// Assemble the user-facing prompt sent to the model in the Observe phase.
///
/// The prompt always contains the task goal and the exploration instructions.
/// When `previous_result` is present, a "Previous Action Result" section is
/// appended with the action description, verdict, exit code, and the
/// stdout/stderr of that action (each passed through `truncate_str` with a
/// limit of 2000).
fn build_observe_prompt(task_goal: &str, previous_result: Option<&ActionResult>) -> String {
    let goal_section = format!(
        "## Task Goal\n\n{}\n\n## Instructions\n\n\
         Explore the project environment using the available tools. \
         Gather all information relevant to the task above. \
         After you have gathered sufficient information, provide a structured summary.",
        task_goal
    );

    match previous_result {
        // First round: nothing has been executed yet, so the goal is the whole prompt.
        None => goal_section,
        Some(prev) => {
            let stdout_excerpt = truncate_str(&prev.stdout, 2000);
            let stderr_excerpt = truncate_str(&prev.stderr, 2000);
            let outcome_section = format!(
                "\n\n## Previous Action Result\n\n\
                 - Action: {}\n\
                 - Verdict: {:?}\n\
                 - Exit code: {:?}\n\
                 - stdout: {}\n\
                 - stderr: {}",
                prev.action.description,
                prev.verdict,
                prev.exit_code,
                stdout_excerpt,
                stderr_excerpt,
            );
            goal_section + &outcome_section
        }
    }
}

/// Turn the model's markdown summary into a [`PerceptionSnapshot`].
///
/// The summary is split on lines that start with `### `; the text after the
/// heading marker names the section and the following lines (up to the next
/// heading) form its body. Each section is handed to `store_section`. Lines
/// that appear before the first heading are treated as a section with an
/// empty name.
///
/// `previous_result` is moved into the snapshot unchanged. If parsing yields
/// no git status, no project structure, no files, and no errors, the entire
/// summary is kept under the `raw_observation` note so nothing is lost.
fn parse_observation_summary(
    summary: &str,
    previous_result: Option<ActionResult>,
) -> PerceptionSnapshot {
    let mut snapshot = PerceptionSnapshot::default();

    let mut heading = "";
    let mut body: Vec<&str> = Vec::new();

    for line in summary.lines() {
        if !line.starts_with("### ") {
            body.push(line);
            continue;
        }

        // A new heading closes the section collected so far.
        store_section(&mut snapshot, heading, &body);
        heading = line.trim_start_matches("### ").trim();
        body.clear();
    }
    // Flush whatever followed the final heading.
    store_section(&mut snapshot, heading, &body);

    snapshot.previous_action_result = previous_result;

    let nothing_structured = snapshot.git_status.is_none()
        && snapshot.project_structure.is_none()
        && snapshot.files.is_empty()
        && snapshot.errors.is_empty();
    if nothing_structured {
        snapshot
            .notes
            .insert("raw_observation".to_string(), summary.to_string());
    }

    snapshot
}

/// Route one parsed summary section into the matching snapshot field.
///
/// `section` is the heading text and `content` the lines beneath it. Sections
/// whose joined, trimmed body is empty are ignored. Routing is keyword-based
/// on the lower-cased heading, checked in this order:
///
/// 1. contains `git` → replaces `git_status`
/// 2. contains `project` and `structure` → replaces `project_structure`
/// 3. contains `file` → each line that yields a path via `extract_file_path`
///    is appended to `files` (size 0, no preview)
/// 4. contains `error`, `issue`, or `warning` → each non-empty line that does
///    not start with `#` is appended to `errors`
/// 5. anything else → stored in `notes` under the original heading text
fn store_section(snapshot: &mut PerceptionSnapshot, section: &str, content: &[&str]) {
    let joined = content.join("\n");
    let body = joined.trim();
    if body.is_empty() {
        return;
    }

    let heading = section.to_lowercase();

    if heading.contains("git") {
        snapshot.git_status = Some(body.to_string());
    } else if heading.contains("project") && heading.contains("structure") {
        snapshot.project_structure = Some(body.to_string());
    } else if heading.contains("file") {
        // Only lines that look like file references produce entries.
        snapshot.files.extend(
            content
                .iter()
                .filter_map(|raw| extract_file_path(raw.trim()))
                .map(|path| super::types::PerceivedFile {
                    path,
                    size_bytes: 0,
                    content_preview: None,
                }),
        );
    } else if heading.contains("error") || heading.contains("issue") || heading.contains("warning")
    {
        snapshot.errors.extend(
            content
                .iter()
                .map(|raw| raw.trim())
                .filter(|entry| !entry.is_empty() && !entry.starts_with('#'))
                .map(str::to_string),
        );
    } else {
        // Unrecognized headings are preserved verbatim as notes.
        snapshot
            .notes
            .insert(section.to_string(), body.to_string());
    }
}

/// Extract a file path from a markdown list item or code reference.
///
/// Two forms are recognized, tried in this order:
///
/// 1. **Backtick-wrapped**: the text between the first pair of backticks on
///    the line, e.g. ``- `src/main.rs`: entry point`` → `src/main.rs`.
/// 2. **Bare bullet**: for lines starting with `-` or `*`, the first
///    whitespace-delimited token after the bullet, with surrounding markdown
///    decoration (`` ` ``, `*`) and trailing `:` / `,` / `;` removed, e.g.
///    `- src/main.rs: entry point` → `src/main.rs`.
///
/// A candidate only counts as a path if it contains `.`, `/`, or `\` and is
/// not an `http://` / `https://` URL.
///
/// Returns `None` when the line contains no path-like candidate.
fn extract_file_path(line: &str) -> Option<String> {
    /// Heuristic: has a path separator or extension dot, and is not a web URL.
    fn looks_like_path(candidate: &str) -> bool {
        let is_url = candidate.starts_with("http://") || candidate.starts_with("https://");
        !is_url && candidate.contains(|c: char| matches!(c, '.' | '/' | '\\'))
    }

    let line = line.trim();

    // Backtick-wrapped path: take what sits between the first two backticks.
    if let Some((_, after_open)) = line.split_once('`') {
        if let Some((quoted, _)) = after_open.split_once('`') {
            if looks_like_path(quoted) {
                return Some(quoted.to_string());
            }
        }
    }

    // Bare path on a bullet line. Only the first token is considered so that
    // trailing descriptions ("src/main.rs: entry point") do not end up inside
    // the path, which would also destabilize path-based change detection.
    if line.starts_with('-') || line.starts_with('*') {
        let token = line
            .trim_start_matches(|c: char| matches!(c, '-' | '*' | ' '))
            .split_whitespace()
            .next()?
            .trim_matches(|c: char| matches!(c, '`' | '*' | ':' | ',' | ';'));
        if looks_like_path(token) {
            return Some(token.to_string());
        }
    }

    None
}

/// Shorten `s` to at most `max_len` bytes of content, appending `...` when
/// anything was cut.
///
/// Strings that already fit (`s.len() <= max_len`) are returned unchanged.
/// Otherwise the cut point is moved back to the nearest UTF-8 character
/// boundary at or before `max_len`, so the function never panics on
/// multi-byte text (command output frequently contains non-ASCII bytes).
/// The returned string may therefore be up to `max_len + 3` bytes long.
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // Slicing a `str` in the middle of a code point panics; back off until the
    // cut lands on a boundary. Index 0 is always a boundary, so this terminates.
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &s[..cut])
}

/// Report whether two consecutive snapshots differ in any tracked aspect.
///
/// Used for deadlock detection: if 3 consecutive rounds show no change,
/// the loop is terminated.
///
/// The snapshots count as different when any of the following holds:
///
/// - their `git_status` values differ,
/// - their perceived file paths differ (compared in order, paths only),
/// - their `errors` lists differ,
/// - exactly one of them carries a previous action result (only presence is
///   compared, not the result's contents).
///
/// `project_structure` and `notes` are not considered.
pub fn has_environment_changed(
    previous: &PerceptionSnapshot,
    current: &PerceptionSnapshot,
) -> bool {
    previous.git_status != current.git_status
        || previous
            .files
            .iter()
            .map(|file| file.path.as_str())
            .ne(current.files.iter().map(|file| file.path.as_str()))
        || previous.errors != current.errors
        || previous.previous_action_result.is_some() != current.previous_action_result.is_some()
}