fix(agent): extract JSON from model output even with leading text prefix

The ReAct loop was terminating early when the model prefixed its JSON with a text line, e.g.:
  [Agent ran through N steps...]
  {"thought": "...", "action": {...}}

The extract_json function only looked for JSON at the start of the string or inside code fences.
It now also scans for a { or [ that is not preceded by a word character or a quote, and uses
depth counting to strip trailing text, so JSON buried elsewhere in the response can be extracted.
This commit is contained in:
ZhenYi 2026-04-24 13:17:06 +08:00
parent 261989fca3
commit 94825316dc

View File

@ -378,15 +378,21 @@ fn parse_react_response(content: &str) -> ParsedReActResponse {
}
}
/// Extract the first JSON object or array from a string, handling markdown fences.
/// Extract a JSON object or array from a string, even when wrapped in non-JSON text.
/// Handles: raw JSON at start, JSON in code fences, JSON buried in text (e.g. after
/// a prefix line like "[Agent ran through N steps]").
fn extract_json(s: &str) -> Option<String> {
let trimmed = s.trim();
// Direct match — starts with { or [
if trimmed.starts_with('{') || trimmed.starts_with('[') {
return Some(trimmed.to_string());
}
// Code fence handling
for line in trimmed.lines() {
let line = line.trim();
if line.starts_with("```json") || line.starts_with("```") {
if line.starts_with("```json") || line == "```" {
let mut buf = String::new();
let mut found_start = false;
for l in trimmed.lines() {
@ -409,6 +415,47 @@ fn extract_json(s: &str) -> Option<String> {
}
}
}
// Scan for JSON object/array buried in text (common with prefix lines).
// Find the first '{' or '[' that is NOT preceded by a word character,
// then try to parse from there (stripping trailing non-JSON text).
let chars: Vec<char> = trimmed.chars().collect();
for i in 0..chars.len() {
let c = chars[i];
if (c == '{' || c == '[') && i > 0 {
// Skip if preceded by a word character or a quote (likely part of a string value, not the start of JSON)
let prev = chars[i - 1];
if prev.is_alphanumeric() || prev == '_' || prev == '"' || prev == '\'' {
continue;
}
let candidate: String = chars[i..].iter().collect();
// Try full candidate first
if serde_json::from_str::<serde_json::Value>(&candidate).is_ok() {
return Some(candidate.trim_end().to_string());
}
// Try stripping trailing text (text after the JSON closing brace/bracket)
let mut depth = 0isize;
let mut in_string = false;
let mut escaped = false;
for (j, c) in candidate.char_indices() {
if escaped { escaped = false; continue; }
if c == '\\' { escaped = true; continue; }
if c == '"' { in_string = !in_string; continue; }
if in_string { continue; }
if c == '{' || c == '[' { depth += 1; }
if c == '}' || c == ']' { depth -= 1; }
if depth == 0 {
// Found the end of the JSON value
let json_end = j + c.len_utf8();
let trimmed_candidate = &candidate[..json_end];
if serde_json::from_str::<serde_json::Value>(trimmed_candidate).is_ok() {
return Some(trimmed_candidate.to_string());
}
}
}
}
}
None
}