- agent/client: full jitter backoff (random(0, base_ms)) instead of equal jitter - agent/tool/executor: fix buffer_unordered ordering mismatch with HashMap-by-index approach for concurrent tool execution - agent/chat: AiChunkType emit fixes, is_retryable_tool_error refinements, process_react uses request.max_tool_depth - agent/chat/context: fix Function message sender_name field - file_tools/curl: shared reqwest::Client via OnceLock, manual redirect following with per-hop SSRF validation, blocked sensitive headers - file_tools/grep: fix case-insensitive glob matching, segment consumption - file_tools/json: bracket notation support, remove .vscodeignore from JSONC - git_tools: git_diff_stats resolve base/head independently, DiffFileOut old_file.path for Deleted, reflog offset_minutes - git/repo: create_commit read parent tree into index, bare repo init - project_tools/repos: branch/path validation, .git/ prefix check - service/agent: tokent integration, billing, pr_summary, code_review fixes
311 lines
12 KiB
Rust
311 lines
12 KiB
Rust
//! read_json — parse, validate, and query JSON / JSONC files.
|
|
|
|
use crate::file_tools::MAX_FILE_SIZE;
|
|
use crate::git_tools::ctx::GitToolCtx;
|
|
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
|
use serde_json::Value as JsonValue;
|
|
use std::collections::HashMap;
|
|
|
|
/// Strip `//` line comments and `/* ... */` block comments from JSONC text so
/// the result can be handed to a strict JSON parser.
///
/// Comment markers inside string literals are left untouched; backslash
/// escapes inside strings (e.g. `\"`) are honoured so an escaped quote does
/// not end the string.
///
/// Line structure is preserved: the newline that terminates a line comment is
/// kept, and every newline inside a block comment is kept as well, so line
/// numbers reported by the JSON parser still match the original file. A block
/// comment is replaced by a single space so that the tokens on either side of
/// it are never glued together (`1/**/2` becomes `1 2`, not `12`).
///
/// An unterminated block comment swallows the rest of the input.
fn strip_jsonc_comments(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    let mut in_string = false;

    while let Some(c) = chars.next() {
        if in_string {
            result.push(c);
            match c {
                // Copy the escaped character verbatim so `\"` cannot close the string.
                '\\' => {
                    if let Some(escaped) = chars.next() {
                        result.push(escaped);
                    }
                }
                '"' => in_string = false,
                _ => {}
            }
            continue;
        }

        match (c, chars.peek().copied()) {
            ('"', _) => {
                in_string = true;
                result.push(c);
            }
            ('/', Some('/')) => {
                // Line comment: drop everything up to the newline, keep the newline.
                chars.next();
                for nc in chars.by_ref() {
                    if nc == '\n' {
                        result.push('\n');
                        break;
                    }
                }
            }
            ('/', Some('*')) => {
                // Block comment: acts as whitespace; keep embedded newlines so
                // parser line numbers stay aligned with the source file.
                chars.next();
                result.push(' ');
                while let Some(nc) = chars.next() {
                    if nc == '*' && chars.peek() == Some(&'/') {
                        chars.next();
                        break;
                    }
                    if nc == '\n' {
                        result.push('\n');
                    }
                }
            }
            _ => result.push(c),
        }
    }

    result
}
|
|
|
|
fn infer_schema(value: &JsonValue, max_depth: usize) -> JsonValue {
|
|
if max_depth == 0 {
|
|
return serde_json::json!({ "type": "MAX_DEPTH" });
|
|
}
|
|
|
|
match value {
|
|
JsonValue::Null => serde_json::json!({ "type": "null" }),
|
|
JsonValue::Bool(_) => serde_json::json!({ "type": "boolean" }),
|
|
JsonValue::Number(_) => serde_json::json!({ "type": "number" }),
|
|
JsonValue::String(_) => serde_json::json!({ "type": "string" }),
|
|
JsonValue::Array(arr) => {
|
|
if arr.is_empty() {
|
|
serde_json::json!({ "type": "array", "items": null })
|
|
} else {
|
|
serde_json::json!({
|
|
"type": "array",
|
|
"length": arr.len(),
|
|
"items": infer_schema(&arr[0], max_depth - 1)
|
|
})
|
|
}
|
|
}
|
|
JsonValue::Object(obj) => {
|
|
let mut schema = serde_json::Map::new();
|
|
schema.insert("type".into(), serde_json::Value::String("object".into()));
|
|
let mut properties = serde_json::Map::new();
|
|
for (k, v) in obj {
|
|
properties.insert(k.clone(), infer_schema(v, max_depth - 1));
|
|
}
|
|
schema.insert("properties".into(), serde_json::Value::Object(properties));
|
|
schema.insert("keyCount".into(), serde_json::json!(obj.len()));
|
|
serde_json::Value::Object(schema)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn read_json_exec(
|
|
ctx: GitToolCtx,
|
|
args: serde_json::Value,
|
|
) -> Result<serde_json::Value, String> {
|
|
let p: serde_json::Map<String, serde_json::Value> =
|
|
serde_json::from_value(args).map_err(|e| e.to_string())?;
|
|
|
|
let project_name = p
|
|
.get("project_name")
|
|
.and_then(|v| v.as_str())
|
|
.ok_or("missing project_name")?;
|
|
let repo_name = p
|
|
.get("repo_name")
|
|
.and_then(|v| v.as_str())
|
|
.ok_or("missing repo_name")?;
|
|
let path = p.get("path").and_then(|v| v.as_str()).ok_or("missing path")?;
|
|
let rev = p
|
|
.get("rev")
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from)
|
|
.unwrap_or_else(|| "HEAD".to_string());
|
|
let query = p.get("query").and_then(|v| v.as_str()).map(String::from);
|
|
let max_depth = p.get("schema_depth").and_then(|v| v.as_u64()).unwrap_or(4) as usize;
|
|
let pretty = p.get("pretty").and_then(|v| v.as_bool()).unwrap_or(false);
|
|
|
|
let domain = ctx.open_repo(project_name, repo_name).await?;
|
|
|
|
let commit_oid = if rev.len() >= 40 {
|
|
git::commit::types::CommitOid::new(&rev)
|
|
} else {
|
|
domain
|
|
.commit_get_prefix(&rev)
|
|
.map_err(|e| e.to_string())?
|
|
.oid
|
|
};
|
|
|
|
let entry = domain
|
|
.tree_entry_by_path_from_commit(&commit_oid, path)
|
|
.map_err(|e| e.to_string())?;
|
|
let content = domain.blob_content(&entry.oid).map_err(|e| e.to_string())?;
|
|
|
|
let data = &content.content;
|
|
if data.len() > MAX_FILE_SIZE {
|
|
return Err(format!(
|
|
"file too large ({} bytes), max {} bytes",
|
|
data.len(),
|
|
MAX_FILE_SIZE
|
|
));
|
|
}
|
|
|
|
let text = String::from_utf8_lossy(data);
|
|
// Only treat as JSONC if the extension indicates it, or if we can
|
|
// confirm a comment-like pattern outside of a string context.
|
|
let is_jsonc = path.ends_with(".jsonc");
|
|
|
|
let json_text = if is_jsonc {
|
|
strip_jsonc_comments(&text)
|
|
} else {
|
|
text.to_string()
|
|
};
|
|
|
|
let parsed: JsonValue = serde_json::from_str(&json_text)
|
|
.map_err(|e| format!("JSON parse error at {}: {}", e.line(), e))?;
|
|
|
|
// Apply JSONPath-like query
|
|
let result = if let Some(ref q) = query {
|
|
query_json(&parsed, q)?
|
|
} else {
|
|
parsed
|
|
};
|
|
|
|
let schema = infer_schema(&result, max_depth);
|
|
|
|
let display = if pretty {
|
|
serde_json::to_string_pretty(&result).unwrap_or_default()
|
|
} else {
|
|
serde_json::to_string(&result).unwrap_or_default()
|
|
};
|
|
|
|
Ok(serde_json::json!({
|
|
"path": path,
|
|
"rev": rev,
|
|
"format": if is_jsonc { "jsonc" } else { "json" },
|
|
"size_bytes": data.len(),
|
|
"schema": schema,
|
|
"data": if display.chars().count() > 5000 {
|
|
let truncated: String = display.chars().take(5000).collect();
|
|
format!("{}... (truncated, {} chars total)", truncated, display.chars().count())
|
|
} else { display },
|
|
}))
|
|
}
|
|
|
|
/// Simple JSONPath-like query support.
|
|
/// Supports: $.key, $[0], $.key.nested, $.arr[0].field
|
|
/// Bracket notation ["key.with.dots"] allows accessing keys containing dots.
|
|
fn query_json(value: &JsonValue, query: &str) -> Result<JsonValue, String> {
|
|
let query = query.trim();
|
|
let query = if query.starts_with("$.") {
|
|
&query[2..]
|
|
} else if query.starts_with('$') && query.len() > 1 {
|
|
&query[1..]
|
|
} else {
|
|
query
|
|
};
|
|
|
|
let mut current = value.clone();
|
|
|
|
// Parse into access segments: Key("name"), Index(0), BracketKey("key.with.dots")
|
|
enum Segment { Key(String), Index(usize), BracketKey(String) }
|
|
let mut segments: Vec<Segment> = Vec::new();
|
|
let mut i = 0;
|
|
let q_chars: Vec<char> = query.chars().collect();
|
|
while i < q_chars.len() {
|
|
if q_chars[i] == '[' {
|
|
// Find matching ]
|
|
let mut j = i + 1;
|
|
let mut bracket_content = String::new();
|
|
while j < q_chars.len() && q_chars[j] != ']' {
|
|
bracket_content.push(q_chars[j]);
|
|
j += 1;
|
|
}
|
|
if j < q_chars.len() && q_chars[j] == ']' {
|
|
let content = bracket_content.trim();
|
|
// Check if it's a quoted string key or a numeric index
|
|
if content.starts_with('"') && content.ends_with('"') {
|
|
let key = content[1..content.len()-1].to_string();
|
|
segments.push(Segment::BracketKey(key));
|
|
} else if content.starts_with("'") && content.ends_with("'") {
|
|
let key = content[1..content.len()-1].to_string();
|
|
segments.push(Segment::BracketKey(key));
|
|
} else if let Ok(idx) = content.parse::<usize>() {
|
|
segments.push(Segment::Index(idx));
|
|
} else {
|
|
return Err(format!("Invalid bracket notation: [{}]", content));
|
|
}
|
|
i = j + 1;
|
|
// Skip dot after bracket if present
|
|
if i < q_chars.len() && q_chars[i] == '.' {
|
|
i += 1;
|
|
}
|
|
} else {
|
|
return Err("Unmatched [ in query".into());
|
|
}
|
|
} else {
|
|
// Read key until . or [
|
|
let mut key = String::new();
|
|
while i < q_chars.len() && q_chars[i] != '.' && q_chars[i] != '[' {
|
|
key.push(q_chars[i]);
|
|
i += 1;
|
|
}
|
|
if !key.is_empty() {
|
|
// Check if key contains a numeric-only segment (array index shorthand)
|
|
segments.push(Segment::Key(key));
|
|
}
|
|
if i < q_chars.len() && q_chars[i] == '.' {
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
for seg in &segments {
|
|
match seg {
|
|
Segment::Key(key) | Segment::BracketKey(key) => {
|
|
if let JsonValue::Object(obj) = ¤t {
|
|
current = obj.get(key).cloned().unwrap_or(JsonValue::Null);
|
|
} else {
|
|
return Err(format!("cannot access property '{}' on non-object", key));
|
|
}
|
|
}
|
|
Segment::Index(idx) => {
|
|
if let JsonValue::Array(arr) = ¤t {
|
|
current = arr.get(*idx).cloned().unwrap_or(JsonValue::Null);
|
|
} else {
|
|
return Err(format!("index {} on non-array", idx));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(current)
|
|
}
|
|
|
|
pub fn register_json_tools(registry: &mut ToolRegistry) {
|
|
let p = HashMap::from([
|
|
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
|
("repo_name".into(), ToolParam { name: "repo_name".into(), param_type: "string".into(), description: Some("Repository name".into()), required: true, properties: None, items: None }),
|
|
("path".into(), ToolParam { name: "path".into(), param_type: "string".into(), description: Some("File path to the JSON or JSONC file".into()), required: true, properties: None, items: None }),
|
|
("rev".into(), ToolParam { name: "rev".into(), param_type: "string".into(), description: Some("Git revision (default: HEAD)".into()), required: false, properties: None, items: None }),
|
|
("query".into(), ToolParam { name: "query".into(), param_type: "string".into(), description: Some("JSONPath-like query (e.g. $.config.items[0].name) to extract a subset of the document".into()), required: false, properties: None, items: None }),
|
|
("schema_depth".into(), ToolParam { name: "schema_depth".into(), param_type: "integer".into(), description: Some("How deep to infer the JSON schema (default: 4)".into()), required: false, properties: None, items: None }),
|
|
("pretty".into(), ToolParam { name: "pretty".into(), param_type: "boolean".into(), description: Some("Pretty-print the output (default: false)".into()), required: false, properties: None, items: None }),
|
|
]);
|
|
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "repo_name".into(), "path".into()]) };
|
|
registry.register(
|
|
ToolDefinition::new("read_json")
|
|
.description("Parse, validate, and query JSON and JSONC files. Supports JSONPath-like queries ($.key, $.arr[0]), schema inference, and pretty-printing. Automatically detects JSONC (with // comments).")
|
|
.parameters(schema),
|
|
ToolHandler::new(|ctx, args| {
|
|
let gctx = GitToolCtx::new(ctx);
|
|
Box::pin(async move {
|
|
read_json_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
|
})
|
|
}),
|
|
);
|
|
}
|