gitdataai/libs/service/project_tools/curl.rs
ZhenYi f7e087e066 fix(agent/service): retry jitter, tool executor ordering, curl SSRF, grep/JSON
- agent/client: full jitter backoff (random(0, base_ms)) instead of equal jitter
- agent/tool/executor: fix buffer_unordered ordering mismatch with
  HashMap-by-index approach for concurrent tool execution
- agent/chat: AiChunkType emit fixes, is_retryable_tool_error refinements,
  process_react uses request.max_tool_depth
- agent/chat/context: fix Function message sender_name field
- file_tools/curl: shared reqwest::Client via OnceLock, manual redirect
  following with per-hop SSRF validation, blocked sensitive headers
- file_tools/grep: fix case-insensitive glob matching, segment consumption
- file_tools/json: bracket notation support, remove .vscodeignore from JSONC
- git_tools: git_diff_stats resolve base/head independently,
  DiffFileOut old_file.path for Deleted, reflog offset_minutes
- git/repo: create_commit read parent tree into index, bare repo init
- project_tools/repos: branch/path validation, .git/ prefix check
- service/agent: tokent integration, billing, pr_summary, code_review fixes
2026-04-25 09:53:31 +08:00

305 lines
12 KiB
Rust

//! Tool: project_curl — perform HTTP requests (GET/POST/PUT/DELETE/PATCH/HEAD)
//!
//! Security measures:
//! - SSRF protection: blocks private IPs and blocks redirects to private IPs
//! - Sensitive header injection: blocks Host, Authorization, Cookie, Proxy-*
//! - Connection pooling via a shared reqwest::Client
use agent::{ToolContext, ToolDefinition, ToolError, ToolParam, ToolSchema};
use std::collections::HashMap;
use std::sync::OnceLock;
/// Upper bound on the response body size that is returned to the caller: 1 MB.
const MAX_BODY_BYTES: usize = 1024 * 1024;

/// Lower-case header names that callers may not supply themselves.
///
/// Requests carrying any of these headers are rejected to prevent header
/// injection attacks.
const BLOCKED_HEADERS: &[&str] = &[
    "host",
    "authorization",
    "cookie",
    "proxy-authorization",
    "proxy-connection",
    "proxy-authenticate",
];
/// Process-wide `reqwest::Client`, created lazily so that all tool invocations
/// share one connection pool.
static SHARED_CLIENT: OnceLock<reqwest::Client> = OnceLock::new();

/// Returns the shared HTTP client, building it on first use.
///
/// The client uses a 10 s connect timeout and a 120 s overall timeout
/// (callers may set a shorter per-request timeout on the request builder).
///
/// # Panics
/// Panics if the underlying `reqwest` client cannot be constructed.
fn shared_client() -> &'static reqwest::Client {
    SHARED_CLIENT.get_or_init(|| {
        reqwest::Client::builder()
            .connect_timeout(std::time::Duration::from_secs(10))
            .timeout(std::time::Duration::from_secs(120))
            // Hand 3xx responses back to the caller instead of following them,
            // so each hop can be validated before it is requested.
            // `Policy::limited(0)` is NOT equivalent: it reports the very first
            // redirect as a "too many redirects" error from `send()`, which
            // prevents the caller from ever seeing the redirect response.
            .redirect(reqwest::redirect::Policy::none())
            .build()
            .expect("reqwest client build should not fail")
    })
}
/// Returns `true` when `host` names a loopback, private, link-local or
/// otherwise internal destination that outbound requests must not reach.
///
/// `host` may be a bare hostname, an IPv4 literal, or an IPv6 literal with or
/// without the surrounding brackets that URL serialisation adds (`[::1]`).
/// A single trailing dot (`localhost.`) is ignored.
///
/// IP literals are parsed and classified by address range rather than by
/// string prefix, so the whole of `127.0.0.0/8`, `169.254.0.0/16`,
/// IPv4-mapped IPv6 addresses and IPv6 local ranges are covered, while public
/// domain names that merely start with `10.` or `192.168.` are not rejected.
///
/// No DNS resolution is performed: a public hostname whose DNS record points
/// at an internal address is not detected here.
fn is_private_host(host: &str) -> bool {
    let trimmed = host.trim();
    // URL serialisation wraps IPv6 literals in brackets; strip them so the
    // address can be parsed.
    let unbracketed = trimmed
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .unwrap_or(trimmed);
    // "localhost." and "localhost" are the same name.
    let name = unbracketed.strip_suffix('.').unwrap_or(unbracketed);

    if let Ok(ip) = name.parse::<std::net::IpAddr>() {
        return match ip {
            std::net::IpAddr::V4(v4) => is_internal_ipv4(v4),
            std::net::IpAddr::V6(v6) => is_internal_ipv6(v6),
        };
    }

    let lowered = name.to_ascii_lowercase();
    if lowered == "localhost"
        || lowered.ends_with(".localhost")
        || lowered == "metadata.google.internal"
    {
        return true;
    }

    // Fail closed on hosts that look like a non-canonical IPv4 literal
    // ("10.1", "0x7f.0.0.1", "010.0.0.1"): they did not parse as a strict
    // dotted quad, but some resolvers still interpret them as addresses.
    lowered.rsplit('.').next().is_some_and(|label| {
        !label.is_empty()
            && (label.bytes().all(|b| b.is_ascii_digit()) || label.starts_with("0x"))
    })
}

/// Classifies an IPv4 address as internal: loopback, RFC 1918 private,
/// link-local, `0.0.0.0/8`, broadcast, or carrier-grade NAT (`100.64.0.0/10`).
fn is_internal_ipv4(ip: std::net::Ipv4Addr) -> bool {
    let [first, second, ..] = ip.octets();
    ip.is_loopback()
        || ip.is_private()
        || ip.is_link_local()
        || ip.is_unspecified()
        || ip.is_broadcast()
        || first == 0
        || (first == 100 && (second & 0xc0) == 64)
}

/// Classifies an IPv6 address as internal: loopback, unspecified, unique-local
/// (`fc00::/7`), link-local (`fe80::/10`), or an IPv4-mapped address whose
/// embedded IPv4 address is itself internal.
fn is_internal_ipv6(ip: std::net::Ipv6Addr) -> bool {
    if let Some(mapped) = ip.to_ipv4_mapped() {
        return is_internal_ipv4(mapped);
    }
    let first = ip.segments()[0];
    ip.is_loopback()
        || ip.is_unspecified()
        || (first & 0xfe00) == 0xfc00
        || (first & 0xffc0) == 0xfe80
}
/// Validate URL and any redirect hops against SSRF rules.
fn validate_url_against_ssrf(url_str: &str) -> Result<reqwest::Url, ToolError> {
let parsed = reqwest::Url::parse(url_str)
.map_err(|e| ToolError::ExecutionError(format!("Invalid URL: {}", e)))?;
if let Some(host) = parsed.host_str() {
if is_private_host(host) {
return Err(ToolError::ExecutionError(
"Requests to internal/private IPs are not allowed for security reasons".into(),
));
}
}
Ok(parsed)
}
/// Perform an HTTP request and return the response body and metadata.
/// Supports GET, POST, PUT, DELETE methods. Useful for fetching web pages,
/// calling external APIs, or downloading resources.
pub async fn curl_exec(
_ctx: ToolContext,
args: serde_json::Value,
) -> Result<serde_json::Value, ToolError> {
let url_str = args
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| ToolError::ExecutionError("url is required".into()))?;
// SSRF protection: validate initial URL
validate_url_against_ssrf(url_str)?;
let method = args
.get("method")
.and_then(|v| v.as_str())
.unwrap_or("GET")
.to_uppercase();
let body = args.get("body").and_then(|v| v.as_str()).map(String::from);
let headers: Vec<(String, String)> = args
.get("headers")
.and_then(|v| v.as_object())
.map(|obj| {
obj.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect()
})
.unwrap_or_default();
// Block sensitive headers that could be used for injection attacks
for (key, _) in &headers {
if BLOCKED_HEADERS.contains(&key.to_lowercase().as_str()) {
return Err(ToolError::ExecutionError(
format!("Header '{}' is not allowed for security reasons", key),
));
}
}
let timeout_secs = args
.get("timeout")
.and_then(|v| v.as_u64())
.unwrap_or(30)
.min(120);
let client = shared_client();
// Build a per-request client with the specific timeout by using the shared
// client's connection pool but overriding timeout per request via request builder.
// Since reqwest::Client::builder().redirect(Policy::limited(0)) disables auto-redirects,
// we manually follow up to 5 redirects with SSRF validation on each hop.
let mut current_url = url_str.to_string();
let mut redirect_count = 0u32;
const MAX_REDIRECTS: u32 = 5;
loop {
let mut request = match method.as_str() {
"GET" => client.get(&current_url),
"POST" => client.post(&current_url),
"PUT" => client.put(&current_url),
"DELETE" => client.delete(&current_url),
"PATCH" => client.patch(&current_url),
"HEAD" => client.head(&current_url),
_ => {
return Err(ToolError::ExecutionError(format!(
"Unsupported HTTP method: {}. Use GET, POST, PUT, DELETE, PATCH, or HEAD.",
method
)))
}
};
request = request.timeout(std::time::Duration::from_secs(timeout_secs));
for (key, value) in &headers {
request = request.header(key, value);
}
// Set default Content-Type for POST/PUT/PATCH if not provided and body exists
if body.is_some() && !headers.iter().any(|(k, _)| k.to_lowercase() == "content-type") {
request = request.header("Content-Type", "application/json");
}
if let Some(ref b) = body {
request = request.body(b.clone());
}
let response = request
.send()
.await
.map_err(|e| ToolError::ExecutionError(format!("HTTP request failed: {}", e)))?;
let status = response.status().as_u16();
// Handle redirects manually with SSRF validation
if status >= 300 && status < 400 {
redirect_count += 1;
if redirect_count > MAX_REDIRECTS {
return Err(ToolError::ExecutionError(
format!("Too many redirects (max {})", MAX_REDIRECTS),
));
}
let location = response.headers()
.get("location")
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());
let location = match location {
Some(l) => l,
None => return Err(ToolError::ExecutionError("Redirect with no Location header".into())),
};
// Resolve relative redirect against current URL
let base = reqwest::Url::parse(&current_url)
.map_err(|e| ToolError::ExecutionError(format!("Invalid current URL: {}", e)))?;
let next_url = base.join(&location)
.map_err(|e| ToolError::ExecutionError(format!("Invalid redirect URL: {}", e)))?;
// Validate redirect target against SSRF rules
if let Some(host) = next_url.host_str() {
if is_private_host(host) {
return Err(ToolError::ExecutionError(
"Redirect to internal/private IP is not allowed".into(),
));
}
}
current_url = next_url.to_string();
continue;
}
let status_text = response.status().canonical_reason().unwrap_or("");
let response_headers: std::collections::HashMap<String, String> = response
.headers()
.iter()
.map(|(k, v)| {
(
k.to_string(),
v.to_str().unwrap_or("<binary>").to_string(),
)
})
.collect();
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
let is_text = content_type.starts_with("text/")
|| content_type.contains("json")
|| content_type.contains("xml")
|| content_type.contains("javascript");
let body_bytes = response
.bytes()
.await
.map_err(|e| ToolError::ExecutionError(format!("Failed to read response body: {}", e)))?;
let body_len = body_bytes.len();
let truncated = body_len > MAX_BODY_BYTES;
let body_text = if truncated {
String::from("[Response truncated — exceeds 1 MB limit]")
} else if is_text {
String::from_utf8_lossy(&body_bytes).to_string()
} else {
format!(
"[Binary body, {} bytes, Content-Type: {}]",
body_len, content_type
)
};
return Ok(serde_json::json!({
"url": current_url,
"method": method,
"status": status,
"status_text": status_text,
"headers": response_headers,
"body": body_text,
"truncated": truncated,
"size_bytes": body_len,
}));
}
}
// ─── tool definition ─────────────────────────────────────────────────────────
/// Builds the `project_curl` tool definition: its description plus an object
/// schema with the `url`, `method`, `body`, `headers` and `timeout`
/// parameters, of which only `url` is required.
pub fn tool_definition() -> ToolDefinition {
    // One row per parameter: (name, JSON type, description, required).
    let specs: [(&'static str, &'static str, &'static str, bool); 5] = [
        ("url", "string", "Full URL to request (required).", true),
        (
            "method",
            "string",
            "HTTP method: GET (default), POST, PUT, DELETE, PATCH, HEAD.",
            false,
        ),
        (
            "body",
            "string",
            "Request body. Defaults to 'application/json' Content-Type if provided. Optional.",
            false,
        ),
        (
            "headers",
            "object",
            "HTTP headers as key-value pairs. Optional.",
            false,
        ),
        (
            "timeout",
            "integer",
            "Request timeout in seconds (default 30, max 120). Optional.",
            false,
        ),
    ];

    let mut properties = HashMap::new();
    for (name, param_type, description, required) in specs {
        properties.insert(
            name.into(),
            ToolParam {
                name: name.into(),
                param_type: param_type.into(),
                description: Some(description.into()),
                required,
                properties: None,
                items: None,
            },
        );
    }

    let schema = ToolSchema {
        schema_type: "object".into(),
        properties: Some(properties),
        required: Some(vec!["url".into()]),
    };

    ToolDefinition::new("project_curl")
        .description(
            "Perform an HTTP request to any URL. Supports GET, POST, PUT, DELETE, PATCH, HEAD. \
             Returns status code, headers, and response body. \
             Response body is truncated at 1 MB. Binary responses are described as text metadata. \
             Useful for fetching web pages, calling APIs, or downloading resources.",
        )
        .parameters(schema)
}