gitdataai/lib/service/metrics.rs
2026-06-01 22:04:38 +08:00

312 lines
9.1 KiB
Rust

use prometheus::{CounterVec, HistogramVec};
use track::MetricsRegistry;
/// Handles to every service-level metric, created together by `ServiceMetrics::init`.
///
/// Each field's documentation repeats the help text and the label names that
/// `init` registers the metric with.
#[derive(Clone)]
pub struct ServiceMetrics {
    // ---- Authentication ----
    /// Total login attempts. Labels: `status`.
    pub auth_login_total: CounterVec,
    /// Total user registrations. Labels: `status`.
    pub auth_register_total: CounterVec,
    /// Total 2FA challenges triggered. Registered without labels.
    pub auth_2fa_triggered_total: CounterVec,
    /// Total password reset operations. Labels: `status`.
    pub auth_password_reset_total: CounterVec,

    // ---- Repositories ----
    /// Total repo operations. Labels: `operation`, `status`.
    pub repo_operations_total: CounterVec,
    /// Total fork creations. Labels: `status`.
    pub repo_fork_total: CounterVec,
    /// Total repo transfers. Labels: `status`.
    pub repo_transfer_total: CounterVec,

    // ---- Workspaces ----
    /// Total workspace operations. Labels: `operation`, `status`.
    pub workspace_operations_total: CounterVec,
    /// Total workspace join operations. Labels: `operation`.
    pub workspace_join_total: CounterVec,

    // ---- Issues and pull requests ----
    /// Total issue operations. Labels: `operation`, `status`.
    pub issue_operations_total: CounterVec,
    /// Total pull request operations. Labels: `operation`, `status`.
    pub pr_operations_total: CounterVec,
    /// Total PR merges. Labels: `method`, `status`.
    pub pr_merge_total: CounterVec,

    // ---- AI ----
    /// Total AI agent invocations. Labels: `model`, `status`.
    pub ai_agent_runs_total: CounterVec,
    /// Total AI tool calls. Labels: `tool_name`, `status`.
    pub ai_tool_calls_total: CounterVec,
    /// Total AI token usage. Labels: `model`, `direction`.
    pub ai_token_usage_total: CounterVec,

    // ---- Database ----
    /// DB query duration in seconds. Labels: `kind`, `route`.
    pub db_query_duration_seconds: HistogramVec,
    /// Total database queries. Labels: `kind`, `route`, `status`.
    pub db_queries_total: CounterVec,

    // ---- Cache ----
    /// Total cache hits. Labels: `tier`.
    pub cache_hits_total: CounterVec,
    /// Total cache misses. Registered without labels.
    pub cache_misses_total: CounterVec,
    /// Total cache set operations. Registered without labels.
    pub cache_sets_total: CounterVec,
    /// Total cache remove operations. Registered without labels.
    pub cache_removes_total: CounterVec,

    // ---- Storage ----
    /// Total storage operations. Labels: `operation`, `backend`.
    pub storage_operations_total: CounterVec,
    /// Total bytes transferred. Labels: `operation`.
    pub storage_bytes_total: CounterVec,

    // ---- Queue ----
    /// Total queue messages. Labels: `topic`, `status`.
    pub queue_messages_total: CounterVec,
    /// Total messages routed to DLQ. Labels: `topic`.
    pub queue_dlq_total: CounterVec,
}
impl ServiceMetrics {
/// Records one AI agent run for the given `model` and `status`.
///
/// Increments `ai_agent_runs_total` by one and forwards the same increment,
/// with `model` / `status` attributes, to `track::record_otel_counter`.
pub fn record_ai_run(&self, model: &str, status: &str) {
    // Label order matches the `["model", "status"]` declaration in `init`.
    let label_values = [model, status];
    self.ai_agent_runs_total
        .with_label_values(&label_values)
        .inc();

    let attributes = [("model", model.to_string()), ("status", status.to_string())];
    track::record_otel_counter("ai_agent_runs_total", 1, &attributes);
}
/// Records one AI tool call for the given `tool_name` and `status`.
///
/// Increments `ai_tool_calls_total` by one and forwards the same increment,
/// with `tool_name` / `status` attributes, to `track::record_otel_counter`.
pub fn record_ai_tool_call(&self, tool_name: &str, status: &str) {
    // Label order matches the `["tool_name", "status"]` declaration in `init`.
    let label_values = [tool_name, status];
    self.ai_tool_calls_total
        .with_label_values(&label_values)
        .inc();

    let attributes = [
        ("tool_name", tool_name.to_string()),
        ("status", status.to_string()),
    ];
    track::record_otel_counter("ai_tool_calls_total", 1, &attributes);
}
/// Adds the token counts of one model call to `ai_token_usage_total`.
///
/// `input_tokens` is recorded under `direction = "input"` and `output_tokens`
/// under `direction = "output"`, in that order. A count that is zero or
/// negative is skipped entirely. Every recorded amount is also forwarded to
/// `track::record_otel_counter` with `model` / `direction` attributes.
pub fn record_ai_token_usage(
    &self,
    model: &str,
    input_tokens: i64,
    output_tokens: i64,
) {
    let per_direction = [("input", input_tokens), ("output", output_tokens)];

    for (direction, tokens) in per_direction {
        if tokens <= 0 {
            continue;
        }

        self.ai_token_usage_total
            .with_label_values(&[model, direction])
            .inc_by(tokens as f64);

        // `tokens` is strictly positive here, so the cast to `u64` cannot wrap.
        track::record_otel_counter(
            "ai_token_usage_total",
            tokens as u64,
            &[
                ("model", model.to_string()),
                ("direction", direction.to_string()),
            ],
        );
    }
}
/// Registers every service metric on `registry` and returns the handles.
///
/// Metrics are registered in field order. Counters go through `cvec` and the
/// single histogram through `hvec`.
///
/// # Panics
///
/// Panics if any registration does not succeed, because `cvec` and `hvec`
/// call `expect` on the registry's return value.
pub fn init(registry: &MetricsRegistry) -> Self {
    // Bind the registry once so each counter below only states what differs.
    let counter = |name: &str, help: &str, labels: &[&str]| cvec(registry, name, help, labels);

    Self {
        auth_login_total: counter("auth_login_total", "Total login attempts", &["status"]),
        auth_register_total: counter(
            "auth_register_total",
            "Total user registrations",
            &["status"],
        ),
        auth_2fa_triggered_total: counter(
            "auth_2fa_triggered_total",
            "Total 2FA challenges triggered",
            &[],
        ),
        auth_password_reset_total: counter(
            "auth_password_reset_total",
            "Total password reset operations",
            &["status"],
        ),
        repo_operations_total: counter(
            "repo_operations_total",
            "Total repo operations",
            &["operation", "status"],
        ),
        repo_fork_total: counter("repo_fork_total", "Total fork creations", &["status"]),
        repo_transfer_total: counter("repo_transfer_total", "Total repo transfers", &["status"]),
        workspace_operations_total: counter(
            "workspace_operations_total",
            "Total workspace operations",
            &["operation", "status"],
        ),
        workspace_join_total: counter(
            "workspace_join_total",
            "Total workspace join operations",
            &["operation"],
        ),
        issue_operations_total: counter(
            "issue_operations_total",
            "Total issue operations",
            &["operation", "status"],
        ),
        pr_operations_total: counter(
            "pr_operations_total",
            "Total pull request operations",
            &["operation", "status"],
        ),
        pr_merge_total: counter("pr_merge_total", "Total PR merges", &["method", "status"]),
        ai_agent_runs_total: counter(
            "ai_agent_runs_total",
            "Total AI agent invocations",
            &["model", "status"],
        ),
        ai_tool_calls_total: counter(
            "ai_tool_calls_total",
            "Total AI tool calls",
            &["tool_name", "status"],
        ),
        ai_token_usage_total: counter(
            "ai_token_usage_total",
            "Total AI token usage",
            &["model", "direction"],
        ),
        // The only histogram: explicit bucket bounds from 1 ms to 5 s.
        db_query_duration_seconds: hvec(
            registry,
            "db_query_duration_seconds",
            "DB query duration in seconds",
            &["kind", "route"],
            vec![
                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
            ],
        ),
        db_queries_total: counter(
            "db_queries_total",
            "Total database queries",
            &["kind", "route", "status"],
        ),
        cache_hits_total: counter("cache_hits_total", "Total cache hits", &["tier"]),
        cache_misses_total: counter("cache_misses_total", "Total cache misses", &[]),
        cache_sets_total: counter("cache_sets_total", "Total cache set operations", &[]),
        cache_removes_total: counter("cache_removes_total", "Total cache remove operations", &[]),
        storage_operations_total: counter(
            "storage_operations_total",
            "Total storage operations",
            &["operation", "backend"],
        ),
        storage_bytes_total: counter(
            "storage_bytes_total",
            "Total bytes transferred",
            &["operation"],
        ),
        queue_messages_total: counter(
            "queue_messages_total",
            "Total queue messages",
            &["topic", "status"],
        ),
        queue_dlq_total: counter(
            "queue_dlq_total",
            "Total messages routed to DLQ",
            &["topic"],
        ),
    }
}
}
/// Awaits `fut` and counts its outcome on `counter`.
///
/// `op_labels` supplies the leading label values (e.g. `["create"]`). A final
/// value of `"success"` (for `Ok`) or `"error"` (for `Err`) is appended before
/// the counter is incremented by one. The result of `fut` is returned unchanged.
///
/// The increment happens only after `fut` has completed, so nothing is recorded
/// if this future is dropped before then.
///
/// NOTE(review): `counter` is presumably declared with `op_labels.len() + 1`
/// labels, status last (as in `["operation", "status"]`) — confirm at call sites.
pub(crate) async fn with_op_metric<T, E, Fut>(
    counter: &CounterVec,
    op_labels: &[&str],
    fut: Fut,
) -> Result<T, E>
where
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let outcome = fut.await;

    let status = match &outcome {
        Ok(_) => "success",
        Err(_) => "error",
    };
    let label_values: Vec<&str> = op_labels
        .iter()
        .copied()
        .chain(std::iter::once(status))
        .collect();

    counter.with_label_values(&label_values).inc();
    outcome
}
/// Registers a counter vector named `name` on `registry` and returns it.
///
/// `help` and `labels` are passed through to `register_counter_vec` unchanged.
///
/// # Panics
///
/// Panics with "failed to register counter metric" if the registry call does
/// not yield a counter.
fn cvec(registry: &MetricsRegistry, name: &str, help: &str, labels: &[&str]) -> CounterVec {
    let registration = registry.register_counter_vec(name, help, labels);
    registration.expect("failed to register counter metric")
}
/// Registers a histogram vector named `name` on `registry` and returns it.
///
/// `help`, `labels` and `buckets` are passed through to
/// `register_histogram_vec` unchanged.
///
/// # Panics
///
/// Panics with "failed to register histogram metric" if the registry call does
/// not yield a histogram.
fn hvec(
    registry: &MetricsRegistry,
    name: &str,
    help: &str,
    labels: &[&str],
    buckets: Vec<f64>,
) -> HistogramVec {
    let registration = registry.register_histogram_vec(name, help, labels, buckets);
    registration.expect("failed to register histogram metric")
}