gitdataai/libs/observability/src/metrics_middleware.rs
ZhenYi 962bf0312d feat(observability): Phase 6 OTLP tracing + Prometheus metrics endpoint
OTLP tracing:
- libs/observability/otlp.rs: SdkTracerProvider via HTTP/proto OTLP exporter
- libs/observability/tracing_middleware.rs: Actix-web span with trace_id propagation
- libs/observability/tracing_fmt.rs: JSON fmt + registry.try_init for layered init
- libs/rpc: gRPC method spans via info_span
- libs/agent, libs/room, libs/service, libs/api: structured tracing throughout

Prometheus metrics:
- libs/observability/prometheus_exporter.rs: /metrics HTTP handler + metrics crate
- libs/observability/metrics_middleware.rs: HttpMetrics middleware + AtomicU64
- libs/observability/redis_metrics.rs: Redis counter poller via RedisMetrics
- libs/room/metrics.rs: RoomMetrics (connections, messages, presence counters)

Config env vars: APP_OTEL_ENABLED, APP_OTEL_ENDPOINT, APP_OTEL_SERVICE_NAME
2026-04-22 10:27:54 +08:00

162 lines
5.7 KiB
Rust

//! Actix-web metrics middleware: counts requests and measures latency.
//!
//! Accumulates metrics in shared atomic counters ([`HttpMetrics`]). This module
//! does not export them itself — callers read them via [`HttpMetrics::snapshot`].
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
use futures::future::{LocalBoxFuture, Ready, ok};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};
use std::task::{Context, Poll};
use std::time::Instant;
/// Shared HTTP counters updated by the metrics middleware.
///
/// The scalar counters are plain atomics, so they can be bumped through a
/// shared reference. Only the per-endpoint table sits behind a lock, because
/// new keys may have to be inserted into it.
#[derive(Default, Debug)]
pub struct HttpMetrics {
    /// How many requests have been recorded so far.
    pub request_count: AtomicU64,
    /// Accumulated request duration, in milliseconds, over all recorded requests.
    pub total_duration_ms: AtomicU64,
    /// How many recorded responses had a status in `200..=299`.
    pub status_2xx: AtomicU64,
    /// How many recorded responses had a status in `400..=499`.
    pub status_4xx: AtomicU64,
    /// How many recorded responses had a status in `500..=599`.
    pub status_5xx: AtomicU64,
    /// Request counters per endpoint. Each key is the HTTP method and the path
    /// joined by a single space, e.g. `"POST /api/git/commit"`.
    pub endpoint_counts: RwLock<HashMap<String, AtomicU64>>,
}
impl HttpMetrics {
    /// Creates a new instance with all counters at zero and no endpoint entries.
    pub fn new() -> Self {
        Self::default()
    }

    /// Increments the counter for one HTTP endpoint, keyed as `"<method> <path>"`.
    ///
    /// The common case (the endpoint has been seen before) only takes the
    /// shared read lock and bumps the existing atomic, so concurrent requests
    /// do not serialize on the map. The exclusive write lock is taken only
    /// when the key has to be inserted.
    ///
    /// A poisoned lock is recovered rather than propagated as a panic: the map
    /// holds nothing but counters, so whatever state a panicking holder left
    /// behind is still safe to use, and metrics must not break request handling.
    pub fn incr_endpoint(&self, method: &str, path: &str) {
        let key = format!("{} {}", method, path);

        // Fast path: key already present, a shared lock is enough.
        {
            let counts = self
                .endpoint_counts
                .read()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            if let Some(counter) = counts.get(&key) {
                counter.fetch_add(1, Ordering::Relaxed);
                return;
            }
        } // read guard dropped here, before the write lock is requested

        // Slow path: first request for this key. `entry` re-checks under the
        // write lock, so a concurrent insert between the two locks is harmless.
        let mut counts = self
            .endpoint_counts
            .write()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        counts
            .entry(key)
            .or_insert_with(|| AtomicU64::new(0))
            .fetch_add(1, Ordering::Relaxed);
    }

    /// Returns a snapshot of all current counter values.
    ///
    /// The fixed keys are `http_requests_total`,
    /// `http_request_duration_ms_total`, `http_requests_2xx`,
    /// `http_requests_4xx` and `http_requests_5xx`. Every per-endpoint counter
    /// is added as `http_endpoint_<key>`, where `<key>` is the endpoint key
    /// lower-cased with spaces and slashes replaced by underscores.
    ///
    /// Each counter is loaded individually with relaxed ordering, so the
    /// values are not guaranteed to belong to one single instant.
    pub fn snapshot(&self) -> HashMap<String, serde_json::Value> {
        let mut m = HashMap::new();
        m.insert(
            "http_requests_total".into(),
            serde_json::json!(self.request_count.load(Ordering::Relaxed)),
        );
        m.insert(
            "http_request_duration_ms_total".into(),
            serde_json::json!(self.total_duration_ms.load(Ordering::Relaxed)),
        );
        m.insert(
            "http_requests_2xx".into(),
            serde_json::json!(self.status_2xx.load(Ordering::Relaxed)),
        );
        m.insert(
            "http_requests_4xx".into(),
            serde_json::json!(self.status_4xx.load(Ordering::Relaxed)),
        );
        m.insert(
            "http_requests_5xx".into(),
            serde_json::json!(self.status_5xx.load(Ordering::Relaxed)),
        );

        // Per-endpoint counters. Reading must keep working even if a writer
        // panicked while holding the lock, hence the poison recovery.
        let endpoints = self
            .endpoint_counts
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        for (key, counter) in endpoints.iter() {
            // Sanitize key for use as metric name: replace spaces and slashes with underscores.
            let sanitized = key.replace([' ', '/'], "_").to_lowercase();
            let metric_key = format!("http_endpoint_{}", sanitized);
            m.insert(metric_key, serde_json::json!(counter.load(Ordering::Relaxed)));
        }
        m
    }
}
/// Actix-web middleware factory that records per-request metrics into a shared
/// [`HttpMetrics`].
///
/// NOTE(review): nothing in this file writes the counters to a log or an HTTP
/// endpoint; presumably another module exports them — confirm against callers.
pub struct MetricsMiddleware {
/// Shared counters; every service built by `new_transform` gets a clone of this handle.
metrics: Arc<HttpMetrics>,
}
impl MetricsMiddleware {
/// Constructs a new `MetricsMiddleware` wrapping the shared `HttpMetrics`.
pub fn new(metrics: Arc<HttpMetrics>) -> Self {
Self { metrics }
}
}
impl<S, B> Transform<S, ServiceRequest> for MetricsMiddleware
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Transform = MetricsMiddlewareService<S>;
    type InitError = ();
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    /// Wraps `service` in a [`MetricsMiddlewareService`] that shares this
    /// factory's [`HttpMetrics`] handle. Construction cannot fail, so the
    /// returned future is immediately ready with `Ok`.
    fn new_transform(&self, service: S) -> Self::Future {
        let wrapped = MetricsMiddlewareService {
            service: Arc::new(service),
            metrics: Arc::clone(&self.metrics),
        };
        ok(wrapped)
    }
}
/// Service wrapper created by [`MetricsMiddleware`]: forwards each request to
/// the inner service and records the outcome in the shared [`HttpMetrics`].
pub struct MetricsMiddlewareService<S> {
/// The wrapped inner service that actually handles the request.
service: Arc<S>,
/// Shared counters this service updates for the requests it forwards.
metrics: Arc<HttpMetrics>,
}
impl<S> Clone for MetricsMiddlewareService<S> {
fn clone(&self) -> Self {
Self {
service: self.service.clone(),
metrics: self.metrics.clone(),
}
}
}
impl<S, B> Service<ServiceRequest> for MetricsMiddlewareService<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&self, req: ServiceRequest) -> Self::Future {
let started = Instant::now();
let service = self.service.clone();
let metrics = self.metrics.clone();
let method = req.method().as_str().to_string();
let path = req.path().to_string();
Box::pin(async move {
let res = service.call(req).await?;
let elapsed_ms = started.elapsed().as_millis() as u64;
let status_code = res.status().as_u16();
// Update counters atomically.
metrics.request_count.fetch_add(1, Ordering::Relaxed);
metrics.total_duration_ms.fetch_add(elapsed_ms, Ordering::Relaxed);
metrics.incr_endpoint(&method, &path);
match status_code {
200..=299 => {
metrics.status_2xx.fetch_add(1, Ordering::Relaxed);
}
400..=499 => {
metrics.status_4xx.fetch_add(1, Ordering::Relaxed);
}
500..=599 => {
metrics.status_5xx.fetch_add(1, Ordering::Relaxed);
}
_ => {}
}
Ok(res)
})
}
}