gitdataai/libs/observability/src/otlp.rs
ZhenYi b4024aa690 feat(observability): Phase 6 OTLP tracing + Prometheus /metrics endpoint
- Add HTTP OTLP exporter (opentelemetry-otlp 0.31) via SdkTracerProvider +
  BatchSpanProcessor + tracing_opentelemetry layer
- Add Prometheus /metrics handler via metrics-exporter-prometheus 0.13
- Replace slog with tracing throughout: HttpMetrics, TracingSpanMiddleware
- Replace .init() with .try_init() to allow OTLP layer registration after
  init_tracing_subscriber()
- otlp.rs: SpanExporter::builder().with_http().with_endpoint(),
  Resource::builder().with_service_name(), .with_attribute(KeyValue::new(...))
- prometheus_exporter.rs: install_recorder(), prometheus_handler(),
  spawn_http_metrics_poller()
2026-04-21 22:28:15 +08:00

104 lines
3.2 KiB
Rust

//! OTLP tracer initialisation (Phase 6).
//!
//! Uses HTTP/proto transport to the OTLP endpoint.
//! The endpoint URL is passed as-is to the HTTP exporter.
//! Default Kubernetes otel-collector-agent accepts HTTP on :4318.
//!
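//! Call `init_otlp()` **after** `init_tracing_subscriber()` so the fmt layer is
//! already registered. This function builds a fresh subscriber (env filter,
//! JSON fmt layer and the OTLP tracing layer) and installs it with `try_init()`.
//!
//! NOTE(review): `try_init()` reports an error when a global default subscriber
//! is already installed, so confirm that `init_tracing_subscriber()` does not
//! install one first; otherwise `init_otlp()` returns
//! `InitOtlError::SubscriberInit`.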
use opentelemetry::trace::TracerProvider;
use opentelemetry::KeyValue;
use opentelemetry_otlp::{SpanExporter, WithExportConfig};
use opentelemetry_sdk::trace as sdktrace;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
/// Handle that owns the OTLP tracer provider created by `init_otlp()`.
///
/// Keep the guard alive for the lifetime of the application and call
/// `shutdown()` during app shutdown so the provider is shut down explicitly.
///
/// NOTE(review): no `Drop` impl for this type is visible in this file, so any
/// flushing that happens when the guard is merely dropped depends on the
/// wrapped provider's own drop behaviour; prefer the explicit `shutdown()`.
#[must_use]
pub struct OtelGuard {
// Tracer provider backing the OpenTelemetry layer installed by `init_otlp()`.
provider: sdktrace::SdkTracerProvider,
}
impl OtelGuard {
/// Force-flush any pending spans and shut down the OTLP exporter.
pub async fn shutdown(self) {
if let Err(e) = self.provider.shutdown() {
tracing::warn!(error = %e, "OTLP tracer shutdown error");
}
}
}
/// Initialise OTLP tracing and install it as part of the global tracing subscriber.
///
/// Builds an HTTP OTLP span exporter for `endpoint`, wraps it in a batching
/// tracer provider tagged with the service name and `service.version`, and
/// installs a registry made of an env filter, a JSON fmt layer and the
/// OpenTelemetry layer via `try_init()`.
///
/// # Arguments
/// * `endpoint` - URL of the OTLP HTTP endpoint. Surrounding whitespace and
///   trailing `/` characters are stripped; nothing is appended to it here.
///   NOTE(review): it presumably has to contain the full traces path already
///   (e.g. `.../v1/traces`) — verify against the exporter documentation.
/// * `service_name` - reported as the resource service name and used as the
///   tracer name.
/// * `service_version` - reported as the `service.version` resource attribute.
/// * `log_level` - filter directive used only when no usable filter can be
///   obtained from the default environment variable.
///
/// # Returns
/// `Ok(Some(guard))` on success (this function never returns `Ok(None)`); the
/// caller should store the guard and call `guard.shutdown()` during app
/// shutdown for a clean flush.
///
/// # Errors
/// * [`InitOtlError::EmptyEndpoint`] - `endpoint` is empty once whitespace and
///   trailing slashes have been removed.
/// * [`InitOtlError::ExporterInit`] - the OTLP span exporter could not be built.
/// * [`InitOtlError::SubscriberInit`] - `try_init()` failed, e.g. because a
///   global default subscriber is already installed.
pub fn init_otlp(
    endpoint: &str,
    service_name: &str,
    service_version: &str,
    log_level: &str,
) -> Result<Option<OtelGuard>, InitOtlError> {
    // Normalise first and validate afterwards, so inputs such as "/" or "   "
    // are rejected here instead of reaching the exporter as an empty string.
    let endpoint = endpoint.trim().trim_end_matches('/');
    if endpoint.is_empty() {
        return Err(InitOtlError::EmptyEndpoint);
    }

    let exporter = SpanExporter::builder()
        .with_http()
        .with_endpoint(endpoint)
        .build()
        .map_err(|e| InitOtlError::ExporterInit(e.to_string()))?;

    // The environment-provided filter wins; `log_level` is only the fallback.
    let env_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(log_level));

    let fmt_layer = tracing_subscriber::fmt::layer()
        .json()
        .with_target(true)
        .with_thread_ids(false)
        .with_file(true)
        .with_line_number(true)
        .flatten_event(true);

    let resource = opentelemetry_sdk::Resource::builder()
        .with_service_name(service_name.to_string())
        .with_attribute(KeyValue::new("service.version", service_version.to_string()))
        .build();

    let tracer_provider = sdktrace::SdkTracerProvider::builder()
        .with_batch_exporter(exporter)
        .with_resource(resource)
        .build();

    let tracer = tracer_provider.tracer(service_name.to_string());
    let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);

    tracing_subscriber::registry()
        .with(env_filter)
        .with(fmt_layer)
        .with(otel_layer)
        .try_init()
        .map_err(|e| InitOtlError::SubscriberInit(e.to_string()))?;

    tracing::debug!(endpoint = %endpoint, "OTLP tracer installed");
    Ok(Some(OtelGuard { provider: tracer_provider }))
}
/// Errors returned by `init_otlp()`.
///
/// The exporter and subscriber variants carry the underlying error rendered
/// as a string rather than the original error value.
#[derive(Debug, thiserror::Error)]
pub enum InitOtlError {
/// The endpoint passed to `init_otlp()` was empty.
#[error("endpoint is empty")]
EmptyEndpoint,
/// Building the OTLP span exporter failed; holds the builder's error message.
#[error("failed to build OTLP exporter: {0}")]
ExporterInit(String),
/// Installing the tracing subscriber via `try_init()` failed; holds the
/// reported error message.
#[error("failed to set tracing subscriber: {0}")]
SubscriberInit(String),
}