gitdataai/libs/observability/src/otlp.rs
ZhenYi 8defac98ad fix(observability): resolve tracing double-init runtime panic
Both init_tracing_subscriber() and init_otlp() were calling try_init()
on the global tracing dispatcher, causing "global default trace dispatcher
has already been set" at runtime when APP_OTEL_ENABLED=true.

Fix: simplify the API so init_tracing_subscriber() never installs the
subscriber — it either calls try_init() immediately (non-OTLP mode) or
returns without installing (OTLP mode, defer=true).  init_otlp() now
builds the complete subscriber stack (registry + env_filter + fmt_layer +
otel_layer) and calls try_init() once.

init_tracing_subscriber() signature: (level, defer) → ()
init_otlp() signature: (endpoint, service_name, _, log_level) → Result

The fmt layer is replicated inside init_otlp() for the OTLP path.
2026-04-22 23:28:56 +08:00

102 lines
3.3 KiB
Rust

//! OTLP tracer initialisation (Phase 6).
//!
//! Uses HTTP/proto transport to the OTLP endpoint.
//! The endpoint URL is passed as-is to the HTTP exporter.
//! Default Kubernetes otel-collector-agent accepts HTTP on :4318.
//!
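//! When OTLP is enabled, call `init_tracing_subscriber(level, true)` first so
//! that it does not install a subscriber, then call `init_otlp()`. `init_otlp()`
//! builds the complete subscriber stack (env filter + JSON fmt layer + OTLP
//! tracing layer) itself and installs it with a single `try_init()` call.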
use opentelemetry::trace::TracerProvider;
use opentelemetry_otlp::{SpanExporter, WithExportConfig};
use opentelemetry_sdk::trace as sdktrace;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
/// Guard returned by `init_otlp()` that owns the OTLP tracer provider.
///
/// Keep it alive for the lifetime of the application and call
/// `OtelGuard::shutdown` during app shutdown; `#[must_use]` makes the compiler
/// warn if the guard is discarded immediately.
///
/// NOTE(review): no `Drop` impl for this type is visible in this file, so any
/// shutdown-on-drop behaviour would come from `SdkTracerProvider` itself —
/// confirm before relying on it instead of calling `shutdown()` explicitly.
#[must_use]
pub struct OtelGuard {
// Tracer provider built in `init_otlp()`; shut down by `OtelGuard::shutdown`.
provider: sdktrace::SdkTracerProvider,
}
impl OtelGuard {
/// Force-flush any pending spans and shut down the OTLP exporter.
pub async fn shutdown(self) {
if let Err(e) = self.provider.shutdown() {
tracing::warn!(error = %e, "OTLP tracer shutdown error");
}
}
}
/// Initialise OTLP tracing and install the global tracing subscriber.
///
/// Builds an OTLP span exporter using the HTTP transport, wraps it in a
/// batch-exporting tracer provider, and installs one subscriber composed of an
/// `EnvFilter`, a JSON `fmt` layer and the OpenTelemetry layer. `try_init()` is
/// called exactly once here, so no other code may have installed a global
/// subscriber beforehand (otherwise `InitOtlError::SubscriberInit` is returned).
///
/// # Arguments
///
/// * `endpoint` – URL of the OTLP HTTP endpoint. Surrounding whitespace and
///   trailing `/` characters are stripped; the remainder is handed to the
///   exporter unchanged.
/// * `service_name` – name used for the tracer obtained from the provider.
///   NOTE(review): this names the tracer only; no resource attribute such as
///   `service.name` is configured in this function — confirm that is intended.
/// * `_service_version` – currently unused.
/// * `log_level` – filter directive used when
///   `EnvFilter::try_from_default_env()` fails.
///
/// # Returns
///
/// `Ok(Some(guard))` on success; the caller should store the guard and call
/// `guard.shutdown()` during app shutdown. This function never returns
/// `Ok(None)`.
///
/// # Errors
///
/// * `InitOtlError::EmptyEndpoint` – `endpoint` is empty after normalisation.
/// * `InitOtlError::ExporterInit` – the OTLP span exporter could not be built.
/// * `InitOtlError::SubscriberInit` – installing the global subscriber failed.
pub fn init_otlp(
    endpoint: &str,
    service_name: &str,
    _service_version: &str,
    log_level: &str,
) -> Result<Option<OtelGuard>, InitOtlError> {
    // Normalise *before* validating so inputs such as "" / "  " / "/" are
    // rejected up front instead of reaching the exporter as an empty URL.
    let endpoint = endpoint.trim().trim_end_matches('/');
    if endpoint.is_empty() {
        return Err(InitOtlError::EmptyEndpoint);
    }

    let exporter = SpanExporter::builder()
        .with_http()
        .with_endpoint(endpoint)
        .build()
        .map_err(|e| InitOtlError::ExporterInit(e.to_string()))?;

    // Prefer the filter from the environment; fall back to the configured level.
    let env_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(log_level));

    // JSON fmt layer, built here because this function installs the whole
    // subscriber stack on the OTLP path.
    let fmt_layer = tracing_subscriber::fmt::layer()
        .json()
        .with_target(true)
        .with_thread_ids(false)
        .with_file(true)
        .with_line_number(true)
        .flatten_event(true);

    let tracer_provider = sdktrace::SdkTracerProvider::builder()
        .with_batch_exporter(exporter)
        .build();
    let tracer = tracer_provider.tracer(service_name.to_string());
    let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);

    // Single installation point for the global subscriber.
    tracing_subscriber::registry()
        .with(env_filter)
        .with(fmt_layer)
        .with(otel_layer)
        .try_init()
        .map_err(|e| InitOtlError::SubscriberInit(e.to_string()))?;

    tracing::debug!(endpoint = %endpoint, "OTLP tracer installed");
    Ok(Some(OtelGuard {
        provider: tracer_provider,
    }))
}
/// Errors returned by `init_otlp()`.
///
/// The exporter and subscriber variants carry the underlying error rendered
/// as a `String` (via `to_string()` at the call site), not the source error
/// value itself.
#[derive(Debug, thiserror::Error)]
pub enum InitOtlError {
/// The endpoint passed to `init_otlp()` was empty.
#[error("endpoint is empty")]
EmptyEndpoint,
/// Building the OTLP span exporter failed; holds the error message.
#[error("failed to build OTLP exporter: {0}")]
ExporterInit(String),
/// Installing the global tracing subscriber via `try_init()` failed; holds
/// the error message.
#[error("failed to set tracing subscriber: {0}")]
SubscriberInit(String),
}