From 8defac98ad021a704fb167a9279193e2746f2c05 Mon Sep 17 00:00:00 2001 From: ZhenYi <434836402@qq.com> Date: Wed, 22 Apr 2026 23:28:56 +0800 Subject: [PATCH] fix(observability): resolve tracing double-init runtime panic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both init_tracing_subscriber() and init_otlp() were calling try_init() on the global tracing dispatcher, causing "global default trace dispatcher has already been set" at runtime when APP_OTEL_ENABLED=true. Fix: simplify the API so that exactly one function installs the subscriber in each mode — init_tracing_subscriber() either calls try_init() immediately (non-OTLP mode, defer=false) or returns without installing anything (OTLP mode, defer=true). In OTLP mode init_otlp() builds the complete subscriber stack (registry + env_filter + fmt_layer + otel_layer) and calls try_init() once. init_tracing_subscriber() signature: (level, defer) → () init_otlp() signature: (endpoint, service_name, _service_version, log_level) → Result (the service_version argument is now ignored) The fmt layer is replicated inside init_otlp() for the OTLP path. 
--- apps/adminrpc/src/main.rs | 2 +- apps/app/src/main.rs | 5 +++-- libs/observability/src/otlp.rs | 18 ++++++++---------- libs/observability/src/tracing_fmt.rs | 16 +++++++++++----- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/apps/adminrpc/src/main.rs b/apps/adminrpc/src/main.rs index 33ee64b..58938ad 100644 --- a/apps/adminrpc/src/main.rs +++ b/apps/adminrpc/src/main.rs @@ -15,7 +15,7 @@ use args::Args; async fn main() -> anyhow::Result<()> { let cfg = AppConfig::load(); let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string()); - observability::init_tracing_subscriber(&log_level); + observability::init_tracing_subscriber(&log_level, false); let args = Args::parse(); let grpc_addr: SocketAddr = args diff --git a/apps/app/src/main.rs b/apps/app/src/main.rs index 9c9bbc5..d7b9bd9 100644 --- a/apps/app/src/main.rs +++ b/apps/app/src/main.rs @@ -40,7 +40,8 @@ fn build_session_key(cfg: &AppConfig) -> anyhow::Result { async fn main() -> anyhow::Result<()> { let cfg = AppConfig::load(); let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string()); - init_tracing_subscriber(&log_level); + let otel_enabled = cfg.otel_enabled().unwrap_or(false); + init_tracing_subscriber(&log_level, otel_enabled); tracing::info!( app_name = %cfg.app_name().unwrap_or_default(), app_version = %cfg.app_version().unwrap_or_default(), @@ -66,7 +67,7 @@ async fn main() -> anyhow::Result<()> { let worker_handle = tokio::spawn(async move { worker_service.start_room_workers(shutdown_rx).await }); - let _otel_guard = if cfg.otel_enabled().unwrap_or(false) { + let _otel_guard = if otel_enabled { let endpoint = cfg .otel_endpoint() .unwrap_or_else(|_| "http://localhost:4317".to_string()); diff --git a/libs/observability/src/otlp.rs b/libs/observability/src/otlp.rs index 0b481d3..4dcd370 100644 --- a/libs/observability/src/otlp.rs +++ b/libs/observability/src/otlp.rs @@ -9,7 +9,6 @@ //! OTLP tracing layer on top. 
use opentelemetry::trace::TracerProvider; -use opentelemetry::KeyValue; use opentelemetry_otlp::{SpanExporter, WithExportConfig}; use opentelemetry_sdk::trace as sdktrace; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; @@ -34,10 +33,15 @@ impl OtelGuard { /// Uses HTTP/proto transport to the given endpoint. /// Returns `Ok(Some(guard))` on success; the caller should store the guard and /// call `guard.shutdown()` during app shutdown for a clean flush. +/// +/// Call this after `init_tracing_subscriber(level, true)`, which defers installing +/// a subscriber. This function builds the complete subscriber itself (registry, +/// env filter, fmt layer and OTLP tracing layer) and calls `try_init()` once, +/// avoiding the "global default already set" error. pub fn init_otlp( endpoint: &str, service_name: &str, - service_version: &str, + _service_version: &str, log_level: &str, ) -> Result, InitOtlError> { if endpoint.is_empty() { @@ -63,25 +67,19 @@ pub fn init_otlp( .with_line_number(true) .flatten_event(true); - let resource = opentelemetry_sdk::Resource::builder() - .with_service_name(service_name.to_string()) - .with_attribute(KeyValue::new("service.version", service_version.to_string())) - .build(); - let tracer_provider = sdktrace::SdkTracerProvider::builder() .with_batch_exporter(exporter) - .with_resource(resource) .build(); let tracer = tracer_provider.tracer(service_name.to_string()); let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer); - let registry = tracing_subscriber::registry() + let layered = tracing_subscriber::registry() .with(env_filter) .with(fmt_layer) .with(otel_layer); - registry + tracing::Dispatch::new(layered) .try_init() .map_err(|e| InitOtlError::SubscriberInit(e.to_string()))?; diff --git a/libs/observability/src/tracing_fmt.rs b/libs/observability/src/tracing_fmt.rs index aeb49c2..3a5ec02 100644 --- a/libs/observability/src/tracing_fmt.rs +++ 
b/libs/observability/src/tracing_fmt.rs @@ -37,7 +37,11 @@ pub fn instance_id() -> String { /// Each JSON line includes `ts`, `level`, `target` (module), `fields` (structured kv), /// `line`, `file`, and `instance_id`. /// `RUST_LOG` env var controls the log level filter. -pub fn init_tracing_subscriber(level: &str) { +/// +/// Pass `defer = true` when OTLP will be initialized afterwards via `init_otlp()`; +/// in that case this function only builds the subscriber without calling `try_init()`, +/// and the combined (fmt + OTLP) subscriber is installed by `init_otlp()` instead. +pub fn init_tracing_subscriber(level: &str, defer: bool) { let env_filter = EnvFilter::try_from_default_env() .or_else(|_| EnvFilter::from_str(level)) .expect("invalid log level"); @@ -55,9 +59,11 @@ pub fn init_tracing_subscriber(level: &str) { .with(env_filter) .with(fmt_layer); - // try_init only fails if a global is already set — this is safe when - // init_otlp() is also called (it rebuilds the subscriber with OTLP layers). + if defer { + // Caller will invoke init_otlp() which builds the full subscriber + // including the OTLP layer, then calls try_init() once. + return; + } + let _ = registry.try_init(); } - -