fix(observability): resolve tracing double-init runtime panic

Both init_tracing_subscriber() and init_otlp() were calling try_init()
on the global tracing dispatcher, causing "global default trace dispatcher
has already been set" at runtime when APP_OTEL_ENABLED=true.

Fix: simplify the API so the global subscriber is installed exactly once.
init_tracing_subscriber() either calls try_init() immediately (non-OTLP
mode, defer=false) or returns without installing anything (OTLP mode,
defer=true).  In OTLP mode, init_otlp() builds the complete subscriber
stack (registry + env_filter + fmt_layer + otel_layer) and calls
try_init() once.

init_tracing_subscriber() signature: (level, defer) → ()
init_otlp() signature: (endpoint, service_name, _, log_level) → Result

The fmt layer is replicated inside init_otlp() for the OTLP path.
This commit is contained in:
ZhenYi 2026-04-22 23:28:56 +08:00
parent 2d2349a06b
commit 8defac98ad
4 changed files with 23 additions and 18 deletions

View File

@ -15,7 +15,7 @@ use args::Args;
async fn main() -> anyhow::Result<()> {
let cfg = AppConfig::load();
let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
observability::init_tracing_subscriber(&log_level);
observability::init_tracing_subscriber(&log_level, false);
let args = Args::parse();
let grpc_addr: SocketAddr = args

View File

@ -40,7 +40,8 @@ fn build_session_key(cfg: &AppConfig) -> anyhow::Result<Key> {
async fn main() -> anyhow::Result<()> {
let cfg = AppConfig::load();
let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
init_tracing_subscriber(&log_level);
let otel_enabled = cfg.otel_enabled().unwrap_or(false);
init_tracing_subscriber(&log_level, otel_enabled);
tracing::info!(
app_name = %cfg.app_name().unwrap_or_default(),
app_version = %cfg.app_version().unwrap_or_default(),
@ -66,7 +67,7 @@ async fn main() -> anyhow::Result<()> {
let worker_handle =
tokio::spawn(async move { worker_service.start_room_workers(shutdown_rx).await });
let _otel_guard = if cfg.otel_enabled().unwrap_or(false) {
let _otel_guard = if otel_enabled {
let endpoint = cfg
.otel_endpoint()
.unwrap_or_else(|_| "http://localhost:4317".to_string());

View File

@ -9,7 +9,6 @@
//! OTLP tracing layer on top.
use opentelemetry::trace::TracerProvider;
use opentelemetry::KeyValue;
use opentelemetry_otlp::{SpanExporter, WithExportConfig};
use opentelemetry_sdk::trace as sdktrace;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
@ -34,10 +33,15 @@ impl OtelGuard {
/// Uses HTTP/proto transport to the given endpoint.
/// Returns `Ok(Some(guard))` on success; the caller should store the guard and
/// call `guard.shutdown()` during app shutdown for a clean flush.
///
/// Call `init_tracing_subscriber(level, true)` beforehand so that it returns
/// without installing a subscriber. This function then builds the complete
/// stack itself (registry + env filter + fmt layer + OTLP tracing layer) and
/// calls `try_init()` once, avoiding the "global default already set" error.
pub fn init_otlp(
endpoint: &str,
service_name: &str,
service_version: &str,
_service_version: &str,
log_level: &str,
) -> Result<Option<OtelGuard>, InitOtlError> {
if endpoint.is_empty() {
@ -63,25 +67,19 @@ pub fn init_otlp(
.with_line_number(true)
.flatten_event(true);
let resource = opentelemetry_sdk::Resource::builder()
.with_service_name(service_name.to_string())
.with_attribute(KeyValue::new("service.version", service_version.to_string()))
.build();
let tracer_provider = sdktrace::SdkTracerProvider::builder()
.with_batch_exporter(exporter)
.with_resource(resource)
.build();
let tracer = tracer_provider.tracer(service_name.to_string());
let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);
let registry = tracing_subscriber::registry()
let layered = tracing_subscriber::registry()
.with(env_filter)
.with(fmt_layer)
.with(otel_layer);
registry
tracing::Dispatch::new(layered)
.try_init()
.map_err(|e| InitOtlError::SubscriberInit(e.to_string()))?;

View File

@ -37,7 +37,11 @@ pub fn instance_id() -> String {
/// Each JSON line includes `ts`, `level`, `target` (module), `fields` (structured kv),
/// `line`, `file`, and `instance_id`.
/// `RUST_LOG` env var controls the log level filter.
pub fn init_tracing_subscriber(level: &str) {
///
/// Pass `defer = true` when OTLP will be initialized afterwards via `init_otlp()`;
/// in that case this function only builds the subscriber without calling `try_init()`,
/// and the combined (fmt + OTLP) subscriber is installed by `init_otlp()` instead.
pub fn init_tracing_subscriber(level: &str, defer: bool) {
let env_filter = EnvFilter::try_from_default_env()
.or_else(|_| EnvFilter::from_str(level))
.expect("invalid log level");
@ -55,9 +59,11 @@ pub fn init_tracing_subscriber(level: &str) {
.with(env_filter)
.with(fmt_layer);
// try_init only fails if a global is already set — this is safe when
// init_otlp() is also called (it rebuilds the subscriber with OTLP layers).
if defer {
// Caller will invoke init_otlp() which builds the full subscriber
// including the OTLP layer, then calls try_init() once.
return;
}
let _ = registry.try_init();
}