use std::collections::HashMap; use std::sync::Arc; use metrics::{ describe_counter, describe_gauge, describe_histogram, register_counter, register_gauge, register_histogram, Counter, Gauge, Histogram, Unit, }; use tokio::sync::RwLock; use uuid::Uuid; pub struct RoomMetrics { pub rooms_online: Gauge, pub users_online: Gauge, pub ws_connections_active: Gauge, pub ws_connections_total: Counter, pub ws_disconnections_total: Counter, pub messages_sent: Counter, pub messages_persisted: Counter, pub messages_persist_failed: Counter, pub broadcasts_sent: Counter, pub broadcasts_dropped: Counter, pub duplicates_skipped: Counter, pub redis_publish_failed: Counter, pub message_latency_ms: Histogram, pub ws_rate_limit_hits: Counter, pub ws_auth_failures: Counter, pub ws_heartbeat_sent_total: Counter, pub ws_heartbeat_timeout_total: Counter, pub ws_idle_timeout_total: Counter, room_connections: RwLock>, room_messages: RwLock>, } impl Default for RoomMetrics { fn default() -> Self { describe_gauge!("room_online_rooms", "Number of rooms with active workers"); describe_gauge!( "room_online_users", "Total number of online WebSocket users" ); describe_gauge!( "room_ws_connections_active", "Current number of active WebSocket connections" ); describe_counter!( "room_ws_connections_total", Unit::Count, "Total WebSocket connections established" ); describe_counter!( "room_ws_disconnections_total", Unit::Count, "Total WebSocket disconnections" ); describe_counter!( "room_messages_sent_total", Unit::Count, "Total messages sent to rooms" ); describe_counter!( "room_messages_persisted_total", Unit::Count, "Total messages persisted to database" ); describe_counter!( "room_messages_persist_failed_total", Unit::Count, "Total message persistence failures" ); describe_counter!( "room_broadcasts_sent_total", Unit::Count, "Total WebSocket broadcasts sent" ); describe_counter!( "room_duplicates_skipped_total", Unit::Count, "Total duplicate messages skipped (idempotency)" ); describe_counter!( "room_redis_publish_failed_total", Unit::Count, "Total Redis publish failures" ); describe_histogram!( "room_message_latency_ms", Unit::Milliseconds, "Message processing latency from publish to persist" ); describe_counter!( "room_ws_rate_limit_hits_total", Unit::Count, "Total WebSocket rate limit rejections" ); describe_counter!( "room_ws_auth_failures_total", Unit::Count, "Total WebSocket authentication/authorization failures" ); describe_counter!( "room_ws_heartbeat_sent_total", Unit::Count, "Total WebSocket heartbeat pings sent by server" ); describe_counter!( "room_ws_heartbeat_timeout_total", Unit::Count, "Total WebSocket connections closed due to heartbeat timeout" ); describe_counter!( "room_ws_idle_timeout_total", Unit::Count, "Total WebSocket connections closed due to idle timeout" ); describe_counter!( "room_broadcasts_dropped_total", Unit::Count, "Total broadcasts dropped due to channel full" ); Self { rooms_online: register_gauge!("room_online_rooms"), users_online: register_gauge!("room_online_users"), ws_connections_active: register_gauge!("room_ws_connections_active"), ws_connections_total: register_counter!("room_ws_connections_total"), ws_disconnections_total: register_counter!("room_ws_disconnections_total"), messages_sent: register_counter!("room_messages_sent_total"), messages_persisted: register_counter!("room_messages_persisted_total"), messages_persist_failed: register_counter!("room_messages_persist_failed_total"), broadcasts_sent: register_counter!("room_broadcasts_sent_total"), broadcasts_dropped: register_counter!("room_broadcasts_dropped_total"), duplicates_skipped: register_counter!("room_duplicates_skipped_total"), redis_publish_failed: register_counter!("room_redis_publish_failed_total"), message_latency_ms: register_histogram!("room_message_latency_ms"), ws_rate_limit_hits: register_counter!("room_ws_rate_limit_hits_total"), ws_auth_failures: register_counter!("room_ws_auth_failures_total"), ws_heartbeat_sent_total: register_counter!("room_ws_heartbeat_sent_total"), ws_heartbeat_timeout_total: register_counter!("room_ws_heartbeat_timeout_total"), ws_idle_timeout_total: register_counter!("room_ws_idle_timeout_total"), room_connections: RwLock::new(HashMap::new()), room_messages: RwLock::new(HashMap::new()), } } } impl RoomMetrics { pub fn new() -> Self { Self::default() } pub fn record_message_latency(&self, ms: f64) { self.message_latency_ms.record(ms); } pub fn incr_duplicates_skipped(&self) { self.duplicates_skipped.increment(1); } pub async fn incr_room_connections(&self, room_id: Uuid) { let mut map = self.room_connections.write().await; let counter = map.entry(room_id).or_insert_with(|| { register_gauge!(format!("room_connections{{room_id=\"{}\"}}", room_id)) }); counter.increment(1.0); } pub async fn dec_room_connections(&self, room_id: Uuid) { let map = self.room_connections.read().await; if let Some(counter) = map.get(&room_id) { counter.decrement(1.0); } } pub async fn incr_room_messages(&self, room_id: Uuid) { let mut map = self.room_messages.write().await; let counter = map.entry(room_id).or_insert_with(|| { register_counter!(format!("room_messages_total{{room_id=\"{}\"}}", room_id)) }); counter.increment(1); } pub async fn cleanup_stale_rooms(&self, active_room_ids: &[Uuid]) { let mut conn_map = self.room_connections.write().await; conn_map.retain(|room_id, _| active_room_ids.contains(room_id)); let mut msg_map = self.room_messages.write().await; msg_map.retain(|room_id, _| active_room_ids.contains(room_id)); } pub fn into_arc(self) -> Arc { Arc::new(self) } }